mirror of
https://github.com/typeorm/typeorm.git
synced 2025-12-08 21:26:23 +00:00
feat(mysql): add support for vector columns on MariaDB and MySQL (#11670)
This commit is contained in:
parent
dd55218648
commit
cfb3d6c015
@ -12,7 +12,7 @@ services:
|
||||
MYSQL_DATABASE: "test"
|
||||
|
||||
mysql-9:
|
||||
image: "mysql:9.4.0"
|
||||
image: "mysql:9.5.0"
|
||||
container_name: "typeorm-mysql-9"
|
||||
ports:
|
||||
- "3306:3306"
|
||||
@ -24,7 +24,7 @@ services:
|
||||
|
||||
# mariadb
|
||||
mariadb-10:
|
||||
image: "mariadb:10.6.22-jammy"
|
||||
image: "mariadb:10.6.24-jammy"
|
||||
container_name: "typeorm-mariadb-10"
|
||||
ports:
|
||||
- "3307:3306"
|
||||
@ -35,7 +35,7 @@ services:
|
||||
MYSQL_DATABASE: "test"
|
||||
|
||||
mariadb-12:
|
||||
image: "mariadb:12.0.1-rc"
|
||||
image: "mariadb:12.1.2"
|
||||
container_name: "typeorm-mariadb-12"
|
||||
ports:
|
||||
- "3307:3306"
|
||||
|
||||
@ -207,7 +207,7 @@ const queryEmbedding = [
|
||||
const results = await dataSource.query(
|
||||
`
|
||||
DECLARE @question AS VECTOR (1998) = @0;
|
||||
SELECT TOP (10) dc.*,
|
||||
SELECT TOP (10) dc.*,
|
||||
VECTOR_DISTANCE('cosine', @question, embedding) AS distance
|
||||
FROM document_chunk dc
|
||||
ORDER BY VECTOR_DISTANCE('cosine', @question, embedding)
|
||||
|
||||
@ -139,3 +139,7 @@ export class User {
|
||||
roles: UserRoleType[]
|
||||
}
|
||||
```
|
||||
|
||||
### Vector Types
|
||||
|
||||
MySQL has supported the [VECTOR type](https://dev.mysql.com/doc/refman/en/vector.html) since version 9.0, while in MariaDB, [vectors](https://mariadb.com/docs/server/reference/sql-structure/vectors/vector-overview) have been available since 11.7.
|
||||
|
||||
@ -60,7 +60,7 @@ Additional options can be added to the `extra` object and will be passed directl
|
||||
|
||||
### Column types for `postgres`
|
||||
|
||||
`int`, `int2`, `int4`, `int8`, `smallint`, `integer`, `bigint`, `decimal`, `numeric`, `real`, `float`, `float4`, `float8`, `double precision`, `money`, `character varying`, `varchar`, `character`, `char`, `text`, `citext`, `hstore`, `bytea`, `bit`, `varbit`, `bit varying`, `timetz`, `timestamptz`, `timestamp`, `timestamp without time zone`, `timestamp with time zone`, `date`, `time`, `time without time zone`, `time with time zone`, `interval`, `bool`, `boolean`, `enum`, `point`, `line`, `lseg`, `box`, `path`, `polygon`, `circle`, `cidr`, `inet`, `macaddr`, `macaddr8`, `tsvector`, `tsquery`, `uuid`, `xml`, `json`, `jsonb`, `jsonpath`, `int4range`, `int8range`, `numrange`, `tsrange`, `tstzrange`, `daterange`, `int4multirange`, `int8multirange`, `nummultirange`, `tsmultirange`, `tstzmultirange`, `multidaterange`, `geometry`, `geography`, `cube`, `ltree`
|
||||
`int`, `int2`, `int4`, `int8`, `smallint`, `integer`, `bigint`, `decimal`, `numeric`, `real`, `float`, `float4`, `float8`, `double precision`, `money`, `character varying`, `varchar`, `character`, `char`, `text`, `citext`, `hstore`, `bytea`, `bit`, `varbit`, `bit varying`, `timetz`, `timestamptz`, `timestamp`, `timestamp without time zone`, `timestamp with time zone`, `date`, `time`, `time without time zone`, `time with time zone`, `interval`, `bool`, `boolean`, `enum`, `point`, `line`, `lseg`, `box`, `path`, `polygon`, `circle`, `cidr`, `inet`, `macaddr`, `macaddr8`, `tsvector`, `tsquery`, `uuid`, `xml`, `json`, `jsonb`, `jsonpath`, `int4range`, `int8range`, `numrange`, `tsrange`, `tstzrange`, `daterange`, `int4multirange`, `int8multirange`, `nummultirange`, `tsmultirange`, `tstzmultirange`, `multidaterange`, `geometry`, `geography`, `cube`, `ltree`, `vector`, `halfvec`.
|
||||
|
||||
### Column types for `cockroachdb`
|
||||
|
||||
@ -68,6 +68,33 @@ Additional options can be added to the `extra` object and will be passed directl
|
||||
|
||||
Note: CockroachDB returns all numeric data types as `string`. However, if you omit the column type and define your property as `number`, the ORM will `parseInt` the string into a number.
|
||||
|
||||
### Vector columns
|
||||
|
||||
Vector columns can be used for similarity searches using PostgreSQL's vector operators:
|
||||
|
||||
```typescript
|
||||
// L2 distance (Euclidean) - <->
|
||||
const results = await dataSource.sql`
|
||||
SELECT id, embedding
|
||||
FROM post
|
||||
ORDER BY embedding <-> ${"[1,2,3]"}
|
||||
LIMIT 5`
|
||||
|
||||
// Cosine distance - <=>
|
||||
const results = await dataSource.sql`
|
||||
SELECT id, embedding
|
||||
FROM post
|
||||
ORDER BY embedding <=> ${"[1,2,3]"}
|
||||
LIMIT 5`
|
||||
|
||||
// Inner product - <#>
|
||||
const results = await dataSource.sql`
|
||||
SELECT id, embedding
|
||||
FROM post
|
||||
ORDER BY embedding <#> ${"[1,2,3]"}
|
||||
LIMIT 5`
|
||||
```
|
||||
|
||||
### Spatial columns
|
||||
|
||||
TypeORM's PostgreSQL and CockroachDB support uses [GeoJSON](http://geojson.org/) as an interchange format, so geometry columns should be tagged either as `object` or `Geometry` (or subclasses, e.g. `Point`) after importing [`geojson` types](https://www.npmjs.com/package/@types/geojson) or using the TypeORM built-in GeoJSON types:
|
||||
|
||||
@ -37,15 +37,16 @@ SAP HANA 2.0 and SAP HANA Cloud support slightly different data types. Check the
|
||||
- [SAP HANA 2.0 Data Types](https://help.sap.com/docs/SAP_HANA_PLATFORM/4fe29514fd584807ac9f2a04f6754767/20a1569875191014b507cf392724b7eb.html?locale=en-US)
|
||||
- [SAP HANA Cloud Data Types](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-sql-reference-guide/data-types)
|
||||
|
||||
TypeORM's `SapDriver` supports `tinyint`, `smallint`, `integer`, `bigint`, `smalldecimal`, `decimal`, `real`, `double`, `date`, `time`, `seconddate`, `timestamp`, `boolean`, `char`, `nchar`, `varchar`, `nvarchar`, `text`, `alphanum`, `shorttext`, `array`, `varbinary`, `blob`, `clob`, `nclob`, `st_geometry`, `st_point`, `real_vector`, `half_vector`, `vector`, and `halfvec`. Some of these data types have been deprecated or removed in SAP HANA Cloud, and will be converted to the closest available alternative when connected to a Cloud database.
|
||||
TypeORM's `SapDriver` supports `tinyint`, `smallint`, `integer`, `bigint`, `smalldecimal`, `decimal`, `real`, `double`, `date`, `time`, `seconddate`, `timestamp`, `boolean`, `char`, `nchar`, `varchar`, `nvarchar`, `text`, `alphanum`, `shorttext`, `array`, `varbinary`, `blob`, `clob`, `nclob`, `st_geometry`, `st_point`, `real_vector` and `half_vector`. Some of these data types have been deprecated or removed in SAP HANA Cloud, and will be converted to the closest available alternative when connected to a Cloud database.
|
||||
|
||||
### Vector Types
|
||||
|
||||
The `real_vector` and `half_vector` data types were introduced in SAP HANA Cloud (2024Q1 and 2025Q2 respectively), and require a supported version of `@sap/hana-client` as well.
|
||||
The `real_vector` and `half_vector` data types were introduced in SAP HANA Cloud (2024Q1 and 2025Q2 respectively), and require a supported version of `@sap/hana-client` as well.
|
||||
|
||||
For consistency with PostgreSQL's vector support, TypeORM also provides aliases:
|
||||
- `vector` (alias for `real_vector`) - stores vectors as 4-byte floats
|
||||
- `halfvec` (alias for `half_vector`) - stores vectors as 2-byte floats for memory efficiency
|
||||
|
||||
- `vector` (alias for `real_vector`) - stores vectors as 4-byte floats
|
||||
- `halfvec` (alias for `half_vector`) - stores vectors as 2-byte floats for memory efficiency
|
||||
|
||||
```typescript
|
||||
@Entity()
|
||||
@ -70,3 +71,5 @@ export class Document {
|
||||
```
|
||||
|
||||
By default, the client will return a `Buffer` in the `fvecs`/`hvecs` format, which is more efficient. It is possible to let the driver convert the values to a `number[]` by adding `{ extra: { vectorOutputType: "Array" } }` to the connection options. Check the SAP HANA Client documentation for more information about [REAL_VECTOR](https://help.sap.com/docs/SAP_HANA_CLIENT/f1b440ded6144a54ada97ff95dac7adf/0d197e4389c64e6b9cf90f6f698f62fe.html) or [HALF_VECTOR](https://help.sap.com/docs/SAP_HANA_CLIENT/f1b440ded6144a54ada97ff95dac7adf/8bb854b4ce4a4299bed27c365b717e91.html).
|
||||
|
||||
Use the appropriate [vector functions](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-sql-reference-guide/vector-functions) for similarity searches.
|
||||
|
||||
@ -180,88 +180,6 @@ There are several special column types with additional functionality available:
|
||||
each time you call `save` of entity manager or repository, or during `upsert` operations when an update occurs.
|
||||
You don't need to set this column - it will be automatically set.
|
||||
|
||||
### Vector columns
|
||||
|
||||
Vector columns are supported on PostgreSQL (via [`pgvector`](https://github.com/pgvector/pgvector) extension), Microsoft SQL Server, and SAP HANA Cloud, enabling storing and querying vector embeddings for similarity search and machine learning applications.
|
||||
|
||||
TypeORM supports both `vector` and `halfvec` column types across databases:
|
||||
|
||||
- `vector` - stores vectors as 4-byte floats (single precision)
|
||||
- PostgreSQL: native `vector` type via pgvector extension
|
||||
- SQL Server: native `vector` type
|
||||
- SAP HANA: alias for `real_vector` type
|
||||
- `halfvec` - stores vectors as 2-byte floats (half precision) for memory efficiency
|
||||
- PostgreSQL: native `halfvec` type via pgvector extension
|
||||
- SAP HANA: alias for `half_vector` type
|
||||
|
||||
You can specify the vector dimensions using the `length` option:
|
||||
|
||||
```typescript
|
||||
@Entity()
|
||||
export class Post {
|
||||
@PrimaryGeneratedColumn()
|
||||
id: number
|
||||
|
||||
// Vector without specified dimensions (works on PostgreSQL and SAP HANA; SQL Server requires explicit dimensions)
|
||||
@Column("vector")
|
||||
embedding: number[] | Buffer
|
||||
|
||||
// Vector with 3 dimensions: vector(3)
|
||||
@Column("vector", { length: 3 })
|
||||
embedding_3d: number[] | Buffer
|
||||
|
||||
// Half-precision vector with 4 dimensions: halfvec(4) (PostgreSQL and SAP HANA only)
|
||||
@Column("halfvec", { length: 4 })
|
||||
halfvec_embedding: number[] | Buffer
|
||||
}
|
||||
```
|
||||
|
||||
**PostgreSQL** - Vector columns can be used for similarity searches using vector operators:
|
||||
|
||||
```typescript
|
||||
// L2 distance (Euclidean) - <->
|
||||
const results = await dataSource.query(
|
||||
`SELECT id, embedding FROM post ORDER BY embedding <-> $1 LIMIT 5`,
|
||||
["[1,2,3]"],
|
||||
)
|
||||
|
||||
// Cosine distance - <=>
|
||||
const results = await dataSource.query(
|
||||
`SELECT id, embedding FROM post ORDER BY embedding <=> $1 LIMIT 5`,
|
||||
["[1,2,3]"],
|
||||
)
|
||||
|
||||
// Inner product - <#>
|
||||
const results = await dataSource.query(
|
||||
`SELECT id, embedding FROM post ORDER BY embedding <#> $1 LIMIT 5`,
|
||||
["[1,2,3]"],
|
||||
)
|
||||
```
|
||||
|
||||
**SQL Server** - Use the `VECTOR_DISTANCE` function for similarity searches:
|
||||
|
||||
```typescript
|
||||
const queryEmbedding = [1, 2, 3]
|
||||
|
||||
// Cosine distance
|
||||
const results = await dataSource.query(
|
||||
`
|
||||
DECLARE @question AS VECTOR(3) = @0;
|
||||
SELECT TOP (5) id, embedding,
|
||||
VECTOR_DISTANCE('cosine', @question, embedding) AS distance
|
||||
FROM post
|
||||
ORDER BY VECTOR_DISTANCE('cosine', @question, embedding)
|
||||
`,
|
||||
[JSON.stringify(queryEmbedding)],
|
||||
)
|
||||
```
|
||||
|
||||
> **Note**:
|
||||
>
|
||||
> - **PostgreSQL**: Vector columns require the `pgvector` extension to be installed. The extension provides the vector data types and similarity operators.
|
||||
> - **SQL Server**: Vector type support requires a compatible SQL Server version with vector functionality enabled.
|
||||
> - **SAP HANA**: Vector columns require SAP HANA Cloud (2024Q1+) and a supported version of `@sap/hana-client`. Use the appropriate [vector similarity functions](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-sql-reference-guide/vector-functions) for similarity searches.
|
||||
|
||||
## Column types
|
||||
|
||||
TypeORM supports all of the most commonly used database-supported column types.
|
||||
@ -414,6 +332,50 @@ Besides "uuid" there is also "increment", "identity" (Postgres 10+ only) and "ro
|
||||
on some database platforms with this type of generation (for example some databases can only have one increment column,
|
||||
or some of them require increment to be a primary key).
|
||||
|
||||
### Vector columns
|
||||
|
||||
Vector columns are supported on MariaDB/MySQL, Microsoft SQL Server, PostgreSQL (via the [`pgvector`](https://github.com/pgvector/pgvector) extension) and SAP HANA Cloud, enabling the storage and querying of vector embeddings for similarity search and machine learning applications.
|
||||
|
||||
TypeORM supports both `vector` and `halfvec` column types across databases:
|
||||
|
||||
- `vector` - stores vectors as 4-byte floats (single precision)
|
||||
- MariaDB/MySQL: native `vector` type
|
||||
- Microsoft SQL Server: native `vector` type
|
||||
- PostgreSQL: `vector` type, available via `pgvector` extension
|
||||
- SAP HANA Cloud: alias for `real_vector` type
|
||||
- `halfvec` - stores vectors as 2-byte floats (half precision) for memory efficiency
|
||||
- PostgreSQL: `halfvec` type, available via `pgvector` extension
|
||||
- SAP HANA Cloud: alias for `half_vector` type
|
||||
|
||||
You can specify the number of vector dimensions using the `length` option:
|
||||
|
||||
```typescript
|
||||
@Entity()
|
||||
export class Post {
|
||||
@PrimaryGeneratedColumn()
|
||||
id: number
|
||||
|
||||
// Vector without specified dimensions
|
||||
@Column("vector")
|
||||
embedding: number[] | Buffer
|
||||
|
||||
// Vector with 3 dimensions: vector(3)
|
||||
@Column("vector", { length: 3 })
|
||||
embedding_3d: number[] | Buffer
|
||||
|
||||
// Half-precision vector with 4 dimensions: halfvec(4) (works on PostgreSQL and SAP HANA only)
|
||||
@Column("halfvec", { length: 4 })
|
||||
halfvec_embedding: number[] | Buffer
|
||||
}
|
||||
```
|
||||
|
||||
> **Note**:
|
||||
>
|
||||
> - **MariaDB/MySQL**: Vectors are supported as of MariaDB 11.7 and MySQL 9.0.
|
||||
> - **Microsoft SQL Server**: Vector type support requires SQL Server 2025 (17.x) or newer.
|
||||
> - **PostgreSQL**: Vector columns require the `pgvector` extension to be installed. The extension provides the vector data types and similarity operators.
|
||||
> - **SAP HANA**: Vector columns require SAP HANA Cloud (2024Q1+) and a supported version of `@sap/hana-client`.
|
||||
|
||||
### Spatial columns
|
||||
|
||||
Microsoft SQL Server, MySQL/MariaDB, PostgreSQL/CockroachDB and SAP HANA all support spatial columns. TypeORM's support for each varies slightly between databases, particularly as the column names vary between databases.
|
||||
|
||||
@ -157,6 +157,8 @@ export class MysqlDriver implements Driver {
|
||||
"multilinestring",
|
||||
"multipolygon",
|
||||
"geometrycollection",
|
||||
// vector data types
|
||||
"vector",
|
||||
// additional data types for mariadb
|
||||
"uuid",
|
||||
"inet4",
|
||||
@ -191,6 +193,7 @@ export class MysqlDriver implements Driver {
|
||||
"nvarchar",
|
||||
"binary",
|
||||
"varbinary",
|
||||
"vector",
|
||||
]
|
||||
|
||||
/**
|
||||
@ -280,6 +283,7 @@ export class MysqlDriver implements Driver {
|
||||
char: { length: 1 },
|
||||
binary: { length: 1 },
|
||||
varbinary: { length: 255 },
|
||||
vector: { length: 2048 }, // default length MySQL uses if not provided a value
|
||||
decimal: { precision: 10, scale: 0 },
|
||||
dec: { precision: 10, scale: 0 },
|
||||
numeric: { precision: 10, scale: 0 },
|
||||
|
||||
@ -2802,17 +2802,19 @@ export class MysqlQueryRunner extends BaseQueryRunner implements QueryRunner {
|
||||
) !== -1 &&
|
||||
dbColumn["CHARACTER_MAXIMUM_LENGTH"]
|
||||
) {
|
||||
const length =
|
||||
dbColumn[
|
||||
"CHARACTER_MAXIMUM_LENGTH"
|
||||
].toString()
|
||||
let length: number =
|
||||
dbColumn["CHARACTER_MAXIMUM_LENGTH"]
|
||||
if (tableColumn.type === "vector") {
|
||||
// MySQL and MariaDB store the vector length in bytes, not in number of dimensions.
|
||||
length = length / 4
|
||||
}
|
||||
tableColumn.length =
|
||||
!this.isDefaultColumnLength(
|
||||
table,
|
||||
tableColumn,
|
||||
length,
|
||||
length.toString(),
|
||||
)
|
||||
? length
|
||||
? length.toString()
|
||||
: ""
|
||||
}
|
||||
|
||||
|
||||
@ -75,7 +75,7 @@ export type WithLengthColumnType =
|
||||
| "binary" // mssql
|
||||
| "varbinary" // mssql, sap
|
||||
| "string" // cockroachdb, spanner
|
||||
| "vector" // postgres, mssql, sap
|
||||
| "vector" // mariadb, mysql, mssql, postgres, sap
|
||||
| "halfvec" // postgres, sap
|
||||
| "half_vector" // sap
|
||||
| "real_vector" // sap
|
||||
|
||||
@ -0,0 +1,61 @@
|
||||
import {
|
||||
Column,
|
||||
Entity,
|
||||
PrimaryColumn,
|
||||
ValueTransformer,
|
||||
} from "../../../../../../src"
|
||||
|
||||
/*
 * The mysql2 client only partially supports the vector type. Depending on its
 * version it hands back either the raw binary value or an already
 * deserialized `number[]`, and it currently accepts only binary parameters
 * for vector values (not numeric arrays). This transformer therefore converts
 * between `number[]` and a buffer of consecutive little-endian 32-bit floats.
 */
const vectorTransformer: ValueTransformer = {
    /**
     * Serializes an entity value for the database.
     *
     * @param value vector components, or `null`/`undefined` for a missing value
     * @returns a buffer holding one little-endian float32 per component;
     * `null`/`undefined` are passed through unchanged
     */
    to: (value: number[] | null | undefined) => {
        if (value == null) {
            // Nothing to encode: let the missing value reach the driver as-is
            // instead of failing on `value.length`.
            return value
        }

        const length = value.length
        const arrayBuffer = new ArrayBuffer(length * 4)
        const dataView = new DataView(arrayBuffer)

        for (let index = 0; index < length; index++) {
            dataView.setFloat32(index * 4, value[index], true)
        }

        return Buffer.from(arrayBuffer)
    },

    /**
     * Deserializes a database value for the entity.
     *
     * @param value raw buffer, an array already decoded by the client, or
     * `null`/`undefined` for a missing value
     * @returns the vector components; arrays and `null`/`undefined` are
     * passed through unchanged
     * @throws RangeError when the buffer size is not a multiple of 4 bytes
     */
    from: (value: Buffer | number[] | null | undefined) => {
        if (value == null || Array.isArray(value)) {
            // Either there is no value, or the client already deserialized
            // the vector as number[].
            return value
        }

        if (value.byteLength % 4 !== 0) {
            throw new RangeError(
                `Invalid vector buffer: ${value.byteLength} bytes is not a multiple of 4`,
            )
        }

        // Honour byteOffset/byteLength: a Buffer may be a view into a larger
        // shared ArrayBuffer.
        const dataView = new DataView(
            value.buffer,
            value.byteOffset,
            value.byteLength,
        )
        const length = value.byteLength / 4
        const array = new Array<number>(length)
        for (let index = 0; index < length; index++) {
            array[index] = dataView.getFloat32(index * 4, true)
        }

        return array
    },
}
|
||||
|
||||
/**
 * Test entity for the MySQL/MariaDB vector schema test: an id, two plain
 * columns and one 16-dimension `vector` column.
 */
@Entity()
|
||||
export class Embedding {
|
||||
@PrimaryColumn()
|
||||
id: number
|
||||
|
||||
|
||||
@Column()
|
||||
content: string
|
||||
|
||||
|
||||
@Column()
|
||||
metadata: string
|
||||
|
||||
|
||||
// Declared with 16 dimensions; values go through vectorTransformer so the
|
||||
// entity property can be a plain number[].
@Column("vector", {
|
||||
length: 16,
|
||||
transformer: vectorTransformer,
|
||||
})
|
||||
vector: number[]
|
||||
}
|
||||
85
test/functional/database-schema/vectors/mysql/vector.ts
Normal file
85
test/functional/database-schema/vectors/mysql/vector.ts
Normal file
@ -0,0 +1,85 @@
|
||||
import { expect } from "chai"
|
||||
import { DataSource, DeepPartial } from "../../../../../src"
|
||||
import { DriverUtils } from "../../../../../src/driver/DriverUtils"
|
||||
import {
|
||||
closeTestingConnections,
|
||||
createTestingConnections,
|
||||
} from "../../../../utils/test-utils"
|
||||
import { Embedding } from "./entity/Embedding"
|
||||
|
||||
/**
 * Functional test for `vector` columns on MySQL and MariaDB: synchronizes the
 * schema, checks the column metadata read back from the database, then
 * round-trips an entity through save/find.
 */
describe("database-schema > vectors > mysql", () => {
    describe("with vector output type Array", () => {
        let dataSources: DataSource[]
        before(async () => {
            dataSources = await createTestingConnections({
                entities: [Embedding],
                enabledDrivers: ["mariadb", "mysql"],
                driverSpecific: {
                    // Synchronization is triggered manually in the test, after
                    // the server version check.
                    synchronize: false,
                },
            })
        })
        after(() => closeTestingConnections(dataSources))

        it("should work correctly - create, persist and hydrate", () =>
            Promise.all(
                dataSources.map(async (dataSource) => {
                    // Skip servers older than the versions this test targets
                    // (MySQL 9.0 / MariaDB 11.7).
                    if (
                        (dataSource.options.type === "mysql" &&
                            !DriverUtils.isReleaseVersionOrGreater(
                                dataSource.driver,
                                "9.0",
                            )) ||
                        (dataSource.options.type === "mariadb" &&
                            !DriverUtils.isReleaseVersionOrGreater(
                                dataSource.driver,
                                "11.7",
                            ))
                    ) {
                        return
                    }

                    await dataSource.synchronize()

                    // Verify column metadata. The table name is resolved
                    // before the query runner is created, and the runner is
                    // released even when loading the table fails, so it is
                    // never leaked.
                    const tableName =
                        dataSource.getMetadata(Embedding).tableName
                    const queryRunner = dataSource.createQueryRunner()
                    const table = await queryRunner
                        .getTable(tableName)
                        .finally(() => queryRunner.release())

                    // A missing table surfaces as an assertion failure here
                    // rather than a TypeError on `undefined`.
                    expect(table?.findColumnByName("vector")).to.contain({
                        type: "vector",
                        length: "16",
                    })

                    const vector = [
                        0.004318627528846264, -0.008295782841742039,
                        0.011462775990366936, -0.03171011060476303,
                        -0.003404685528948903, 0.018827877938747406,
                        0.010692788287997246, 0.014154385775327682,
                        -0.026206370443105698, -0.03977154940366745,
                        -0.008630559779703617, 0.040039367973804474,
                        0.0019048830727115273, 0.01347813569009304,
                        -0.02147931419312954, -0.004211498890072107,
                    ]
                    const plainEmbedding = {
                        id: 1,
                        content: "This is a sample text to be analyzed by AI",
                        metadata: `{"client":"typeorm"}`,
                        vector,
                    } satisfies DeepPartial<Embedding>

                    const embeddingRepository =
                        dataSource.getRepository(Embedding)
                    const embedding = embeddingRepository.create(plainEmbedding)
                    await embeddingRepository.save(embedding)

                    const loadedEmbedding = await embeddingRepository.findOneBy(
                        { id: 1 },
                    )
                    expect(loadedEmbedding).to.deep.equal(plainEmbedding)
                }),
            ))
    })
})
|
||||
@ -1,4 +1,38 @@
|
||||
import { Column, Entity, PrimaryColumn } from "../../../../../../src"
|
||||
import {
|
||||
Column,
|
||||
Entity,
|
||||
PrimaryColumn,
|
||||
ValueTransformer,
|
||||
} from "../../../../../../src"
|
||||
|
||||
/**
 * Converts between `number[]` and the binary layout used for the
 * `real_vector` column in this test: a little-endian uint32 element count
 * followed by that many little-endian 32-bit floats.
 */
const vectorTransformer: ValueTransformer = {
    /**
     * Serializes an entity value for the database.
     *
     * @param value vector components, or `null`/`undefined` for a missing value
     * @returns a buffer with a 4-byte count header followed by the float32
     * components; `null`/`undefined` are passed through unchanged
     */
    to: (value: number[] | null | undefined) => {
        if (value == null) {
            // Nothing to encode: pass the missing value through instead of
            // failing on `value.length`.
            return value
        }

        const length = value.length
        const arrayBuffer = new ArrayBuffer(4 + length * 4)
        const dataView = new DataView(arrayBuffer)

        dataView.setUint32(0, length, true)
        for (let index = 0; index < length; index++) {
            dataView.setFloat32(4 + index * 4, value[index], true)
        }

        return Buffer.from(arrayBuffer)
    },

    /**
     * Deserializes a database value for the entity.
     *
     * @param value raw buffer, or `null`/`undefined` for a missing value
     * @returns the decoded components; `null`/`undefined` are passed through
     * unchanged
     * @throws RangeError when the buffer is too small for its header or for
     * the element count the header declares
     */
    from: (value: Buffer | null | undefined) => {
        if (value == null) {
            return value
        }

        if (value.byteLength < 4) {
            throw new RangeError(
                `Invalid vector buffer: ${value.byteLength} bytes is too short for the length header`,
            )
        }

        // Honour byteOffset/byteLength: a Buffer may be a view into a larger
        // shared ArrayBuffer.
        const dataView = new DataView(
            value.buffer,
            value.byteOffset,
            value.byteLength,
        )
        const length = dataView.getUint32(0, true)
        if (4 + length * 4 > value.byteLength) {
            throw new RangeError(
                `Invalid vector buffer: header declares ${length} elements but only ${value.byteLength} bytes are available`,
            )
        }

        const array = new Array<number>(length)
        for (let index = 0; index < length; index++) {
            array[index] = dataView.getFloat32(4 + index * 4, true)
        }

        return array
    },
}
|
||||
|
||||
@Entity()
|
||||
export class BufferEmbedding {
|
||||
@ -11,6 +45,8 @@ export class BufferEmbedding {
|
||||
@Column("nclob")
|
||||
metadata: string
|
||||
|
||||
@Column("real_vector")
|
||||
realVector: Buffer
|
||||
@Column("real_vector", {
|
||||
transformer: vectorTransformer,
|
||||
})
|
||||
realVector: number[]
|
||||
}
|
||||
|
||||
@ -119,34 +119,6 @@ describe("database-schema > vectors > sap", () => {
|
||||
})
|
||||
after(() => closeTestingConnections(dataSources))
|
||||
|
||||
function deserializeFvecs(buffer: Buffer) {
|
||||
const dataView = new DataView(
|
||||
buffer.buffer,
|
||||
buffer.byteOffset,
|
||||
buffer.byteLength,
|
||||
)
|
||||
const length = dataView.getUint32(0, true)
|
||||
const array = new Array<number>(length)
|
||||
for (let index = 0; index < length; index++) {
|
||||
array[index] = dataView.getFloat32(4 + index * 4, true)
|
||||
}
|
||||
|
||||
return array
|
||||
}
|
||||
|
||||
function serializeFvecs(array: number[]) {
|
||||
const length = array.length
|
||||
const arrayBuffer = new ArrayBuffer(4 + length * 4)
|
||||
const dataView = new DataView(arrayBuffer)
|
||||
|
||||
dataView.setUint32(0, length, true)
|
||||
for (let index = 0; index < length; index++) {
|
||||
dataView.setFloat32(4 + index * 4, array[index], true)
|
||||
}
|
||||
|
||||
return Buffer.from(arrayBuffer)
|
||||
}
|
||||
|
||||
it("should work correctly - persist and hydrate ", () =>
|
||||
Promise.all(
|
||||
dataSources.map(async (dataSource) => {
|
||||
@ -177,7 +149,7 @@ describe("database-schema > vectors > sap", () => {
|
||||
content:
|
||||
"This is a sample text to be analyzed by SAP Joule AI",
|
||||
metadata: `{"client":"typeorm"}`,
|
||||
realVector: serializeFvecs(plainVector),
|
||||
realVector: plainVector,
|
||||
} satisfies DeepPartial<BufferEmbedding>
|
||||
|
||||
const embeddingRepository =
|
||||
@ -188,10 +160,9 @@ describe("database-schema > vectors > sap", () => {
|
||||
const loadedEmbedding = await embeddingRepository.findOneBy(
|
||||
{ id: 1 },
|
||||
)
|
||||
const loadedVector = deserializeFvecs(
|
||||
loadedEmbedding!.realVector,
|
||||
expect(loadedEmbedding!.realVector).to.deep.equal(
|
||||
plainVector,
|
||||
)
|
||||
expect(loadedVector).to.deep.equal(plainVector)
|
||||
}),
|
||||
))
|
||||
})
|
||||
Loading…
x
Reference in New Issue
Block a user