Skip to content

Commit e27fbfc

Browse files
committed
Add support for text2vec-databricks
1 parent 8f35761 commit e27fbfc

File tree

4 files changed

+123
-34
lines changed

4 files changed

+123
-34
lines changed

src/collections/config/types/vectorizer.ts

+53-34
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ export type Vectorizer =
2121
| 'text2vec-azure-openai'
2222
| 'text2vec-cohere'
2323
| 'text2vec-contextionary'
24+
| 'text2vec-databricks'
2425
| 'text2vec-gpt4all'
2526
| 'text2vec-huggingface'
2627
| 'text2vec-jina'
@@ -33,9 +34,9 @@ export type Vectorizer =
3334
| 'text2vec-voyageai'
3435
| 'none';
3536

36-
/** The configuration for image vectorization using a neural network.
37+
/** The configuration for image vectorization using a neural network module.
3738
*
38-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/img2vec-neural) for detailed usage.
39+
* See the [documentation](https://weaviate.io/developers/weaviate/modules/img2vec-neural) for detailed usage.
3940
*/
4041
export type Img2VecNeuralConfig = {
4142
/** The image fields used when vectorizing. This is a required field and must match the property fields of the collection that are defined as `DataType.BLOB`. */
@@ -50,9 +51,9 @@ export type Multi2VecField = {
5051
weight?: number;
5152
};
5253

53-
/** The configuration for multi-media vectorization using CLIP.
54+
/** The configuration for multi-media vectorization using the CLIP module.
5455
*
55-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/multi2vec-clip) for detailed usage.
56+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage.
5657
*/
5758
export type Multi2VecClipConfig = {
5859
/** The image fields used when vectorizing. */
@@ -72,9 +73,9 @@ export type Multi2VecClipConfig = {
7273
};
7374
};
7475

75-
/** The configuration for multi-media vectorization using Bind.
76+
/** The configuration for multi-media vectorization using the Bind module.
7677
*
77-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/multi2vec-bind) for detailed usage.
78+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/imagebind/embeddings-multimodal) for detailed usage.
7879
*/
7980
export type Multi2VecBindConfig = {
8081
/** The audio fields used when vectorizing. */
@@ -112,9 +113,9 @@ export type Multi2VecBindConfig = {
112113
};
113114
};
114115

115-
/** The configuration for multi-media vectorization using Palm.
116+
/** The configuration for multi-media vectorization using the PaLM module.
116117
*
117-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-palm) for detailed usage.
118+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings-multimodal) for detailed usage.
118119
*/
119120
export type Multi2VecPalmConfig = {
120121
/** The project ID of the Palm model. */
@@ -144,9 +145,9 @@ export type Multi2VecPalmConfig = {
144145
};
145146
};
146147

147-
/** The configuration for reference-based vectorization using centroids.
148+
/** The configuration for reference-based vectorization using the centroid method.
148149
*
149-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/ref2vec-centroid) for detailed usage.
150+
* See the [documentation](https://weaviate.io/developers/weaviate/modules/ref2vec-centroid) for detailed usage.
150151
*/
151152
export type Ref2VecCentroidConfig = {
152153
/** The properties used as reference points for vectorization. */
@@ -155,7 +156,7 @@ export type Ref2VecCentroidConfig = {
155156
method: 'mean' | string;
156157
};
157158

158-
/** The configuration for text vectorization using AWS.
159+
/** The configuration for text vectorization using the AWS module.
159160
*
160161
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/aws/embeddings) for detailed usage.
161162
*/
@@ -172,9 +173,9 @@ export type Text2VecAWSConfig = {
172173
vectorizeCollectionName?: boolean;
173174
};
174175

175-
/** The configuration for text vectorization using Azure OpenAI.
176+
/** The configuration for text vectorization using the OpenAI module with Azure.
176177
*
177-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-azure-openai) for detailed usage.
178+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai/embeddings) for detailed usage.
178179
*/
179180
export type Text2VecAzureOpenAIConfig = {
180181
/** The base URL to use where API requests should go. */
@@ -187,9 +188,9 @@ export type Text2VecAzureOpenAIConfig = {
187188
vectorizeCollectionName?: boolean;
188189
};
189190

190-
/** The configuration for text vectorization using Cohere.
191+
/** The configuration for text vectorization using the Cohere module.
191192
*
192-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-cohere) for detailed usage.
193+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings) for detailed usage.
193194
*/
194195
export type Text2VecCohereConfig = {
195196
/** The base URL to use where API requests should go. */
@@ -202,28 +203,38 @@ export type Text2VecCohereConfig = {
202203
vectorizeCollectionName?: boolean;
203204
};
204205

205-
/** The configuration for text vectorization using Contextionary.
206+
/** The configuration for text vectorization using the Contextionary module.
206207
*
207-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-contextionary) for detailed usage.
208+
* See the [documentation](https://weaviate.io/developers/weaviate/modules/text2vec-contextionary) for detailed usage.
208209
*/
209210
export type Text2VecContextionaryConfig = {
210211
/** Whether to vectorize the collection name. */
211212
vectorizeCollectionName?: boolean;
212213
};
213214

214-
/** The configuration for text vectorization using GPT4All.
215+
/** The configuration for text vectorization using the Databricks module.
215216
*
216-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-gpt4all) for detailed usage.
217+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/databricks/embeddings) for detailed usage.
218+
*/
219+
export type Text2VecDatabricksConfig = {
220+
/** The endpoint to use. This is the only required field. NOTE(review): presumably the URL of the Databricks serving endpoint — confirm against the linked documentation. */
endpoint: string;
221+
/** An optional instruction string forwarded to the vectorizer. NOTE(review): exact semantics are not visible here — confirm against the linked documentation. */
instruction?: string;
222+
/** Whether to vectorize the collection name. */
vectorizeCollectionName?: boolean;
223+
};
224+
225+
/** The configuration for text vectorization using the GPT4All module.
226+
*
227+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/gpt4all/embeddings) for detailed usage.
217228
*/
218229
export type Text2VecGPT4AllConfig = {
219230
/** Whether to vectorize the collection name. */
220231
vectorizeCollectionName?: boolean;
221232
};
222233

223234
/**
224-
* The configuration for text vectorization using Hugging Face.
235+
* The configuration for text vectorization using the HuggingFace module.
225236
*
226-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-huggingface) for detailed usage.
237+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings) for detailed usage.
227238
*/
228239
export type Text2VecHuggingFaceConfig = {
229240
/** The endpoint URL to use. */
@@ -245,9 +256,9 @@ export type Text2VecHuggingFaceConfig = {
245256
};
246257

247258
/**
248-
* The configuration for text vectorization using Jina.
259+
* The configuration for text vectorization using the Jina module.
249260
*
250-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-jina) for detailed usage.
261+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage.
251262
*/
252263
export type Text2VecJinaConfig = {
253264
/** The model to use. */
@@ -257,7 +268,7 @@ export type Text2VecJinaConfig = {
257268
};
258269

259270
/**
260-
* The configuration for text vectorization using Mistral.
271+
* The configuration for text vectorization using the Mistral module.
261272
*
262273
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/mistral/embeddings) for detailed usage.
263274
*/
@@ -269,9 +280,9 @@ export type Text2VecMistralConfig = {
269280
};
270281

271282
/**
272-
* The configuration for text vectorization using OctoAI.
283+
* The configuration for text vectorization using the OctoAI module.
273284
*
274-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-octoai) for detailed usage.
285+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/octoai/embeddings) for detailed usage.
275286
*/
276287
export type Text2VecOctoAIConfig = {
277288
/** The base URL to use where API requests should go. */
@@ -283,9 +294,9 @@ export type Text2VecOctoAIConfig = {
283294
};
284295

285296
/**
286-
* The configuration for text vectorization using Ollama.
297+
* The configuration for text vectorization using the Ollama module.
287298
*
288-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-ollama) for detailed usage.
299+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/ollama/embeddings) for detailed usage.
289300
*/
290301
export type Text2VecOllamaConfig = {
291302
/** The base URL to use where API requests should go. */
@@ -297,9 +308,9 @@ export type Text2VecOllamaConfig = {
297308
};
298309

299310
/**
300-
* The configuration for text vectorization using OpenAI.
311+
* The configuration for text vectorization using the OpenAI module.
301312
*
302-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-openai) for detailed usage.
313+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai/embeddings) for detailed usage.
303314
*/
304315
export type Text2VecOpenAIConfig = {
305316
/** The base URL to use where API requests should go. */
@@ -317,9 +328,9 @@ export type Text2VecOpenAIConfig = {
317328
};
318329

319330
/**
320-
* The configuration for text vectorization using Palm.
331+
* The configuration for text vectorization using the PaLM module.
321332
*
322-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-palm) for detailed usage.
333+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) for detailed usage.
323334
*/
324335
export type Text2VecPalmConfig = {
325336
/** The API endpoint to use without a leading scheme such as `http://`. */
@@ -334,6 +345,11 @@ export type Text2VecPalmConfig = {
334345
vectorizeCollectionName?: boolean;
335346
};
336347

348+
/**
349+
* The configuration for text vectorization using the Transformers module.
350+
*
351+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings) for detailed usage.
352+
*/
337353
export type Text2VecTransformersConfig = {
338354
/** The inference url to use where API requests should go. You can use either this OR (`passage_inference_url` & `query_inference_url`). */
339355
inferenceUrl?: string;
@@ -348,9 +364,9 @@ export type Text2VecTransformersConfig = {
348364
};
349365

350366
/**
351-
* The configuration for text vectorization using Voyage AI.
367+
* The configuration for text vectorization using the VoyageAI module.
352368
*
353-
* See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai) for detailed usage.
369+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings) for detailed usage.
354370
*/
355371
export type Text2VecVoyageAIConfig = {
356372
/** The base URL to use where API requests should go. */
@@ -375,6 +391,7 @@ export type VectorizerConfig =
375391
| Text2VecAzureOpenAIConfig
376392
| Text2VecContextionaryConfig
377393
| Text2VecCohereConfig
394+
| Text2VecDatabricksConfig
378395
| Text2VecGPT4AllConfig
379396
| Text2VecHuggingFaceConfig
380397
| Text2VecJinaConfig
@@ -400,6 +417,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
400417
? Text2VecContextionaryConfig | undefined
401418
: V extends 'text2vec-cohere'
402419
? Text2VecCohereConfig | undefined
420+
: V extends 'text2vec-databricks'
421+
? Text2VecDatabricksConfig
403422
: V extends 'text2vec-gpt4all'
404423
? Text2VecGPT4AllConfig | undefined
405424
: V extends 'text2vec-huggingface'

src/collections/configure/types/vectorizer.ts

+5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
Text2VecAzureOpenAIConfig,
88
Text2VecCohereConfig,
99
Text2VecContextionaryConfig,
10+
Text2VecDatabricksConfig,
1011
Text2VecGPT4AllConfig,
1112
Text2VecHuggingFaceConfig,
1213
Text2VecJinaConfig,
@@ -140,6 +141,8 @@ export type Text2VecCohereConfigCreate = Text2VecCohereConfig;
140141

141142
export type Text2VecContextionaryConfigCreate = Text2VecContextionaryConfig;
142143

144+
export type Text2VecDatabricksConfigCreate = Text2VecDatabricksConfig;
145+
143146
export type Text2VecGPT4AllConfigCreate = Text2VecGPT4AllConfig;
144147

145148
export type Text2VecHuggingFaceConfigCreate = Text2VecHuggingFaceConfig;
@@ -176,6 +179,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
176179
? Text2VecContextionaryConfigCreate | undefined
177180
: V extends 'text2vec-cohere'
178181
? Text2VecCohereConfigCreate | undefined
182+
: V extends 'text2vec-databricks'
183+
? Text2VecDatabricksConfigCreate
179184
: V extends 'text2vec-gpt4all'
180185
? Text2VecGPT4AllConfigCreate | undefined
181186
: V extends 'text2vec-huggingface'

src/collections/configure/unit.test.ts

+44
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,50 @@ describe('Unit testing of the vectorizer factory class', () => {
704704
});
705705
});
706706

707+
it('should create the correct Text2VecDatabricksConfig type with required & defaults', () => {
708+
const config = configure.vectorizer.text2VecDatabricks({
709+
name: 'test',
710+
endpoint: 'endpoint',
711+
});
712+
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-databricks'>>({
713+
name: 'test',
714+
vectorIndex: {
715+
name: 'hnsw',
716+
config: undefined,
717+
},
718+
vectorizer: {
719+
name: 'text2vec-databricks',
720+
config: {
721+
endpoint: 'endpoint',
722+
},
723+
},
724+
});
725+
});
726+
727+
it('should create the correct Text2VecDatabricksConfig type with all values', () => {
728+
const config = configure.vectorizer.text2VecDatabricks({
729+
name: 'test',
730+
endpoint: 'endpoint',
731+
instruction: 'instruction',
732+
vectorizeCollectionName: true,
733+
});
734+
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-databricks'>>({
735+
name: 'test',
736+
vectorIndex: {
737+
name: 'hnsw',
738+
config: undefined,
739+
},
740+
vectorizer: {
741+
name: 'text2vec-databricks',
742+
config: {
743+
endpoint: 'endpoint',
744+
instruction: 'instruction',
745+
vectorizeCollectionName: true,
746+
},
747+
},
748+
});
749+
});
750+
707751
it('should create the correct Text2VecGPT4AllConfig type with defaults', () => {
708752
const config = configure.vectorizer.text2VecGPT4All();
709753
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2vec-gpt4all'>>({

src/collections/configure/vectorizer.ts

+21
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,27 @@ export const vectorizer = {
299299
},
300300
});
301301
},
302+
/**
303+
* Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-databricks'`.
304+
*
305+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/databricks/embeddings) for detailed usage.
306+
*
307+
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-databricks'>} opts The configuration for the `text2vec-databricks` vectorizer. Every option other than `name`, `sourceProperties` and `vectorIndexConfig` is forwarded as the vectorizer config.
308+
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-databricks'>} The configuration object.
309+
*/
310+
text2VecDatabricks: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
311+
opts: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-databricks'>
312+
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-databricks'> => {
313+
// Peel off the generic vector options; whatever remains in `config` is passed through unchanged as the vectorizer config.
const { name, sourceProperties, vectorIndexConfig, ...config } = opts;
314+
return makeVectorizer(name, {
315+
sourceProperties,
316+
vectorIndexConfig,
317+
vectorizerConfig: {
318+
name: 'text2vec-databricks',
319+
config: config,
320+
},
321+
});
322+
},
302323
/**
303324
* Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-gpt4all'`.
304325
*

0 commit comments

Comments
 (0)