File: | root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c |
Warning: | line 8789, column 10 2nd function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "sqlite-vec.h" | |||
2 | ||||
3 | #include <assert.h> | |||
4 | #include <errno(*__errno_location ()).h> | |||
5 | #include <float.h> | |||
6 | #include <inttypes.h> | |||
7 | #include <limits.h> | |||
8 | #include <math.h> | |||
9 | #include <stdbool.h> | |||
10 | #include <stdint.h> | |||
11 | #include <stdlib.h> | |||
12 | #include <string.h> | |||
13 | ||||
14 | #ifndef SQLITE_VEC_OMIT_FS | |||
15 | #include <stdio.h> | |||
16 | #endif | |||
17 | ||||
18 | #ifndef SQLITE_CORE | |||
19 | #include "sqlite3ext.h" | |||
20 | SQLITE_EXTENSION_INIT3extern const sqlite3_api_routines *sqlite3_api; | |||
21 | #else | |||
22 | #include "sqlite3.h" | |||
23 | #endif | |||
24 | ||||
25 | #ifndef UINT32_TYPEunsigned int | |||
26 | #ifdef HAVE_UINT32_T | |||
27 | #define UINT32_TYPEunsigned int uint32_t | |||
28 | #else | |||
29 | #define UINT32_TYPEunsigned int unsigned int | |||
30 | #endif | |||
31 | #endif | |||
32 | #ifndef UINT16_TYPEunsigned short int | |||
33 | #ifdef HAVE_UINT16_T | |||
34 | #define UINT16_TYPEunsigned short int uint16_t | |||
35 | #else | |||
36 | #define UINT16_TYPEunsigned short int unsigned short int | |||
37 | #endif | |||
38 | #endif | |||
39 | #ifndef INT16_TYPEshort int | |||
40 | #ifdef HAVE_INT16_T | |||
41 | #define INT16_TYPEshort int int16_t | |||
42 | #else | |||
43 | #define INT16_TYPEshort int short int | |||
44 | #endif | |||
45 | #endif | |||
46 | #ifndef UINT8_TYPEunsigned char | |||
47 | #ifdef HAVE_UINT8_T | |||
48 | #define UINT8_TYPEunsigned char uint8_t | |||
49 | #else | |||
50 | #define UINT8_TYPEunsigned char unsigned char | |||
51 | #endif | |||
52 | #endif | |||
53 | #ifndef INT8_TYPEsigned char | |||
54 | #ifdef HAVE_INT8_T | |||
55 | #define INT8_TYPEsigned char int8_t | |||
56 | #else | |||
57 | #define INT8_TYPEsigned char signed char | |||
58 | #endif | |||
59 | #endif | |||
60 | #ifndef LONGDOUBLE_TYPElong double | |||
61 | #define LONGDOUBLE_TYPElong double long double | |||
62 | #endif | |||
63 | ||||
64 | typedef int8_t i8; | |||
65 | typedef uint8_t u8; | |||
66 | typedef int16_t i16; | |||
67 | typedef int32_t i32; | |||
68 | typedef sqlite3_int64 i64; | |||
69 | typedef uint32_t u32; | |||
70 | typedef uint64_t u64; | |||
71 | typedef float f32; | |||
72 | typedef size_t usize; | |||
73 | ||||
74 | #ifndef UNUSED_PARAMETER | |||
75 | #define UNUSED_PARAMETER(X)(void)(X) (void)(X) | |||
76 | #endif | |||
77 | ||||
78 | // sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22) | |||
79 | // https://www.sqlite.org/changes.html#version_3_38_0 | |||
80 | #if SQLITE_VERSION_NUMBER3050001 >= 3038000 | |||
81 | #define COMPILER_SUPPORTS_VTAB_IN1 1 | |||
82 | #endif | |||
83 | ||||
84 | #ifndef SQLITE_SUBTYPE0x000100000 | |||
85 | #define SQLITE_SUBTYPE0x000100000 0x000100000 | |||
86 | #endif | |||
87 | ||||
88 | #ifndef SQLITE_RESULT_SUBTYPE0x001000000 | |||
89 | #define SQLITE_RESULT_SUBTYPE0x001000000 0x001000000 | |||
90 | #endif | |||
91 | ||||
92 | #ifndef SQLITE_INDEX_CONSTRAINT_LIMIT73 | |||
93 | #define SQLITE_INDEX_CONSTRAINT_LIMIT73 73 | |||
94 | #endif | |||
95 | ||||
96 | #ifndef SQLITE_INDEX_CONSTRAINT_OFFSET74 | |||
97 | #define SQLITE_INDEX_CONSTRAINT_OFFSET74 74 | |||
98 | #endif | |||
99 | ||||
100 | #define countof(x)(sizeof(x) / sizeof((x)[0])) (sizeof(x) / sizeof((x)[0])) | |||
101 | #define min(a, b)(((a) <= (b)) ? (a) : (b)) (((a) <= (b)) ? (a) : (b)) | |||
102 | ||||
// Tags identifying which element representation a vector uses.
enum VectorElementType {
  // clang-format off
  SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223,
  SQLITE_VEC_ELEMENT_TYPE_BIT     = 224,
  SQLITE_VEC_ELEMENT_TYPE_INT8    = 225,
  // clang-format on
};
110 | ||||
111 | #ifdef SQLITE_VEC_ENABLE_AVX | |||
112 | #include <immintrin.h> | |||
113 | #define PORTABLE_ALIGN32 __attribute__((aligned(32))) | |||
114 | #define PORTABLE_ALIGN64 __attribute__((aligned(64))) | |||
115 | ||||
116 | static f32 l2_sqr_float_avx(const void *pVect1v, const void *pVect2v, | |||
117 | const void *qty_ptr) { | |||
118 | f32 *pVect1 = (f32 *)pVect1v; | |||
119 | f32 *pVect2 = (f32 *)pVect2v; | |||
120 | size_t qty = *((size_t *)qty_ptr); | |||
121 | f32 PORTABLE_ALIGN32 TmpRes[8]; | |||
122 | size_t qty16 = qty >> 4; | |||
123 | ||||
124 | const f32 *pEnd1 = pVect1 + (qty16 << 4); | |||
125 | ||||
126 | __m256 diff, v1, v2; | |||
127 | __m256 sum = _mm256_set1_ps(0); | |||
128 | ||||
129 | while (pVect1 < pEnd1) { | |||
130 | v1 = _mm256_loadu_ps(pVect1); | |||
131 | pVect1 += 8; | |||
132 | v2 = _mm256_loadu_ps(pVect2); | |||
133 | pVect2 += 8; | |||
134 | diff = _mm256_sub_ps(v1, v2); | |||
135 | sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); | |||
136 | ||||
137 | v1 = _mm256_loadu_ps(pVect1); | |||
138 | pVect1 += 8; | |||
139 | v2 = _mm256_loadu_ps(pVect2); | |||
140 | pVect2 += 8; | |||
141 | diff = _mm256_sub_ps(v1, v2); | |||
142 | sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); | |||
143 | } | |||
144 | ||||
145 | _mm256_store_ps(TmpRes, sum); | |||
146 | return sqrt(TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + | |||
147 | TmpRes[5] + TmpRes[6] + TmpRes[7]); | |||
148 | } | |||
149 | #endif | |||
150 | ||||
151 | #ifdef SQLITE_VEC_ENABLE_NEON | |||
152 | #include <arm_neon.h> | |||
153 | ||||
154 | #define PORTABLE_ALIGN32 __attribute__((aligned(32))) | |||
155 | ||||
156 | // thx https://github.com/nmslib/hnswlib/pull/299/files | |||
/**
 * @brief NEON Euclidean (L2) distance between two float32 vectors.
 *
 * Groups of 16 floats are processed with four independent accumulators;
 * the remaining elements are handled by a scalar loop.
 *
 * @param pVect1v first vector (f32 elements)
 * @param pVect2v second vector (f32 elements)
 * @param qty_ptr pointer to a size_t holding the element count
 * @return square root of the summed squared differences
 */
static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v,
                             const void *qty_ptr) {
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;
  const size_t count = *((const size_t *)qty_ptr);
  const size_t vectorized = (count >> 4) << 4;

  float32x4_t acc0 = vdupq_n_f32(0);
  float32x4_t acc1 = vdupq_n_f32(0);
  float32x4_t acc2 = vdupq_n_f32(0);
  float32x4_t acc3 = vdupq_n_f32(0);

  size_t i = 0;
  for (; i < vectorized; i += 16) {
    float32x4_t d0 = vsubq_f32(vld1q_f32(lhs + i), vld1q_f32(rhs + i));
    acc0 = vfmaq_f32(acc0, d0, d0);

    float32x4_t d1 = vsubq_f32(vld1q_f32(lhs + i + 4), vld1q_f32(rhs + i + 4));
    acc1 = vfmaq_f32(acc1, d1, d1);

    float32x4_t d2 = vsubq_f32(vld1q_f32(lhs + i + 8), vld1q_f32(rhs + i + 8));
    acc2 = vfmaq_f32(acc2, d2, d2);

    float32x4_t d3 =
        vsubq_f32(vld1q_f32(lhs + i + 12), vld1q_f32(rhs + i + 12));
    acc3 = vfmaq_f32(acc3, d3, d3);
  }

  // Horizontal reduction of the four accumulators.
  f32 total =
      vaddvq_f32(vaddq_f32(vaddq_f32(acc0, acc1), vaddq_f32(acc2, acc3)));

  // Scalar pass over whatever did not fill a group of 16.
  for (; i < count; i++) {
    f32 delta = lhs[i] - rhs[i];
    total += delta * delta;
  }

  return sqrt(total);
}
214 | ||||
/**
 * @brief NEON Euclidean (L2) distance between two int8 vectors.
 *
 * The difference of two int8 values lies in [-255, 255], so its square can
 * reach 65025, which does not fit in a signed 16-bit lane. The squares are
 * therefore produced with a widening 16x16->32 multiply rather than a 16-bit
 * multiply, which would wrap for |diff| > 181.
 *
 * @param pVect1v first vector (i8 elements)
 * @param pVect2v second vector (i8 elements)
 * @param qty_ptr pointer to a size_t holding the element count
 * @return square root of the summed squared differences
 */
static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v,
                            const void *qty_ptr) {
  const i8 *pVect1 = (const i8 *)pVect1v;
  const i8 *pVect2 = (const i8 *)pVect2v;
  const size_t qty = *((const size_t *)qty_ptr);

  i32 sum_scalar = 0;
  size_t i = 0;

  // Index-based bound: never forms a pointer before the start of the array,
  // even when qty < 8.
  for (; i + 8 <= qty; i += 8) {
    // widening subtract: int8x8 - int8x8 -> int16x8, no overflow possible
    int16x8_t diff = vsubl_s8(vld1_s8(pVect1 + i), vld1_s8(pVect2 + i));

    // widening multiply: int16x4 * int16x4 -> int32x4
    int32x4_t sq_lo = vmull_s16(vget_low_s16(diff), vget_low_s16(diff));
    int32x4_t sq_hi = vmull_s16(vget_high_s16(diff), vget_high_s16(diff));

    sum_scalar += vaddvq_s32(vaddq_s32(sq_lo, sq_hi));
  }

  // handle leftovers
  for (; i < qty; i++) {
    i32 diff = (i32)pVect1[i] - (i32)pVect2[i];
    sum_scalar += diff * diff;
  }

  return sqrtf(sum_scalar);
}
253 | ||||
// Adds the L1 distance of one 16-byte group (a[0..15] vs b[0..15]) to `acc`.
// vabdq_s8 yields the absolute difference as an 8-bit pattern (0..255); it is
// explicitly reinterpreted as unsigned before the pairwise widening adds, and
// the 32-bit partial sums are reinterpreted back to signed for accumulation.
static inline int32x4_t l1_int8_neon_accum16(int32x4_t acc, const i8 *a,
                                             const i8 *b) {
  uint8x16_t abs_diff = vreinterpretq_u8_s8(vabdq_s8(vld1q_s8(a), vld1q_s8(b)));
  uint32x4_t partial = vpaddlq_u16(vpaddlq_u8(abs_diff));
  return vaddq_s32(acc, vreinterpretq_s32_u32(partial));
}

/**
 * @brief NEON Manhattan (L1) distance between two int8 vectors.
 *
 * Processes 64 elements per iteration with four accumulators, then 16 at a
 * time, then a scalar tail. Loop bounds are index-based so no pointer outside
 * the input arrays is ever formed, and all signed/unsigned vector conversions
 * are explicit so the code builds without lax vector conversions.
 *
 * @param pVect1v first vector (i8 elements)
 * @param pVect2v second vector (i8 elements)
 * @param qty_ptr pointer to a size_t holding the element count
 * @return sum of absolute element differences
 */
static i32 l1_int8_neon(const void *pVect1v, const void *pVect2v,
                        const void *qty_ptr) {
  const i8 *pVect1 = (const i8 *)pVect1v;
  const i8 *pVect2 = (const i8 *)pVect2v;
  const size_t qty = *((const size_t *)qty_ptr);
  size_t i = 0;

  int32x4_t acc1 = vdupq_n_s32(0);
  int32x4_t acc2 = vdupq_n_s32(0);
  int32x4_t acc3 = vdupq_n_s32(0);
  int32x4_t acc4 = vdupq_n_s32(0);

  for (; i + 64 <= qty; i += 64) {
    acc1 = l1_int8_neon_accum16(acc1, pVect1 + i, pVect2 + i);
    acc2 = l1_int8_neon_accum16(acc2, pVect1 + i + 16, pVect2 + i + 16);
    acc3 = l1_int8_neon_accum16(acc3, pVect1 + i + 32, pVect2 + i + 32);
    acc4 = l1_int8_neon_accum16(acc4, pVect1 + i + 48, pVect2 + i + 48);
  }

  for (; i + 16 <= qty; i += 16) {
    acc1 = l1_int8_neon_accum16(acc1, pVect1 + i, pVect2 + i);
  }

  int32x4_t acc = vaddq_s32(vaddq_s32(acc1, acc2), vaddq_s32(acc3, acc4));

  // scalar tail for the final (qty % 16) elements
  int32_t sum = 0;
  for (; i < qty; i++) {
    sum += abs((int32_t)pVect1[i] - (int32_t)pVect2[i]);
  }

  return vaddvq_s32(acc) + sum;
}
313 | ||||
/**
 * @brief NEON Manhattan (L1) distance between two float32 vectors.
 *
 * Four floats are loaded per iteration, widened to float64, and their
 * absolute differences accumulated in double precision; leftover elements
 * are handled by a scalar loop.
 *
 * @param pVect1v first vector (f32 elements)
 * @param pVect2v second vector (f32 elements)
 * @param qty_ptr pointer to a size_t holding the element count
 * @return sum of absolute element differences, as a double
 */
static double l1_f32_neon(const void *pVect1v, const void *pVect2v,
                          const void *qty_ptr) {
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;
  const size_t count = *((const size_t *)qty_ptr);

  float64x2_t acc = vdupq_n_f64(0);
  size_t i = 0;

  for (; i + 4 <= count; i += 4) {
    float32x4_t va = vld1q_f32(lhs + i);
    float32x4_t vb = vld1q_f32(rhs + i);

    // widen each half to f64x2 before taking the absolute difference
    float64x2_t lo = vabdq_f64(vcvt_f64_f32(vget_low_f32(va)),
                               vcvt_f64_f32(vget_low_f32(vb)));
    float64x2_t hi = vabdq_f64(vcvt_high_f64_f32(va), vcvt_high_f64_f32(vb));

    acc = vaddq_f64(acc, vaddq_f64(lo, hi));
  }

  double tail = 0;
  for (; i < count; i++) {
    tail += fabs((double)lhs[i] - (double)rhs[i]);
  }

  return vaddvq_f64(acc) + tail;
}
347 | #endif | |||
348 | ||||
349 | static f32 l2_sqr_float(const void *pVect1v, const void *pVect2v, | |||
350 | const void *qty_ptr) { | |||
351 | f32 *pVect1 = (f32 *)pVect1v; | |||
352 | f32 *pVect2 = (f32 *)pVect2v; | |||
353 | size_t qty = *((size_t *)qty_ptr); | |||
354 | ||||
355 | f32 res = 0; | |||
356 | for (size_t i = 0; i < qty; i++) { | |||
357 | f32 t = *pVect1 - *pVect2; | |||
358 | pVect1++; | |||
359 | pVect2++; | |||
360 | res += t * t; | |||
361 | } | |||
362 | return sqrt(res); | |||
363 | } | |||
364 | ||||
365 | static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) { | |||
366 | i8 *a = (i8 *)pA; | |||
367 | i8 *b = (i8 *)pB; | |||
368 | size_t d = *((size_t *)pD); | |||
369 | ||||
370 | f32 res = 0; | |||
371 | for (size_t i = 0; i < d; i++) { | |||
372 | f32 t = *a - *b; | |||
373 | a++; | |||
374 | b++; | |||
375 | res += t * t; | |||
376 | } | |||
377 | return sqrt(res); | |||
378 | } | |||
379 | ||||
380 | static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) { | |||
381 | #ifdef SQLITE_VEC_ENABLE_NEON | |||
382 | if ((*(const size_t *)d) > 16) { | |||
383 | return l2_sqr_float_neon(a, b, d); | |||
384 | } | |||
385 | #endif | |||
386 | #ifdef SQLITE_VEC_ENABLE_AVX | |||
387 | if (((*(const size_t *)d) % 16 == 0)) { | |||
388 | return l2_sqr_float_avx(a, b, d); | |||
389 | } | |||
390 | #endif | |||
391 | return l2_sqr_float(a, b, d); | |||
392 | } | |||
393 | ||||
394 | static f32 distance_l2_sqr_int8(const void *a, const void *b, const void *d) { | |||
395 | #ifdef SQLITE_VEC_ENABLE_NEON | |||
396 | if ((*(const size_t *)d) > 7) { | |||
397 | return l2_sqr_int8_neon(a, b, d); | |||
398 | } | |||
399 | #endif | |||
400 | return l2_sqr_int8(a, b, d); | |||
401 | } | |||
402 | ||||
403 | static i32 l1_int8(const void *pA, const void *pB, const void *pD) { | |||
404 | i8 *a = (i8 *)pA; | |||
405 | i8 *b = (i8 *)pB; | |||
406 | size_t d = *((size_t *)pD); | |||
407 | ||||
408 | i32 res = 0; | |||
409 | for (size_t i = 0; i < d; i++) { | |||
410 | res += abs(*a - *b); | |||
411 | a++; | |||
412 | b++; | |||
413 | } | |||
414 | ||||
415 | return res; | |||
416 | } | |||
417 | ||||
418 | static i32 distance_l1_int8(const void *a, const void *b, const void *d) { | |||
419 | #ifdef SQLITE_VEC_ENABLE_NEON | |||
420 | if ((*(const size_t *)d) > 15) { | |||
421 | return l1_int8_neon(a, b, d); | |||
422 | } | |||
423 | #endif | |||
424 | return l1_int8(a, b, d); | |||
425 | } | |||
426 | ||||
427 | static double l1_f32(const void *pA, const void *pB, const void *pD) { | |||
428 | f32 *a = (f32 *)pA; | |||
429 | f32 *b = (f32 *)pB; | |||
430 | size_t d = *((size_t *)pD); | |||
431 | ||||
432 | double res = 0; | |||
433 | for (size_t i = 0; i < d; i++) { | |||
434 | res += fabs((double)*a - (double)*b); | |||
435 | a++; | |||
436 | b++; | |||
437 | } | |||
438 | ||||
439 | return res; | |||
440 | } | |||
441 | ||||
/**
 * @brief L1 distance for float32 vectors; uses the NEON kernel when it is
 * compiled in and there are more than 3 elements, else the scalar version.
 *
 * @param a first vector (f32 elements)
 * @param b second vector (f32 elements)
 * @param d pointer to a size_t holding the element count
 */
static double distance_l1_f32(const void *a, const void *b, const void *d) {
#ifdef SQLITE_VEC_ENABLE_NEON
  {
    const size_t dims = *(const size_t *)d;
    if (dims > 3) {
      return l1_f32_neon(a, b, d);
    }
  }
#endif
  return l1_f32(a, b, d);
}
450 | ||||
451 | static f32 distance_cosine_float(const void *pVect1v, const void *pVect2v, | |||
452 | const void *qty_ptr) { | |||
453 | f32 *pVect1 = (f32 *)pVect1v; | |||
454 | f32 *pVect2 = (f32 *)pVect2v; | |||
455 | size_t qty = *((size_t *)qty_ptr); | |||
456 | ||||
457 | f32 dot = 0; | |||
458 | f32 aMag = 0; | |||
459 | f32 bMag = 0; | |||
460 | for (size_t i = 0; i < qty; i++) { | |||
461 | dot += *pVect1 * *pVect2; | |||
462 | aMag += *pVect1 * *pVect1; | |||
463 | bMag += *pVect2 * *pVect2; | |||
464 | pVect1++; | |||
465 | pVect2++; | |||
466 | } | |||
467 | return 1 - (dot / (sqrt(aMag) * sqrt(bMag))); | |||
468 | } | |||
469 | static f32 distance_cosine_int8(const void *pA, const void *pB, | |||
470 | const void *pD) { | |||
471 | i8 *a = (i8 *)pA; | |||
472 | i8 *b = (i8 *)pB; | |||
473 | size_t d = *((size_t *)pD); | |||
474 | ||||
475 | f32 dot = 0; | |||
476 | f32 aMag = 0; | |||
477 | f32 bMag = 0; | |||
478 | for (size_t i = 0; i < d; i++) { | |||
479 | dot += *a * *b; | |||
480 | aMag += *a * *a; | |||
481 | bMag += *b * *b; | |||
482 | a++; | |||
483 | b++; | |||
484 | } | |||
485 | return 1 - (dot / (sqrt(aMag) * sqrt(bMag))); | |||
486 | } | |||
487 | ||||
488 | // https://github.com/facebookresearch/faiss/blob/77e2e79cd0a680adc343b9840dd865da724c579e/faiss/utils/hamming_distance/common.h#L34 | |||
489 | static u8 hamdist_table[256] = { | |||
490 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, | |||
491 | 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, | |||
492 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, | |||
493 | 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, | |||
494 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, | |||
495 | 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, | |||
496 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, | |||
497 | 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, | |||
498 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, | |||
499 | 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, | |||
500 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; | |||
501 | ||||
502 | static f32 distance_hamming_u8(u8 *a, u8 *b, size_t n) { | |||
503 | int same = 0; | |||
504 | for (unsigned long i = 0; i < n; i++) { | |||
505 | same += hamdist_table[a[i] ^ b[i]]; | |||
506 | } | |||
507 | return (f32)same; | |||
508 | } | |||
509 | ||||
510 | #ifdef _MSC_VER | |||
511 | #if !defined(__clang__1) && (defined(_M_ARM) || defined(_M_ARM64)) | |||
512 | // From | |||
513 | // https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c, | |||
514 | // line 34-43 | |||
515 | static unsigned int __builtin_popcountl(unsigned int x) { | |||
516 | unsigned int c = 0; | |||
517 | for (; x; ++c) { | |||
518 | x &= x - 1; | |||
519 | } | |||
520 | return c; | |||
521 | } | |||
522 | #else | |||
523 | #include <intrin.h> | |||
524 | #define __builtin_popcountl __popcnt64 | |||
525 | #endif | |||
526 | #endif | |||
527 | ||||
528 | static f32 distance_hamming_u64(u64 *a, u64 *b, size_t n) { | |||
529 | int same = 0; | |||
530 | for (unsigned long i = 0; i < n; i++) { | |||
531 | same += __builtin_popcountl(a[i] ^ b[i]); | |||
532 | } | |||
533 | return (f32)same; | |||
534 | } | |||
535 | ||||
536 | /** | |||
537 | * @brief Calculate the hamming distance between two bitvectors. | |||
538 | * | |||
539 | * @param a - first bitvector, MUST have d dimensions | |||
540 | * @param b - second bitvector, MUST have d dimensions | |||
541 | * @param d - pointer to size_t, MUST be divisible by CHAR_BIT | |||
542 | * @return f32 | |||
543 | */ | |||
544 | static f32 distance_hamming(const void *a, const void *b, const void *d) { | |||
545 | size_t dimensions = *((size_t *)d); | |||
546 | ||||
547 | if ((dimensions % 64) == 0) { | |||
548 | return distance_hamming_u64((u64 *)a, (u64 *)b, dimensions / 8 / CHAR_BIT8); | |||
549 | } | |||
550 | return distance_hamming_u8((u8 *)a, (u8 *)b, dimensions / CHAR_BIT8); | |||
551 | } | |||
552 | ||||
553 | // from SQLite source: | |||
554 | // https://github.com/sqlite/sqlite/blob/a509a90958ddb234d1785ed7801880ccb18b497e/src/json.c#L153 | |||
// 256-entry lookup: 1 for the four bytes treated as JSON whitespace,
// 0 for every other byte value (unlisted entries are zero-initialized).
static const char vecJsonIsSpaceX[256] = {
    [9] = 1,  // horizontal tab
    [10] = 1, // line feed
    [13] = 1, // carriage return
    [32] = 1, // space
};

#define vecJsonIsspace(x) (vecJsonIsSpaceX[(unsigned char)x])
572 | ||||
573 | typedef void (*vector_cleanup)(void *p); | |||
574 | ||||
// Cleanup callback that does nothing with the pointer it is given.
void vector_cleanup_noop(void *_) {
  UNUSED_PARAMETER(_);
}
576 | ||||
577 | #define JSON_SUBTYPE74 74 | |||
578 | ||||
579 | void vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) { | |||
580 | va_list args; | |||
581 | sqlite3_freesqlite3_api->free(pVTab->zErrMsg); | |||
582 | va_start(args, zFormat)__builtin_va_start(args, zFormat); | |||
583 | pVTab->zErrMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, args); | |||
584 | va_end(args)__builtin_va_end(args); | |||
585 | } | |||
// Growable, contiguous buffer of fixed-size elements.
struct Array {
  size_t element_size; // size in bytes of one element
  size_t length;       // number of elements currently stored
  size_t capacity;     // number of elements the buffer can hold
  void *z;             // backing storage
};
592 | ||||
593 | /** | |||
594 | * @brief Initial an array with the given element size and capacity. | |||
595 | * | |||
596 | * @param array | |||
597 | * @param element_size | |||
598 | * @param init_capacity | |||
599 | * @return SQLITE_OK on success, error code on failure. Only error is | |||
600 | * SQLITE_NOMEM | |||
601 | */ | |||
602 | int array_init(struct Array *array, size_t element_size, size_t init_capacity) { | |||
603 | int sz = element_size * init_capacity; | |||
604 | void *z = sqlite3_mallocsqlite3_api->malloc(sz); | |||
605 | if (!z) { | |||
606 | return SQLITE_NOMEM7; | |||
607 | } | |||
608 | memset(z, 0, sz); | |||
609 | ||||
610 | array->element_size = element_size; | |||
611 | array->length = 0; | |||
612 | array->capacity = init_capacity; | |||
613 | array->z = z; | |||
614 | return SQLITE_OK0; | |||
615 | } | |||
616 | ||||
617 | int array_append(struct Array *array, const void *element) { | |||
618 | if (array->length == array->capacity) { | |||
619 | size_t new_capacity = array->capacity * 2 + 100; | |||
620 | void *z = sqlite3_realloc64sqlite3_api->realloc64(array->z, array->element_size * new_capacity); | |||
621 | if (z) { | |||
622 | array->capacity = new_capacity; | |||
623 | array->z = z; | |||
624 | } else { | |||
625 | return SQLITE_NOMEM7; | |||
626 | } | |||
627 | } | |||
628 | memcpy(&((unsigned char *)array->z)[array->length * array->element_size], | |||
629 | element, array->element_size); | |||
630 | array->length++; | |||
631 | return SQLITE_OK0; | |||
632 | } | |||
633 | ||||
634 | void array_cleanup(struct Array *array) { | |||
635 | if (!array) | |||
636 | return; | |||
637 | array->element_size = 0; | |||
638 | array->length = 0; | |||
639 | array->capacity = 0; | |||
640 | sqlite3_freesqlite3_api->free(array->z); | |||
641 | array->z = NULL((void*)0); | |||
642 | } | |||
643 | ||||
644 | char *vector_subtype_name(int subtype) { | |||
645 | switch (subtype) { | |||
646 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: | |||
647 | return "float32"; | |||
648 | case SQLITE_VEC_ELEMENT_TYPE_INT8: | |||
649 | return "int8"; | |||
650 | case SQLITE_VEC_ELEMENT_TYPE_BIT: | |||
651 | return "bit"; | |||
652 | } | |||
653 | return ""; | |||
654 | } | |||
655 | char *type_name(int type) { | |||
656 | switch (type) { | |||
657 | case SQLITE_INTEGER1: | |||
658 | return "INTEGER"; | |||
659 | case SQLITE_BLOB4: | |||
660 | return "BLOB"; | |||
661 | case SQLITE_TEXT3: | |||
662 | return "TEXT"; | |||
663 | case SQLITE_FLOAT2: | |||
664 | return "FLOAT"; | |||
665 | case SQLITE_NULL5: | |||
666 | return "NULL"; | |||
667 | } | |||
668 | return ""; | |||
669 | } | |||
670 | ||||
671 | typedef void (*fvec_cleanup)(f32 *vector); | |||
672 | ||||
673 | void fvec_cleanup_noop(f32 *_) { UNUSED_PARAMETER(_)(void)(_); } | |||
674 | ||||
675 | static int fvec_from_value(sqlite3_value *value, f32 **vector, | |||
676 | size_t *dimensions, fvec_cleanup *cleanup, | |||
677 | char **pzErr) { | |||
678 | int value_type = sqlite3_value_typesqlite3_api->value_type(value); | |||
679 | ||||
680 | if (value_type == SQLITE_BLOB4) { | |||
681 | const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value); | |||
682 | int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value); | |||
683 | if (bytes == 0) { | |||
684 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); | |||
685 | return SQLITE_ERROR1; | |||
686 | } | |||
687 | if ((bytes % sizeof(f32)) != 0) { | |||
688 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("invalid float32 vector BLOB length. Must be " | |||
689 | "divisible by %d, found %d", | |||
690 | sizeof(f32), bytes); | |||
691 | return SQLITE_ERROR1; | |||
692 | } | |||
693 | *vector = (f32 *)blob; | |||
694 | *dimensions = bytes / sizeof(f32); | |||
695 | *cleanup = fvec_cleanup_noop; | |||
696 | return SQLITE_OK0; | |||
697 | } | |||
698 | ||||
699 | if (value_type == SQLITE_TEXT3) { | |||
700 | const char *source = (const char *)sqlite3_value_textsqlite3_api->value_text(value); | |||
701 | int source_len = sqlite3_value_bytessqlite3_api->value_bytes(value); | |||
702 | if (source_len == 0) { | |||
703 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); | |||
704 | return SQLITE_ERROR1; | |||
705 | } | |||
706 | int i = 0; | |||
707 | ||||
708 | struct Array x; | |||
709 | int rc = array_init(&x, sizeof(f32), ceil(source_len / 2.0)); | |||
710 | if (rc != SQLITE_OK0) { | |||
711 | return rc; | |||
712 | } | |||
713 | ||||
714 | // advance leading whitespace to first '[' | |||
715 | while (i < source_len) { | |||
716 | if (vecJsonIsspace(source[i])(vecJsonIsSpaceX[(unsigned char)source[i]])) { | |||
717 | i++; | |||
718 | continue; | |||
719 | } | |||
720 | if (source[i] == '[') { | |||
721 | break; | |||
722 | } | |||
723 | ||||
724 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
725 | "JSON array parsing error: Input does not start with '['"); | |||
726 | array_cleanup(&x); | |||
727 | return SQLITE_ERROR1; | |||
728 | } | |||
729 | if (source[i] != '[') { | |||
730 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
731 | "JSON array parsing error: Input does not start with '['"); | |||
732 | array_cleanup(&x); | |||
733 | return SQLITE_ERROR1; | |||
734 | } | |||
735 | int offset = i + 1; | |||
736 | ||||
737 | while (offset < source_len) { | |||
738 | char *ptr = (char *)&source[offset]; | |||
739 | char *endptr; | |||
740 | ||||
741 | errno(*__errno_location ()) = 0; | |||
742 | double result = strtod(ptr, &endptr); | |||
743 | if ((errno(*__errno_location ()) != 0 && result == 0) // some interval error? | |||
744 | || (errno(*__errno_location ()) == ERANGE34 && | |||
745 | (result == HUGE_VAL(__builtin_huge_val ()) || result == -HUGE_VAL(__builtin_huge_val ()))) // too big / smalls | |||
746 | ) { | |||
747 | sqlite3_freesqlite3_api->free(x.z); | |||
748 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error"); | |||
749 | return SQLITE_ERROR1; | |||
750 | } | |||
751 | ||||
752 | if (endptr == ptr) { | |||
753 | if (*ptr != ']') { | |||
754 | sqlite3_freesqlite3_api->free(x.z); | |||
755 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error"); | |||
756 | return SQLITE_ERROR1; | |||
757 | } | |||
758 | goto done; | |||
759 | } | |||
760 | ||||
761 | f32 res = (f32)result; | |||
762 | array_append(&x, (const void *)&res); | |||
763 | ||||
764 | offset += (endptr - ptr); | |||
765 | while (offset < source_len) { | |||
766 | if (vecJsonIsspace(source[offset])(vecJsonIsSpaceX[(unsigned char)source[offset]])) { | |||
767 | offset++; | |||
768 | continue; | |||
769 | } | |||
770 | if (source[offset] == ',') { | |||
771 | offset++; | |||
772 | continue; | |||
773 | } | |||
774 | if (source[offset] == ']') | |||
775 | goto done; | |||
776 | break; | |||
777 | } | |||
778 | } | |||
779 | ||||
780 | done: | |||
781 | ||||
782 | if (x.length > 0) { | |||
783 | *vector = (f32 *)x.z; | |||
784 | *dimensions = x.length; | |||
785 | *cleanup = (fvec_cleanup)sqlite3_freesqlite3_api->free; | |||
786 | return SQLITE_OK0; | |||
787 | } | |||
788 | sqlite3_freesqlite3_api->free(x.z); | |||
789 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); | |||
790 | return SQLITE_ERROR1; | |||
791 | } | |||
792 | ||||
793 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
794 | "Input must have type BLOB (compact format) or TEXT (JSON), found %s", | |||
795 | type_name(value_type)); | |||
796 | return SQLITE_ERROR1; | |||
797 | } | |||
798 | ||||
799 | static int bitvec_from_value(sqlite3_value *value, u8 **vector, | |||
800 | size_t *dimensions, vector_cleanup *cleanup, | |||
801 | char **pzErr) { | |||
802 | int value_type = sqlite3_value_typesqlite3_api->value_type(value); | |||
803 | if (value_type == SQLITE_BLOB4) { | |||
804 | const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value); | |||
805 | int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value); | |||
806 | if (bytes == 0) { | |||
807 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); | |||
808 | return SQLITE_ERROR1; | |||
809 | } | |||
810 | *vector = (u8 *)blob; | |||
811 | *dimensions = bytes * CHAR_BIT8; | |||
812 | *cleanup = vector_cleanup_noop; | |||
813 | return SQLITE_OK0; | |||
814 | } | |||
815 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Unknown type for bitvector."); | |||
816 | return SQLITE_ERROR1; | |||
817 | } | |||
818 | ||||
819 | static int int8_vec_from_value(sqlite3_value *value, i8 **vector, | |||
820 | size_t *dimensions, vector_cleanup *cleanup, | |||
821 | char **pzErr) { | |||
822 | int value_type = sqlite3_value_typesqlite3_api->value_type(value); | |||
823 | if (value_type == SQLITE_BLOB4) { | |||
824 | const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value); | |||
825 | int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value); | |||
826 | if (bytes == 0) { | |||
827 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); | |||
828 | return SQLITE_ERROR1; | |||
829 | } | |||
830 | *vector = (i8 *)blob; | |||
831 | *dimensions = bytes; | |||
832 | *cleanup = vector_cleanup_noop; | |||
833 | return SQLITE_OK0; | |||
834 | } | |||
835 | ||||
836 | if (value_type == SQLITE_TEXT3) { | |||
837 | const char *source = (const char *)sqlite3_value_textsqlite3_api->value_text(value); | |||
838 | int source_len = sqlite3_value_bytessqlite3_api->value_bytes(value); | |||
839 | int i = 0; | |||
840 | ||||
841 | if (source_len == 0) { | |||
842 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); | |||
843 | return SQLITE_ERROR1; | |||
844 | } | |||
845 | ||||
846 | struct Array x; | |||
847 | int rc = array_init(&x, sizeof(i8), ceil(source_len / 2.0)); | |||
848 | if (rc != SQLITE_OK0) { | |||
849 | return rc; | |||
850 | } | |||
851 | ||||
852 | // advance leading whitespace to first '[' | |||
853 | while (i < source_len) { | |||
854 | if (vecJsonIsspace(source[i])(vecJsonIsSpaceX[(unsigned char)source[i]])) { | |||
855 | i++; | |||
856 | continue; | |||
857 | } | |||
858 | if (source[i] == '[') { | |||
859 | break; | |||
860 | } | |||
861 | ||||
862 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
863 | "JSON array parsing error: Input does not start with '['"); | |||
864 | array_cleanup(&x); | |||
865 | return SQLITE_ERROR1; | |||
866 | } | |||
867 | if (source[i] != '[') { | |||
868 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
869 | "JSON array parsing error: Input does not start with '['"); | |||
870 | array_cleanup(&x); | |||
871 | return SQLITE_ERROR1; | |||
872 | } | |||
873 | int offset = i + 1; | |||
874 | ||||
875 | while (offset < source_len) { | |||
876 | char *ptr = (char *)&source[offset]; | |||
877 | char *endptr; | |||
878 | ||||
879 | errno(*__errno_location ()) = 0; | |||
880 | long result = strtol(ptr, &endptr, 10); | |||
881 | if ((errno(*__errno_location ()) != 0 && result == 0) || | |||
882 | (errno(*__errno_location ()) == ERANGE34 && (result == LONG_MAX9223372036854775807L || result == LONG_MIN(-9223372036854775807L -1L)))) { | |||
883 | sqlite3_freesqlite3_api->free(x.z); | |||
884 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error"); | |||
885 | return SQLITE_ERROR1; | |||
886 | } | |||
887 | ||||
888 | if (endptr == ptr) { | |||
889 | if (*ptr != ']') { | |||
890 | sqlite3_freesqlite3_api->free(x.z); | |||
891 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error"); | |||
892 | return SQLITE_ERROR1; | |||
893 | } | |||
894 | goto done; | |||
895 | } | |||
896 | ||||
897 | if (result < INT8_MIN(-128) || result > INT8_MAX(127)) { | |||
898 | sqlite3_freesqlite3_api->free(x.z); | |||
899 | *pzErr = | |||
900 | sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error: value out of range for int8"); | |||
901 | return SQLITE_ERROR1; | |||
902 | } | |||
903 | ||||
904 | i8 res = (i8)result; | |||
905 | array_append(&x, (const void *)&res); | |||
906 | ||||
907 | offset += (endptr - ptr); | |||
908 | while (offset < source_len) { | |||
909 | if (vecJsonIsspace(source[offset])(vecJsonIsSpaceX[(unsigned char)source[offset]])) { | |||
910 | offset++; | |||
911 | continue; | |||
912 | } | |||
913 | if (source[offset] == ',') { | |||
914 | offset++; | |||
915 | continue; | |||
916 | } | |||
917 | if (source[offset] == ']') | |||
918 | goto done; | |||
919 | break; | |||
920 | } | |||
921 | } | |||
922 | ||||
923 | done: | |||
924 | ||||
925 | if (x.length > 0) { | |||
926 | *vector = (i8 *)x.z; | |||
927 | *dimensions = x.length; | |||
928 | *cleanup = (vector_cleanup)sqlite3_freesqlite3_api->free; | |||
929 | return SQLITE_OK0; | |||
930 | } | |||
931 | sqlite3_freesqlite3_api->free(x.z); | |||
932 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); | |||
933 | return SQLITE_ERROR1; | |||
934 | } | |||
935 | ||||
936 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Unknown type for int8 vector."); | |||
937 | return SQLITE_ERROR1; | |||
938 | } | |||
939 | ||||
940 | /** | |||
941 | * @brief Extract a vector from a sqlite3_value. Can be a float32, int8, or bit | |||
942 | * vector. | |||
943 | * | |||
944 | * @param value: the sqlite3_value to read from. | |||
945 | * @param vector: Output pointer to vector data. | |||
946 | * @param dimensions: Output number of dimensions | |||
947 | * @param dimensions: Output vector element type | |||
948 | * @param cleanup | |||
949 | * @param pzErrorMessage | |||
950 | * @return int SQLITE_OK on success, error code otherwise | |||
951 | */ | |||
952 | int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions, | |||
953 | enum VectorElementType *element_type, | |||
954 | vector_cleanup *cleanup, char **pzErrorMessage) { | |||
955 | int subtype = sqlite3_value_subtypesqlite3_api->value_subtype(value); | |||
956 | if (!subtype || (subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) || | |||
957 | (subtype == JSON_SUBTYPE74)) { | |||
958 | int rc = fvec_from_value(value, (f32 **)vector, dimensions, | |||
959 | (fvec_cleanup *)cleanup, pzErrorMessage); | |||
960 | if (rc == SQLITE_OK0) { | |||
961 | *element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32; | |||
962 | } | |||
963 | return rc; | |||
964 | } | |||
965 | ||||
966 | if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) { | |||
967 | int rc = bitvec_from_value(value, (u8 **)vector, dimensions, cleanup, | |||
968 | pzErrorMessage); | |||
969 | if (rc == SQLITE_OK0) { | |||
970 | *element_type = SQLITE_VEC_ELEMENT_TYPE_BIT; | |||
971 | } | |||
972 | return rc; | |||
973 | } | |||
974 | if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) { | |||
975 | int rc = int8_vec_from_value(value, (i8 **)vector, dimensions, cleanup, | |||
976 | pzErrorMessage); | |||
977 | if (rc == SQLITE_OK0) { | |||
978 | *element_type = SQLITE_VEC_ELEMENT_TYPE_INT8; | |||
979 | } | |||
980 | return rc; | |||
981 | } | |||
982 | *pzErrorMessage = sqlite3_mprintfsqlite3_api->mprintf("Unknown subtype: %d", subtype); | |||
983 | return SQLITE_ERROR1; | |||
984 | } | |||
985 | ||||
986 | int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, void **a, | |||
987 | void **b, enum VectorElementType *element_type, | |||
988 | size_t *dimensions, vector_cleanup *outACleanup, | |||
989 | vector_cleanup *outBCleanup, char **outError) { | |||
990 | int rc; | |||
991 | enum VectorElementType aType, bType; | |||
992 | size_t aDims, bDims; | |||
993 | char *error = NULL((void*)0); | |||
994 | vector_cleanup aCleanup, bCleanup; | |||
995 | ||||
996 | rc = vector_from_value(aValue, a, &aDims, &aType, &aCleanup, &error); | |||
997 | if (rc != SQLITE_OK0) { | |||
998 | *outError = sqlite3_mprintfsqlite3_api->mprintf("Error reading 1st vector: %s", error); | |||
999 | sqlite3_freesqlite3_api->free(error); | |||
1000 | return SQLITE_ERROR1; | |||
1001 | } | |||
1002 | ||||
1003 | rc = vector_from_value(bValue, b, &bDims, &bType, &bCleanup, &error); | |||
1004 | if (rc != SQLITE_OK0) { | |||
1005 | *outError = sqlite3_mprintfsqlite3_api->mprintf("Error reading 2nd vector: %s", error); | |||
1006 | sqlite3_freesqlite3_api->free(error); | |||
1007 | aCleanup(a); | |||
1008 | return SQLITE_ERROR1; | |||
1009 | } | |||
1010 | ||||
1011 | if (aType != bType) { | |||
1012 | *outError = | |||
1013 | sqlite3_mprintfsqlite3_api->mprintf("Vector type mistmatch. First vector has type %s, " | |||
1014 | "while the second has type %s.", | |||
1015 | vector_subtype_name(aType), vector_subtype_name(bType)); | |||
1016 | aCleanup(*a); | |||
1017 | bCleanup(*b); | |||
1018 | return SQLITE_ERROR1; | |||
1019 | } | |||
1020 | if (aDims != bDims) { | |||
1021 | *outError = sqlite3_mprintfsqlite3_api->mprintf( | |||
1022 | "Vector dimension mistmatch. First vector has %ld dimensions, " | |||
1023 | "while the second has %ld dimensions.", | |||
1024 | aDims, bDims); | |||
1025 | aCleanup(*a); | |||
1026 | bCleanup(*b); | |||
1027 | return SQLITE_ERROR1; | |||
1028 | } | |||
1029 | *element_type = aType; | |||
1030 | *dimensions = aDims; | |||
1031 | *outACleanup = aCleanup; | |||
1032 | *outBCleanup = bCleanup; | |||
1033 | return SQLITE_OK0; | |||
1034 | } | |||
1035 | ||||
1036 | int _cmp(const void *a, const void *b) { return (*(i64 *)a - *(i64 *)b); } | |||
1037 | ||||
/* Payload handed between SQL functions through the pointer-passing
 * interface to describe a file by path. */
struct VecNpyFile {
  /* File path text. */
  char *path;
  /* Length of `path` in bytes. */
  size_t pathLength;
};
/* Pointer type name used with sqlite3_result_pointer() for VecNpyFile. */
#define SQLITE_VEC_NPY_FILE_NAME "vec0-npy-file"
1043 | ||||
1044 | #ifndef SQLITE_VEC_OMIT_FS | |||
1045 | static void vec_npy_file(sqlite3_context *context, int argc, | |||
1046 | sqlite3_value **argv) { | |||
1047 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1047, __extension__ __PRETTY_FUNCTION__); })); | |||
1048 | char *path = (char *)sqlite3_value_textsqlite3_api->value_text(argv[0]); | |||
1049 | size_t pathLength = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]); | |||
1050 | struct VecNpyFile *f; | |||
1051 | ||||
1052 | f = sqlite3_mallocsqlite3_api->malloc(sizeof(*f)); | |||
1053 | if (!f) { | |||
1054 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1055 | return; | |||
1056 | } | |||
1057 | memset(f, 0, sizeof(*f)); | |||
1058 | ||||
1059 | f->path = path; | |||
1060 | f->pathLength = pathLength; | |||
1061 | sqlite3_result_pointersqlite3_api->result_pointer(context, f, SQLITE_VEC_NPY_FILE_NAME"vec0-npy-file", sqlite3_freesqlite3_api->free); | |||
1062 | } | |||
1063 | #endif | |||
1064 | ||||
1065 | #pragma region scalar functions | |||
1066 | static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) { | |||
1067 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1067, __extension__ __PRETTY_FUNCTION__); })); | |||
1068 | int rc; | |||
1069 | f32 *vector = NULL((void*)0); | |||
1070 | size_t dimensions; | |||
1071 | fvec_cleanup cleanup; | |||
1072 | char *errmsg; | |||
1073 | rc = fvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg); | |||
1074 | if (rc != SQLITE_OK0) { | |||
1075 | sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1); | |||
1076 | sqlite3_freesqlite3_api->free(errmsg); | |||
1077 | return; | |||
1078 | } | |||
1079 | sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions * sizeof(f32), | |||
1080 | (void (*)(void *))cleanup); | |||
1081 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); | |||
1082 | } | |||
1083 | ||||
1084 | static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) { | |||
1085 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1085, __extension__ __PRETTY_FUNCTION__); })); | |||
1086 | int rc; | |||
1087 | u8 *vector; | |||
1088 | size_t dimensions; | |||
1089 | vector_cleanup cleanup; | |||
1090 | char *errmsg; | |||
1091 | rc = bitvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg); | |||
1092 | if (rc != SQLITE_OK0) { | |||
1093 | sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1); | |||
1094 | sqlite3_freesqlite3_api->free(errmsg); | |||
1095 | return; | |||
1096 | } | |||
1097 | sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions / CHAR_BIT8, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
1098 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT); | |||
1099 | cleanup(vector); | |||
1100 | } | |||
1101 | static void vec_int8(sqlite3_context *context, int argc, sqlite3_value **argv) { | |||
1102 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1102, __extension__ __PRETTY_FUNCTION__); })); | |||
1103 | int rc; | |||
1104 | i8 *vector; | |||
1105 | size_t dimensions; | |||
1106 | vector_cleanup cleanup; | |||
1107 | char *errmsg; | |||
1108 | rc = int8_vec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg); | |||
1109 | if (rc != SQLITE_OK0) { | |||
1110 | sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1); | |||
1111 | sqlite3_freesqlite3_api->free(errmsg); | |||
1112 | return; | |||
1113 | } | |||
1114 | sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
1115 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); | |||
1116 | cleanup(vector); | |||
1117 | } | |||
1118 | ||||
1119 | static void vec_length(sqlite3_context *context, int argc, | |||
1120 | sqlite3_value **argv) { | |||
1121 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1121, __extension__ __PRETTY_FUNCTION__); })); | |||
1122 | int rc; | |||
1123 | void *vector; | |||
1124 | size_t dimensions; | |||
1125 | vector_cleanup cleanup; | |||
1126 | char *errmsg; | |||
1127 | enum VectorElementType elementType; | |||
1128 | rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, &cleanup, | |||
1129 | &errmsg); | |||
1130 | if (rc != SQLITE_OK0) { | |||
1131 | sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1); | |||
1132 | sqlite3_freesqlite3_api->free(errmsg); | |||
1133 | return; | |||
1134 | } | |||
1135 | sqlite3_result_int64sqlite3_api->result_int64(context, dimensions); | |||
1136 | cleanup(vector); | |||
1137 | } | |||
1138 | ||||
1139 | static void vec_distance_cosine(sqlite3_context *context, int argc, | |||
1140 | sqlite3_value **argv) { | |||
1141 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1141, __extension__ __PRETTY_FUNCTION__); })); | |||
1142 | int rc; | |||
1143 | void *a = NULL((void*)0), *b = NULL((void*)0); | |||
1144 | size_t dimensions; | |||
1145 | vector_cleanup aCleanup, bCleanup; | |||
1146 | char *error; | |||
1147 | enum VectorElementType elementType; | |||
1148 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, | |||
1149 | &aCleanup, &bCleanup, &error); | |||
1150 | if (rc != SQLITE_OK0) { | |||
1151 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); | |||
1152 | sqlite3_freesqlite3_api->free(error); | |||
1153 | return; | |||
1154 | } | |||
1155 | ||||
1156 | switch (elementType) { | |||
1157 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
1158 | sqlite3_result_errorsqlite3_api->result_error( | |||
1159 | context, "Cannot calculate cosine distance between two bitvectors.", | |||
1160 | -1); | |||
1161 | goto finish; | |||
1162 | } | |||
1163 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
1164 | f32 result = distance_cosine_float(a, b, &dimensions); | |||
1165 | sqlite3_result_doublesqlite3_api->result_double(context, result); | |||
1166 | goto finish; | |||
1167 | } | |||
1168 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
1169 | f32 result = distance_cosine_int8(a, b, &dimensions); | |||
1170 | sqlite3_result_doublesqlite3_api->result_double(context, result); | |||
1171 | goto finish; | |||
1172 | } | |||
1173 | } | |||
1174 | ||||
1175 | finish: | |||
1176 | aCleanup(a); | |||
1177 | bCleanup(b); | |||
1178 | return; | |||
1179 | } | |||
1180 | ||||
1181 | static void vec_distance_l2(sqlite3_context *context, int argc, | |||
1182 | sqlite3_value **argv) { | |||
1183 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1183, __extension__ __PRETTY_FUNCTION__); })); | |||
1184 | int rc; | |||
1185 | void *a = NULL((void*)0), *b = NULL((void*)0); | |||
1186 | size_t dimensions; | |||
1187 | vector_cleanup aCleanup, bCleanup; | |||
1188 | char *error; | |||
1189 | enum VectorElementType elementType; | |||
1190 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, | |||
1191 | &aCleanup, &bCleanup, &error); | |||
1192 | if (rc != SQLITE_OK0) { | |||
1193 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); | |||
1194 | sqlite3_freesqlite3_api->free(error); | |||
1195 | return; | |||
1196 | } | |||
1197 | ||||
1198 | switch (elementType) { | |||
1199 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
1200 | sqlite3_result_errorsqlite3_api->result_error( | |||
1201 | context, "Cannot calculate L2 distance between two bitvectors.", -1); | |||
1202 | goto finish; | |||
1203 | } | |||
1204 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
1205 | f32 result = distance_l2_sqr_float(a, b, &dimensions); | |||
1206 | sqlite3_result_doublesqlite3_api->result_double(context, result); | |||
1207 | goto finish; | |||
1208 | } | |||
1209 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
1210 | f32 result = distance_l2_sqr_int8(a, b, &dimensions); | |||
1211 | sqlite3_result_doublesqlite3_api->result_double(context, result); | |||
1212 | goto finish; | |||
1213 | } | |||
1214 | } | |||
1215 | ||||
1216 | finish: | |||
1217 | aCleanup(a); | |||
1218 | bCleanup(b); | |||
1219 | return; | |||
1220 | } | |||
1221 | ||||
1222 | static void vec_distance_l1(sqlite3_context *context, int argc, | |||
1223 | sqlite3_value **argv) { | |||
1224 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1224, __extension__ __PRETTY_FUNCTION__); })); | |||
1225 | int rc; | |||
1226 | void *a, *b; | |||
1227 | size_t dimensions; | |||
1228 | vector_cleanup aCleanup, bCleanup; | |||
1229 | char *error; | |||
1230 | enum VectorElementType elementType; | |||
1231 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, | |||
1232 | &aCleanup, &bCleanup, &error); | |||
1233 | if (rc != SQLITE_OK0) { | |||
1234 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); | |||
1235 | sqlite3_freesqlite3_api->free(error); | |||
1236 | return; | |||
1237 | } | |||
1238 | ||||
1239 | switch (elementType) { | |||
1240 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
1241 | sqlite3_result_errorsqlite3_api->result_error( | |||
1242 | context, "Cannot calculate L1 distance between two bitvectors.", -1); | |||
1243 | goto finish; | |||
1244 | } | |||
1245 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
1246 | double result = distance_l1_f32(a, b, &dimensions); | |||
1247 | sqlite3_result_doublesqlite3_api->result_double(context, result); | |||
1248 | goto finish; | |||
1249 | } | |||
1250 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
1251 | i64 result = distance_l1_int8(a, b, &dimensions); | |||
1252 | sqlite3_result_intsqlite3_api->result_int(context, result); | |||
1253 | goto finish; | |||
1254 | } | |||
1255 | } | |||
1256 | ||||
1257 | finish: | |||
1258 | aCleanup(a); | |||
1259 | bCleanup(b); | |||
1260 | return; | |||
1261 | } | |||
1262 | ||||
1263 | static void vec_distance_hamming(sqlite3_context *context, int argc, | |||
1264 | sqlite3_value **argv) { | |||
1265 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1265, __extension__ __PRETTY_FUNCTION__); })); | |||
1266 | int rc; | |||
1267 | void *a = NULL((void*)0), *b = NULL((void*)0); | |||
1268 | size_t dimensions; | |||
1269 | vector_cleanup aCleanup, bCleanup; | |||
1270 | char *error; | |||
1271 | enum VectorElementType elementType; | |||
1272 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, | |||
1273 | &aCleanup, &bCleanup, &error); | |||
1274 | if (rc != SQLITE_OK0) { | |||
1275 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); | |||
1276 | sqlite3_freesqlite3_api->free(error); | |||
1277 | return; | |||
1278 | } | |||
1279 | ||||
1280 | switch (elementType) { | |||
1281 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
1282 | sqlite3_result_doublesqlite3_api->result_double(context, distance_hamming(a, b, &dimensions)); | |||
1283 | goto finish; | |||
1284 | } | |||
1285 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
1286 | sqlite3_result_errorsqlite3_api->result_error( | |||
1287 | context, | |||
1288 | "Cannot calculate hamming distance between two float32 vectors.", -1); | |||
1289 | goto finish; | |||
1290 | } | |||
1291 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
1292 | sqlite3_result_errorsqlite3_api->result_error( | |||
1293 | context, "Cannot calculate hamming distance between two int8 vectors.", | |||
1294 | -1); | |||
1295 | goto finish; | |||
1296 | } | |||
1297 | } | |||
1298 | ||||
1299 | finish: | |||
1300 | aCleanup(a); | |||
1301 | bCleanup(b); | |||
1302 | return; | |||
1303 | } | |||
1304 | ||||
1305 | char *vec_type_name(enum VectorElementType elementType) { | |||
1306 | switch (elementType) { | |||
1307 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: | |||
1308 | return "float32"; | |||
1309 | case SQLITE_VEC_ELEMENT_TYPE_INT8: | |||
1310 | return "int8"; | |||
1311 | case SQLITE_VEC_ELEMENT_TYPE_BIT: | |||
1312 | return "bit"; | |||
1313 | } | |||
1314 | return ""; | |||
1315 | } | |||
1316 | ||||
1317 | static void vec_type(sqlite3_context *context, int argc, sqlite3_value **argv) { | |||
1318 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1318, __extension__ __PRETTY_FUNCTION__); })); | |||
1319 | void *vector; | |||
1320 | size_t dimensions; | |||
1321 | vector_cleanup cleanup; | |||
1322 | char *pzError; | |||
1323 | enum VectorElementType elementType; | |||
1324 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, | |||
1325 | &cleanup, &pzError); | |||
1326 | if (rc != SQLITE_OK0) { | |||
1327 | sqlite3_result_errorsqlite3_api->result_error(context, pzError, -1); | |||
1328 | sqlite3_freesqlite3_api->free(pzError); | |||
1329 | return; | |||
1330 | } | |||
1331 | sqlite3_result_textsqlite3_api->result_text(context, vec_type_name(elementType), -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
1332 | cleanup(vector); | |||
1333 | } | |||
1334 | static void vec_quantize_binary(sqlite3_context *context, int argc, | |||
1335 | sqlite3_value **argv) { | |||
1336 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1336, __extension__ __PRETTY_FUNCTION__); })); | |||
1337 | void *vector; | |||
1338 | size_t dimensions; | |||
1339 | vector_cleanup vectorCleanup; | |||
1340 | char *pzError; | |||
1341 | enum VectorElementType elementType; | |||
1342 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, | |||
1343 | &vectorCleanup, &pzError); | |||
1344 | if (rc != SQLITE_OK0) { | |||
1345 | sqlite3_result_errorsqlite3_api->result_error(context, pzError, -1); | |||
1346 | sqlite3_freesqlite3_api->free(pzError); | |||
1347 | return; | |||
1348 | } | |||
1349 | ||||
1350 | if (dimensions <= 0) { | |||
1351 | sqlite3_result_errorsqlite3_api->result_error(context, "Zero length vectors are not supported.", -1); | |||
1352 | goto cleanup; | |||
1353 | return; | |||
1354 | } | |||
1355 | if ((dimensions % CHAR_BIT8) != 0) { | |||
1356 | sqlite3_result_errorsqlite3_api->result_error( | |||
1357 | context, | |||
1358 | "Binary quantization requires vectors with a length divisible by 8", | |||
1359 | -1); | |||
1360 | goto cleanup; | |||
1361 | return; | |||
1362 | } | |||
1363 | ||||
1364 | int sz = dimensions / CHAR_BIT8; | |||
1365 | u8 *out = sqlite3_mallocsqlite3_api->malloc(sz); | |||
1366 | if (!out) { | |||
1367 | sqlite3_result_error_codesqlite3_api->result_error_code(context, SQLITE_NOMEM7); | |||
1368 | goto cleanup; | |||
1369 | return; | |||
1370 | } | |||
1371 | memset(out, 0, sz); | |||
1372 | ||||
1373 | switch (elementType) { | |||
1374 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
1375 | ||||
1376 | for (size_t i = 0; i < dimensions; i++) { | |||
1377 | int res = ((f32 *)vector)[i] > 0.0; | |||
1378 | out[i / 8] |= (res << (i % 8)); | |||
1379 | } | |||
1380 | break; | |||
1381 | } | |||
1382 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
1383 | for (size_t i = 0; i < dimensions; i++) { | |||
1384 | int res = ((i8 *)vector)[i] > 0; | |||
1385 | out[i / 8] |= (res << (i % 8)); | |||
1386 | } | |||
1387 | break; | |||
1388 | } | |||
1389 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
1390 | sqlite3_result_errorsqlite3_api->result_error(context, | |||
1391 | "Can only binary quantize float or int8 vectors", -1); | |||
1392 | sqlite3_freesqlite3_api->free(out); | |||
1393 | return; | |||
1394 | } | |||
1395 | } | |||
1396 | sqlite3_result_blobsqlite3_api->result_blob(context, out, sz, sqlite3_freesqlite3_api->free); | |||
1397 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT); | |||
1398 | ||||
1399 | cleanup: | |||
1400 | vectorCleanup(vector); | |||
1401 | } | |||
1402 | ||||
1403 | static void vec_quantize_int8(sqlite3_context *context, int argc, | |||
1404 | sqlite3_value **argv) { | |||
1405 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1405, __extension__ __PRETTY_FUNCTION__); })); | |||
1406 | f32 *srcVector; | |||
1407 | size_t dimensions; | |||
1408 | fvec_cleanup srcCleanup; | |||
1409 | char *err; | |||
1410 | i8 *out = NULL((void*)0); | |||
1411 | int rc = fvec_from_value(argv[0], &srcVector, &dimensions, &srcCleanup, &err); | |||
1412 | if (rc != SQLITE_OK0) { | |||
1413 | sqlite3_result_errorsqlite3_api->result_error(context, err, -1); | |||
1414 | sqlite3_freesqlite3_api->free(err); | |||
1415 | return; | |||
1416 | } | |||
1417 | ||||
1418 | int sz = dimensions * sizeof(i8); | |||
1419 | out = sqlite3_mallocsqlite3_api->malloc(sz); | |||
1420 | if (!out) { | |||
1421 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1422 | goto cleanup; | |||
1423 | } | |||
1424 | memset(out, 0, sz); | |||
1425 | ||||
1426 | if ((sqlite3_value_typesqlite3_api->value_type(argv[1]) != SQLITE_TEXT3) || | |||
1427 | (sqlite3_value_bytessqlite3_api->value_bytes(argv[1]) != strlen("unit")) || | |||
1428 | (sqlite3_stricmpsqlite3_api->stricmp((const char *)sqlite3_value_textsqlite3_api->value_text(argv[1]), "unit") != | |||
1429 | 0)) { | |||
1430 | sqlite3_result_errorsqlite3_api->result_error( | |||
1431 | context, "2nd argument to vec_quantize_int8() must be 'unit'.", -1); | |||
1432 | sqlite3_freesqlite3_api->free(out); | |||
1433 | goto cleanup; | |||
1434 | } | |||
1435 | f32 step = (1.0 - (-1.0)) / 255; | |||
1436 | for (size_t i = 0; i < dimensions; i++) { | |||
1437 | out[i] = ((srcVector[i] - (-1.0)) / step) - 128; | |||
1438 | } | |||
1439 | ||||
1440 | sqlite3_result_blobsqlite3_api->result_blob(context, out, dimensions * sizeof(i8), sqlite3_freesqlite3_api->free); | |||
1441 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); | |||
1442 | ||||
1443 | cleanup: | |||
1444 | srcCleanup(srcVector); | |||
1445 | } | |||
1446 | ||||
1447 | static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) { | |||
1448 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1448, __extension__ __PRETTY_FUNCTION__); })); | |||
1449 | int rc; | |||
1450 | void *a = NULL((void*)0), *b = NULL((void*)0); | |||
1451 | size_t dimensions; | |||
1452 | vector_cleanup aCleanup, bCleanup; | |||
1453 | char *error; | |||
1454 | enum VectorElementType elementType; | |||
1455 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, | |||
1456 | &aCleanup, &bCleanup, &error); | |||
1457 | if (rc != SQLITE_OK0) { | |||
1458 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); | |||
1459 | sqlite3_freesqlite3_api->free(error); | |||
1460 | return; | |||
1461 | } | |||
1462 | ||||
1463 | switch (elementType) { | |||
1464 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
1465 | sqlite3_result_errorsqlite3_api->result_error(context, "Cannot add two bitvectors together.", -1); | |||
1466 | goto finish; | |||
1467 | } | |||
1468 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
1469 | size_t outSize = dimensions * sizeof(f32); | |||
1470 | f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize); | |||
1471 | if (!out) { | |||
1472 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1473 | goto finish; | |||
1474 | } | |||
1475 | memset(out, 0, outSize); | |||
1476 | for (size_t i = 0; i < dimensions; i++) { | |||
1477 | out[i] = ((f32 *)a)[i] + ((f32 *)b)[i]; | |||
1478 | } | |||
1479 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); | |||
1480 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); | |||
1481 | goto finish; | |||
1482 | } | |||
1483 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
1484 | size_t outSize = dimensions * sizeof(i8); | |||
1485 | i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize); | |||
1486 | if (!out) { | |||
1487 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1488 | goto finish; | |||
1489 | } | |||
1490 | memset(out, 0, outSize); | |||
1491 | for (size_t i = 0; i < dimensions; i++) { | |||
1492 | out[i] = ((i8 *)a)[i] + ((i8 *)b)[i]; | |||
1493 | } | |||
1494 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); | |||
1495 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); | |||
1496 | goto finish; | |||
1497 | } | |||
1498 | } | |||
1499 | finish: | |||
1500 | aCleanup(a); | |||
1501 | bCleanup(b); | |||
1502 | return; | |||
1503 | } | |||
1504 | static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **argv) { | |||
1505 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1505, __extension__ __PRETTY_FUNCTION__); })); | |||
1506 | int rc; | |||
1507 | void *a = NULL((void*)0), *b = NULL((void*)0); | |||
1508 | size_t dimensions; | |||
1509 | vector_cleanup aCleanup, bCleanup; | |||
1510 | char *error; | |||
1511 | enum VectorElementType elementType; | |||
1512 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, | |||
1513 | &aCleanup, &bCleanup, &error); | |||
1514 | if (rc != SQLITE_OK0) { | |||
1515 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); | |||
1516 | sqlite3_freesqlite3_api->free(error); | |||
1517 | return; | |||
1518 | } | |||
1519 | ||||
1520 | switch (elementType) { | |||
1521 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
1522 | sqlite3_result_errorsqlite3_api->result_error(context, "Cannot subtract two bitvectors together.", | |||
1523 | -1); | |||
1524 | goto finish; | |||
1525 | } | |||
1526 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
1527 | size_t outSize = dimensions * sizeof(f32); | |||
1528 | f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize); | |||
1529 | if (!out) { | |||
1530 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1531 | goto finish; | |||
1532 | } | |||
1533 | memset(out, 0, outSize); | |||
1534 | for (size_t i = 0; i < dimensions; i++) { | |||
1535 | out[i] = ((f32 *)a)[i] - ((f32 *)b)[i]; | |||
1536 | } | |||
1537 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); | |||
1538 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); | |||
1539 | goto finish; | |||
1540 | } | |||
1541 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
1542 | size_t outSize = dimensions * sizeof(i8); | |||
1543 | i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize); | |||
1544 | if (!out) { | |||
1545 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1546 | goto finish; | |||
1547 | } | |||
1548 | memset(out, 0, outSize); | |||
1549 | for (size_t i = 0; i < dimensions; i++) { | |||
1550 | out[i] = ((i8 *)a)[i] - ((i8 *)b)[i]; | |||
1551 | } | |||
1552 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); | |||
1553 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); | |||
1554 | goto finish; | |||
1555 | } | |||
1556 | } | |||
1557 | finish: | |||
1558 | aCleanup(a); | |||
1559 | bCleanup(b); | |||
1560 | return; | |||
1561 | } | |||
1562 | static void vec_slice(sqlite3_context *context, int argc, | |||
1563 | sqlite3_value **argv) { | |||
1564 | assert(argc == 3)((void) sizeof ((argc == 3) ? 1 : 0), __extension__ ({ if (argc == 3) ; else __assert_fail ("argc == 3", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1564, __extension__ __PRETTY_FUNCTION__); })); | |||
1565 | ||||
1566 | void *vector; | |||
1567 | size_t dimensions; | |||
1568 | vector_cleanup cleanup; | |||
1569 | char *err; | |||
1570 | enum VectorElementType elementType; | |||
1571 | ||||
1572 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, | |||
1573 | &cleanup, &err); | |||
1574 | if (rc != SQLITE_OK0) { | |||
1575 | sqlite3_result_errorsqlite3_api->result_error(context, err, -1); | |||
1576 | sqlite3_freesqlite3_api->free(err); | |||
1577 | return; | |||
1578 | } | |||
1579 | ||||
1580 | int start = sqlite3_value_intsqlite3_api->value_int(argv[1]); | |||
1581 | int end = sqlite3_value_intsqlite3_api->value_int(argv[2]); | |||
1582 | ||||
1583 | if (start < 0) { | |||
1584 | sqlite3_result_errorsqlite3_api->result_error(context, | |||
1585 | "slice 'start' index must be a postive number.", -1); | |||
1586 | goto done; | |||
1587 | } | |||
1588 | if (end < 0) { | |||
1589 | sqlite3_result_errorsqlite3_api->result_error(context, "slice 'end' index must be a postive number.", | |||
1590 | -1); | |||
1591 | goto done; | |||
1592 | } | |||
1593 | if (((size_t)start) > dimensions) { | |||
1594 | sqlite3_result_errorsqlite3_api->result_error( | |||
1595 | context, "slice 'start' index is greater than the number of dimensions", | |||
1596 | -1); | |||
1597 | goto done; | |||
1598 | } | |||
1599 | if (((size_t)end) > dimensions) { | |||
1600 | sqlite3_result_errorsqlite3_api->result_error( | |||
1601 | context, "slice 'end' index is greater than the number of dimensions", | |||
1602 | -1); | |||
1603 | goto done; | |||
1604 | } | |||
1605 | if (start > end) { | |||
1606 | sqlite3_result_errorsqlite3_api->result_error(context, | |||
1607 | "slice 'start' index is greater than 'end' index", -1); | |||
1608 | goto done; | |||
1609 | } | |||
1610 | if (start == end) { | |||
1611 | sqlite3_result_errorsqlite3_api->result_error(context, | |||
1612 | "slice 'start' index is equal to the 'end' index, " | |||
1613 | "vectors must have non-zero length", | |||
1614 | -1); | |||
1615 | goto done; | |||
1616 | } | |||
1617 | size_t n = end - start; | |||
1618 | ||||
1619 | switch (elementType) { | |||
1620 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
1621 | int outSize = n * sizeof(f32); | |||
1622 | f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize); | |||
1623 | if (!out) { | |||
1624 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1625 | goto done; | |||
1626 | } | |||
1627 | memset(out, 0, outSize); | |||
1628 | for (size_t i = 0; i < n; i++) { | |||
1629 | out[i] = ((f32 *)vector)[start + i]; | |||
1630 | } | |||
1631 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); | |||
1632 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); | |||
1633 | goto done; | |||
1634 | } | |||
1635 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
1636 | int outSize = n * sizeof(i8); | |||
1637 | i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize); | |||
1638 | if (!out) { | |||
1639 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1640 | return; | |||
1641 | } | |||
1642 | memset(out, 0, outSize); | |||
1643 | for (size_t i = 0; i < n; i++) { | |||
1644 | out[i] = ((i8 *)vector)[start + i]; | |||
1645 | } | |||
1646 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); | |||
1647 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); | |||
1648 | goto done; | |||
1649 | } | |||
1650 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
1651 | if ((start % CHAR_BIT8) != 0) { | |||
1652 | sqlite3_result_errorsqlite3_api->result_error(context, "start index must be divisible by 8.", -1); | |||
1653 | goto done; | |||
1654 | } | |||
1655 | if ((end % CHAR_BIT8) != 0) { | |||
1656 | sqlite3_result_errorsqlite3_api->result_error(context, "end index must be divisible by 8.", -1); | |||
1657 | goto done; | |||
1658 | } | |||
1659 | int outSize = n / CHAR_BIT8; | |||
1660 | u8 *out = sqlite3_mallocsqlite3_api->malloc(outSize); | |||
1661 | if (!out) { | |||
1662 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1663 | return; | |||
1664 | } | |||
1665 | memset(out, 0, outSize); | |||
1666 | for (size_t i = 0; i < n / CHAR_BIT8; i++) { | |||
1667 | out[i] = ((u8 *)vector)[(start / CHAR_BIT8) + i]; | |||
1668 | } | |||
1669 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); | |||
1670 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT); | |||
1671 | goto done; | |||
1672 | } | |||
1673 | } | |||
1674 | done: | |||
1675 | cleanup(vector); | |||
1676 | } | |||
1677 | ||||
1678 | static void vec_to_json(sqlite3_context *context, int argc, | |||
1679 | sqlite3_value **argv) { | |||
1680 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1680, __extension__ __PRETTY_FUNCTION__); })); | |||
1681 | void *vector; | |||
1682 | size_t dimensions; | |||
1683 | vector_cleanup cleanup; | |||
1684 | char *err; | |||
1685 | enum VectorElementType elementType; | |||
1686 | ||||
1687 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, | |||
1688 | &cleanup, &err); | |||
1689 | if (rc != SQLITE_OK0) { | |||
1690 | sqlite3_result_errorsqlite3_api->result_error(context, err, -1); | |||
1691 | sqlite3_freesqlite3_api->free(err); | |||
1692 | return; | |||
1693 | } | |||
1694 | ||||
1695 | sqlite3_str *str = sqlite3_str_newsqlite3_api->str_new(sqlite3_context_db_handlesqlite3_api->context_db_handle(context)); | |||
1696 | sqlite3_str_appendallsqlite3_api->str_appendall(str, "["); | |||
1697 | for (size_t i = 0; i < dimensions; i++) { | |||
1698 | if (i != 0) { | |||
1699 | sqlite3_str_appendallsqlite3_api->str_appendall(str, ","); | |||
1700 | } | |||
1701 | if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) { | |||
1702 | f32 value = ((f32 *)vector)[i]; | |||
1703 | if (isnan(value)__builtin_isnan (value)) { | |||
1704 | sqlite3_str_appendallsqlite3_api->str_appendall(str, "null"); | |||
1705 | } else { | |||
1706 | sqlite3_str_appendfsqlite3_api->str_appendf(str, "%f", value); | |||
1707 | } | |||
1708 | ||||
1709 | } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) { | |||
1710 | sqlite3_str_appendfsqlite3_api->str_appendf(str, "%d", ((i8 *)vector)[i]); | |||
1711 | } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) { | |||
1712 | u8 b = (((u8 *)vector)[i / 8] >> (i % CHAR_BIT8)) & 1; | |||
1713 | sqlite3_str_appendfsqlite3_api->str_appendf(str, "%d", b); | |||
1714 | } | |||
1715 | } | |||
1716 | sqlite3_str_appendallsqlite3_api->str_appendall(str, "]"); | |||
1717 | int len = sqlite3_str_lengthsqlite3_api->str_length(str); | |||
1718 | char *s = sqlite3_str_finishsqlite3_api->str_finish(str); | |||
1719 | if (s) { | |||
1720 | sqlite3_result_textsqlite3_api->result_text(context, s, len, sqlite3_freesqlite3_api->free); | |||
1721 | sqlite3_result_subtypesqlite3_api->result_subtype(context, JSON_SUBTYPE74); | |||
1722 | } else { | |||
1723 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
1724 | } | |||
1725 | cleanup(vector); | |||
1726 | } | |||
1727 | ||||
1728 | static void vec_normalize(sqlite3_context *context, int argc, | |||
1729 | sqlite3_value **argv) { | |||
1730 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1730, __extension__ __PRETTY_FUNCTION__); })); | |||
1731 | void *vector; | |||
1732 | size_t dimensions; | |||
1733 | vector_cleanup cleanup; | |||
1734 | char *err; | |||
1735 | enum VectorElementType elementType; | |||
1736 | ||||
1737 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, | |||
1738 | &cleanup, &err); | |||
1739 | if (rc != SQLITE_OK0) { | |||
1740 | sqlite3_result_errorsqlite3_api->result_error(context, err, -1); | |||
1741 | sqlite3_freesqlite3_api->free(err); | |||
1742 | return; | |||
1743 | } | |||
1744 | ||||
1745 | if (elementType != SQLITE_VEC_ELEMENT_TYPE_FLOAT32) { | |||
1746 | sqlite3_result_errorsqlite3_api->result_error( | |||
1747 | context, "only float32 vectors are supported when normalizing", -1); | |||
1748 | cleanup(vector); | |||
1749 | return; | |||
1750 | } | |||
1751 | ||||
1752 | int outSize = dimensions * sizeof(f32); | |||
1753 | f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize); | |||
1754 | if (!out) { | |||
1755 | cleanup(vector); | |||
1756 | sqlite3_result_error_codesqlite3_api->result_error_code(context, SQLITE_NOMEM7); | |||
1757 | return; | |||
1758 | } | |||
1759 | memset(out, 0, outSize); | |||
1760 | ||||
1761 | f32 *v = (f32 *)vector; | |||
1762 | ||||
1763 | f32 norm = 0; | |||
1764 | for (size_t i = 0; i < dimensions; i++) { | |||
1765 | norm += v[i] * v[i]; | |||
1766 | } | |||
1767 | norm = sqrt(norm); | |||
1768 | for (size_t i = 0; i < dimensions; i++) { | |||
1769 | out[i] = v[i] / norm; | |||
1770 | } | |||
1771 | ||||
1772 | sqlite3_result_blobsqlite3_api->result_blob(context, out, dimensions * sizeof(f32), sqlite3_freesqlite3_api->free); | |||
1773 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); | |||
1774 | cleanup(vector); | |||
1775 | } | |||
1776 | ||||
1777 | static void _static_text_func(sqlite3_context *context, int argc, | |||
1778 | sqlite3_value **argv) { | |||
1779 | UNUSED_PARAMETER(argc)(void)(argc); | |||
1780 | UNUSED_PARAMETER(argv)(void)(argv); | |||
1781 | sqlite3_result_textsqlite3_api->result_text(context, sqlite3_user_datasqlite3_api->user_data(context), -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
1782 | } | |||
1783 | ||||
1784 | #pragma endregion | |||
1785 | ||||
/* Kinds of token produced by the vec0 constructor-argument tokenizer. */
enum Vec0TokenType {
  TOKEN_TYPE_IDENTIFIER = 0, /* letter, then letters / digits / '_' */
  TOKEN_TYPE_DIGIT = 1,      /* run of decimal digits */
  TOKEN_TYPE_LBRACKET = 2,   /* '[' */
  TOKEN_TYPE_RBRACKET = 3,   /* ']' */
  TOKEN_TYPE_PLUS = 4,       /* '+' */
  TOKEN_TYPE_EQ = 5,         /* '=' */
};
1794 | struct Vec0Token { | |||
1795 | enum Vec0TokenType token_type; | |||
1796 | char *start; | |||
1797 | char *end; | |||
1798 | }; | |||
1799 | ||||
/* Returns 1 when `x` is an ASCII letter (a-z or A-Z), otherwise 0. */
int is_alpha(char x) {
  const int lowercase = ('a' <= x) && (x <= 'z');
  const int uppercase = ('A' <= x) && (x <= 'Z');
  return lowercase || uppercase;
}
/* Returns 1 when `x` is an ASCII decimal digit (0-9), otherwise 0. */
int is_digit(char x) {
  if (x < '0') {
    return 0;
  }
  return x <= '9';
}
/* Returns 1 for space, tab, newline or carriage return, otherwise 0. */
int is_whitespace(char x) {
  switch (x) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
1807 | ||||
/* Return codes of vec0_token_next() / vec0_scanner_next(). */
/* No more tokens: only whitespace (or nothing) was left in the input. */
#define VEC0_TOKEN_RESULT_EOF 1
/* A token was found and written to the output parameter. */
#define VEC0_TOKEN_RESULT_SOME 2
/* A character that cannot start any token was encountered. */
#define VEC0_TOKEN_RESULT_ERROR 3
1811 | ||||
1812 | int vec0_token_next(char *start, char *end, struct Vec0Token *out) { | |||
1813 | char *ptr = start; | |||
1814 | while (ptr < end) { | |||
1815 | char curr = *ptr; | |||
1816 | if (is_whitespace(curr)) { | |||
1817 | ptr++; | |||
1818 | continue; | |||
1819 | } else if (curr == '+') { | |||
1820 | ptr++; | |||
1821 | out->start = ptr; | |||
1822 | out->end = ptr; | |||
1823 | out->token_type = TOKEN_TYPE_PLUS; | |||
1824 | return VEC0_TOKEN_RESULT_SOME2; | |||
1825 | } else if (curr == '[') { | |||
1826 | ptr++; | |||
1827 | out->start = ptr; | |||
1828 | out->end = ptr; | |||
1829 | out->token_type = TOKEN_TYPE_LBRACKET; | |||
1830 | return VEC0_TOKEN_RESULT_SOME2; | |||
1831 | } else if (curr == ']') { | |||
1832 | ptr++; | |||
1833 | out->start = ptr; | |||
1834 | out->end = ptr; | |||
1835 | out->token_type = TOKEN_TYPE_RBRACKET; | |||
1836 | return VEC0_TOKEN_RESULT_SOME2; | |||
1837 | } else if (curr == '=') { | |||
1838 | ptr++; | |||
1839 | out->start = ptr; | |||
1840 | out->end = ptr; | |||
1841 | out->token_type = TOKEN_TYPE_EQ; | |||
1842 | return VEC0_TOKEN_RESULT_SOME2; | |||
1843 | } else if (is_alpha(curr)) { | |||
1844 | char *start = ptr; | |||
1845 | while (ptr < end && (is_alpha(*ptr) || is_digit(*ptr) || *ptr == '_')) { | |||
1846 | ptr++; | |||
1847 | } | |||
1848 | out->start = start; | |||
1849 | out->end = ptr; | |||
1850 | out->token_type = TOKEN_TYPE_IDENTIFIER; | |||
1851 | return VEC0_TOKEN_RESULT_SOME2; | |||
1852 | } else if (is_digit(curr)) { | |||
1853 | char *start = ptr; | |||
1854 | while (ptr < end && (is_digit(*ptr))) { | |||
1855 | ptr++; | |||
1856 | } | |||
1857 | out->start = start; | |||
1858 | out->end = ptr; | |||
1859 | out->token_type = TOKEN_TYPE_DIGIT; | |||
1860 | return VEC0_TOKEN_RESULT_SOME2; | |||
1861 | } else { | |||
1862 | return VEC0_TOKEN_RESULT_ERROR3; | |||
1863 | } | |||
1864 | } | |||
1865 | return VEC0_TOKEN_RESULT_EOF1; | |||
1866 | } | |||
1867 | ||||
/* Cursor state for tokenizing one vec0 constructor argument. */
struct Vec0Scanner {
  /* Position from which the next token will be scanned. */
  char *start;
  /* One past the last character of the input. */
  char *end;
  /* Set to the beginning of the input by vec0_scanner_init(). */
  char *ptr;
};
1873 | ||||
1874 | void vec0_scanner_init(struct Vec0Scanner *scanner, const char *source, | |||
1875 | int source_length) { | |||
1876 | scanner->start = (char *)source; | |||
1877 | scanner->end = (char *)source + source_length; | |||
1878 | scanner->ptr = (char *)source; | |||
1879 | } | |||
1880 | int vec0_scanner_next(struct Vec0Scanner *scanner, struct Vec0Token *out) { | |||
1881 | int rc = vec0_token_next(scanner->start, scanner->end, out); | |||
1882 | if (rc == VEC0_TOKEN_RESULT_SOME2) { | |||
1883 | scanner->start = out->end; | |||
1884 | } | |||
1885 | return rc; | |||
1886 | } | |||
1887 | ||||
1888 | int vec0_parse_table_option(const char *source, int source_length, | |||
1889 | char **out_key, int *out_key_length, | |||
1890 | char **out_value, int *out_value_length) { | |||
1891 | int rc; | |||
1892 | struct Vec0Scanner scanner; | |||
1893 | struct Vec0Token token; | |||
1894 | char *key; | |||
1895 | char *value; | |||
1896 | int keyLength, valueLength; | |||
1897 | ||||
1898 | vec0_scanner_init(&scanner, source, source_length); | |||
1899 | ||||
1900 | rc = vec0_scanner_next(&scanner, &token); | |||
1901 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
1902 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
1903 | return SQLITE_EMPTY16; | |||
1904 | } | |||
1905 | key = token.start; | |||
1906 | keyLength = token.end - token.start; | |||
1907 | ||||
1908 | rc = vec0_scanner_next(&scanner, &token); | |||
1909 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_EQ) { | |||
1910 | return SQLITE_EMPTY16; | |||
1911 | } | |||
1912 | ||||
1913 | rc = vec0_scanner_next(&scanner, &token); | |||
1914 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
1915 | !((token.token_type == TOKEN_TYPE_IDENTIFIER) || | |||
1916 | (token.token_type == TOKEN_TYPE_DIGIT))) { | |||
1917 | return SQLITE_ERROR1; | |||
1918 | } | |||
1919 | value = token.start; | |||
1920 | valueLength = token.end - token.start; | |||
1921 | ||||
1922 | rc = vec0_scanner_next(&scanner, &token); | |||
1923 | if (rc == VEC0_TOKEN_RESULT_EOF1) { | |||
1924 | *out_key = key; | |||
1925 | *out_key_length = keyLength; | |||
1926 | *out_value = value; | |||
1927 | *out_value_length = valueLength; | |||
1928 | return SQLITE_OK0; | |||
1929 | } | |||
1930 | return SQLITE_ERROR1; | |||
1931 | } | |||
1932 | /** | |||
1933 | * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if | |||
1934 | * it's a PARTITION KEY definition. | |||
1935 | * | |||
1936 | * @param source: argv[i] source string | |||
1937 | * @param source_length: length of the source string | |||
1938 | * @param out_column_name: If it is a partition key, the output column name. Same lifetime | |||
1939 | * as source, points to specific char * | |||
1940 | * @param out_column_name_length: Length of out_column_name in bytes | |||
1941 | * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER. | |||
1942 | * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is. | |||
1943 | */ | |||
1944 | int vec0_parse_partition_key_definition(const char *source, int source_length, | |||
1945 | char **out_column_name, | |||
1946 | int *out_column_name_length, | |||
1947 | int *out_column_type) { | |||
1948 | struct Vec0Scanner scanner; | |||
1949 | struct Vec0Token token; | |||
1950 | char *column_name; | |||
1951 | int column_name_length; | |||
1952 | int column_type; | |||
1953 | vec0_scanner_init(&scanner, source, source_length); | |||
1954 | ||||
1955 | // Check first token is identifier, will be the column name | |||
1956 | int rc = vec0_scanner_next(&scanner, &token); | |||
1957 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
1958 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
1959 | return SQLITE_EMPTY16; | |||
1960 | } | |||
1961 | ||||
1962 | column_name = token.start; | |||
1963 | column_name_length = token.end - token.start; | |||
1964 | ||||
1965 | // Check the next token matches "text" or "integer", as column type | |||
1966 | rc = vec0_scanner_next(&scanner, &token); | |||
1967 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
1968 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
1969 | return SQLITE_EMPTY16; | |||
1970 | } | |||
1971 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) { | |||
1972 | column_type = SQLITE_TEXT3; | |||
1973 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) == | |||
1974 | 0 || | |||
1975 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer", | |||
1976 | token.end - token.start) == 0) { | |||
1977 | column_type = SQLITE_INTEGER1; | |||
1978 | } else { | |||
1979 | return SQLITE_EMPTY16; | |||
1980 | } | |||
1981 | ||||
1982 | // Check the next token is identifier and matches "partition" | |||
1983 | rc = vec0_scanner_next(&scanner, &token); | |||
1984 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
1985 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
1986 | return SQLITE_EMPTY16; | |||
1987 | } | |||
1988 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "partition", token.end - token.start) != 0) { | |||
1989 | return SQLITE_EMPTY16; | |||
1990 | } | |||
1991 | ||||
1992 | // Check the next token is identifier and matches "key" | |||
1993 | rc = vec0_scanner_next(&scanner, &token); | |||
1994 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
1995 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
1996 | return SQLITE_EMPTY16; | |||
1997 | } | |||
1998 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "key", token.end - token.start) != 0) { | |||
1999 | return SQLITE_EMPTY16; | |||
2000 | } | |||
2001 | ||||
2002 | *out_column_name = column_name; | |||
2003 | *out_column_name_length = column_name_length; | |||
2004 | *out_column_type = column_type; | |||
2005 | ||||
2006 | return SQLITE_OK0; | |||
2007 | } | |||
2008 | ||||
2009 | /** | |||
2010 | * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if | |||
2011 | * it's an auxiliar column definition, ie `+[name] [type]` like `+contents text` | |||
2012 | * | |||
2013 | * @param source: argv[i] source string | |||
2014 | * @param source_length: length of the source string | |||
2015 | * @param out_column_name: If it is a partition key, the output column name. Same lifetime | |||
2016 | * as source, points to specific char * | |||
2017 | * @param out_column_name_length: Length of out_column_name in bytes | |||
2018 | * @param out_column_type: SQLITE_TEXT, SQLITE_INTEGER, SQLITE_FLOAT, or SQLITE_BLOB. | |||
2019 | * @return int: SQLITE_EMPTY if not an aux column, SQLITE_OK if it is. | |||
2020 | */ | |||
2021 | int vec0_parse_auxiliary_column_definition(const char *source, int source_length, | |||
2022 | char **out_column_name, | |||
2023 | int *out_column_name_length, | |||
2024 | int *out_column_type) { | |||
2025 | struct Vec0Scanner scanner; | |||
2026 | struct Vec0Token token; | |||
2027 | char *column_name; | |||
2028 | int column_name_length; | |||
2029 | int column_type; | |||
2030 | vec0_scanner_init(&scanner, source, source_length); | |||
2031 | ||||
2032 | // Check first token is '+', which denotes aux columns | |||
2033 | int rc = vec0_scanner_next(&scanner, &token); | |||
2034 | if (rc != VEC0_TOKEN_RESULT_SOME2 || | |||
2035 | token.token_type != TOKEN_TYPE_PLUS) { | |||
2036 | return SQLITE_EMPTY16; | |||
2037 | } | |||
2038 | ||||
2039 | rc = vec0_scanner_next(&scanner, &token); | |||
2040 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2041 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2042 | return SQLITE_EMPTY16; | |||
2043 | } | |||
2044 | ||||
2045 | column_name = token.start; | |||
2046 | column_name_length = token.end - token.start; | |||
2047 | ||||
2048 | // Check the next token matches "text" or "integer", as column type | |||
2049 | rc = vec0_scanner_next(&scanner, &token); | |||
2050 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2051 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2052 | return SQLITE_EMPTY16; | |||
2053 | } | |||
2054 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) { | |||
2055 | column_type = SQLITE_TEXT3; | |||
2056 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) == | |||
2057 | 0 || | |||
2058 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer", | |||
2059 | token.end - token.start) == 0) { | |||
2060 | column_type = SQLITE_INTEGER1; | |||
2061 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "float", token.end - token.start) == | |||
2062 | 0 || | |||
2063 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "double", | |||
2064 | token.end - token.start) == 0) { | |||
2065 | column_type = SQLITE_FLOAT2; | |||
2066 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "blob", token.end - token.start) ==0) { | |||
2067 | column_type = SQLITE_BLOB4; | |||
2068 | } else { | |||
2069 | return SQLITE_EMPTY16; | |||
2070 | } | |||
2071 | ||||
2072 | *out_column_name = column_name; | |||
2073 | *out_column_name_length = column_name_length; | |||
2074 | *out_column_type = column_type; | |||
2075 | ||||
2076 | return SQLITE_OK0; | |||
2077 | } | |||
2078 | ||||
/* Value kinds a vec0 metadata column can be declared with. */
typedef enum {
  VEC0_METADATA_COLUMN_KIND_BOOLEAN = 0,
  VEC0_METADATA_COLUMN_KIND_INTEGER = 1,
  VEC0_METADATA_COLUMN_KIND_FLOAT = 2,
  VEC0_METADATA_COLUMN_KIND_TEXT = 3,
  // future: blob, date, datetime
} vec0_metadata_column_kind;
2086 | ||||
2087 | /** | |||
2088 | * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if | |||
2089 | * it's an metadata column definition, ie `[name] [type]` like `is_released boolean` | |||
2090 | * | |||
2091 | * @param source: argv[i] source string | |||
2092 | * @param source_length: length of the source string | |||
2093 | * @param out_column_name: If it is a metadata column, the output column name. Same lifetime | |||
2094 | * as source, points to specific char * | |||
2095 | * @param out_column_name_length: Length of out_column_name in bytes | |||
2096 | * @param out_column_type: one of vec0_metadata_column_kind | |||
2097 | * @return int: SQLITE_EMPTY if not an metadata column, SQLITE_OK if it is. | |||
2098 | */ | |||
2099 | int vec0_parse_metadata_column_definition(const char *source, int source_length, | |||
2100 | char **out_column_name, | |||
2101 | int *out_column_name_length, | |||
2102 | vec0_metadata_column_kind *out_column_type) { | |||
2103 | struct Vec0Scanner scanner; | |||
2104 | struct Vec0Token token; | |||
2105 | char *column_name; | |||
2106 | int column_name_length; | |||
2107 | vec0_metadata_column_kind column_type; | |||
2108 | int rc; | |||
2109 | vec0_scanner_init(&scanner, source, source_length); | |||
2110 | ||||
2111 | rc = vec0_scanner_next(&scanner, &token); | |||
2112 | if (rc != VEC0_TOKEN_RESULT_SOME2 || | |||
2113 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2114 | return SQLITE_EMPTY16; | |||
2115 | } | |||
2116 | ||||
2117 | column_name = token.start; | |||
2118 | column_name_length = token.end - token.start; | |||
2119 | ||||
2120 | // Check the next token matches a valid metadata type | |||
2121 | rc = vec0_scanner_next(&scanner, &token); | |||
2122 | if (rc != VEC0_TOKEN_RESULT_SOME2 || | |||
2123 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2124 | return SQLITE_EMPTY16; | |||
2125 | } | |||
2126 | char * t = token.start; | |||
2127 | int n = token.end - token.start; | |||
2128 | if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "boolean", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "bool", n) == 0) { | |||
2129 | column_type = VEC0_METADATA_COLUMN_KIND_BOOLEAN; | |||
2130 | }else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "int64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "integer64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "integer", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "int", n) == 0) { | |||
2131 | column_type = VEC0_METADATA_COLUMN_KIND_INTEGER; | |||
2132 | }else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "float", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "double", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "float64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "f64", n) == 0) { | |||
2133 | column_type = VEC0_METADATA_COLUMN_KIND_FLOAT; | |||
2134 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "text", n) == 0) { | |||
2135 | column_type = VEC0_METADATA_COLUMN_KIND_TEXT; | |||
2136 | } else { | |||
2137 | return SQLITE_EMPTY16; | |||
2138 | } | |||
2139 | ||||
2140 | *out_column_name = column_name; | |||
2141 | *out_column_name_length = column_name_length; | |||
2142 | *out_column_type = column_type; | |||
2143 | ||||
2144 | return SQLITE_OK0; | |||
2145 | } | |||
2146 | ||||
2147 | /** | |||
2148 | * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if | |||
2149 | * it's a PRIMARY KEY definition. | |||
2150 | * | |||
2151 | * @param source: argv[i] source string | |||
2152 | * @param source_length: length of the source string | |||
2153 | * @param out_column_name: If it is a PK, the output column name. Same lifetime | |||
2154 | * as source, points to specific char * | |||
2155 | * @param out_column_name_length: Length of out_column_name in bytes | |||
2156 | * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER. | |||
2157 | * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is. | |||
2158 | */ | |||
2159 | int vec0_parse_primary_key_definition(const char *source, int source_length, | |||
2160 | char **out_column_name, | |||
2161 | int *out_column_name_length, | |||
2162 | int *out_column_type) { | |||
2163 | struct Vec0Scanner scanner; | |||
2164 | struct Vec0Token token; | |||
2165 | char *column_name; | |||
2166 | int column_name_length; | |||
2167 | int column_type; | |||
2168 | vec0_scanner_init(&scanner, source, source_length); | |||
2169 | ||||
2170 | // Check first token is identifier, will be the column name | |||
2171 | int rc = vec0_scanner_next(&scanner, &token); | |||
2172 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2173 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2174 | return SQLITE_EMPTY16; | |||
2175 | } | |||
2176 | ||||
2177 | column_name = token.start; | |||
2178 | column_name_length = token.end - token.start; | |||
2179 | ||||
2180 | // Check the next token matches "text" or "integer", as column type | |||
2181 | rc = vec0_scanner_next(&scanner, &token); | |||
2182 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2183 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2184 | return SQLITE_EMPTY16; | |||
2185 | } | |||
2186 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) { | |||
2187 | column_type = SQLITE_TEXT3; | |||
2188 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) == | |||
2189 | 0 || | |||
2190 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer", | |||
2191 | token.end - token.start) == 0) { | |||
2192 | column_type = SQLITE_INTEGER1; | |||
2193 | } else { | |||
2194 | return SQLITE_EMPTY16; | |||
2195 | } | |||
2196 | ||||
2197 | // Check the next token is identifier and matches "primary" | |||
2198 | rc = vec0_scanner_next(&scanner, &token); | |||
2199 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2200 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2201 | return SQLITE_EMPTY16; | |||
2202 | } | |||
2203 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "primary", token.end - token.start) != 0) { | |||
2204 | return SQLITE_EMPTY16; | |||
2205 | } | |||
2206 | ||||
2207 | // Check the next token is identifier and matches "key" | |||
2208 | rc = vec0_scanner_next(&scanner, &token); | |||
2209 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2210 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2211 | return SQLITE_EMPTY16; | |||
2212 | } | |||
2213 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "key", token.end - token.start) != 0) { | |||
2214 | return SQLITE_EMPTY16; | |||
2215 | } | |||
2216 | ||||
2217 | *out_column_name = column_name; | |||
2218 | *out_column_name_length = column_name_length; | |||
2219 | *out_column_type = column_type; | |||
2220 | ||||
2221 | return SQLITE_OK0; | |||
2222 | } | |||
2223 | ||||
/* Distance metric selectable for a vec0 vector column. Numbering starts at 1. */
enum Vec0DistanceMetrics {
  VEC0_DISTANCE_METRIC_L2 = 1,     /* "L2" metric */
  VEC0_DISTANCE_METRIC_COSINE = 2, /* "cosine" metric */
  VEC0_DISTANCE_METRIC_L1 = 3,     /* "L1" metric */
};
2229 | ||||
2230 | struct VectorColumnDefinition { | |||
2231 | char *name; | |||
2232 | int name_length; | |||
2233 | size_t dimensions; | |||
2234 | enum VectorElementType element_type; | |||
2235 | enum Vec0DistanceMetrics distance_metric; | |||
2236 | }; | |||
2237 | ||||
/* Parsed description of one partition key column of a vec0 table. */
struct Vec0PartitionColumnDefinition {
  /* Column type code (the partition key parser yields SQLITE_TEXT or
   * SQLITE_INTEGER). */
  int type;
  /* Column name; `name_length` bytes long. */
  char *name;
  int name_length;
};
2243 | ||||
/* Parsed description of one auxiliary (`+name type`) column of a vec0 table. */
struct Vec0AuxiliaryColumnDefinition {
  /* Column type code (the auxiliary column parser yields SQLITE_TEXT,
   * SQLITE_INTEGER, SQLITE_FLOAT or SQLITE_BLOB). */
  int type;
  /* Column name; `name_length` bytes long. */
  char *name;
  int name_length;
};
2249 | struct Vec0MetadataColumnDefinition { | |||
2250 | vec0_metadata_column_kind kind; | |||
2251 | char * name; | |||
2252 | int name_length; | |||
2253 | }; | |||
2254 | ||||
2255 | size_t vector_byte_size(enum VectorElementType element_type, | |||
2256 | size_t dimensions) { | |||
2257 | switch (element_type) { | |||
2258 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: | |||
2259 | return dimensions * sizeof(f32); | |||
2260 | case SQLITE_VEC_ELEMENT_TYPE_INT8: | |||
2261 | return dimensions * sizeof(i8); | |||
2262 | case SQLITE_VEC_ELEMENT_TYPE_BIT: | |||
2263 | return dimensions / CHAR_BIT8; | |||
2264 | } | |||
2265 | return 0; | |||
2266 | } | |||
2267 | ||||
2268 | size_t vector_column_byte_size(struct VectorColumnDefinition column) { | |||
2269 | return vector_byte_size(column.element_type, column.dimensions); | |||
2270 | } | |||
2271 | ||||
/**
 * @brief Parse a vec0 vtab argv[i] column definition and see if
 * it's a vector column definition, ex `contents_embedding float[768]`.
 *
 * @param source vec0 argv[i] item
 * @param source_length length of source in bytes
 * @param outColumn Output the parsed vector column to this struct, on success
 * @return int SQLITE_OK on success, SQLITE_EMPTY if it's not a vector column
 * definition, SQLITE_ERROR on error.
 */
2282 | int vec0_parse_vector_column(const char *source, int source_length, | |||
2283 | struct VectorColumnDefinition *outColumn) { | |||
2284 | // parses a vector column definition like so: | |||
2285 | // "abc float[123]", "abc_123 bit[1234]", eetc. | |||
2286 | // https://github.com/asg017/sqlite-vec/issues/46 | |||
2287 | int rc; | |||
2288 | struct Vec0Scanner scanner; | |||
2289 | struct Vec0Token token; | |||
2290 | ||||
2291 | char *name; | |||
2292 | int nameLength; | |||
2293 | enum VectorElementType elementType; | |||
2294 | enum Vec0DistanceMetrics distanceMetric = VEC0_DISTANCE_METRIC_L2; | |||
2295 | int dimensions; | |||
2296 | ||||
2297 | vec0_scanner_init(&scanner, source, source_length); | |||
2298 | ||||
2299 | // starts with an identifier | |||
2300 | rc = vec0_scanner_next(&scanner, &token); | |||
2301 | ||||
2302 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2303 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2304 | return SQLITE_EMPTY16; | |||
2305 | } | |||
2306 | ||||
2307 | name = token.start; | |||
2308 | nameLength = token.end - token.start; | |||
2309 | ||||
2310 | // vector column type comes next: float, int, or bit | |||
2311 | rc = vec0_scanner_next(&scanner, &token); | |||
2312 | ||||
2313 | if (rc != VEC0_TOKEN_RESULT_SOME2 || | |||
2314 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2315 | return SQLITE_EMPTY16; | |||
2316 | } | |||
2317 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "float", 5) == 0 || | |||
2318 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "f32", 3) == 0) { | |||
2319 | elementType = SQLITE_VEC_ELEMENT_TYPE_FLOAT32; | |||
2320 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int8", 4) == 0 || | |||
2321 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "i8", 2) == 0) { | |||
2322 | elementType = SQLITE_VEC_ELEMENT_TYPE_INT8; | |||
2323 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "bit", 3) == 0) { | |||
2324 | elementType = SQLITE_VEC_ELEMENT_TYPE_BIT; | |||
2325 | } else { | |||
2326 | return SQLITE_EMPTY16; | |||
2327 | } | |||
2328 | ||||
2329 | // left '[' bracket | |||
2330 | rc = vec0_scanner_next(&scanner, &token); | |||
2331 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_LBRACKET) { | |||
2332 | return SQLITE_EMPTY16; | |||
2333 | } | |||
2334 | ||||
2335 | // digit, for vector dimension length | |||
2336 | rc = vec0_scanner_next(&scanner, &token); | |||
2337 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_DIGIT) { | |||
2338 | return SQLITE_ERROR1; | |||
2339 | } | |||
2340 | dimensions = atoi(token.start); | |||
2341 | if (dimensions <= 0) { | |||
2342 | return SQLITE_ERROR1; | |||
2343 | } | |||
2344 | ||||
2345 | // // right ']' bracket | |||
2346 | rc = vec0_scanner_next(&scanner, &token); | |||
2347 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_RBRACKET) { | |||
2348 | return SQLITE_ERROR1; | |||
2349 | } | |||
2350 | ||||
2351 | // any other tokens left should be column-level options , ex `key=value` | |||
2352 | // ex `distance_metric=L2 distance_metric=cosine` should error | |||
2353 | while (1) { | |||
2354 | // should be EOF or identifier (option key) | |||
2355 | rc = vec0_scanner_next(&scanner, &token); | |||
2356 | if (rc == VEC0_TOKEN_RESULT_EOF1) { | |||
2357 | break; | |||
2358 | } | |||
2359 | ||||
2360 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2361 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2362 | return SQLITE_ERROR1; | |||
2363 | } | |||
2364 | ||||
2365 | char *key = token.start; | |||
2366 | int keyLength = token.end - token.start; | |||
2367 | ||||
2368 | if (sqlite3_strnicmpsqlite3_api->strnicmp(key, "distance_metric", keyLength) == 0) { | |||
2369 | ||||
2370 | if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) { | |||
2371 | return SQLITE_ERROR1; | |||
2372 | } | |||
2373 | // ensure equal sign after distance_metric | |||
2374 | rc = vec0_scanner_next(&scanner, &token); | |||
2375 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_EQ) { | |||
2376 | return SQLITE_ERROR1; | |||
2377 | } | |||
2378 | ||||
2379 | // distance_metric value, an identifier (L2, cosine, etc) | |||
2380 | rc = vec0_scanner_next(&scanner, &token); | |||
2381 | if (rc != VEC0_TOKEN_RESULT_SOME2 && | |||
2382 | token.token_type != TOKEN_TYPE_IDENTIFIER) { | |||
2383 | return SQLITE_ERROR1; | |||
2384 | } | |||
2385 | ||||
2386 | char *value = token.start; | |||
2387 | int valueLength = token.end - token.start; | |||
2388 | if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "l2", valueLength) == 0) { | |||
2389 | distanceMetric = VEC0_DISTANCE_METRIC_L2; | |||
2390 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "l1", valueLength) == 0) { | |||
2391 | distanceMetric = VEC0_DISTANCE_METRIC_L1; | |||
2392 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "cosine", valueLength) == 0) { | |||
2393 | distanceMetric = VEC0_DISTANCE_METRIC_COSINE; | |||
2394 | } else { | |||
2395 | return SQLITE_ERROR1; | |||
2396 | } | |||
2397 | } | |||
2398 | // unknown key | |||
2399 | else { | |||
2400 | return SQLITE_ERROR1; | |||
2401 | } | |||
2402 | } | |||
2403 | ||||
2404 | outColumn->name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", nameLength, name); | |||
2405 | if (!outColumn->name) { | |||
2406 | return SQLITE_ERROR1; | |||
2407 | } | |||
2408 | outColumn->name_length = nameLength; | |||
2409 | outColumn->distance_metric = distanceMetric; | |||
2410 | outColumn->element_type = elementType; | |||
2411 | outColumn->dimensions = dimensions; | |||
2412 | return SQLITE_OK0; | |||
2413 | } | |||
2414 | ||||
2415 | #pragma region vec_each table function | |||
2416 | ||||
2417 | typedef struct vec_each_vtab vec_each_vtab; | |||
2418 | struct vec_each_vtab { | |||
2419 | sqlite3_vtab base; | |||
2420 | }; | |||
2421 | ||||
2422 | typedef struct vec_each_cursor vec_each_cursor; | |||
2423 | struct vec_each_cursor { | |||
2424 | sqlite3_vtab_cursor base; | |||
2425 | i64 iRowid; | |||
2426 | enum VectorElementType vector_type; | |||
2427 | void *vector; | |||
2428 | size_t dimensions; | |||
2429 | vector_cleanup cleanup; | |||
2430 | }; | |||
2431 | ||||
2432 | static int vec_eachConnect(sqlite3 *db, void *pAux, int argc, | |||
2433 | const char *const *argv, sqlite3_vtab **ppVtab, | |||
2434 | char **pzErr) { | |||
2435 | UNUSED_PARAMETER(pAux)(void)(pAux); | |||
2436 | UNUSED_PARAMETER(argc)(void)(argc); | |||
2437 | UNUSED_PARAMETER(argv)(void)(argv); | |||
2438 | UNUSED_PARAMETER(pzErr)(void)(pzErr); | |||
2439 | vec_each_vtab *pNew; | |||
2440 | int rc; | |||
2441 | ||||
2442 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, "CREATE TABLE x(value, vector hidden)"); | |||
2443 | #define VEC_EACH_COLUMN_VALUE0 0 | |||
2444 | #define VEC_EACH_COLUMN_VECTOR1 1 | |||
2445 | if (rc == SQLITE_OK0) { | |||
2446 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); | |||
2447 | *ppVtab = (sqlite3_vtab *)pNew; | |||
2448 | if (pNew == 0) | |||
2449 | return SQLITE_NOMEM7; | |||
2450 | memset(pNew, 0, sizeof(*pNew)); | |||
2451 | } | |||
2452 | return rc; | |||
2453 | } | |||
2454 | ||||
2455 | static int vec_eachDisconnect(sqlite3_vtab *pVtab) { | |||
2456 | vec_each_vtab *p = (vec_each_vtab *)pVtab; | |||
2457 | sqlite3_freesqlite3_api->free(p); | |||
2458 | return SQLITE_OK0; | |||
2459 | } | |||
2460 | ||||
2461 | static int vec_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { | |||
2462 | UNUSED_PARAMETER(p)(void)(p); | |||
2463 | vec_each_cursor *pCur; | |||
2464 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); | |||
2465 | if (pCur == 0) | |||
2466 | return SQLITE_NOMEM7; | |||
2467 | memset(pCur, 0, sizeof(*pCur)); | |||
2468 | *ppCursor = &pCur->base; | |||
2469 | return SQLITE_OK0; | |||
2470 | } | |||
2471 | ||||
2472 | static int vec_eachClose(sqlite3_vtab_cursor *cur) { | |||
2473 | vec_each_cursor *pCur = (vec_each_cursor *)cur; | |||
2474 | if(pCur->vector) { | |||
2475 | pCur->cleanup(pCur->vector); | |||
2476 | } | |||
2477 | sqlite3_freesqlite3_api->free(pCur); | |||
2478 | return SQLITE_OK0; | |||
2479 | } | |||
2480 | ||||
2481 | static int vec_eachBestIndex(sqlite3_vtab *pVTab, | |||
2482 | sqlite3_index_info *pIdxInfo) { | |||
2483 | UNUSED_PARAMETER(pVTab)(void)(pVTab); | |||
2484 | int hasVector = 0; | |||
2485 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { | |||
2486 | const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i]; | |||
2487 | // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn, | |||
2488 | // pCons->op, pCons->usable); | |||
2489 | switch (pCons->iColumn) { | |||
2490 | case VEC_EACH_COLUMN_VECTOR1: { | |||
2491 | if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ2 && pCons->usable) { | |||
2492 | hasVector = 1; | |||
2493 | pIdxInfo->aConstraintUsage[i].argvIndex = 1; | |||
2494 | pIdxInfo->aConstraintUsage[i].omit = 1; | |||
2495 | } | |||
2496 | break; | |||
2497 | } | |||
2498 | } | |||
2499 | } | |||
2500 | if (!hasVector) { | |||
2501 | return SQLITE_CONSTRAINT19; | |||
2502 | } | |||
2503 | ||||
2504 | pIdxInfo->estimatedCost = (double)100000; | |||
2505 | pIdxInfo->estimatedRows = 100000; | |||
2506 | ||||
2507 | return SQLITE_OK0; | |||
2508 | } | |||
2509 | ||||
2510 | static int vec_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, | |||
2511 | const char *idxStr, int argc, sqlite3_value **argv) { | |||
2512 | UNUSED_PARAMETER(idxNum)(void)(idxNum); | |||
2513 | UNUSED_PARAMETER(idxStr)(void)(idxStr); | |||
2514 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 2514, __extension__ __PRETTY_FUNCTION__); })); | |||
2515 | vec_each_cursor *pCur = (vec_each_cursor *)pVtabCursor; | |||
2516 | ||||
2517 | if (pCur->vector) { | |||
2518 | pCur->cleanup(pCur->vector); | |||
2519 | pCur->vector = NULL((void*)0); | |||
2520 | } | |||
2521 | ||||
2522 | char *pzErrMsg; | |||
2523 | int rc = vector_from_value(argv[0], &pCur->vector, &pCur->dimensions, | |||
2524 | &pCur->vector_type, &pCur->cleanup, &pzErrMsg); | |||
2525 | if (rc != SQLITE_OK0) { | |||
2526 | return SQLITE_ERROR1; | |||
2527 | } | |||
2528 | pCur->iRowid = 0; | |||
2529 | return SQLITE_OK0; | |||
2530 | } | |||
2531 | ||||
2532 | static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { | |||
2533 | vec_each_cursor *pCur = (vec_each_cursor *)cur; | |||
2534 | *pRowid = pCur->iRowid; | |||
2535 | return SQLITE_OK0; | |||
2536 | } | |||
2537 | ||||
2538 | static int vec_eachEof(sqlite3_vtab_cursor *cur) { | |||
2539 | vec_each_cursor *pCur = (vec_each_cursor *)cur; | |||
2540 | return pCur->iRowid >= (i64)pCur->dimensions; | |||
2541 | } | |||
2542 | ||||
2543 | static int vec_eachNext(sqlite3_vtab_cursor *cur) { | |||
2544 | vec_each_cursor *pCur = (vec_each_cursor *)cur; | |||
2545 | pCur->iRowid++; | |||
2546 | return SQLITE_OK0; | |||
2547 | } | |||
2548 | ||||
2549 | static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *context, | |||
2550 | int i) { | |||
2551 | vec_each_cursor *pCur = (vec_each_cursor *)cur; | |||
2552 | switch (i) { | |||
2553 | case VEC_EACH_COLUMN_VALUE0: | |||
2554 | switch (pCur->vector_type) { | |||
2555 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
2556 | sqlite3_result_doublesqlite3_api->result_double(context, ((f32 *)pCur->vector)[pCur->iRowid]); | |||
2557 | break; | |||
2558 | } | |||
2559 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
2560 | u8 x = ((u8 *)pCur->vector)[pCur->iRowid / CHAR_BIT8]; | |||
2561 | sqlite3_result_intsqlite3_api->result_int(context, | |||
2562 | (x & (0b10000000 >> ((pCur->iRowid % CHAR_BIT8)))) > 0); | |||
2563 | break; | |||
2564 | } | |||
2565 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
2566 | sqlite3_result_intsqlite3_api->result_int(context, ((i8 *)pCur->vector)[pCur->iRowid]); | |||
2567 | break; | |||
2568 | } | |||
2569 | } | |||
2570 | ||||
2571 | break; | |||
2572 | } | |||
2573 | return SQLITE_OK0; | |||
2574 | } | |||
2575 | ||||
2576 | static sqlite3_module vec_eachModule = { | |||
2577 | /* iVersion */ 0, | |||
2578 | /* xCreate */ 0, | |||
2579 | /* xConnect */ vec_eachConnect, | |||
2580 | /* xBestIndex */ vec_eachBestIndex, | |||
2581 | /* xDisconnect */ vec_eachDisconnect, | |||
2582 | /* xDestroy */ 0, | |||
2583 | /* xOpen */ vec_eachOpen, | |||
2584 | /* xClose */ vec_eachClose, | |||
2585 | /* xFilter */ vec_eachFilter, | |||
2586 | /* xNext */ vec_eachNext, | |||
2587 | /* xEof */ vec_eachEof, | |||
2588 | /* xColumn */ vec_eachColumn, | |||
2589 | /* xRowid */ vec_eachRowid, | |||
2590 | /* xUpdate */ 0, | |||
2591 | /* xBegin */ 0, | |||
2592 | /* xSync */ 0, | |||
2593 | /* xCommit */ 0, | |||
2594 | /* xRollback */ 0, | |||
2595 | /* xFindMethod */ 0, | |||
2596 | /* xRename */ 0, | |||
2597 | /* xSavepoint */ 0, | |||
2598 | /* xRelease */ 0, | |||
2599 | /* xRollbackTo */ 0, | |||
2600 | /* xShadowName */ 0, | |||
2601 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 | |||
2602 | /* xIntegrity */ 0 | |||
2603 | #endif | |||
2604 | }; | |||
2605 | ||||
2606 | #pragma endregion | |||
2607 | ||||
2608 | #pragma region vec_npy_each table function | |||
2609 | ||||
/* Token kinds produced when scanning the dictionary-style header of a numpy
 * (.npy) array. */
enum NpyTokenType {
  NPY_TOKEN_TYPE_IDENTIFIER,
  NPY_TOKEN_TYPE_NUMBER, /* run of decimal digits */
  NPY_TOKEN_TYPE_LPAREN, /* '(' */
  NPY_TOKEN_TYPE_RPAREN, /* ')' */
  NPY_TOKEN_TYPE_LBRACE, /* '{' */
  NPY_TOKEN_TYPE_RBRACE, /* '}' */
  NPY_TOKEN_TYPE_COLON,  /* ':' */
  NPY_TOKEN_TYPE_COMMA,  /* ',' */
  NPY_TOKEN_TYPE_STRING, /* single-quoted string, quotes included */
  NPY_TOKEN_TYPE_FALSE,  /* the literal False */
};
2622 | ||||
2623 | struct NpyToken { | |||
2624 | enum NpyTokenType token_type; | |||
2625 | unsigned char *start; | |||
2626 | unsigned char *end; | |||
2627 | }; | |||
2628 | ||||
2629 | int npy_token_next(unsigned char *start, unsigned char *end, | |||
2630 | struct NpyToken *out) { | |||
2631 | unsigned char *ptr = start; | |||
2632 | while (ptr < end) { | |||
2633 | unsigned char curr = *ptr; | |||
2634 | if (is_whitespace(curr)) { | |||
2635 | ptr++; | |||
2636 | continue; | |||
2637 | } else if (curr == '(') { | |||
2638 | out->start = ptr++; | |||
2639 | out->end = ptr; | |||
2640 | out->token_type = NPY_TOKEN_TYPE_LPAREN; | |||
2641 | return VEC0_TOKEN_RESULT_SOME2; | |||
2642 | } else if (curr == ')') { | |||
2643 | out->start = ptr++; | |||
2644 | out->end = ptr; | |||
2645 | out->token_type = NPY_TOKEN_TYPE_RPAREN; | |||
2646 | return VEC0_TOKEN_RESULT_SOME2; | |||
2647 | } else if (curr == '{') { | |||
2648 | out->start = ptr++; | |||
2649 | out->end = ptr; | |||
2650 | out->token_type = NPY_TOKEN_TYPE_LBRACE; | |||
2651 | return VEC0_TOKEN_RESULT_SOME2; | |||
2652 | } else if (curr == '}') { | |||
2653 | out->start = ptr++; | |||
2654 | out->end = ptr; | |||
2655 | out->token_type = NPY_TOKEN_TYPE_RBRACE; | |||
2656 | return VEC0_TOKEN_RESULT_SOME2; | |||
2657 | } else if (curr == ':') { | |||
2658 | out->start = ptr++; | |||
2659 | out->end = ptr; | |||
2660 | out->token_type = NPY_TOKEN_TYPE_COLON; | |||
2661 | return VEC0_TOKEN_RESULT_SOME2; | |||
2662 | } else if (curr == ',') { | |||
2663 | out->start = ptr++; | |||
2664 | out->end = ptr; | |||
2665 | out->token_type = NPY_TOKEN_TYPE_COMMA; | |||
2666 | return VEC0_TOKEN_RESULT_SOME2; | |||
2667 | } else if (curr == '\'') { | |||
2668 | unsigned char *start = ptr; | |||
2669 | ptr++; | |||
2670 | while (ptr < end) { | |||
2671 | if ((*ptr) == '\'') { | |||
2672 | break; | |||
2673 | } | |||
2674 | ptr++; | |||
2675 | } | |||
2676 | if ((*ptr) != '\'') { | |||
2677 | return VEC0_TOKEN_RESULT_ERROR3; | |||
2678 | } | |||
2679 | out->start = start; | |||
2680 | out->end = ++ptr; | |||
2681 | out->token_type = NPY_TOKEN_TYPE_STRING; | |||
2682 | return VEC0_TOKEN_RESULT_SOME2; | |||
2683 | } else if (curr == 'F' && | |||
2684 | strncmp((char *)ptr, "False", strlen("False")) == 0) { | |||
2685 | out->start = ptr; | |||
2686 | out->end = (ptr + (int)strlen("False")); | |||
2687 | ptr = out->end; | |||
2688 | out->token_type = NPY_TOKEN_TYPE_FALSE; | |||
2689 | return VEC0_TOKEN_RESULT_SOME2; | |||
2690 | } else if (is_digit(curr)) { | |||
2691 | unsigned char *start = ptr; | |||
2692 | while (ptr < end && (is_digit(*ptr))) { | |||
2693 | ptr++; | |||
2694 | } | |||
2695 | out->start = start; | |||
2696 | out->end = ptr; | |||
2697 | out->token_type = NPY_TOKEN_TYPE_NUMBER; | |||
2698 | return VEC0_TOKEN_RESULT_SOME2; | |||
2699 | } else { | |||
2700 | return VEC0_TOKEN_RESULT_ERROR3; | |||
2701 | } | |||
2702 | } | |||
2703 | return VEC0_TOKEN_RESULT_ERROR3; | |||
2704 | } | |||
2705 | ||||
/* Scanning state over a numpy header buffer. The scanner does not own the
 * buffer. */
struct NpyScanner {
  /* Where the next token is scanned from; advanced by npy_scanner_next(). */
  unsigned char *start;
  /* One past the last byte of the buffer. */
  unsigned char *end;
  /* Set to the buffer start by npy_scanner_init(). */
  unsigned char *ptr;
};
2711 | ||||
2712 | void npy_scanner_init(struct NpyScanner *scanner, const unsigned char *source, | |||
2713 | int source_length) { | |||
2714 | scanner->start = (unsigned char *)source; | |||
2715 | scanner->end = (unsigned char *)source + source_length; | |||
2716 | scanner->ptr = (unsigned char *)source; | |||
2717 | } | |||
2718 | ||||
2719 | int npy_scanner_next(struct NpyScanner *scanner, struct NpyToken *out) { | |||
2720 | int rc = npy_token_next(scanner->start, scanner->end, out); | |||
2721 | if (rc == VEC0_TOKEN_RESULT_SOME2) { | |||
2722 | scanner->start = out->end; | |||
2723 | } | |||
2724 | return rc; | |||
2725 | } | |||
2726 | ||||
2727 | #define NPY_PARSE_ERROR"Error parsing numpy array: " "Error parsing numpy array: " | |||
2728 | int parse_npy_header(sqlite3_vtab *pVTab, const unsigned char *header, | |||
2729 | size_t headerLength, | |||
2730 | enum VectorElementType *out_element_type, | |||
2731 | int *fortran_order, size_t *numElements, | |||
2732 | size_t *numDimensions) { | |||
2733 | ||||
2734 | struct NpyScanner scanner; | |||
2735 | struct NpyToken token; | |||
2736 | int rc; | |||
2737 | npy_scanner_init(&scanner, header, headerLength); | |||
2738 | ||||
2739 | if (npy_scanner_next(&scanner, &token) != VEC0_TOKEN_RESULT_SOME2 && | |||
2740 | token.token_type != NPY_TOKEN_TYPE_LBRACE) { | |||
2741 | vtab_set_error(pVTab, | |||
2742 | NPY_PARSE_ERROR"Error parsing numpy array: " "numpy header did not start with '{'"); | |||
2743 | return SQLITE_ERROR1; | |||
2744 | } | |||
2745 | while (1) { | |||
2746 | rc = npy_scanner_next(&scanner, &token); | |||
2747 | if (rc != VEC0_TOKEN_RESULT_SOME2) { | |||
2748 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "expected key in numpy header"); | |||
2749 | return SQLITE_ERROR1; | |||
2750 | } | |||
2751 | ||||
2752 | if (token.token_type == NPY_TOKEN_TYPE_RBRACE) { | |||
2753 | break; | |||
2754 | } | |||
2755 | if (token.token_type != NPY_TOKEN_TYPE_STRING) { | |||
2756 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2757 | "expected a string as key in numpy header"); | |||
2758 | return SQLITE_ERROR1; | |||
2759 | } | |||
2760 | unsigned char *key = token.start; | |||
2761 | ||||
2762 | rc = npy_scanner_next(&scanner, &token); | |||
2763 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || | |||
2764 | (token.token_type != NPY_TOKEN_TYPE_COLON)) { | |||
2765 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2766 | "expected a ':' after key in numpy header"); | |||
2767 | return SQLITE_ERROR1; | |||
2768 | } | |||
2769 | ||||
2770 | if (strncmp((char *)key, "'descr'", strlen("'descr'")) == 0) { | |||
2771 | rc = npy_scanner_next(&scanner, &token); | |||
2772 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || | |||
2773 | (token.token_type != NPY_TOKEN_TYPE_STRING)) { | |||
2774 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2775 | "expected a string value after 'descr' key"); | |||
2776 | return SQLITE_ERROR1; | |||
2777 | } | |||
2778 | if (strncmp((char *)token.start, "'<f4'", strlen("'<f4'")) != 0) { | |||
2779 | vtab_set_error( | |||
2780 | pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2781 | "Only '<f4' values are supported in sqlite-vec numpy functions"); | |||
2782 | return SQLITE_ERROR1; | |||
2783 | } | |||
2784 | *out_element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32; | |||
2785 | } else if (strncmp((char *)key, "'fortran_order'", | |||
2786 | strlen("'fortran_order'")) == 0) { | |||
2787 | rc = npy_scanner_next(&scanner, &token); | |||
2788 | if (rc != VEC0_TOKEN_RESULT_SOME2 || | |||
2789 | token.token_type != NPY_TOKEN_TYPE_FALSE) { | |||
2790 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2791 | "Only fortran_order = False is supported in sqlite-vec " | |||
2792 | "numpy functions"); | |||
2793 | return SQLITE_ERROR1; | |||
2794 | } | |||
2795 | *fortran_order = 0; | |||
2796 | } else if (strncmp((char *)key, "'shape'", strlen("'shape'")) == 0) { | |||
2797 | // "(xxx, xxx)" OR (xxx,) | |||
2798 | size_t first; | |||
2799 | rc = npy_scanner_next(&scanner, &token); | |||
2800 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || | |||
2801 | (token.token_type != NPY_TOKEN_TYPE_LPAREN)) { | |||
2802 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2803 | "Expected left parenthesis '(' after shape key"); | |||
2804 | return SQLITE_ERROR1; | |||
2805 | } | |||
2806 | ||||
2807 | rc = npy_scanner_next(&scanner, &token); | |||
2808 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || | |||
2809 | (token.token_type != NPY_TOKEN_TYPE_NUMBER)) { | |||
2810 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2811 | "Expected an initial number in shape value"); | |||
2812 | return SQLITE_ERROR1; | |||
2813 | } | |||
2814 | first = strtol((char *)token.start, NULL((void*)0), 10); | |||
2815 | ||||
2816 | rc = npy_scanner_next(&scanner, &token); | |||
2817 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || | |||
2818 | (token.token_type != NPY_TOKEN_TYPE_COMMA)) { | |||
2819 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2820 | "Expected comma after first shape value"); | |||
2821 | return SQLITE_ERROR1; | |||
2822 | } | |||
2823 | ||||
2824 | rc = npy_scanner_next(&scanner, &token); | |||
2825 | if (rc != VEC0_TOKEN_RESULT_SOME2) { | |||
2826 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2827 | "unexpected header EOF while parsing shape"); | |||
2828 | return SQLITE_ERROR1; | |||
2829 | } | |||
2830 | if (token.token_type == NPY_TOKEN_TYPE_NUMBER) { | |||
2831 | *numElements = first; | |||
2832 | *numDimensions = strtol((char *)token.start, NULL((void*)0), 10); | |||
2833 | rc = npy_scanner_next(&scanner, &token); | |||
2834 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || | |||
2835 | (token.token_type != NPY_TOKEN_TYPE_RPAREN)) { | |||
2836 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " | |||
2837 | "expected right parenthesis after shape value"); | |||
2838 | return SQLITE_ERROR1; | |||
2839 | } | |||
2840 | } else if (token.token_type == NPY_TOKEN_TYPE_RPAREN) { | |||
2841 | // '(0,)' means an empty array! | |||
2842 | *numElements = first ? 1 : 0; | |||
2843 | *numDimensions = first; | |||
2844 | } else { | |||
2845 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown type in shape value"); | |||
2846 | return SQLITE_ERROR1; | |||
2847 | } | |||
2848 | } else { | |||
2849 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown key in numpy header"); | |||
2850 | return SQLITE_ERROR1; | |||
2851 | } | |||
2852 | ||||
2853 | rc = npy_scanner_next(&scanner, &token); | |||
2854 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || | |||
2855 | (token.token_type != NPY_TOKEN_TYPE_COMMA)) { | |||
2856 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown extra token after value"); | |||
2857 | return SQLITE_ERROR1; | |||
2858 | } | |||
2859 | } | |||
2860 | ||||
2861 | return SQLITE_OK0; | |||
2862 | } | |||
2863 | ||||
2864 | typedef struct vec_npy_each_vtab vec_npy_each_vtab; | |||
2865 | struct vec_npy_each_vtab { | |||
2866 | sqlite3_vtab base; | |||
2867 | }; | |||
2868 | ||||
/* Where a vec_npy_each cursor reads its numpy array from. */
typedef enum {
  VEC_NPY_EACH_INPUT_BUFFER, /* an in-memory buffer */
  VEC_NPY_EACH_INPUT_FILE,   /* an opened file, read in chunks */
} vec_npy_each_input_type;
2873 | ||||
2874 | typedef struct vec_npy_each_cursor vec_npy_each_cursor; | |||
2875 | struct vec_npy_each_cursor { | |||
2876 | sqlite3_vtab_cursor base; | |||
2877 | i64 iRowid; | |||
2878 | // sqlite-vec compatible type of vector | |||
2879 | enum VectorElementType elementType; | |||
2880 | // number of vectors in the npy array | |||
2881 | size_t nElements; | |||
2882 | // number of dimensions each vector has | |||
2883 | size_t nDimensions; | |||
2884 | ||||
2885 | vec_npy_each_input_type input_type; | |||
2886 | ||||
2887 | // when input_type == VEC_NPY_EACH_INPUT_BUFFER | |||
2888 | ||||
2889 | // Buffer containing the vector data, when reading from an in-memory buffer. | |||
2890 | // Size: nElements * nDimensions * element_size | |||
2891 | // Clean up with sqlite3_free() once complete | |||
2892 | void *vector; | |||
2893 | ||||
2894 | // when input_type == VEC_NPY_EACH_INPUT_FILE | |||
2895 | ||||
2896 | // Opened npy file, when reading from a file. | |||
2897 | // fclose() when complete. | |||
2898 | #ifndef SQLITE_VEC_OMIT_FS | |||
2899 | FILE *file; | |||
2900 | #endif | |||
2901 | ||||
2902 | // an in-memory buffer containing a portion of the npy array. | |||
2903 | // Used for faster reading, instead of calling fread a lot. | |||
2904 | // Will have a byte-size of fileBufferSize | |||
2905 | void *chunksBuffer; | |||
2906 | // size of allocated fileBuffer in bytes | |||
2907 | size_t chunksBufferSize; | |||
2908 | //// Maximum length of the buffer, in terms of number of vectors. | |||
2909 | size_t maxChunks; | |||
2910 | ||||
2911 | // Counter index of the current vector into of fileBuffer to yield. | |||
2912 | // Starts at 0 once fileBuffer is read, and iterates to bufferLength. | |||
2913 | // Resets to 0 once that "buffer" is yielded and a new one is read. | |||
2914 | size_t currentChunkIndex; | |||
2915 | size_t currentChunkSize; | |||
2916 | ||||
2917 | // 0 when there are still more elements to read/yield, 1 when complete. | |||
2918 | int eof; | |||
2919 | }; | |||
2920 | ||||
/* The 6 magic bytes every .npy file starts with. The array is sized exactly,
 * so it holds no NUL terminator; compare it with memcmp(), not strcmp(). */
static unsigned char NPY_MAGIC[6] = "\x93NUMPY";
2922 | ||||
2923 | #ifndef SQLITE_VEC_OMIT_FS | |||
2924 | int parse_npy_file(sqlite3_vtab *pVTab, FILE *file, vec_npy_each_cursor *pCur) { | |||
2925 | int n; | |||
2926 | fseek(file, 0, SEEK_END2); | |||
2927 | long fileSize = ftell(file); | |||
2928 | ||||
2929 | fseek(file, 0L, SEEK_SET0); | |||
2930 | ||||
2931 | unsigned char header[10]; | |||
2932 | n = fread(&header, sizeof(unsigned char), 10, file); | |||
2933 | if (n != 10) { | |||
2934 | vtab_set_error(pVTab, "numpy array file too short"); | |||
2935 | return SQLITE_ERROR1; | |||
2936 | } | |||
2937 | ||||
2938 | if (memcmp(NPY_MAGIC, header, sizeof(NPY_MAGIC)) != 0) { | |||
2939 | vtab_set_error(pVTab, | |||
2940 | "numpy array file does not contain the 'magic' header"); | |||
2941 | return SQLITE_ERROR1; | |||
2942 | } | |||
2943 | ||||
2944 | u8 major = header[6]; | |||
2945 | u8 minor = header[7]; | |||
2946 | uint16_t headerLength = 0; | |||
2947 | memcpy(&headerLength, &header[8], sizeof(uint16_t)); | |||
2948 | ||||
2949 | size_t totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) + | |||
2950 | sizeof(headerLength) + headerLength; | |||
2951 | i32 dataSize = fileSize - totalHeaderLength; | |||
2952 | if (dataSize < 0) { | |||
2953 | vtab_set_error(pVTab, "numpy array file header length is invalid"); | |||
2954 | return SQLITE_ERROR1; | |||
2955 | } | |||
2956 | ||||
2957 | unsigned char *headerX = sqlite3_mallocsqlite3_api->malloc(headerLength); | |||
2958 | if (headerLength && !headerX) { | |||
2959 | return SQLITE_NOMEM7; | |||
2960 | } | |||
2961 | ||||
2962 | n = fread(headerX, sizeof(char), headerLength, file); | |||
2963 | if (n != headerLength) { | |||
2964 | sqlite3_freesqlite3_api->free(headerX); | |||
2965 | vtab_set_error(pVTab, "numpy array file header length is invalid"); | |||
2966 | return SQLITE_ERROR1; | |||
2967 | } | |||
2968 | ||||
2969 | int fortran_order; | |||
2970 | enum VectorElementType element_type; | |||
2971 | size_t numElements; | |||
2972 | size_t numDimensions; | |||
2973 | int rc = parse_npy_header(pVTab, headerX, headerLength, &element_type, | |||
2974 | &fortran_order, &numElements, &numDimensions); | |||
2975 | sqlite3_freesqlite3_api->free(headerX); | |||
2976 | if (rc != SQLITE_OK0) { | |||
2977 | // parse_npy_header already attackes an error emssage | |||
2978 | return rc; | |||
2979 | } | |||
2980 | ||||
2981 | i32 expectedDataSize = | |||
2982 | numElements * vector_byte_size(element_type, numDimensions); | |||
2983 | if (expectedDataSize != dataSize) { | |||
2984 | vtab_set_error( | |||
2985 | pVTab, "numpy array file error: Expected a data size of %d, found %d", | |||
2986 | expectedDataSize, dataSize); | |||
2987 | return SQLITE_ERROR1; | |||
2988 | } | |||
2989 | ||||
2990 | pCur->maxChunks = 1024; | |||
2991 | pCur->chunksBufferSize = | |||
2992 | (vector_byte_size(element_type, numDimensions)) * pCur->maxChunks; | |||
2993 | pCur->chunksBuffer = sqlite3_mallocsqlite3_api->malloc(pCur->chunksBufferSize); | |||
2994 | if (pCur->chunksBufferSize && !pCur->chunksBuffer) { | |||
2995 | return SQLITE_NOMEM7; | |||
2996 | } | |||
2997 | ||||
2998 | pCur->currentChunkSize = | |||
2999 | fread(pCur->chunksBuffer, vector_byte_size(element_type, numDimensions), | |||
3000 | pCur->maxChunks, file); | |||
3001 | ||||
3002 | pCur->currentChunkIndex = 0; | |||
3003 | pCur->elementType = element_type; | |||
3004 | pCur->nElements = numElements; | |||
3005 | pCur->nDimensions = numDimensions; | |||
3006 | pCur->input_type = VEC_NPY_EACH_INPUT_FILE; | |||
3007 | ||||
3008 | pCur->eof = pCur->currentChunkSize == 0; | |||
3009 | pCur->file = file; | |||
3010 | return SQLITE_OK0; | |||
3011 | } | |||
3012 | #endif | |||
3013 | ||||
3014 | int parse_npy_buffer(sqlite3_vtab *pVTab, const unsigned char *buffer, | |||
3015 | int bufferLength, void **data, size_t *numElements, | |||
3016 | size_t *numDimensions, | |||
3017 | enum VectorElementType *element_type) { | |||
3018 | ||||
3019 | if (bufferLength < 10) { | |||
3020 | // IMP: V03312_20150 | |||
3021 | vtab_set_error(pVTab, "numpy array too short"); | |||
3022 | return SQLITE_ERROR1; | |||
3023 | } | |||
3024 | if (memcmp(NPY_MAGIC, buffer, sizeof(NPY_MAGIC)) != 0) { | |||
3025 | // V11954_28792 | |||
3026 | vtab_set_error(pVTab, "numpy array does not contain the 'magic' header"); | |||
3027 | return SQLITE_ERROR1; | |||
3028 | } | |||
3029 | ||||
3030 | u8 major = buffer[6]; | |||
3031 | u8 minor = buffer[7]; | |||
3032 | uint16_t headerLength = 0; | |||
3033 | memcpy(&headerLength, &buffer[8], sizeof(uint16_t)); | |||
3034 | ||||
3035 | i32 totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) + | |||
3036 | sizeof(headerLength) + headerLength; | |||
3037 | i32 dataSize = bufferLength - totalHeaderLength; | |||
3038 | ||||
3039 | if (dataSize < 0) { | |||
3040 | vtab_set_error(pVTab, "numpy array header length is invalid"); | |||
3041 | return SQLITE_ERROR1; | |||
3042 | } | |||
3043 | ||||
3044 | const unsigned char *header = &buffer[10]; | |||
3045 | int fortran_order; | |||
3046 | ||||
3047 | int rc = parse_npy_header(pVTab, header, headerLength, element_type, | |||
3048 | &fortran_order, numElements, numDimensions); | |||
3049 | if (rc != SQLITE_OK0) { | |||
3050 | return rc; | |||
3051 | } | |||
3052 | ||||
3053 | i32 expectedDataSize = | |||
3054 | (*numElements * vector_byte_size(*element_type, *numDimensions)); | |||
3055 | if (expectedDataSize != dataSize) { | |||
3056 | vtab_set_error(pVTab, | |||
3057 | "numpy array error: Expected a data size of %d, found %d", | |||
3058 | expectedDataSize, dataSize); | |||
3059 | return SQLITE_ERROR1; | |||
3060 | } | |||
3061 | ||||
3062 | *data = (void *)&buffer[totalHeaderLength]; | |||
3063 | return SQLITE_OK0; | |||
3064 | } | |||
3065 | ||||
3066 | static int vec_npy_eachConnect(sqlite3 *db, void *pAux, int argc, | |||
3067 | const char *const *argv, sqlite3_vtab **ppVtab, | |||
3068 | char **pzErr) { | |||
3069 | UNUSED_PARAMETER(pAux)(void)(pAux); | |||
3070 | UNUSED_PARAMETER(argc)(void)(argc); | |||
3071 | UNUSED_PARAMETER(argv)(void)(argv); | |||
3072 | UNUSED_PARAMETER(pzErr)(void)(pzErr); | |||
3073 | vec_npy_each_vtab *pNew; | |||
3074 | int rc; | |||
3075 | ||||
3076 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, "CREATE TABLE x(vector, input hidden)"); | |||
3077 | #define VEC_NPY_EACH_COLUMN_VECTOR0 0 | |||
3078 | #define VEC_NPY_EACH_COLUMN_INPUT1 1 | |||
3079 | if (rc == SQLITE_OK0) { | |||
3080 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); | |||
3081 | *ppVtab = (sqlite3_vtab *)pNew; | |||
3082 | if (pNew == 0) | |||
3083 | return SQLITE_NOMEM7; | |||
3084 | memset(pNew, 0, sizeof(*pNew)); | |||
3085 | } | |||
3086 | return rc; | |||
3087 | } | |||
3088 | ||||
3089 | static int vec_npy_eachDisconnect(sqlite3_vtab *pVtab) { | |||
3090 | vec_npy_each_vtab *p = (vec_npy_each_vtab *)pVtab; | |||
3091 | sqlite3_freesqlite3_api->free(p); | |||
3092 | return SQLITE_OK0; | |||
3093 | } | |||
3094 | ||||
3095 | static int vec_npy_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { | |||
3096 | UNUSED_PARAMETER(p)(void)(p); | |||
3097 | vec_npy_each_cursor *pCur; | |||
3098 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); | |||
3099 | if (pCur == 0) | |||
3100 | return SQLITE_NOMEM7; | |||
3101 | memset(pCur, 0, sizeof(*pCur)); | |||
3102 | *ppCursor = &pCur->base; | |||
3103 | return SQLITE_OK0; | |||
3104 | } | |||
3105 | ||||
3106 | static int vec_npy_eachClose(sqlite3_vtab_cursor *cur) { | |||
3107 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; | |||
3108 | #ifndef SQLITE_VEC_OMIT_FS | |||
3109 | if (pCur->file) { | |||
3110 | fclose(pCur->file); | |||
3111 | pCur->file = NULL((void*)0); | |||
3112 | } | |||
3113 | #endif | |||
3114 | if (pCur->chunksBuffer) { | |||
3115 | sqlite3_freesqlite3_api->free(pCur->chunksBuffer); | |||
3116 | pCur->chunksBuffer = NULL((void*)0); | |||
3117 | } | |||
3118 | if (pCur->vector) { | |||
3119 | pCur->vector = NULL((void*)0); | |||
3120 | } | |||
3121 | sqlite3_freesqlite3_api->free(pCur); | |||
3122 | return SQLITE_OK0; | |||
3123 | } | |||
3124 | ||||
3125 | static int vec_npy_eachBestIndex(sqlite3_vtab *pVTab, | |||
3126 | sqlite3_index_info *pIdxInfo) { | |||
3127 | int hasInput; | |||
3128 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { | |||
3129 | const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i]; | |||
3130 | // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn, | |||
3131 | // pCons->op, pCons->usable); | |||
3132 | switch (pCons->iColumn) { | |||
3133 | case VEC_NPY_EACH_COLUMN_INPUT1: { | |||
3134 | if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ2 && pCons->usable) { | |||
3135 | hasInput = 1; | |||
3136 | pIdxInfo->aConstraintUsage[i].argvIndex = 1; | |||
3137 | pIdxInfo->aConstraintUsage[i].omit = 1; | |||
3138 | } | |||
3139 | break; | |||
3140 | } | |||
3141 | } | |||
3142 | } | |||
3143 | if (!hasInput) { | |||
3144 | pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("input argument is required"); | |||
3145 | return SQLITE_ERROR1; | |||
3146 | } | |||
3147 | ||||
3148 | pIdxInfo->estimatedCost = (double)100000; | |||
3149 | pIdxInfo->estimatedRows = 100000; | |||
3150 | ||||
3151 | return SQLITE_OK0; | |||
3152 | } | |||
3153 | ||||
3154 | static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, | |||
3155 | const char *idxStr, int argc, | |||
3156 | sqlite3_value **argv) { | |||
3157 | UNUSED_PARAMETER(idxNum)(void)(idxNum); | |||
3158 | UNUSED_PARAMETER(idxStr)(void)(idxStr); | |||
3159 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 3159, __extension__ __PRETTY_FUNCTION__); })); | |||
3160 | int rc; | |||
3161 | ||||
3162 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)pVtabCursor; | |||
3163 | ||||
3164 | #ifndef SQLITE_VEC_OMIT_FS | |||
3165 | if (pCur->file) { | |||
3166 | fclose(pCur->file); | |||
3167 | pCur->file = NULL((void*)0); | |||
3168 | } | |||
3169 | #endif | |||
3170 | if (pCur->chunksBuffer) { | |||
3171 | sqlite3_freesqlite3_api->free(pCur->chunksBuffer); | |||
3172 | pCur->chunksBuffer = NULL((void*)0); | |||
3173 | } | |||
3174 | if (pCur->vector) { | |||
3175 | pCur->vector = NULL((void*)0); | |||
3176 | } | |||
3177 | ||||
3178 | #ifndef SQLITE_VEC_OMIT_FS | |||
3179 | struct VecNpyFile *f = NULL((void*)0); | |||
3180 | if ((f = sqlite3_value_pointersqlite3_api->value_pointer(argv[0], SQLITE_VEC_NPY_FILE_NAME"vec0-npy-file"))) { | |||
3181 | FILE *file = fopen(f->path, "r"); | |||
3182 | if (!file) { | |||
3183 | vtab_set_error(pVtabCursor->pVtab, "Could not open numpy file"); | |||
3184 | return SQLITE_ERROR1; | |||
3185 | } | |||
3186 | ||||
3187 | rc = parse_npy_file(pVtabCursor->pVtab, file, pCur); | |||
3188 | if (rc != SQLITE_OK0) { | |||
3189 | #ifndef SQLITE_VEC_OMIT_FS | |||
3190 | fclose(file); | |||
3191 | #endif | |||
3192 | return rc; | |||
3193 | } | |||
3194 | ||||
3195 | } else | |||
3196 | #endif | |||
3197 | { | |||
3198 | ||||
3199 | const unsigned char *input = sqlite3_value_blobsqlite3_api->value_blob(argv[0]); | |||
3200 | int inputLength = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]); | |||
3201 | void *data; | |||
3202 | size_t numElements; | |||
3203 | size_t numDimensions; | |||
3204 | enum VectorElementType element_type; | |||
3205 | ||||
3206 | rc = parse_npy_buffer(pVtabCursor->pVtab, input, inputLength, &data, | |||
3207 | &numElements, &numDimensions, &element_type); | |||
3208 | if (rc != SQLITE_OK0) { | |||
3209 | return rc; | |||
3210 | } | |||
3211 | ||||
3212 | pCur->vector = data; | |||
3213 | pCur->elementType = element_type; | |||
3214 | pCur->nElements = numElements; | |||
3215 | pCur->nDimensions = numDimensions; | |||
3216 | pCur->input_type = VEC_NPY_EACH_INPUT_BUFFER; | |||
3217 | } | |||
3218 | ||||
3219 | pCur->iRowid = 0; | |||
3220 | return SQLITE_OK0; | |||
3221 | } | |||
3222 | ||||
3223 | static int vec_npy_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { | |||
3224 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; | |||
3225 | *pRowid = pCur->iRowid; | |||
3226 | return SQLITE_OK0; | |||
3227 | } | |||
3228 | ||||
3229 | static int vec_npy_eachEof(sqlite3_vtab_cursor *cur) { | |||
3230 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; | |||
3231 | if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) { | |||
3232 | return (!pCur->nElements) || (size_t)pCur->iRowid >= pCur->nElements; | |||
3233 | } | |||
3234 | return pCur->eof; | |||
3235 | } | |||
3236 | ||||
3237 | static int vec_npy_eachNext(sqlite3_vtab_cursor *cur) { | |||
3238 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; | |||
3239 | pCur->iRowid++; | |||
3240 | if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) { | |||
3241 | return SQLITE_OK0; | |||
3242 | } | |||
3243 | ||||
3244 | #ifndef SQLITE_VEC_OMIT_FS | |||
3245 | // else: input is a file | |||
3246 | pCur->currentChunkIndex++; | |||
3247 | if (pCur->currentChunkIndex >= pCur->currentChunkSize) { | |||
3248 | pCur->currentChunkSize = | |||
3249 | fread(pCur->chunksBuffer, | |||
3250 | vector_byte_size(pCur->elementType, pCur->nDimensions), | |||
3251 | pCur->maxChunks, pCur->file); | |||
3252 | if (!pCur->currentChunkSize) { | |||
3253 | pCur->eof = 1; | |||
3254 | } | |||
3255 | pCur->currentChunkIndex = 0; | |||
3256 | } | |||
3257 | #endif | |||
3258 | return SQLITE_OK0; | |||
3259 | } | |||
3260 | ||||
3261 | static int vec_npy_eachColumnBuffer(vec_npy_each_cursor *pCur, | |||
3262 | sqlite3_context *context, int i) { | |||
3263 | switch (i) { | |||
3264 | case VEC_NPY_EACH_COLUMN_VECTOR0: { | |||
3265 | sqlite3_result_subtypesqlite3_api->result_subtype(context, pCur->elementType); | |||
3266 | switch (pCur->elementType) { | |||
3267 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
3268 | sqlite3_result_blobsqlite3_api->result_blob( | |||
3269 | context, | |||
3270 | &((unsigned char *) | |||
3271 | pCur->vector)[pCur->iRowid * pCur->nDimensions * sizeof(f32)], | |||
3272 | pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
3273 | ||||
3274 | break; | |||
3275 | } | |||
3276 | case SQLITE_VEC_ELEMENT_TYPE_INT8: | |||
3277 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
3278 | // https://github.com/asg017/sqlite-vec/issues/42 | |||
3279 | sqlite3_result_errorsqlite3_api->result_error(context, | |||
3280 | "vec_npy_each only supports float32 vectors", -1); | |||
3281 | break; | |||
3282 | } | |||
3283 | } | |||
3284 | ||||
3285 | break; | |||
3286 | } | |||
3287 | } | |||
3288 | return SQLITE_OK0; | |||
3289 | } | |||
3290 | static int vec_npy_eachColumnFile(vec_npy_each_cursor *pCur, | |||
3291 | sqlite3_context *context, int i) { | |||
3292 | switch (i) { | |||
3293 | case VEC_NPY_EACH_COLUMN_VECTOR0: { | |||
3294 | switch (pCur->elementType) { | |||
3295 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
3296 | sqlite3_result_blobsqlite3_api->result_blob( | |||
3297 | context, | |||
3298 | &((unsigned char *) | |||
3299 | pCur->chunksBuffer)[pCur->currentChunkIndex * | |||
3300 | pCur->nDimensions * sizeof(f32)], | |||
3301 | pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
3302 | break; | |||
3303 | } | |||
3304 | case SQLITE_VEC_ELEMENT_TYPE_INT8: | |||
3305 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
3306 | // https://github.com/asg017/sqlite-vec/issues/42 | |||
3307 | sqlite3_result_errorsqlite3_api->result_error(context, | |||
3308 | "vec_npy_each only supports float32 vectors", -1); | |||
3309 | break; | |||
3310 | } | |||
3311 | } | |||
3312 | break; | |||
3313 | } | |||
3314 | } | |||
3315 | return SQLITE_OK0; | |||
3316 | } | |||
3317 | static int vec_npy_eachColumn(sqlite3_vtab_cursor *cur, | |||
3318 | sqlite3_context *context, int i) { | |||
3319 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; | |||
3320 | switch (pCur->input_type) { | |||
3321 | case VEC_NPY_EACH_INPUT_BUFFER: | |||
3322 | return vec_npy_eachColumnBuffer(pCur, context, i); | |||
3323 | case VEC_NPY_EACH_INPUT_FILE: | |||
3324 | return vec_npy_eachColumnFile(pCur, context, i); | |||
3325 | } | |||
3326 | return SQLITE_ERROR1; | |||
3327 | } | |||
3328 | ||||
3329 | static sqlite3_module vec_npy_eachModule = { | |||
3330 | /* iVersion */ 0, | |||
3331 | /* xCreate */ 0, | |||
3332 | /* xConnect */ vec_npy_eachConnect, | |||
3333 | /* xBestIndex */ vec_npy_eachBestIndex, | |||
3334 | /* xDisconnect */ vec_npy_eachDisconnect, | |||
3335 | /* xDestroy */ 0, | |||
3336 | /* xOpen */ vec_npy_eachOpen, | |||
3337 | /* xClose */ vec_npy_eachClose, | |||
3338 | /* xFilter */ vec_npy_eachFilter, | |||
3339 | /* xNext */ vec_npy_eachNext, | |||
3340 | /* xEof */ vec_npy_eachEof, | |||
3341 | /* xColumn */ vec_npy_eachColumn, | |||
3342 | /* xRowid */ vec_npy_eachRowid, | |||
3343 | /* xUpdate */ 0, | |||
3344 | /* xBegin */ 0, | |||
3345 | /* xSync */ 0, | |||
3346 | /* xCommit */ 0, | |||
3347 | /* xRollback */ 0, | |||
3348 | /* xFindMethod */ 0, | |||
3349 | /* xRename */ 0, | |||
3350 | /* xSavepoint */ 0, | |||
3351 | /* xRelease */ 0, | |||
3352 | /* xRollbackTo */ 0, | |||
3353 | /* xShadowName */ 0, | |||
3354 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 | |||
3355 | /* xIntegrity */ 0, | |||
3356 | #endif | |||
3357 | }; | |||
3358 | ||||
3359 | #pragma endregion | |||
3360 | ||||
3361 | #pragma region vec0 virtual table | |||
3362 | ||||
// Column layout of a vec0 table: the id column comes first, user-defined
// columns start right after it, and the hidden `distance` and `k` columns
// sit at fixed offsets past the last user column.
#define VEC0_COLUMN_ID 0
#define VEC0_COLUMN_USERN_START 1
#define VEC0_COLUMN_OFFSET_DISTANCE 1
#define VEC0_COLUMN_OFFSET_K 2

// Shadow table name templates. Format arguments:
// 1) schema, 2) original vtab table name.
#define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""

#define VEC0_SHADOW_CHUNKS_NAME "\"%w\".\"%w_chunks\""
/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_CHUNKS_CREATE                                              \
  "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME "("                                  \
  "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,"                                \
  "size INTEGER NOT NULL,"                                                     \
  "validity BLOB NOT NULL,"                                                    \
  "rowids BLOB NOT NULL"                                                       \
  ");"

#define VEC0_SHADOW_ROWIDS_NAME "\"%w\".\"%w_rowids\""
/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_ROWIDS_CREATE_BASIC                                        \
  "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME "("                                  \
  "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"                                   \
  "id,"                                                                        \
  "chunk_id INTEGER,"                                                          \
  "chunk_offset INTEGER"                                                       \
  ");"

// vec0 tables with a text primary key are still backed by int64 primary
// keys, since a fixed-length rowid is required for vec0 chunks. An extra
// 'id text unique' column emulates the text primary key interface.
#define VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT                                      \
  "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME "("                                  \
  "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"                                   \
  "id TEXT UNIQUE NOT NULL,"                                                   \
  "chunk_id INTEGER,"                                                          \
  "chunk_offset INTEGER"                                                       \
  ");"

/// 1) schema, 2) original vtab table name (followed by a %02d number)
#define VEC0_SHADOW_VECTOR_N_NAME "\"%w\".\"%w_vector_chunks%02d\""

/// 1) schema, 2) original vtab table name (followed by a %02d number)
#define VEC0_SHADOW_VECTOR_N_CREATE                                            \
  "CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME "("                                \
  "rowid PRIMARY KEY,"                                                         \
  "vectors BLOB NOT NULL"                                                      \
  ");"

#define VEC0_SHADOW_AUXILIARY_NAME "\"%w\".\"%w_auxiliary\""

#define VEC0_SHADOW_METADATA_N_NAME "\"%w\".\"%w_metadatachunks%02d\""
#define VEC0_SHADOW_METADATA_TEXT_DATA_NAME "\"%w\".\"%w_metadatatext%02d\""

// Prefix for error messages that indicate an internal failure, and the URL
// where such failures can be reported.
#define VEC_INTERAL_ERROR "Internal sqlite-vec error: "
#define REPORT_URL "https://github.com/asg017/sqlite-vec/issues/new"

typedef struct vec0_vtab vec0_vtab;

// Upper bounds on the number of columns of each kind in one vec0 table.
#define VEC0_MAX_VECTOR_COLUMNS 16
#define VEC0_MAX_PARTITION_COLUMNS 4
#define VEC0_MAX_AUXILIARY_COLUMNS 16
#define VEC0_MAX_METADATA_COLUMNS 16

#define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192
#define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16
#define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH 12
3429 | ||||
/**
 * Kind of a user-defined column in a vec0 table. Values start at 1.
 */
typedef enum {
  // Vector column, e.g. "contents_embedding float[1024]".
  SQLITE_VEC0_USER_COLUMN_KIND_VECTOR = 1,

  // Partition key column, e.g. "user_id integer partition key".
  SQLITE_VEC0_USER_COLUMN_KIND_PARTITION = 2,

  // Auxiliary column.
  SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY = 3,

  // Metadata column that can be filtered, e.g. "genre text".
  SQLITE_VEC0_USER_COLUMN_KIND_METADATA = 4,
} vec0_user_column_kind;
3443 | ||||
3444 | struct vec0_vtab { | |||
3445 | sqlite3_vtab base; | |||
3446 | ||||
3447 | // the SQLite connection of the host database | |||
3448 | sqlite3 *db; | |||
3449 | ||||
3450 | // True if the primary key of the vec0 table has a column type TEXT. | |||
3451 | // Will change the schema of the _rowids table, and insert/query logic. | |||
3452 | int pkIsText; | |||
3453 | ||||
3454 | // number of defined vector columns. | |||
3455 | int numVectorColumns; | |||
3456 | ||||
3457 | // number of defined PARTITION KEY columns. | |||
3458 | int numPartitionColumns; | |||
3459 | ||||
3460 | // number of defined auxiliary columns | |||
3461 | int numAuxiliaryColumns; | |||
3462 | ||||
3463 | // number of defined metadata columns | |||
3464 | int numMetadataColumns; | |||
3465 | ||||
3466 | ||||
3467 | // Name of the schema the table exists on. | |||
3468 | // Must be freed with sqlite3_free() | |||
3469 | char *schemaName; | |||
3470 | ||||
3471 | // Name of the table the table exists on. | |||
3472 | // Must be freed with sqlite3_free() | |||
3473 | char *tableName; | |||
3474 | ||||
3475 | // Name of the _rowids shadow table. | |||
3476 | // Must be freed with sqlite3_free() | |||
3477 | char *shadowRowidsName; | |||
3478 | ||||
3479 | // Name of the _chunks shadow table. | |||
3480 | // Must be freed with sqlite3_free() | |||
3481 | char *shadowChunksName; | |||
3482 | ||||
3483 | // contains enum vec0_user_column_kind values for up to | |||
3484 | // numVectorColumns + numPartitionColumns entries | |||
3485 | vec0_user_column_kind user_column_kinds[VEC0_MAX_VECTOR_COLUMNS16 + VEC0_MAX_PARTITION_COLUMNS4 + VEC0_MAX_AUXILIARY_COLUMNS16 + VEC0_MAX_METADATA_COLUMNS16]; | |||
3486 | ||||
3487 | uint8_t user_column_idxs[VEC0_MAX_VECTOR_COLUMNS16 + VEC0_MAX_PARTITION_COLUMNS4 + VEC0_MAX_AUXILIARY_COLUMNS16 + VEC0_MAX_METADATA_COLUMNS16]; | |||
3488 | ||||
3489 | ||||
3490 | // Name of all the vector chunk shadow tables. | |||
3491 | // Ex '_vector_chunks00' | |||
3492 | // Only the first numVectorColumns entries will be available. | |||
3493 | // The first numVectorColumns entries must be freed with sqlite3_free() | |||
3494 | char *shadowVectorChunksNames[VEC0_MAX_VECTOR_COLUMNS16]; | |||
3495 | ||||
3496 | // Name of all metadata chunk shadow tables, ie `_metadatachunks00` | |||
3497 | // Only the first numMetadataColumns entries will be available. | |||
3498 | // The first numMetadataColumns entries must be freed with sqlite3_free() | |||
3499 | char *shadowMetadataChunksNames[VEC0_MAX_METADATA_COLUMNS16]; | |||
3500 | ||||
3501 | struct VectorColumnDefinition vector_columns[VEC0_MAX_VECTOR_COLUMNS16]; | |||
3502 | struct Vec0PartitionColumnDefinition paritition_columns[VEC0_MAX_PARTITION_COLUMNS4]; | |||
3503 | struct Vec0AuxiliaryColumnDefinition auxiliary_columns[VEC0_MAX_AUXILIARY_COLUMNS16]; | |||
3504 | struct Vec0MetadataColumnDefinition metadata_columns[VEC0_MAX_METADATA_COLUMNS16]; | |||
3505 | ||||
3506 | int chunk_size; | |||
3507 | ||||
3508 | // select latest chunk from _chunks, getting chunk_id | |||
3509 | sqlite3_stmt *stmtLatestChunk; | |||
3510 | ||||
3511 | /** | |||
3512 | * Statement to insert a row into the _rowids table, with a rowid. | |||
3513 | * Parameters: | |||
3514 | * 1: int64, rowid to insert | |||
3515 | * Result columns: none | |||
3516 | * SQL: "INSERT INTO _rowids(rowid) VALUES (?)" | |||
3517 | * | |||
3518 | * Must be cleaned up with sqlite3_finalize(). | |||
3519 | */ | |||
3520 | sqlite3_stmt *stmtRowidsInsertRowid; | |||
3521 | ||||
3522 | /** | |||
3523 | * Statement to insert a row into the _rowids table, with an id. | |||
3524 | * The id column isn't a tradition primary key, but instead a unique | |||
3525 | * column to handle "text primary key" vec0 tables. The true int64 rowid | |||
3526 | * can be retrieved after inserting with sqlite3_last_rowid(). | |||
3527 | * | |||
3528 | * Parameters: | |||
3529 | * 1: text or null, id to insert | |||
3530 | * Result columns: none | |||
3531 | * | |||
3532 | * Must be cleaned up with sqlite3_finalize(). | |||
3533 | */ | |||
3534 | sqlite3_stmt *stmtRowidsInsertId; | |||
3535 | ||||
3536 | /** | |||
3537 | * Statement to update the "position" columns chunk_id and chunk_offset for | |||
3538 | * a given _rowids row. Used when the "next available" chunk position is found | |||
3539 | * for a vector. | |||
3540 | * | |||
3541 | * Parameters: | |||
3542 | * 1: int64, chunk_id value | |||
3543 | * 2: int64, chunk_offset value | |||
3544 | * 3: int64, rowid value | |||
3545 | * Result columns: none | |||
3546 | * | |||
3547 | * Must be cleaned up with sqlite3_finalize(). | |||
3548 | */ | |||
3549 | sqlite3_stmt *stmtRowidsUpdatePosition; | |||
3550 | ||||
3551 | /** | |||
3552 | * Statement to quickly find the chunk_id + chunk_offset of a given row. | |||
3553 | * Parameters: | |||
3554 | * 1: rowid of the row/vector to lookup | |||
3555 | * Result columns: | |||
3556 | * 0: chunk_id (i64) | |||
3557 | * 1: chunk_offset (i64) | |||
3558 | * SQL: "SELECT id, chunk_id, chunk_offset FROM _rowids WHERE rowid = ?"" | |||
3559 | * | |||
3560 | * Must be cleaned up with sqlite3_finalize(). | |||
3561 | */ | |||
3562 | sqlite3_stmt *stmtRowidsGetChunkPosition; | |||
3563 | }; | |||
3564 | ||||
3565 | /** | |||
3566 | * @brief Finalize all the sqlite3_stmt members in a vec0_vtab. | |||
3567 | * | |||
3568 | * @param p vec0_vtab pointer | |||
3569 | */ | |||
3570 | void vec0_free_resources(vec0_vtab *p) { | |||
3571 | sqlite3_finalizesqlite3_api->finalize(p->stmtLatestChunk); | |||
3572 | p->stmtLatestChunk = NULL((void*)0); | |||
3573 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertRowid); | |||
3574 | p->stmtRowidsInsertRowid = NULL((void*)0); | |||
3575 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertId); | |||
3576 | p->stmtRowidsInsertId = NULL((void*)0); | |||
3577 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsUpdatePosition); | |||
3578 | p->stmtRowidsUpdatePosition = NULL((void*)0); | |||
3579 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsGetChunkPosition); | |||
3580 | p->stmtRowidsGetChunkPosition = NULL((void*)0); | |||
3581 | } | |||
3582 | ||||
3583 | /** | |||
3584 | * @brief Free all memory and sqlite3_stmt members of a vec0_vtab | |||
3585 | * | |||
3586 | * @param p vec0_vtab pointer | |||
3587 | */ | |||
3588 | void vec0_free(vec0_vtab *p) { | |||
3589 | vec0_free_resources(p); | |||
3590 | ||||
3591 | sqlite3_freesqlite3_api->free(p->schemaName); | |||
3592 | p->schemaName = NULL((void*)0); | |||
3593 | sqlite3_freesqlite3_api->free(p->tableName); | |||
3594 | p->tableName = NULL((void*)0); | |||
3595 | sqlite3_freesqlite3_api->free(p->shadowChunksName); | |||
3596 | p->shadowChunksName = NULL((void*)0); | |||
3597 | sqlite3_freesqlite3_api->free(p->shadowRowidsName); | |||
3598 | p->shadowRowidsName = NULL((void*)0); | |||
3599 | ||||
3600 | for (int i = 0; i < p->numVectorColumns; i++) { | |||
3601 | sqlite3_freesqlite3_api->free(p->shadowVectorChunksNames[i]); | |||
3602 | p->shadowVectorChunksNames[i] = NULL((void*)0); | |||
3603 | ||||
3604 | sqlite3_freesqlite3_api->free(p->vector_columns[i].name); | |||
3605 | p->vector_columns[i].name = NULL((void*)0); | |||
3606 | } | |||
3607 | } | |||
3608 | ||||
3609 | int vec0_num_defined_user_columns(vec0_vtab *p) { | |||
3610 | return p->numVectorColumns + p->numPartitionColumns + p->numAuxiliaryColumns + p->numMetadataColumns; | |||
3611 | } | |||
3612 | ||||
3613 | /** | |||
3614 | * @brief Returns the index of the distance hidden column for the given vec0 | |||
3615 | * table. | |||
3616 | * | |||
3617 | * @param p vec0 table | |||
3618 | * @return int | |||
3619 | */ | |||
3620 | int vec0_column_distance_idx(vec0_vtab *p) { | |||
3621 | return VEC0_COLUMN_USERN_START1 + (vec0_num_defined_user_columns(p) - 1) + | |||
3622 | VEC0_COLUMN_OFFSET_DISTANCE1; | |||
3623 | } | |||
3624 | ||||
3625 | /** | |||
3626 | * @brief Returns the index of the k hidden column for the given vec0 table. | |||
3627 | * | |||
3628 | * @param p vec0 table | |||
3629 | * @return int k column index | |||
3630 | */ | |||
3631 | int vec0_column_k_idx(vec0_vtab *p) { | |||
3632 | return VEC0_COLUMN_USERN_START1 + (vec0_num_defined_user_columns(p) - 1) + | |||
3633 | VEC0_COLUMN_OFFSET_K2; | |||
3634 | } | |||
3635 | ||||
3636 | /** | |||
3637 | * Returns 1 if the given column-based index is a valid vector column, | |||
3638 | * 0 otherwise. | |||
3639 | */ | |||
3640 | int vec0_column_idx_is_vector(vec0_vtab *pVtab, int column_idx) { | |||
3641 | return column_idx >= VEC0_COLUMN_USERN_START1 && | |||
3642 | column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) && | |||
3643 | pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_VECTOR; | |||
3644 | } | |||
3645 | ||||
3646 | /** | |||
3647 | * Returns the vector index of the given user column index. | |||
3648 | * ONLY call if validated with vec0_column_idx_is_vector before | |||
3649 | */ | |||
3650 | int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) { | |||
3651 | UNUSED_PARAMETER(pVtab)(void)(pVtab); | |||
3652 | return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1]; | |||
3653 | } | |||
3654 | /** | |||
3655 | * Returns 1 if the given column-based index is a "partition key" column, | |||
3656 | * 0 otherwise. | |||
3657 | */ | |||
3658 | int vec0_column_idx_is_partition(vec0_vtab *pVtab, int column_idx) { | |||
3659 | return column_idx >= VEC0_COLUMN_USERN_START1 && | |||
3660 | column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) && | |||
3661 | pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_PARTITION; | |||
3662 | } | |||
3663 | ||||
3664 | /** | |||
3665 | * Returns the partition column index of the given user column index. | |||
3666 | * ONLY call if validated with vec0_column_idx_is_vector before | |||
3667 | */ | |||
3668 | int vec0_column_idx_to_partition_idx(vec0_vtab *pVtab, int column_idx) { | |||
3669 | UNUSED_PARAMETER(pVtab)(void)(pVtab); | |||
3670 | return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1]; | |||
3671 | } | |||
3672 | ||||
3673 | /** | |||
3674 | * Returns 1 if the given column-based index is a auxiliary column, | |||
3675 | * 0 otherwise. | |||
3676 | */ | |||
3677 | int vec0_column_idx_is_auxiliary(vec0_vtab *pVtab, int column_idx) { | |||
3678 | return column_idx >= VEC0_COLUMN_USERN_START1 && | |||
3679 | column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) && | |||
3680 | pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY; | |||
3681 | } | |||
3682 | ||||
3683 | /** | |||
3684 | * Returns the auxiliary column index of the given user column index. | |||
3685 | * ONLY call if validated with vec0_column_idx_to_partition_idx before | |||
3686 | */ | |||
3687 | int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) { | |||
3688 | UNUSED_PARAMETER(pVtab)(void)(pVtab); | |||
3689 | return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1]; | |||
3690 | } | |||
3691 | ||||
3692 | /** | |||
3693 | * Returns 1 if the given column-based index is a metadata column, | |||
3694 | * 0 otherwise. | |||
3695 | */ | |||
3696 | int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) { | |||
3697 | return column_idx >= VEC0_COLUMN_USERN_START1 && | |||
3698 | column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) && | |||
3699 | pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_METADATA; | |||
3700 | } | |||
3701 | ||||
3702 | /** | |||
3703 | * Returns the metadata column index of the given user column index. | |||
3704 | * ONLY call if validated with vec0_column_idx_is_metadata before | |||
3705 | */ | |||
3706 | int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) { | |||
3707 | UNUSED_PARAMETER(pVtab)(void)(pVtab); | |||
3708 | return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1]; | |||
3709 | } | |||
3710 | ||||
3711 | /** | |||
3712 | * @brief Retrieve the chunk_id, chunk_offset, and possible "id" value | |||
3713 | * of a vec0_vtab row with the provided rowid | |||
3714 | * | |||
3715 | * @param p vec0_vtab | |||
3716 | * @param rowid the rowid of the row to query | |||
3717 | * @param id output, optional sqlite3_value to provide the id. | |||
3718 | * Useful for text PK rows. Must be freed with sqlite3_value_free() | |||
3719 | * @param chunk_id output, the chunk_id the row belongs to | |||
3720 | * @param chunk_offset output, the offset within the chunk the row belongs to | |||
3721 | * @return SQLITE_ROW on success, error code otherwise. SQLITE_EMPTY if row DNE | |||
3722 | */ | |||
3723 | int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, sqlite3_value **id, | |||
3724 | i64 *chunk_id, i64 *chunk_offset) { | |||
3725 | int rc; | |||
3726 | ||||
3727 | if (!p->stmtRowidsGetChunkPosition) { | |||
3728 | const char *zSql = | |||
3729 | sqlite3_mprintfsqlite3_api->mprintf("SELECT id, chunk_id, chunk_offset " | |||
3730 | "FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE rowid = ?", | |||
3731 | p->schemaName, p->tableName); | |||
3732 | if (!zSql) { | |||
3733 | rc = SQLITE_NOMEM7; | |||
3734 | goto cleanup; | |||
3735 | } | |||
3736 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsGetChunkPosition, 0); | |||
3737 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
3738 | if (rc != SQLITE_OK0) { | |||
3739 | vtab_set_error( | |||
3740 | &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
3741 | "could not initialize 'rowids get chunk position' statement"); | |||
3742 | goto cleanup; | |||
3743 | } | |||
3744 | } | |||
3745 | ||||
3746 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsGetChunkPosition, 1, rowid); | |||
3747 | rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsGetChunkPosition); | |||
3748 | // special case: when no results, return SQLITE_EMPTY to convey "that chunk | |||
3749 | // position doesnt exist" | |||
3750 | if (rc == SQLITE_DONE101) { | |||
3751 | rc = SQLITE_EMPTY16; | |||
3752 | goto cleanup; | |||
3753 | } | |||
3754 | if (rc != SQLITE_ROW100) { | |||
3755 | goto cleanup; | |||
3756 | } | |||
3757 | ||||
3758 | if (id) { | |||
3759 | sqlite3_value *value = | |||
3760 | sqlite3_column_valuesqlite3_api->column_value(p->stmtRowidsGetChunkPosition, 0); | |||
3761 | *id = sqlite3_value_dupsqlite3_api->value_dup(value); | |||
3762 | if (!*id) { | |||
3763 | rc = SQLITE_NOMEM7; | |||
3764 | goto cleanup; | |||
3765 | } | |||
3766 | } | |||
3767 | ||||
3768 | if (chunk_id) { | |||
3769 | *chunk_id = sqlite3_column_int64sqlite3_api->column_int64(p->stmtRowidsGetChunkPosition, 1); | |||
3770 | } | |||
3771 | if (chunk_offset) { | |||
3772 | *chunk_offset = sqlite3_column_int64sqlite3_api->column_int64(p->stmtRowidsGetChunkPosition, 2); | |||
3773 | } | |||
3774 | ||||
3775 | rc = SQLITE_OK0; | |||
3776 | ||||
3777 | cleanup: | |||
3778 | sqlite3_resetsqlite3_api->reset(p->stmtRowidsGetChunkPosition); | |||
3779 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsGetChunkPosition); | |||
3780 | return rc; | |||
3781 | } | |||
3782 | ||||
3783 | /** | |||
3784 | * @brief Return the id value from the _rowids table where _rowids.rowid = | |||
3785 | * rowid. | |||
3786 | * | |||
3787 | * @param pVtab: vec0 table to query | |||
3788 | * @param rowid: rowid of the row to query. | |||
3789 | * @param out: A dup'ed sqlite3_value of the id column. Might be null. | |||
3790 | * Must be cleaned up with sqlite3_value_free(). | |||
3791 | * @returns SQLITE_OK on success, error code on failure | |||
3792 | */ | |||
3793 | int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid, | |||
3794 | sqlite3_value **out) { | |||
3795 | // PERF: different strategy than get_chunk_position? | |||
3796 | return vec0_get_chunk_position((vec0_vtab *)pVtab, rowid, out, NULL((void*)0), NULL((void*)0)); | |||
3797 | } | |||
3798 | ||||
3799 | int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) { | |||
3800 | sqlite3_stmt *stmt = NULL((void*)0); | |||
3801 | int rc; | |||
3802 | char *zSql; | |||
3803 | zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT rowid" | |||
3804 | " FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE id = ?", | |||
3805 | p->schemaName, p->tableName); | |||
3806 | if (!zSql) { | |||
3807 | rc = SQLITE_NOMEM7; | |||
3808 | goto cleanup; | |||
3809 | } | |||
3810 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
3811 | sqlite3_freesqlite3_api->free(zSql); | |||
3812 | if (rc != SQLITE_OK0) { | |||
3813 | goto cleanup; | |||
3814 | } | |||
3815 | sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1, valueId); | |||
3816 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
3817 | if (rc == SQLITE_DONE101) { | |||
3818 | rc = SQLITE_EMPTY16; | |||
3819 | goto cleanup; | |||
3820 | } | |||
3821 | if (rc != SQLITE_ROW100) { | |||
3822 | goto cleanup; | |||
3823 | } | |||
3824 | *rowid = sqlite3_column_int64sqlite3_api->column_int64(stmt, 0); | |||
3825 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
3826 | if (rc != SQLITE_DONE101) { | |||
3827 | goto cleanup; | |||
3828 | } | |||
3829 | ||||
3830 | rc = SQLITE_OK0; | |||
3831 | ||||
3832 | cleanup: | |||
3833 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
3834 | return rc; | |||
3835 | } | |||
3836 | ||||
3837 | int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) { | |||
3838 | if (!p->pkIsText) { | |||
3839 | sqlite3_result_int64sqlite3_api->result_int64(context, rowid); | |||
3840 | return SQLITE_OK0; | |||
3841 | } | |||
3842 | sqlite3_value *valueId; | |||
3843 | int rc = vec0_get_id_value_from_rowid(p, rowid, &valueId); | |||
3844 | if (rc != SQLITE_OK0) { | |||
3845 | return rc; | |||
3846 | } | |||
3847 | if (!valueId) { | |||
3848 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
3849 | } else { | |||
3850 | sqlite3_result_valuesqlite3_api->result_value(context, valueId); | |||
3851 | sqlite3_value_freesqlite3_api->value_free(valueId); | |||
3852 | } | |||
3853 | return SQLITE_OK0; | |||
3854 | } | |||
3855 | ||||
3856 | /** | |||
3857 | * @brief | |||
3858 | * | |||
3859 | * @param pVtab: virtual table to query | |||
3860 | * @param rowid: row to lookup | |||
3861 | * @param vector_column_idx: which vector column to query | |||
3862 | * @param outVector: Output pointer to the vector buffer. | |||
3863 | * Must be sqlite3_free()'ed. | |||
3864 | * @param outVectorSize: Pointer to a int where the size of outVector | |||
3865 | * will be stored. | |||
3866 | * @return int SQLITE_OK on success. | |||
3867 | */ | |||
3868 | int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx, | |||
3869 | void **outVector, int *outVectorSize) { | |||
3870 | vec0_vtab *p = pVtab; | |||
3871 | int rc, brc; | |||
3872 | i64 chunk_id; | |||
3873 | i64 chunk_offset; | |||
3874 | size_t size; | |||
3875 | void *buf = NULL((void*)0); | |||
3876 | int blobOffset; | |||
3877 | sqlite3_blob *vectorBlob = NULL((void*)0); | |||
3878 | assert((vector_column_idx >= 0) &&((void) sizeof (((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)) ? 1 : 0), __extension__ ({ if ((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)) ; else __assert_fail ("(vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 3879, __extension__ __PRETTY_FUNCTION__); })) | |||
3879 | (vector_column_idx < pVtab->numVectorColumns))((void) sizeof (((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)) ? 1 : 0), __extension__ ({ if ((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)) ; else __assert_fail ("(vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 3879, __extension__ __PRETTY_FUNCTION__); })); | |||
3880 | ||||
3881 | rc = vec0_get_chunk_position(pVtab, rowid, NULL((void*)0), &chunk_id, &chunk_offset); | |||
3882 | if (rc == SQLITE_EMPTY16) { | |||
3883 | vtab_set_error(&pVtab->base, "Could not find a row with rowid %lld", rowid); | |||
3884 | goto cleanup; | |||
3885 | } | |||
3886 | if (rc != SQLITE_OK0) { | |||
3887 | goto cleanup; | |||
3888 | } | |||
3889 | ||||
3890 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, | |||
3891 | p->shadowVectorChunksNames[vector_column_idx], | |||
3892 | "vectors", chunk_id, 0, &vectorBlob); | |||
3893 | ||||
3894 | if (rc != SQLITE_OK0) { | |||
3895 | vtab_set_error(&pVtab->base, | |||
3896 | "Could not fetch vector data for %lld, opening blob failed", | |||
3897 | rowid); | |||
3898 | rc = SQLITE_ERROR1; | |||
3899 | goto cleanup; | |||
3900 | } | |||
3901 | ||||
3902 | size = vector_column_byte_size(pVtab->vector_columns[vector_column_idx]); | |||
3903 | blobOffset = chunk_offset * size; | |||
3904 | ||||
3905 | buf = sqlite3_mallocsqlite3_api->malloc(size); | |||
3906 | if (!buf) { | |||
3907 | rc = SQLITE_NOMEM7; | |||
3908 | goto cleanup; | |||
3909 | } | |||
3910 | ||||
3911 | rc = sqlite3_blob_readsqlite3_api->blob_read(vectorBlob, buf, size, blobOffset); | |||
3912 | if (rc != SQLITE_OK0) { | |||
3913 | sqlite3_freesqlite3_api->free(buf); | |||
3914 | buf = NULL((void*)0); | |||
3915 | vtab_set_error( | |||
3916 | &pVtab->base, | |||
3917 | "Could not fetch vector data for %lld, reading from blob failed", | |||
3918 | rowid); | |||
3919 | rc = SQLITE_ERROR1; | |||
3920 | goto cleanup; | |||
3921 | } | |||
3922 | ||||
3923 | *outVector = buf; | |||
3924 | if (outVectorSize) { | |||
3925 | *outVectorSize = size; | |||
3926 | } | |||
3927 | rc = SQLITE_OK0; | |||
3928 | ||||
3929 | cleanup: | |||
3930 | brc = sqlite3_blob_closesqlite3_api->blob_close(vectorBlob); | |||
3931 | if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) { | |||
3932 | vtab_set_error( | |||
3933 | &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
3934 | "unknown error, could not close vector blob, please file an issue"); | |||
3935 | return brc; | |||
3936 | } | |||
3937 | ||||
3938 | return rc; | |||
3939 | } | |||
3940 | ||||
3941 | /** | |||
3942 | * @brief Retrieve the sqlite3_value of the i'th partition value for the given row. | |||
3943 | * | |||
3944 | * @param pVtab - the vec0_vtab in questions | |||
3945 | * @param rowid - rowid of target row | |||
3946 | * @param partition_idx - which partition column to retrieve | |||
3947 | * @param outValue - output sqlite3_value | |||
3948 | * @return int - SQLITE_OK on success, otherwise error code | |||
3949 | */ | |||
3950 | int vec0_get_partition_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int partition_idx, sqlite3_value ** outValue) { | |||
3951 | int rc; | |||
3952 | i64 chunk_id; | |||
3953 | i64 chunk_offset; | |||
3954 | rc = vec0_get_chunk_position(pVtab, rowid, NULL((void*)0), &chunk_id, &chunk_offset); | |||
3955 | if(rc != SQLITE_OK0) { | |||
3956 | return rc; | |||
3957 | } | |||
3958 | sqlite3_stmt * stmt = NULL((void*)0); | |||
3959 | char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT partition%02d FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" " WHERE chunk_id = ?", partition_idx, pVtab->schemaName, pVtab->tableName); | |||
3960 | if(!zSql) { | |||
3961 | return SQLITE_NOMEM7; | |||
3962 | } | |||
3963 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pVtab->db, zSql, -1, &stmt, NULL((void*)0)); | |||
3964 | sqlite3_freesqlite3_api->free(zSql); | |||
3965 | if(rc != SQLITE_OK0) { | |||
3966 | return rc; | |||
3967 | } | |||
3968 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, chunk_id); | |||
3969 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
3970 | if(rc != SQLITE_ROW100) { | |||
3971 | rc = SQLITE_ERROR1; | |||
3972 | goto done; | |||
3973 | } | |||
3974 | *outValue = sqlite3_value_dupsqlite3_api->value_dup(sqlite3_column_valuesqlite3_api->column_value(stmt, 0)); | |||
3975 | if(!*outValue) { | |||
3976 | rc = SQLITE_NOMEM7; | |||
3977 | goto done; | |||
3978 | } | |||
3979 | rc = SQLITE_OK0; | |||
3980 | ||||
3981 | done: | |||
3982 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
3983 | return rc; | |||
3984 | ||||
3985 | } | |||
3986 | ||||
3987 | /** | |||
3988 | * @brief Get the value of an auxiliary column for the given rowid | |||
3989 | * | |||
3990 | * @param pVtab vec0_vtab | |||
3991 | * @param rowid the rowid of the row to lookup | |||
3992 | * @param auxiliary_idx aux index of the column we care about | |||
3993 | * @param outValue Output sqlite3_value to store | |||
3994 | * @return int SQLITE_OK on success, error code otherwise | |||
3995 | */ | |||
3996 | int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int auxiliary_idx, sqlite3_value ** outValue) { | |||
3997 | int rc; | |||
3998 | sqlite3_stmt * stmt = NULL((void*)0); | |||
3999 | char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT value%02d FROM " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " WHERE rowid = ?", auxiliary_idx, pVtab->schemaName, pVtab->tableName); | |||
4000 | if(!zSql) { | |||
4001 | return SQLITE_NOMEM7; | |||
4002 | } | |||
4003 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pVtab->db, zSql, -1, &stmt, NULL((void*)0)); | |||
4004 | sqlite3_freesqlite3_api->free(zSql); | |||
4005 | if(rc != SQLITE_OK0) { | |||
4006 | return rc; | |||
4007 | } | |||
4008 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
4009 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
4010 | if(rc != SQLITE_ROW100) { | |||
4011 | rc = SQLITE_ERROR1; | |||
4012 | goto done; | |||
4013 | } | |||
4014 | *outValue = sqlite3_value_dupsqlite3_api->value_dup(sqlite3_column_valuesqlite3_api->column_value(stmt, 0)); | |||
4015 | if(!*outValue) { | |||
4016 | rc = SQLITE_NOMEM7; | |||
4017 | goto done; | |||
4018 | } | |||
4019 | rc = SQLITE_OK0; | |||
4020 | ||||
4021 | done: | |||
4022 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4023 | return rc; | |||
4024 | } | |||
4025 | ||||
4026 | /** | |||
4027 | * @brief Result the given metadata value for the given row and metadata column index. | |||
4028 | * Will traverse the metadatachunksNN table with BLOB I/0 for the given rowid. | |||
4029 | * | |||
4030 | * @param p | |||
4031 | * @param rowid | |||
4032 | * @param metadata_idx | |||
4033 | * @param context | |||
4034 | * @return int | |||
4035 | */ | |||
4036 | int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_idx, sqlite3_context * context) { | |||
4037 | int rc; | |||
4038 | i64 chunk_id; | |||
4039 | i64 chunk_offset; | |||
4040 | rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset); | |||
4041 | if(rc != SQLITE_OK0) { | |||
4042 | return rc; | |||
4043 | } | |||
4044 | sqlite3_blob * blobValue; | |||
4045 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &blobValue); | |||
4046 | if(rc != SQLITE_OK0) { | |||
4047 | return rc; | |||
4048 | } | |||
4049 | ||||
4050 | switch(p->metadata_columns[metadata_idx].kind) { | |||
4051 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { | |||
4052 | u8 block; | |||
4053 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(block), chunk_offset / CHAR_BIT8); | |||
4054 | if(rc != SQLITE_OK0) { | |||
4055 | goto done; | |||
4056 | } | |||
4057 | int value = block >> ((chunk_offset % CHAR_BIT8)) & 1; | |||
4058 | sqlite3_result_intsqlite3_api->result_int(context, value); | |||
4059 | break; | |||
4060 | } | |||
4061 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { | |||
4062 | i64 value; | |||
4063 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64)); | |||
4064 | if(rc != SQLITE_OK0) { | |||
4065 | goto done; | |||
4066 | } | |||
4067 | sqlite3_result_int64sqlite3_api->result_int64(context, value); | |||
4068 | break; | |||
4069 | } | |||
4070 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { | |||
4071 | double value; | |||
4072 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double)); | |||
4073 | if(rc != SQLITE_OK0) { | |||
4074 | goto done; | |||
4075 | } | |||
4076 | sqlite3_result_doublesqlite3_api->result_double(context, value); | |||
4077 | break; | |||
4078 | } | |||
4079 | case VEC0_METADATA_COLUMN_KIND_TEXT: { | |||
4080 | u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
4081 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); | |||
4082 | if(rc != SQLITE_OK0) { | |||
4083 | goto done; | |||
4084 | } | |||
4085 | int length = ((int *)view)[0]; | |||
4086 | if(length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
4087 | sqlite3_result_textsqlite3_api->result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
4088 | } | |||
4089 | else { | |||
4090 | sqlite3_stmt * stmt; | |||
4091 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx); | |||
4092 | if(!zSql) { | |||
4093 | rc = SQLITE_ERROR1; | |||
4094 | goto done; | |||
4095 | } | |||
4096 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
4097 | sqlite3_freesqlite3_api->free((void *) zSql); | |||
4098 | if(rc != SQLITE_OK0) { | |||
4099 | goto done; | |||
4100 | } | |||
4101 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
4102 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
4103 | if(rc != SQLITE_ROW100) { | |||
4104 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4105 | rc = SQLITE_ERROR1; | |||
4106 | goto done; | |||
4107 | } | |||
4108 | sqlite3_result_valuesqlite3_api->result_value(context, sqlite3_column_valuesqlite3_api->column_value(stmt, 0)); | |||
4109 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4110 | rc = SQLITE_OK0; | |||
4111 | } | |||
4112 | break; | |||
4113 | } | |||
4114 | } | |||
4115 | done: | |||
4116 | // blobValue is read-only, will not fail on close | |||
4117 | sqlite3_blob_closesqlite3_api->blob_close(blobValue); | |||
4118 | return rc; | |||
4119 | ||||
4120 | } | |||
4121 | ||||
4122 | int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_value ** partitionKeyValues) { | |||
4123 | int rc; | |||
4124 | const char *zSql; | |||
4125 | // lazy initialize stmtLatestChunk when needed. May be cleared during xSync() | |||
4126 | if (!p->stmtLatestChunk) { | |||
4127 | if(p->numPartitionColumns > 0) { | |||
4128 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); | |||
4129 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" " WHERE ", | |||
4130 | p->schemaName, p->tableName); | |||
4131 | ||||
4132 | for(int i = 0; i < p->numPartitionColumns; i++) { | |||
4133 | if(i != 0) { | |||
4134 | sqlite3_str_appendallsqlite3_api->str_appendall(s, " AND "); | |||
4135 | } | |||
4136 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d = ? ", i); | |||
4137 | } | |||
4138 | zSql = sqlite3_str_finishsqlite3_api->str_finish(s); | |||
4139 | }else { | |||
4140 | zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", | |||
4141 | p->schemaName, p->tableName); | |||
4142 | } | |||
4143 | ||||
4144 | if (!zSql) { | |||
4145 | rc = SQLITE_NOMEM7; | |||
4146 | goto cleanup; | |||
4147 | } | |||
4148 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtLatestChunk, 0); | |||
4149 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
4150 | if (rc != SQLITE_OK0) { | |||
4151 | // IMP: V21406_05476 | |||
4152 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
4153 | "could not initialize 'latest chunk' statement"); | |||
4154 | goto cleanup; | |||
4155 | } | |||
4156 | } | |||
4157 | ||||
4158 | for(int i = 0; i < p->numPartitionColumns; i++) { | |||
4159 | sqlite3_bind_valuesqlite3_api->bind_value(p->stmtLatestChunk, i+1, (partitionKeyValues[i])); | |||
4160 | } | |||
4161 | ||||
4162 | rc = sqlite3_stepsqlite3_api->step(p->stmtLatestChunk); | |||
4163 | if (rc != SQLITE_ROW100) { | |||
4164 | // IMP: V31559_15629 | |||
4165 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "Could not find latest chunk"); | |||
4166 | rc = SQLITE_ERROR1; | |||
4167 | goto cleanup; | |||
4168 | } | |||
4169 | if(sqlite3_column_typesqlite3_api->column_type(p->stmtLatestChunk, 0) == SQLITE_NULL5){ | |||
4170 | rc = SQLITE_EMPTY16; | |||
4171 | goto cleanup; | |||
4172 | } | |||
4173 | *chunk_rowid = sqlite3_column_int64sqlite3_api->column_int64(p->stmtLatestChunk, 0); | |||
4174 | rc = sqlite3_stepsqlite3_api->step(p->stmtLatestChunk); | |||
4175 | if (rc != SQLITE_DONE101) { | |||
4176 | vtab_set_error(&p->base, | |||
4177 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
4178 | "unknown result code when closing out stmtLatestChunk. " | |||
4179 | "Please file an issue: " REPORT_URL"https://github.com/asg017/sqlite-vec/issues/new", | |||
4180 | p->schemaName, p->shadowChunksName); | |||
4181 | goto cleanup; | |||
4182 | } | |||
4183 | rc = SQLITE_OK0; | |||
4184 | ||||
4185 | cleanup: | |||
4186 | if (p->stmtLatestChunk) { | |||
4187 | sqlite3_resetsqlite3_api->reset(p->stmtLatestChunk); | |||
4188 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtLatestChunk); | |||
4189 | } | |||
4190 | return rc; | |||
4191 | } | |||
4192 | ||||
4193 | int vec0_rowids_insert_rowid(vec0_vtab *p, i64 rowid) { | |||
4194 | int rc = SQLITE_OK0; | |||
4195 | int entered = 0; | |||
4196 | UNUSED_PARAMETER(entered)(void)(entered); // temporary | |||
4197 | if (!p->stmtRowidsInsertRowid) { | |||
4198 | const char *zSql = | |||
4199 | sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "(rowid)" | |||
4200 | "VALUES (?);", | |||
4201 | p->schemaName, p->tableName); | |||
4202 | if (!zSql) { | |||
4203 | rc = SQLITE_NOMEM7; | |||
4204 | goto cleanup; | |||
4205 | } | |||
4206 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertRowid, 0); | |||
4207 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
4208 | if (rc != SQLITE_OK0) { | |||
4209 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
4210 | "could not initialize 'insert rowids' statement"); | |||
4211 | goto cleanup; | |||
4212 | } | |||
4213 | } | |||
4214 | ||||
4215 | #if SQLITE_THREADSAFE | |||
4216 | if (sqlite3_mutex_entersqlite3_api->mutex_enter) { | |||
4217 | sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); | |||
4218 | entered = 1; | |||
4219 | } | |||
4220 | #endif | |||
4221 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsInsertRowid, 1, rowid); | |||
4222 | rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsInsertRowid); | |||
4223 | ||||
4224 | if (rc != SQLITE_DONE101) { | |||
4225 | if (sqlite3_extended_errcodesqlite3_api->extended_errcode(p->db) == SQLITE_CONSTRAINT_PRIMARYKEY(19 | (6<<8))) { | |||
4226 | // IMP: V17090_01160 | |||
4227 | vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key", | |||
4228 | p->tableName); | |||
4229 | } else { | |||
4230 | // IMP: V04679_21517 | |||
4231 | vtab_set_error(&p->base, | |||
4232 | "Error inserting rowid into rowids shadow table: %s", | |||
4233 | sqlite3_errmsgsqlite3_api->errmsg(sqlite3_db_handlesqlite3_api->db_handle(p->stmtRowidsInsertId))); | |||
4234 | } | |||
4235 | rc = SQLITE_ERROR1; | |||
4236 | goto cleanup; | |||
4237 | } | |||
4238 | ||||
4239 | rc = SQLITE_OK0; | |||
4240 | ||||
4241 | cleanup: | |||
4242 | if (p->stmtRowidsInsertRowid) { | |||
4243 | sqlite3_resetsqlite3_api->reset(p->stmtRowidsInsertRowid); | |||
4244 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsInsertRowid); | |||
4245 | } | |||
4246 | ||||
4247 | #if SQLITE_THREADSAFE | |||
4248 | if (sqlite3_mutex_leavesqlite3_api->mutex_leave && entered) { | |||
4249 | sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); | |||
4250 | } | |||
4251 | #endif | |||
4252 | return rc; | |||
4253 | } | |||
4254 | ||||
4255 | int vec0_rowids_insert_id(vec0_vtab *p, sqlite3_value *idValue, i64 *rowid) { | |||
4256 | int rc = SQLITE_OK0; | |||
4257 | int entered = 0; | |||
4258 | UNUSED_PARAMETER(entered)(void)(entered); // temporary | |||
4259 | if (!p->stmtRowidsInsertId) { | |||
4260 | const char *zSql = | |||
4261 | sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "(id)" | |||
4262 | "VALUES (?);", | |||
4263 | p->schemaName, p->tableName); | |||
4264 | if (!zSql) { | |||
4265 | rc = SQLITE_NOMEM7; | |||
4266 | goto complete; | |||
4267 | } | |||
4268 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertId, 0); | |||
4269 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
4270 | if (rc != SQLITE_OK0) { | |||
4271 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
4272 | "could not initialize 'insert rowids id' statement"); | |||
4273 | goto complete; | |||
4274 | } | |||
4275 | } | |||
4276 | ||||
4277 | #if SQLITE_THREADSAFE | |||
4278 | if (sqlite3_mutex_entersqlite3_api->mutex_enter) { | |||
4279 | sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); | |||
4280 | entered = 1; | |||
4281 | } | |||
4282 | #endif | |||
4283 | ||||
4284 | if (idValue) { | |||
4285 | sqlite3_bind_valuesqlite3_api->bind_value(p->stmtRowidsInsertId, 1, idValue); | |||
4286 | } | |||
4287 | rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsInsertId); | |||
4288 | ||||
4289 | if (rc != SQLITE_DONE101) { | |||
4290 | if (sqlite3_extended_errcodesqlite3_api->extended_errcode(p->db) == SQLITE_CONSTRAINT_UNIQUE(19 | (8<<8))) { | |||
4291 | // IMP: V20497_04568 | |||
4292 | vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key", | |||
4293 | p->tableName); | |||
4294 | } else { | |||
4295 | // IMP: V24016_08086 | |||
4296 | // IMP: V15177_32015 | |||
4297 | vtab_set_error(&p->base, | |||
4298 | "Error inserting id into rowids shadow table: %s", | |||
4299 | sqlite3_errmsgsqlite3_api->errmsg(sqlite3_db_handlesqlite3_api->db_handle(p->stmtRowidsInsertId))); | |||
4300 | } | |||
4301 | rc = SQLITE_ERROR1; | |||
4302 | goto complete; | |||
4303 | } | |||
4304 | ||||
4305 | *rowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->db); | |||
4306 | rc = SQLITE_OK0; | |||
4307 | ||||
4308 | complete: | |||
4309 | if (p->stmtRowidsInsertId) { | |||
4310 | sqlite3_resetsqlite3_api->reset(p->stmtRowidsInsertId); | |||
4311 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsInsertId); | |||
4312 | } | |||
4313 | ||||
4314 | #if SQLITE_THREADSAFE | |||
4315 | if (sqlite3_mutex_leavesqlite3_api->mutex_leave && entered) { | |||
4316 | sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); | |||
4317 | } | |||
4318 | #endif | |||
4319 | return rc; | |||
4320 | } | |||
4321 | ||||
4322 | int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_size) { | |||
4323 | switch(kind) { | |||
4324 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: | |||
4325 | return chunk_size / 8; | |||
4326 | case VEC0_METADATA_COLUMN_KIND_INTEGER: | |||
4327 | return chunk_size * sizeof(i64); | |||
4328 | case VEC0_METADATA_COLUMN_KIND_FLOAT: | |||
4329 | return chunk_size * sizeof(double); | |||
4330 | case VEC0_METADATA_COLUMN_KIND_TEXT: | |||
4331 | return chunk_size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16; | |||
4332 | } | |||
4333 | return 0; | |||
4334 | } | |||
4335 | ||||
4336 | int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid, | |||
4337 | i64 chunk_offset) { | |||
4338 | int rc = SQLITE_OK0; | |||
4339 | ||||
4340 | if (!p->stmtRowidsUpdatePosition) { | |||
4341 | const char *zSql = sqlite3_mprintfsqlite3_api->mprintf(" UPDATE " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" | |||
4342 | " SET chunk_id = ?, chunk_offset = ?" | |||
4343 | " WHERE rowid = ?", | |||
4344 | p->schemaName, p->tableName); | |||
4345 | if (!zSql) { | |||
4346 | rc = SQLITE_NOMEM7; | |||
4347 | goto cleanup; | |||
4348 | } | |||
4349 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsUpdatePosition, 0); | |||
4350 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
4351 | if (rc != SQLITE_OK0) { | |||
4352 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
4353 | "could not initialize 'update rowids position' statement"); | |||
4354 | goto cleanup; | |||
4355 | } | |||
4356 | } | |||
4357 | ||||
4358 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 1, chunk_rowid); | |||
4359 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 2, chunk_offset); | |||
4360 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 3, rowid); | |||
4361 | ||||
4362 | rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsUpdatePosition); | |||
4363 | if (rc != SQLITE_DONE101) { | |||
4364 | // IMP: V21925_05995 | |||
4365 | vtab_set_error(&p->base, | |||
4366 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
4367 | "could not update rowids position for rowid=%lld, " | |||
4368 | "chunk_rowid=%lld, chunk_offset=%lld", | |||
4369 | rowid, chunk_rowid, chunk_offset); | |||
4370 | rc = SQLITE_ERROR1; | |||
4371 | goto cleanup; | |||
4372 | } | |||
4373 | rc = SQLITE_OK0; | |||
4374 | ||||
4375 | cleanup: | |||
4376 | if (p->stmtRowidsUpdatePosition) { | |||
4377 | sqlite3_resetsqlite3_api->reset(p->stmtRowidsUpdatePosition); | |||
4378 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsUpdatePosition); | |||
4379 | } | |||
4380 | ||||
4381 | return rc; | |||
4382 | } | |||
4383 | ||||
4384 | /** | |||
4385 | * @brief Adds a new chunk for the vec0 table, and the corresponding vector | |||
4386 | * chunks. | |||
4387 | * | |||
4388 | * Inserts a new row into the _chunks table, with blank data, and uses that new | |||
4389 | * rowid to insert new blank rows into _vector_chunksXX tables. | |||
4390 | * | |||
4391 | * @param p: vec0 table to add new chunk | |||
4392 | * @param paritionKeyValues: Array of partition key valeus for the new chunk, if available | |||
4393 | * @param chunk_rowid: Output pointer, if not NULL, then will be filled with the | |||
4394 | * new chunk rowid. | |||
4395 | * @return int SQLITE_OK on success, error code otherwise. | |||
4396 | */ | |||
4397 | int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk_rowid) { | |||
4398 | int rc; | |||
4399 | char *zSql; | |||
4400 | sqlite3_stmt *stmt; | |||
4401 | i64 rowid; | |||
4402 | ||||
4403 | // Step 1: Insert a new row in _chunks, capture that new rowid | |||
4404 | if(p->numPartitionColumns > 0) { | |||
4405 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); | |||
4406 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "INSERT INTO " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", p->schemaName, p->tableName); | |||
4407 | sqlite3_str_appendallsqlite3_api->str_appendall(s, "(size, validity, rowids"); | |||
4408 | for(int i = 0; i < p->numPartitionColumns; i++) { | |||
4409 | sqlite3_str_appendfsqlite3_api->str_appendf(s, ", partition%02d", i); | |||
4410 | } | |||
4411 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ") VALUES (?, ?, ?"); | |||
4412 | for(int i = 0; i < p->numPartitionColumns; i++) { | |||
4413 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ", ?"); | |||
4414 | } | |||
4415 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ")"); | |||
4416 | ||||
4417 | zSql = sqlite3_str_finishsqlite3_api->str_finish(s); | |||
4418 | }else { | |||
4419 | zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" | |||
4420 | "(size, validity, rowids) " | |||
4421 | "VALUES (?, ?, ?);", | |||
4422 | p->schemaName, p->tableName); | |||
4423 | } | |||
4424 | ||||
4425 | if (!zSql) { | |||
4426 | return SQLITE_NOMEM7; | |||
4427 | } | |||
4428 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
4429 | sqlite3_freesqlite3_api->free(zSql); | |||
4430 | if (rc != SQLITE_OK0) { | |||
4431 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4432 | return rc; | |||
4433 | } | |||
4434 | ||||
4435 | #if SQLITE_THREADSAFE | |||
4436 | if (sqlite3_mutex_entersqlite3_api->mutex_enter) { | |||
4437 | sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); | |||
4438 | } | |||
4439 | #endif | |||
4440 | ||||
4441 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, p->chunk_size); // size | |||
4442 | sqlite3_bind_zeroblobsqlite3_api->bind_zeroblob(stmt, 2, p->chunk_size / CHAR_BIT8); // validity bitmap | |||
4443 | sqlite3_bind_zeroblobsqlite3_api->bind_zeroblob(stmt, 3, p->chunk_size * sizeof(i64)); // rowids | |||
4444 | ||||
4445 | for(int i = 0; i < p->numPartitionColumns; i++) { | |||
4446 | sqlite3_bind_valuesqlite3_api->bind_value(stmt, 4 + i, partitionKeyValues[i]); | |||
4447 | } | |||
4448 | ||||
4449 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
4450 | int failed = rc != SQLITE_DONE101; | |||
4451 | rowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->db); | |||
4452 | #if SQLITE_THREADSAFE | |||
4453 | if (sqlite3_mutex_leavesqlite3_api->mutex_leave) { | |||
4454 | sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); | |||
4455 | } | |||
4456 | #endif | |||
4457 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4458 | if (failed) { | |||
4459 | return SQLITE_ERROR1; | |||
4460 | } | |||
4461 | ||||
4462 | // Step 2: Create new vector chunks for each vector column, with | |||
4463 | // that new chunk_rowid. | |||
4464 | ||||
4465 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
4466 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) { | |||
4467 | continue; | |||
4468 | } | |||
4469 | int vector_column_idx = p->user_column_idxs[i]; | |||
4470 | i64 vectorsSize = | |||
4471 | p->chunk_size * vector_column_byte_size(p->vector_columns[vector_column_idx]); | |||
4472 | ||||
4473 | zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_VECTOR_N_NAME"\"%w\".\"%w_vector_chunks%02d\"" | |||
4474 | "(rowid, vectors)" | |||
4475 | "VALUES (?, ?)", | |||
4476 | p->schemaName, p->tableName, vector_column_idx); | |||
4477 | if (!zSql) { | |||
4478 | return SQLITE_NOMEM7; | |||
4479 | } | |||
4480 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
4481 | sqlite3_freesqlite3_api->free(zSql); | |||
4482 | ||||
4483 | if (rc != SQLITE_OK0) { | |||
4484 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4485 | return rc; | |||
4486 | } | |||
4487 | ||||
4488 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
4489 | sqlite3_bind_zeroblob64sqlite3_api->bind_zeroblob64(stmt, 2, vectorsSize); | |||
4490 | ||||
4491 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
4492 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4493 | if (rc != SQLITE_DONE101) { | |||
4494 | return rc; | |||
4495 | } | |||
4496 | } | |||
4497 | ||||
4498 | // Step 3: Create new metadata chunks for each metadata column | |||
4499 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
4500 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { | |||
4501 | continue; | |||
4502 | } | |||
4503 | int metadata_column_idx = p->user_column_idxs[i]; | |||
4504 | zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"" | |||
4505 | "(rowid, data)" | |||
4506 | "VALUES (?, ?)", | |||
4507 | p->schemaName, p->tableName, metadata_column_idx); | |||
4508 | if (!zSql) { | |||
4509 | return SQLITE_NOMEM7; | |||
4510 | } | |||
4511 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
4512 | sqlite3_freesqlite3_api->free(zSql); | |||
4513 | ||||
4514 | if (rc != SQLITE_OK0) { | |||
4515 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4516 | return rc; | |||
4517 | } | |||
4518 | ||||
4519 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
4520 | sqlite3_bind_zeroblob64sqlite3_api->bind_zeroblob64(stmt, 2, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size)); | |||
4521 | ||||
4522 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
4523 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4524 | if (rc != SQLITE_DONE101) { | |||
4525 | return rc; | |||
4526 | } | |||
4527 | } | |||
4528 | ||||
4529 | ||||
4530 | if (chunk_rowid) { | |||
4531 | *chunk_rowid = rowid; | |||
4532 | } | |||
4533 | ||||
4534 | return SQLITE_OK0; | |||
4535 | } | |||
4536 | ||||
4537 | struct vec0_query_fullscan_data { | |||
4538 | sqlite3_stmt *rowids_stmt; | |||
4539 | i8 done; | |||
4540 | }; | |||
4541 | void vec0_query_fullscan_data_clear( | |||
4542 | struct vec0_query_fullscan_data *fullscan_data) { | |||
4543 | if (!fullscan_data) | |||
4544 | return; | |||
4545 | ||||
4546 | if (fullscan_data->rowids_stmt) { | |||
4547 | sqlite3_finalizesqlite3_api->finalize(fullscan_data->rowids_stmt); | |||
4548 | fullscan_data->rowids_stmt = NULL((void*)0); | |||
4549 | } | |||
4550 | } | |||
4551 | ||||
4552 | struct vec0_query_knn_data { | |||
4553 | i64 k; | |||
4554 | i64 k_used; | |||
4555 | // Array of rowids of size k. Must be freed with sqlite3_free(). | |||
4556 | i64 *rowids; | |||
4557 | // Array of distances of size k. Must be freed with sqlite3_free(). | |||
4558 | f32 *distances; | |||
4559 | i64 current_idx; | |||
4560 | }; | |||
4561 | void vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) { | |||
4562 | if (!knn_data) | |||
4563 | return; | |||
4564 | ||||
4565 | if (knn_data->rowids) { | |||
4566 | sqlite3_freesqlite3_api->free(knn_data->rowids); | |||
4567 | knn_data->rowids = NULL((void*)0); | |||
4568 | } | |||
4569 | if (knn_data->distances) { | |||
4570 | sqlite3_freesqlite3_api->free(knn_data->distances); | |||
4571 | knn_data->distances = NULL((void*)0); | |||
4572 | } | |||
4573 | } | |||
4574 | ||||
4575 | struct vec0_query_point_data { | |||
4576 | i64 rowid; | |||
4577 | void *vectors[VEC0_MAX_VECTOR_COLUMNS16]; | |||
4578 | int done; | |||
4579 | }; | |||
4580 | void vec0_query_point_data_clear(struct vec0_query_point_data *point_data) { | |||
4581 | if (!point_data) | |||
4582 | return; | |||
4583 | for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS16; i++) { | |||
4584 | sqlite3_freesqlite3_api->free(point_data->vectors[i]); | |||
4585 | point_data->vectors[i] = NULL((void*)0); | |||
4586 | } | |||
4587 | } | |||
4588 | ||||
/*
 * Which strategy a vec0 cursor is executing. The values are printable
 * characters rather than small integers.
 *
 * If any values are updated, please update the ARCHITECTURE.md docs
 * accordingly!
 */
typedef enum {
  VEC0_QUERY_PLAN_FULLSCAN = '1',
  VEC0_QUERY_PLAN_POINT = '2',
  VEC0_QUERY_PLAN_KNN = '3',
} vec0_query_plan;
4596 | ||||
4597 | typedef struct vec0_cursor vec0_cursor; | |||
4598 | struct vec0_cursor { | |||
4599 | sqlite3_vtab_cursor base; | |||
4600 | ||||
4601 | vec0_query_plan query_plan; | |||
4602 | struct vec0_query_fullscan_data *fullscan_data; | |||
4603 | struct vec0_query_knn_data *knn_data; | |||
4604 | struct vec0_query_point_data *point_data; | |||
4605 | }; | |||
4606 | ||||
4607 | void vec0_cursor_clear(vec0_cursor *pCur) { | |||
4608 | if (pCur->fullscan_data) { | |||
4609 | vec0_query_fullscan_data_clear(pCur->fullscan_data); | |||
4610 | sqlite3_freesqlite3_api->free(pCur->fullscan_data); | |||
4611 | pCur->fullscan_data = NULL((void*)0); | |||
4612 | } | |||
4613 | if (pCur->knn_data) { | |||
4614 | vec0_query_knn_data_clear(pCur->knn_data); | |||
4615 | sqlite3_freesqlite3_api->free(pCur->knn_data); | |||
4616 | pCur->knn_data = NULL((void*)0); | |||
4617 | } | |||
4618 | if (pCur->point_data) { | |||
4619 | vec0_query_point_data_clear(pCur->point_data); | |||
4620 | sqlite3_freesqlite3_api->free(pCur->point_data); | |||
4621 | pCur->point_data = NULL((void*)0); | |||
4622 | } | |||
4623 | } | |||
4624 | ||||
4625 | #define VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "vec0 constructor error: " | |||
4626 | static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, | |||
4627 | sqlite3_vtab **ppVtab, char **pzErr, bool_Bool isCreate) { | |||
4628 | UNUSED_PARAMETER(pAux)(void)(pAux); | |||
4629 | vec0_vtab *pNew; | |||
4630 | int rc; | |||
4631 | const char *zSql; | |||
4632 | ||||
4633 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); | |||
4634 | if (pNew == 0) | |||
4635 | return SQLITE_NOMEM7; | |||
4636 | memset(pNew, 0, sizeof(*pNew)); | |||
4637 | ||||
4638 | // Declared chunk_size=N for entire table. | |||
4639 | // -1 to use the defualt, otherwise will get re-assigned on `chunk_size=N` | |||
4640 | // option | |||
4641 | int chunk_size = -1; | |||
4642 | int numVectorColumns = 0; | |||
4643 | int numPartitionColumns = 0; | |||
4644 | int numAuxiliaryColumns = 0; | |||
4645 | int numMetadataColumns = 0; | |||
4646 | int user_column_idx = 0; | |||
4647 | ||||
4648 | // track if a "primary key" column is defined | |||
4649 | char *pkColumnName = NULL((void*)0); | |||
4650 | int pkColumnNameLength; | |||
4651 | int pkColumnType = SQLITE_INTEGER1; | |||
4652 | ||||
4653 | for (int i = 3; i < argc; i++) { | |||
4654 | struct VectorColumnDefinition vecColumn; | |||
4655 | struct Vec0PartitionColumnDefinition partitionColumn; | |||
4656 | struct Vec0AuxiliaryColumnDefinition auxColumn; | |||
4657 | struct Vec0MetadataColumnDefinition metadataColumn; | |||
4658 | char *cName = NULL((void*)0); | |||
4659 | int cNameLength; | |||
4660 | int cType; | |||
4661 | ||||
4662 | // Scenario #1: Constructor argument is a vector column definition, ie `foo float[1024]` | |||
4663 | rc = vec0_parse_vector_column(argv[i], strlen(argv[i]), &vecColumn); | |||
4664 | if (rc == SQLITE_ERROR1) { | |||
4665 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4666 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "could not parse vector column '%s'", argv[i]); | |||
4667 | goto error; | |||
4668 | } | |||
4669 | if (rc == SQLITE_OK0) { | |||
4670 | if (numVectorColumns >= VEC0_MAX_VECTOR_COLUMNS16) { | |||
4671 | sqlite3_freesqlite3_api->free(vecColumn.name); | |||
4672 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4673 | "Too many provided vector columns, maximum %d", | |||
4674 | VEC0_MAX_VECTOR_COLUMNS16); | |||
4675 | goto error; | |||
4676 | } | |||
4677 | ||||
4678 | if (vecColumn.dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS8192) { | |||
4679 | sqlite3_freesqlite3_api->free(vecColumn.name); | |||
4680 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4681 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4682 | "Dimension on vector column too large, provided %lld, maximum %lld", | |||
4683 | (i64)vecColumn.dimensions, SQLITE_VEC_VEC0_MAX_DIMENSIONS8192); | |||
4684 | goto error; | |||
4685 | } | |||
4686 | pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_VECTOR; | |||
4687 | pNew->user_column_idxs[user_column_idx] = numVectorColumns; | |||
4688 | memcpy(&pNew->vector_columns[numVectorColumns], &vecColumn, sizeof(vecColumn)); | |||
4689 | numVectorColumns++; | |||
4690 | user_column_idx++; | |||
4691 | ||||
4692 | continue; | |||
4693 | } | |||
4694 | ||||
4695 | // Scenario #2: Constructor argument is a partition key column definition, ie `user_id text partition key` | |||
4696 | rc = vec0_parse_partition_key_definition(argv[i], strlen(argv[i]), &cName, | |||
4697 | &cNameLength, &cType); | |||
4698 | if (rc == SQLITE_OK0) { | |||
4699 | if (numPartitionColumns >= VEC0_MAX_PARTITION_COLUMNS4) { | |||
4700 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4701 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4702 | "More than %d partition key columns were provided", | |||
4703 | VEC0_MAX_PARTITION_COLUMNS4); | |||
4704 | goto error; | |||
4705 | } | |||
4706 | partitionColumn.type = cType; | |||
4707 | partitionColumn.name_length = cNameLength; | |||
4708 | partitionColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName); | |||
4709 | if(!partitionColumn.name) { | |||
4710 | rc = SQLITE_NOMEM7; | |||
4711 | goto error; | |||
4712 | } | |||
4713 | ||||
4714 | pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_PARTITION; | |||
4715 | pNew->user_column_idxs[user_column_idx] = numPartitionColumns; | |||
4716 | memcpy(&pNew->paritition_columns[numPartitionColumns], &partitionColumn, sizeof(partitionColumn)); | |||
4717 | numPartitionColumns++; | |||
4718 | user_column_idx++; | |||
4719 | continue; | |||
4720 | } | |||
4721 | ||||
4722 | // Scenario #3: Constructor argument is a primary key column definition, ie `article_id text primary key` | |||
4723 | rc = vec0_parse_primary_key_definition(argv[i], strlen(argv[i]), &cName, | |||
4724 | &cNameLength, &cType); | |||
4725 | if (rc == SQLITE_OK0) { | |||
4726 | if (pkColumnName) { | |||
4727 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4728 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4729 | "More than one primary key definition was provided, vec0 only " | |||
4730 | "suports a single primary key column", | |||
4731 | argv[i]); | |||
4732 | goto error; | |||
4733 | } | |||
4734 | pkColumnName = cName; | |||
4735 | pkColumnNameLength = cNameLength; | |||
4736 | pkColumnType = cType; | |||
4737 | continue; | |||
4738 | } | |||
4739 | ||||
4740 | // Scenario #4: Constructor argument is a auxiliary column definition, ie `+contents text` | |||
4741 | rc = vec0_parse_auxiliary_column_definition(argv[i], strlen(argv[i]), &cName, | |||
4742 | &cNameLength, &cType); | |||
4743 | if(rc == SQLITE_OK0) { | |||
4744 | if (numAuxiliaryColumns >= VEC0_MAX_AUXILIARY_COLUMNS16) { | |||
4745 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4746 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4747 | "More than %d auxiliary columns were provided", | |||
4748 | VEC0_MAX_AUXILIARY_COLUMNS16); | |||
4749 | goto error; | |||
4750 | } | |||
4751 | auxColumn.type = cType; | |||
4752 | auxColumn.name_length = cNameLength; | |||
4753 | auxColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName); | |||
4754 | if(!auxColumn.name) { | |||
4755 | rc = SQLITE_NOMEM7; | |||
4756 | goto error; | |||
4757 | } | |||
4758 | ||||
4759 | pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY; | |||
4760 | pNew->user_column_idxs[user_column_idx] = numAuxiliaryColumns; | |||
4761 | memcpy(&pNew->auxiliary_columns[numAuxiliaryColumns], &auxColumn, sizeof(auxColumn)); | |||
4762 | numAuxiliaryColumns++; | |||
4763 | user_column_idx++; | |||
4764 | continue; | |||
4765 | } | |||
4766 | ||||
4767 | vec0_metadata_column_kind kind; | |||
4768 | rc = vec0_parse_metadata_column_definition(argv[i], strlen(argv[i]), &cName, | |||
4769 | &cNameLength, &kind); | |||
4770 | if(rc == SQLITE_OK0) { | |||
4771 | if (numMetadataColumns >= VEC0_MAX_METADATA_COLUMNS16) { | |||
4772 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4773 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4774 | "More than %d metadata columns were provided", | |||
4775 | VEC0_MAX_METADATA_COLUMNS16); | |||
4776 | goto error; | |||
4777 | } | |||
4778 | metadataColumn.kind = kind; | |||
4779 | metadataColumn.name_length = cNameLength; | |||
4780 | metadataColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName); | |||
4781 | if(!metadataColumn.name) { | |||
4782 | rc = SQLITE_NOMEM7; | |||
4783 | goto error; | |||
4784 | } | |||
4785 | ||||
4786 | pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_METADATA; | |||
4787 | pNew->user_column_idxs[user_column_idx] = numMetadataColumns; | |||
4788 | memcpy(&pNew->metadata_columns[numMetadataColumns], &metadataColumn, sizeof(metadataColumn)); | |||
4789 | numMetadataColumns++; | |||
4790 | user_column_idx++; | |||
4791 | continue; | |||
4792 | } | |||
4793 | ||||
4794 | // Scenario #4: Constructor argument is a table-level option, ie `chunk_size` | |||
4795 | ||||
4796 | char *key; | |||
4797 | char *value; | |||
4798 | int keyLength, valueLength; | |||
4799 | rc = vec0_parse_table_option(argv[i], strlen(argv[i]), &key, &keyLength, | |||
4800 | &value, &valueLength); | |||
4801 | if (rc == SQLITE_ERROR1) { | |||
4802 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4803 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "could not parse table option '%s'", argv[i]); | |||
4804 | goto error; | |||
4805 | } | |||
4806 | if (rc == SQLITE_OK0) { | |||
4807 | if (sqlite3_strnicmpsqlite3_api->strnicmp(key, "chunk_size", keyLength) == 0) { | |||
4808 | chunk_size = atoi(value); | |||
4809 | if (chunk_size <= 0) { | |||
4810 | // IMP: V01931_18769 | |||
4811 | *pzErr = | |||
4812 | sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4813 | "chunk_size must be a non-zero positive integer"); | |||
4814 | goto error; | |||
4815 | } | |||
4816 | if ((chunk_size % 8) != 0) { | |||
4817 | // IMP: V14110_30948 | |||
4818 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4819 | "chunk_size must be divisible by 8"); | |||
4820 | goto error; | |||
4821 | } | |||
4822 | #define SQLITE_VEC_CHUNK_SIZE_MAX4096 4096 | |||
4823 | if (chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX4096) { | |||
4824 | *pzErr = | |||
4825 | sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "chunk_size too large"); | |||
4826 | goto error; | |||
4827 | } | |||
4828 | } else { | |||
4829 | // IMP: V27642_11712 | |||
4830 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4831 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "Unknown table option: %.*s", keyLength, key); | |||
4832 | goto error; | |||
4833 | } | |||
4834 | continue; | |||
4835 | } | |||
4836 | ||||
4837 | // Scenario #5: Unknown constructor argument | |||
4838 | *pzErr = | |||
4839 | sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "Could not parse '%s'", argv[i]); | |||
4840 | goto error; | |||
4841 | } | |||
4842 | ||||
4843 | if (chunk_size < 0) { | |||
4844 | chunk_size = 1024; | |||
4845 | } | |||
4846 | ||||
4847 | if (numVectorColumns <= 0) { | |||
4848 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4849 | "At least one vector column is required"); | |||
4850 | goto error; | |||
4851 | } | |||
4852 | ||||
4853 | sqlite3_str *createStr = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); | |||
4854 | sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "CREATE TABLE x("); | |||
4855 | if (pkColumnName) { | |||
4856 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\" primary key, ", pkColumnNameLength, | |||
4857 | pkColumnName); | |||
4858 | } else { | |||
4859 | sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "rowid, "); | |||
4860 | } | |||
4861 | for (int i = 0; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns + numMetadataColumns; i++) { | |||
4862 | switch(pNew->user_column_kinds[i]) { | |||
4863 | case SQLITE_VEC0_USER_COLUMN_KIND_VECTOR: { | |||
4864 | int vector_idx = pNew->user_column_idxs[i]; | |||
4865 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ", | |||
4866 | pNew->vector_columns[vector_idx].name_length, | |||
4867 | pNew->vector_columns[vector_idx].name); | |||
4868 | break; | |||
4869 | } | |||
4870 | case SQLITE_VEC0_USER_COLUMN_KIND_PARTITION: { | |||
4871 | int partition_idx = pNew->user_column_idxs[i]; | |||
4872 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ", | |||
4873 | pNew->paritition_columns[partition_idx].name_length, | |||
4874 | pNew->paritition_columns[partition_idx].name); | |||
4875 | break; | |||
4876 | } | |||
4877 | case SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY: { | |||
4878 | int auxiliary_idx = pNew->user_column_idxs[i]; | |||
4879 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ", | |||
4880 | pNew->auxiliary_columns[auxiliary_idx].name_length, | |||
4881 | pNew->auxiliary_columns[auxiliary_idx].name); | |||
4882 | break; | |||
4883 | } | |||
4884 | case SQLITE_VEC0_USER_COLUMN_KIND_METADATA: { | |||
4885 | int metadata_idx = pNew->user_column_idxs[i]; | |||
4886 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ", | |||
4887 | pNew->metadata_columns[metadata_idx].name_length, | |||
4888 | pNew->metadata_columns[metadata_idx].name); | |||
4889 | break; | |||
4890 | } | |||
4891 | } | |||
4892 | ||||
4893 | } | |||
4894 | sqlite3_str_appendallsqlite3_api->str_appendall(createStr, " distance hidden, k hidden) "); | |||
4895 | if (pkColumnName) { | |||
4896 | sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "without rowid "); | |||
4897 | } | |||
4898 | zSql = sqlite3_str_finishsqlite3_api->str_finish(createStr); | |||
4899 | if (!zSql) { | |||
4900 | goto error; | |||
4901 | } | |||
4902 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, zSql); | |||
4903 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
4904 | if (rc != SQLITE_OK0) { | |||
4905 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " | |||
4906 | "could not declare virtual table, '%s'", | |||
4907 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
4908 | goto error; | |||
4909 | } | |||
4910 | ||||
4911 | const char *schemaName = argv[1]; | |||
4912 | const char *tableName = argv[2]; | |||
4913 | ||||
4914 | pNew->db = db; | |||
4915 | pNew->pkIsText = pkColumnType == SQLITE_TEXT3; | |||
4916 | pNew->schemaName = sqlite3_mprintfsqlite3_api->mprintf("%s", schemaName); | |||
4917 | if (!pNew->schemaName) { | |||
4918 | goto error; | |||
4919 | } | |||
4920 | pNew->tableName = sqlite3_mprintfsqlite3_api->mprintf("%s", tableName); | |||
4921 | if (!pNew->tableName) { | |||
4922 | goto error; | |||
4923 | } | |||
4924 | pNew->shadowRowidsName = sqlite3_mprintfsqlite3_api->mprintf("%s_rowids", tableName); | |||
4925 | if (!pNew->shadowRowidsName) { | |||
4926 | goto error; | |||
4927 | } | |||
4928 | pNew->shadowChunksName = sqlite3_mprintfsqlite3_api->mprintf("%s_chunks", tableName); | |||
4929 | if (!pNew->shadowChunksName) { | |||
4930 | goto error; | |||
4931 | } | |||
4932 | pNew->numVectorColumns = numVectorColumns; | |||
4933 | pNew->numPartitionColumns = numPartitionColumns; | |||
4934 | pNew->numAuxiliaryColumns = numAuxiliaryColumns; | |||
4935 | pNew->numMetadataColumns = numMetadataColumns; | |||
4936 | ||||
4937 | for (int i = 0; i < pNew->numVectorColumns; i++) { | |||
4938 | pNew->shadowVectorChunksNames[i] = | |||
4939 | sqlite3_mprintfsqlite3_api->mprintf("%s_vector_chunks%02d", tableName, i); | |||
4940 | if (!pNew->shadowVectorChunksNames[i]) { | |||
4941 | goto error; | |||
4942 | } | |||
4943 | } | |||
4944 | for (int i = 0; i < pNew->numMetadataColumns; i++) { | |||
4945 | pNew->shadowMetadataChunksNames[i] = | |||
4946 | sqlite3_mprintfsqlite3_api->mprintf("%s_metadatachunks%02d", tableName, i); | |||
4947 | if (!pNew->shadowMetadataChunksNames[i]) { | |||
4948 | goto error; | |||
4949 | } | |||
4950 | } | |||
4951 | pNew->chunk_size = chunk_size; | |||
4952 | ||||
4953 | // if xCreate, then create the necessary shadow tables | |||
4954 | if (isCreate) { | |||
4955 | sqlite3_stmt *stmt; | |||
4956 | int rc; | |||
4957 | ||||
4958 | char * zCreateInfo = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE "VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"" " (key text primary key, value any)", pNew->schemaName, pNew->tableName); | |||
4959 | if(!zCreateInfo) { | |||
4960 | goto error; | |||
4961 | } | |||
4962 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateInfo, -1, &stmt, NULL((void*)0)); | |||
4963 | ||||
4964 | sqlite3_freesqlite3_api->free((void *) zCreateInfo); | |||
4965 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
4966 | // TODO(IMP) | |||
4967 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4968 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_info' shadow table: %s", | |||
4969 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
4970 | goto error; | |||
4971 | } | |||
4972 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4973 | ||||
4974 | char * zSeedInfo = sqlite3_mprintfsqlite3_api->mprintf( | |||
4975 | "INSERT INTO "VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"" "(key, value) VALUES " | |||
4976 | "(?1, ?2), (?3, ?4), (?5, ?6), (?7, ?8) ", | |||
4977 | pNew->schemaName, pNew->tableName | |||
4978 | ); | |||
4979 | if(!zSeedInfo) { | |||
4980 | goto error; | |||
4981 | } | |||
4982 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSeedInfo, -1, &stmt, NULL((void*)0)); | |||
4983 | sqlite3_freesqlite3_api->free((void *) zSeedInfo); | |||
4984 | if (rc != SQLITE_OK0) { | |||
4985 | // TODO(IMP) | |||
4986 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
4987 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not seed '_info' shadow table: %s", | |||
4988 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
4989 | goto error; | |||
4990 | } | |||
4991 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 1, "CREATE_VERSION", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
4992 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 2, SQLITE_VEC_VERSION"v0.1.7-alpha.2", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
4993 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 3, "CREATE_VERSION_MAJOR", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
4994 | sqlite3_bind_intsqlite3_api->bind_int(stmt, 4, SQLITE_VEC_VERSION_MAJOR0); | |||
4995 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 5, "CREATE_VERSION_MINOR", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
4996 | sqlite3_bind_intsqlite3_api->bind_int(stmt, 6, SQLITE_VEC_VERSION_MINOR1); | |||
4997 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 7, "CREATE_VERSION_PATCH", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
4998 | sqlite3_bind_intsqlite3_api->bind_int(stmt, 8, SQLITE_VEC_VERSION_PATCH7); | |||
4999 | ||||
5000 | if(sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101) { | |||
5001 | // TODO(IMP) | |||
5002 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5003 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not seed '_info' shadow table: %s", | |||
5004 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
5005 | goto error; | |||
5006 | } | |||
5007 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5008 | ||||
5009 | ||||
5010 | ||||
5011 | // create the _chunks shadow table | |||
5012 | char *zCreateShadowChunks = NULL((void*)0); | |||
5013 | if(pNew->numPartitionColumns) { | |||
5014 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); | |||
5015 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" "(", pNew->schemaName, pNew->tableName); | |||
5016 | sqlite3_str_appendallsqlite3_api->str_appendall(s, "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," "size INTEGER NOT NULL,"); | |||
5017 | sqlite3_str_appendallsqlite3_api->str_appendall(s, "sequence_id integer,"); | |||
5018 | for(int i = 0; i < pNew->numPartitionColumns;i++) { | |||
5019 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "partition%02d,", i); | |||
5020 | } | |||
5021 | sqlite3_str_appendallsqlite3_api->str_appendall(s, "validity BLOB NOT NULL, rowids BLOB NOT NULL);"); | |||
5022 | zCreateShadowChunks = sqlite3_str_finishsqlite3_api->str_finish(s); | |||
5023 | }else { | |||
5024 | zCreateShadowChunks = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_CHUNKS_CREATE"CREATE TABLE " "\"%w\".\"%w_chunks\"" "(" "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," "size INTEGER NOT NULL," "validity BLOB NOT NULL," "rowids BLOB NOT NULL" ");", | |||
5025 | pNew->schemaName, pNew->tableName); | |||
5026 | } | |||
5027 | if (!zCreateShadowChunks) { | |||
5028 | goto error; | |||
5029 | } | |||
5030 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateShadowChunks, -1, &stmt, 0); | |||
5031 | sqlite3_freesqlite3_api->free((void *)zCreateShadowChunks); | |||
5032 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5033 | // IMP: V17740_01811 | |||
5034 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5035 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_chunks' shadow table: %s", | |||
5036 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
5037 | goto error; | |||
5038 | } | |||
5039 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5040 | ||||
5041 | // create the _rowids shadow table | |||
5042 | char *zCreateShadowRowids; | |||
5043 | if (pNew->pkIsText) { | |||
5044 | // adds a "text unique not null" constraint to the id column | |||
5045 | zCreateShadowRowids = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT"CREATE TABLE " "\"%w\".\"%w_rowids\"" "(" "rowid INTEGER PRIMARY KEY AUTOINCREMENT," "id TEXT UNIQUE NOT NULL," "chunk_id INTEGER," "chunk_offset INTEGER" ");", | |||
5046 | pNew->schemaName, pNew->tableName); | |||
5047 | } else { | |||
5048 | zCreateShadowRowids = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_ROWIDS_CREATE_BASIC"CREATE TABLE " "\"%w\".\"%w_rowids\"" "(" "rowid INTEGER PRIMARY KEY AUTOINCREMENT," "id," "chunk_id INTEGER," "chunk_offset INTEGER" ");", | |||
5049 | pNew->schemaName, pNew->tableName); | |||
5050 | } | |||
5051 | if (!zCreateShadowRowids) { | |||
5052 | goto error; | |||
5053 | } | |||
5054 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateShadowRowids, -1, &stmt, 0); | |||
5055 | sqlite3_freesqlite3_api->free((void *)zCreateShadowRowids); | |||
5056 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5057 | // IMP: V11631_28470 | |||
5058 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5059 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_rowids' shadow table: %s", | |||
5060 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
5061 | goto error; | |||
5062 | } | |||
5063 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5064 | ||||
5065 | for (int i = 0; i < pNew->numVectorColumns; i++) { | |||
5066 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_VECTOR_N_CREATE"CREATE TABLE " "\"%w\".\"%w_vector_chunks%02d\"" "(" "rowid PRIMARY KEY," "vectors BLOB NOT NULL" ");", | |||
5067 | pNew->schemaName, pNew->tableName, i); | |||
5068 | if (!zSql) { | |||
5069 | goto error; | |||
5070 | } | |||
5071 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0); | |||
5072 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5073 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5074 | // IMP: V25919_09989 | |||
5075 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5076 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
5077 | "Could not create '_vector_chunks%02d' shadow table: %s", i, | |||
5078 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
5079 | goto error; | |||
5080 | } | |||
5081 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5082 | } | |||
5083 | ||||
5084 | for (int i = 0; i < pNew->numMetadataColumns; i++) { | |||
5085 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"" "(rowid PRIMARY KEY, data BLOB NOT NULL);", | |||
5086 | pNew->schemaName, pNew->tableName, i); | |||
5087 | if (!zSql) { | |||
5088 | goto error; | |||
5089 | } | |||
5090 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0); | |||
5091 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5092 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5093 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5094 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
5095 | "Could not create '_metata_chunks%02d' shadow table: %s", i, | |||
5096 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
5097 | goto error; | |||
5098 | } | |||
5099 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5100 | ||||
5101 | if(pNew->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { | |||
5102 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" "(rowid PRIMARY KEY, data TEXT);", | |||
5103 | pNew->schemaName, pNew->tableName, i); | |||
5104 | if (!zSql) { | |||
5105 | goto error; | |||
5106 | } | |||
5107 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0); | |||
5108 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5109 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5110 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5111 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
5112 | "Could not create '_metadatatext%02d' shadow table: %s", i, | |||
5113 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
5114 | goto error; | |||
5115 | } | |||
5116 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5117 | ||||
5118 | } | |||
5119 | } | |||
5120 | ||||
5121 | if(pNew->numAuxiliaryColumns > 0) { | |||
5122 | sqlite3_stmt * stmt; | |||
5123 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); | |||
5124 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "CREATE TABLE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" "( rowid integer PRIMARY KEY ", pNew->schemaName, pNew->tableName); | |||
5125 | for(int i = 0; i < pNew->numAuxiliaryColumns; i++) { | |||
5126 | sqlite3_str_appendfsqlite3_api->str_appendf(s, ", value%02d", i); | |||
5127 | } | |||
5128 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ")"); | |||
5129 | char *zSql = sqlite3_str_finishsqlite3_api->str_finish(s); | |||
5130 | if(!zSql) { | |||
5131 | goto error; | |||
5132 | } | |||
5133 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, NULL((void*)0)); | |||
5134 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5135 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5136 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
5137 | "Could not create auxiliary shadow table: %s", | |||
5138 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
5139 | ||||
5140 | goto error; | |||
5141 | } | |||
5142 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5143 | } | |||
5144 | } | |||
5145 | ||||
5146 | *ppVtab = (sqlite3_vtab *)pNew; | |||
5147 | return SQLITE_OK0; | |||
5148 | ||||
5149 | error: | |||
5150 | vec0_free(pNew); | |||
5151 | return SQLITE_ERROR1; | |||
5152 | } | |||
5153 | ||||
5154 | static int vec0Create(sqlite3 *db, void *pAux, int argc, | |||
5155 | const char *const *argv, sqlite3_vtab **ppVtab, | |||
5156 | char **pzErr) { | |||
5157 | return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, true1); | |||
5158 | } | |||
5159 | static int vec0Connect(sqlite3 *db, void *pAux, int argc, | |||
5160 | const char *const *argv, sqlite3_vtab **ppVtab, | |||
5161 | char **pzErr) { | |||
5162 | return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, false0); | |||
5163 | } | |||
5164 | ||||
5165 | static int vec0Disconnect(sqlite3_vtab *pVtab) { | |||
5166 | vec0_vtab *p = (vec0_vtab *)pVtab; | |||
5167 | vec0_free(p); | |||
5168 | sqlite3_freesqlite3_api->free(p); | |||
5169 | return SQLITE_OK0; | |||
5170 | } | |||
5171 | static int vec0Destroy(sqlite3_vtab *pVtab) { | |||
5172 | vec0_vtab *p = (vec0_vtab *)pVtab; | |||
5173 | sqlite3_stmt *stmt; | |||
5174 | int rc; | |||
5175 | const char *zSql; | |||
5176 | ||||
5177 | // Free up any sqlite3_stmt, otherwise DROPs on those tables will fail | |||
5178 | vec0_free_resources(p); | |||
5179 | ||||
5180 | // TODO(test) later: can't evidence-of here, bc always gives "SQL logic error" instead of | |||
5181 | // provided error | |||
5182 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", p->schemaName, | |||
5183 | p->tableName); | |||
5184 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); | |||
5185 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5186 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5187 | rc = SQLITE_ERROR1; | |||
5188 | vtab_set_error(pVtab, "could not drop chunks shadow table"); | |||
5189 | goto done; | |||
5190 | } | |||
5191 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5192 | ||||
5193 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"", p->schemaName, | |||
5194 | p->tableName); | |||
5195 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); | |||
5196 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5197 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5198 | rc = SQLITE_ERROR1; | |||
5199 | vtab_set_error(pVtab, "could not drop info shadow table"); | |||
5200 | goto done; | |||
5201 | } | |||
5202 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5203 | ||||
5204 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"", p->schemaName, | |||
5205 | p->tableName); | |||
5206 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); | |||
5207 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5208 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5209 | rc = SQLITE_ERROR1; | |||
5210 | goto done; | |||
5211 | } | |||
5212 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5213 | ||||
5214 | for (int i = 0; i < p->numVectorColumns; i++) { | |||
5215 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE \"%w\".\"%w\"", p->schemaName, | |||
5216 | p->shadowVectorChunksNames[i]); | |||
5217 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); | |||
5218 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5219 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5220 | rc = SQLITE_ERROR1; | |||
5221 | goto done; | |||
5222 | } | |||
5223 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5224 | } | |||
5225 | ||||
5226 | if(p->numAuxiliaryColumns > 0) { | |||
5227 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"", p->schemaName, p->tableName); | |||
5228 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); | |||
5229 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5230 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5231 | rc = SQLITE_ERROR1; | |||
5232 | goto done; | |||
5233 | } | |||
5234 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5235 | } | |||
5236 | ||||
5237 | ||||
5238 | for (int i = 0; i < p->numMetadataColumns; i++) { | |||
5239 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"", p->schemaName,p->tableName, i); | |||
5240 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); | |||
5241 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5242 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5243 | rc = SQLITE_ERROR1; | |||
5244 | goto done; | |||
5245 | } | |||
5246 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5247 | ||||
5248 | if(p->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { | |||
5249 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"", p->schemaName,p->tableName, i); | |||
5250 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); | |||
5251 | sqlite3_freesqlite3_api->free((void *)zSql); | |||
5252 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { | |||
5253 | rc = SQLITE_ERROR1; | |||
5254 | goto done; | |||
5255 | } | |||
5256 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5257 | } | |||
5258 | } | |||
5259 | ||||
5260 | stmt = NULL((void*)0); | |||
5261 | rc = SQLITE_OK0; | |||
5262 | ||||
5263 | done: | |||
5264 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
5265 | vec0_free(p); | |||
5266 | // If there was an error | |||
5267 | if (rc == SQLITE_OK0) { | |||
5268 | sqlite3_freesqlite3_api->free(p); | |||
5269 | } | |||
5270 | return rc; | |||
5271 | } | |||
5272 | ||||
5273 | static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { | |||
5274 | UNUSED_PARAMETER(p)(void)(p); | |||
5275 | vec0_cursor *pCur; | |||
5276 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); | |||
5277 | if (pCur == 0) | |||
5278 | return SQLITE_NOMEM7; | |||
5279 | memset(pCur, 0, sizeof(*pCur)); | |||
5280 | *ppCursor = &pCur->base; | |||
5281 | return SQLITE_OK0; | |||
5282 | } | |||
5283 | ||||
5284 | static int vec0Close(sqlite3_vtab_cursor *cur) { | |||
5285 | vec0_cursor *pCur = (vec0_cursor *)cur; | |||
5286 | vec0_cursor_clear(pCur); | |||
5287 | sqlite3_freesqlite3_api->free(pCur); | |||
5288 | return SQLITE_OK0; | |||
5289 | } | |||
5290 | ||||
// Tags for the different kinds of "values" handed to vec0Filter through
// argv/argc. vec0BestIndex writes one tag character into idxStr for each
// argv slot it assigns, so the tag records what that slot is used for.
typedef enum {
  // If any values are updated, please update the ARCHITECTURE.md docs accordingly!

  // KNN query: the MATCH term on a vector column.
  VEC0_IDXSTR_KIND_KNN_MATCH = '{',
  // KNN query: the LIMIT or `k = ?` term.
  VEC0_IDXSTR_KIND_KNN_K = '}',
  // KNN query: a `rowid in (...)` term.
  VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',
  // KNN query: a constraint on a partition key column.
  VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',
  // Point query: the `rowid = ?` term.
  VEC0_IDXSTR_KIND_POINT_ID = '!',
  // KNN query: a constraint on a metadata column.
  VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
} vec0_idxstr_kind;
5303 | ||||
// Comparison operators that vec0 accepts on partition key columns. Each one
// stands for a SQLITE_INDEX_CONSTRAINT_* value, encoded as a single printable
// character so that it fits nicely in idxstr.
typedef enum {
  // If any values are updated, please update the ARCHITECTURE.md docs accordingly!

  VEC0_PARTITION_OPERATOR_EQ = 'a',  // =
  VEC0_PARTITION_OPERATOR_GT = 'b',  // >
  VEC0_PARTITION_OPERATOR_LE = 'c',  // <=
  VEC0_PARTITION_OPERATOR_LT = 'd',  // <
  VEC0_PARTITION_OPERATOR_GE = 'e',  // >=
  VEC0_PARTITION_OPERATOR_NE = 'f',  // !=
} vec0_partition_operator;
// Comparison operators that vec0 accepts on metadata columns, encoded as
// single characters for idxstr. The first six share their character values
// with vec0_partition_operator; IN is specific to metadata columns.
typedef enum {
  VEC0_METADATA_OPERATOR_EQ = 'a',  // =
  VEC0_METADATA_OPERATOR_GT = 'b',  // >
  VEC0_METADATA_OPERATOR_LE = 'c',  // <=
  VEC0_METADATA_OPERATOR_LT = 'd',  // <
  VEC0_METADATA_OPERATOR_GE = 'e',  // >=
  VEC0_METADATA_OPERATOR_NE = 'f',  // !=
  VEC0_METADATA_OPERATOR_IN = 'g',  // xxx in (...)
} vec0_metadata_operator;
5325 | ||||
5326 | static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) { | |||
5327 | vec0_vtab *p = (vec0_vtab *)pVTab; | |||
5328 | /** | |||
5329 | * Possible query plans are: | |||
5330 | * 1. KNN when: | |||
5331 | * a) An `MATCH` op on vector column | |||
5332 | * b) ORDER BY on distance column | |||
5333 | * c) LIMIT | |||
5334 | * d) rowid in (...) OPTIONAL | |||
5335 | * 2. Point when: | |||
5336 | * a) An `EQ` op on rowid column | |||
5337 | * 3. else: fullscan | |||
5338 | * | |||
5339 | */ | |||
5340 | int iMatchTerm = -1; | |||
5341 | int iMatchVectorTerm = -1; | |||
5342 | int iLimitTerm = -1; | |||
5343 | int iRowidTerm = -1; | |||
5344 | int iKTerm = -1; | |||
5345 | int iRowidInTerm = -1; | |||
5346 | int hasAuxConstraint = 0; | |||
5347 | ||||
5348 | #ifdef SQLITE_VEC_DEBUG | |||
5349 | printf("pIdxInfo->nOrderBy=%d, pIdxInfo->nConstraint=%d\n", pIdxInfo->nOrderBy, pIdxInfo->nConstraint); | |||
5350 | #endif | |||
5351 | ||||
5352 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { | |||
5353 | u8 vtabIn = 0; | |||
5354 | ||||
5355 | #if COMPILER_SUPPORTS_VTAB_IN1 | |||
5356 | if (sqlite3_libversion_numbersqlite3_api->libversion_number() >= 3038000) { | |||
5357 | vtabIn = sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, -1); | |||
5358 | } | |||
5359 | #endif | |||
5360 | ||||
5361 | #ifdef SQLITE_VEC_DEBUG | |||
5362 | printf("xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d\n", i, | |||
5363 | pIdxInfo->aConstraint[i].usable, pIdxInfo->aConstraint[i].iColumn, | |||
5364 | pIdxInfo->aConstraint[i].op, vtabIn); | |||
5365 | #endif | |||
5366 | if (!pIdxInfo->aConstraint[i].usable) | |||
5367 | continue; | |||
5368 | ||||
5369 | int iColumn = pIdxInfo->aConstraint[i].iColumn; | |||
5370 | int op = pIdxInfo->aConstraint[i].op; | |||
5371 | ||||
5372 | if (op == SQLITE_INDEX_CONSTRAINT_LIMIT73) { | |||
5373 | iLimitTerm = i; | |||
5374 | } | |||
5375 | if (op == SQLITE_INDEX_CONSTRAINT_MATCH64 && | |||
5376 | vec0_column_idx_is_vector(p, iColumn)) { | |||
5377 | if (iMatchTerm > -1) { | |||
5378 | vtab_set_error( | |||
5379 | pVTab, "only 1 MATCH operator is allowed in a single vec0 query"); | |||
5380 | return SQLITE_ERROR1; | |||
5381 | } | |||
5382 | iMatchTerm = i; | |||
5383 | iMatchVectorTerm = vec0_column_idx_to_vector_idx(p, iColumn); | |||
5384 | } | |||
5385 | if (op == SQLITE_INDEX_CONSTRAINT_EQ2 && iColumn == VEC0_COLUMN_ID0) { | |||
5386 | if (vtabIn) { | |||
5387 | if (iRowidInTerm != -1) { | |||
5388 | vtab_set_error(pVTab, "only 1 'rowid in (..)' operator is allowed in " | |||
5389 | "a single vec0 query"); | |||
5390 | return SQLITE_ERROR1; | |||
5391 | } | |||
5392 | iRowidInTerm = i; | |||
5393 | ||||
5394 | } else { | |||
5395 | iRowidTerm = i; | |||
5396 | } | |||
5397 | } | |||
5398 | if (op == SQLITE_INDEX_CONSTRAINT_EQ2 && iColumn == vec0_column_k_idx(p)) { | |||
5399 | iKTerm = i; | |||
5400 | } | |||
5401 | if( | |||
5402 | (op != SQLITE_INDEX_CONSTRAINT_LIMIT73 && op != SQLITE_INDEX_CONSTRAINT_OFFSET74) | |||
5403 | && vec0_column_idx_is_auxiliary(p, iColumn)) { | |||
5404 | hasAuxConstraint = 1; | |||
5405 | } | |||
5406 | } | |||
5407 | ||||
5408 | sqlite3_str *idxStr = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); | |||
5409 | int rc; | |||
5410 | ||||
5411 | if (iMatchTerm >= 0) { | |||
5412 | if (iLimitTerm < 0 && iKTerm < 0) { | |||
5413 | vtab_set_error( | |||
5414 | pVTab, | |||
5415 | "A LIMIT or 'k = ?' constraint is required on vec0 knn queries."); | |||
5416 | rc = SQLITE_ERROR1; | |||
5417 | goto done; | |||
5418 | } | |||
5419 | if (iLimitTerm >= 0 && iKTerm >= 0) { | |||
5420 | vtab_set_error(pVTab, "Only LIMIT or 'k =?' can be provided, not both"); | |||
5421 | rc = SQLITE_ERROR1; | |||
5422 | goto done; | |||
5423 | } | |||
5424 | ||||
5425 | if (pIdxInfo->nOrderBy) { | |||
5426 | if (pIdxInfo->nOrderBy > 1) { | |||
5427 | vtab_set_error(pVTab, "Only a single 'ORDER BY distance' clause is " | |||
5428 | "allowed on vec0 KNN queries"); | |||
5429 | rc = SQLITE_ERROR1; | |||
5430 | goto done; | |||
5431 | } | |||
5432 | if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) { | |||
5433 | vtab_set_error(pVTab, | |||
5434 | "Only a single 'ORDER BY distance' clause is allowed on " | |||
5435 | "vec0 KNN queries, not on other columns"); | |||
5436 | rc = SQLITE_ERROR1; | |||
5437 | goto done; | |||
5438 | } | |||
5439 | if (pIdxInfo->aOrderBy[0].desc) { | |||
5440 | vtab_set_error( | |||
5441 | pVTab, "Only ascending in ORDER BY distance clause is supported, " | |||
5442 | "DESC is not supported yet."); | |||
5443 | rc = SQLITE_ERROR1; | |||
5444 | goto done; | |||
5445 | } | |||
5446 | } | |||
5447 | ||||
5448 | if(hasAuxConstraint) { | |||
5449 | // IMP: V25623_09693 | |||
5450 | vtab_set_error(pVTab, "An illegal WHERE constraint was provided on a vec0 auxiliary column in a KNN query."); | |||
5451 | rc = SQLITE_ERROR1; | |||
5452 | goto done; | |||
5453 | } | |||
5454 | ||||
5455 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_KNN); | |||
5456 | ||||
5457 | int argvIndex = 1; | |||
5458 | pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = argvIndex++; | |||
5459 | pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1; | |||
5460 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_MATCH); | |||
5461 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_'); | |||
5462 | ||||
5463 | if (iLimitTerm >= 0) { | |||
5464 | pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = argvIndex++; | |||
5465 | pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1; | |||
5466 | } else { | |||
5467 | pIdxInfo->aConstraintUsage[iKTerm].argvIndex = argvIndex++; | |||
5468 | pIdxInfo->aConstraintUsage[iKTerm].omit = 1; | |||
5469 | } | |||
5470 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_K); | |||
5471 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_'); | |||
5472 | ||||
5473 | #if COMPILER_SUPPORTS_VTAB_IN1 | |||
5474 | if (iRowidInTerm >= 0) { | |||
5475 | // already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when | |||
5476 | // vtabIn == 1 | |||
5477 | sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, iRowidInTerm, 1); | |||
5478 | pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = argvIndex++; | |||
5479 | pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1; | |||
5480 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_ROWID_IN); | |||
5481 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_'); | |||
5482 | } | |||
5483 | #endif | |||
5484 | ||||
5485 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { | |||
5486 | if (!pIdxInfo->aConstraint[i].usable) | |||
5487 | continue; | |||
5488 | ||||
5489 | int iColumn = pIdxInfo->aConstraint[i].iColumn; | |||
5490 | int op = pIdxInfo->aConstraint[i].op; | |||
5491 | if(op == SQLITE_INDEX_CONSTRAINT_LIMIT73 || op == SQLITE_INDEX_CONSTRAINT_OFFSET74) { | |||
5492 | continue; | |||
5493 | } | |||
5494 | if(!vec0_column_idx_is_partition(p, iColumn)) { | |||
5495 | continue; | |||
5496 | } | |||
5497 | ||||
5498 | int partition_idx = vec0_column_idx_to_partition_idx(p, iColumn); | |||
5499 | char value = 0; | |||
5500 | ||||
5501 | switch(op) { | |||
5502 | case SQLITE_INDEX_CONSTRAINT_EQ2: { | |||
5503 | value = VEC0_PARTITION_OPERATOR_EQ; | |||
5504 | break; | |||
5505 | } | |||
5506 | case SQLITE_INDEX_CONSTRAINT_GT4: { | |||
5507 | value = VEC0_PARTITION_OPERATOR_GT; | |||
5508 | break; | |||
5509 | } | |||
5510 | case SQLITE_INDEX_CONSTRAINT_LE8: { | |||
5511 | value = VEC0_PARTITION_OPERATOR_LE; | |||
5512 | break; | |||
5513 | } | |||
5514 | case SQLITE_INDEX_CONSTRAINT_LT16: { | |||
5515 | value = VEC0_PARTITION_OPERATOR_LT; | |||
5516 | break; | |||
5517 | } | |||
5518 | case SQLITE_INDEX_CONSTRAINT_GE32: { | |||
5519 | value = VEC0_PARTITION_OPERATOR_GE; | |||
5520 | break; | |||
5521 | } | |||
5522 | case SQLITE_INDEX_CONSTRAINT_NE68: { | |||
5523 | value = VEC0_PARTITION_OPERATOR_NE; | |||
5524 | break; | |||
5525 | } | |||
5526 | } | |||
5527 | ||||
5528 | if(value) { | |||
5529 | pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++; | |||
5530 | pIdxInfo->aConstraintUsage[i].omit = 1; | |||
5531 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT); | |||
5532 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, 'A' + partition_idx); | |||
5533 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, value); | |||
5534 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, '_'); | |||
5535 | } | |||
5536 | ||||
5537 | } | |||
5538 | ||||
5539 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { | |||
5540 | if (!pIdxInfo->aConstraint[i].usable) | |||
5541 | continue; | |||
5542 | ||||
5543 | int iColumn = pIdxInfo->aConstraint[i].iColumn; | |||
5544 | int op = pIdxInfo->aConstraint[i].op; | |||
5545 | if(op == SQLITE_INDEX_CONSTRAINT_LIMIT73 || op == SQLITE_INDEX_CONSTRAINT_OFFSET74) { | |||
5546 | continue; | |||
5547 | } | |||
5548 | if(!vec0_column_idx_is_metadata(p, iColumn)) { | |||
5549 | continue; | |||
5550 | } | |||
5551 | ||||
5552 | int metadata_idx = vec0_column_idx_to_metadata_idx(p, iColumn); | |||
5553 | char value = 0; | |||
5554 | ||||
5555 | switch(op) { | |||
5556 | case SQLITE_INDEX_CONSTRAINT_EQ2: { | |||
5557 | int vtabIn = 0; | |||
5558 | #if COMPILER_SUPPORTS_VTAB_IN1 | |||
5559 | if (sqlite3_libversion_numbersqlite3_api->libversion_number() >= 3038000) { | |||
5560 | vtabIn = sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, -1); | |||
5561 | } | |||
5562 | if(vtabIn) { | |||
5563 | switch(p->metadata_columns[metadata_idx].kind) { | |||
5564 | case VEC0_METADATA_COLUMN_KIND_FLOAT: | |||
5565 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { | |||
5566 | // IMP: V15248_32086 | |||
5567 | rc = SQLITE_ERROR1; | |||
5568 | vtab_set_error(pVTab, "'xxx in (...)' is only available on INTEGER or TEXT metadata columns."); | |||
5569 | goto done; | |||
5570 | break; | |||
5571 | } | |||
5572 | case VEC0_METADATA_COLUMN_KIND_INTEGER: | |||
5573 | case VEC0_METADATA_COLUMN_KIND_TEXT: { | |||
5574 | break; | |||
5575 | } | |||
5576 | } | |||
5577 | value = VEC0_METADATA_OPERATOR_IN; | |||
5578 | sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, 1); | |||
5579 | }else | |||
5580 | #endif | |||
5581 | { | |||
5582 | value = VEC0_PARTITION_OPERATOR_EQ; | |||
5583 | } | |||
5584 | break; | |||
5585 | } | |||
5586 | case SQLITE_INDEX_CONSTRAINT_GT4: { | |||
5587 | value = VEC0_METADATA_OPERATOR_GT; | |||
5588 | break; | |||
5589 | } | |||
5590 | case SQLITE_INDEX_CONSTRAINT_LE8: { | |||
5591 | value = VEC0_METADATA_OPERATOR_LE; | |||
5592 | break; | |||
5593 | } | |||
5594 | case SQLITE_INDEX_CONSTRAINT_LT16: { | |||
5595 | value = VEC0_METADATA_OPERATOR_LT; | |||
5596 | break; | |||
5597 | } | |||
5598 | case SQLITE_INDEX_CONSTRAINT_GE32: { | |||
5599 | value = VEC0_METADATA_OPERATOR_GE; | |||
5600 | break; | |||
5601 | } | |||
5602 | case SQLITE_INDEX_CONSTRAINT_NE68: { | |||
5603 | value = VEC0_METADATA_OPERATOR_NE; | |||
5604 | break; | |||
5605 | } | |||
5606 | default: { | |||
5607 | // IMP: V16511_00582 | |||
5608 | rc = SQLITE_ERROR1; | |||
5609 | vtab_set_error(pVTab, | |||
5610 | "An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. " | |||
5611 | "Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed." | |||
5612 | ); | |||
5613 | goto done; | |||
5614 | } | |||
5615 | } | |||
5616 | ||||
5617 | if(p->metadata_columns[metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) { | |||
5618 | if(!(value == VEC0_METADATA_OPERATOR_EQ || value == VEC0_METADATA_OPERATOR_NE)) { | |||
5619 | // IMP: V10145_26984 | |||
5620 | rc = SQLITE_ERROR1; | |||
5621 | vtab_set_error(pVTab, "ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns."); | |||
5622 | goto done; | |||
5623 | } | |||
5624 | } | |||
5625 | ||||
5626 | pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++; | |||
5627 | pIdxInfo->aConstraintUsage[i].omit = 1; | |||
5628 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_METADATA_CONSTRAINT); | |||
5629 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, 'A' + metadata_idx); | |||
5630 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, value); | |||
5631 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, '_'); | |||
5632 | ||||
5633 | } | |||
5634 | ||||
5635 | ||||
5636 | ||||
5637 | pIdxInfo->idxNum = iMatchVectorTerm; | |||
5638 | pIdxInfo->estimatedCost = 30.0; | |||
5639 | pIdxInfo->estimatedRows = 10; | |||
5640 | ||||
5641 | } else if (iRowidTerm >= 0) { | |||
5642 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_POINT); | |||
5643 | pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1; | |||
5644 | pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1; | |||
5645 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_POINT_ID); | |||
5646 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_'); | |||
5647 | pIdxInfo->idxNum = pIdxInfo->colUsed; | |||
5648 | pIdxInfo->estimatedCost = 10.0; | |||
5649 | pIdxInfo->estimatedRows = 1; | |||
5650 | } else { | |||
5651 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_FULLSCAN); | |||
5652 | pIdxInfo->estimatedCost = 3000000.0; | |||
5653 | pIdxInfo->estimatedRows = 100000; | |||
5654 | } | |||
5655 | pIdxInfo->idxStr = sqlite3_str_finishsqlite3_api->str_finish(idxStr); | |||
5656 | idxStr = NULL((void*)0); | |||
5657 | if (!pIdxInfo->idxStr) { | |||
5658 | rc = SQLITE_OK0; | |||
5659 | goto done; | |||
5660 | } | |||
5661 | pIdxInfo->needToFreeIdxStr = 1; | |||
5662 | ||||
5663 | ||||
5664 | rc = SQLITE_OK0; | |||
5665 | ||||
5666 | done: | |||
5667 | if(idxStr) { | |||
5668 | sqlite3_str_finishsqlite3_api->str_finish(idxStr); | |||
5669 | } | |||
5670 | return rc; | |||
5671 | } | |||
5672 | ||||
5673 | // forward declaration because vec0Filter uses it | |||
5674 | static int vec0Next(sqlite3_vtab_cursor *cur); | |||
5675 | ||||
5676 | void merge_sorted_lists(f32 *a, i64 *a_rowids, i64 a_length, f32 *b, | |||
5677 | i64 *b_rowids, i32 *b_top_idxs, i64 b_length, f32 *out, | |||
5678 | i64 *out_rowids, i64 out_length, i64 *out_used) { | |||
5679 | // assert((a_length >= out_length) || (b_length >= out_length)); | |||
5680 | i64 ptrA = 0; | |||
5681 | i64 ptrB = 0; | |||
5682 | for (int i = 0; i < out_length; i++) { | |||
5683 | if ((ptrA >= a_length) && (ptrB >= b_length)) { | |||
5684 | *out_used = i; | |||
5685 | return; | |||
5686 | } | |||
5687 | if (ptrA >= a_length) { | |||
5688 | out[i] = b[b_top_idxs[ptrB]]; | |||
5689 | out_rowids[i] = b_rowids[b_top_idxs[ptrB]]; | |||
5690 | ptrB++; | |||
5691 | } else if (ptrB >= b_length) { | |||
5692 | out[i] = a[ptrA]; | |||
5693 | out_rowids[i] = a_rowids[ptrA]; | |||
5694 | ptrA++; | |||
5695 | } else { | |||
5696 | if (a[ptrA] <= b[b_top_idxs[ptrB]]) { | |||
5697 | out[i] = a[ptrA]; | |||
5698 | out_rowids[i] = a_rowids[ptrA]; | |||
5699 | ptrA++; | |||
5700 | } else { | |||
5701 | out[i] = b[b_top_idxs[ptrB]]; | |||
5702 | out_rowids[i] = b_rowids[b_top_idxs[ptrB]]; | |||
5703 | ptrB++; | |||
5704 | } | |||
5705 | } | |||
5706 | } | |||
5707 | ||||
5708 | *out_used = out_length; | |||
5709 | } | |||
5710 | ||||
5711 | u8 *bitmap_new(i32 n) { | |||
5712 | assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n % 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5712, __extension__ __PRETTY_FUNCTION__); })); | |||
5713 | u8 *p = sqlite3_mallocsqlite3_api->malloc(n * sizeof(u8) / CHAR_BIT8); | |||
5714 | if (p) { | |||
5715 | memset(p, 0, n * sizeof(u8) / CHAR_BIT8); | |||
5716 | } | |||
5717 | return p; | |||
5718 | } | |||
5719 | u8 *bitmap_new_from(i32 n, u8 *from) { | |||
5720 | assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n % 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5720, __extension__ __PRETTY_FUNCTION__); })); | |||
5721 | u8 *p = sqlite3_mallocsqlite3_api->malloc(n * sizeof(u8) / CHAR_BIT8); | |||
5722 | if (p) { | |||
5723 | memcpy(p, from, n / CHAR_BIT8); | |||
5724 | } | |||
5725 | return p; | |||
5726 | } | |||
5727 | ||||
5728 | void bitmap_copy(u8 *base, u8 *from, i32 n) { | |||
5729 | assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n % 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5729, __extension__ __PRETTY_FUNCTION__); })); | |||
5730 | memcpy(base, from, n / CHAR_BIT8); | |||
5731 | } | |||
5732 | ||||
5733 | void bitmap_and_inplace(u8 *base, u8 *other, i32 n) { | |||
5734 | assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if ( (n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5734, __extension__ __PRETTY_FUNCTION__); })); | |||
5735 | for (int i = 0; i < n / CHAR_BIT8; i++) { | |||
5736 | base[i] = base[i] & other[i]; | |||
5737 | } | |||
5738 | } | |||
5739 | ||||
5740 | void bitmap_set(u8 *bitmap, i32 position, int value) { | |||
5741 | if (value) { | |||
5742 | bitmap[position / CHAR_BIT8] |= 1 << (position % CHAR_BIT8); | |||
5743 | } else { | |||
5744 | bitmap[position / CHAR_BIT8] &= ~(1 << (position % CHAR_BIT8)); | |||
5745 | } | |||
5746 | } | |||
5747 | ||||
5748 | int bitmap_get(u8 *bitmap, i32 position) { | |||
5749 | return (((bitmap[position / CHAR_BIT8]) >> (position % CHAR_BIT8)) & 1); | |||
5750 | } | |||
5751 | ||||
5752 | void bitmap_clear(u8 *bitmap, i32 n) { | |||
5753 | assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if ( (n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5753, __extension__ __PRETTY_FUNCTION__); })); | |||
5754 | memset(bitmap, 0, n / CHAR_BIT8); | |||
5755 | } | |||
5756 | ||||
5757 | void bitmap_fill(u8 *bitmap, i32 n) { | |||
5758 | assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if ( (n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5758, __extension__ __PRETTY_FUNCTION__); })); | |||
5759 | memset(bitmap, 0xFF, n / CHAR_BIT8); | |||
5760 | } | |||
5761 | ||||
5762 | /** | |||
5763 | * @brief Finds the minimum k items in distances, and writes the indicies to | |||
5764 | * out. | |||
5765 | * | |||
5766 | * @param distances input f32 array of size n, the items to consider. | |||
5767 | * @param n: size of distances array. | |||
5768 | * @param out: Output array of size k, will contain at most k element indicies | |||
5769 | * @param k: Size of output array | |||
5770 | * @return int | |||
5771 | */ | |||
5772 | int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k, | |||
5773 | u8 *bTaken, i32 *k_used) { | |||
5774 | assert(k > 0)((void) sizeof ((k > 0) ? 1 : 0), __extension__ ({ if (k > 0) ; else __assert_fail ("k > 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5774, __extension__ __PRETTY_FUNCTION__); })); | |||
5775 | assert(k <= n)((void) sizeof ((k <= n) ? 1 : 0), __extension__ ({ if (k <= n) ; else __assert_fail ("k <= n", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5775, __extension__ __PRETTY_FUNCTION__); })); | |||
5776 | ||||
5777 | bitmap_clear(bTaken, n); | |||
5778 | ||||
5779 | for (int ik = 0; ik < k; ik++) { | |||
5780 | int min_idx = 0; | |||
5781 | while (min_idx < n && | |||
5782 | (bitmap_get(bTaken, min_idx) || !bitmap_get(candidates, min_idx))) { | |||
5783 | min_idx++; | |||
5784 | } | |||
5785 | if (min_idx >= n) { | |||
5786 | *k_used = ik; | |||
5787 | return SQLITE_OK0; | |||
5788 | } | |||
5789 | ||||
5790 | for (int i = 0; i < n; i++) { | |||
5791 | if (distances[i] <= distances[min_idx] && !bitmap_get(bTaken, i) && | |||
5792 | (bitmap_get(candidates, i))) { | |||
5793 | min_idx = i; | |||
5794 | } | |||
5795 | } | |||
5796 | ||||
5797 | out[ik] = min_idx; | |||
5798 | bitmap_set(bTaken, min_idx, 1); | |||
5799 | } | |||
5800 | *k_used = k; | |||
5801 | return SQLITE_OK0; | |||
5802 | } | |||
5803 | ||||
5804 | int vec0_get_metadata_text_long_value( | |||
5805 | vec0_vtab * p, | |||
5806 | sqlite3_stmt ** stmt, | |||
5807 | int metadata_idx, | |||
5808 | i64 rowid, | |||
5809 | int *n, | |||
5810 | char ** s) { | |||
5811 | int rc; | |||
5812 | if(!(*stmt)) { | |||
5813 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("select data from " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " where rowid = ?", p->schemaName, p->tableName, metadata_idx); | |||
5814 | if(!zSql) { | |||
5815 | rc = SQLITE_NOMEM7; | |||
5816 | goto done; | |||
5817 | } | |||
5818 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, stmt, NULL((void*)0)); | |||
5819 | sqlite3_freesqlite3_api->free( (void *) zSql); | |||
5820 | if(rc != SQLITE_OK0) { | |||
5821 | goto done; | |||
5822 | } | |||
5823 | } | |||
5824 | ||||
5825 | sqlite3_resetsqlite3_api->reset(*stmt); | |||
5826 | sqlite3_bind_int64sqlite3_api->bind_int64(*stmt, 1, rowid); | |||
5827 | rc = sqlite3_stepsqlite3_api->step(*stmt); | |||
5828 | if(rc != SQLITE_ROW100) { | |||
5829 | rc = SQLITE_ERROR1; | |||
5830 | goto done; | |||
5831 | } | |||
5832 | *s = (char *) sqlite3_column_textsqlite3_api->column_text(*stmt, 0); | |||
5833 | *n = sqlite3_column_bytessqlite3_api->column_bytes(*stmt, 0); | |||
5834 | rc = SQLITE_OK0; | |||
5835 | done: | |||
5836 | return rc; | |||
5837 | } | |||
5838 | ||||
5839 | /** | |||
5840 | * @brief Create an "iterator" (sqlite3_stmt) of chunks with the given constraints | |||
5841 | * | |||
5842 | * Any VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT values in idxStr/argv will be applied | |||
5843 | * as WHERE constraints in the underlying stmt SQL, and any consumer of the stmt | |||
5844 | * can freely step through the stmt with all constraints satisfied. | |||
5845 | * | |||
5846 | * @param p - vec0_vtab | |||
5847 | * @param idxStr - the xBestIndex/xFilter idxstr containing VEC0_IDXSTR values | |||
5848 | * @param argc - number of argv values from xFilter | |||
5849 | * @param argv - array of sqlite3_value from xFilter | |||
5850 | * @param outStmt - output sqlite3_stmt of chunks with all filters applied | |||
5851 | * @return int SQLITE_OK on success, error code otherwise | |||
5852 | */ | |||
5853 | int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value ** argv, sqlite3_stmt** outStmt) { | |||
5854 | // always null terminated, enforced by SQLite | |||
5855 | int idxStrLength = strlen(idxStr); | |||
5856 | // "1" refers to the initial vec0_query_plan char, 4 is the number of chars per "element" | |||
5857 | int numValueEntries = (idxStrLength-1) / 4; | |||
5858 | assert(argc == numValueEntries)((void) sizeof ((argc == numValueEntries) ? 1 : 0), __extension__ ({ if (argc == numValueEntries) ; else __assert_fail ("argc == numValueEntries" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5858, __extension__ __PRETTY_FUNCTION__); })); | |||
5859 | ||||
5860 | int rc; | |||
5861 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); | |||
5862 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "select chunk_id, validity, rowids " | |||
5863 | " from " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", | |||
5864 | p->schemaName, p->tableName); | |||
5865 | ||||
5866 | int appendedWhere = 0; | |||
5867 | for(int i = 0; i < numValueEntries; i++) { | |||
5868 | int idx = 1 + (i * 4); | |||
5869 | char kind = idxStr[idx + 0]; | |||
5870 | if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) { | |||
5871 | continue; | |||
5872 | } | |||
5873 | ||||
5874 | int partition_idx = idxStr[idx + 1] - 'A'; | |||
5875 | int operator = idxStr[idx + 2]; | |||
5876 | // idxStr[idx + 3] is just null, a '_' placeholder | |||
5877 | ||||
5878 | if(!appendedWhere) { | |||
5879 | sqlite3_str_appendallsqlite3_api->str_appendall(s, " WHERE "); | |||
5880 | appendedWhere = 1; | |||
5881 | }else { | |||
5882 | sqlite3_str_appendallsqlite3_api->str_appendall(s, " AND "); | |||
5883 | } | |||
5884 | switch(operator) { | |||
5885 | case VEC0_PARTITION_OPERATOR_EQ: | |||
5886 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d = ? ", partition_idx); | |||
5887 | break; | |||
5888 | case VEC0_PARTITION_OPERATOR_GT: | |||
5889 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d > ? ", partition_idx); | |||
5890 | break; | |||
5891 | case VEC0_PARTITION_OPERATOR_LE: | |||
5892 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d <= ? ", partition_idx); | |||
5893 | break; | |||
5894 | case VEC0_PARTITION_OPERATOR_LT: | |||
5895 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d < ? ", partition_idx); | |||
5896 | break; | |||
5897 | case VEC0_PARTITION_OPERATOR_GE: | |||
5898 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d >= ? ", partition_idx); | |||
5899 | break; | |||
5900 | case VEC0_PARTITION_OPERATOR_NE: | |||
5901 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d != ? ", partition_idx); | |||
5902 | break; | |||
5903 | default: { | |||
5904 | char * zSql = sqlite3_str_finishsqlite3_api->str_finish(s); | |||
5905 | sqlite3_freesqlite3_api->free(zSql); | |||
5906 | return SQLITE_ERROR1; | |||
5907 | } | |||
5908 | ||||
5909 | } | |||
5910 | ||||
5911 | } | |||
5912 | ||||
5913 | char *zSql = sqlite3_str_finishsqlite3_api->str_finish(s); | |||
5914 | if (!zSql) { | |||
5915 | return SQLITE_NOMEM7; | |||
5916 | } | |||
5917 | ||||
5918 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, outStmt, NULL((void*)0)); | |||
5919 | sqlite3_freesqlite3_api->free(zSql); | |||
5920 | if(rc != SQLITE_OK0) { | |||
5921 | return rc; | |||
5922 | } | |||
5923 | ||||
5924 | int n = 1; | |||
5925 | for(int i = 0; i < numValueEntries; i++) { | |||
5926 | int idx = 1 + (i * 4); | |||
5927 | char kind = idxStr[idx + 0]; | |||
5928 | if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) { | |||
5929 | continue; | |||
5930 | } | |||
5931 | sqlite3_bind_valuesqlite3_api->bind_value(*outStmt, n++, argv[i]); | |||
5932 | } | |||
5933 | ||||
5934 | return rc; | |||
5935 | } | |||
5936 | ||||
5937 | // a single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER only for now. | |||
5938 | struct Vec0MetadataIn{ | |||
5939 | // index of argv[i]` the constraint is on | |||
5940 | int argv_idx; | |||
5941 | // metadata column index of the constraint, derived from idxStr + argv_idx | |||
5942 | int metadata_idx; | |||
5943 | // array of the copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next() | |||
5944 | struct Array array; | |||
5945 | }; | |||
5946 | ||||
// Element type of Vec0MetadataIn.array when the `xxx in (...)` constraint is
// on a text column: a string together with its length.
struct Vec0MetadataInTextEntry {
  // length of zString; compared against the stored text length when filtering
  int n;
  // the string bytes of this `(...)` value
  char * zString;
};
5952 | ||||
5953 | ||||
5954 | int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid, struct Array * aMetadataIn, int argv_idx) { | |||
5955 | int rc; | |||
5956 | sqlite3_stmt * stmt = NULL((void*)0); | |||
5957 | i64 * rowids = NULL((void*)0); | |||
5958 | sqlite3_blob * rowidsBlob; | |||
5959 | const char * sTarget = (const char *) sqlite3_value_textsqlite3_api->value_text(value); | |||
5960 | int nTarget = sqlite3_value_bytessqlite3_api->value_bytes(value); | |||
5961 | ||||
5962 | ||||
5963 | // TODO(perf): only text metadata news the rowids BLOB. Make it so that | |||
5964 | // rowids BLOB is re-used when multiple fitlers on text columns, | |||
5965 | // ex "name BETWEEN 'a' and 'b'"" | |||
5966 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", chunk_rowid, 0, &rowidsBlob); | |||
5967 | if(rc != SQLITE_OK0) { | |||
5968 | return rc; | |||
5969 | } | |||
5970 | assert(sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0)((void) sizeof ((sqlite3_api->blob_bytes(rowidsBlob) % sizeof (i64) == 0) ? 1 : 0), __extension__ ({ if (sqlite3_api->blob_bytes (rowidsBlob) % sizeof(i64) == 0) ; else __assert_fail ("sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5970, __extension__ __PRETTY_FUNCTION__); })); | |||
5971 | assert((sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size)((void) sizeof (((sqlite3_api->blob_bytes(rowidsBlob) / sizeof (i64)) == size) ? 1 : 0), __extension__ ({ if ((sqlite3_api-> blob_bytes(rowidsBlob) / sizeof(i64)) == size) ; else __assert_fail ("(sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5971, __extension__ __PRETTY_FUNCTION__); })); | |||
5972 | ||||
5973 | rowids = sqlite3_mallocsqlite3_api->malloc(sqlite3_blob_bytessqlite3_api->blob_bytes(rowidsBlob)); | |||
5974 | if(!rowids) { | |||
5975 | sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob); | |||
5976 | return SQLITE_NOMEM7; | |||
5977 | } | |||
5978 | ||||
5979 | rc = sqlite3_blob_readsqlite3_api->blob_read(rowidsBlob, rowids, sqlite3_blob_bytessqlite3_api->blob_bytes(rowidsBlob), 0); | |||
5980 | if(rc != SQLITE_OK0) { | |||
5981 | sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob); | |||
5982 | return rc; | |||
5983 | } | |||
5984 | sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob); | |||
5985 | ||||
5986 | switch(op) { | |||
5987 | int nPrefix; | |||
5988 | char * sPrefix; | |||
5989 | char *sFull; | |||
5990 | int nFull; | |||
5991 | u8 * view; | |||
5992 | case VEC0_METADATA_OPERATOR_EQ: { | |||
5993 | for(int i = 0; i < size; i++) { | |||
5994 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
5995 | nPrefix = ((int*) view)[0]; | |||
5996 | sPrefix = (char *) &view[4]; | |||
5997 | ||||
5998 | // for EQ the text lengths must match | |||
5999 | if(nPrefix != nTarget) { | |||
6000 | bitmap_set(b, i, 0); | |||
6001 | continue; | |||
6002 | } | |||
6003 | int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12))); | |||
6004 | ||||
6005 | // for short strings, use the prefix comparison direclty | |||
6006 | if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
6007 | bitmap_set(b, i, cmpPrefix == 0); | |||
6008 | continue; | |||
6009 | } | |||
6010 | // for EQ on longs strings, the prefix must match | |||
6011 | if(cmpPrefix) { | |||
6012 | bitmap_set(b, i, 0); | |||
6013 | continue; | |||
6014 | } | |||
6015 | // consult the full string | |||
6016 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); | |||
6017 | if(rc != SQLITE_OK0) { | |||
6018 | goto done; | |||
6019 | } | |||
6020 | if(nPrefix != nFull) { | |||
6021 | rc = SQLITE_ERROR1; | |||
6022 | goto done; | |||
6023 | } | |||
6024 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) == 0); | |||
6025 | } | |||
6026 | break; | |||
6027 | } | |||
6028 | case VEC0_METADATA_OPERATOR_NE: { | |||
6029 | for(int i = 0; i < size; i++) { | |||
6030 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
6031 | nPrefix = ((int*) view)[0]; | |||
6032 | sPrefix = (char *) &view[4]; | |||
6033 | ||||
6034 | // for NE if text lengths dont match, it never will | |||
6035 | if(nPrefix != nTarget) { | |||
6036 | bitmap_set(b, i, 1); | |||
6037 | continue; | |||
6038 | } | |||
6039 | ||||
6040 | int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12))); | |||
6041 | ||||
6042 | // for short strings, use the prefix comparison direclty | |||
6043 | if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
6044 | bitmap_set(b, i, cmpPrefix != 0); | |||
6045 | continue; | |||
6046 | } | |||
6047 | // for NE on longs strings, if prefixes dont match, then long string wont | |||
6048 | if(cmpPrefix) { | |||
6049 | bitmap_set(b, i, 1); | |||
6050 | continue; | |||
6051 | } | |||
6052 | // consult the full string | |||
6053 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); | |||
6054 | if(rc != SQLITE_OK0) { | |||
6055 | goto done; | |||
6056 | } | |||
6057 | if(nPrefix != nFull) { | |||
6058 | rc = SQLITE_ERROR1; | |||
6059 | goto done; | |||
6060 | } | |||
6061 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0); | |||
6062 | } | |||
6063 | break; | |||
6064 | } | |||
6065 | case VEC0_METADATA_OPERATOR_GT: { | |||
6066 | for(int i = 0; i < size; i++) { | |||
6067 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
6068 | nPrefix = ((int*) view)[0]; | |||
6069 | sPrefix = (char *) &view[4]; | |||
6070 | int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget )) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget ))); | |||
6071 | ||||
6072 | if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
6073 | // if prefix match, check which is longer | |||
6074 | if(cmpPrefix == 0) { | |||
6075 | bitmap_set(b, i, nPrefix > nTarget); | |||
6076 | } | |||
6077 | else { | |||
6078 | bitmap_set(b, i, cmpPrefix > 0); | |||
6079 | } | |||
6080 | continue; | |||
6081 | } | |||
6082 | // TODO(perf): may not need to compare full text in some cases | |||
6083 | ||||
6084 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); | |||
6085 | if(rc != SQLITE_OK0) { | |||
6086 | goto done; | |||
6087 | } | |||
6088 | if(nPrefix != nFull) { | |||
6089 | rc = SQLITE_ERROR1; | |||
6090 | goto done; | |||
6091 | } | |||
6092 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) > 0); | |||
6093 | } | |||
6094 | break; | |||
6095 | } | |||
6096 | case VEC0_METADATA_OPERATOR_GE: { | |||
6097 | for(int i = 0; i < size; i++) { | |||
6098 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
6099 | nPrefix = ((int*) view)[0]; | |||
6100 | sPrefix = (char *) &view[4]; | |||
6101 | int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget )) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget ))); | |||
6102 | ||||
6103 | if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
6104 | // if prefix match, check which is longer | |||
6105 | if(cmpPrefix == 0) { | |||
6106 | bitmap_set(b, i, nPrefix >= nTarget); | |||
6107 | } | |||
6108 | else { | |||
6109 | bitmap_set(b, i, cmpPrefix >= 0); | |||
6110 | } | |||
6111 | continue; | |||
6112 | } | |||
6113 | // TODO(perf): may not need to compare full text in some cases | |||
6114 | ||||
6115 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); | |||
6116 | if(rc != SQLITE_OK0) { | |||
6117 | goto done; | |||
6118 | } | |||
6119 | if(nPrefix != nFull) { | |||
6120 | rc = SQLITE_ERROR1; | |||
6121 | goto done; | |||
6122 | } | |||
6123 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) >= 0); | |||
6124 | } | |||
6125 | break; | |||
6126 | } | |||
6127 | case VEC0_METADATA_OPERATOR_LE: { | |||
6128 | for(int i = 0; i < size; i++) { | |||
6129 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
6130 | nPrefix = ((int*) view)[0]; | |||
6131 | sPrefix = (char *) &view[4]; | |||
6132 | int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget )) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget ))); | |||
6133 | ||||
6134 | if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
6135 | // if prefix match, check which is longer | |||
6136 | if(cmpPrefix == 0) { | |||
6137 | bitmap_set(b, i, nPrefix <= nTarget); | |||
6138 | } | |||
6139 | else { | |||
6140 | bitmap_set(b, i, cmpPrefix <= 0); | |||
6141 | } | |||
6142 | continue; | |||
6143 | } | |||
6144 | // TODO(perf): may not need to compare full text in some cases | |||
6145 | ||||
6146 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); | |||
6147 | if(rc != SQLITE_OK0) { | |||
6148 | goto done; | |||
6149 | } | |||
6150 | if(nPrefix != nFull) { | |||
6151 | rc = SQLITE_ERROR1; | |||
6152 | goto done; | |||
6153 | } | |||
6154 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) <= 0); | |||
6155 | } | |||
6156 | break; | |||
6157 | } | |||
6158 | case VEC0_METADATA_OPERATOR_LT: { | |||
6159 | for(int i = 0; i < size; i++) { | |||
6160 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
6161 | nPrefix = ((int*) view)[0]; | |||
6162 | sPrefix = (char *) &view[4]; | |||
6163 | int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget )) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget ))); | |||
6164 | ||||
6165 | if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
6166 | // if prefix match, check which is longer | |||
6167 | if(cmpPrefix == 0) { | |||
6168 | bitmap_set(b, i, nPrefix < nTarget); | |||
6169 | } | |||
6170 | else { | |||
6171 | bitmap_set(b, i, cmpPrefix < 0); | |||
6172 | } | |||
6173 | continue; | |||
6174 | } | |||
6175 | // TODO(perf): may not need to compare full text in some cases | |||
6176 | ||||
6177 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); | |||
6178 | if(rc != SQLITE_OK0) { | |||
6179 | goto done; | |||
6180 | } | |||
6181 | if(nPrefix != nFull) { | |||
6182 | rc = SQLITE_ERROR1; | |||
6183 | goto done; | |||
6184 | } | |||
6185 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) < 0); | |||
6186 | } | |||
6187 | break; | |||
6188 | } | |||
6189 | ||||
6190 | case VEC0_METADATA_OPERATOR_IN: { | |||
6191 | size_t metadataInIdx = -1; | |||
6192 | for(size_t i = 0; i < aMetadataIn->length; i++) { | |||
6193 | struct Vec0MetadataIn * metadataIn = &(((struct Vec0MetadataIn *) aMetadataIn->z)[i]); | |||
6194 | if(metadataIn->argv_idx == argv_idx) { | |||
6195 | metadataInIdx = i; | |||
6196 | break; | |||
6197 | } | |||
6198 | } | |||
6199 | if(metadataInIdx < 0) { | |||
6200 | rc = SQLITE_ERROR1; | |||
6201 | goto done; | |||
6202 | } | |||
6203 | ||||
6204 | struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx]; | |||
6205 | struct Array * aTarget = &(metadataIn->array); | |||
6206 | ||||
6207 | ||||
6208 | int nPrefix; | |||
6209 | char * sPrefix; | |||
6210 | char *sFull; | |||
6211 | int nFull; | |||
6212 | u8 * view; | |||
6213 | for(int i = 0; i < size; i++) { | |||
6214 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
6215 | nPrefix = ((int*) view)[0]; | |||
6216 | sPrefix = (char *) &view[4]; | |||
6217 | for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) { | |||
6218 | struct Vec0MetadataInTextEntry * entry = &(((struct Vec0MetadataInTextEntry*)aTarget->z)[target_idx]); | |||
6219 | if(entry->n != nPrefix) { | |||
6220 | continue; | |||
6221 | } | |||
6222 | int cmpPrefix = strncmp(sPrefix, entry->zString, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12))); | |||
6223 | if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
6224 | if(cmpPrefix == 0) { | |||
6225 | bitmap_set(b, i, 1); | |||
6226 | break; | |||
6227 | } | |||
6228 | continue; | |||
6229 | } | |||
6230 | if(cmpPrefix) { | |||
6231 | continue; | |||
6232 | } | |||
6233 | ||||
6234 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); | |||
6235 | if(rc != SQLITE_OK0) { | |||
6236 | goto done; | |||
6237 | } | |||
6238 | if(nPrefix != nFull) { | |||
6239 | rc = SQLITE_ERROR1; | |||
6240 | goto done; | |||
6241 | } | |||
6242 | if(strncmp(sFull, entry->zString, nFull) == 0) { | |||
6243 | bitmap_set(b, i, 1); | |||
6244 | break; | |||
6245 | } | |||
6246 | } | |||
6247 | } | |||
6248 | break; | |||
6249 | } | |||
6250 | ||||
6251 | } | |||
6252 | rc = SQLITE_OK0; | |||
6253 | ||||
6254 | done: | |||
6255 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
6256 | sqlite3_freesqlite3_api->free(rowids); | |||
6257 | return rc; | |||
6258 | ||||
6259 | } | |||
6260 | ||||
6261 | /** | |||
6262 | * @brief Fill in bitmap of chunk values, whether or not the values match a metadata constraint | |||
6263 | * | |||
6264 | * @param p vec0_vtab | |||
6265 | * @param metadata_idx index of the metatadata column to perfrom constraints on | |||
6266 | * @param value sqlite3_value of the constraints value | |||
6267 | * @param blob sqlite3_blob that is already opened on the metdata column's shadow chunk table | |||
6268 | * @param chunk_rowid rowid of the chunk to calculate on | |||
6269 | * @param b pre-allocated and zero'd out bitmap to write results to | |||
6270 | * @param size size of the chunk | |||
6271 | * @return int SQLITE_OK on success, error code otherwise | |||
6272 | */ | |||
6273 | int vec0_set_metadata_filter_bitmap( | |||
6274 | vec0_vtab *p, | |||
6275 | int metadata_idx, | |||
6276 | vec0_metadata_operator op, | |||
6277 | sqlite3_value * value, | |||
6278 | sqlite3_blob * blob, | |||
6279 | i64 chunk_rowid, | |||
6280 | u8* b, | |||
6281 | int size, | |||
6282 | struct Array * aMetadataIn, int argv_idx) { | |||
6283 | // TODO: shouldn't this skip in-valid entries from the chunk's validity bitmap? | |||
6284 | ||||
6285 | int rc; | |||
6286 | rc = sqlite3_blob_reopensqlite3_api->blob_reopen(blob, chunk_rowid); | |||
6287 | if(rc != SQLITE_OK0) { | |||
6288 | return rc; | |||
6289 | } | |||
6290 | ||||
6291 | vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind; | |||
6292 | int szMatch = 0; | |||
6293 | int blobSize = sqlite3_blob_bytessqlite3_api->blob_bytes(blob); | |||
6294 | switch(kind) { | |||
6295 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { | |||
6296 | szMatch = blobSize == size / CHAR_BIT8; | |||
6297 | break; | |||
6298 | } | |||
6299 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { | |||
6300 | szMatch = blobSize == size * sizeof(i64); | |||
6301 | break; | |||
6302 | } | |||
6303 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { | |||
6304 | szMatch = blobSize == size * sizeof(double); | |||
6305 | break; | |||
6306 | } | |||
6307 | case VEC0_METADATA_COLUMN_KIND_TEXT: { | |||
6308 | szMatch = blobSize == size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16; | |||
6309 | break; | |||
6310 | } | |||
6311 | } | |||
6312 | if(!szMatch) { | |||
6313 | return SQLITE_ERROR1; | |||
6314 | } | |||
6315 | void * buffer = sqlite3_mallocsqlite3_api->malloc(blobSize); | |||
6316 | if(!buffer) { | |||
6317 | return SQLITE_NOMEM7; | |||
6318 | } | |||
6319 | rc = sqlite3_blob_readsqlite3_api->blob_read(blob, buffer, blobSize, 0); | |||
6320 | if(rc != SQLITE_OK0) { | |||
6321 | goto done; | |||
6322 | } | |||
6323 | switch(kind) { | |||
6324 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { | |||
6325 | int target = sqlite3_value_intsqlite3_api->value_int(value); | |||
6326 | if( (target && op == VEC0_METADATA_OPERATOR_EQ) || (!target && op == VEC0_METADATA_OPERATOR_NE)) { | |||
6327 | for(int i = 0; i < size; i++) { bitmap_set(b, i, bitmap_get((u8*) buffer, i)); } | |||
6328 | } | |||
6329 | else { | |||
6330 | for(int i = 0; i < size; i++) { bitmap_set(b, i, !bitmap_get((u8*) buffer, i)); } | |||
6331 | } | |||
6332 | break; | |||
6333 | } | |||
6334 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { | |||
6335 | i64 * array = (i64*) buffer; | |||
6336 | i64 target = sqlite3_value_int64sqlite3_api->value_int64(value); | |||
6337 | switch(op) { | |||
6338 | case VEC0_METADATA_OPERATOR_EQ: { | |||
6339 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); } | |||
6340 | break; | |||
6341 | } | |||
6342 | case VEC0_METADATA_OPERATOR_GT: { | |||
6343 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); } | |||
6344 | break; | |||
6345 | } | |||
6346 | case VEC0_METADATA_OPERATOR_LE: { | |||
6347 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); } | |||
6348 | break; | |||
6349 | } | |||
6350 | case VEC0_METADATA_OPERATOR_LT: { | |||
6351 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); } | |||
6352 | break; | |||
6353 | } | |||
6354 | case VEC0_METADATA_OPERATOR_GE: { | |||
6355 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); } | |||
6356 | break; | |||
6357 | } | |||
6358 | case VEC0_METADATA_OPERATOR_NE: { | |||
6359 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); } | |||
6360 | break; | |||
6361 | } | |||
6362 | case VEC0_METADATA_OPERATOR_IN: { | |||
6363 | int metadataInIdx = -1; | |||
6364 | for(size_t i = 0; i < aMetadataIn->length; i++) { | |||
6365 | struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[i]; | |||
6366 | if(metadataIn->argv_idx == argv_idx) { | |||
6367 | metadataInIdx = i; | |||
6368 | break; | |||
6369 | } | |||
6370 | } | |||
6371 | if(metadataInIdx < 0) { | |||
6372 | rc = SQLITE_ERROR1; | |||
6373 | goto done; | |||
6374 | } | |||
6375 | struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx]; | |||
6376 | struct Array * aTarget = &(metadataIn->array); | |||
6377 | ||||
6378 | for(int i = 0; i < size; i++) { | |||
6379 | for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) { | |||
6380 | if( ((i64*)aTarget->z)[target_idx] == array[i]) { | |||
6381 | bitmap_set(b, i, 1); | |||
6382 | break; | |||
6383 | } | |||
6384 | } | |||
6385 | } | |||
6386 | break; | |||
6387 | } | |||
6388 | } | |||
6389 | break; | |||
6390 | } | |||
6391 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { | |||
6392 | double * array = (double*) buffer; | |||
6393 | double target = sqlite3_value_doublesqlite3_api->value_double(value); | |||
6394 | switch(op) { | |||
6395 | case VEC0_METADATA_OPERATOR_EQ: { | |||
6396 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); } | |||
6397 | break; | |||
6398 | } | |||
6399 | case VEC0_METADATA_OPERATOR_GT: { | |||
6400 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); } | |||
6401 | break; | |||
6402 | } | |||
6403 | case VEC0_METADATA_OPERATOR_LE: { | |||
6404 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); } | |||
6405 | break; | |||
6406 | } | |||
6407 | case VEC0_METADATA_OPERATOR_LT: { | |||
6408 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); } | |||
6409 | break; | |||
6410 | } | |||
6411 | case VEC0_METADATA_OPERATOR_GE: { | |||
6412 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); } | |||
6413 | break; | |||
6414 | } | |||
6415 | case VEC0_METADATA_OPERATOR_NE: { | |||
6416 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); } | |||
6417 | break; | |||
6418 | } | |||
6419 | case VEC0_METADATA_OPERATOR_IN: { | |||
6420 | // should never be reached | |||
6421 | break; | |||
6422 | } | |||
6423 | } | |||
6424 | break; | |||
6425 | } | |||
6426 | case VEC0_METADATA_COLUMN_KIND_TEXT: { | |||
6427 | rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, metadata_idx, chunk_rowid, aMetadataIn, argv_idx); | |||
6428 | if(rc != SQLITE_OK0) { | |||
6429 | goto done; | |||
6430 | } | |||
6431 | break; | |||
6432 | } | |||
6433 | } | |||
6434 | done: | |||
6435 | sqlite3_freesqlite3_api->free(buffer); | |||
6436 | return rc; | |||
6437 | } | |||
6438 | ||||
6439 | int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks, | |||
6440 | struct VectorColumnDefinition *vector_column, | |||
6441 | int vectorColumnIdx, struct Array *arrayRowidsIn, | |||
6442 | struct Array * aMetadataIn, | |||
6443 | const char * idxStr, int argc, sqlite3_value ** argv, | |||
6444 | void *queryVector, i64 k, i64 **out_topk_rowids, | |||
6445 | f32 **out_topk_distances, i64 *out_used) { | |||
6446 | // for each chunk, get top min(k, chunk_size) rowid + distances to query vec. | |||
6447 | // then reconcile all topk_chunks for a true top k. | |||
6448 | // output only rowids + distances for now | |||
6449 | ||||
6450 | int rc = SQLITE_OK0; | |||
6451 | sqlite3_blob *blobVectors = NULL((void*)0); | |||
6452 | ||||
6453 | void *baseVectors = NULL((void*)0); // memory: chunk_size * dimensions * element_size | |||
6454 | ||||
6455 | // OWNED BY CALLER ON SUCCESS | |||
6456 | i64 *topk_rowids = NULL((void*)0); // memory: k * 4 | |||
6457 | // OWNED BY CALLER ON SUCCESS | |||
6458 | f32 *topk_distances = NULL((void*)0); // memory: k * 4 | |||
6459 | ||||
6460 | i64 *tmp_topk_rowids = NULL((void*)0); // memory: k * 4 | |||
6461 | f32 *tmp_topk_distances = NULL((void*)0); // memory: k * 4 | |||
6462 | f32 *chunk_distances = NULL((void*)0); // memory: chunk_size * 4 | |||
6463 | u8 *b = NULL((void*)0); // memory: chunk_size / 8 | |||
6464 | u8 *bTaken = NULL((void*)0); // memory: chunk_size / 8 | |||
6465 | i32 *chunk_topk_idxs = NULL((void*)0); // memory: k * 4 | |||
6466 | u8 *bmRowids = NULL((void*)0); // memory: chunk_size / 8 | |||
6467 | u8 *bmMetadata = NULL((void*)0); // memory: chunk_size / 8 | |||
6468 | // // total: a lot??? | |||
6469 | ||||
6470 | // 6 * (k * 4) + (k * 2) + (chunk_size / 8) + (chunk_size * dimensions * 4) | |||
6471 | ||||
6472 | topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i64)); | |||
6473 | if (!topk_rowids) { | |||
6474 | rc = SQLITE_NOMEM7; | |||
6475 | goto cleanup; | |||
6476 | } | |||
6477 | memset(topk_rowids, 0, k * sizeof(i64)); | |||
6478 | ||||
6479 | topk_distances = sqlite3_mallocsqlite3_api->malloc(k * sizeof(f32)); | |||
6480 | if (!topk_distances) { | |||
6481 | rc = SQLITE_NOMEM7; | |||
6482 | goto cleanup; | |||
6483 | } | |||
6484 | memset(topk_distances, 0, k * sizeof(f32)); | |||
6485 | ||||
6486 | tmp_topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i64)); | |||
6487 | if (!tmp_topk_rowids) { | |||
6488 | rc = SQLITE_NOMEM7; | |||
6489 | goto cleanup; | |||
6490 | } | |||
6491 | memset(tmp_topk_rowids, 0, k * sizeof(i64)); | |||
6492 | ||||
6493 | tmp_topk_distances = sqlite3_mallocsqlite3_api->malloc(k * sizeof(f32)); | |||
6494 | if (!tmp_topk_distances) { | |||
6495 | rc = SQLITE_NOMEM7; | |||
6496 | goto cleanup; | |||
6497 | } | |||
6498 | memset(tmp_topk_distances, 0, k * sizeof(f32)); | |||
6499 | ||||
6500 | i64 k_used = 0; | |||
6501 | i64 baseVectorsSize = p->chunk_size * vector_column_byte_size(*vector_column); | |||
6502 | baseVectors = sqlite3_mallocsqlite3_api->malloc(baseVectorsSize); | |||
6503 | if (!baseVectors) { | |||
6504 | rc = SQLITE_NOMEM7; | |||
6505 | goto cleanup; | |||
6506 | } | |||
6507 | ||||
6508 | chunk_distances = sqlite3_mallocsqlite3_api->malloc(p->chunk_size * sizeof(f32)); | |||
6509 | if (!chunk_distances) { | |||
6510 | rc = SQLITE_NOMEM7; | |||
6511 | goto cleanup; | |||
6512 | } | |||
6513 | ||||
6514 | b = bitmap_new(p->chunk_size); | |||
6515 | if (!b) { | |||
6516 | rc = SQLITE_NOMEM7; | |||
6517 | goto cleanup; | |||
6518 | } | |||
6519 | ||||
6520 | bTaken = bitmap_new(p->chunk_size); | |||
6521 | if (!bTaken) { | |||
6522 | rc = SQLITE_NOMEM7; | |||
6523 | goto cleanup; | |||
6524 | } | |||
6525 | ||||
6526 | chunk_topk_idxs = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i32)); | |||
6527 | if (!chunk_topk_idxs) { | |||
6528 | rc = SQLITE_NOMEM7; | |||
6529 | goto cleanup; | |||
6530 | } | |||
6531 | ||||
6532 | bmRowids = arrayRowidsIn ? bitmap_new(p->chunk_size) : NULL((void*)0); | |||
6533 | if (arrayRowidsIn && !bmRowids) { | |||
6534 | rc = SQLITE_NOMEM7; | |||
6535 | goto cleanup; | |||
6536 | } | |||
6537 | ||||
6538 | sqlite3_blob * metadataBlobs[VEC0_MAX_METADATA_COLUMNS16]; | |||
6539 | memset(metadataBlobs, 0, sizeof(sqlite3_blob*) * VEC0_MAX_METADATA_COLUMNS16); | |||
6540 | ||||
6541 | bmMetadata = bitmap_new(p->chunk_size); | |||
6542 | if(!bmMetadata) { | |||
6543 | rc = SQLITE_NOMEM7; | |||
6544 | goto cleanup; | |||
6545 | } | |||
6546 | ||||
6547 | int idxStrLength = strlen(idxStr); | |||
6548 | int numValueEntries = (idxStrLength-1) / 4; | |||
6549 | assert(numValueEntries == argc)((void) sizeof ((numValueEntries == argc) ? 1 : 0), __extension__ ({ if (numValueEntries == argc) ; else __assert_fail ("numValueEntries == argc" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 6549, __extension__ __PRETTY_FUNCTION__); })); | |||
6550 | int hasMetadataFilters = 0; | |||
6551 | for(int i = 0; i < argc; i++) { | |||
6552 | int idx = 1 + (i * 4); | |||
6553 | char kind = idxStr[idx + 0]; | |||
6554 | if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) { | |||
6555 | hasMetadataFilters = 1; | |||
6556 | break; | |||
6557 | } | |||
6558 | } | |||
6559 | ||||
6560 | while (true1) { | |||
6561 | rc = sqlite3_stepsqlite3_api->step(stmtChunks); | |||
6562 | if (rc == SQLITE_DONE101) { | |||
6563 | break; | |||
6564 | } | |||
6565 | if (rc != SQLITE_ROW100) { | |||
6566 | vtab_set_error(&p->base, "chunks iter error"); | |||
6567 | rc = SQLITE_ERROR1; | |||
6568 | goto cleanup; | |||
6569 | } | |||
6570 | memset(chunk_distances, 0, p->chunk_size * sizeof(f32)); | |||
6571 | memset(chunk_topk_idxs, 0, k * sizeof(i32)); | |||
6572 | bitmap_clear(b, p->chunk_size); | |||
6573 | ||||
6574 | i64 chunk_id = sqlite3_column_int64sqlite3_api->column_int64(stmtChunks, 0); | |||
6575 | unsigned char *chunkValidity = | |||
6576 | (unsigned char *)sqlite3_column_blobsqlite3_api->column_blob(stmtChunks, 1); | |||
6577 | i64 validitySize = sqlite3_column_bytessqlite3_api->column_bytes(stmtChunks, 1); | |||
6578 | if (validitySize != p->chunk_size / CHAR_BIT8) { | |||
6579 | // IMP: V05271_22109 | |||
6580 | vtab_set_error( | |||
6581 | &p->base, | |||
6582 | "chunk validity size doesn't match - expected %lld, found %lld", | |||
6583 | p->chunk_size / CHAR_BIT8, validitySize); | |||
6584 | rc = SQLITE_ERROR1; | |||
6585 | goto cleanup; | |||
6586 | } | |||
6587 | ||||
6588 | i64 *chunkRowids = (i64 *)sqlite3_column_blobsqlite3_api->column_blob(stmtChunks, 2); | |||
6589 | i64 rowidsSize = sqlite3_column_bytessqlite3_api->column_bytes(stmtChunks, 2); | |||
6590 | if (rowidsSize != p->chunk_size * sizeof(i64)) { | |||
6591 | // IMP: V02796_19635 | |||
6592 | vtab_set_error(&p->base, "rowids size doesn't match"); | |||
6593 | vtab_set_error( | |||
6594 | &p->base, | |||
6595 | "chunk rowids size doesn't match - expected %lld, found %lld", | |||
6596 | p->chunk_size * sizeof(i64), rowidsSize); | |||
6597 | rc = SQLITE_ERROR1; | |||
6598 | goto cleanup; | |||
6599 | } | |||
6600 | ||||
6601 | // open the vector chunk blob for the current chunk | |||
6602 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, | |||
6603 | p->shadowVectorChunksNames[vectorColumnIdx], | |||
6604 | "vectors", chunk_id, 0, &blobVectors); | |||
6605 | if (rc != SQLITE_OK0) { | |||
6606 | vtab_set_error(&p->base, "could not open vectors blob for chunk %lld", | |||
6607 | chunk_id); | |||
6608 | rc = SQLITE_ERROR1; | |||
6609 | goto cleanup; | |||
6610 | } | |||
6611 | ||||
6612 | i64 currentBaseVectorsSize = sqlite3_blob_bytessqlite3_api->blob_bytes(blobVectors); | |||
6613 | i64 expectedBaseVectorsSize = | |||
6614 | p->chunk_size * vector_column_byte_size(*vector_column); | |||
6615 | if (currentBaseVectorsSize != expectedBaseVectorsSize) { | |||
6616 | // IMP: V16465_00535 | |||
6617 | vtab_set_error( | |||
6618 | &p->base, | |||
6619 | "vectors blob size doesn't match - expected %lld, found %lld", | |||
6620 | expectedBaseVectorsSize, currentBaseVectorsSize); | |||
6621 | rc = SQLITE_ERROR1; | |||
6622 | goto cleanup; | |||
6623 | } | |||
6624 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobVectors, baseVectors, currentBaseVectorsSize, 0); | |||
6625 | ||||
6626 | if (rc != SQLITE_OK0) { | |||
6627 | vtab_set_error(&p->base, "vectors blob read error for %lld", chunk_id); | |||
6628 | rc = SQLITE_ERROR1; | |||
6629 | goto cleanup; | |||
6630 | } | |||
6631 | ||||
6632 | bitmap_copy(b, chunkValidity, p->chunk_size); | |||
6633 | if (arrayRowidsIn) { | |||
6634 | bitmap_clear(bmRowids, p->chunk_size); | |||
6635 | ||||
6636 | for (int i = 0; i < p->chunk_size; i++) { | |||
6637 | if (!bitmap_get(chunkValidity, i)) { | |||
6638 | continue; | |||
6639 | } | |||
6640 | i64 rowid = chunkRowids[i]; | |||
6641 | void *in = bsearch(&rowid, arrayRowidsIn->z, arrayRowidsIn->length, | |||
6642 | sizeof(i64), _cmp); | |||
6643 | bitmap_set(bmRowids, i, in ? 1 : 0); | |||
6644 | } | |||
6645 | bitmap_and_inplace(b, bmRowids, p->chunk_size); | |||
6646 | } | |||
6647 | ||||
6648 | if(hasMetadataFilters) { | |||
6649 | for(int i = 0; i < argc; i++) { | |||
6650 | int idx = 1 + (i * 4); | |||
6651 | char kind = idxStr[idx + 0]; | |||
6652 | if(kind != VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) { | |||
6653 | continue; | |||
6654 | } | |||
6655 | int metadata_idx = idxStr[idx + 1] - 'A'; | |||
6656 | int operator = idxStr[idx + 2]; | |||
6657 | ||||
6658 | if(!metadataBlobs[metadata_idx]) { | |||
6659 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &metadataBlobs[metadata_idx]); | |||
6660 | vtab_set_error(&p->base, "Could not open metadata blob"); | |||
6661 | if(rc != SQLITE_OK0) { | |||
6662 | goto cleanup; | |||
6663 | } | |||
6664 | } | |||
6665 | ||||
6666 | bitmap_clear(bmMetadata, p->chunk_size); | |||
6667 | rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i); | |||
6668 | if(rc != SQLITE_OK0) { | |||
6669 | vtab_set_error(&p->base, "Could not filter metadata fields"); | |||
6670 | if(rc != SQLITE_OK0) { | |||
6671 | goto cleanup; | |||
6672 | } | |||
6673 | } | |||
6674 | bitmap_and_inplace(b, bmMetadata, p->chunk_size); | |||
6675 | } | |||
6676 | } | |||
6677 | ||||
6678 | ||||
6679 | for (int i = 0; i < p->chunk_size; i++) { | |||
6680 | if (!bitmap_get(b, i)) { | |||
6681 | continue; | |||
6682 | }; | |||
6683 | ||||
6684 | f32 result; | |||
6685 | switch (vector_column->element_type) { | |||
6686 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { | |||
6687 | const f32 *base_i = | |||
6688 | ((f32 *)baseVectors) + (i * vector_column->dimensions); | |||
6689 | switch (vector_column->distance_metric) { | |||
6690 | case VEC0_DISTANCE_METRIC_L2: { | |||
6691 | result = distance_l2_sqr_float(base_i, (f32 *)queryVector, | |||
6692 | &vector_column->dimensions); | |||
6693 | break; | |||
6694 | } | |||
6695 | case VEC0_DISTANCE_METRIC_L1: { | |||
6696 | result = distance_l1_f32(base_i, (f32 *)queryVector, | |||
6697 | &vector_column->dimensions); | |||
6698 | break; | |||
6699 | } | |||
6700 | case VEC0_DISTANCE_METRIC_COSINE: { | |||
6701 | result = distance_cosine_float(base_i, (f32 *)queryVector, | |||
6702 | &vector_column->dimensions); | |||
6703 | break; | |||
6704 | } | |||
6705 | } | |||
6706 | break; | |||
6707 | } | |||
6708 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { | |||
6709 | const i8 *base_i = | |||
6710 | ((i8 *)baseVectors) + (i * vector_column->dimensions); | |||
6711 | switch (vector_column->distance_metric) { | |||
6712 | case VEC0_DISTANCE_METRIC_L2: { | |||
6713 | result = distance_l2_sqr_int8(base_i, (i8 *)queryVector, | |||
6714 | &vector_column->dimensions); | |||
6715 | break; | |||
6716 | } | |||
6717 | case VEC0_DISTANCE_METRIC_L1: { | |||
6718 | result = distance_l1_int8(base_i, (i8 *)queryVector, | |||
6719 | &vector_column->dimensions); | |||
6720 | break; | |||
6721 | } | |||
6722 | case VEC0_DISTANCE_METRIC_COSINE: { | |||
6723 | result = distance_cosine_int8(base_i, (i8 *)queryVector, | |||
6724 | &vector_column->dimensions); | |||
6725 | break; | |||
6726 | } | |||
6727 | } | |||
6728 | ||||
6729 | break; | |||
6730 | } | |||
6731 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { | |||
6732 | const u8 *base_i = | |||
6733 | ((u8 *)baseVectors) + (i * (vector_column->dimensions / CHAR_BIT8)); | |||
6734 | result = distance_hamming(base_i, (u8 *)queryVector, | |||
6735 | &vector_column->dimensions); | |||
6736 | break; | |||
6737 | } | |||
6738 | } | |||
6739 | ||||
6740 | chunk_distances[i] = result; | |||
6741 | } | |||
6742 | ||||
6743 | int used1; | |||
6744 | min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs, | |||
6745 | min(k, p->chunk_size)(((k) <= (p->chunk_size)) ? (k) : (p->chunk_size)), bTaken, &used1); | |||
6746 | ||||
6747 | i64 used; | |||
6748 | merge_sorted_lists(topk_distances, topk_rowids, k_used, chunk_distances, | |||
6749 | chunkRowids, chunk_topk_idxs, | |||
6750 | min(min(k, p->chunk_size), used1)((((((k) <= (p->chunk_size)) ? (k) : (p->chunk_size) )) <= (used1)) ? ((((k) <= (p->chunk_size)) ? (k) : ( p->chunk_size))) : (used1)), tmp_topk_distances, | |||
6751 | tmp_topk_rowids, k, &used); | |||
6752 | ||||
6753 | for (int i = 0; i < used; i++) { | |||
6754 | topk_rowids[i] = tmp_topk_rowids[i]; | |||
6755 | topk_distances[i] = tmp_topk_distances[i]; | |||
6756 | } | |||
6757 | k_used = used; | |||
6758 | // blobVectors is always opened with read-only permissions, so this never | |||
6759 | // fails. | |||
6760 | sqlite3_blob_closesqlite3_api->blob_close(blobVectors); | |||
6761 | blobVectors = NULL((void*)0); | |||
6762 | } | |||
6763 | ||||
6764 | *out_topk_rowids = topk_rowids; | |||
6765 | *out_topk_distances = topk_distances; | |||
6766 | *out_used = k_used; | |||
6767 | rc = SQLITE_OK0; | |||
6768 | ||||
6769 | cleanup: | |||
6770 | if (rc != SQLITE_OK0) { | |||
6771 | sqlite3_freesqlite3_api->free(topk_rowids); | |||
6772 | sqlite3_freesqlite3_api->free(topk_distances); | |||
6773 | } | |||
6774 | sqlite3_freesqlite3_api->free(chunk_topk_idxs); | |||
6775 | sqlite3_freesqlite3_api->free(tmp_topk_rowids); | |||
6776 | sqlite3_freesqlite3_api->free(tmp_topk_distances); | |||
6777 | sqlite3_freesqlite3_api->free(b); | |||
6778 | sqlite3_freesqlite3_api->free(bTaken); | |||
6779 | sqlite3_freesqlite3_api->free(bmRowids); | |||
6780 | sqlite3_freesqlite3_api->free(baseVectors); | |||
6781 | sqlite3_freesqlite3_api->free(chunk_distances); | |||
6782 | sqlite3_freesqlite3_api->free(bmMetadata); | |||
6783 | for(int i = 0; i < VEC0_MAX_METADATA_COLUMNS16; i++) { | |||
6784 | sqlite3_blob_closesqlite3_api->blob_close(metadataBlobs[i]); | |||
6785 | } | |||
6786 | // blobVectors is always opened with read-only permissions, so this never | |||
6787 | // fails. | |||
6788 | sqlite3_blob_closesqlite3_api->blob_close(blobVectors); | |||
6789 | return rc; | |||
6790 | } | |||
6791 | ||||
6792 | int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, | |||
6793 | const char *idxStr, int argc, sqlite3_value **argv) { | |||
6794 | assert(argc == (strlen(idxStr)-1) / 4)((void) sizeof ((argc == (strlen(idxStr)-1) / 4) ? 1 : 0), __extension__ ({ if (argc == (strlen(idxStr)-1) / 4) ; else __assert_fail ( "argc == (strlen(idxStr)-1) / 4", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 6794, __extension__ __PRETTY_FUNCTION__); })); | |||
6795 | int rc; | |||
6796 | struct vec0_query_knn_data *knn_data; | |||
6797 | ||||
6798 | int vectorColumnIdx = idxNum; | |||
6799 | struct VectorColumnDefinition *vector_column = | |||
6800 | &p->vector_columns[vectorColumnIdx]; | |||
6801 | ||||
6802 | struct Array *arrayRowidsIn = NULL((void*)0); | |||
6803 | sqlite3_stmt *stmtChunks = NULL((void*)0); | |||
6804 | void *queryVector; | |||
6805 | size_t dimensions; | |||
6806 | enum VectorElementType elementType; | |||
6807 | vector_cleanup queryVectorCleanup = vector_cleanup_noop; | |||
6808 | char *pzError; | |||
6809 | knn_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*knn_data)); | |||
6810 | if (!knn_data) { | |||
6811 | return SQLITE_NOMEM7; | |||
6812 | } | |||
6813 | memset(knn_data, 0, sizeof(*knn_data)); | |||
6814 | // array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints | |||
6815 | struct Array * aMetadataIn = NULL((void*)0); | |||
6816 | ||||
6817 | int query_idx =-1; | |||
6818 | int k_idx = -1; | |||
6819 | int rowid_in_idx = -1; | |||
6820 | for(int i = 0; i < argc; i++) { | |||
6821 | if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_MATCH) { | |||
6822 | query_idx = i; | |||
6823 | } | |||
6824 | if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_K) { | |||
6825 | k_idx = i; | |||
6826 | } | |||
6827 | if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_ROWID_IN) { | |||
6828 | rowid_in_idx = i; | |||
6829 | } | |||
6830 | } | |||
6831 | assert(query_idx >= 0)((void) sizeof ((query_idx >= 0) ? 1 : 0), __extension__ ( { if (query_idx >= 0) ; else __assert_fail ("query_idx >= 0" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 6831, __extension__ __PRETTY_FUNCTION__); })); | |||
6832 | assert(k_idx >= 0)((void) sizeof ((k_idx >= 0) ? 1 : 0), __extension__ ({ if (k_idx >= 0) ; else __assert_fail ("k_idx >= 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 6832, __extension__ __PRETTY_FUNCTION__); })); | |||
6833 | ||||
6834 | // make sure the query vector matches the vector column (type dimensions etc.) | |||
6835 | rc = vector_from_value(argv[query_idx], &queryVector, &dimensions, &elementType, | |||
6836 | &queryVectorCleanup, &pzError); | |||
6837 | ||||
6838 | if (rc != SQLITE_OK0) { | |||
6839 | vtab_set_error(&p->base, | |||
6840 | "Query vector on the \"%.*s\" column is invalid: %z", | |||
6841 | vector_column->name_length, vector_column->name, pzError); | |||
6842 | rc = SQLITE_ERROR1; | |||
6843 | goto cleanup; | |||
6844 | } | |||
6845 | if (elementType != vector_column->element_type) { | |||
6846 | vtab_set_error( | |||
6847 | &p->base, | |||
6848 | "Query vector for the \"%.*s\" column is expected to be of type " | |||
6849 | "%s, but a %s vector was provided.", | |||
6850 | vector_column->name_length, vector_column->name, | |||
6851 | vector_subtype_name(vector_column->element_type), | |||
6852 | vector_subtype_name(elementType)); | |||
6853 | rc = SQLITE_ERROR1; | |||
6854 | goto cleanup; | |||
6855 | } | |||
6856 | if (dimensions != vector_column->dimensions) { | |||
6857 | vtab_set_error( | |||
6858 | &p->base, | |||
6859 | "Dimension mismatch for query vector for the \"%.*s\" column. " | |||
6860 | "Expected %d dimensions but received %d.", | |||
6861 | vector_column->name_length, vector_column->name, | |||
6862 | vector_column->dimensions, dimensions); | |||
6863 | rc = SQLITE_ERROR1; | |||
6864 | goto cleanup; | |||
6865 | } | |||
6866 | ||||
6867 | i64 k = sqlite3_value_int64sqlite3_api->value_int64(argv[k_idx]); | |||
6868 | if (k < 0) { | |||
6869 | vtab_set_error( | |||
6870 | &p->base, "k value in knn queries must be greater than or equal to 0."); | |||
6871 | rc = SQLITE_ERROR1; | |||
6872 | goto cleanup; | |||
6873 | } | |||
6874 | #define SQLITE_VEC_VEC0_K_MAX4096 4096 | |||
6875 | if (k > SQLITE_VEC_VEC0_K_MAX4096) { | |||
6876 | vtab_set_error( | |||
6877 | &p->base, | |||
6878 | "k value in knn query too large, provided %lld and the limit is %lld", | |||
6879 | k, SQLITE_VEC_VEC0_K_MAX4096); | |||
6880 | rc = SQLITE_ERROR1; | |||
6881 | goto cleanup; | |||
6882 | } | |||
6883 | ||||
6884 | if (k == 0) { | |||
6885 | knn_data->k = 0; | |||
6886 | pCur->knn_data = knn_data; | |||
6887 | pCur->query_plan = VEC0_QUERY_PLAN_KNN; | |||
6888 | rc = SQLITE_OK0; | |||
6889 | goto cleanup; | |||
6890 | } | |||
6891 | ||||
6892 | // handle when a `rowid in (...)` operation was provided | |||
6893 | // Array of all the rowids that appear in any `rowid in (...)` constraint. | |||
6894 | // NULL if none were provided, which means a "full" scan. | |||
6895 | #if COMPILER_SUPPORTS_VTAB_IN1 | |||
6896 | if (rowid_in_idx >= 0) { | |||
6897 | sqlite3_value *item; | |||
6898 | int rc; | |||
6899 | arrayRowidsIn = sqlite3_mallocsqlite3_api->malloc(sizeof(*arrayRowidsIn)); | |||
6900 | if (!arrayRowidsIn) { | |||
6901 | rc = SQLITE_NOMEM7; | |||
6902 | goto cleanup; | |||
6903 | } | |||
6904 | memset(arrayRowidsIn, 0, sizeof(*arrayRowidsIn)); | |||
6905 | ||||
6906 | rc = array_init(arrayRowidsIn, sizeof(i64), 32); | |||
6907 | if (rc != SQLITE_OK0) { | |||
6908 | goto cleanup; | |||
6909 | } | |||
6910 | for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[rowid_in_idx], &item); rc == SQLITE_OK0 && item; | |||
6911 | rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[rowid_in_idx], &item)) { | |||
6912 | i64 rowid; | |||
6913 | if (p->pkIsText) { | |||
6914 | rc = vec0_rowid_from_id(p, item, &rowid); | |||
6915 | if (rc != SQLITE_OK0) { | |||
6916 | goto cleanup; | |||
6917 | } | |||
6918 | } else { | |||
6919 | rowid = sqlite3_value_int64sqlite3_api->value_int64(item); | |||
6920 | } | |||
6921 | rc = array_append(arrayRowidsIn, &rowid); | |||
6922 | if (rc != SQLITE_OK0) { | |||
6923 | goto cleanup; | |||
6924 | } | |||
6925 | } | |||
6926 | if (rc != SQLITE_DONE101) { | |||
6927 | vtab_set_error(&p->base, "error processing rowid in (...) array"); | |||
6928 | goto cleanup; | |||
6929 | } | |||
6930 | qsort(arrayRowidsIn->z, arrayRowidsIn->length, arrayRowidsIn->element_size, | |||
6931 | _cmp); | |||
6932 | } | |||
6933 | #endif | |||
6934 | ||||
6935 | #if COMPILER_SUPPORTS_VTAB_IN1 | |||
6936 | for(int i = 0; i < argc; i++) { | |||
6937 | if(!(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr[1 + (i*4) + 2] == VEC0_METADATA_OPERATOR_IN)) { | |||
6938 | continue; | |||
6939 | } | |||
6940 | int metadata_idx = idxStr[1 + (i*4) + 1] - 'A'; | |||
6941 | if(!aMetadataIn) { | |||
6942 | aMetadataIn = sqlite3_mallocsqlite3_api->malloc(sizeof(*aMetadataIn)); | |||
6943 | if(!aMetadataIn) { | |||
6944 | rc = SQLITE_NOMEM7; | |||
6945 | goto cleanup; | |||
6946 | } | |||
6947 | memset(aMetadataIn, 0, sizeof(*aMetadataIn)); | |||
6948 | rc = array_init(aMetadataIn, sizeof(struct Vec0MetadataIn), 8); | |||
6949 | if(rc != SQLITE_OK0) { | |||
6950 | goto cleanup; | |||
6951 | } | |||
6952 | } | |||
6953 | ||||
6954 | struct Vec0MetadataIn item; | |||
6955 | memset(&item, 0, sizeof(item)); | |||
6956 | item.metadata_idx=metadata_idx; | |||
6957 | item.argv_idx = i; | |||
6958 | ||||
6959 | switch(p->metadata_columns[metadata_idx].kind) { | |||
6960 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { | |||
6961 | rc = array_init(&item.array, sizeof(i64), 16); | |||
6962 | if(rc != SQLITE_OK0) { | |||
6963 | goto cleanup; | |||
6964 | } | |||
6965 | sqlite3_value *entry; | |||
6966 | for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[i], &entry); rc == SQLITE_OK0 && entry; rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[i], &entry)) { | |||
6967 | i64 v = sqlite3_value_int64sqlite3_api->value_int64(entry); | |||
6968 | rc = array_append(&item.array, &v); | |||
6969 | if (rc != SQLITE_OK0) { | |||
6970 | goto cleanup; | |||
6971 | } | |||
6972 | } | |||
6973 | ||||
6974 | if (rc != SQLITE_DONE101) { | |||
6975 | vtab_set_error(&p->base, "Error fetching next value in `x in (...)` integer expression"); | |||
6976 | goto cleanup; | |||
6977 | } | |||
6978 | ||||
6979 | break; | |||
6980 | } | |||
6981 | case VEC0_METADATA_COLUMN_KIND_TEXT: { | |||
6982 | rc = array_init(&item.array, sizeof(struct Vec0MetadataInTextEntry), 16); | |||
6983 | if(rc != SQLITE_OK0) { | |||
6984 | goto cleanup; | |||
6985 | } | |||
6986 | sqlite3_value *entry; | |||
6987 | for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[i], &entry); rc == SQLITE_OK0 && entry; rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[i], &entry)) { | |||
6988 | const char * s = (const char *) sqlite3_value_textsqlite3_api->value_text(entry); | |||
6989 | int n = sqlite3_value_bytessqlite3_api->value_bytes(entry); | |||
6990 | ||||
6991 | struct Vec0MetadataInTextEntry entry; | |||
6992 | entry.zString = sqlite3_mprintfsqlite3_api->mprintf("%.*s", n, s); | |||
6993 | if(!entry.zString) { | |||
6994 | rc = SQLITE_NOMEM7; | |||
6995 | goto cleanup; | |||
6996 | } | |||
6997 | entry.n = n; | |||
6998 | rc = array_append(&item.array, &entry); | |||
6999 | if (rc != SQLITE_OK0) { | |||
7000 | goto cleanup; | |||
7001 | } | |||
7002 | } | |||
7003 | ||||
7004 | if (rc != SQLITE_DONE101) { | |||
7005 | vtab_set_error(&p->base, "Error fetching next value in `x in (...)` text expression"); | |||
7006 | goto cleanup; | |||
7007 | } | |||
7008 | ||||
7009 | break; | |||
7010 | } | |||
7011 | default: { | |||
7012 | vtab_set_error(&p->base, "Internal sqlite-vec error"); | |||
7013 | goto cleanup; | |||
7014 | } | |||
7015 | } | |||
7016 | ||||
7017 | rc = array_append(aMetadataIn, &item); | |||
7018 | if(rc != SQLITE_OK0) { | |||
7019 | goto cleanup; | |||
7020 | } | |||
7021 | } | |||
7022 | #endif | |||
7023 | ||||
7024 | rc = vec0_chunks_iter(p, idxStr, argc, argv, &stmtChunks); | |||
7025 | if (rc != SQLITE_OK0) { | |||
7026 | // IMP: V06942_23781 | |||
7027 | vtab_set_error(&p->base, "Error preparing stmtChunk: %s", | |||
7028 | sqlite3_errmsgsqlite3_api->errmsg(p->db)); | |||
7029 | goto cleanup; | |||
7030 | } | |||
7031 | ||||
7032 | i64 *topk_rowids = NULL((void*)0); | |||
7033 | f32 *topk_distances = NULL((void*)0); | |||
7034 | i64 k_used = 0; | |||
7035 | rc = vec0Filter_knn_chunks_iter(p, stmtChunks, vector_column, vectorColumnIdx, | |||
7036 | arrayRowidsIn, aMetadataIn, idxStr, argc, argv, queryVector, k, &topk_rowids, | |||
7037 | &topk_distances, &k_used); | |||
7038 | if (rc != SQLITE_OK0) { | |||
7039 | goto cleanup; | |||
7040 | } | |||
7041 | ||||
7042 | knn_data->current_idx = 0; | |||
7043 | knn_data->k = k; | |||
7044 | knn_data->rowids = topk_rowids; | |||
7045 | knn_data->distances = topk_distances; | |||
7046 | knn_data->k_used = k_used; | |||
7047 | ||||
7048 | pCur->knn_data = knn_data; | |||
7049 | pCur->query_plan = VEC0_QUERY_PLAN_KNN; | |||
7050 | rc = SQLITE_OK0; | |||
7051 | ||||
7052 | cleanup: | |||
7053 | sqlite3_finalizesqlite3_api->finalize(stmtChunks); | |||
7054 | array_cleanup(arrayRowidsIn); | |||
7055 | sqlite3_freesqlite3_api->free(arrayRowidsIn); | |||
7056 | queryVectorCleanup(queryVector); | |||
7057 | if(aMetadataIn) { | |||
7058 | for(size_t i = 0; i < aMetadataIn->length; i++) { | |||
7059 | struct Vec0MetadataIn* item = &((struct Vec0MetadataIn *) aMetadataIn->z)[i]; | |||
7060 | for(size_t j = 0; j < item->array.length; j++) { | |||
7061 | if(p->metadata_columns[item->metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { | |||
7062 | struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry*)item->array.z)[j]; | |||
7063 | sqlite3_freesqlite3_api->free(entry.zString); | |||
7064 | } | |||
7065 | } | |||
7066 | array_cleanup(&item->array); | |||
7067 | } | |||
7068 | array_cleanup(aMetadataIn); | |||
7069 | } | |||
7070 | ||||
7071 | sqlite3_freesqlite3_api->free(aMetadataIn); | |||
7072 | ||||
7073 | return rc; | |||
7074 | } | |||
7075 | ||||
7076 | int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) { | |||
7077 | int rc; | |||
7078 | char *zSql; | |||
7079 | struct vec0_query_fullscan_data *fullscan_data; | |||
7080 | ||||
7081 | fullscan_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*fullscan_data)); | |||
7082 | if (!fullscan_data) { | |||
7083 | return SQLITE_NOMEM7; | |||
7084 | } | |||
7085 | memset(fullscan_data, 0, sizeof(*fullscan_data)); | |||
7086 | ||||
7087 | zSql = sqlite3_mprintfsqlite3_api->mprintf(" SELECT rowid " | |||
7088 | " FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" | |||
7089 | " ORDER by chunk_id, chunk_offset ", | |||
7090 | p->schemaName, p->tableName); | |||
7091 | if (!zSql) { | |||
7092 | rc = SQLITE_NOMEM7; | |||
7093 | goto error; | |||
7094 | } | |||
7095 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &fullscan_data->rowids_stmt, NULL((void*)0)); | |||
7096 | sqlite3_freesqlite3_api->free(zSql); | |||
7097 | if (rc != SQLITE_OK0) { | |||
7098 | // IMP: V09901_26739 | |||
7099 | vtab_set_error(&p->base, "Error preparing rowid scan: %s", | |||
7100 | sqlite3_errmsgsqlite3_api->errmsg(p->db)); | |||
7101 | goto error; | |||
7102 | } | |||
7103 | ||||
7104 | rc = sqlite3_stepsqlite3_api->step(fullscan_data->rowids_stmt); | |||
7105 | ||||
7106 | // DONE when there's no rowids, ROW when there are, both "success" | |||
7107 | if (!(rc == SQLITE_ROW100 || rc == SQLITE_DONE101)) { | |||
7108 | goto error; | |||
7109 | } | |||
7110 | ||||
7111 | fullscan_data->done = rc == SQLITE_DONE101; | |||
7112 | pCur->query_plan = VEC0_QUERY_PLAN_FULLSCAN; | |||
7113 | pCur->fullscan_data = fullscan_data; | |||
7114 | return SQLITE_OK0; | |||
7115 | ||||
7116 | error: | |||
7117 | vec0_query_fullscan_data_clear(fullscan_data); | |||
7118 | sqlite3_freesqlite3_api->free(fullscan_data); | |||
7119 | return rc; | |||
7120 | } | |||
7121 | ||||
7122 | int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc, | |||
7123 | sqlite3_value **argv) { | |||
7124 | int rc; | |||
7125 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 7125, __extension__ __PRETTY_FUNCTION__); })); | |||
7126 | i64 rowid; | |||
7127 | struct vec0_query_point_data *point_data = NULL((void*)0); | |||
7128 | ||||
7129 | point_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*point_data)); | |||
7130 | if (!point_data) { | |||
7131 | rc = SQLITE_NOMEM7; | |||
7132 | goto error; | |||
7133 | } | |||
7134 | memset(point_data, 0, sizeof(*point_data)); | |||
7135 | ||||
7136 | if (p->pkIsText) { | |||
7137 | rc = vec0_rowid_from_id(p, argv[0], &rowid); | |||
7138 | if (rc == SQLITE_EMPTY16) { | |||
7139 | goto eof; | |||
7140 | } | |||
7141 | if (rc != SQLITE_OK0) { | |||
7142 | goto error; | |||
7143 | } | |||
7144 | } else { | |||
7145 | rowid = sqlite3_value_int64sqlite3_api->value_int64(argv[0]); | |||
7146 | } | |||
7147 | ||||
7148 | for (int i = 0; i < p->numVectorColumns; i++) { | |||
7149 | rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL((void*)0)); | |||
7150 | if (rc == SQLITE_EMPTY16) { | |||
7151 | goto eof; | |||
7152 | } | |||
7153 | if (rc != SQLITE_OK0) { | |||
7154 | goto error; | |||
7155 | } | |||
7156 | } | |||
7157 | ||||
7158 | point_data->rowid = rowid; | |||
7159 | point_data->done = 0; | |||
7160 | pCur->point_data = point_data; | |||
7161 | pCur->query_plan = VEC0_QUERY_PLAN_POINT; | |||
7162 | return SQLITE_OK0; | |||
7163 | ||||
7164 | eof: | |||
7165 | point_data->rowid = rowid; | |||
7166 | point_data->done = 1; | |||
7167 | pCur->point_data = point_data; | |||
7168 | pCur->query_plan = VEC0_QUERY_PLAN_POINT; | |||
7169 | return SQLITE_OK0; | |||
7170 | ||||
7171 | error: | |||
7172 | vec0_query_point_data_clear(point_data); | |||
7173 | sqlite3_freesqlite3_api->free(point_data); | |||
7174 | return rc; | |||
7175 | } | |||
7176 | ||||
7177 | static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, | |||
7178 | const char *idxStr, int argc, sqlite3_value **argv) { | |||
7179 | vec0_vtab *p = (vec0_vtab *)pVtabCursor->pVtab; | |||
7180 | vec0_cursor *pCur = (vec0_cursor *)pVtabCursor; | |||
7181 | vec0_cursor_clear(pCur); | |||
7182 | ||||
7183 | int idxStrLength = strlen(idxStr); | |||
7184 | if(idxStrLength <= 0) { | |||
7185 | return SQLITE_ERROR1; | |||
7186 | } | |||
7187 | if((idxStrLength-1) % 4 != 0) { | |||
7188 | return SQLITE_ERROR1; | |||
7189 | } | |||
7190 | int numValueEntries = (idxStrLength-1) / 4; | |||
7191 | if(numValueEntries != argc) { | |||
7192 | return SQLITE_ERROR1; | |||
7193 | } | |||
7194 | ||||
7195 | char query_plan = idxStr[0]; | |||
7196 | switch(query_plan) { | |||
7197 | case VEC0_QUERY_PLAN_FULLSCAN: | |||
7198 | return vec0Filter_fullscan(p, pCur); | |||
7199 | case VEC0_QUERY_PLAN_KNN: | |||
7200 | return vec0Filter_knn(pCur, p, idxNum, idxStr, argc, argv); | |||
7201 | case VEC0_QUERY_PLAN_POINT: | |||
7202 | return vec0Filter_point(pCur, p, argc, argv); | |||
7203 | default: | |||
7204 | vtab_set_error(pVtabCursor->pVtab, "unknown idxStr '%s'", idxStr); | |||
7205 | return SQLITE_ERROR1; | |||
7206 | } | |||
7207 | } | |||
7208 | ||||
7209 | static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { | |||
7210 | vec0_cursor *pCur = (vec0_cursor *)cur; | |||
7211 | switch (pCur->query_plan) { | |||
7212 | case VEC0_QUERY_PLAN_FULLSCAN: { | |||
7213 | *pRowid = sqlite3_column_int64sqlite3_api->column_int64(pCur->fullscan_data->rowids_stmt, 0); | |||
7214 | return SQLITE_OK0; | |||
7215 | } | |||
7216 | case VEC0_QUERY_PLAN_POINT: { | |||
7217 | *pRowid = pCur->point_data->rowid; | |||
7218 | return SQLITE_OK0; | |||
7219 | } | |||
7220 | case VEC0_QUERY_PLAN_KNN: { | |||
7221 | vtab_set_error(cur->pVtab, | |||
7222 | "Internal sqlite-vec error: expected point query plan in " | |||
7223 | "vec0Rowid, found %d", | |||
7224 | pCur->query_plan); | |||
7225 | return SQLITE_ERROR1; | |||
7226 | } | |||
7227 | } | |||
7228 | return SQLITE_ERROR1; | |||
7229 | } | |||
7230 | ||||
7231 | static int vec0Next(sqlite3_vtab_cursor *cur) { | |||
7232 | vec0_cursor *pCur = (vec0_cursor *)cur; | |||
7233 | switch (pCur->query_plan) { | |||
7234 | case VEC0_QUERY_PLAN_FULLSCAN: { | |||
7235 | if (!pCur->fullscan_data) { | |||
7236 | return SQLITE_ERROR1; | |||
7237 | } | |||
7238 | int rc = sqlite3_stepsqlite3_api->step(pCur->fullscan_data->rowids_stmt); | |||
7239 | if (rc == SQLITE_DONE101) { | |||
7240 | pCur->fullscan_data->done = 1; | |||
7241 | return SQLITE_OK0; | |||
7242 | } | |||
7243 | if (rc == SQLITE_ROW100) { | |||
7244 | return SQLITE_OK0; | |||
7245 | } | |||
7246 | return SQLITE_ERROR1; | |||
7247 | } | |||
7248 | case VEC0_QUERY_PLAN_KNN: { | |||
7249 | if (!pCur->knn_data) { | |||
7250 | return SQLITE_ERROR1; | |||
7251 | } | |||
7252 | ||||
7253 | pCur->knn_data->current_idx++; | |||
7254 | return SQLITE_OK0; | |||
7255 | } | |||
7256 | case VEC0_QUERY_PLAN_POINT: { | |||
7257 | if (!pCur->point_data) { | |||
7258 | return SQLITE_ERROR1; | |||
7259 | } | |||
7260 | pCur->point_data->done = 1; | |||
7261 | return SQLITE_OK0; | |||
7262 | } | |||
7263 | } | |||
7264 | return SQLITE_ERROR1; | |||
7265 | } | |||
7266 | ||||
7267 | static int vec0Eof(sqlite3_vtab_cursor *cur) { | |||
7268 | vec0_cursor *pCur = (vec0_cursor *)cur; | |||
7269 | switch (pCur->query_plan) { | |||
7270 | case VEC0_QUERY_PLAN_FULLSCAN: { | |||
7271 | if (!pCur->fullscan_data) { | |||
7272 | return 1; | |||
7273 | } | |||
7274 | return pCur->fullscan_data->done; | |||
7275 | } | |||
7276 | case VEC0_QUERY_PLAN_KNN: { | |||
7277 | if (!pCur->knn_data) { | |||
7278 | return 1; | |||
7279 | } | |||
7280 | // return (pCur->knn_data->current_idx >= pCur->knn_data->k) || | |||
7281 | // (pCur->knn_data->distances[pCur->knn_data->current_idx] == FLT_MAX); | |||
7282 | return (pCur->knn_data->current_idx >= pCur->knn_data->k_used); | |||
7283 | } | |||
7284 | case VEC0_QUERY_PLAN_POINT: { | |||
7285 | if (!pCur->point_data) { | |||
7286 | return 1; | |||
7287 | } | |||
7288 | return pCur->point_data->done; | |||
7289 | } | |||
7290 | } | |||
7291 | return 1; | |||
7292 | } | |||
7293 | ||||
7294 | static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur, | |||
7295 | sqlite3_context *context, int i) { | |||
7296 | if (!pCur->fullscan_data) { | |||
7297 | sqlite3_result_errorsqlite3_api->result_error( | |||
7298 | context, "Internal sqlite-vec error: fullscan_data is NULL.", -1); | |||
7299 | return SQLITE_ERROR1; | |||
7300 | } | |||
7301 | i64 rowid = sqlite3_column_int64sqlite3_api->column_int64(pCur->fullscan_data->rowids_stmt, 0); | |||
7302 | if (i == VEC0_COLUMN_ID0) { | |||
7303 | return vec0_result_id(pVtab, context, rowid); | |||
7304 | } | |||
7305 | else if (vec0_column_idx_is_vector(pVtab, i)) { | |||
7306 | void *v; | |||
7307 | int sz; | |||
7308 | int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i); | |||
7309 | int rc = vec0_get_vector_data(pVtab, rowid, vector_idx, &v, &sz); | |||
7310 | if (rc != SQLITE_OK0) { | |||
7311 | return rc; | |||
7312 | } | |||
7313 | sqlite3_result_blobsqlite3_api->result_blob(context, v, sz, sqlite3_freesqlite3_api->free); | |||
7314 | sqlite3_result_subtypesqlite3_api->result_subtype(context, | |||
7315 | pVtab->vector_columns[vector_idx].element_type); | |||
7316 | ||||
7317 | } | |||
7318 | else if (i == vec0_column_distance_idx(pVtab)) { | |||
7319 | sqlite3_result_nullsqlite3_api->result_null(context); | |||
7320 | } | |||
7321 | else if(vec0_column_idx_is_partition(pVtab, i)) { | |||
7322 | int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i); | |||
7323 | sqlite3_value * v; | |||
7324 | int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v); | |||
7325 | if(rc == SQLITE_OK0) { | |||
7326 | sqlite3_result_valuesqlite3_api->result_value(context, v); | |||
7327 | sqlite3_value_freesqlite3_api->value_free(v); | |||
7328 | }else { | |||
7329 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); | |||
7330 | } | |||
7331 | } | |||
7332 | else if(vec0_column_idx_is_auxiliary(pVtab, i)) { | |||
7333 | int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i); | |||
7334 | sqlite3_value * v; | |||
7335 | int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v); | |||
7336 | if(rc == SQLITE_OK0) { | |||
7337 | sqlite3_result_valuesqlite3_api->result_value(context, v); | |||
7338 | sqlite3_value_freesqlite3_api->value_free(v); | |||
7339 | }else { | |||
7340 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); | |||
7341 | } | |||
7342 | } | |||
7343 | ||||
7344 | else if(vec0_column_idx_is_metadata(pVtab, i)) { | |||
7345 | if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { | |||
7346 | return SQLITE_OK0; | |||
7347 | } | |||
7348 | int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); | |||
7349 | int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); | |||
7350 | if(rc != SQLITE_OK0) { | |||
7351 | // IMP: V15466_32305 | |||
7352 | const char * zErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
7353 | "Could not extract metadata value for column %.*s at rowid %lld", | |||
7354 | pVtab->metadata_columns[metadata_idx].name_length, | |||
7355 | pVtab->metadata_columns[metadata_idx].name, rowid | |||
7356 | ); | |||
7357 | if(zErr) { | |||
7358 | sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1); | |||
7359 | sqlite3_freesqlite3_api->free((void *) zErr); | |||
7360 | }else { | |||
7361 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
7362 | } | |||
7363 | } | |||
7364 | } | |||
7365 | ||||
7366 | return SQLITE_OK0; | |||
7367 | } | |||
7368 | ||||
7369 | static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur, | |||
7370 | sqlite3_context *context, int i) { | |||
7371 | if (!pCur->point_data) { | |||
7372 | sqlite3_result_errorsqlite3_api->result_error(context, | |||
7373 | "Internal sqlite-vec error: point_data is NULL.", -1); | |||
7374 | return SQLITE_ERROR1; | |||
7375 | } | |||
7376 | if (i == VEC0_COLUMN_ID0) { | |||
7377 | return vec0_result_id(pVtab, context, pCur->point_data->rowid); | |||
7378 | } | |||
7379 | else if (i == vec0_column_distance_idx(pVtab)) { | |||
7380 | sqlite3_result_nullsqlite3_api->result_null(context); | |||
7381 | return SQLITE_OK0; | |||
7382 | } | |||
7383 | else if (vec0_column_idx_is_vector(pVtab, i)) { | |||
7384 | if (sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { | |||
7385 | sqlite3_result_nullsqlite3_api->result_null(context); | |||
7386 | return SQLITE_OK0; | |||
7387 | } | |||
7388 | int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i); | |||
7389 | sqlite3_result_blobsqlite3_api->result_blob( | |||
7390 | context, pCur->point_data->vectors[vector_idx], | |||
7391 | vector_column_byte_size(pVtab->vector_columns[vector_idx]), | |||
7392 | SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
7393 | sqlite3_result_subtypesqlite3_api->result_subtype(context, | |||
7394 | pVtab->vector_columns[vector_idx].element_type); | |||
7395 | return SQLITE_OK0; | |||
7396 | } | |||
7397 | else if(vec0_column_idx_is_partition(pVtab, i)) { | |||
7398 | if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { | |||
7399 | return SQLITE_OK0; | |||
7400 | } | |||
7401 | int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i); | |||
7402 | i64 rowid = pCur->point_data->rowid; | |||
7403 | sqlite3_value * v; | |||
7404 | int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v); | |||
7405 | if(rc == SQLITE_OK0) { | |||
7406 | sqlite3_result_valuesqlite3_api->result_value(context, v); | |||
7407 | sqlite3_value_freesqlite3_api->value_free(v); | |||
7408 | }else { | |||
7409 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); | |||
7410 | } | |||
7411 | } | |||
7412 | else if(vec0_column_idx_is_auxiliary(pVtab, i)) { | |||
7413 | if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { | |||
7414 | return SQLITE_OK0; | |||
7415 | } | |||
7416 | i64 rowid = pCur->point_data->rowid; | |||
7417 | int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i); | |||
7418 | sqlite3_value * v; | |||
7419 | int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v); | |||
7420 | if(rc == SQLITE_OK0) { | |||
7421 | sqlite3_result_valuesqlite3_api->result_value(context, v); | |||
7422 | sqlite3_value_freesqlite3_api->value_free(v); | |||
7423 | }else { | |||
7424 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); | |||
7425 | } | |||
7426 | } | |||
7427 | ||||
7428 | else if(vec0_column_idx_is_metadata(pVtab, i)) { | |||
7429 | if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { | |||
7430 | return SQLITE_OK0; | |||
7431 | } | |||
7432 | i64 rowid = pCur->point_data->rowid; | |||
7433 | int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); | |||
7434 | int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); | |||
7435 | if(rc != SQLITE_OK0) { | |||
7436 | const char * zErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
7437 | "Could not extract metadata value for column %.*s at rowid %lld", | |||
7438 | pVtab->metadata_columns[metadata_idx].name_length, | |||
7439 | pVtab->metadata_columns[metadata_idx].name, rowid | |||
7440 | ); | |||
7441 | if(zErr) { | |||
7442 | sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1); | |||
7443 | sqlite3_freesqlite3_api->free((void *) zErr); | |||
7444 | }else { | |||
7445 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
7446 | } | |||
7447 | } | |||
7448 | } | |||
7449 | ||||
7450 | return SQLITE_OK0; | |||
7451 | } | |||
7452 | ||||
7453 | static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur, | |||
7454 | sqlite3_context *context, int i) { | |||
7455 | if (!pCur->knn_data) { | |||
7456 | sqlite3_result_errorsqlite3_api->result_error(context, | |||
7457 | "Internal sqlite-vec error: knn_data is NULL.", -1); | |||
7458 | return SQLITE_ERROR1; | |||
7459 | } | |||
7460 | if (i == VEC0_COLUMN_ID0) { | |||
7461 | i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; | |||
7462 | return vec0_result_id(pVtab, context, rowid); | |||
7463 | } | |||
7464 | else if (i == vec0_column_distance_idx(pVtab)) { | |||
7465 | sqlite3_result_doublesqlite3_api->result_double( | |||
7466 | context, pCur->knn_data->distances[pCur->knn_data->current_idx]); | |||
7467 | return SQLITE_OK0; | |||
7468 | } | |||
7469 | else if (vec0_column_idx_is_vector(pVtab, i)) { | |||
7470 | void *out; | |||
7471 | int sz; | |||
7472 | int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i); | |||
7473 | int rc = vec0_get_vector_data( | |||
7474 | pVtab, pCur->knn_data->rowids[pCur->knn_data->current_idx], vector_idx, | |||
7475 | &out, &sz); | |||
7476 | if (rc != SQLITE_OK0) { | |||
7477 | return rc; | |||
7478 | } | |||
7479 | sqlite3_result_blobsqlite3_api->result_blob(context, out, sz, sqlite3_freesqlite3_api->free); | |||
7480 | sqlite3_result_subtypesqlite3_api->result_subtype(context, | |||
7481 | pVtab->vector_columns[vector_idx].element_type); | |||
7482 | return SQLITE_OK0; | |||
7483 | } | |||
7484 | else if(vec0_column_idx_is_partition(pVtab, i)) { | |||
7485 | int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i); | |||
7486 | i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; | |||
7487 | sqlite3_value * v; | |||
7488 | int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v); | |||
7489 | if(rc == SQLITE_OK0) { | |||
7490 | sqlite3_result_valuesqlite3_api->result_value(context, v); | |||
7491 | sqlite3_value_freesqlite3_api->value_free(v); | |||
7492 | }else { | |||
7493 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); | |||
7494 | } | |||
7495 | } | |||
7496 | else if(vec0_column_idx_is_auxiliary(pVtab, i)) { | |||
7497 | int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i); | |||
7498 | i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; | |||
7499 | sqlite3_value * v; | |||
7500 | int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v); | |||
7501 | if(rc == SQLITE_OK0) { | |||
7502 | sqlite3_result_valuesqlite3_api->result_value(context, v); | |||
7503 | sqlite3_value_freesqlite3_api->value_free(v); | |||
7504 | }else { | |||
7505 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); | |||
7506 | } | |||
7507 | } | |||
7508 | ||||
7509 | else if(vec0_column_idx_is_metadata(pVtab, i)) { | |||
7510 | int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); | |||
7511 | i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; | |||
7512 | int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); | |||
7513 | if(rc != SQLITE_OK0) { | |||
7514 | const char * zErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
7515 | "Could not extract metadata value for column %.*s at rowid %lld", | |||
7516 | pVtab->metadata_columns[metadata_idx].name_length, | |||
7517 | pVtab->metadata_columns[metadata_idx].name, rowid | |||
7518 | ); | |||
7519 | if(zErr) { | |||
7520 | sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1); | |||
7521 | sqlite3_freesqlite3_api->free((void *) zErr); | |||
7522 | }else { | |||
7523 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
7524 | } | |||
7525 | } | |||
7526 | } | |||
7527 | ||||
7528 | return SQLITE_OK0; | |||
7529 | } | |||
7530 | ||||
7531 | static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context, | |||
7532 | int i) { | |||
7533 | vec0_cursor *pCur = (vec0_cursor *)cur; | |||
7534 | vec0_vtab *pVtab = (vec0_vtab *)cur->pVtab; | |||
7535 | switch (pCur->query_plan) { | |||
7536 | case VEC0_QUERY_PLAN_FULLSCAN: { | |||
7537 | return vec0Column_fullscan(pVtab, pCur, context, i); | |||
7538 | } | |||
7539 | case VEC0_QUERY_PLAN_KNN: { | |||
7540 | return vec0Column_knn(pVtab, pCur, context, i); | |||
7541 | } | |||
7542 | case VEC0_QUERY_PLAN_POINT: { | |||
7543 | return vec0Column_point(pVtab, pCur, context, i); | |||
7544 | } | |||
7545 | } | |||
7546 | return SQLITE_OK0; | |||
7547 | } | |||
7548 | ||||
7549 | /** | |||
7550 | * @brief Handles the "insert rowid" step of a row insert operation of a vec0 | |||
7551 | * table. | |||
7552 | * | |||
7553 | * This function will insert a new row into the _rowids vec0 shadow table. | |||
7554 | * | |||
7555 | * @param p: virtual table | |||
7556 | * @param idValue: Value containing the inserted rowid/id value. | |||
7557 | * @param rowid: Output rowid, will point to the "real" i64 rowid | |||
7558 | * value that was inserted | |||
7559 | * @return int SQLITE_OK on success, error code on failure | |||
7560 | */ | |||
7561 | int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue, | |||
7562 | i64 *rowid) { | |||
7563 | ||||
7564 | /** | |||
7565 | * An insert into a vec0 table can happen a few different ways: | |||
7566 | * 1) With default INTEGER primary key: With a supplied i64 rowid | |||
7567 | * 2) With default INTEGER primary key: WITHOUT a supplied rowid | |||
7568 | * 3) With TEXT primary key: supplied text rowid | |||
7569 | */ | |||
7570 | ||||
7571 | int rc; | |||
7572 | ||||
7573 | // Option 3: vtab has a user-defined TEXT primary key, so ensure a text value | |||
7574 | // is provided. | |||
7575 | if (p->pkIsText) { | |||
7576 | if (sqlite3_value_typesqlite3_api->value_type(idValue) != SQLITE_TEXT3) { | |||
7577 | // IMP: V04200_21039 | |||
7578 | vtab_set_error(&p->base, | |||
7579 | "The %s virtual table was declared with a TEXT primary " | |||
7580 | "key, but a non-TEXT value was provided in an INSERT.", | |||
7581 | p->tableName); | |||
7582 | return SQLITE_ERROR1; | |||
7583 | } | |||
7584 | ||||
7585 | return vec0_rowids_insert_id(p, idValue, rowid); | |||
7586 | } | |||
7587 | ||||
7588 | // Option 1: User supplied a i64 rowid | |||
7589 | if (sqlite3_value_typesqlite3_api->value_type(idValue) == SQLITE_INTEGER1) { | |||
7590 | i64 suppliedRowid = sqlite3_value_int64sqlite3_api->value_int64(idValue); | |||
7591 | rc = vec0_rowids_insert_rowid(p, suppliedRowid); | |||
7592 | if (rc == SQLITE_OK0) { | |||
7593 | *rowid = suppliedRowid; | |||
7594 | } | |||
7595 | return rc; | |||
7596 | } | |||
7597 | ||||
7598 | // Option 2: User did not suppled a rowid | |||
7599 | ||||
7600 | if (sqlite3_value_typesqlite3_api->value_type(idValue) != SQLITE_NULL5) { | |||
7601 | // IMP: V30855_14925 | |||
7602 | vtab_set_error(&p->base, | |||
7603 | "Only integers are allows for primary key values on %s", | |||
7604 | p->tableName); | |||
7605 | return SQLITE_ERROR1; | |||
7606 | } | |||
7607 | // NULL to get next auto-incremented value | |||
7608 | return vec0_rowids_insert_id(p, NULL((void*)0), rowid); | |||
7609 | } | |||
7610 | ||||
7611 | /** | |||
7612 | * @brief Determines the "next available" chunk position for a newly inserted | |||
7613 | * vec0 row. | |||
7614 | * | |||
7615 | * This operation may insert a new "blank" chunk the _chunks table, if there is | |||
7616 | * no more space in previous chunks. | |||
7617 | * | |||
7618 | * @param p: virtual table | |||
7619 | * @param partitionKeyValues: array of partition key column values, to constrain | |||
7620 | * against any partition key columns. | |||
7621 | * @param chunk_rowid: Output rowid of the chunk in the _chunks virtual table | |||
7622 | * that has the avialabiity. | |||
7623 | * @param chunk_offset: Output the index of the available space insert the | |||
7624 | * chunk, based on the index of the first available validity bit. | |||
7625 | * @param pBlobValidity: Output blob of the validity column of the available | |||
7626 | * chunk. Will be opened with read/write permissions. | |||
7627 | * @param pValidity: Output buffer of the original chunk's validity column. | |||
7628 | * Needs to be cleaned up with sqlite3_free(). | |||
7629 | * @return int SQLITE_OK on success, error code on failure | |||
7630 | */ | |||
7631 | int vec0Update_InsertNextAvailableStep( | |||
7632 | vec0_vtab *p, | |||
7633 | sqlite3_value ** partitionKeyValues, | |||
7634 | i64 *chunk_rowid, i64 *chunk_offset, | |||
7635 | sqlite3_blob **blobChunksValidity, | |||
7636 | const unsigned char **bufferChunksValidity) { | |||
7637 | ||||
7638 | int rc; | |||
7639 | i64 validitySize; | |||
7640 | *chunk_offset = -1; | |||
7641 | ||||
7642 | rc = vec0_get_latest_chunk_rowid(p, chunk_rowid, partitionKeyValues); | |||
7643 | if(rc == SQLITE_EMPTY16) { | |||
7644 | goto done; | |||
7645 | } | |||
7646 | if (rc != SQLITE_OK0) { | |||
7647 | goto cleanup; | |||
7648 | } | |||
7649 | ||||
7650 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "validity", | |||
7651 | *chunk_rowid, 1, blobChunksValidity); | |||
7652 | if (rc != SQLITE_OK0) { | |||
7653 | // IMP: V22053_06123 | |||
7654 | vtab_set_error(&p->base, | |||
7655 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7656 | "could not open validity blob on %s.%s.%lld", | |||
7657 | p->schemaName, p->shadowChunksName, *chunk_rowid); | |||
7658 | goto cleanup; | |||
7659 | } | |||
7660 | ||||
7661 | validitySize = sqlite3_blob_bytessqlite3_api->blob_bytes(*blobChunksValidity); | |||
7662 | if (validitySize != p->chunk_size / CHAR_BIT8) { | |||
7663 | // IMP: V29362_13432 | |||
7664 | vtab_set_error(&p->base, | |||
7665 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7666 | "validity blob size mismatch on " | |||
7667 | "%s.%s.%lld, expected %lld but received %lld.", | |||
7668 | p->schemaName, p->shadowChunksName, *chunk_rowid, | |||
7669 | (i64)(p->chunk_size / CHAR_BIT8), validitySize); | |||
7670 | rc = SQLITE_ERROR1; | |||
7671 | goto cleanup; | |||
7672 | } | |||
7673 | ||||
7674 | *bufferChunksValidity = sqlite3_mallocsqlite3_api->malloc(validitySize); | |||
7675 | if (!(*bufferChunksValidity)) { | |||
7676 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7677 | "Could not allocate memory for validity bitmap"); | |||
7678 | rc = SQLITE_NOMEM7; | |||
7679 | goto cleanup; | |||
7680 | } | |||
7681 | ||||
7682 | rc = sqlite3_blob_readsqlite3_api->blob_read(*blobChunksValidity, (void *)*bufferChunksValidity, | |||
7683 | validitySize, 0); | |||
7684 | ||||
7685 | if (rc != SQLITE_OK0) { | |||
7686 | vtab_set_error(&p->base, | |||
7687 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7688 | "Could not read validity bitmap for %s.%s.%lld", | |||
7689 | p->schemaName, p->shadowChunksName, *chunk_rowid); | |||
7690 | goto cleanup; | |||
7691 | } | |||
7692 | ||||
7693 | // find the next available offset, ie first `0` in the bitmap. | |||
7694 | for (int i = 0; i < validitySize; i++) { | |||
7695 | if ((*bufferChunksValidity)[i] == 0b11111111) | |||
7696 | continue; | |||
7697 | for (int j = 0; j < CHAR_BIT8; j++) { | |||
7698 | if (((((*bufferChunksValidity)[i] >> j) & 1) == 0)) { | |||
7699 | *chunk_offset = (i * CHAR_BIT8) + j; | |||
7700 | goto done; | |||
7701 | } | |||
7702 | } | |||
7703 | } | |||
7704 | ||||
7705 | done: | |||
7706 | // latest chunk was full, so need to create a new one | |||
7707 | if (*chunk_offset == -1) { | |||
7708 | rc = vec0_new_chunk(p, partitionKeyValues, chunk_rowid); | |||
7709 | if (rc != SQLITE_OK0) { | |||
7710 | // IMP: V08441_25279 | |||
7711 | vtab_set_error(&p->base, | |||
7712 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " "Could not insert a new vector chunk"); | |||
7713 | rc = SQLITE_ERROR1; // otherwise raises a DatabaseError and not operational | |||
7714 | // error? | |||
7715 | goto cleanup; | |||
7716 | } | |||
7717 | *chunk_offset = 0; | |||
7718 | ||||
7719 | // blobChunksValidity and pValidity are stale, pointing to the previous | |||
7720 | // (full) chunk. to re-assign them | |||
7721 | rc = sqlite3_blob_closesqlite3_api->blob_close(*blobChunksValidity); | |||
7722 | sqlite3_freesqlite3_api->free((void *)*bufferChunksValidity); | |||
7723 | *blobChunksValidity = NULL((void*)0); | |||
7724 | *bufferChunksValidity = NULL((void*)0); | |||
7725 | if (rc != SQLITE_OK0) { | |||
7726 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7727 | "unknown error, blobChunksValidity could not be closed, " | |||
7728 | "please file an issue."); | |||
7729 | rc = SQLITE_ERROR1; | |||
7730 | goto cleanup; | |||
7731 | } | |||
7732 | ||||
7733 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, | |||
7734 | "validity", *chunk_rowid, 1, blobChunksValidity); | |||
7735 | if (rc != SQLITE_OK0) { | |||
7736 | vtab_set_error( | |||
7737 | &p->base, | |||
7738 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7739 | "Could not open validity blob for newly created chunk %s.%s.%lld", | |||
7740 | p->schemaName, p->shadowChunksName, *chunk_rowid); | |||
7741 | goto cleanup; | |||
7742 | } | |||
7743 | validitySize = sqlite3_blob_bytessqlite3_api->blob_bytes(*blobChunksValidity); | |||
7744 | if (validitySize != p->chunk_size / CHAR_BIT8) { | |||
7745 | vtab_set_error(&p->base, | |||
7746 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7747 | "validity blob size mismatch for newly created chunk " | |||
7748 | "%s.%s.%lld. Exepcted %lld, got %lld", | |||
7749 | p->schemaName, p->shadowChunksName, *chunk_rowid, | |||
7750 | p->chunk_size / CHAR_BIT8, validitySize); | |||
7751 | goto cleanup; | |||
7752 | } | |||
7753 | *bufferChunksValidity = sqlite3_mallocsqlite3_api->malloc(validitySize); | |||
7754 | rc = sqlite3_blob_readsqlite3_api->blob_read(*blobChunksValidity, (void *)*bufferChunksValidity, | |||
7755 | validitySize, 0); | |||
7756 | if (rc != SQLITE_OK0) { | |||
7757 | vtab_set_error(&p->base, | |||
7758 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7759 | "could not read validity blob newly created chunk " | |||
7760 | "%s.%s.%lld", | |||
7761 | p->schemaName, p->shadowChunksName, *chunk_rowid); | |||
7762 | goto cleanup; | |||
7763 | } | |||
7764 | } | |||
7765 | ||||
7766 | rc = SQLITE_OK0; | |||
7767 | ||||
7768 | cleanup: | |||
7769 | return rc; | |||
7770 | } | |||
7771 | ||||
7772 | /** | |||
7773 | * @brief Write the vector data into the provided vector blob at the given | |||
7774 | * offset | |||
7775 | * | |||
7776 | * @param blobVectors SQLite BLOB to write to | |||
7777 | * @param chunk_offset the "offset" (ie validity bitmap position) to write the | |||
7778 | * vector to | |||
7779 | * @param bVector pointer to the vector containing data | |||
7780 | * @param dimensions how many dimensions the vector has | |||
7781 | * @param element_type the vector type | |||
7782 | * @return result of sqlite3_blob_write, SQLITE_OK on success, otherwise failure | |||
7783 | */ | |||
7784 | static int | |||
7785 | vec0_write_vector_to_vector_blob(sqlite3_blob *blobVectors, i64 chunk_offset, | |||
7786 | const void *bVector, size_t dimensions, | |||
7787 | enum VectorElementType element_type) { | |||
7788 | int n; | |||
7789 | int offset; | |||
7790 | ||||
7791 | switch (element_type) { | |||
7792 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: | |||
7793 | n = dimensions * sizeof(f32); | |||
7794 | offset = chunk_offset * dimensions * sizeof(f32); | |||
7795 | break; | |||
7796 | case SQLITE_VEC_ELEMENT_TYPE_INT8: | |||
7797 | n = dimensions * sizeof(i8); | |||
7798 | offset = chunk_offset * dimensions * sizeof(i8); | |||
7799 | break; | |||
7800 | case SQLITE_VEC_ELEMENT_TYPE_BIT: | |||
7801 | n = dimensions / CHAR_BIT8; | |||
7802 | offset = chunk_offset * dimensions / CHAR_BIT8; | |||
7803 | break; | |||
7804 | } | |||
7805 | ||||
7806 | return sqlite3_blob_writesqlite3_api->blob_write(blobVectors, bVector, n, offset); | |||
7807 | } | |||
7808 | ||||
7809 | /** | |||
7810 | * @brief | |||
7811 | * | |||
7812 | * @param p vec0 virtual table | |||
7813 | * @param chunk_rowid: which chunk to write to | |||
7814 | * @param chunk_offset: the offset inside the chunk to write the vector to. | |||
7815 | * @param rowid: the rowid of the inserting row | |||
7816 | * @param vectorDatas: array of the vector data to insert | |||
7817 | * @param blobValidity: writeable validity blob of the row's assigned chunk. | |||
7818 | * @param validity: snapshot buffer of the valdity column from the row's | |||
7819 | * assigned chunk. | |||
7820 | * @return int SQLITE_OK on success, error code on failure | |||
7821 | */ | |||
7822 | int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid, | |||
7823 | i64 chunk_offset, i64 rowid, | |||
7824 | void *vectorDatas[], | |||
7825 | sqlite3_blob *blobChunksValidity, | |||
7826 | const unsigned char *bufferChunksValidity) { | |||
7827 | int rc, brc; | |||
7828 | sqlite3_blob *blobChunksRowids = NULL((void*)0); | |||
7829 | ||||
7830 | // mark the validity bit for this row in the chunk's validity bitmap | |||
7831 | // Get the byte offset of the bitmap | |||
7832 | char unsigned bx = bufferChunksValidity[chunk_offset / CHAR_BIT8]; | |||
7833 | // set the bit at the chunk_offset position inside that byte | |||
7834 | bx = bx | (1 << (chunk_offset % CHAR_BIT8)); | |||
7835 | // write that 1 byte | |||
7836 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksValidity, &bx, 1, chunk_offset / CHAR_BIT8); | |||
7837 | if (rc != SQLITE_OK0) { | |||
7838 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not mark validity bit "); | |||
7839 | return rc; | |||
7840 | } | |||
7841 | ||||
7842 | // Go insert the vector data into the vector chunk shadow tables | |||
7843 | for (int i = 0; i < p->numVectorColumns; i++) { | |||
7844 | sqlite3_blob *blobVectors; | |||
7845 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i], | |||
7846 | "vectors", chunk_rowid, 1, &blobVectors); | |||
7847 | if (rc != SQLITE_OK0) { | |||
7848 | vtab_set_error(&p->base, "Error opening vector blob at %s.%s.%lld", | |||
7849 | p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid); | |||
7850 | goto cleanup; | |||
7851 | } | |||
7852 | ||||
7853 | i64 expected = | |||
7854 | p->chunk_size * vector_column_byte_size(p->vector_columns[i]); | |||
7855 | i64 actual = sqlite3_blob_bytessqlite3_api->blob_bytes(blobVectors); | |||
7856 | ||||
7857 | if (actual != expected) { | |||
7858 | // IMP: V16386_00456 | |||
7859 | vtab_set_error( | |||
7860 | &p->base, | |||
7861 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7862 | "vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld", | |||
7863 | p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid, expected, | |||
7864 | actual); | |||
7865 | rc = SQLITE_ERROR1; | |||
7866 | // already error, can ignore result code | |||
7867 | sqlite3_blob_closesqlite3_api->blob_close(blobVectors); | |||
7868 | goto cleanup; | |||
7869 | }; | |||
7870 | ||||
7871 | rc = vec0_write_vector_to_vector_blob( | |||
7872 | blobVectors, chunk_offset, vectorDatas[i], | |||
7873 | p->vector_columns[i].dimensions, p->vector_columns[i].element_type); | |||
7874 | if (rc != SQLITE_OK0) { | |||
7875 | vtab_set_error(&p->base, | |||
7876 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7877 | "could not write vector blob on %s.%s.%lld", | |||
7878 | p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid); | |||
7879 | rc = SQLITE_ERROR1; | |||
7880 | // already error, can ignore result code | |||
7881 | sqlite3_blob_closesqlite3_api->blob_close(blobVectors); | |||
7882 | goto cleanup; | |||
7883 | } | |||
7884 | rc = sqlite3_blob_closesqlite3_api->blob_close(blobVectors); | |||
7885 | if (rc != SQLITE_OK0) { | |||
7886 | vtab_set_error(&p->base, | |||
7887 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7888 | "could not close vector blob on %s.%s.%lld", | |||
7889 | p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid); | |||
7890 | rc = SQLITE_ERROR1; | |||
7891 | goto cleanup; | |||
7892 | } | |||
7893 | } | |||
7894 | ||||
7895 | // write the new rowid to the rowids column of the _chunks table | |||
7896 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", | |||
7897 | chunk_rowid, 1, &blobChunksRowids); | |||
7898 | if (rc != SQLITE_OK0) { | |||
7899 | // IMP: V09221_26060 | |||
7900 | vtab_set_error(&p->base, | |||
7901 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not open rowids blob on %s.%s.%lld", | |||
7902 | p->schemaName, p->shadowChunksName, chunk_rowid); | |||
7903 | goto cleanup; | |||
7904 | } | |||
7905 | i64 expected = p->chunk_size * sizeof(i64); | |||
7906 | i64 actual = sqlite3_blob_bytessqlite3_api->blob_bytes(blobChunksRowids); | |||
7907 | if (expected != actual) { | |||
7908 | // IMP: V12779_29618 | |||
7909 | vtab_set_error( | |||
7910 | &p->base, | |||
7911 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " | |||
7912 | "rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld", | |||
7913 | p->schemaName, p->shadowChunksName, chunk_rowid, expected, actual); | |||
7914 | rc = SQLITE_ERROR1; | |||
7915 | goto cleanup; | |||
7916 | } | |||
7917 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksRowids, &rowid, sizeof(i64), | |||
7918 | chunk_offset * sizeof(i64)); | |||
7919 | if (rc != SQLITE_OK0) { | |||
7920 | vtab_set_error( | |||
7921 | &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not write rowids blob on %s.%s.%lld", | |||
7922 | p->schemaName, p->shadowChunksName, chunk_rowid); | |||
7923 | rc = SQLITE_ERROR1; | |||
7924 | goto cleanup; | |||
7925 | } | |||
7926 | ||||
7927 | // Now with all the vectors inserted, go back and update the _rowids table | |||
7928 | // with the new chunk_rowid/chunk_offset values | |||
7929 | rc = vec0_rowids_update_position(p, rowid, chunk_rowid, chunk_offset); | |||
7930 | ||||
7931 | cleanup: | |||
7932 | brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksRowids); | |||
7933 | if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) { | |||
7934 | vtab_set_error( | |||
7935 | &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not close rowids blob on %s.%s.%lld", | |||
7936 | p->schemaName, p->shadowChunksName, chunk_rowid); | |||
7937 | return brc; | |||
7938 | } | |||
7939 | return rc; | |||
7940 | } | |||
7941 | ||||
7942 | int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid, i64 chunk_id, i64 chunk_offset, sqlite3_value * v, int isupdate) { | |||
7943 | int rc; | |||
7944 | struct Vec0MetadataColumnDefinition * metadata_column = &p->metadata_columns[metadata_column_idx]; | |||
7945 | vec0_metadata_column_kind kind = metadata_column->kind; | |||
7946 | ||||
7947 | // verify input value matches column type | |||
7948 | switch(kind) { | |||
7949 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { | |||
7950 | if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_INTEGER1 || ((sqlite3_value_intsqlite3_api->value_int(v) != 0) && (sqlite3_value_intsqlite3_api->value_int(v) != 1))) { | |||
7951 | rc = SQLITE_ERROR1; | |||
7952 | vtab_set_error(&p->base, "Expected 0 or 1 for BOOLEAN metadata column %.*s", metadata_column->name_length, metadata_column->name); | |||
7953 | goto done; | |||
7954 | } | |||
7955 | break; | |||
7956 | } | |||
7957 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { | |||
7958 | if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_INTEGER1) { | |||
7959 | rc = SQLITE_ERROR1; | |||
7960 | vtab_set_error(&p->base, "Expected integer for INTEGER metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v))); | |||
7961 | goto done; | |||
7962 | } | |||
7963 | break; | |||
7964 | } | |||
7965 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { | |||
7966 | if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_FLOAT2) { | |||
7967 | rc = SQLITE_ERROR1; | |||
7968 | vtab_set_error(&p->base, "Expected float for FLOAT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v))); | |||
7969 | goto done; | |||
7970 | } | |||
7971 | break; | |||
7972 | } | |||
7973 | case VEC0_METADATA_COLUMN_KIND_TEXT: { | |||
7974 | if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_TEXT3) { | |||
7975 | rc = SQLITE_ERROR1; | |||
7976 | vtab_set_error(&p->base, "Expected text for TEXT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v))); | |||
7977 | goto done; | |||
7978 | } | |||
7979 | break; | |||
7980 | } | |||
7981 | } | |||
7982 | ||||
7983 | sqlite3_blob * blobValue = NULL((void*)0); | |||
7984 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", chunk_id, 1, &blobValue); | |||
7985 | if(rc != SQLITE_OK0) { | |||
7986 | goto done; | |||
7987 | } | |||
7988 | ||||
7989 | switch(kind) { | |||
7990 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { | |||
7991 | u8 block; | |||
7992 | int value = sqlite3_value_intsqlite3_api->value_int(v); | |||
7993 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT8)); | |||
7994 | if(rc != SQLITE_OK0) { | |||
7995 | goto done; | |||
7996 | } | |||
7997 | ||||
7998 | if (value) { | |||
7999 | block |= 1 << (chunk_offset % CHAR_BIT8); | |||
8000 | } else { | |||
8001 | block &= ~(1 << (chunk_offset % CHAR_BIT8)); | |||
8002 | } | |||
8003 | ||||
8004 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT8); | |||
8005 | break; | |||
8006 | } | |||
8007 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { | |||
8008 | i64 value = sqlite3_value_int64sqlite3_api->value_int64(v); | |||
8009 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64)); | |||
8010 | break; | |||
8011 | } | |||
8012 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { | |||
8013 | double value = sqlite3_value_doublesqlite3_api->value_double(v); | |||
8014 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(double)); | |||
8015 | break; | |||
8016 | } | |||
8017 | case VEC0_METADATA_COLUMN_KIND_TEXT: { | |||
8018 | int prev_n; | |||
8019 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &prev_n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); | |||
8020 | if(rc != SQLITE_OK0) { | |||
8021 | goto done; | |||
8022 | } | |||
8023 | ||||
8024 | const char * s = (const char *) sqlite3_value_textsqlite3_api->value_text(v); | |||
8025 | int n = sqlite3_value_bytessqlite3_api->value_bytes(v); | |||
8026 | u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
8027 | memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); | |||
8028 | memcpy(view, &n, sizeof(int)); | |||
8029 | memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4)(((n) <= (16 -4)) ? (n) : (16 -4))); | |||
8030 | ||||
8031 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); | |||
8032 | if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
8033 | const char * zSql; | |||
8034 | ||||
8035 | if(isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12)) { | |||
8036 | zSql = sqlite3_mprintfsqlite3_api->mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " SET data = ?2 WHERE rowid = ?1", p->schemaName, p->tableName, metadata_column_idx); | |||
8037 | }else { | |||
8038 | zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx); | |||
8039 | } | |||
8040 | if(!zSql) { | |||
8041 | rc = SQLITE_NOMEM7; | |||
8042 | goto done; | |||
8043 | } | |||
8044 | sqlite3_stmt * stmt; | |||
8045 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
8046 | if(rc != SQLITE_OK0) { | |||
8047 | goto done; | |||
8048 | } | |||
8049 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
8050 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 2, s, n, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
8051 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
8052 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8053 | ||||
8054 | if(rc != SQLITE_DONE101) { | |||
8055 | rc = SQLITE_ERROR1; | |||
8056 | goto done; | |||
8057 | } | |||
8058 | } | |||
8059 | else if(prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
8060 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx); | |||
8061 | if(!zSql) { | |||
8062 | rc = SQLITE_NOMEM7; | |||
8063 | goto done; | |||
8064 | } | |||
8065 | sqlite3_stmt * stmt; | |||
8066 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
8067 | if(rc != SQLITE_OK0) { | |||
8068 | goto done; | |||
8069 | } | |||
8070 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
8071 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
8072 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8073 | ||||
8074 | if(rc != SQLITE_DONE101) { | |||
8075 | rc = SQLITE_ERROR1; | |||
8076 | goto done; | |||
8077 | } | |||
8078 | } | |||
8079 | break; | |||
8080 | } | |||
8081 | } | |||
8082 | ||||
8083 | if(rc != SQLITE_OK0) { | |||
8084 | ||||
8085 | } | |||
8086 | rc = sqlite3_blob_closesqlite3_api->blob_close(blobValue); | |||
8087 | if(rc != SQLITE_OK0) { | |||
8088 | goto done; | |||
8089 | } | |||
8090 | ||||
8091 | done: | |||
8092 | return rc; | |||
8093 | } | |||
8094 | ||||
8095 | ||||
8096 | /** | |||
8097 | * @brief Handles INSERT INTO operations on a vec0 table. | |||
8098 | * | |||
8099 | * @return int SQLITE_OK on success, otherwise error code on failure | |||
8100 | */ | |||
8101 | int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, | |||
8102 | sqlite_int64 *pRowid) { | |||
8103 | UNUSED_PARAMETER(argc)(void)(argc); | |||
8104 | vec0_vtab *p = (vec0_vtab *)pVTab; | |||
8105 | int rc; | |||
8106 | // Rowid for the inserted row, deterimined by the inserted ID + _rowids shadow | |||
8107 | // table | |||
8108 | i64 rowid; | |||
8109 | ||||
8110 | // Array to hold the vector data of the inserted row. Individual elements will | |||
8111 | // have a lifetime bound to the argv[..] values. | |||
8112 | void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS16]; | |||
8113 | // Array to hold cleanup functions for vectorDatas[] | |||
8114 | vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS16]; | |||
8115 | ||||
8116 | sqlite3_value * partitionKeyValues[VEC0_MAX_PARTITION_COLUMNS4]; | |||
8117 | ||||
8118 | // Rowid of the chunk in the _chunks shadow table that the row will be a part | |||
8119 | // of. | |||
8120 | i64 chunk_rowid; | |||
8121 | // offset within the chunk where the rowid belongs | |||
8122 | i64 chunk_offset; | |||
8123 | ||||
8124 | // a write-able blob of the validity column for the given chunk. Used to mark | |||
8125 | // validity bit | |||
8126 | sqlite3_blob *blobChunksValidity = NULL((void*)0); | |||
8127 | // buffer for the valididty column for the given chunk. Maybe not needed here? | |||
8128 | const unsigned char *bufferChunksValidity = NULL((void*)0); | |||
8129 | int numReadVectors = 0; | |||
8130 | ||||
8131 | // Read all provided partition key values into partitionKeyValues | |||
8132 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
8133 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) { | |||
8134 | continue; | |||
8135 | } | |||
8136 | int partition_key_idx = p->user_column_idxs[i]; | |||
8137 | partitionKeyValues[partition_key_idx] = argv[2+VEC0_COLUMN_USERN_START1 + i]; | |||
8138 | ||||
8139 | int new_value_type = sqlite3_value_typesqlite3_api->value_type(partitionKeyValues[partition_key_idx]); | |||
8140 | if((new_value_type != SQLITE_NULL5) && (new_value_type != p->paritition_columns[partition_key_idx].type)) { | |||
8141 | // IMP: V11454_28292 | |||
8142 | vtab_set_error( | |||
8143 | pVTab, | |||
8144 | "Parition key type mismatch: The partition key column %.*s has type %s, but %s was provided.", | |||
8145 | p->paritition_columns[partition_key_idx].name_length, | |||
8146 | p->paritition_columns[partition_key_idx].name, | |||
8147 | type_name(p->paritition_columns[partition_key_idx].type), | |||
8148 | type_name(new_value_type) | |||
8149 | ); | |||
8150 | rc = SQLITE_ERROR1; | |||
8151 | goto cleanup; | |||
8152 | } | |||
8153 | } | |||
8154 | ||||
8155 | // read all the inserted vectors into vectorDatas, validate their lengths. | |||
8156 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
8157 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) { | |||
8158 | continue; | |||
8159 | } | |||
8160 | int vector_column_idx = p->user_column_idxs[i]; | |||
8161 | sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START1 + i]; | |||
8162 | size_t dimensions; | |||
8163 | ||||
8164 | char *pzError; | |||
8165 | enum VectorElementType elementType; | |||
8166 | rc = vector_from_value(valueVector, &vectorDatas[vector_column_idx], &dimensions, | |||
8167 | &elementType, &cleanups[vector_column_idx], &pzError); | |||
8168 | if (rc != SQLITE_OK0) { | |||
8169 | // IMP: V06519_23358 | |||
8170 | vtab_set_error( | |||
8171 | pVTab, "Inserted vector for the \"%.*s\" column is invalid: %z", | |||
8172 | p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name, pzError); | |||
8173 | rc = SQLITE_ERROR1; | |||
8174 | goto cleanup; | |||
8175 | } | |||
8176 | ||||
8177 | numReadVectors++; | |||
8178 | if (elementType != p->vector_columns[vector_column_idx].element_type) { | |||
8179 | // IMP: V08221_25059 | |||
8180 | vtab_set_error( | |||
8181 | pVTab, | |||
8182 | "Inserted vector for the \"%.*s\" column is expected to be of type " | |||
8183 | "%s, but a %s vector was provided.", | |||
8184 | p->vector_columns[i].name_length, p->vector_columns[i].name, | |||
8185 | vector_subtype_name(p->vector_columns[i].element_type), | |||
8186 | vector_subtype_name(elementType)); | |||
8187 | rc = SQLITE_ERROR1; | |||
8188 | goto cleanup; | |||
8189 | } | |||
8190 | ||||
8191 | if (dimensions != p->vector_columns[vector_column_idx].dimensions) { | |||
8192 | // IMP: V01145_17984 | |||
8193 | vtab_set_error( | |||
8194 | pVTab, | |||
8195 | "Dimension mismatch for inserted vector for the \"%.*s\" column. " | |||
8196 | "Expected %d dimensions but received %d.", | |||
8197 | p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name, | |||
8198 | p->vector_columns[vector_column_idx].dimensions, dimensions); | |||
8199 | rc = SQLITE_ERROR1; | |||
8200 | goto cleanup; | |||
8201 | } | |||
8202 | } | |||
8203 | ||||
8204 | // Cannot insert a value in the hidden "distance" column | |||
8205 | if (sqlite3_value_typesqlite3_api->value_type(argv[2 + vec0_column_distance_idx(p)]) != | |||
8206 | SQLITE_NULL5) { | |||
8207 | // IMP: V24228_08298 | |||
8208 | vtab_set_error(pVTab, | |||
8209 | "A value was provided for the hidden \"distance\" column."); | |||
8210 | rc = SQLITE_ERROR1; | |||
8211 | goto cleanup; | |||
8212 | } | |||
8213 | // Cannot insert a value in the hidden "k" column | |||
8214 | if (sqlite3_value_typesqlite3_api->value_type(argv[2 + vec0_column_k_idx(p)]) != SQLITE_NULL5) { | |||
8215 | // IMP: V11875_28713 | |||
8216 | vtab_set_error(pVTab, "A value was provided for the hidden \"k\" column."); | |||
8217 | rc = SQLITE_ERROR1; | |||
8218 | goto cleanup; | |||
8219 | } | |||
8220 | ||||
8221 | // Step #1: Insert/get a rowid for this row, from the _rowids table. | |||
8222 | rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID0], &rowid); | |||
8223 | if (rc != SQLITE_OK0) { | |||
8224 | goto cleanup; | |||
8225 | } | |||
8226 | ||||
8227 | // Step #2: Find the next "available" position in the _chunks table for this | |||
8228 | // row. | |||
8229 | rc = vec0Update_InsertNextAvailableStep(p, partitionKeyValues, | |||
8230 | &chunk_rowid, &chunk_offset, | |||
8231 | &blobChunksValidity, | |||
8232 | &bufferChunksValidity); | |||
8233 | if (rc != SQLITE_OK0) { | |||
8234 | goto cleanup; | |||
8235 | } | |||
8236 | ||||
8237 | // Step #3: With the next available chunk position, write out all the vectors | |||
8238 | // to their specified location. | |||
8239 | rc = vec0Update_InsertWriteFinalStep(p, chunk_rowid, chunk_offset, rowid, | |||
8240 | vectorDatas, blobChunksValidity, | |||
8241 | bufferChunksValidity); | |||
8242 | if (rc != SQLITE_OK0) { | |||
8243 | goto cleanup; | |||
8244 | } | |||
8245 | ||||
8246 | if(p->numAuxiliaryColumns > 0) { | |||
8247 | sqlite3_stmt *stmt; | |||
8248 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); | |||
8249 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" "(rowid ", p->schemaName, p->tableName); | |||
8250 | for(int i = 0; i < p->numAuxiliaryColumns; i++) { | |||
8251 | sqlite3_str_appendfsqlite3_api->str_appendf(s, ", value%02d", i); | |||
8252 | } | |||
8253 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ") VALUES (? "); | |||
8254 | for(int i = 0; i < p->numAuxiliaryColumns; i++) { | |||
8255 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ", ?"); | |||
8256 | } | |||
8257 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ")"); | |||
8258 | char * zSql = sqlite3_str_finishsqlite3_api->str_finish(s); | |||
8259 | // TODO double check error handling ehre | |||
8260 | if(!zSql) { | |||
8261 | rc = SQLITE_NOMEM7; | |||
8262 | goto cleanup; | |||
8263 | } | |||
8264 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
8265 | if(rc != SQLITE_OK0) { | |||
8266 | goto cleanup; | |||
8267 | } | |||
8268 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
8269 | ||||
8270 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
8271 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) { | |||
8272 | continue; | |||
8273 | } | |||
8274 | int auxiliary_key_idx = p->user_column_idxs[i]; | |||
8275 | sqlite3_value * v = argv[2+VEC0_COLUMN_USERN_START1 + i]; | |||
8276 | int v_type = sqlite3_value_typesqlite3_api->value_type(v); | |||
8277 | if(v_type != SQLITE_NULL5 && (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) { | |||
8278 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8279 | rc = SQLITE_CONSTRAINT19; | |||
8280 | vtab_set_error( | |||
8281 | pVTab, | |||
8282 | "Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.", | |||
8283 | p->auxiliary_columns[auxiliary_key_idx].name_length, | |||
8284 | p->auxiliary_columns[auxiliary_key_idx].name, | |||
8285 | type_name(p->auxiliary_columns[auxiliary_key_idx].type), | |||
8286 | type_name(v_type) | |||
8287 | ); | |||
8288 | goto cleanup; | |||
8289 | } | |||
8290 | // first 1 is for 1-based indexing on sqlite3_bind_*, second 1 is to account for initial rowid parameter | |||
8291 | sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1 + 1 + auxiliary_key_idx, v); | |||
8292 | } | |||
8293 | ||||
8294 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
8295 | if(rc != SQLITE_DONE101) { | |||
8296 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8297 | rc = SQLITE_ERROR1; | |||
8298 | goto cleanup; | |||
8299 | } | |||
8300 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8301 | } | |||
8302 | ||||
8303 | ||||
8304 | for(int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
8305 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { | |||
8306 | continue; | |||
8307 | } | |||
8308 | int metadata_idx = p->user_column_idxs[i]; | |||
8309 | sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START1 + i]; | |||
8310 | rc = vec0_write_metadata_value(p, metadata_idx, rowid, chunk_rowid, chunk_offset, v, 0); | |||
8311 | if(rc != SQLITE_OK0) { | |||
8312 | goto cleanup; | |||
8313 | } | |||
8314 | } | |||
8315 | ||||
8316 | *pRowid = rowid; | |||
8317 | rc = SQLITE_OK0; | |||
8318 | ||||
8319 | cleanup: | |||
8320 | for (int i = 0; i < numReadVectors; i++) { | |||
8321 | cleanups[i](vectorDatas[i]); | |||
8322 | } | |||
8323 | sqlite3_freesqlite3_api->free((void *)bufferChunksValidity); | |||
8324 | int brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksValidity); | |||
8325 | if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) { | |||
8326 | vtab_set_error(&p->base, | |||
8327 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " "unknown error, blobChunksValidity could " | |||
8328 | "not be closed, please file an issue"); | |||
8329 | return brc; | |||
8330 | } | |||
8331 | return rc; | |||
8332 | } | |||
8333 | ||||
8334 | int vec0Update_Delete_ClearValidity(vec0_vtab *p, i64 chunk_id, | |||
8335 | u64 chunk_offset) { | |||
8336 | int rc, brc; | |||
8337 | sqlite3_blob *blobChunksValidity = NULL((void*)0); | |||
8338 | char unsigned bx; | |||
8339 | int validityOffset = chunk_offset / CHAR_BIT8; | |||
8340 | ||||
8341 | // 2. ensure chunks.validity bit is 1, then set to 0 | |||
8342 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "validity", | |||
8343 | chunk_id, 1, &blobChunksValidity); | |||
8344 | if (rc != SQLITE_OK0) { | |||
8345 | // IMP: V26002_10073 | |||
8346 | vtab_set_error(&p->base, "could not open validity blob for %s.%s.%lld", | |||
8347 | p->schemaName, p->shadowChunksName, chunk_id); | |||
8348 | return SQLITE_ERROR1; | |||
8349 | } | |||
8350 | // will skip the sqlite3_blob_bytes(blobChunksValidity) check for now, | |||
8351 | // the read below would catch it | |||
8352 | ||||
8353 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobChunksValidity, &bx, sizeof(bx), validityOffset); | |||
8354 | if (rc != SQLITE_OK0) { | |||
8355 | // IMP: V21193_05263 | |||
8356 | vtab_set_error( | |||
8357 | &p->base, "could not read validity blob for %s.%s.%lld at %d", | |||
8358 | p->schemaName, p->shadowChunksName, chunk_id, validityOffset); | |||
8359 | goto cleanup; | |||
8360 | } | |||
8361 | if (!(bx >> (chunk_offset % CHAR_BIT8))) { | |||
8362 | // IMP: V21193_05263 | |||
8363 | rc = SQLITE_ERROR1; | |||
8364 | vtab_set_error( | |||
8365 | &p->base, | |||
8366 | "vec0 deletion error: validity bit is not set for %s.%s.%lld at %d", | |||
8367 | p->schemaName, p->shadowChunksName, chunk_id, validityOffset); | |||
8368 | goto cleanup; | |||
8369 | } | |||
8370 | char unsigned mask = ~(1 << (chunk_offset % CHAR_BIT8)); | |||
8371 | char result = bx & mask; | |||
8372 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksValidity, &result, sizeof(bx), | |||
8373 | validityOffset); | |||
8374 | if (rc != SQLITE_OK0) { | |||
8375 | vtab_set_error( | |||
8376 | &p->base, "could not write to validity blob for %s.%s.%lld at %d", | |||
8377 | p->schemaName, p->shadowChunksName, chunk_id, validityOffset); | |||
8378 | goto cleanup; | |||
8379 | } | |||
8380 | ||||
8381 | cleanup: | |||
8382 | ||||
8383 | brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksValidity); | |||
8384 | if (rc != SQLITE_OK0) | |||
8385 | return rc; | |||
8386 | if (brc != SQLITE_OK0) { | |||
8387 | vtab_set_error(&p->base, | |||
8388 | "vec0 deletion error: Error commiting validity blob " | |||
8389 | "transaction on %s.%s.%lld at %d", | |||
8390 | p->schemaName, p->shadowChunksName, chunk_id, | |||
8391 | validityOffset); | |||
8392 | return brc; | |||
8393 | } | |||
8394 | return SQLITE_OK0; | |||
8395 | } | |||
8396 | ||||
8397 | int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) { | |||
8398 | int rc; | |||
8399 | sqlite3_stmt *stmt = NULL((void*)0); | |||
8400 | ||||
8401 | char *zSql = | |||
8402 | sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE rowid = ?", | |||
8403 | p->schemaName, p->tableName); | |||
8404 | if (!zSql) { | |||
8405 | return SQLITE_NOMEM7; | |||
8406 | } | |||
8407 | ||||
8408 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
8409 | sqlite3_freesqlite3_api->free(zSql); | |||
8410 | if (rc != SQLITE_OK0) { | |||
8411 | goto cleanup; | |||
8412 | } | |||
8413 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
8414 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
8415 | if (rc != SQLITE_DONE101) { | |||
8416 | goto cleanup; | |||
8417 | } | |||
8418 | rc = SQLITE_OK0; | |||
8419 | ||||
8420 | cleanup: | |||
8421 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8422 | return rc; | |||
8423 | } | |||
8424 | ||||
8425 | int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) { | |||
8426 | int rc; | |||
8427 | sqlite3_stmt *stmt = NULL((void*)0); | |||
8428 | ||||
8429 | char *zSql = | |||
8430 | sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " WHERE rowid = ?", | |||
8431 | p->schemaName, p->tableName); | |||
8432 | if (!zSql) { | |||
8433 | return SQLITE_NOMEM7; | |||
8434 | } | |||
8435 | ||||
8436 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
8437 | sqlite3_freesqlite3_api->free(zSql); | |||
8438 | if (rc != SQLITE_OK0) { | |||
8439 | goto cleanup; | |||
8440 | } | |||
8441 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
8442 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
8443 | if (rc != SQLITE_DONE101) { | |||
8444 | goto cleanup; | |||
8445 | } | |||
8446 | rc = SQLITE_OK0; | |||
8447 | ||||
8448 | cleanup: | |||
8449 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8450 | return rc; | |||
8451 | } | |||
8452 | ||||
8453 | int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i64 chunk_id, | |||
8454 | u64 chunk_offset) { | |||
8455 | int rc; | |||
8456 | sqlite3_blob * blobValue; | |||
8457 | vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind; | |||
8458 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 1, &blobValue); | |||
8459 | if(rc != SQLITE_OK0) { | |||
8460 | return rc; | |||
8461 | } | |||
8462 | ||||
8463 | switch(kind) { | |||
8464 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { | |||
8465 | u8 block; | |||
8466 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT8)); | |||
8467 | if(rc != SQLITE_OK0) { | |||
8468 | goto done; | |||
8469 | } | |||
8470 | ||||
8471 | block &= ~(1 << (chunk_offset % CHAR_BIT8)); | |||
8472 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT8); | |||
8473 | break; | |||
8474 | } | |||
8475 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { | |||
8476 | i64 v = 0; | |||
8477 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(i64)); | |||
8478 | break; | |||
8479 | } | |||
8480 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { | |||
8481 | double v = 0; | |||
8482 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(double)); | |||
8483 | break; | |||
8484 | } | |||
8485 | case VEC0_METADATA_COLUMN_KIND_TEXT: { | |||
8486 | int n; | |||
8487 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); | |||
8488 | if(rc != SQLITE_OK0) { | |||
8489 | goto done; | |||
8490 | } | |||
8491 | ||||
8492 | u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; | |||
8493 | memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); | |||
8494 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &view, sizeof(view), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); | |||
8495 | if(rc != SQLITE_OK0) { | |||
8496 | goto done; | |||
8497 | } | |||
8498 | ||||
8499 | if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { | |||
8500 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx); | |||
8501 | if(!zSql) { | |||
8502 | rc = SQLITE_NOMEM7; | |||
8503 | goto done; | |||
8504 | } | |||
8505 | sqlite3_stmt * stmt; | |||
8506 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
8507 | if(rc != SQLITE_OK0) { | |||
8508 | goto done; | |||
8509 | } | |||
8510 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); | |||
8511 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
8512 | if(rc != SQLITE_DONE101) { | |||
8513 | rc = SQLITE_ERROR1; | |||
8514 | goto done; | |||
8515 | } | |||
8516 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8517 | } | |||
8518 | break; | |||
8519 | } | |||
8520 | } | |||
8521 | int rc2; | |||
8522 | done: | |||
8523 | rc2 = sqlite3_blob_closesqlite3_api->blob_close(blobValue); | |||
8524 | if(rc == SQLITE_OK0) { | |||
8525 | return rc2; | |||
8526 | } | |||
8527 | return rc; | |||
8528 | } | |||
8529 | ||||
8530 | int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) { | |||
8531 | vec0_vtab *p = (vec0_vtab *)pVTab; | |||
8532 | int rc; | |||
8533 | i64 rowid; | |||
8534 | i64 chunk_id; | |||
8535 | i64 chunk_offset; | |||
8536 | ||||
8537 | if (p->pkIsText) { | |||
8538 | rc = vec0_rowid_from_id(p, idValue, &rowid); | |||
8539 | if (rc != SQLITE_OK0) { | |||
8540 | return rc; | |||
8541 | } | |||
8542 | } else { | |||
8543 | rowid = sqlite3_value_int64sqlite3_api->value_int64(idValue); | |||
8544 | } | |||
8545 | ||||
8546 | // 1. Find chunk position for given rowid | |||
8547 | // 2. Ensure that validity bit for position is 1, then set to 0 | |||
8548 | // 3. Zero out rowid in chunks.rowid | |||
8549 | // 4. Zero out vector data in all vector column chunks | |||
8550 | // 5. Delete value in _rowids table | |||
8551 | ||||
8552 | // 1. get chunk_id and chunk_offset from _rowids | |||
8553 | rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset); | |||
8554 | if (rc != SQLITE_OK0) { | |||
8555 | return rc; | |||
8556 | } | |||
8557 | ||||
8558 | rc = vec0Update_Delete_ClearValidity(p, chunk_id, chunk_offset); | |||
8559 | if (rc != SQLITE_OK0) { | |||
8560 | return rc; | |||
8561 | } | |||
8562 | ||||
8563 | // 3. zero out rowid in chunks.rowids | |||
8564 | // https://github.com/asg017/sqlite-vec/issues/54 | |||
8565 | ||||
8566 | // 4. zero out any data in vector chunks tables | |||
8567 | // https://github.com/asg017/sqlite-vec/issues/54 | |||
8568 | ||||
8569 | // 5. delete from _rowids table | |||
8570 | rc = vec0Update_Delete_DeleteRowids(p, rowid); | |||
8571 | if (rc != SQLITE_OK0) { | |||
8572 | return rc; | |||
8573 | } | |||
8574 | ||||
8575 | // 6. delete any auxiliary rows | |||
8576 | if(p->numAuxiliaryColumns > 0) { | |||
8577 | rc = vec0Update_Delete_DeleteAux(p, rowid); | |||
8578 | if (rc != SQLITE_OK0) { | |||
8579 | return rc; | |||
8580 | } | |||
8581 | } | |||
8582 | ||||
8583 | // 6. delete metadata | |||
8584 | for(int i = 0; i < p->numMetadataColumns; i++) { | |||
8585 | rc = vec0Update_Delete_ClearMetadata(p, i, rowid, chunk_id, chunk_offset); | |||
8586 | } | |||
8587 | ||||
8588 | return SQLITE_OK0; | |||
8589 | } | |||
8590 | ||||
8591 | int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value * value, i64 rowid) { | |||
8592 | int rc; | |||
8593 | sqlite3_stmt *stmt; | |||
8594 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("UPDATE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " SET value%02d = ? WHERE rowid = ?", p->schemaName, p->tableName, auxiliary_column_idx); | |||
8595 | if(!zSql) { | |||
8596 | return SQLITE_NOMEM7; | |||
8597 | } | |||
8598 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); | |||
8599 | if(rc != SQLITE_OK0) { | |||
8600 | return rc; | |||
8601 | } | |||
8602 | sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1, value); | |||
8603 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 2, rowid); | |||
8604 | rc = sqlite3_stepsqlite3_api->step(stmt); | |||
8605 | if(rc != SQLITE_DONE101) { | |||
8606 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8607 | return SQLITE_ERROR1; | |||
8608 | } | |||
8609 | sqlite3_finalizesqlite3_api->finalize(stmt); | |||
8610 | return SQLITE_OK0; | |||
8611 | } | |||
8612 | ||||
8613 | int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_offset, | |||
8614 | int i, sqlite3_value *valueVector) { | |||
8615 | int rc; | |||
8616 | ||||
8617 | sqlite3_blob *blobVectors = NULL((void*)0); | |||
8618 | ||||
8619 | char *pzError; | |||
8620 | size_t dimensions; | |||
8621 | enum VectorElementType elementType; | |||
8622 | void *vector; | |||
8623 | vector_cleanup cleanup = vector_cleanup_noop; | |||
8624 | // https://github.com/asg017/sqlite-vec/issues/53 | |||
8625 | rc = vector_from_value(valueVector, &vector, &dimensions, &elementType, | |||
8626 | &cleanup, &pzError); | |||
8627 | if (rc != SQLITE_OK0) { | |||
8628 | // IMP: V15203_32042 | |||
8629 | vtab_set_error( | |||
8630 | &p->base, "Updated vector for the \"%.*s\" column is invalid: %z", | |||
8631 | p->vector_columns[i].name_length, p->vector_columns[i].name, pzError); | |||
8632 | rc = SQLITE_ERROR1; | |||
8633 | goto cleanup; | |||
8634 | } | |||
8635 | if (elementType != p->vector_columns[i].element_type) { | |||
8636 | // IMP: V03643_20481 | |||
8637 | vtab_set_error( | |||
8638 | &p->base, | |||
8639 | "Updated vector for the \"%.*s\" column is expected to be of type " | |||
8640 | "%s, but a %s vector was provided.", | |||
8641 | p->vector_columns[i].name_length, p->vector_columns[i].name, | |||
8642 | vector_subtype_name(p->vector_columns[i].element_type), | |||
8643 | vector_subtype_name(elementType)); | |||
8644 | rc = SQLITE_ERROR1; | |||
8645 | goto cleanup; | |||
8646 | } | |||
8647 | if (dimensions != p->vector_columns[i].dimensions) { | |||
8648 | // IMP: V25739_09810 | |||
8649 | vtab_set_error( | |||
8650 | &p->base, | |||
8651 | "Dimension mismatch for new updated vector for the \"%.*s\" column. " | |||
8652 | "Expected %d dimensions but received %d.", | |||
8653 | p->vector_columns[i].name_length, p->vector_columns[i].name, | |||
8654 | p->vector_columns[i].dimensions, dimensions); | |||
8655 | rc = SQLITE_ERROR1; | |||
8656 | goto cleanup; | |||
8657 | } | |||
8658 | ||||
8659 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i], | |||
8660 | "vectors", chunk_id, 1, &blobVectors); | |||
8661 | if (rc != SQLITE_OK0) { | |||
8662 | vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld", | |||
8663 | p->schemaName, p->shadowVectorChunksNames[i], chunk_id); | |||
8664 | goto cleanup; | |||
8665 | } | |||
8666 | rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, vector, | |||
8667 | p->vector_columns[i].dimensions, | |||
8668 | p->vector_columns[i].element_type); | |||
8669 | if (rc != SQLITE_OK0) { | |||
8670 | vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld", | |||
8671 | p->schemaName, p->shadowVectorChunksNames[i], chunk_id); | |||
8672 | goto cleanup; | |||
8673 | } | |||
8674 | ||||
8675 | cleanup: | |||
8676 | cleanup(vector); | |||
8677 | int brc = sqlite3_blob_closesqlite3_api->blob_close(blobVectors); | |||
8678 | if (rc != SQLITE_OK0) { | |||
8679 | return rc; | |||
8680 | } | |||
8681 | if (brc != SQLITE_OK0) { | |||
8682 | vtab_set_error( | |||
8683 | &p->base, | |||
8684 | "Could not commit blob transaction for vectors blob for %s.%s.%lld", | |||
8685 | p->schemaName, p->shadowVectorChunksNames[i], chunk_id); | |||
8686 | return brc; | |||
8687 | } | |||
8688 | return SQLITE_OK0; | |||
8689 | } | |||
8690 | ||||
8691 | int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) { | |||
8692 | UNUSED_PARAMETER(argc)(void)(argc); | |||
8693 | vec0_vtab *p = (vec0_vtab *)pVTab; | |||
8694 | int rc; | |||
8695 | i64 chunk_id; | |||
8696 | i64 chunk_offset; | |||
8697 | ||||
8698 | i64 rowid; | |||
8699 | if (p->pkIsText) { | |||
8700 | const char *a = (const char *)sqlite3_value_textsqlite3_api->value_text(argv[0]); | |||
8701 | const char *b = (const char *)sqlite3_value_textsqlite3_api->value_text(argv[1]); | |||
8702 | // IMP: V08886_25725 | |||
8703 | if ((sqlite3_value_bytessqlite3_api->value_bytes(argv[0]) != sqlite3_value_bytessqlite3_api->value_bytes(argv[1])) || | |||
8704 | strncmp(a, b, sqlite3_value_bytessqlite3_api->value_bytes(argv[0])) != 0) { | |||
8705 | vtab_set_error(pVTab, | |||
8706 | "UPDATEs on vec0 primary key values are not allowed."); | |||
8707 | return SQLITE_ERROR1; | |||
8708 | } | |||
8709 | rc = vec0_rowid_from_id(p, argv[0], &rowid); | |||
8710 | if (rc != SQLITE_OK0) { | |||
8711 | return rc; | |||
8712 | } | |||
8713 | } else { | |||
8714 | rowid = sqlite3_value_int64sqlite3_api->value_int64(argv[0]); | |||
8715 | } | |||
8716 | ||||
8717 | // 1) get chunk_id and chunk_offset from _rowids | |||
8718 | rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset); | |||
8719 | if (rc != SQLITE_OK0) { | |||
8720 | return rc; | |||
8721 | } | |||
8722 | ||||
8723 | // 2) update any partition key values | |||
8724 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
8725 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) { | |||
8726 | continue; | |||
8727 | } | |||
8728 | sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i]; | |||
8729 | if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) { | |||
8730 | continue; | |||
8731 | } | |||
8732 | vtab_set_error(pVTab, "UPDATE on partition key columns are not supported yet. "); | |||
8733 | return SQLITE_ERROR1; | |||
8734 | } | |||
8735 | ||||
8736 | // 3) handle auxiliary column updates | |||
8737 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
8738 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) { | |||
8739 | continue; | |||
8740 | } | |||
8741 | int auxiliary_column_idx = p->user_column_idxs[i]; | |||
8742 | sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i]; | |||
8743 | if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) { | |||
8744 | continue; | |||
8745 | } | |||
8746 | rc = vec0Update_UpdateAuxColumn(p, auxiliary_column_idx, value, rowid); | |||
8747 | if(rc != SQLITE_OK0) { | |||
8748 | return SQLITE_ERROR1; | |||
8749 | } | |||
8750 | } | |||
8751 | ||||
8752 | // 4) handle metadata column updates | |||
8753 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
8754 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { | |||
8755 | continue; | |||
8756 | } | |||
8757 | int metadata_column_idx = p->user_column_idxs[i]; | |||
8758 | sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i]; | |||
8759 | if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) { | |||
8760 | continue; | |||
8761 | } | |||
8762 | rc = vec0_write_metadata_value(p, metadata_column_idx, rowid, chunk_id, chunk_offset, value, 1); | |||
8763 | if(rc != SQLITE_OK0) { | |||
8764 | return rc; | |||
8765 | } | |||
8766 | } | |||
8767 | ||||
8768 | // 5) iterate over all new vectors, update the vectors | |||
8769 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { | |||
8770 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) { | |||
8771 | continue; | |||
8772 | } | |||
8773 | int vector_idx = p->user_column_idxs[i]; | |||
8774 | sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START1 + i]; | |||
8775 | // in vec0Column, we check sqlite3_vtab_nochange() on vector columns. | |||
8776 | // If the vector column isn't being changed, we return NULL; | |||
8777 | // That's not great, that means vector columns can never be NULLABLE | |||
8778 | // (bc we cant distinguish if an updated vector is truly NULL or nochange). | |||
8779 | // Also it means that if someone tries to run `UPDATE v SET X = NULL`, | |||
8780 | // we can't effectively detect and raise an error. | |||
8781 | // A better solution would be to use a custom result_type for "empty", | |||
8782 | // but subtypes don't appear to survive xColumn -> xUpdate, it's always 0. | |||
8783 | // So for now, we'll just use NULL and warn people to not SET X = NULL | |||
8784 | // in the docs. | |||
8785 | if (sqlite3_value_typesqlite3_api->value_type(valueVector) == SQLITE_NULL5) { | |||
8786 | continue; | |||
8787 | } | |||
8788 | ||||
8789 | rc = vec0Update_UpdateVectorColumn(p, chunk_id, chunk_offset, vector_idx, | |||
| ||||
8790 | valueVector); | |||
8791 | if (rc != SQLITE_OK0) { | |||
8792 | return SQLITE_ERROR1; | |||
8793 | } | |||
8794 | } | |||
8795 | ||||
8796 | return SQLITE_OK0; | |||
8797 | } | |||
8798 | ||||
8799 | static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, | |||
8800 | sqlite_int64 *pRowid) { | |||
8801 | // DELETE operation | |||
8802 | if (argc == 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) { | |||
| ||||
8803 | return vec0Update_Delete(pVTab, argv[0]); | |||
8804 | } | |||
8805 | // INSERT operation | |||
8806 | else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) == SQLITE_NULL5) { | |||
8807 | return vec0Update_Insert(pVTab, argc, argv, pRowid); | |||
8808 | } | |||
8809 | // UPDATE operation | |||
8810 | else if (argc
| |||
8811 | return vec0Update_Update(pVTab, argc, argv); | |||
8812 | } else { | |||
8813 | vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0."); | |||
8814 | return SQLITE_ERROR1; | |||
8815 | } | |||
8816 | } | |||
8817 | ||||
/**
 * xShadowName callback: reports whether zName is one of the shadow-table
 * name suffixes listed below.
 *
 * The comparison uses sqlite3_stricmp, so it is case-insensitive.
 *
 * @param zName candidate shadow-table suffix
 * @return 1 if zName matches a listed suffix, 0 otherwise
 */
static int vec0ShadowName(const char *zName) {
  static const char *kShadowSuffixes[] = {
      "rowids",
      "chunks",
      "auxiliary",
      "info",

      // One entry per metadata column, up to VEC0_MAX_METADATA_COLUMNS.
      // TODO: generate these instead of listing them by hand.
      "metadatachunks00",
      "metadatachunks01",
      "metadatachunks02",
      "metadatachunks03",
      "metadatachunks04",
      "metadatachunks05",
      "metadatachunks06",
      "metadatachunks07",
      "metadatachunks08",
      "metadatachunks09",
      "metadatachunks10",
      "metadatachunks11",
      "metadatachunks12",
      "metadatachunks13",
      "metadatachunks14",
      "metadatachunks15",

      // Text metadata tables, same numbering as above.
      "metadatatext00",
      "metadatatext01",
      "metadatatext02",
      "metadatatext03",
      "metadatatext04",
      "metadatatext05",
      "metadatatext06",
      "metadatatext07",
      "metadatatext08",
      "metadatatext09",
      "metadatatext10",
      "metadatatext11",
      "metadatatext12",
      "metadatatext13",
      "metadatatext14",
      "metadatatext15",
  };
  const size_t nSuffixes = sizeof(kShadowSuffixes) / sizeof(kShadowSuffixes[0]);

  size_t idx = 0;
  while (idx < nSuffixes) {
    if (sqlite3_stricmp(zName, kShadowSuffixes[idx]) == 0) {
      return 1;
    }
    idx++;
  }
  return 0;
}
8867 | ||||
8868 | static int vec0Begin(sqlite3_vtab *pVTab) { | |||
8869 | UNUSED_PARAMETER(pVTab)(void)(pVTab); | |||
8870 | return SQLITE_OK0; | |||
8871 | } | |||
8872 | static int vec0Sync(sqlite3_vtab *pVTab) { | |||
8873 | UNUSED_PARAMETER(pVTab)(void)(pVTab); | |||
8874 | vec0_vtab *p = (vec0_vtab *)pVTab; | |||
8875 | if (p->stmtLatestChunk) { | |||
8876 | sqlite3_finalizesqlite3_api->finalize(p->stmtLatestChunk); | |||
8877 | p->stmtLatestChunk = NULL((void*)0); | |||
8878 | } | |||
8879 | if (p->stmtRowidsInsertRowid) { | |||
8880 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertRowid); | |||
8881 | p->stmtRowidsInsertRowid = NULL((void*)0); | |||
8882 | } | |||
8883 | if (p->stmtRowidsInsertId) { | |||
8884 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertId); | |||
8885 | p->stmtRowidsInsertId = NULL((void*)0); | |||
8886 | } | |||
8887 | if (p->stmtRowidsUpdatePosition) { | |||
8888 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsUpdatePosition); | |||
8889 | p->stmtRowidsUpdatePosition = NULL((void*)0); | |||
8890 | } | |||
8891 | if (p->stmtRowidsGetChunkPosition) { | |||
8892 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsGetChunkPosition); | |||
8893 | p->stmtRowidsGetChunkPosition = NULL((void*)0); | |||
8894 | } | |||
8895 | return SQLITE_OK0; | |||
8896 | } | |||
8897 | static int vec0Commit(sqlite3_vtab *pVTab) { | |||
8898 | UNUSED_PARAMETER(pVTab)(void)(pVTab); | |||
8899 | return SQLITE_OK0; | |||
8900 | } | |||
8901 | static int vec0Rollback(sqlite3_vtab *pVTab) { | |||
8902 | UNUSED_PARAMETER(pVTab)(void)(pVTab); | |||
8903 | return SQLITE_OK0; | |||
8904 | } | |||
8905 | ||||
8906 | static sqlite3_module vec0Module = { | |||
8907 | /* iVersion */ 3, | |||
8908 | /* xCreate */ vec0Create, | |||
8909 | /* xConnect */ vec0Connect, | |||
8910 | /* xBestIndex */ vec0BestIndex, | |||
8911 | /* xDisconnect */ vec0Disconnect, | |||
8912 | /* xDestroy */ vec0Destroy, | |||
8913 | /* xOpen */ vec0Open, | |||
8914 | /* xClose */ vec0Close, | |||
8915 | /* xFilter */ vec0Filter, | |||
8916 | /* xNext */ vec0Next, | |||
8917 | /* xEof */ vec0Eof, | |||
8918 | /* xColumn */ vec0Column, | |||
8919 | /* xRowid */ vec0Rowid, | |||
8920 | /* xUpdate */ vec0Update, | |||
8921 | /* xBegin */ vec0Begin, | |||
8922 | /* xSync */ vec0Sync, | |||
8923 | /* xCommit */ vec0Commit, | |||
8924 | /* xRollback */ vec0Rollback, | |||
8925 | /* xFindFunction */ 0, | |||
8926 | /* xRename */ 0, // https://github.com/asg017/sqlite-vec/issues/43 | |||
8927 | /* xSavepoint */ 0, | |||
8928 | /* xRelease */ 0, | |||
8929 | /* xRollbackTo */ 0, | |||
8930 | /* xShadowName */ vec0ShadowName, | |||
8931 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 | |||
8932 | /* xIntegrity */ 0, // https://github.com/asg017/sqlite-vec/issues/44 | |||
8933 | #endif | |||
8934 | }; | |||
8935 | #pragma endregion | |||
8936 | ||||
8937 | static char *POINTER_NAME_STATIC_BLOB_DEF = "vec0-static_blob_def"; | |||
8938 | struct static_blob_definition { | |||
8939 | void *p; | |||
8940 | size_t dimensions; | |||
8941 | size_t nvectors; | |||
8942 | enum VectorElementType element_type; | |||
8943 | }; | |||
8944 | static void vec_static_blob_from_raw(sqlite3_context *context, int argc, | |||
8945 | sqlite3_value **argv) { | |||
8946 | ||||
8947 | assert(argc == 4)((void) sizeof ((argc == 4) ? 1 : 0), __extension__ ({ if (argc == 4) ; else __assert_fail ("argc == 4", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 8947, __extension__ __PRETTY_FUNCTION__); })); | |||
8948 | struct static_blob_definition *p; | |||
8949 | p = sqlite3_mallocsqlite3_api->malloc(sizeof(*p)); | |||
8950 | if (!p) { | |||
8951 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); | |||
8952 | return; | |||
8953 | } | |||
8954 | memset(p, 0, sizeof(*p)); | |||
8955 | p->p = (void *)sqlite3_value_int64sqlite3_api->value_int64(argv[0]); | |||
8956 | p->element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32; | |||
8957 | p->dimensions = sqlite3_value_int64sqlite3_api->value_int64(argv[2]); | |||
8958 | p->nvectors = sqlite3_value_int64sqlite3_api->value_int64(argv[3]); | |||
8959 | sqlite3_result_pointersqlite3_api->result_pointer(context, p, POINTER_NAME_STATIC_BLOB_DEF, | |||
8960 | sqlite3_freesqlite3_api->free); | |||
8961 | } | |||
8962 | #pragma region vec_static_blobs() table function | |||
8963 | ||||
8964 | #define MAX_STATIC_BLOBS16 16 | |||
8965 | ||||
8966 | typedef struct static_blob static_blob; | |||
8967 | struct static_blob { | |||
8968 | char *name; | |||
8969 | void *p; | |||
8970 | size_t dimensions; | |||
8971 | size_t nvectors; | |||
8972 | enum VectorElementType element_type; | |||
8973 | }; | |||
8974 | ||||
8975 | typedef struct vec_static_blob_data vec_static_blob_data; | |||
8976 | struct vec_static_blob_data { | |||
8977 | static_blob static_blobs[MAX_STATIC_BLOBS16]; | |||
8978 | }; | |||
8979 | ||||
8980 | typedef struct vec_static_blobs_vtab vec_static_blobs_vtab; | |||
8981 | struct vec_static_blobs_vtab { | |||
8982 | sqlite3_vtab base; | |||
8983 | vec_static_blob_data *data; | |||
8984 | }; | |||
8985 | ||||
8986 | typedef struct vec_static_blobs_cursor vec_static_blobs_cursor; | |||
8987 | struct vec_static_blobs_cursor { | |||
8988 | sqlite3_vtab_cursor base; | |||
8989 | sqlite3_int64 iRowid; | |||
8990 | }; | |||
8991 | ||||
8992 | static int vec_static_blobsConnect(sqlite3 *db, void *pAux, int argc, | |||
8993 | const char *const *argv, | |||
8994 | sqlite3_vtab **ppVtab, char **pzErr) { | |||
8995 | UNUSED_PARAMETER(argc)(void)(argc); | |||
8996 | UNUSED_PARAMETER(argv)(void)(argv); | |||
8997 | UNUSED_PARAMETER(pzErr)(void)(pzErr); | |||
8998 | ||||
8999 | vec_static_blobs_vtab *pNew; | |||
9000 | #define VEC_STATIC_BLOBS_NAME0 0 | |||
9001 | #define VEC_STATIC_BLOBS_DATA1 1 | |||
9002 | #define VEC_STATIC_BLOBS_DIMENSIONS2 2 | |||
9003 | #define VEC_STATIC_BLOBS_COUNT3 3 | |||
9004 | int rc = sqlite3_declare_vtabsqlite3_api->declare_vtab( | |||
9005 | db, "CREATE TABLE x(name, data, dimensions hidden, count hidden)"); | |||
9006 | if (rc == SQLITE_OK0) { | |||
9007 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); | |||
9008 | *ppVtab = (sqlite3_vtab *)pNew; | |||
9009 | if (pNew == 0) | |||
9010 | return SQLITE_NOMEM7; | |||
9011 | memset(pNew, 0, sizeof(*pNew)); | |||
9012 | pNew->data = pAux; | |||
9013 | } | |||
9014 | return rc; | |||
9015 | } | |||
9016 | ||||
9017 | static int vec_static_blobsDisconnect(sqlite3_vtab *pVtab) { | |||
9018 | vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVtab; | |||
9019 | sqlite3_freesqlite3_api->free(p); | |||
9020 | return SQLITE_OK0; | |||
9021 | } | |||
9022 | ||||
9023 | static int vec_static_blobsUpdate(sqlite3_vtab *pVTab, int argc, | |||
9024 | sqlite3_value **argv, sqlite_int64 *pRowid) { | |||
9025 | UNUSED_PARAMETER(pRowid)(void)(pRowid); | |||
9026 | vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVTab; | |||
9027 | // DELETE operation | |||
9028 | if (argc == 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) { | |||
9029 | return SQLITE_ERROR1; | |||
9030 | } | |||
9031 | // INSERT operation | |||
9032 | else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) == SQLITE_NULL5) { | |||
9033 | const char *key = | |||
9034 | (const char *)sqlite3_value_textsqlite3_api->value_text(argv[2 + VEC_STATIC_BLOBS_NAME0]); | |||
9035 | int idx = -1; | |||
9036 | for (int i = 0; i < MAX_STATIC_BLOBS16; i++) { | |||
9037 | if (!p->data->static_blobs[i].name) { | |||
9038 | p->data->static_blobs[i].name = sqlite3_mprintfsqlite3_api->mprintf("%s", key); | |||
9039 | idx = i; | |||
9040 | break; | |||
9041 | } | |||
9042 | } | |||
9043 | if (idx < 0) | |||
9044 | abort(); | |||
9045 | struct static_blob_definition *def = sqlite3_value_pointersqlite3_api->value_pointer( | |||
9046 | argv[2 + VEC_STATIC_BLOBS_DATA1], POINTER_NAME_STATIC_BLOB_DEF); | |||
9047 | p->data->static_blobs[idx].p = def->p; | |||
9048 | p->data->static_blobs[idx].dimensions = def->dimensions; | |||
9049 | p->data->static_blobs[idx].nvectors = def->nvectors; | |||
9050 | p->data->static_blobs[idx].element_type = def->element_type; | |||
9051 | ||||
9052 | return SQLITE_OK0; | |||
9053 | } | |||
9054 | // UPDATE operation | |||
9055 | else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) { | |||
9056 | return SQLITE_ERROR1; | |||
9057 | } | |||
9058 | return SQLITE_ERROR1; | |||
9059 | } | |||
9060 | ||||
9061 | static int vec_static_blobsOpen(sqlite3_vtab *p, | |||
9062 | sqlite3_vtab_cursor **ppCursor) { | |||
9063 | UNUSED_PARAMETER(p)(void)(p); | |||
9064 | vec_static_blobs_cursor *pCur; | |||
9065 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); | |||
9066 | if (pCur == 0) | |||
9067 | return SQLITE_NOMEM7; | |||
9068 | memset(pCur, 0, sizeof(*pCur)); | |||
9069 | *ppCursor = &pCur->base; | |||
9070 | return SQLITE_OK0; | |||
9071 | } | |||
9072 | ||||
9073 | static int vec_static_blobsClose(sqlite3_vtab_cursor *cur) { | |||
9074 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; | |||
9075 | sqlite3_freesqlite3_api->free(pCur); | |||
9076 | return SQLITE_OK0; | |||
9077 | } | |||
9078 | ||||
9079 | static int vec_static_blobsBestIndex(sqlite3_vtab *pVTab, | |||
9080 | sqlite3_index_info *pIdxInfo) { | |||
9081 | UNUSED_PARAMETER(pVTab)(void)(pVTab); | |||
9082 | pIdxInfo->idxNum = 1; | |||
9083 | pIdxInfo->estimatedCost = (double)10; | |||
9084 | pIdxInfo->estimatedRows = 10; | |||
9085 | return SQLITE_OK0; | |||
9086 | } | |||
9087 | ||||
9088 | static int vec_static_blobsNext(sqlite3_vtab_cursor *cur); | |||
9089 | static int vec_static_blobsFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, | |||
9090 | const char *idxStr, int argc, | |||
9091 | sqlite3_value **argv) { | |||
9092 | UNUSED_PARAMETER(idxNum)(void)(idxNum); | |||
9093 | UNUSED_PARAMETER(idxStr)(void)(idxStr); | |||
9094 | UNUSED_PARAMETER(argc)(void)(argc); | |||
9095 | UNUSED_PARAMETER(argv)(void)(argv); | |||
9096 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)pVtabCursor; | |||
9097 | pCur->iRowid = -1; | |||
9098 | vec_static_blobsNext(pVtabCursor); | |||
9099 | return SQLITE_OK0; | |||
9100 | } | |||
9101 | ||||
9102 | static int vec_static_blobsRowid(sqlite3_vtab_cursor *cur, | |||
9103 | sqlite_int64 *pRowid) { | |||
9104 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; | |||
9105 | *pRowid = pCur->iRowid; | |||
9106 | return SQLITE_OK0; | |||
9107 | } | |||
9108 | ||||
9109 | static int vec_static_blobsNext(sqlite3_vtab_cursor *cur) { | |||
9110 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; | |||
9111 | vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pCur->base.pVtab; | |||
9112 | pCur->iRowid++; | |||
9113 | while (pCur->iRowid < MAX_STATIC_BLOBS16) { | |||
9114 | if (p->data->static_blobs[pCur->iRowid].name) { | |||
9115 | return SQLITE_OK0; | |||
9116 | } | |||
9117 | pCur->iRowid++; | |||
9118 | } | |||
9119 | return SQLITE_OK0; | |||
9120 | } | |||
9121 | ||||
9122 | static int vec_static_blobsEof(sqlite3_vtab_cursor *cur) { | |||
9123 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; | |||
9124 | return pCur->iRowid >= MAX_STATIC_BLOBS16; | |||
9125 | } | |||
9126 | ||||
9127 | static int vec_static_blobsColumn(sqlite3_vtab_cursor *cur, | |||
9128 | sqlite3_context *context, int i) { | |||
9129 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; | |||
9130 | vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)cur->pVtab; | |||
9131 | switch (i) { | |||
9132 | case VEC_STATIC_BLOBS_NAME0: | |||
9133 | sqlite3_result_textsqlite3_api->result_text(context, p->data->static_blobs[pCur->iRowid].name, -1, | |||
9134 | SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
9135 | break; | |||
9136 | case VEC_STATIC_BLOBS_DATA1: | |||
9137 | sqlite3_result_nullsqlite3_api->result_null(context); | |||
9138 | break; | |||
9139 | case VEC_STATIC_BLOBS_DIMENSIONS2: | |||
9140 | sqlite3_result_int64sqlite3_api->result_int64(context, | |||
9141 | p->data->static_blobs[pCur->iRowid].dimensions); | |||
9142 | break; | |||
9143 | case VEC_STATIC_BLOBS_COUNT3: | |||
9144 | sqlite3_result_int64sqlite3_api->result_int64(context, p->data->static_blobs[pCur->iRowid].nvectors); | |||
9145 | break; | |||
9146 | } | |||
9147 | return SQLITE_OK0; | |||
9148 | } | |||
9149 | ||||
9150 | static sqlite3_module vec_static_blobsModule = { | |||
9151 | /* iVersion */ 3, | |||
9152 | /* xCreate */ 0, | |||
9153 | /* xConnect */ vec_static_blobsConnect, | |||
9154 | /* xBestIndex */ vec_static_blobsBestIndex, | |||
9155 | /* xDisconnect */ vec_static_blobsDisconnect, | |||
9156 | /* xDestroy */ 0, | |||
9157 | /* xOpen */ vec_static_blobsOpen, | |||
9158 | /* xClose */ vec_static_blobsClose, | |||
9159 | /* xFilter */ vec_static_blobsFilter, | |||
9160 | /* xNext */ vec_static_blobsNext, | |||
9161 | /* xEof */ vec_static_blobsEof, | |||
9162 | /* xColumn */ vec_static_blobsColumn, | |||
9163 | /* xRowid */ vec_static_blobsRowid, | |||
9164 | /* xUpdate */ vec_static_blobsUpdate, | |||
9165 | /* xBegin */ 0, | |||
9166 | /* xSync */ 0, | |||
9167 | /* xCommit */ 0, | |||
9168 | /* xRollback */ 0, | |||
9169 | /* xFindMethod */ 0, | |||
9170 | /* xRename */ 0, | |||
9171 | /* xSavepoint */ 0, | |||
9172 | /* xRelease */ 0, | |||
9173 | /* xRollbackTo */ 0, | |||
9174 | /* xShadowName */ 0, | |||
9175 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 | |||
9176 | /* xIntegrity */ 0 | |||
9177 | #endif | |||
9178 | }; | |||
9179 | #pragma endregion | |||
9180 | ||||
9181 | #pragma region vec_static_blob_entries() table function | |||
9182 | ||||
9183 | typedef struct vec_static_blob_entries_vtab vec_static_blob_entries_vtab; | |||
9184 | struct vec_static_blob_entries_vtab { | |||
9185 | sqlite3_vtab base; | |||
9186 | static_blob *blob; | |||
9187 | }; | |||
9188 | typedef enum { | |||
9189 | VEC_SBE__QUERYPLAN_FULLSCAN = 1, | |||
9190 | VEC_SBE__QUERYPLAN_KNN = 2 | |||
9191 | } vec_sbe_query_plan; | |||
9192 | ||||
9193 | struct sbe_query_knn_data { | |||
9194 | i64 k; | |||
9195 | i64 k_used; | |||
9196 | // Array of rowids of size k. Must be freed with sqlite3_free(). | |||
9197 | i32 *rowids; | |||
9198 | // Array of distances of size k. Must be freed with sqlite3_free(). | |||
9199 | f32 *distances; | |||
9200 | i64 current_idx; | |||
9201 | }; | |||
9202 | void sbe_query_knn_data_clear(struct sbe_query_knn_data *knn_data) { | |||
9203 | if (!knn_data) | |||
9204 | return; | |||
9205 | ||||
9206 | if (knn_data->rowids) { | |||
9207 | sqlite3_freesqlite3_api->free(knn_data->rowids); | |||
9208 | knn_data->rowids = NULL((void*)0); | |||
9209 | } | |||
9210 | if (knn_data->distances) { | |||
9211 | sqlite3_freesqlite3_api->free(knn_data->distances); | |||
9212 | knn_data->distances = NULL((void*)0); | |||
9213 | } | |||
9214 | } | |||
9215 | ||||
9216 | typedef struct vec_static_blob_entries_cursor vec_static_blob_entries_cursor; | |||
9217 | struct vec_static_blob_entries_cursor { | |||
9218 | sqlite3_vtab_cursor base; | |||
9219 | sqlite3_int64 iRowid; | |||
9220 | vec_sbe_query_plan query_plan; | |||
9221 | struct sbe_query_knn_data *knn_data; | |||
9222 | }; | |||
9223 | ||||
9224 | static int vec_static_blob_entriesConnect(sqlite3 *db, void *pAux, int argc, | |||
9225 | const char *const *argv, | |||
9226 | sqlite3_vtab **ppVtab, char **pzErr) { | |||
9227 | UNUSED_PARAMETER(argc)(void)(argc); | |||
9228 | UNUSED_PARAMETER(argv)(void)(argv); | |||
9229 | UNUSED_PARAMETER(pzErr)(void)(pzErr); | |||
9230 | vec_static_blob_data *blob_data = pAux; | |||
9231 | int idx = -1; | |||
9232 | for (int i = 0; i < MAX_STATIC_BLOBS16; i++) { | |||
9233 | if (!blob_data->static_blobs[i].name) | |||
9234 | continue; | |||
9235 | if (strncmp(blob_data->static_blobs[i].name, argv[3], | |||
9236 | strlen(blob_data->static_blobs[i].name)) == 0) { | |||
9237 | idx = i; | |||
9238 | break; | |||
9239 | } | |||
9240 | } | |||
9241 | if (idx < 0) | |||
9242 | abort(); | |||
9243 | vec_static_blob_entries_vtab *pNew; | |||
9244 | #define VEC_STATIC_BLOB_ENTRIES_VECTOR0 0 | |||
9245 | #define VEC_STATIC_BLOB_ENTRIES_DISTANCE1 1 | |||
9246 | #define VEC_STATIC_BLOB_ENTRIES_K2 2 | |||
9247 | int rc = sqlite3_declare_vtabsqlite3_api->declare_vtab( | |||
9248 | db, "CREATE TABLE x(vector, distance hidden, k hidden)"); | |||
9249 | if (rc == SQLITE_OK0) { | |||
9250 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); | |||
9251 | *ppVtab = (sqlite3_vtab *)pNew; | |||
9252 | if (pNew == 0) | |||
9253 | return SQLITE_NOMEM7; | |||
9254 | memset(pNew, 0, sizeof(*pNew)); | |||
9255 | pNew->blob = &blob_data->static_blobs[idx]; | |||
9256 | } | |||
9257 | return rc; | |||
9258 | } | |||
9259 | ||||
9260 | static int vec_static_blob_entriesCreate(sqlite3 *db, void *pAux, int argc, | |||
9261 | const char *const *argv, | |||
9262 | sqlite3_vtab **ppVtab, char **pzErr) { | |||
9263 | return vec_static_blob_entriesConnect(db, pAux, argc, argv, ppVtab, pzErr); | |||
9264 | } | |||
9265 | ||||
9266 | static int vec_static_blob_entriesDisconnect(sqlite3_vtab *pVtab) { | |||
9267 | vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVtab; | |||
9268 | sqlite3_freesqlite3_api->free(p); | |||
9269 | return SQLITE_OK0; | |||
9270 | } | |||
9271 | ||||
9272 | static int vec_static_blob_entriesOpen(sqlite3_vtab *p, | |||
9273 | sqlite3_vtab_cursor **ppCursor) { | |||
9274 | UNUSED_PARAMETER(p)(void)(p); | |||
9275 | vec_static_blob_entries_cursor *pCur; | |||
9276 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); | |||
9277 | if (pCur == 0) | |||
9278 | return SQLITE_NOMEM7; | |||
9279 | memset(pCur, 0, sizeof(*pCur)); | |||
9280 | *ppCursor = &pCur->base; | |||
9281 | return SQLITE_OK0; | |||
9282 | } | |||
9283 | ||||
9284 | static int vec_static_blob_entriesClose(sqlite3_vtab_cursor *cur) { | |||
9285 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; | |||
9286 | sqlite3_freesqlite3_api->free(pCur->knn_data); | |||
9287 | sqlite3_freesqlite3_api->free(pCur); | |||
9288 | return SQLITE_OK0; | |||
9289 | } | |||
9290 | ||||
9291 | static int vec_static_blob_entriesBestIndex(sqlite3_vtab *pVTab, | |||
9292 | sqlite3_index_info *pIdxInfo) { | |||
9293 | vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVTab; | |||
9294 | int iMatchTerm = -1; | |||
9295 | int iLimitTerm = -1; | |||
9296 | // int iRowidTerm = -1; // https://github.com/asg017/sqlite-vec/issues/47 | |||
9297 | int iKTerm = -1; | |||
9298 | ||||
9299 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { | |||
9300 | if (!pIdxInfo->aConstraint[i].usable) | |||
9301 | continue; | |||
9302 | ||||
9303 | int iColumn = pIdxInfo->aConstraint[i].iColumn; | |||
9304 | int op = pIdxInfo->aConstraint[i].op; | |||
9305 | if (op == SQLITE_INDEX_CONSTRAINT_MATCH64 && | |||
9306 | iColumn == VEC_STATIC_BLOB_ENTRIES_VECTOR0) { | |||
9307 | if (iMatchTerm > -1) { | |||
9308 | // https://github.com/asg017/sqlite-vec/issues/51 | |||
9309 | return SQLITE_ERROR1; | |||
9310 | } | |||
9311 | iMatchTerm = i; | |||
9312 | } | |||
9313 | if (op == SQLITE_INDEX_CONSTRAINT_LIMIT73) { | |||
9314 | iLimitTerm = i; | |||
9315 | } | |||
9316 | if (op == SQLITE_INDEX_CONSTRAINT_EQ2 && | |||
9317 | iColumn == VEC_STATIC_BLOB_ENTRIES_K2) { | |||
9318 | iKTerm = i; | |||
9319 | } | |||
9320 | } | |||
9321 | if (iMatchTerm >= 0) { | |||
9322 | if (iLimitTerm < 0 && iKTerm < 0) { | |||
9323 | // https://github.com/asg017/sqlite-vec/issues/51 | |||
9324 | return SQLITE_ERROR1; | |||
9325 | } | |||
9326 | if (iLimitTerm >= 0 && iKTerm >= 0) { | |||
9327 | return SQLITE_ERROR1; // limit or k, not both | |||
9328 | } | |||
9329 | if (pIdxInfo->nOrderBy < 1) { | |||
9330 | vtab_set_error(pVTab, "ORDER BY distance required"); | |||
9331 | return SQLITE_CONSTRAINT19; | |||
9332 | } | |||
9333 | if (pIdxInfo->nOrderBy > 1) { | |||
9334 | // https://github.com/asg017/sqlite-vec/issues/51 | |||
9335 | vtab_set_error(pVTab, "more than 1 ORDER BY clause provided"); | |||
9336 | return SQLITE_CONSTRAINT19; | |||
9337 | } | |||
9338 | if (pIdxInfo->aOrderBy[0].iColumn != VEC_STATIC_BLOB_ENTRIES_DISTANCE1) { | |||
9339 | vtab_set_error(pVTab, "ORDER BY must be on the distance column"); | |||
9340 | return SQLITE_CONSTRAINT19; | |||
9341 | } | |||
9342 | if (pIdxInfo->aOrderBy[0].desc) { | |||
9343 | vtab_set_error(pVTab, | |||
9344 | "Only ascending in ORDER BY distance clause is supported, " | |||
9345 | "DESC is not supported yet."); | |||
9346 | return SQLITE_CONSTRAINT19; | |||
9347 | } | |||
9348 | ||||
9349 | pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_KNN; | |||
9350 | pIdxInfo->estimatedCost = (double)10; | |||
9351 | pIdxInfo->estimatedRows = 10; | |||
9352 | ||||
9353 | pIdxInfo->orderByConsumed = 1; | |||
9354 | pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = 1; | |||
9355 | pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1; | |||
9356 | if (iLimitTerm >= 0) { | |||
9357 | pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = 2; | |||
9358 | pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1; | |||
9359 | } else { | |||
9360 | pIdxInfo->aConstraintUsage[iKTerm].argvIndex = 2; | |||
9361 | pIdxInfo->aConstraintUsage[iKTerm].omit = 1; | |||
9362 | } | |||
9363 | ||||
9364 | } else { | |||
9365 | pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_FULLSCAN; | |||
9366 | pIdxInfo->estimatedCost = (double)p->blob->nvectors; | |||
9367 | pIdxInfo->estimatedRows = p->blob->nvectors; | |||
9368 | } | |||
9369 | return SQLITE_OK0; | |||
9370 | } | |||
9371 | ||||
9372 | static int vec_static_blob_entriesFilter(sqlite3_vtab_cursor *pVtabCursor, | |||
9373 | int idxNum, const char *idxStr, | |||
9374 | int argc, sqlite3_value **argv) { | |||
9375 | UNUSED_PARAMETER(idxStr)(void)(idxStr); | |||
9376 | assert(argc >= 0 && argc <= 3)((void) sizeof ((argc >= 0 && argc <= 3) ? 1 : 0 ), __extension__ ({ if (argc >= 0 && argc <= 3) ; else __assert_fail ("argc >= 0 && argc <= 3" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 9376, __extension__ __PRETTY_FUNCTION__); })); | |||
9377 | vec_static_blob_entries_cursor *pCur = | |||
9378 | (vec_static_blob_entries_cursor *)pVtabCursor; | |||
9379 | vec_static_blob_entries_vtab *p = | |||
9380 | (vec_static_blob_entries_vtab *)pCur->base.pVtab; | |||
9381 | ||||
9382 | if (idxNum == VEC_SBE__QUERYPLAN_KNN) { | |||
9383 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 9383, __extension__ __PRETTY_FUNCTION__); })); | |||
9384 | pCur->query_plan = VEC_SBE__QUERYPLAN_KNN; | |||
9385 | struct sbe_query_knn_data *knn_data; | |||
9386 | knn_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*knn_data)); | |||
9387 | if (!knn_data) { | |||
9388 | return SQLITE_NOMEM7; | |||
9389 | } | |||
9390 | memset(knn_data, 0, sizeof(*knn_data)); | |||
9391 | ||||
9392 | void *queryVector; | |||
9393 | size_t dimensions; | |||
9394 | enum VectorElementType elementType; | |||
9395 | vector_cleanup cleanup; | |||
9396 | char *err; | |||
9397 | int rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType, | |||
9398 | &cleanup, &err); | |||
9399 | if (rc != SQLITE_OK0) { | |||
9400 | return SQLITE_ERROR1; | |||
9401 | } | |||
9402 | if (elementType != p->blob->element_type) { | |||
9403 | return SQLITE_ERROR1; | |||
9404 | } | |||
9405 | if (dimensions != p->blob->dimensions) { | |||
9406 | return SQLITE_ERROR1; | |||
9407 | } | |||
9408 | ||||
9409 | i64 k = min(sqlite3_value_int64(argv[1]), (i64)p->blob->nvectors)(((sqlite3_api->value_int64(argv[1])) <= ((i64)p->blob ->nvectors)) ? (sqlite3_api->value_int64(argv[1])) : (( i64)p->blob->nvectors)); | |||
9410 | if (k < 0) { | |||
9411 | // HANDLE https://github.com/asg017/sqlite-vec/issues/55 | |||
9412 | return SQLITE_ERROR1; | |||
9413 | } | |||
9414 | if (k == 0) { | |||
9415 | knn_data->k = 0; | |||
9416 | pCur->knn_data = knn_data; | |||
9417 | return SQLITE_OK0; | |||
9418 | } | |||
9419 | ||||
9420 | size_t bsize = (p->blob->nvectors + 7) & ~7; | |||
9421 | ||||
9422 | i32 *topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i32)); | |||
9423 | if (!topk_rowids) { | |||
9424 | // HANDLE https://github.com/asg017/sqlite-vec/issues/55 | |||
9425 | return SQLITE_ERROR1; | |||
9426 | } | |||
9427 | f32 *distances = sqlite3_mallocsqlite3_api->malloc(bsize * sizeof(f32)); | |||
9428 | if (!distances) { | |||
9429 | // HANDLE https://github.com/asg017/sqlite-vec/issues/55 | |||
9430 | return SQLITE_ERROR1; | |||
9431 | } | |||
9432 | ||||
9433 | for (size_t i = 0; i < p->blob->nvectors; i++) { | |||
9434 | // https://github.com/asg017/sqlite-vec/issues/52 | |||
9435 | float *v = ((float *)p->blob->p) + (i * p->blob->dimensions); | |||
9436 | distances[i] = | |||
9437 | distance_l2_sqr_float(v, (float *)queryVector, &p->blob->dimensions); | |||
9438 | } | |||
9439 | u8 *candidates = bitmap_new(bsize); | |||
9440 | assert(candidates)((void) sizeof ((candidates) ? 1 : 0), __extension__ ({ if (candidates ) ; else __assert_fail ("candidates", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 9440, __extension__ __PRETTY_FUNCTION__); })); | |||
9441 | ||||
9442 | u8 *taken = bitmap_new(bsize); | |||
9443 | assert(taken)((void) sizeof ((taken) ? 1 : 0), __extension__ ({ if (taken) ; else __assert_fail ("taken", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 9443, __extension__ __PRETTY_FUNCTION__); })); | |||
9444 | ||||
9445 | bitmap_fill(candidates, bsize); | |||
9446 | for (size_t i = bsize; i >= p->blob->nvectors; i--) { | |||
9447 | bitmap_set(candidates, i, 0); | |||
9448 | } | |||
9449 | i32 k_used = 0; | |||
9450 | min_idx(distances, bsize, candidates, topk_rowids, k, taken, &k_used); | |||
9451 | knn_data->current_idx = 0; | |||
9452 | knn_data->distances = distances; | |||
9453 | knn_data->k = k; | |||
9454 | knn_data->rowids = topk_rowids; | |||
9455 | ||||
9456 | pCur->knn_data = knn_data; | |||
9457 | } else { | |||
9458 | pCur->query_plan = VEC_SBE__QUERYPLAN_FULLSCAN; | |||
9459 | pCur->iRowid = 0; | |||
9460 | } | |||
9461 | ||||
9462 | return SQLITE_OK0; | |||
9463 | } | |||
9464 | ||||
9465 | static int vec_static_blob_entriesRowid(sqlite3_vtab_cursor *cur, | |||
9466 | sqlite_int64 *pRowid) { | |||
9467 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; | |||
9468 | switch (pCur->query_plan) { | |||
9469 | case VEC_SBE__QUERYPLAN_FULLSCAN: { | |||
9470 | *pRowid = pCur->iRowid; | |||
9471 | return SQLITE_OK0; | |||
9472 | } | |||
9473 | case VEC_SBE__QUERYPLAN_KNN: { | |||
9474 | i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx]; | |||
9475 | *pRowid = (sqlite3_int64)rowid; | |||
9476 | return SQLITE_OK0; | |||
9477 | } | |||
9478 | } | |||
9479 | return SQLITE_ERROR1; | |||
9480 | } | |||
9481 | ||||
9482 | static int vec_static_blob_entriesNext(sqlite3_vtab_cursor *cur) { | |||
9483 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; | |||
9484 | switch (pCur->query_plan) { | |||
9485 | case VEC_SBE__QUERYPLAN_FULLSCAN: { | |||
9486 | pCur->iRowid++; | |||
9487 | return SQLITE_OK0; | |||
9488 | } | |||
9489 | case VEC_SBE__QUERYPLAN_KNN: { | |||
9490 | pCur->knn_data->current_idx++; | |||
9491 | return SQLITE_OK0; | |||
9492 | } | |||
9493 | } | |||
9494 | return SQLITE_ERROR1; | |||
9495 | } | |||
9496 | ||||
9497 | static int vec_static_blob_entriesEof(sqlite3_vtab_cursor *cur) { | |||
9498 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; | |||
9499 | vec_static_blob_entries_vtab *p = | |||
9500 | (vec_static_blob_entries_vtab *)pCur->base.pVtab; | |||
9501 | switch (pCur->query_plan) { | |||
9502 | case VEC_SBE__QUERYPLAN_FULLSCAN: { | |||
9503 | return (size_t)pCur->iRowid >= p->blob->nvectors; | |||
9504 | } | |||
9505 | case VEC_SBE__QUERYPLAN_KNN: { | |||
9506 | return pCur->knn_data->current_idx >= pCur->knn_data->k; | |||
9507 | } | |||
9508 | } | |||
9509 | return SQLITE_ERROR1; | |||
9510 | } | |||
9511 | ||||
9512 | static int vec_static_blob_entriesColumn(sqlite3_vtab_cursor *cur, | |||
9513 | sqlite3_context *context, int i) { | |||
9514 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; | |||
9515 | vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)cur->pVtab; | |||
9516 | ||||
9517 | switch (pCur->query_plan) { | |||
9518 | case VEC_SBE__QUERYPLAN_FULLSCAN: { | |||
9519 | switch (i) { | |||
9520 | case VEC_STATIC_BLOB_ENTRIES_VECTOR0: | |||
9521 | ||||
9522 | sqlite3_result_blobsqlite3_api->result_blob( | |||
9523 | context, | |||
9524 | ((unsigned char *)p->blob->p) + | |||
9525 | (pCur->iRowid * p->blob->dimensions * sizeof(float)), | |||
9526 | p->blob->dimensions * sizeof(float), SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
9527 | sqlite3_result_subtypesqlite3_api->result_subtype(context, p->blob->element_type); | |||
9528 | break; | |||
9529 | } | |||
9530 | return SQLITE_OK0; | |||
9531 | } | |||
9532 | case VEC_SBE__QUERYPLAN_KNN: { | |||
9533 | switch (i) { | |||
9534 | case VEC_STATIC_BLOB_ENTRIES_VECTOR0: { | |||
9535 | i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx]; | |||
9536 | sqlite3_result_blobsqlite3_api->result_blob(context, | |||
9537 | ((unsigned char *)p->blob->p) + | |||
9538 | (rowid * p->blob->dimensions * sizeof(float)), | |||
9539 | p->blob->dimensions * sizeof(float), | |||
9540 | SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
9541 | sqlite3_result_subtypesqlite3_api->result_subtype(context, p->blob->element_type); | |||
9542 | break; | |||
9543 | } | |||
9544 | } | |||
9545 | return SQLITE_OK0; | |||
9546 | } | |||
9547 | } | |||
9548 | return SQLITE_ERROR1; | |||
9549 | } | |||
9550 | ||||
9551 | static sqlite3_module vec_static_blob_entriesModule = { | |||
9552 | /* iVersion */ 3, | |||
9553 | /* xCreate */ | |||
9554 | vec_static_blob_entriesCreate, // handle rm? | |||
9555 | // https://github.com/asg017/sqlite-vec/issues/55 | |||
9556 | /* xConnect */ vec_static_blob_entriesConnect, | |||
9557 | /* xBestIndex */ vec_static_blob_entriesBestIndex, | |||
9558 | /* xDisconnect */ vec_static_blob_entriesDisconnect, | |||
9559 | /* xDestroy */ vec_static_blob_entriesDisconnect, | |||
9560 | /* xOpen */ vec_static_blob_entriesOpen, | |||
9561 | /* xClose */ vec_static_blob_entriesClose, | |||
9562 | /* xFilter */ vec_static_blob_entriesFilter, | |||
9563 | /* xNext */ vec_static_blob_entriesNext, | |||
9564 | /* xEof */ vec_static_blob_entriesEof, | |||
9565 | /* xColumn */ vec_static_blob_entriesColumn, | |||
9566 | /* xRowid */ vec_static_blob_entriesRowid, | |||
9567 | /* xUpdate */ 0, | |||
9568 | /* xBegin */ 0, | |||
9569 | /* xSync */ 0, | |||
9570 | /* xCommit */ 0, | |||
9571 | /* xRollback */ 0, | |||
9572 | /* xFindMethod */ 0, | |||
9573 | /* xRename */ 0, | |||
9574 | /* xSavepoint */ 0, | |||
9575 | /* xRelease */ 0, | |||
9576 | /* xRollbackTo */ 0, | |||
9577 | /* xShadowName */ 0, | |||
9578 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 | |||
9579 | /* xIntegrity */ 0 | |||
9580 | #endif | |||
9581 | }; | |||
9582 | #pragma endregion | |||
9583 | ||||
/*
 * Build-flag fragments: each expands to the flag's name when the matching
 * SIMD code path was enabled at compile time, and to "" otherwise.
 */
#if defined(SQLITE_VEC_ENABLE_AVX)
#define SQLITE_VEC_DEBUG_BUILD_AVX "avx"
#else
#define SQLITE_VEC_DEBUG_BUILD_AVX ""
#endif

#if defined(SQLITE_VEC_ENABLE_NEON)
#define SQLITE_VEC_DEBUG_BUILD_NEON "neon"
#else
#define SQLITE_VEC_DEBUG_BUILD_NEON ""
#endif

/* Space-separated list of the build-flag fragments above. */
#define SQLITE_VEC_DEBUG_BUILD                                                 \
  SQLITE_VEC_DEBUG_BUILD_AVX " " SQLITE_VEC_DEBUG_BUILD_NEON

/*
 * Multi-line description of this build (version, date, commit, build flags),
 * assembled by string-literal concatenation.
 */
#define SQLITE_VEC_DEBUG_STRING                                                \
  "Version: " SQLITE_VEC_VERSION "\n"                                          \
  "Date: " SQLITE_VEC_DATE "\n"                                                \
  "Commit: " SQLITE_VEC_SOURCE "\n"                                            \
  "Build flags: " SQLITE_VEC_DEBUG_BUILD
9603 | ||||
9604 | SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg, | |||
9605 | const sqlite3_api_routines *pApi) { | |||
9606 | #ifndef SQLITE_CORE | |||
9607 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | |||
9608 | #endif | |||
9609 | int rc = SQLITE_OK0; | |||
9610 | ||||
9611 | #define DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) (SQLITE_UTF81 | SQLITE_INNOCUOUS0x000200000 | SQLITE_DETERMINISTIC0x000000800) | |||
9612 | ||||
9613 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_version", 0, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800), | |||
9614 | SQLITE_VEC_VERSION"v0.1.7-alpha.2", _static_text_func, NULL((void*)0), | |||
9615 | NULL((void*)0), NULL((void*)0)); | |||
9616 | if (rc != SQLITE_OK0) { | |||
9617 | return rc; | |||
9618 | } | |||
9619 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_debug", 0, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800), | |||
9620 | SQLITE_VEC_DEBUG_STRING"Version: " "v0.1.7-alpha.2" "\n" "Date: " "2025-01-10T23:18:50Z+0000" "\n" "Commit: " "bdc336d1cf2a2222b6227784bd30c6631603279b" "\n" "Build flags: " "" " " "", _static_text_func, | |||
9621 | NULL((void*)0), NULL((void*)0), NULL((void*)0)); | |||
9622 | if (rc != SQLITE_OK0) { | |||
9623 | return rc; | |||
9624 | } | |||
9625 | static struct { | |||
9626 | const char *zFName; | |||
9627 | void (*xFunc)(sqlite3_context *, int, sqlite3_value **); | |||
9628 | int nArg; | |||
9629 | int flags; | |||
9630 | } aFunc[] = { | |||
9631 | // clang-format off | |||
9632 | //{"vec_version", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_VERSION }, | |||
9633 | //{"vec_debug", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_DEBUG_STRING }, | |||
9634 | {"vec_distance_l2", vec_distance_l2, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, | |||
9635 | {"vec_distance_l1", vec_distance_l1, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, | |||
9636 | {"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, | |||
9637 | {"vec_distance_cosine", vec_distance_cosine, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, | |||
9638 | {"vec_length", vec_length, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, | |||
9639 | {"vec_type", vec_type, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800), }, | |||
9640 | {"vec_to_json", vec_to_json, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9641 | {"vec_add", vec_add, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9642 | {"vec_sub", vec_sub, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9643 | {"vec_slice", vec_slice, 3, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9644 | {"vec_normalize", vec_normalize, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9645 | {"vec_f32", vec_f32, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9646 | {"vec_bit", vec_bit, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9647 | {"vec_int8", vec_int8, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9648 | {"vec_quantize_int8", vec_quantize_int8, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9649 | {"vec_quantize_binary", vec_quantize_binary, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, | |||
9650 | // clang-format on | |||
9651 | }; | |||
9652 | ||||
9653 | static struct { | |||
9654 | char *name; | |||
9655 | const sqlite3_module *module; | |||
9656 | void *p; | |||
9657 | void (*xDestroy)(void *); | |||
9658 | } aMod[] = { | |||
9659 | // clang-format off | |||
9660 | {"vec0", &vec0Module, NULL((void*)0), NULL((void*)0)}, | |||
9661 | {"vec_each", &vec_eachModule, NULL((void*)0), NULL((void*)0)}, | |||
9662 | // clang-format on | |||
9663 | }; | |||
9664 | ||||
9665 | for (unsigned long i = 0; i < countof(aFunc)(sizeof(aFunc) / sizeof((aFunc)[0])) && rc == SQLITE_OK0; i++) { | |||
9666 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, aFunc[i].zFName, aFunc[i].nArg, | |||
9667 | aFunc[i].flags, NULL((void*)0), aFunc[i].xFunc, NULL((void*)0), | |||
9668 | NULL((void*)0), NULL((void*)0)); | |||
9669 | if (rc != SQLITE_OK0) { | |||
9670 | *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("Error creating function %s: %s", | |||
9671 | aFunc[i].zFName, sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
9672 | return rc; | |||
9673 | } | |||
9674 | } | |||
9675 | ||||
9676 | for (unsigned long i = 0; i < countof(aMod)(sizeof(aMod) / sizeof((aMod)[0])) && rc == SQLITE_OK0; i++) { | |||
9677 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, aMod[i].name, aMod[i].module, NULL((void*)0), NULL((void*)0)); | |||
9678 | if (rc != SQLITE_OK0) { | |||
9679 | *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("Error creating module %s: %s", aMod[i].name, | |||
9680 | sqlite3_errmsgsqlite3_api->errmsg(db)); | |||
9681 | return rc; | |||
9682 | } | |||
9683 | } | |||
9684 | ||||
9685 | return SQLITE_OK0; | |||
9686 | } | |||
9687 | ||||
9688 | #ifndef SQLITE_VEC_OMIT_FS | |||
9689 | SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg, | |||
9690 | const sqlite3_api_routines *pApi) { | |||
9691 | UNUSED_PARAMETER(pzErrMsg)(void)(pzErrMsg); | |||
9692 | #ifndef SQLITE_CORE | |||
9693 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | |||
9694 | #endif | |||
9695 | int rc = SQLITE_OK0; | |||
9696 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE0x001000000, | |||
9697 | NULL((void*)0), vec_npy_file, NULL((void*)0), NULL((void*)0), NULL((void*)0)); | |||
9698 | if(rc != SQLITE_OK0) { | |||
9699 | return rc; | |||
9700 | } | |||
9701 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_npy_each", &vec_npy_eachModule, NULL((void*)0), NULL((void*)0)); | |||
9702 | return rc; | |||
9703 | } | |||
9704 | #endif | |||
9705 | ||||
9706 | SQLITE_VEC_API int | |||
9707 | sqlite3_vec_static_blobs_init(sqlite3 *db, char **pzErrMsg, | |||
9708 | const sqlite3_api_routines *pApi) { | |||
9709 | UNUSED_PARAMETER(pzErrMsg)(void)(pzErrMsg); | |||
9710 | #ifndef SQLITE_CORE | |||
9711 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | |||
9712 | #endif | |||
9713 | ||||
9714 | int rc = SQLITE_OK0; | |||
9715 | vec_static_blob_data *static_blob_data; | |||
9716 | static_blob_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*static_blob_data)); | |||
9717 | if (!static_blob_data) { | |||
9718 | return SQLITE_NOMEM7; | |||
9719 | } | |||
9720 | memset(static_blob_data, 0, sizeof(*static_blob_data)); | |||
9721 | ||||
9722 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2( | |||
9723 | db, "vec_static_blob_from_raw", 4, | |||
9724 | DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, NULL((void*)0), | |||
9725 | vec_static_blob_from_raw, NULL((void*)0), NULL((void*)0), NULL((void*)0)); | |||
9726 | if (rc != SQLITE_OK0) | |||
9727 | return rc; | |||
9728 | ||||
9729 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_static_blobs", &vec_static_blobsModule, | |||
9730 | static_blob_data, sqlite3_freesqlite3_api->free); | |||
9731 | if (rc != SQLITE_OK0) | |||
9732 | return rc; | |||
9733 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_static_blob_entries", | |||
9734 | &vec_static_blob_entriesModule, | |||
9735 | static_blob_data, NULL((void*)0)); | |||
9736 | if (rc != SQLITE_OK0) | |||
9737 | return rc; | |||
9738 | return rc; | |||
9739 | } |