File: | root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c |
Warning: | line 6901, column 7 Value stored to 'rc' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "sqlite-vec.h" |
2 | |
3 | #include <assert.h> |
4 | #include <errno(*__errno_location ()).h> |
5 | #include <float.h> |
6 | #include <inttypes.h> |
7 | #include <limits.h> |
8 | #include <math.h> |
9 | #include <stdbool.h> |
10 | #include <stdint.h> |
11 | #include <stdlib.h> |
12 | #include <string.h> |
13 | |
14 | #ifndef SQLITE_VEC_OMIT_FS |
15 | #include <stdio.h> |
16 | #endif |
17 | |
18 | #ifndef SQLITE_CORE |
19 | #include "sqlite3ext.h" |
20 | SQLITE_EXTENSION_INIT3extern const sqlite3_api_routines *sqlite3_api; |
21 | #else |
22 | #include "sqlite3.h" |
23 | #endif |
24 | |
25 | #ifndef UINT32_TYPEunsigned int |
26 | #ifdef HAVE_UINT32_T |
27 | #define UINT32_TYPEunsigned int uint32_t |
28 | #else |
29 | #define UINT32_TYPEunsigned int unsigned int |
30 | #endif |
31 | #endif |
32 | #ifndef UINT16_TYPEunsigned short int |
33 | #ifdef HAVE_UINT16_T |
34 | #define UINT16_TYPEunsigned short int uint16_t |
35 | #else |
36 | #define UINT16_TYPEunsigned short int unsigned short int |
37 | #endif |
38 | #endif |
39 | #ifndef INT16_TYPEshort int |
40 | #ifdef HAVE_INT16_T |
41 | #define INT16_TYPEshort int int16_t |
42 | #else |
43 | #define INT16_TYPEshort int short int |
44 | #endif |
45 | #endif |
46 | #ifndef UINT8_TYPEunsigned char |
47 | #ifdef HAVE_UINT8_T |
48 | #define UINT8_TYPEunsigned char uint8_t |
49 | #else |
50 | #define UINT8_TYPEunsigned char unsigned char |
51 | #endif |
52 | #endif |
53 | #ifndef INT8_TYPEsigned char |
54 | #ifdef HAVE_INT8_T |
55 | #define INT8_TYPEsigned char int8_t |
56 | #else |
57 | #define INT8_TYPEsigned char signed char |
58 | #endif |
59 | #endif |
60 | #ifndef LONGDOUBLE_TYPElong double |
61 | #define LONGDOUBLE_TYPElong double long double |
62 | #endif |
63 | |
64 | typedef int8_t i8; |
65 | typedef uint8_t u8; |
66 | typedef int16_t i16; |
67 | typedef int32_t i32; |
68 | typedef sqlite3_int64 i64; |
69 | typedef uint32_t u32; |
70 | typedef uint64_t u64; |
71 | typedef float f32; |
72 | typedef size_t usize; |
73 | |
74 | #ifndef UNUSED_PARAMETER |
75 | #define UNUSED_PARAMETER(X)(void)(X) (void)(X) |
76 | #endif |
77 | |
78 | // sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22) |
79 | // https://www.sqlite.org/changes.html#version_3_38_0 |
80 | #if SQLITE_VERSION_NUMBER3050001 >= 3038000 |
81 | #define COMPILER_SUPPORTS_VTAB_IN1 1 |
82 | #endif |
83 | |
84 | #ifndef SQLITE_SUBTYPE0x000100000 |
85 | #define SQLITE_SUBTYPE0x000100000 0x000100000 |
86 | #endif |
87 | |
88 | #ifndef SQLITE_RESULT_SUBTYPE0x001000000 |
89 | #define SQLITE_RESULT_SUBTYPE0x001000000 0x001000000 |
90 | #endif |
91 | |
92 | #ifndef SQLITE_INDEX_CONSTRAINT_LIMIT73 |
93 | #define SQLITE_INDEX_CONSTRAINT_LIMIT73 73 |
94 | #endif |
95 | |
96 | #ifndef SQLITE_INDEX_CONSTRAINT_OFFSET74 |
97 | #define SQLITE_INDEX_CONSTRAINT_OFFSET74 74 |
98 | #endif |
99 | |
100 | #define countof(x)(sizeof(x) / sizeof((x)[0])) (sizeof(x) / sizeof((x)[0])) |
101 | #define min(a, b)(((a) <= (b)) ? (a) : (b)) (((a) <= (b)) ? (a) : (b)) |
102 | |
/*
 * Element types a vector can hold. vector_from_value() compares the result
 * of sqlite3_value_subtype() against these constants, so the numeric values
 * must stay exactly as they are.
 */
enum VectorElementType {
  // clang-format off
  SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223,
  SQLITE_VEC_ELEMENT_TYPE_BIT     = 224,
  SQLITE_VEC_ELEMENT_TYPE_INT8    = 225,
  // clang-format on
};
110 | |
111 | #ifdef SQLITE_VEC_ENABLE_AVX |
112 | #include <immintrin.h> |
113 | #define PORTABLE_ALIGN32 __attribute__((aligned(32))) |
114 | #define PORTABLE_ALIGN64 __attribute__((aligned(64))) |
115 | |
116 | static f32 l2_sqr_float_avx(const void *pVect1v, const void *pVect2v, |
117 | const void *qty_ptr) { |
118 | f32 *pVect1 = (f32 *)pVect1v; |
119 | f32 *pVect2 = (f32 *)pVect2v; |
120 | size_t qty = *((size_t *)qty_ptr); |
121 | f32 PORTABLE_ALIGN32 TmpRes[8]; |
122 | size_t qty16 = qty >> 4; |
123 | |
124 | const f32 *pEnd1 = pVect1 + (qty16 << 4); |
125 | |
126 | __m256 diff, v1, v2; |
127 | __m256 sum = _mm256_set1_ps(0); |
128 | |
129 | while (pVect1 < pEnd1) { |
130 | v1 = _mm256_loadu_ps(pVect1); |
131 | pVect1 += 8; |
132 | v2 = _mm256_loadu_ps(pVect2); |
133 | pVect2 += 8; |
134 | diff = _mm256_sub_ps(v1, v2); |
135 | sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); |
136 | |
137 | v1 = _mm256_loadu_ps(pVect1); |
138 | pVect1 += 8; |
139 | v2 = _mm256_loadu_ps(pVect2); |
140 | pVect2 += 8; |
141 | diff = _mm256_sub_ps(v1, v2); |
142 | sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); |
143 | } |
144 | |
145 | _mm256_store_ps(TmpRes, sum); |
146 | return sqrt(TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + |
147 | TmpRes[5] + TmpRes[6] + TmpRes[7]); |
148 | } |
149 | #endif |
150 | |
151 | #ifdef SQLITE_VEC_ENABLE_NEON |
152 | #include <arm_neon.h> |
153 | |
154 | #define PORTABLE_ALIGN32 __attribute__((aligned(32))) |
155 | |
156 | // thx https://github.com/nmslib/hnswlib/pull/299/files |
157 | static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v, |
158 | const void *qty_ptr) { |
159 | f32 *pVect1 = (f32 *)pVect1v; |
160 | f32 *pVect2 = (f32 *)pVect2v; |
161 | size_t qty = *((size_t *)qty_ptr); |
162 | size_t qty16 = qty >> 4; |
163 | |
164 | const f32 *pEnd1 = pVect1 + (qty16 << 4); |
165 | |
166 | float32x4_t diff, v1, v2; |
167 | float32x4_t sum0 = vdupq_n_f32(0); |
168 | float32x4_t sum1 = vdupq_n_f32(0); |
169 | float32x4_t sum2 = vdupq_n_f32(0); |
170 | float32x4_t sum3 = vdupq_n_f32(0); |
171 | |
172 | while (pVect1 < pEnd1) { |
173 | v1 = vld1q_f32(pVect1); |
174 | pVect1 += 4; |
175 | v2 = vld1q_f32(pVect2); |
176 | pVect2 += 4; |
177 | diff = vsubq_f32(v1, v2); |
178 | sum0 = vfmaq_f32(sum0, diff, diff); |
179 | |
180 | v1 = vld1q_f32(pVect1); |
181 | pVect1 += 4; |
182 | v2 = vld1q_f32(pVect2); |
183 | pVect2 += 4; |
184 | diff = vsubq_f32(v1, v2); |
185 | sum1 = vfmaq_f32(sum1, diff, diff); |
186 | |
187 | v1 = vld1q_f32(pVect1); |
188 | pVect1 += 4; |
189 | v2 = vld1q_f32(pVect2); |
190 | pVect2 += 4; |
191 | diff = vsubq_f32(v1, v2); |
192 | sum2 = vfmaq_f32(sum2, diff, diff); |
193 | |
194 | v1 = vld1q_f32(pVect1); |
195 | pVect1 += 4; |
196 | v2 = vld1q_f32(pVect2); |
197 | pVect2 += 4; |
198 | diff = vsubq_f32(v1, v2); |
199 | sum3 = vfmaq_f32(sum3, diff, diff); |
200 | } |
201 | |
202 | f32 sum_scalar = |
203 | vaddvq_f32(vaddq_f32(vaddq_f32(sum0, sum1), vaddq_f32(sum2, sum3))); |
204 | const f32 *pEnd2 = pVect1 + (qty - (qty16 << 4)); |
205 | while (pVect1 < pEnd2) { |
206 | f32 diff = *pVect1 - *pVect2; |
207 | sum_scalar += diff * diff; |
208 | pVect1++; |
209 | pVect2++; |
210 | } |
211 | |
212 | return sqrt(sum_scalar); |
213 | } |
214 | |
215 | static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v, |
216 | const void *qty_ptr) { |
217 | i8 *pVect1 = (i8 *)pVect1v; |
218 | i8 *pVect2 = (i8 *)pVect2v; |
219 | size_t qty = *((size_t *)qty_ptr); |
220 | |
221 | const i8 *pEnd1 = pVect1 + qty; |
222 | i32 sum_scalar = 0; |
223 | |
224 | while (pVect1 < pEnd1 - 7) { |
225 | // loading 8 at a time |
226 | int8x8_t v1 = vld1_s8(pVect1); |
227 | int8x8_t v2 = vld1_s8(pVect2); |
228 | pVect1 += 8; |
229 | pVect2 += 8; |
230 | |
231 | // widen to protect against overflow |
232 | int16x8_t v1_wide = vmovl_s8(v1); |
233 | int16x8_t v2_wide = vmovl_s8(v2); |
234 | |
235 | int16x8_t diff = vsubq_s16(v1_wide, v2_wide); |
236 | int16x8_t squared_diff = vmulq_s16(diff, diff); |
237 | int32x4_t sum = vpaddlq_s16(squared_diff); |
238 | |
239 | sum_scalar += vgetq_lane_s32(sum, 0) + vgetq_lane_s32(sum, 1) + |
240 | vgetq_lane_s32(sum, 2) + vgetq_lane_s32(sum, 3); |
241 | } |
242 | |
243 | // handle leftovers |
244 | while (pVect1 < pEnd1) { |
245 | i16 diff = (i16)*pVect1 - (i16)*pVect2; |
246 | sum_scalar += diff * diff; |
247 | pVect1++; |
248 | pVect2++; |
249 | } |
250 | |
251 | return sqrtf(sum_scalar); |
252 | } |
253 | |
254 | static i32 l1_int8_neon(const void *pVect1v, const void *pVect2v, |
255 | const void *qty_ptr) { |
256 | i8 *pVect1 = (i8 *)pVect1v; |
257 | i8 *pVect2 = (i8 *)pVect2v; |
258 | size_t qty = *((size_t *)qty_ptr); |
259 | |
260 | const int8_t *pEnd1 = pVect1 + qty; |
261 | |
262 | int32x4_t acc1 = vdupq_n_s32(0); |
263 | int32x4_t acc2 = vdupq_n_s32(0); |
264 | int32x4_t acc3 = vdupq_n_s32(0); |
265 | int32x4_t acc4 = vdupq_n_s32(0); |
266 | |
267 | while (pVect1 < pEnd1 - 63) { |
268 | int8x16_t v1 = vld1q_s8(pVect1); |
269 | int8x16_t v2 = vld1q_s8(pVect2); |
270 | int8x16_t diff1 = vabdq_s8(v1, v2); |
271 | acc1 = vaddq_s32(acc1, vpaddlq_u16(vpaddlq_u8(diff1))); |
272 | |
273 | v1 = vld1q_s8(pVect1 + 16); |
274 | v2 = vld1q_s8(pVect2 + 16); |
275 | int8x16_t diff2 = vabdq_s8(v1, v2); |
276 | acc2 = vaddq_s32(acc2, vpaddlq_u16(vpaddlq_u8(diff2))); |
277 | |
278 | v1 = vld1q_s8(pVect1 + 32); |
279 | v2 = vld1q_s8(pVect2 + 32); |
280 | int8x16_t diff3 = vabdq_s8(v1, v2); |
281 | acc3 = vaddq_s32(acc3, vpaddlq_u16(vpaddlq_u8(diff3))); |
282 | |
283 | v1 = vld1q_s8(pVect1 + 48); |
284 | v2 = vld1q_s8(pVect2 + 48); |
285 | int8x16_t diff4 = vabdq_s8(v1, v2); |
286 | acc4 = vaddq_s32(acc4, vpaddlq_u16(vpaddlq_u8(diff4))); |
287 | |
288 | pVect1 += 64; |
289 | pVect2 += 64; |
290 | } |
291 | |
292 | while (pVect1 < pEnd1 - 15) { |
293 | int8x16_t v1 = vld1q_s8(pVect1); |
294 | int8x16_t v2 = vld1q_s8(pVect2); |
295 | int8x16_t diff = vabdq_s8(v1, v2); |
296 | acc1 = vaddq_s32(acc1, vpaddlq_u16(vpaddlq_u8(diff))); |
297 | pVect1 += 16; |
298 | pVect2 += 16; |
299 | } |
300 | |
301 | int32x4_t acc = vaddq_s32(vaddq_s32(acc1, acc2), vaddq_s32(acc3, acc4)); |
302 | |
303 | int32_t sum = 0; |
304 | while (pVect1 < pEnd1) { |
305 | int32_t diff = abs((int32_t)*pVect1 - (int32_t)*pVect2); |
306 | sum += diff; |
307 | pVect1++; |
308 | pVect2++; |
309 | } |
310 | |
311 | return vaddvq_s32(acc) + sum; |
312 | } |
313 | |
314 | static double l1_f32_neon(const void *pVect1v, const void *pVect2v, |
315 | const void *qty_ptr) { |
316 | f32 *pVect1 = (f32 *)pVect1v; |
317 | f32 *pVect2 = (f32 *)pVect2v; |
318 | size_t qty = *((size_t *)qty_ptr); |
319 | |
320 | const f32 *pEnd1 = pVect1 + qty; |
321 | float64x2_t acc = vdupq_n_f64(0); |
322 | |
323 | while (pVect1 < pEnd1 - 3) { |
324 | float32x4_t v1 = vld1q_f32(pVect1); |
325 | float32x4_t v2 = vld1q_f32(pVect2); |
326 | pVect1 += 4; |
327 | pVect2 += 4; |
328 | |
329 | // f32x4 -> f64x2 pad for overflow |
330 | float64x2_t low_diff = vabdq_f64(vcvt_f64_f32(vget_low_f32(v1)), |
331 | vcvt_f64_f32(vget_low_f32(v2))); |
332 | float64x2_t high_diff = |
333 | vabdq_f64(vcvt_high_f64_f32(v1), vcvt_high_f64_f32(v2)); |
334 | |
335 | acc = vaddq_f64(acc, vaddq_f64(low_diff, high_diff)); |
336 | } |
337 | |
338 | double sum = 0; |
339 | while (pVect1 < pEnd1) { |
340 | sum += fabs((double)*pVect1 - (double)*pVect2); |
341 | pVect1++; |
342 | pVect2++; |
343 | } |
344 | |
345 | return vaddvq_f64(acc) + sum; |
346 | } |
347 | #endif |
348 | |
349 | static f32 l2_sqr_float(const void *pVect1v, const void *pVect2v, |
350 | const void *qty_ptr) { |
351 | f32 *pVect1 = (f32 *)pVect1v; |
352 | f32 *pVect2 = (f32 *)pVect2v; |
353 | size_t qty = *((size_t *)qty_ptr); |
354 | |
355 | f32 res = 0; |
356 | for (size_t i = 0; i < qty; i++) { |
357 | f32 t = *pVect1 - *pVect2; |
358 | pVect1++; |
359 | pVect2++; |
360 | res += t * t; |
361 | } |
362 | return sqrt(res); |
363 | } |
364 | |
365 | static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) { |
366 | i8 *a = (i8 *)pA; |
367 | i8 *b = (i8 *)pB; |
368 | size_t d = *((size_t *)pD); |
369 | |
370 | f32 res = 0; |
371 | for (size_t i = 0; i < d; i++) { |
372 | f32 t = *a - *b; |
373 | a++; |
374 | b++; |
375 | res += t * t; |
376 | } |
377 | return sqrt(res); |
378 | } |
379 | |
380 | static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) { |
381 | #ifdef SQLITE_VEC_ENABLE_NEON |
382 | if ((*(const size_t *)d) > 16) { |
383 | return l2_sqr_float_neon(a, b, d); |
384 | } |
385 | #endif |
386 | #ifdef SQLITE_VEC_ENABLE_AVX |
387 | if (((*(const size_t *)d) % 16 == 0)) { |
388 | return l2_sqr_float_avx(a, b, d); |
389 | } |
390 | #endif |
391 | return l2_sqr_float(a, b, d); |
392 | } |
393 | |
394 | static f32 distance_l2_sqr_int8(const void *a, const void *b, const void *d) { |
395 | #ifdef SQLITE_VEC_ENABLE_NEON |
396 | if ((*(const size_t *)d) > 7) { |
397 | return l2_sqr_int8_neon(a, b, d); |
398 | } |
399 | #endif |
400 | return l2_sqr_int8(a, b, d); |
401 | } |
402 | |
403 | static i32 l1_int8(const void *pA, const void *pB, const void *pD) { |
404 | i8 *a = (i8 *)pA; |
405 | i8 *b = (i8 *)pB; |
406 | size_t d = *((size_t *)pD); |
407 | |
408 | i32 res = 0; |
409 | for (size_t i = 0; i < d; i++) { |
410 | res += abs(*a - *b); |
411 | a++; |
412 | b++; |
413 | } |
414 | |
415 | return res; |
416 | } |
417 | |
418 | static i32 distance_l1_int8(const void *a, const void *b, const void *d) { |
419 | #ifdef SQLITE_VEC_ENABLE_NEON |
420 | if ((*(const size_t *)d) > 15) { |
421 | return l1_int8_neon(a, b, d); |
422 | } |
423 | #endif |
424 | return l1_int8(a, b, d); |
425 | } |
426 | |
427 | static double l1_f32(const void *pA, const void *pB, const void *pD) { |
428 | f32 *a = (f32 *)pA; |
429 | f32 *b = (f32 *)pB; |
430 | size_t d = *((size_t *)pD); |
431 | |
432 | double res = 0; |
433 | for (size_t i = 0; i < d; i++) { |
434 | res += fabs((double)*a - (double)*b); |
435 | a++; |
436 | b++; |
437 | } |
438 | |
439 | return res; |
440 | } |
441 | |
/*
 * L1 distance between two float32 vectors. Uses the NEON kernel when it is
 * compiled in and there are more than 3 dimensions, otherwise the scalar
 * implementation.
 *
 * a / b: the two float32 arrays.
 * d: pointer to a size_t holding the dimension count.
 */
static double distance_l1_f32(const void *a, const void *b, const void *d) {
#ifdef SQLITE_VEC_ENABLE_NEON
  const size_t dims = *(const size_t *)d;
  if (dims > 3) {
    return l1_f32_neon(a, b, d);
  }
#endif
  return l1_f32(a, b, d);
}
450 | |
451 | static f32 distance_cosine_float(const void *pVect1v, const void *pVect2v, |
452 | const void *qty_ptr) { |
453 | f32 *pVect1 = (f32 *)pVect1v; |
454 | f32 *pVect2 = (f32 *)pVect2v; |
455 | size_t qty = *((size_t *)qty_ptr); |
456 | |
457 | f32 dot = 0; |
458 | f32 aMag = 0; |
459 | f32 bMag = 0; |
460 | for (size_t i = 0; i < qty; i++) { |
461 | dot += *pVect1 * *pVect2; |
462 | aMag += *pVect1 * *pVect1; |
463 | bMag += *pVect2 * *pVect2; |
464 | pVect1++; |
465 | pVect2++; |
466 | } |
467 | return 1 - (dot / (sqrt(aMag) * sqrt(bMag))); |
468 | } |
469 | static f32 distance_cosine_int8(const void *pA, const void *pB, |
470 | const void *pD) { |
471 | i8 *a = (i8 *)pA; |
472 | i8 *b = (i8 *)pB; |
473 | size_t d = *((size_t *)pD); |
474 | |
475 | f32 dot = 0; |
476 | f32 aMag = 0; |
477 | f32 bMag = 0; |
478 | for (size_t i = 0; i < d; i++) { |
479 | dot += *a * *b; |
480 | aMag += *a * *a; |
481 | bMag += *b * *b; |
482 | a++; |
483 | b++; |
484 | } |
485 | return 1 - (dot / (sqrt(aMag) * sqrt(bMag))); |
486 | } |
487 | |
488 | // https://github.com/facebookresearch/faiss/blob/77e2e79cd0a680adc343b9840dd865da724c579e/faiss/utils/hamming_distance/common.h#L34 |
489 | static u8 hamdist_table[256] = { |
490 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, |
491 | 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
492 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, |
493 | 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
494 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, |
495 | 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
496 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, |
497 | 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
498 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, |
499 | 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
500 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; |
501 | |
502 | static f32 distance_hamming_u8(u8 *a, u8 *b, size_t n) { |
503 | int same = 0; |
504 | for (unsigned long i = 0; i < n; i++) { |
505 | same += hamdist_table[a[i] ^ b[i]]; |
506 | } |
507 | return (f32)same; |
508 | } |
509 | |
510 | #ifdef _MSC_VER |
511 | #if !defined(__clang__1) && (defined(_M_ARM) || defined(_M_ARM64)) |
512 | // From |
513 | // https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c, |
514 | // line 34-43 |
515 | static unsigned int __builtin_popcountl(unsigned int x) { |
516 | unsigned int c = 0; |
517 | for (; x; ++c) { |
518 | x &= x - 1; |
519 | } |
520 | return c; |
521 | } |
522 | #else |
523 | #include <intrin.h> |
524 | #define __builtin_popcountl __popcnt64 |
525 | #endif |
526 | #endif |
527 | |
528 | static f32 distance_hamming_u64(u64 *a, u64 *b, size_t n) { |
529 | int same = 0; |
530 | for (unsigned long i = 0; i < n; i++) { |
531 | same += __builtin_popcountl(a[i] ^ b[i]); |
532 | } |
533 | return (f32)same; |
534 | } |
535 | |
536 | /** |
537 | * @brief Calculate the hamming distance between two bitvectors. |
538 | * |
539 | * @param a - first bitvector, MUST have d dimensions |
540 | * @param b - second bitvector, MUST have d dimensions |
541 | * @param d - pointer to size_t, MUST be divisible by CHAR_BIT |
542 | * @return f32 |
543 | */ |
544 | static f32 distance_hamming(const void *a, const void *b, const void *d) { |
545 | size_t dimensions = *((size_t *)d); |
546 | |
547 | if ((dimensions % 64) == 0) { |
548 | return distance_hamming_u64((u64 *)a, (u64 *)b, dimensions / 8 / CHAR_BIT8); |
549 | } |
550 | return distance_hamming_u8((u8 *)a, (u8 *)b, dimensions / CHAR_BIT8); |
551 | } |
552 | |
553 | // from SQLite source: |
554 | // https://github.com/sqlite/sqlite/blob/a509a90958ddb234d1785ed7801880ccb18b497e/src/json.c#L153 |
/*
 * Whitespace lookup table indexed by unsigned byte value: 1 for the four
 * characters treated as whitespace between JSON tokens (tab, line feed,
 * carriage return, space), 0 for every other byte.
 */
static const char vecJsonIsSpaceX[256] = {
    ['\t'] = 1,
    ['\n'] = 1,
    ['\r'] = 1,
    [' '] = 1,
};
570 | |
571 | #define vecJsonIsspace(x)(vecJsonIsSpaceX[(unsigned char)x]) (vecJsonIsSpaceX[(unsigned char)x]) |
572 | |
573 | typedef void (*vector_cleanup)(void *p); |
574 | |
/*
 * vector_cleanup callback that does nothing. The *_from_value() readers
 * install it when the returned vector points straight at a BLOB value's
 * bytes rather than at a separately allocated buffer.
 */
void vector_cleanup_noop(void *_) {
  UNUSED_PARAMETER(_);
}
576 | |
577 | #define JSON_SUBTYPE74 74 |
578 | |
579 | void vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) { |
580 | va_list args; |
581 | sqlite3_freesqlite3_api->free(pVTab->zErrMsg); |
582 | va_start(args, zFormat)__builtin_va_start(args, zFormat); |
583 | pVTab->zErrMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, args); |
584 | va_end(args)__builtin_va_end(args); |
585 | } |
/*
 * Minimal growable array of fixed-size elements, managed by array_init(),
 * array_append() and array_cleanup().
 */
struct Array {
  size_t element_size; // size in bytes of one element
  size_t length;       // number of elements currently stored
  size_t capacity;     // number of elements the buffer can hold
  void *z;             // element storage, released with sqlite3_free()
};
592 | |
593 | /** |
594 | * @brief Initial an array with the given element size and capacity. |
595 | * |
596 | * @param array |
597 | * @param element_size |
598 | * @param init_capacity |
599 | * @return SQLITE_OK on success, error code on failure. Only error is |
600 | * SQLITE_NOMEM |
601 | */ |
602 | int array_init(struct Array *array, size_t element_size, size_t init_capacity) { |
603 | int sz = element_size * init_capacity; |
604 | void *z = sqlite3_mallocsqlite3_api->malloc(sz); |
605 | if (!z) { |
606 | return SQLITE_NOMEM7; |
607 | } |
608 | memset(z, 0, sz); |
609 | |
610 | array->element_size = element_size; |
611 | array->length = 0; |
612 | array->capacity = init_capacity; |
613 | array->z = z; |
614 | return SQLITE_OK0; |
615 | } |
616 | |
617 | int array_append(struct Array *array, const void *element) { |
618 | if (array->length == array->capacity) { |
619 | size_t new_capacity = array->capacity * 2 + 100; |
620 | void *z = sqlite3_realloc64sqlite3_api->realloc64(array->z, array->element_size * new_capacity); |
621 | if (z) { |
622 | array->capacity = new_capacity; |
623 | array->z = z; |
624 | } else { |
625 | return SQLITE_NOMEM7; |
626 | } |
627 | } |
628 | memcpy(&((unsigned char *)array->z)[array->length * array->element_size], |
629 | element, array->element_size); |
630 | array->length++; |
631 | return SQLITE_OK0; |
632 | } |
633 | |
634 | void array_cleanup(struct Array *array) { |
635 | if (!array) |
636 | return; |
637 | array->element_size = 0; |
638 | array->length = 0; |
639 | array->capacity = 0; |
640 | sqlite3_freesqlite3_api->free(array->z); |
641 | array->z = NULL((void*)0); |
642 | } |
643 | |
644 | char *vector_subtype_name(int subtype) { |
645 | switch (subtype) { |
646 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: |
647 | return "float32"; |
648 | case SQLITE_VEC_ELEMENT_TYPE_INT8: |
649 | return "int8"; |
650 | case SQLITE_VEC_ELEMENT_TYPE_BIT: |
651 | return "bit"; |
652 | } |
653 | return ""; |
654 | } |
655 | char *type_name(int type) { |
656 | switch (type) { |
657 | case SQLITE_INTEGER1: |
658 | return "INTEGER"; |
659 | case SQLITE_BLOB4: |
660 | return "BLOB"; |
661 | case SQLITE_TEXT3: |
662 | return "TEXT"; |
663 | case SQLITE_FLOAT2: |
664 | return "FLOAT"; |
665 | case SQLITE_NULL5: |
666 | return "NULL"; |
667 | } |
668 | return ""; |
669 | } |
670 | |
671 | typedef void (*fvec_cleanup)(f32 *vector); |
672 | |
673 | void fvec_cleanup_noop(f32 *_) { UNUSED_PARAMETER(_)(void)(_); } |
674 | |
675 | static int fvec_from_value(sqlite3_value *value, f32 **vector, |
676 | size_t *dimensions, fvec_cleanup *cleanup, |
677 | char **pzErr) { |
678 | int value_type = sqlite3_value_typesqlite3_api->value_type(value); |
679 | |
680 | if (value_type == SQLITE_BLOB4) { |
681 | const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value); |
682 | int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value); |
683 | if (bytes == 0) { |
684 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); |
685 | return SQLITE_ERROR1; |
686 | } |
687 | if ((bytes % sizeof(f32)) != 0) { |
688 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("invalid float32 vector BLOB length. Must be " |
689 | "divisible by %d, found %d", |
690 | sizeof(f32), bytes); |
691 | return SQLITE_ERROR1; |
692 | } |
693 | *vector = (f32 *)blob; |
694 | *dimensions = bytes / sizeof(f32); |
695 | *cleanup = fvec_cleanup_noop; |
696 | return SQLITE_OK0; |
697 | } |
698 | |
699 | if (value_type == SQLITE_TEXT3) { |
700 | const char *source = (const char *)sqlite3_value_textsqlite3_api->value_text(value); |
701 | int source_len = sqlite3_value_bytessqlite3_api->value_bytes(value); |
702 | if (source_len == 0) { |
703 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); |
704 | return SQLITE_ERROR1; |
705 | } |
706 | int i = 0; |
707 | |
708 | struct Array x; |
709 | int rc = array_init(&x, sizeof(f32), ceil(source_len / 2.0)); |
710 | if (rc != SQLITE_OK0) { |
711 | return rc; |
712 | } |
713 | |
714 | // advance leading whitespace to first '[' |
715 | while (i < source_len) { |
716 | if (vecJsonIsspace(source[i])(vecJsonIsSpaceX[(unsigned char)source[i]])) { |
717 | i++; |
718 | continue; |
719 | } |
720 | if (source[i] == '[') { |
721 | break; |
722 | } |
723 | |
724 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
725 | "JSON array parsing error: Input does not start with '['"); |
726 | array_cleanup(&x); |
727 | return SQLITE_ERROR1; |
728 | } |
729 | if (source[i] != '[') { |
730 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
731 | "JSON array parsing error: Input does not start with '['"); |
732 | array_cleanup(&x); |
733 | return SQLITE_ERROR1; |
734 | } |
735 | int offset = i + 1; |
736 | |
737 | while (offset < source_len) { |
738 | char *ptr = (char *)&source[offset]; |
739 | char *endptr; |
740 | |
741 | errno(*__errno_location ()) = 0; |
742 | double result = strtod(ptr, &endptr); |
743 | if ((errno(*__errno_location ()) != 0 && result == 0) // some interval error? |
744 | || (errno(*__errno_location ()) == ERANGE34 && |
745 | (result == HUGE_VAL(__builtin_huge_val ()) || result == -HUGE_VAL(__builtin_huge_val ()))) // too big / smalls |
746 | ) { |
747 | sqlite3_freesqlite3_api->free(x.z); |
748 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error"); |
749 | return SQLITE_ERROR1; |
750 | } |
751 | |
752 | if (endptr == ptr) { |
753 | if (*ptr != ']') { |
754 | sqlite3_freesqlite3_api->free(x.z); |
755 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error"); |
756 | return SQLITE_ERROR1; |
757 | } |
758 | goto done; |
759 | } |
760 | |
761 | f32 res = (f32)result; |
762 | array_append(&x, (const void *)&res); |
763 | |
764 | offset += (endptr - ptr); |
765 | while (offset < source_len) { |
766 | if (vecJsonIsspace(source[offset])(vecJsonIsSpaceX[(unsigned char)source[offset]])) { |
767 | offset++; |
768 | continue; |
769 | } |
770 | if (source[offset] == ',') { |
771 | offset++; |
772 | continue; |
773 | } |
774 | if (source[offset] == ']') |
775 | goto done; |
776 | break; |
777 | } |
778 | } |
779 | |
780 | done: |
781 | |
782 | if (x.length > 0) { |
783 | *vector = (f32 *)x.z; |
784 | *dimensions = x.length; |
785 | *cleanup = (fvec_cleanup)sqlite3_freesqlite3_api->free; |
786 | return SQLITE_OK0; |
787 | } |
788 | sqlite3_freesqlite3_api->free(x.z); |
789 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); |
790 | return SQLITE_ERROR1; |
791 | } |
792 | |
793 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
794 | "Input must have type BLOB (compact format) or TEXT (JSON), found %s", |
795 | type_name(value_type)); |
796 | return SQLITE_ERROR1; |
797 | } |
798 | |
799 | static int bitvec_from_value(sqlite3_value *value, u8 **vector, |
800 | size_t *dimensions, vector_cleanup *cleanup, |
801 | char **pzErr) { |
802 | int value_type = sqlite3_value_typesqlite3_api->value_type(value); |
803 | if (value_type == SQLITE_BLOB4) { |
804 | const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value); |
805 | int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value); |
806 | if (bytes == 0) { |
807 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); |
808 | return SQLITE_ERROR1; |
809 | } |
810 | *vector = (u8 *)blob; |
811 | *dimensions = bytes * CHAR_BIT8; |
812 | *cleanup = vector_cleanup_noop; |
813 | return SQLITE_OK0; |
814 | } |
815 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Unknown type for bitvector."); |
816 | return SQLITE_ERROR1; |
817 | } |
818 | |
819 | static int int8_vec_from_value(sqlite3_value *value, i8 **vector, |
820 | size_t *dimensions, vector_cleanup *cleanup, |
821 | char **pzErr) { |
822 | int value_type = sqlite3_value_typesqlite3_api->value_type(value); |
823 | if (value_type == SQLITE_BLOB4) { |
824 | const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value); |
825 | int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value); |
826 | if (bytes == 0) { |
827 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); |
828 | return SQLITE_ERROR1; |
829 | } |
830 | *vector = (i8 *)blob; |
831 | *dimensions = bytes; |
832 | *cleanup = vector_cleanup_noop; |
833 | return SQLITE_OK0; |
834 | } |
835 | |
836 | if (value_type == SQLITE_TEXT3) { |
837 | const char *source = (const char *)sqlite3_value_textsqlite3_api->value_text(value); |
838 | int source_len = sqlite3_value_bytessqlite3_api->value_bytes(value); |
839 | int i = 0; |
840 | |
841 | if (source_len == 0) { |
842 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); |
843 | return SQLITE_ERROR1; |
844 | } |
845 | |
846 | struct Array x; |
847 | int rc = array_init(&x, sizeof(i8), ceil(source_len / 2.0)); |
848 | if (rc != SQLITE_OK0) { |
849 | return rc; |
850 | } |
851 | |
852 | // advance leading whitespace to first '[' |
853 | while (i < source_len) { |
854 | if (vecJsonIsspace(source[i])(vecJsonIsSpaceX[(unsigned char)source[i]])) { |
855 | i++; |
856 | continue; |
857 | } |
858 | if (source[i] == '[') { |
859 | break; |
860 | } |
861 | |
862 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
863 | "JSON array parsing error: Input does not start with '['"); |
864 | array_cleanup(&x); |
865 | return SQLITE_ERROR1; |
866 | } |
867 | if (source[i] != '[') { |
868 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
869 | "JSON array parsing error: Input does not start with '['"); |
870 | array_cleanup(&x); |
871 | return SQLITE_ERROR1; |
872 | } |
873 | int offset = i + 1; |
874 | |
875 | while (offset < source_len) { |
876 | char *ptr = (char *)&source[offset]; |
877 | char *endptr; |
878 | |
879 | errno(*__errno_location ()) = 0; |
880 | long result = strtol(ptr, &endptr, 10); |
881 | if ((errno(*__errno_location ()) != 0 && result == 0) || |
882 | (errno(*__errno_location ()) == ERANGE34 && (result == LONG_MAX9223372036854775807L || result == LONG_MIN(-9223372036854775807L -1L)))) { |
883 | sqlite3_freesqlite3_api->free(x.z); |
884 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error"); |
885 | return SQLITE_ERROR1; |
886 | } |
887 | |
888 | if (endptr == ptr) { |
889 | if (*ptr != ']') { |
890 | sqlite3_freesqlite3_api->free(x.z); |
891 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error"); |
892 | return SQLITE_ERROR1; |
893 | } |
894 | goto done; |
895 | } |
896 | |
897 | if (result < INT8_MIN(-128) || result > INT8_MAX(127)) { |
898 | sqlite3_freesqlite3_api->free(x.z); |
899 | *pzErr = |
900 | sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error: value out of range for int8"); |
901 | return SQLITE_ERROR1; |
902 | } |
903 | |
904 | i8 res = (i8)result; |
905 | array_append(&x, (const void *)&res); |
906 | |
907 | offset += (endptr - ptr); |
908 | while (offset < source_len) { |
909 | if (vecJsonIsspace(source[offset])(vecJsonIsSpaceX[(unsigned char)source[offset]])) { |
910 | offset++; |
911 | continue; |
912 | } |
913 | if (source[offset] == ',') { |
914 | offset++; |
915 | continue; |
916 | } |
917 | if (source[offset] == ']') |
918 | goto done; |
919 | break; |
920 | } |
921 | } |
922 | |
923 | done: |
924 | |
925 | if (x.length > 0) { |
926 | *vector = (i8 *)x.z; |
927 | *dimensions = x.length; |
928 | *cleanup = (vector_cleanup)sqlite3_freesqlite3_api->free; |
929 | return SQLITE_OK0; |
930 | } |
931 | sqlite3_freesqlite3_api->free(x.z); |
932 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported."); |
933 | return SQLITE_ERROR1; |
934 | } |
935 | |
936 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Unknown type for int8 vector."); |
937 | return SQLITE_ERROR1; |
938 | } |
939 | |
940 | /** |
941 | * @brief Extract a vector from a sqlite3_value. Can be a float32, int8, or bit |
942 | * vector. |
943 | * |
944 | * @param value: the sqlite3_value to read from. |
945 | * @param vector: Output pointer to vector data. |
946 | * @param dimensions: Output number of dimensions |
947 | * @param dimensions: Output vector element type |
948 | * @param cleanup |
949 | * @param pzErrorMessage |
950 | * @return int SQLITE_OK on success, error code otherwise |
951 | */ |
952 | int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions, |
953 | enum VectorElementType *element_type, |
954 | vector_cleanup *cleanup, char **pzErrorMessage) { |
955 | int subtype = sqlite3_value_subtypesqlite3_api->value_subtype(value); |
956 | if (!subtype || (subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) || |
957 | (subtype == JSON_SUBTYPE74)) { |
958 | int rc = fvec_from_value(value, (f32 **)vector, dimensions, |
959 | (fvec_cleanup *)cleanup, pzErrorMessage); |
960 | if (rc == SQLITE_OK0) { |
961 | *element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32; |
962 | } |
963 | return rc; |
964 | } |
965 | |
966 | if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) { |
967 | int rc = bitvec_from_value(value, (u8 **)vector, dimensions, cleanup, |
968 | pzErrorMessage); |
969 | if (rc == SQLITE_OK0) { |
970 | *element_type = SQLITE_VEC_ELEMENT_TYPE_BIT; |
971 | } |
972 | return rc; |
973 | } |
974 | if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) { |
975 | int rc = int8_vec_from_value(value, (i8 **)vector, dimensions, cleanup, |
976 | pzErrorMessage); |
977 | if (rc == SQLITE_OK0) { |
978 | *element_type = SQLITE_VEC_ELEMENT_TYPE_INT8; |
979 | } |
980 | return rc; |
981 | } |
982 | *pzErrorMessage = sqlite3_mprintfsqlite3_api->mprintf("Unknown subtype: %d", subtype); |
983 | return SQLITE_ERROR1; |
984 | } |
985 | |
986 | int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, void **a, |
987 | void **b, enum VectorElementType *element_type, |
988 | size_t *dimensions, vector_cleanup *outACleanup, |
989 | vector_cleanup *outBCleanup, char **outError) { |
990 | int rc; |
991 | enum VectorElementType aType, bType; |
992 | size_t aDims, bDims; |
993 | char *error = NULL((void*)0); |
994 | vector_cleanup aCleanup, bCleanup; |
995 | |
996 | rc = vector_from_value(aValue, a, &aDims, &aType, &aCleanup, &error); |
997 | if (rc != SQLITE_OK0) { |
998 | *outError = sqlite3_mprintfsqlite3_api->mprintf("Error reading 1st vector: %s", error); |
999 | sqlite3_freesqlite3_api->free(error); |
1000 | return SQLITE_ERROR1; |
1001 | } |
1002 | |
1003 | rc = vector_from_value(bValue, b, &bDims, &bType, &bCleanup, &error); |
1004 | if (rc != SQLITE_OK0) { |
1005 | *outError = sqlite3_mprintfsqlite3_api->mprintf("Error reading 2nd vector: %s", error); |
1006 | sqlite3_freesqlite3_api->free(error); |
1007 | aCleanup(a); |
1008 | return SQLITE_ERROR1; |
1009 | } |
1010 | |
1011 | if (aType != bType) { |
1012 | *outError = |
1013 | sqlite3_mprintfsqlite3_api->mprintf("Vector type mistmatch. First vector has type %s, " |
1014 | "while the second has type %s.", |
1015 | vector_subtype_name(aType), vector_subtype_name(bType)); |
1016 | aCleanup(*a); |
1017 | bCleanup(*b); |
1018 | return SQLITE_ERROR1; |
1019 | } |
1020 | if (aDims != bDims) { |
1021 | *outError = sqlite3_mprintfsqlite3_api->mprintf( |
1022 | "Vector dimension mistmatch. First vector has %ld dimensions, " |
1023 | "while the second has %ld dimensions.", |
1024 | aDims, bDims); |
1025 | aCleanup(*a); |
1026 | bCleanup(*b); |
1027 | return SQLITE_ERROR1; |
1028 | } |
1029 | *element_type = aType; |
1030 | *dimensions = aDims; |
1031 | *outACleanup = aCleanup; |
1032 | *outBCleanup = bCleanup; |
1033 | return SQLITE_OK0; |
1034 | } |
1035 | |
1036 | int _cmp(const void *a, const void *b) { return (*(i64 *)a - *(i64 *)b); } |
1037 | |
/* Object handed to sqlite3_result_pointer() by vec_npy_file(): a file path
 * together with its length in bytes. */
struct VecNpyFile {
  char *path;        /* path text */
  size_t pathLength; /* number of bytes in path */
};
/* Pointer type name used with sqlite3_result_pointer() for VecNpyFile
 * objects. */
#define SQLITE_VEC_NPY_FILE_NAME "vec0-npy-file"
1043 | |
1044 | #ifndef SQLITE_VEC_OMIT_FS |
1045 | static void vec_npy_file(sqlite3_context *context, int argc, |
1046 | sqlite3_value **argv) { |
1047 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1047, __extension__ __PRETTY_FUNCTION__); })); |
1048 | char *path = (char *)sqlite3_value_textsqlite3_api->value_text(argv[0]); |
1049 | size_t pathLength = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]); |
1050 | struct VecNpyFile *f; |
1051 | |
1052 | f = sqlite3_mallocsqlite3_api->malloc(sizeof(*f)); |
1053 | if (!f) { |
1054 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1055 | return; |
1056 | } |
1057 | memset(f, 0, sizeof(*f)); |
1058 | |
1059 | f->path = path; |
1060 | f->pathLength = pathLength; |
1061 | sqlite3_result_pointersqlite3_api->result_pointer(context, f, SQLITE_VEC_NPY_FILE_NAME"vec0-npy-file", sqlite3_freesqlite3_api->free); |
1062 | } |
1063 | #endif |
1064 | |
1065 | #pragma region scalar functions |
1066 | static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) { |
1067 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1067, __extension__ __PRETTY_FUNCTION__); })); |
1068 | int rc; |
1069 | f32 *vector = NULL((void*)0); |
1070 | size_t dimensions; |
1071 | fvec_cleanup cleanup; |
1072 | char *errmsg; |
1073 | rc = fvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg); |
1074 | if (rc != SQLITE_OK0) { |
1075 | sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1); |
1076 | sqlite3_freesqlite3_api->free(errmsg); |
1077 | return; |
1078 | } |
1079 | sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions * sizeof(f32), |
1080 | (void (*)(void *))cleanup); |
1081 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); |
1082 | } |
1083 | |
1084 | static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) { |
1085 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1085, __extension__ __PRETTY_FUNCTION__); })); |
1086 | int rc; |
1087 | u8 *vector; |
1088 | size_t dimensions; |
1089 | vector_cleanup cleanup; |
1090 | char *errmsg; |
1091 | rc = bitvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg); |
1092 | if (rc != SQLITE_OK0) { |
1093 | sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1); |
1094 | sqlite3_freesqlite3_api->free(errmsg); |
1095 | return; |
1096 | } |
1097 | sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions / CHAR_BIT8, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
1098 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT); |
1099 | cleanup(vector); |
1100 | } |
1101 | static void vec_int8(sqlite3_context *context, int argc, sqlite3_value **argv) { |
1102 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1102, __extension__ __PRETTY_FUNCTION__); })); |
1103 | int rc; |
1104 | i8 *vector; |
1105 | size_t dimensions; |
1106 | vector_cleanup cleanup; |
1107 | char *errmsg; |
1108 | rc = int8_vec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg); |
1109 | if (rc != SQLITE_OK0) { |
1110 | sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1); |
1111 | sqlite3_freesqlite3_api->free(errmsg); |
1112 | return; |
1113 | } |
1114 | sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
1115 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); |
1116 | cleanup(vector); |
1117 | } |
1118 | |
1119 | static void vec_length(sqlite3_context *context, int argc, |
1120 | sqlite3_value **argv) { |
1121 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1121, __extension__ __PRETTY_FUNCTION__); })); |
1122 | int rc; |
1123 | void *vector; |
1124 | size_t dimensions; |
1125 | vector_cleanup cleanup; |
1126 | char *errmsg; |
1127 | enum VectorElementType elementType; |
1128 | rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, &cleanup, |
1129 | &errmsg); |
1130 | if (rc != SQLITE_OK0) { |
1131 | sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1); |
1132 | sqlite3_freesqlite3_api->free(errmsg); |
1133 | return; |
1134 | } |
1135 | sqlite3_result_int64sqlite3_api->result_int64(context, dimensions); |
1136 | cleanup(vector); |
1137 | } |
1138 | |
1139 | static void vec_distance_cosine(sqlite3_context *context, int argc, |
1140 | sqlite3_value **argv) { |
1141 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1141, __extension__ __PRETTY_FUNCTION__); })); |
1142 | int rc; |
1143 | void *a = NULL((void*)0), *b = NULL((void*)0); |
1144 | size_t dimensions; |
1145 | vector_cleanup aCleanup, bCleanup; |
1146 | char *error; |
1147 | enum VectorElementType elementType; |
1148 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, |
1149 | &aCleanup, &bCleanup, &error); |
1150 | if (rc != SQLITE_OK0) { |
1151 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); |
1152 | sqlite3_freesqlite3_api->free(error); |
1153 | return; |
1154 | } |
1155 | |
1156 | switch (elementType) { |
1157 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
1158 | sqlite3_result_errorsqlite3_api->result_error( |
1159 | context, "Cannot calculate cosine distance between two bitvectors.", |
1160 | -1); |
1161 | goto finish; |
1162 | } |
1163 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
1164 | f32 result = distance_cosine_float(a, b, &dimensions); |
1165 | sqlite3_result_doublesqlite3_api->result_double(context, result); |
1166 | goto finish; |
1167 | } |
1168 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
1169 | f32 result = distance_cosine_int8(a, b, &dimensions); |
1170 | sqlite3_result_doublesqlite3_api->result_double(context, result); |
1171 | goto finish; |
1172 | } |
1173 | } |
1174 | |
1175 | finish: |
1176 | aCleanup(a); |
1177 | bCleanup(b); |
1178 | return; |
1179 | } |
1180 | |
1181 | static void vec_distance_l2(sqlite3_context *context, int argc, |
1182 | sqlite3_value **argv) { |
1183 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1183, __extension__ __PRETTY_FUNCTION__); })); |
1184 | int rc; |
1185 | void *a = NULL((void*)0), *b = NULL((void*)0); |
1186 | size_t dimensions; |
1187 | vector_cleanup aCleanup, bCleanup; |
1188 | char *error; |
1189 | enum VectorElementType elementType; |
1190 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, |
1191 | &aCleanup, &bCleanup, &error); |
1192 | if (rc != SQLITE_OK0) { |
1193 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); |
1194 | sqlite3_freesqlite3_api->free(error); |
1195 | return; |
1196 | } |
1197 | |
1198 | switch (elementType) { |
1199 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
1200 | sqlite3_result_errorsqlite3_api->result_error( |
1201 | context, "Cannot calculate L2 distance between two bitvectors.", -1); |
1202 | goto finish; |
1203 | } |
1204 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
1205 | f32 result = distance_l2_sqr_float(a, b, &dimensions); |
1206 | sqlite3_result_doublesqlite3_api->result_double(context, result); |
1207 | goto finish; |
1208 | } |
1209 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
1210 | f32 result = distance_l2_sqr_int8(a, b, &dimensions); |
1211 | sqlite3_result_doublesqlite3_api->result_double(context, result); |
1212 | goto finish; |
1213 | } |
1214 | } |
1215 | |
1216 | finish: |
1217 | aCleanup(a); |
1218 | bCleanup(b); |
1219 | return; |
1220 | } |
1221 | |
1222 | static void vec_distance_l1(sqlite3_context *context, int argc, |
1223 | sqlite3_value **argv) { |
1224 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1224, __extension__ __PRETTY_FUNCTION__); })); |
1225 | int rc; |
1226 | void *a, *b; |
1227 | size_t dimensions; |
1228 | vector_cleanup aCleanup, bCleanup; |
1229 | char *error; |
1230 | enum VectorElementType elementType; |
1231 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, |
1232 | &aCleanup, &bCleanup, &error); |
1233 | if (rc != SQLITE_OK0) { |
1234 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); |
1235 | sqlite3_freesqlite3_api->free(error); |
1236 | return; |
1237 | } |
1238 | |
1239 | switch (elementType) { |
1240 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
1241 | sqlite3_result_errorsqlite3_api->result_error( |
1242 | context, "Cannot calculate L1 distance between two bitvectors.", -1); |
1243 | goto finish; |
1244 | } |
1245 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
1246 | double result = distance_l1_f32(a, b, &dimensions); |
1247 | sqlite3_result_doublesqlite3_api->result_double(context, result); |
1248 | goto finish; |
1249 | } |
1250 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
1251 | i64 result = distance_l1_int8(a, b, &dimensions); |
1252 | sqlite3_result_intsqlite3_api->result_int(context, result); |
1253 | goto finish; |
1254 | } |
1255 | } |
1256 | |
1257 | finish: |
1258 | aCleanup(a); |
1259 | bCleanup(b); |
1260 | return; |
1261 | } |
1262 | |
1263 | static void vec_distance_hamming(sqlite3_context *context, int argc, |
1264 | sqlite3_value **argv) { |
1265 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1265, __extension__ __PRETTY_FUNCTION__); })); |
1266 | int rc; |
1267 | void *a = NULL((void*)0), *b = NULL((void*)0); |
1268 | size_t dimensions; |
1269 | vector_cleanup aCleanup, bCleanup; |
1270 | char *error; |
1271 | enum VectorElementType elementType; |
1272 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, |
1273 | &aCleanup, &bCleanup, &error); |
1274 | if (rc != SQLITE_OK0) { |
1275 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); |
1276 | sqlite3_freesqlite3_api->free(error); |
1277 | return; |
1278 | } |
1279 | |
1280 | switch (elementType) { |
1281 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
1282 | sqlite3_result_doublesqlite3_api->result_double(context, distance_hamming(a, b, &dimensions)); |
1283 | goto finish; |
1284 | } |
1285 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
1286 | sqlite3_result_errorsqlite3_api->result_error( |
1287 | context, |
1288 | "Cannot calculate hamming distance between two float32 vectors.", -1); |
1289 | goto finish; |
1290 | } |
1291 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
1292 | sqlite3_result_errorsqlite3_api->result_error( |
1293 | context, "Cannot calculate hamming distance between two int8 vectors.", |
1294 | -1); |
1295 | goto finish; |
1296 | } |
1297 | } |
1298 | |
1299 | finish: |
1300 | aCleanup(a); |
1301 | bCleanup(b); |
1302 | return; |
1303 | } |
1304 | |
1305 | char *vec_type_name(enum VectorElementType elementType) { |
1306 | switch (elementType) { |
1307 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: |
1308 | return "float32"; |
1309 | case SQLITE_VEC_ELEMENT_TYPE_INT8: |
1310 | return "int8"; |
1311 | case SQLITE_VEC_ELEMENT_TYPE_BIT: |
1312 | return "bit"; |
1313 | } |
1314 | return ""; |
1315 | } |
1316 | |
1317 | static void vec_type(sqlite3_context *context, int argc, sqlite3_value **argv) { |
1318 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1318, __extension__ __PRETTY_FUNCTION__); })); |
1319 | void *vector; |
1320 | size_t dimensions; |
1321 | vector_cleanup cleanup; |
1322 | char *pzError; |
1323 | enum VectorElementType elementType; |
1324 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, |
1325 | &cleanup, &pzError); |
1326 | if (rc != SQLITE_OK0) { |
1327 | sqlite3_result_errorsqlite3_api->result_error(context, pzError, -1); |
1328 | sqlite3_freesqlite3_api->free(pzError); |
1329 | return; |
1330 | } |
1331 | sqlite3_result_textsqlite3_api->result_text(context, vec_type_name(elementType), -1, SQLITE_STATIC((sqlite3_destructor_type)0)); |
1332 | cleanup(vector); |
1333 | } |
1334 | static void vec_quantize_binary(sqlite3_context *context, int argc, |
1335 | sqlite3_value **argv) { |
1336 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1336, __extension__ __PRETTY_FUNCTION__); })); |
1337 | void *vector; |
1338 | size_t dimensions; |
1339 | vector_cleanup vectorCleanup; |
1340 | char *pzError; |
1341 | enum VectorElementType elementType; |
1342 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, |
1343 | &vectorCleanup, &pzError); |
1344 | if (rc != SQLITE_OK0) { |
1345 | sqlite3_result_errorsqlite3_api->result_error(context, pzError, -1); |
1346 | sqlite3_freesqlite3_api->free(pzError); |
1347 | return; |
1348 | } |
1349 | |
1350 | if (dimensions <= 0) { |
1351 | sqlite3_result_errorsqlite3_api->result_error(context, "Zero length vectors are not supported.", -1); |
1352 | goto cleanup; |
1353 | return; |
1354 | } |
1355 | if ((dimensions % CHAR_BIT8) != 0) { |
1356 | sqlite3_result_errorsqlite3_api->result_error( |
1357 | context, |
1358 | "Binary quantization requires vectors with a length divisible by 8", |
1359 | -1); |
1360 | goto cleanup; |
1361 | return; |
1362 | } |
1363 | |
1364 | int sz = dimensions / CHAR_BIT8; |
1365 | u8 *out = sqlite3_mallocsqlite3_api->malloc(sz); |
1366 | if (!out) { |
1367 | sqlite3_result_error_codesqlite3_api->result_error_code(context, SQLITE_NOMEM7); |
1368 | goto cleanup; |
1369 | return; |
1370 | } |
1371 | memset(out, 0, sz); |
1372 | |
1373 | switch (elementType) { |
1374 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
1375 | |
1376 | for (size_t i = 0; i < dimensions; i++) { |
1377 | int res = ((f32 *)vector)[i] > 0.0; |
1378 | out[i / 8] |= (res << (i % 8)); |
1379 | } |
1380 | break; |
1381 | } |
1382 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
1383 | for (size_t i = 0; i < dimensions; i++) { |
1384 | int res = ((i8 *)vector)[i] > 0; |
1385 | out[i / 8] |= (res << (i % 8)); |
1386 | } |
1387 | break; |
1388 | } |
1389 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
1390 | sqlite3_result_errorsqlite3_api->result_error(context, |
1391 | "Can only binary quantize float or int8 vectors", -1); |
1392 | sqlite3_freesqlite3_api->free(out); |
1393 | return; |
1394 | } |
1395 | } |
1396 | sqlite3_result_blobsqlite3_api->result_blob(context, out, sz, sqlite3_freesqlite3_api->free); |
1397 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT); |
1398 | |
1399 | cleanup: |
1400 | vectorCleanup(vector); |
1401 | } |
1402 | |
1403 | static void vec_quantize_int8(sqlite3_context *context, int argc, |
1404 | sqlite3_value **argv) { |
1405 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1405, __extension__ __PRETTY_FUNCTION__); })); |
1406 | f32 *srcVector; |
1407 | size_t dimensions; |
1408 | fvec_cleanup srcCleanup; |
1409 | char *err; |
1410 | i8 *out = NULL((void*)0); |
1411 | int rc = fvec_from_value(argv[0], &srcVector, &dimensions, &srcCleanup, &err); |
1412 | if (rc != SQLITE_OK0) { |
1413 | sqlite3_result_errorsqlite3_api->result_error(context, err, -1); |
1414 | sqlite3_freesqlite3_api->free(err); |
1415 | return; |
1416 | } |
1417 | |
1418 | int sz = dimensions * sizeof(i8); |
1419 | out = sqlite3_mallocsqlite3_api->malloc(sz); |
1420 | if (!out) { |
1421 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1422 | goto cleanup; |
1423 | } |
1424 | memset(out, 0, sz); |
1425 | |
1426 | if ((sqlite3_value_typesqlite3_api->value_type(argv[1]) != SQLITE_TEXT3) || |
1427 | (sqlite3_value_bytessqlite3_api->value_bytes(argv[1]) != strlen("unit")) || |
1428 | (sqlite3_stricmpsqlite3_api->stricmp((const char *)sqlite3_value_textsqlite3_api->value_text(argv[1]), "unit") != |
1429 | 0)) { |
1430 | sqlite3_result_errorsqlite3_api->result_error( |
1431 | context, "2nd argument to vec_quantize_int8() must be 'unit'.", -1); |
1432 | sqlite3_freesqlite3_api->free(out); |
1433 | goto cleanup; |
1434 | } |
1435 | f32 step = (1.0 - (-1.0)) / 255; |
1436 | for (size_t i = 0; i < dimensions; i++) { |
1437 | out[i] = ((srcVector[i] - (-1.0)) / step) - 128; |
1438 | } |
1439 | |
1440 | sqlite3_result_blobsqlite3_api->result_blob(context, out, dimensions * sizeof(i8), sqlite3_freesqlite3_api->free); |
1441 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); |
1442 | |
1443 | cleanup: |
1444 | srcCleanup(srcVector); |
1445 | } |
1446 | |
1447 | static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) { |
1448 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1448, __extension__ __PRETTY_FUNCTION__); })); |
1449 | int rc; |
1450 | void *a = NULL((void*)0), *b = NULL((void*)0); |
1451 | size_t dimensions; |
1452 | vector_cleanup aCleanup, bCleanup; |
1453 | char *error; |
1454 | enum VectorElementType elementType; |
1455 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, |
1456 | &aCleanup, &bCleanup, &error); |
1457 | if (rc != SQLITE_OK0) { |
1458 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); |
1459 | sqlite3_freesqlite3_api->free(error); |
1460 | return; |
1461 | } |
1462 | |
1463 | switch (elementType) { |
1464 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
1465 | sqlite3_result_errorsqlite3_api->result_error(context, "Cannot add two bitvectors together.", -1); |
1466 | goto finish; |
1467 | } |
1468 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
1469 | size_t outSize = dimensions * sizeof(f32); |
1470 | f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize); |
1471 | if (!out) { |
1472 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1473 | goto finish; |
1474 | } |
1475 | memset(out, 0, outSize); |
1476 | for (size_t i = 0; i < dimensions; i++) { |
1477 | out[i] = ((f32 *)a)[i] + ((f32 *)b)[i]; |
1478 | } |
1479 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); |
1480 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); |
1481 | goto finish; |
1482 | } |
1483 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
1484 | size_t outSize = dimensions * sizeof(i8); |
1485 | i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize); |
1486 | if (!out) { |
1487 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1488 | goto finish; |
1489 | } |
1490 | memset(out, 0, outSize); |
1491 | for (size_t i = 0; i < dimensions; i++) { |
1492 | out[i] = ((i8 *)a)[i] + ((i8 *)b)[i]; |
1493 | } |
1494 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); |
1495 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); |
1496 | goto finish; |
1497 | } |
1498 | } |
1499 | finish: |
1500 | aCleanup(a); |
1501 | bCleanup(b); |
1502 | return; |
1503 | } |
1504 | static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **argv) { |
1505 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1505, __extension__ __PRETTY_FUNCTION__); })); |
1506 | int rc; |
1507 | void *a = NULL((void*)0), *b = NULL((void*)0); |
1508 | size_t dimensions; |
1509 | vector_cleanup aCleanup, bCleanup; |
1510 | char *error; |
1511 | enum VectorElementType elementType; |
1512 | rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions, |
1513 | &aCleanup, &bCleanup, &error); |
1514 | if (rc != SQLITE_OK0) { |
1515 | sqlite3_result_errorsqlite3_api->result_error(context, error, -1); |
1516 | sqlite3_freesqlite3_api->free(error); |
1517 | return; |
1518 | } |
1519 | |
1520 | switch (elementType) { |
1521 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
1522 | sqlite3_result_errorsqlite3_api->result_error(context, "Cannot subtract two bitvectors together.", |
1523 | -1); |
1524 | goto finish; |
1525 | } |
1526 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
1527 | size_t outSize = dimensions * sizeof(f32); |
1528 | f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize); |
1529 | if (!out) { |
1530 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1531 | goto finish; |
1532 | } |
1533 | memset(out, 0, outSize); |
1534 | for (size_t i = 0; i < dimensions; i++) { |
1535 | out[i] = ((f32 *)a)[i] - ((f32 *)b)[i]; |
1536 | } |
1537 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); |
1538 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); |
1539 | goto finish; |
1540 | } |
1541 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
1542 | size_t outSize = dimensions * sizeof(i8); |
1543 | i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize); |
1544 | if (!out) { |
1545 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1546 | goto finish; |
1547 | } |
1548 | memset(out, 0, outSize); |
1549 | for (size_t i = 0; i < dimensions; i++) { |
1550 | out[i] = ((i8 *)a)[i] - ((i8 *)b)[i]; |
1551 | } |
1552 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); |
1553 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); |
1554 | goto finish; |
1555 | } |
1556 | } |
1557 | finish: |
1558 | aCleanup(a); |
1559 | bCleanup(b); |
1560 | return; |
1561 | } |
1562 | static void vec_slice(sqlite3_context *context, int argc, |
1563 | sqlite3_value **argv) { |
1564 | assert(argc == 3)((void) sizeof ((argc == 3) ? 1 : 0), __extension__ ({ if (argc == 3) ; else __assert_fail ("argc == 3", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1564, __extension__ __PRETTY_FUNCTION__); })); |
1565 | |
1566 | void *vector; |
1567 | size_t dimensions; |
1568 | vector_cleanup cleanup; |
1569 | char *err; |
1570 | enum VectorElementType elementType; |
1571 | |
1572 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, |
1573 | &cleanup, &err); |
1574 | if (rc != SQLITE_OK0) { |
1575 | sqlite3_result_errorsqlite3_api->result_error(context, err, -1); |
1576 | sqlite3_freesqlite3_api->free(err); |
1577 | return; |
1578 | } |
1579 | |
1580 | int start = sqlite3_value_intsqlite3_api->value_int(argv[1]); |
1581 | int end = sqlite3_value_intsqlite3_api->value_int(argv[2]); |
1582 | |
1583 | if (start < 0) { |
1584 | sqlite3_result_errorsqlite3_api->result_error(context, |
1585 | "slice 'start' index must be a postive number.", -1); |
1586 | goto done; |
1587 | } |
1588 | if (end < 0) { |
1589 | sqlite3_result_errorsqlite3_api->result_error(context, "slice 'end' index must be a postive number.", |
1590 | -1); |
1591 | goto done; |
1592 | } |
1593 | if (((size_t)start) > dimensions) { |
1594 | sqlite3_result_errorsqlite3_api->result_error( |
1595 | context, "slice 'start' index is greater than the number of dimensions", |
1596 | -1); |
1597 | goto done; |
1598 | } |
1599 | if (((size_t)end) > dimensions) { |
1600 | sqlite3_result_errorsqlite3_api->result_error( |
1601 | context, "slice 'end' index is greater than the number of dimensions", |
1602 | -1); |
1603 | goto done; |
1604 | } |
1605 | if (start > end) { |
1606 | sqlite3_result_errorsqlite3_api->result_error(context, |
1607 | "slice 'start' index is greater than 'end' index", -1); |
1608 | goto done; |
1609 | } |
1610 | if (start == end) { |
1611 | sqlite3_result_errorsqlite3_api->result_error(context, |
1612 | "slice 'start' index is equal to the 'end' index, " |
1613 | "vectors must have non-zero length", |
1614 | -1); |
1615 | goto done; |
1616 | } |
1617 | size_t n = end - start; |
1618 | |
1619 | switch (elementType) { |
1620 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
1621 | int outSize = n * sizeof(f32); |
1622 | f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize); |
1623 | if (!out) { |
1624 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1625 | goto done; |
1626 | } |
1627 | memset(out, 0, outSize); |
1628 | for (size_t i = 0; i < n; i++) { |
1629 | out[i] = ((f32 *)vector)[start + i]; |
1630 | } |
1631 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); |
1632 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); |
1633 | goto done; |
1634 | } |
1635 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
1636 | int outSize = n * sizeof(i8); |
1637 | i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize); |
1638 | if (!out) { |
1639 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1640 | return; |
1641 | } |
1642 | memset(out, 0, outSize); |
1643 | for (size_t i = 0; i < n; i++) { |
1644 | out[i] = ((i8 *)vector)[start + i]; |
1645 | } |
1646 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); |
1647 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); |
1648 | goto done; |
1649 | } |
1650 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
1651 | if ((start % CHAR_BIT8) != 0) { |
1652 | sqlite3_result_errorsqlite3_api->result_error(context, "start index must be divisible by 8.", -1); |
1653 | goto done; |
1654 | } |
1655 | if ((end % CHAR_BIT8) != 0) { |
1656 | sqlite3_result_errorsqlite3_api->result_error(context, "end index must be divisible by 8.", -1); |
1657 | goto done; |
1658 | } |
1659 | int outSize = n / CHAR_BIT8; |
1660 | u8 *out = sqlite3_mallocsqlite3_api->malloc(outSize); |
1661 | if (!out) { |
1662 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1663 | return; |
1664 | } |
1665 | memset(out, 0, outSize); |
1666 | for (size_t i = 0; i < n / CHAR_BIT8; i++) { |
1667 | out[i] = ((u8 *)vector)[(start / CHAR_BIT8) + i]; |
1668 | } |
1669 | sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free); |
1670 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT); |
1671 | goto done; |
1672 | } |
1673 | } |
1674 | done: |
1675 | cleanup(vector); |
1676 | } |
1677 | |
1678 | static void vec_to_json(sqlite3_context *context, int argc, |
1679 | sqlite3_value **argv) { |
1680 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1680, __extension__ __PRETTY_FUNCTION__); })); |
1681 | void *vector; |
1682 | size_t dimensions; |
1683 | vector_cleanup cleanup; |
1684 | char *err; |
1685 | enum VectorElementType elementType; |
1686 | |
1687 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, |
1688 | &cleanup, &err); |
1689 | if (rc != SQLITE_OK0) { |
1690 | sqlite3_result_errorsqlite3_api->result_error(context, err, -1); |
1691 | sqlite3_freesqlite3_api->free(err); |
1692 | return; |
1693 | } |
1694 | |
1695 | sqlite3_str *str = sqlite3_str_newsqlite3_api->str_new(sqlite3_context_db_handlesqlite3_api->context_db_handle(context)); |
1696 | sqlite3_str_appendallsqlite3_api->str_appendall(str, "["); |
1697 | for (size_t i = 0; i < dimensions; i++) { |
1698 | if (i != 0) { |
1699 | sqlite3_str_appendallsqlite3_api->str_appendall(str, ","); |
1700 | } |
1701 | if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) { |
1702 | f32 value = ((f32 *)vector)[i]; |
1703 | if (isnan(value)__builtin_isnan (value)) { |
1704 | sqlite3_str_appendallsqlite3_api->str_appendall(str, "null"); |
1705 | } else { |
1706 | sqlite3_str_appendfsqlite3_api->str_appendf(str, "%f", value); |
1707 | } |
1708 | |
1709 | } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) { |
1710 | sqlite3_str_appendfsqlite3_api->str_appendf(str, "%d", ((i8 *)vector)[i]); |
1711 | } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) { |
1712 | u8 b = (((u8 *)vector)[i / 8] >> (i % CHAR_BIT8)) & 1; |
1713 | sqlite3_str_appendfsqlite3_api->str_appendf(str, "%d", b); |
1714 | } |
1715 | } |
1716 | sqlite3_str_appendallsqlite3_api->str_appendall(str, "]"); |
1717 | int len = sqlite3_str_lengthsqlite3_api->str_length(str); |
1718 | char *s = sqlite3_str_finishsqlite3_api->str_finish(str); |
1719 | if (s) { |
1720 | sqlite3_result_textsqlite3_api->result_text(context, s, len, sqlite3_freesqlite3_api->free); |
1721 | sqlite3_result_subtypesqlite3_api->result_subtype(context, JSON_SUBTYPE74); |
1722 | } else { |
1723 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
1724 | } |
1725 | cleanup(vector); |
1726 | } |
1727 | |
1728 | static void vec_normalize(sqlite3_context *context, int argc, |
1729 | sqlite3_value **argv) { |
1730 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 1730, __extension__ __PRETTY_FUNCTION__); })); |
1731 | void *vector; |
1732 | size_t dimensions; |
1733 | vector_cleanup cleanup; |
1734 | char *err; |
1735 | enum VectorElementType elementType; |
1736 | |
1737 | int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, |
1738 | &cleanup, &err); |
1739 | if (rc != SQLITE_OK0) { |
1740 | sqlite3_result_errorsqlite3_api->result_error(context, err, -1); |
1741 | sqlite3_freesqlite3_api->free(err); |
1742 | return; |
1743 | } |
1744 | |
1745 | if (elementType != SQLITE_VEC_ELEMENT_TYPE_FLOAT32) { |
1746 | sqlite3_result_errorsqlite3_api->result_error( |
1747 | context, "only float32 vectors are supported when normalizing", -1); |
1748 | cleanup(vector); |
1749 | return; |
1750 | } |
1751 | |
1752 | int outSize = dimensions * sizeof(f32); |
1753 | f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize); |
1754 | if (!out) { |
1755 | cleanup(vector); |
1756 | sqlite3_result_error_codesqlite3_api->result_error_code(context, SQLITE_NOMEM7); |
1757 | return; |
1758 | } |
1759 | memset(out, 0, outSize); |
1760 | |
1761 | f32 *v = (f32 *)vector; |
1762 | |
1763 | f32 norm = 0; |
1764 | for (size_t i = 0; i < dimensions; i++) { |
1765 | norm += v[i] * v[i]; |
1766 | } |
1767 | norm = sqrt(norm); |
1768 | for (size_t i = 0; i < dimensions; i++) { |
1769 | out[i] = v[i] / norm; |
1770 | } |
1771 | |
1772 | sqlite3_result_blobsqlite3_api->result_blob(context, out, dimensions * sizeof(f32), sqlite3_freesqlite3_api->free); |
1773 | sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32); |
1774 | cleanup(vector); |
1775 | } |
1776 | |
1777 | static void _static_text_func(sqlite3_context *context, int argc, |
1778 | sqlite3_value **argv) { |
1779 | UNUSED_PARAMETER(argc)(void)(argc); |
1780 | UNUSED_PARAMETER(argv)(void)(argv); |
1781 | sqlite3_result_textsqlite3_api->result_text(context, sqlite3_user_datasqlite3_api->user_data(context), -1, SQLITE_STATIC((sqlite3_destructor_type)0)); |
1782 | } |
1783 | |
1784 | #pragma endregion |
1785 | |
/* Kinds of tokens produced by vec0_token_next(). */
enum Vec0TokenType {
  TOKEN_TYPE_IDENTIFIER,
  TOKEN_TYPE_DIGIT,
  TOKEN_TYPE_LBRACKET,
  TOKEN_TYPE_RBRACKET,
  TOKEN_TYPE_PLUS,
  TOKEN_TYPE_EQ,
};

/*
 * One token. For identifiers and digit runs, [start, end) spans the token
 * text. For the single-character tokens (+ [ ] =), start and end both point
 * just past the character, so the span is empty.
 */
struct Vec0Token {
  enum Vec0TokenType token_type;
  char *start;
  char *end;
};

/* 1 if x is an ASCII letter, otherwise 0. */
int is_alpha(char x) {
  if (x >= 'a' && x <= 'z') {
    return 1;
  }
  return x >= 'A' && x <= 'Z';
}

/* 1 if x is an ASCII decimal digit, otherwise 0. */
int is_digit(char x) { return '0' <= x && x <= '9'; }

/* 1 if x is a space, tab, newline or carriage return, otherwise 0. */
int is_whitespace(char x) {
  switch (x) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}

#define VEC0_TOKEN_RESULT_EOF 1
#define VEC0_TOKEN_RESULT_SOME 2
#define VEC0_TOKEN_RESULT_ERROR 3

/**
 * Scan the next token in [start, end).
 *
 * Leading whitespace is skipped. On VEC0_TOKEN_RESULT_SOME, *out describes
 * the token. On VEC0_TOKEN_RESULT_EOF (only whitespace left) or
 * VEC0_TOKEN_RESULT_ERROR (unrecognised character), *out is left untouched.
 */
int vec0_token_next(char *start, char *end, struct Vec0Token *out) {
  char *cursor = start;
  while (cursor < end && is_whitespace(*cursor)) {
    cursor++;
  }
  if (cursor >= end) {
    return VEC0_TOKEN_RESULT_EOF;
  }

  char *first = cursor;
  const char c = *first;

  if (c == '+' || c == '[' || c == ']' || c == '=') {
    enum Vec0TokenType kind;
    if (c == '+') {
      kind = TOKEN_TYPE_PLUS;
    } else if (c == '[') {
      kind = TOKEN_TYPE_LBRACKET;
    } else if (c == ']') {
      kind = TOKEN_TYPE_RBRACKET;
    } else {
      kind = TOKEN_TYPE_EQ;
    }
    out->start = first + 1;
    out->end = first + 1;
    out->token_type = kind;
    return VEC0_TOKEN_RESULT_SOME;
  }

  if (is_alpha(c)) {
    // Identifier: a letter followed by letters, digits or underscores.
    do {
      cursor++;
    } while (cursor < end &&
             (is_alpha(*cursor) || is_digit(*cursor) || *cursor == '_'));
    out->start = first;
    out->end = cursor;
    out->token_type = TOKEN_TYPE_IDENTIFIER;
    return VEC0_TOKEN_RESULT_SOME;
  }

  if (is_digit(c)) {
    do {
      cursor++;
    } while (cursor < end && is_digit(*cursor));
    out->start = first;
    out->end = cursor;
    out->token_type = TOKEN_TYPE_DIGIT;
    return VEC0_TOKEN_RESULT_SOME;
  }

  return VEC0_TOKEN_RESULT_ERROR;
}
1867 | |
1868 | struct Vec0Scanner { |
1869 | char *start; |
1870 | char *end; |
1871 | char *ptr; |
1872 | }; |
1873 | |
1874 | void vec0_scanner_init(struct Vec0Scanner *scanner, const char *source, |
1875 | int source_length) { |
1876 | scanner->start = (char *)source; |
1877 | scanner->end = (char *)source + source_length; |
1878 | scanner->ptr = (char *)source; |
1879 | } |
1880 | int vec0_scanner_next(struct Vec0Scanner *scanner, struct Vec0Token *out) { |
1881 | int rc = vec0_token_next(scanner->start, scanner->end, out); |
1882 | if (rc == VEC0_TOKEN_RESULT_SOME2) { |
1883 | scanner->start = out->end; |
1884 | } |
1885 | return rc; |
1886 | } |
1887 | |
1888 | int vec0_parse_table_option(const char *source, int source_length, |
1889 | char **out_key, int *out_key_length, |
1890 | char **out_value, int *out_value_length) { |
1891 | int rc; |
1892 | struct Vec0Scanner scanner; |
1893 | struct Vec0Token token; |
1894 | char *key; |
1895 | char *value; |
1896 | int keyLength, valueLength; |
1897 | |
1898 | vec0_scanner_init(&scanner, source, source_length); |
1899 | |
1900 | rc = vec0_scanner_next(&scanner, &token); |
1901 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
1902 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
1903 | return SQLITE_EMPTY16; |
1904 | } |
1905 | key = token.start; |
1906 | keyLength = token.end - token.start; |
1907 | |
1908 | rc = vec0_scanner_next(&scanner, &token); |
1909 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_EQ) { |
1910 | return SQLITE_EMPTY16; |
1911 | } |
1912 | |
1913 | rc = vec0_scanner_next(&scanner, &token); |
1914 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
1915 | !((token.token_type == TOKEN_TYPE_IDENTIFIER) || |
1916 | (token.token_type == TOKEN_TYPE_DIGIT))) { |
1917 | return SQLITE_ERROR1; |
1918 | } |
1919 | value = token.start; |
1920 | valueLength = token.end - token.start; |
1921 | |
1922 | rc = vec0_scanner_next(&scanner, &token); |
1923 | if (rc == VEC0_TOKEN_RESULT_EOF1) { |
1924 | *out_key = key; |
1925 | *out_key_length = keyLength; |
1926 | *out_value = value; |
1927 | *out_value_length = valueLength; |
1928 | return SQLITE_OK0; |
1929 | } |
1930 | return SQLITE_ERROR1; |
1931 | } |
1932 | /** |
1933 | * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if |
1934 | * it's a PARTITION KEY definition. |
1935 | * |
1936 | * @param source: argv[i] source string |
1937 | * @param source_length: length of the source string |
1938 | * @param out_column_name: If it is a partition key, the output column name. Same lifetime |
1939 | * as source, points to specific char * |
1940 | * @param out_column_name_length: Length of out_column_name in bytes |
1941 | * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER. |
1942 | * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is. |
1943 | */ |
1944 | int vec0_parse_partition_key_definition(const char *source, int source_length, |
1945 | char **out_column_name, |
1946 | int *out_column_name_length, |
1947 | int *out_column_type) { |
1948 | struct Vec0Scanner scanner; |
1949 | struct Vec0Token token; |
1950 | char *column_name; |
1951 | int column_name_length; |
1952 | int column_type; |
1953 | vec0_scanner_init(&scanner, source, source_length); |
1954 | |
1955 | // Check first token is identifier, will be the column name |
1956 | int rc = vec0_scanner_next(&scanner, &token); |
1957 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
1958 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
1959 | return SQLITE_EMPTY16; |
1960 | } |
1961 | |
1962 | column_name = token.start; |
1963 | column_name_length = token.end - token.start; |
1964 | |
1965 | // Check the next token matches "text" or "integer", as column type |
1966 | rc = vec0_scanner_next(&scanner, &token); |
1967 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
1968 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
1969 | return SQLITE_EMPTY16; |
1970 | } |
1971 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) { |
1972 | column_type = SQLITE_TEXT3; |
1973 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) == |
1974 | 0 || |
1975 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer", |
1976 | token.end - token.start) == 0) { |
1977 | column_type = SQLITE_INTEGER1; |
1978 | } else { |
1979 | return SQLITE_EMPTY16; |
1980 | } |
1981 | |
1982 | // Check the next token is identifier and matches "partition" |
1983 | rc = vec0_scanner_next(&scanner, &token); |
1984 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
1985 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
1986 | return SQLITE_EMPTY16; |
1987 | } |
1988 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "partition", token.end - token.start) != 0) { |
1989 | return SQLITE_EMPTY16; |
1990 | } |
1991 | |
1992 | // Check the next token is identifier and matches "key" |
1993 | rc = vec0_scanner_next(&scanner, &token); |
1994 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
1995 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
1996 | return SQLITE_EMPTY16; |
1997 | } |
1998 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "key", token.end - token.start) != 0) { |
1999 | return SQLITE_EMPTY16; |
2000 | } |
2001 | |
2002 | *out_column_name = column_name; |
2003 | *out_column_name_length = column_name_length; |
2004 | *out_column_type = column_type; |
2005 | |
2006 | return SQLITE_OK0; |
2007 | } |
2008 | |
2009 | /** |
2010 | * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if |
2011 | * it's an auxiliar column definition, ie `+[name] [type]` like `+contents text` |
2012 | * |
2013 | * @param source: argv[i] source string |
2014 | * @param source_length: length of the source string |
2015 | * @param out_column_name: If it is a partition key, the output column name. Same lifetime |
2016 | * as source, points to specific char * |
2017 | * @param out_column_name_length: Length of out_column_name in bytes |
2018 | * @param out_column_type: SQLITE_TEXT, SQLITE_INTEGER, SQLITE_FLOAT, or SQLITE_BLOB. |
2019 | * @return int: SQLITE_EMPTY if not an aux column, SQLITE_OK if it is. |
2020 | */ |
2021 | int vec0_parse_auxiliary_column_definition(const char *source, int source_length, |
2022 | char **out_column_name, |
2023 | int *out_column_name_length, |
2024 | int *out_column_type) { |
2025 | struct Vec0Scanner scanner; |
2026 | struct Vec0Token token; |
2027 | char *column_name; |
2028 | int column_name_length; |
2029 | int column_type; |
2030 | vec0_scanner_init(&scanner, source, source_length); |
2031 | |
2032 | // Check first token is '+', which denotes aux columns |
2033 | int rc = vec0_scanner_next(&scanner, &token); |
2034 | if (rc != VEC0_TOKEN_RESULT_SOME2 || |
2035 | token.token_type != TOKEN_TYPE_PLUS) { |
2036 | return SQLITE_EMPTY16; |
2037 | } |
2038 | |
2039 | rc = vec0_scanner_next(&scanner, &token); |
2040 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2041 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2042 | return SQLITE_EMPTY16; |
2043 | } |
2044 | |
2045 | column_name = token.start; |
2046 | column_name_length = token.end - token.start; |
2047 | |
2048 | // Check the next token matches "text" or "integer", as column type |
2049 | rc = vec0_scanner_next(&scanner, &token); |
2050 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2051 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2052 | return SQLITE_EMPTY16; |
2053 | } |
2054 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) { |
2055 | column_type = SQLITE_TEXT3; |
2056 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) == |
2057 | 0 || |
2058 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer", |
2059 | token.end - token.start) == 0) { |
2060 | column_type = SQLITE_INTEGER1; |
2061 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "float", token.end - token.start) == |
2062 | 0 || |
2063 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "double", |
2064 | token.end - token.start) == 0) { |
2065 | column_type = SQLITE_FLOAT2; |
2066 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "blob", token.end - token.start) ==0) { |
2067 | column_type = SQLITE_BLOB4; |
2068 | } else { |
2069 | return SQLITE_EMPTY16; |
2070 | } |
2071 | |
2072 | *out_column_name = column_name; |
2073 | *out_column_name_length = column_name_length; |
2074 | *out_column_type = column_type; |
2075 | |
2076 | return SQLITE_OK0; |
2077 | } |
2078 | |
/* Value kinds a vec0 metadata column can be declared with. */
typedef enum {
  VEC0_METADATA_COLUMN_KIND_BOOLEAN = 0,
  VEC0_METADATA_COLUMN_KIND_INTEGER = 1,
  VEC0_METADATA_COLUMN_KIND_FLOAT = 2,
  VEC0_METADATA_COLUMN_KIND_TEXT = 3,
  // future: blob, date, datetime
} vec0_metadata_column_kind;
2086 | |
2087 | /** |
2088 | * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if |
2089 | * it's an metadata column definition, ie `[name] [type]` like `is_released boolean` |
2090 | * |
2091 | * @param source: argv[i] source string |
2092 | * @param source_length: length of the source string |
2093 | * @param out_column_name: If it is a metadata column, the output column name. Same lifetime |
2094 | * as source, points to specific char * |
2095 | * @param out_column_name_length: Length of out_column_name in bytes |
2096 | * @param out_column_type: one of vec0_metadata_column_kind |
2097 | * @return int: SQLITE_EMPTY if not an metadata column, SQLITE_OK if it is. |
2098 | */ |
2099 | int vec0_parse_metadata_column_definition(const char *source, int source_length, |
2100 | char **out_column_name, |
2101 | int *out_column_name_length, |
2102 | vec0_metadata_column_kind *out_column_type) { |
2103 | struct Vec0Scanner scanner; |
2104 | struct Vec0Token token; |
2105 | char *column_name; |
2106 | int column_name_length; |
2107 | vec0_metadata_column_kind column_type; |
2108 | int rc; |
2109 | vec0_scanner_init(&scanner, source, source_length); |
2110 | |
2111 | rc = vec0_scanner_next(&scanner, &token); |
2112 | if (rc != VEC0_TOKEN_RESULT_SOME2 || |
2113 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2114 | return SQLITE_EMPTY16; |
2115 | } |
2116 | |
2117 | column_name = token.start; |
2118 | column_name_length = token.end - token.start; |
2119 | |
2120 | // Check the next token matches a valid metadata type |
2121 | rc = vec0_scanner_next(&scanner, &token); |
2122 | if (rc != VEC0_TOKEN_RESULT_SOME2 || |
2123 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2124 | return SQLITE_EMPTY16; |
2125 | } |
2126 | char * t = token.start; |
2127 | int n = token.end - token.start; |
2128 | if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "boolean", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "bool", n) == 0) { |
2129 | column_type = VEC0_METADATA_COLUMN_KIND_BOOLEAN; |
2130 | }else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "int64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "integer64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "integer", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "int", n) == 0) { |
2131 | column_type = VEC0_METADATA_COLUMN_KIND_INTEGER; |
2132 | }else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "float", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "double", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "float64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "f64", n) == 0) { |
2133 | column_type = VEC0_METADATA_COLUMN_KIND_FLOAT; |
2134 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "text", n) == 0) { |
2135 | column_type = VEC0_METADATA_COLUMN_KIND_TEXT; |
2136 | } else { |
2137 | return SQLITE_EMPTY16; |
2138 | } |
2139 | |
2140 | *out_column_name = column_name; |
2141 | *out_column_name_length = column_name_length; |
2142 | *out_column_type = column_type; |
2143 | |
2144 | return SQLITE_OK0; |
2145 | } |
2146 | |
2147 | /** |
2148 | * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if |
2149 | * it's a PRIMARY KEY definition. |
2150 | * |
2151 | * @param source: argv[i] source string |
2152 | * @param source_length: length of the source string |
2153 | * @param out_column_name: If it is a PK, the output column name. Same lifetime |
2154 | * as source, points to specific char * |
2155 | * @param out_column_name_length: Length of out_column_name in bytes |
2156 | * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER. |
2157 | * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is. |
2158 | */ |
2159 | int vec0_parse_primary_key_definition(const char *source, int source_length, |
2160 | char **out_column_name, |
2161 | int *out_column_name_length, |
2162 | int *out_column_type) { |
2163 | struct Vec0Scanner scanner; |
2164 | struct Vec0Token token; |
2165 | char *column_name; |
2166 | int column_name_length; |
2167 | int column_type; |
2168 | vec0_scanner_init(&scanner, source, source_length); |
2169 | |
2170 | // Check first token is identifier, will be the column name |
2171 | int rc = vec0_scanner_next(&scanner, &token); |
2172 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2173 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2174 | return SQLITE_EMPTY16; |
2175 | } |
2176 | |
2177 | column_name = token.start; |
2178 | column_name_length = token.end - token.start; |
2179 | |
2180 | // Check the next token matches "text" or "integer", as column type |
2181 | rc = vec0_scanner_next(&scanner, &token); |
2182 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2183 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2184 | return SQLITE_EMPTY16; |
2185 | } |
2186 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) { |
2187 | column_type = SQLITE_TEXT3; |
2188 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) == |
2189 | 0 || |
2190 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer", |
2191 | token.end - token.start) == 0) { |
2192 | column_type = SQLITE_INTEGER1; |
2193 | } else { |
2194 | return SQLITE_EMPTY16; |
2195 | } |
2196 | |
2197 | // Check the next token is identifier and matches "primary" |
2198 | rc = vec0_scanner_next(&scanner, &token); |
2199 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2200 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2201 | return SQLITE_EMPTY16; |
2202 | } |
2203 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "primary", token.end - token.start) != 0) { |
2204 | return SQLITE_EMPTY16; |
2205 | } |
2206 | |
2207 | // Check the next token is identifier and matches "key" |
2208 | rc = vec0_scanner_next(&scanner, &token); |
2209 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2210 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2211 | return SQLITE_EMPTY16; |
2212 | } |
2213 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "key", token.end - token.start) != 0) { |
2214 | return SQLITE_EMPTY16; |
2215 | } |
2216 | |
2217 | *out_column_name = column_name; |
2218 | *out_column_name_length = column_name_length; |
2219 | *out_column_type = column_type; |
2220 | |
2221 | return SQLITE_OK0; |
2222 | } |
2223 | |
2224 | enum Vec0DistanceMetrics { |
2225 | VEC0_DISTANCE_METRIC_L2 = 1, |
2226 | VEC0_DISTANCE_METRIC_COSINE = 2, |
2227 | VEC0_DISTANCE_METRIC_L1 = 3, |
2228 | }; |
2229 | |
2230 | struct VectorColumnDefinition { |
2231 | char *name; |
2232 | int name_length; |
2233 | size_t dimensions; |
2234 | enum VectorElementType element_type; |
2235 | enum Vec0DistanceMetrics distance_metric; |
2236 | }; |
2237 | |
2238 | struct Vec0PartitionColumnDefinition { |
2239 | int type; |
2240 | char * name; |
2241 | int name_length; |
2242 | }; |
2243 | |
2244 | struct Vec0AuxiliaryColumnDefinition { |
2245 | int type; |
2246 | char * name; |
2247 | int name_length; |
2248 | }; |
2249 | struct Vec0MetadataColumnDefinition { |
2250 | vec0_metadata_column_kind kind; |
2251 | char * name; |
2252 | int name_length; |
2253 | }; |
2254 | |
2255 | size_t vector_byte_size(enum VectorElementType element_type, |
2256 | size_t dimensions) { |
2257 | switch (element_type) { |
2258 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: |
2259 | return dimensions * sizeof(f32); |
2260 | case SQLITE_VEC_ELEMENT_TYPE_INT8: |
2261 | return dimensions * sizeof(i8); |
2262 | case SQLITE_VEC_ELEMENT_TYPE_BIT: |
2263 | return dimensions / CHAR_BIT8; |
2264 | } |
2265 | return 0; |
2266 | } |
2267 | |
2268 | size_t vector_column_byte_size(struct VectorColumnDefinition column) { |
2269 | return vector_byte_size(column.element_type, column.dimensions); |
2270 | } |
2271 | |
2272 | /** |
2273 | * @brief Parse an vec0 vtab argv[i] column definition and see if |
2274 | * it's a vector column defintion, ex `contents_embedding float[768]`. |
2275 | * |
2276 | * @param source vec0 argv[i] item |
2277 | * @param source_length length of source in bytes |
2278 | * @param outColumn Output the parse vector column to this struct, if success |
2279 | * @return int SQLITE_OK on success, SQLITE_EMPTY is it's not a vector column |
2280 | * definition, SQLITE_ERROR on error. |
2281 | */ |
2282 | int vec0_parse_vector_column(const char *source, int source_length, |
2283 | struct VectorColumnDefinition *outColumn) { |
2284 | // parses a vector column definition like so: |
2285 | // "abc float[123]", "abc_123 bit[1234]", eetc. |
2286 | // https://github.com/asg017/sqlite-vec/issues/46 |
2287 | int rc; |
2288 | struct Vec0Scanner scanner; |
2289 | struct Vec0Token token; |
2290 | |
2291 | char *name; |
2292 | int nameLength; |
2293 | enum VectorElementType elementType; |
2294 | enum Vec0DistanceMetrics distanceMetric = VEC0_DISTANCE_METRIC_L2; |
2295 | int dimensions; |
2296 | |
2297 | vec0_scanner_init(&scanner, source, source_length); |
2298 | |
2299 | // starts with an identifier |
2300 | rc = vec0_scanner_next(&scanner, &token); |
2301 | |
2302 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2303 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2304 | return SQLITE_EMPTY16; |
2305 | } |
2306 | |
2307 | name = token.start; |
2308 | nameLength = token.end - token.start; |
2309 | |
2310 | // vector column type comes next: float, int, or bit |
2311 | rc = vec0_scanner_next(&scanner, &token); |
2312 | |
2313 | if (rc != VEC0_TOKEN_RESULT_SOME2 || |
2314 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2315 | return SQLITE_EMPTY16; |
2316 | } |
2317 | if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "float", 5) == 0 || |
2318 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "f32", 3) == 0) { |
2319 | elementType = SQLITE_VEC_ELEMENT_TYPE_FLOAT32; |
2320 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int8", 4) == 0 || |
2321 | sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "i8", 2) == 0) { |
2322 | elementType = SQLITE_VEC_ELEMENT_TYPE_INT8; |
2323 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "bit", 3) == 0) { |
2324 | elementType = SQLITE_VEC_ELEMENT_TYPE_BIT; |
2325 | } else { |
2326 | return SQLITE_EMPTY16; |
2327 | } |
2328 | |
2329 | // left '[' bracket |
2330 | rc = vec0_scanner_next(&scanner, &token); |
2331 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_LBRACKET) { |
2332 | return SQLITE_EMPTY16; |
2333 | } |
2334 | |
2335 | // digit, for vector dimension length |
2336 | rc = vec0_scanner_next(&scanner, &token); |
2337 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_DIGIT) { |
2338 | return SQLITE_ERROR1; |
2339 | } |
2340 | dimensions = atoi(token.start); |
2341 | if (dimensions <= 0) { |
2342 | return SQLITE_ERROR1; |
2343 | } |
2344 | |
2345 | // // right ']' bracket |
2346 | rc = vec0_scanner_next(&scanner, &token); |
2347 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_RBRACKET) { |
2348 | return SQLITE_ERROR1; |
2349 | } |
2350 | |
2351 | // any other tokens left should be column-level options , ex `key=value` |
2352 | // ex `distance_metric=L2 distance_metric=cosine` should error |
2353 | while (1) { |
2354 | // should be EOF or identifier (option key) |
2355 | rc = vec0_scanner_next(&scanner, &token); |
2356 | if (rc == VEC0_TOKEN_RESULT_EOF1) { |
2357 | break; |
2358 | } |
2359 | |
2360 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2361 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2362 | return SQLITE_ERROR1; |
2363 | } |
2364 | |
2365 | char *key = token.start; |
2366 | int keyLength = token.end - token.start; |
2367 | |
2368 | if (sqlite3_strnicmpsqlite3_api->strnicmp(key, "distance_metric", keyLength) == 0) { |
2369 | |
2370 | if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) { |
2371 | return SQLITE_ERROR1; |
2372 | } |
2373 | // ensure equal sign after distance_metric |
2374 | rc = vec0_scanner_next(&scanner, &token); |
2375 | if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_EQ) { |
2376 | return SQLITE_ERROR1; |
2377 | } |
2378 | |
2379 | // distance_metric value, an identifier (L2, cosine, etc) |
2380 | rc = vec0_scanner_next(&scanner, &token); |
2381 | if (rc != VEC0_TOKEN_RESULT_SOME2 && |
2382 | token.token_type != TOKEN_TYPE_IDENTIFIER) { |
2383 | return SQLITE_ERROR1; |
2384 | } |
2385 | |
2386 | char *value = token.start; |
2387 | int valueLength = token.end - token.start; |
2388 | if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "l2", valueLength) == 0) { |
2389 | distanceMetric = VEC0_DISTANCE_METRIC_L2; |
2390 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "l1", valueLength) == 0) { |
2391 | distanceMetric = VEC0_DISTANCE_METRIC_L1; |
2392 | } else if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "cosine", valueLength) == 0) { |
2393 | distanceMetric = VEC0_DISTANCE_METRIC_COSINE; |
2394 | } else { |
2395 | return SQLITE_ERROR1; |
2396 | } |
2397 | } |
2398 | // unknown key |
2399 | else { |
2400 | return SQLITE_ERROR1; |
2401 | } |
2402 | } |
2403 | |
2404 | outColumn->name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", nameLength, name); |
2405 | if (!outColumn->name) { |
2406 | return SQLITE_ERROR1; |
2407 | } |
2408 | outColumn->name_length = nameLength; |
2409 | outColumn->distance_metric = distanceMetric; |
2410 | outColumn->element_type = elementType; |
2411 | outColumn->dimensions = dimensions; |
2412 | return SQLITE_OK0; |
2413 | } |
2414 | |
2415 | #pragma region vec_each table function |
2416 | |
2417 | typedef struct vec_each_vtab vec_each_vtab; |
2418 | struct vec_each_vtab { |
2419 | sqlite3_vtab base; |
2420 | }; |
2421 | |
2422 | typedef struct vec_each_cursor vec_each_cursor; |
2423 | struct vec_each_cursor { |
2424 | sqlite3_vtab_cursor base; |
2425 | i64 iRowid; |
2426 | enum VectorElementType vector_type; |
2427 | void *vector; |
2428 | size_t dimensions; |
2429 | vector_cleanup cleanup; |
2430 | }; |
2431 | |
2432 | static int vec_eachConnect(sqlite3 *db, void *pAux, int argc, |
2433 | const char *const *argv, sqlite3_vtab **ppVtab, |
2434 | char **pzErr) { |
2435 | UNUSED_PARAMETER(pAux)(void)(pAux); |
2436 | UNUSED_PARAMETER(argc)(void)(argc); |
2437 | UNUSED_PARAMETER(argv)(void)(argv); |
2438 | UNUSED_PARAMETER(pzErr)(void)(pzErr); |
2439 | vec_each_vtab *pNew; |
2440 | int rc; |
2441 | |
2442 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, "CREATE TABLE x(value, vector hidden)"); |
2443 | #define VEC_EACH_COLUMN_VALUE0 0 |
2444 | #define VEC_EACH_COLUMN_VECTOR1 1 |
2445 | if (rc == SQLITE_OK0) { |
2446 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); |
2447 | *ppVtab = (sqlite3_vtab *)pNew; |
2448 | if (pNew == 0) |
2449 | return SQLITE_NOMEM7; |
2450 | memset(pNew, 0, sizeof(*pNew)); |
2451 | } |
2452 | return rc; |
2453 | } |
2454 | |
2455 | static int vec_eachDisconnect(sqlite3_vtab *pVtab) { |
2456 | vec_each_vtab *p = (vec_each_vtab *)pVtab; |
2457 | sqlite3_freesqlite3_api->free(p); |
2458 | return SQLITE_OK0; |
2459 | } |
2460 | |
2461 | static int vec_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { |
2462 | UNUSED_PARAMETER(p)(void)(p); |
2463 | vec_each_cursor *pCur; |
2464 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); |
2465 | if (pCur == 0) |
2466 | return SQLITE_NOMEM7; |
2467 | memset(pCur, 0, sizeof(*pCur)); |
2468 | *ppCursor = &pCur->base; |
2469 | return SQLITE_OK0; |
2470 | } |
2471 | |
2472 | static int vec_eachClose(sqlite3_vtab_cursor *cur) { |
2473 | vec_each_cursor *pCur = (vec_each_cursor *)cur; |
2474 | if(pCur->vector) { |
2475 | pCur->cleanup(pCur->vector); |
2476 | } |
2477 | sqlite3_freesqlite3_api->free(pCur); |
2478 | return SQLITE_OK0; |
2479 | } |
2480 | |
2481 | static int vec_eachBestIndex(sqlite3_vtab *pVTab, |
2482 | sqlite3_index_info *pIdxInfo) { |
2483 | UNUSED_PARAMETER(pVTab)(void)(pVTab); |
2484 | int hasVector = 0; |
2485 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { |
2486 | const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i]; |
2487 | // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn, |
2488 | // pCons->op, pCons->usable); |
2489 | switch (pCons->iColumn) { |
2490 | case VEC_EACH_COLUMN_VECTOR1: { |
2491 | if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ2 && pCons->usable) { |
2492 | hasVector = 1; |
2493 | pIdxInfo->aConstraintUsage[i].argvIndex = 1; |
2494 | pIdxInfo->aConstraintUsage[i].omit = 1; |
2495 | } |
2496 | break; |
2497 | } |
2498 | } |
2499 | } |
2500 | if (!hasVector) { |
2501 | return SQLITE_CONSTRAINT19; |
2502 | } |
2503 | |
2504 | pIdxInfo->estimatedCost = (double)100000; |
2505 | pIdxInfo->estimatedRows = 100000; |
2506 | |
2507 | return SQLITE_OK0; |
2508 | } |
2509 | |
2510 | static int vec_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, |
2511 | const char *idxStr, int argc, sqlite3_value **argv) { |
2512 | UNUSED_PARAMETER(idxNum)(void)(idxNum); |
2513 | UNUSED_PARAMETER(idxStr)(void)(idxStr); |
2514 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 2514, __extension__ __PRETTY_FUNCTION__); })); |
2515 | vec_each_cursor *pCur = (vec_each_cursor *)pVtabCursor; |
2516 | |
2517 | if (pCur->vector) { |
2518 | pCur->cleanup(pCur->vector); |
2519 | pCur->vector = NULL((void*)0); |
2520 | } |
2521 | |
2522 | char *pzErrMsg; |
2523 | int rc = vector_from_value(argv[0], &pCur->vector, &pCur->dimensions, |
2524 | &pCur->vector_type, &pCur->cleanup, &pzErrMsg); |
2525 | if (rc != SQLITE_OK0) { |
2526 | return SQLITE_ERROR1; |
2527 | } |
2528 | pCur->iRowid = 0; |
2529 | return SQLITE_OK0; |
2530 | } |
2531 | |
2532 | static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { |
2533 | vec_each_cursor *pCur = (vec_each_cursor *)cur; |
2534 | *pRowid = pCur->iRowid; |
2535 | return SQLITE_OK0; |
2536 | } |
2537 | |
2538 | static int vec_eachEof(sqlite3_vtab_cursor *cur) { |
2539 | vec_each_cursor *pCur = (vec_each_cursor *)cur; |
2540 | return pCur->iRowid >= (i64)pCur->dimensions; |
2541 | } |
2542 | |
2543 | static int vec_eachNext(sqlite3_vtab_cursor *cur) { |
2544 | vec_each_cursor *pCur = (vec_each_cursor *)cur; |
2545 | pCur->iRowid++; |
2546 | return SQLITE_OK0; |
2547 | } |
2548 | |
2549 | static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *context, |
2550 | int i) { |
2551 | vec_each_cursor *pCur = (vec_each_cursor *)cur; |
2552 | switch (i) { |
2553 | case VEC_EACH_COLUMN_VALUE0: |
2554 | switch (pCur->vector_type) { |
2555 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
2556 | sqlite3_result_doublesqlite3_api->result_double(context, ((f32 *)pCur->vector)[pCur->iRowid]); |
2557 | break; |
2558 | } |
2559 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
2560 | u8 x = ((u8 *)pCur->vector)[pCur->iRowid / CHAR_BIT8]; |
2561 | sqlite3_result_intsqlite3_api->result_int(context, |
2562 | (x & (0b10000000 >> ((pCur->iRowid % CHAR_BIT8)))) > 0); |
2563 | break; |
2564 | } |
2565 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
2566 | sqlite3_result_intsqlite3_api->result_int(context, ((i8 *)pCur->vector)[pCur->iRowid]); |
2567 | break; |
2568 | } |
2569 | } |
2570 | |
2571 | break; |
2572 | } |
2573 | return SQLITE_OK0; |
2574 | } |
2575 | |
2576 | static sqlite3_module vec_eachModule = { |
2577 | /* iVersion */ 0, |
2578 | /* xCreate */ 0, |
2579 | /* xConnect */ vec_eachConnect, |
2580 | /* xBestIndex */ vec_eachBestIndex, |
2581 | /* xDisconnect */ vec_eachDisconnect, |
2582 | /* xDestroy */ 0, |
2583 | /* xOpen */ vec_eachOpen, |
2584 | /* xClose */ vec_eachClose, |
2585 | /* xFilter */ vec_eachFilter, |
2586 | /* xNext */ vec_eachNext, |
2587 | /* xEof */ vec_eachEof, |
2588 | /* xColumn */ vec_eachColumn, |
2589 | /* xRowid */ vec_eachRowid, |
2590 | /* xUpdate */ 0, |
2591 | /* xBegin */ 0, |
2592 | /* xSync */ 0, |
2593 | /* xCommit */ 0, |
2594 | /* xRollback */ 0, |
2595 | /* xFindMethod */ 0, |
2596 | /* xRename */ 0, |
2597 | /* xSavepoint */ 0, |
2598 | /* xRelease */ 0, |
2599 | /* xRollbackTo */ 0, |
2600 | /* xShadowName */ 0, |
2601 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 |
2602 | /* xIntegrity */ 0 |
2603 | #endif |
2604 | }; |
2605 | |
2606 | #pragma endregion |
2607 | |
2608 | #pragma region vec_npy_each table function |
2609 | |
// Kinds of token that npy_token_next() can produce while scanning the
// dictionary-style header of a numpy array.
enum NpyTokenType {
  // Declared but not produced by npy_token_next() as written.
  NPY_TOKEN_TYPE_IDENTIFIER,
  // A run of decimal digits.
  NPY_TOKEN_TYPE_NUMBER,
  // '('
  NPY_TOKEN_TYPE_LPAREN,
  // ')'
  NPY_TOKEN_TYPE_RPAREN,
  // '{'
  NPY_TOKEN_TYPE_LBRACE,
  // '}'
  NPY_TOKEN_TYPE_RBRACE,
  // ':'
  NPY_TOKEN_TYPE_COLON,
  // ','
  NPY_TOKEN_TYPE_COMMA,
  // A single-quoted string, quotes included in the token span.
  NPY_TOKEN_TYPE_STRING,
  // The literal word False.
  NPY_TOKEN_TYPE_FALSE,
};
2622 | |
// One token found by npy_token_next(). The token text is the half-open byte
// range [start, end) inside the scanned buffer; nothing is copied.
struct NpyToken {
  enum NpyTokenType token_type;
  // first byte of the token
  unsigned char *start;
  // one past the last byte of the token
  unsigned char *end;
};
2628 | |
2629 | int npy_token_next(unsigned char *start, unsigned char *end, |
2630 | struct NpyToken *out) { |
2631 | unsigned char *ptr = start; |
2632 | while (ptr < end) { |
2633 | unsigned char curr = *ptr; |
2634 | if (is_whitespace(curr)) { |
2635 | ptr++; |
2636 | continue; |
2637 | } else if (curr == '(') { |
2638 | out->start = ptr++; |
2639 | out->end = ptr; |
2640 | out->token_type = NPY_TOKEN_TYPE_LPAREN; |
2641 | return VEC0_TOKEN_RESULT_SOME2; |
2642 | } else if (curr == ')') { |
2643 | out->start = ptr++; |
2644 | out->end = ptr; |
2645 | out->token_type = NPY_TOKEN_TYPE_RPAREN; |
2646 | return VEC0_TOKEN_RESULT_SOME2; |
2647 | } else if (curr == '{') { |
2648 | out->start = ptr++; |
2649 | out->end = ptr; |
2650 | out->token_type = NPY_TOKEN_TYPE_LBRACE; |
2651 | return VEC0_TOKEN_RESULT_SOME2; |
2652 | } else if (curr == '}') { |
2653 | out->start = ptr++; |
2654 | out->end = ptr; |
2655 | out->token_type = NPY_TOKEN_TYPE_RBRACE; |
2656 | return VEC0_TOKEN_RESULT_SOME2; |
2657 | } else if (curr == ':') { |
2658 | out->start = ptr++; |
2659 | out->end = ptr; |
2660 | out->token_type = NPY_TOKEN_TYPE_COLON; |
2661 | return VEC0_TOKEN_RESULT_SOME2; |
2662 | } else if (curr == ',') { |
2663 | out->start = ptr++; |
2664 | out->end = ptr; |
2665 | out->token_type = NPY_TOKEN_TYPE_COMMA; |
2666 | return VEC0_TOKEN_RESULT_SOME2; |
2667 | } else if (curr == '\'') { |
2668 | unsigned char *start = ptr; |
2669 | ptr++; |
2670 | while (ptr < end) { |
2671 | if ((*ptr) == '\'') { |
2672 | break; |
2673 | } |
2674 | ptr++; |
2675 | } |
2676 | if ((*ptr) != '\'') { |
2677 | return VEC0_TOKEN_RESULT_ERROR3; |
2678 | } |
2679 | out->start = start; |
2680 | out->end = ++ptr; |
2681 | out->token_type = NPY_TOKEN_TYPE_STRING; |
2682 | return VEC0_TOKEN_RESULT_SOME2; |
2683 | } else if (curr == 'F' && |
2684 | strncmp((char *)ptr, "False", strlen("False")) == 0) { |
2685 | out->start = ptr; |
2686 | out->end = (ptr + (int)strlen("False")); |
2687 | ptr = out->end; |
2688 | out->token_type = NPY_TOKEN_TYPE_FALSE; |
2689 | return VEC0_TOKEN_RESULT_SOME2; |
2690 | } else if (is_digit(curr)) { |
2691 | unsigned char *start = ptr; |
2692 | while (ptr < end && (is_digit(*ptr))) { |
2693 | ptr++; |
2694 | } |
2695 | out->start = start; |
2696 | out->end = ptr; |
2697 | out->token_type = NPY_TOKEN_TYPE_NUMBER; |
2698 | return VEC0_TOKEN_RESULT_SOME2; |
2699 | } else { |
2700 | return VEC0_TOKEN_RESULT_ERROR3; |
2701 | } |
2702 | } |
2703 | return VEC0_TOKEN_RESULT_ERROR3; |
2704 | } |
2705 | |
// Cursor over a numpy header buffer, used with npy_scanner_init() and
// npy_scanner_next().
struct NpyScanner {
  // Where the next token scan begins; npy_scanner_next() advances this past
  // each token it returns.
  unsigned char *start;
  // One past the last byte of the buffer.
  unsigned char *end;
  // Set to the buffer start by npy_scanner_init(); not read or advanced by
  // npy_scanner_next().
  unsigned char *ptr;
};
2711 | |
2712 | void npy_scanner_init(struct NpyScanner *scanner, const unsigned char *source, |
2713 | int source_length) { |
2714 | scanner->start = (unsigned char *)source; |
2715 | scanner->end = (unsigned char *)source + source_length; |
2716 | scanner->ptr = (unsigned char *)source; |
2717 | } |
2718 | |
2719 | int npy_scanner_next(struct NpyScanner *scanner, struct NpyToken *out) { |
2720 | int rc = npy_token_next(scanner->start, scanner->end, out); |
2721 | if (rc == VEC0_TOKEN_RESULT_SOME2) { |
2722 | scanner->start = out->end; |
2723 | } |
2724 | return rc; |
2725 | } |
2726 | |
2727 | #define NPY_PARSE_ERROR"Error parsing numpy array: " "Error parsing numpy array: " |
2728 | int parse_npy_header(sqlite3_vtab *pVTab, const unsigned char *header, |
2729 | size_t headerLength, |
2730 | enum VectorElementType *out_element_type, |
2731 | int *fortran_order, size_t *numElements, |
2732 | size_t *numDimensions) { |
2733 | |
2734 | struct NpyScanner scanner; |
2735 | struct NpyToken token; |
2736 | int rc; |
2737 | npy_scanner_init(&scanner, header, headerLength); |
2738 | |
2739 | if (npy_scanner_next(&scanner, &token) != VEC0_TOKEN_RESULT_SOME2 && |
2740 | token.token_type != NPY_TOKEN_TYPE_LBRACE) { |
2741 | vtab_set_error(pVTab, |
2742 | NPY_PARSE_ERROR"Error parsing numpy array: " "numpy header did not start with '{'"); |
2743 | return SQLITE_ERROR1; |
2744 | } |
2745 | while (1) { |
2746 | rc = npy_scanner_next(&scanner, &token); |
2747 | if (rc != VEC0_TOKEN_RESULT_SOME2) { |
2748 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "expected key in numpy header"); |
2749 | return SQLITE_ERROR1; |
2750 | } |
2751 | |
2752 | if (token.token_type == NPY_TOKEN_TYPE_RBRACE) { |
2753 | break; |
2754 | } |
2755 | if (token.token_type != NPY_TOKEN_TYPE_STRING) { |
2756 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2757 | "expected a string as key in numpy header"); |
2758 | return SQLITE_ERROR1; |
2759 | } |
2760 | unsigned char *key = token.start; |
2761 | |
2762 | rc = npy_scanner_next(&scanner, &token); |
2763 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || |
2764 | (token.token_type != NPY_TOKEN_TYPE_COLON)) { |
2765 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2766 | "expected a ':' after key in numpy header"); |
2767 | return SQLITE_ERROR1; |
2768 | } |
2769 | |
2770 | if (strncmp((char *)key, "'descr'", strlen("'descr'")) == 0) { |
2771 | rc = npy_scanner_next(&scanner, &token); |
2772 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || |
2773 | (token.token_type != NPY_TOKEN_TYPE_STRING)) { |
2774 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2775 | "expected a string value after 'descr' key"); |
2776 | return SQLITE_ERROR1; |
2777 | } |
2778 | if (strncmp((char *)token.start, "'<f4'", strlen("'<f4'")) != 0) { |
2779 | vtab_set_error( |
2780 | pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2781 | "Only '<f4' values are supported in sqlite-vec numpy functions"); |
2782 | return SQLITE_ERROR1; |
2783 | } |
2784 | *out_element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32; |
2785 | } else if (strncmp((char *)key, "'fortran_order'", |
2786 | strlen("'fortran_order'")) == 0) { |
2787 | rc = npy_scanner_next(&scanner, &token); |
2788 | if (rc != VEC0_TOKEN_RESULT_SOME2 || |
2789 | token.token_type != NPY_TOKEN_TYPE_FALSE) { |
2790 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2791 | "Only fortran_order = False is supported in sqlite-vec " |
2792 | "numpy functions"); |
2793 | return SQLITE_ERROR1; |
2794 | } |
2795 | *fortran_order = 0; |
2796 | } else if (strncmp((char *)key, "'shape'", strlen("'shape'")) == 0) { |
2797 | // "(xxx, xxx)" OR (xxx,) |
2798 | size_t first; |
2799 | rc = npy_scanner_next(&scanner, &token); |
2800 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || |
2801 | (token.token_type != NPY_TOKEN_TYPE_LPAREN)) { |
2802 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2803 | "Expected left parenthesis '(' after shape key"); |
2804 | return SQLITE_ERROR1; |
2805 | } |
2806 | |
2807 | rc = npy_scanner_next(&scanner, &token); |
2808 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || |
2809 | (token.token_type != NPY_TOKEN_TYPE_NUMBER)) { |
2810 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2811 | "Expected an initial number in shape value"); |
2812 | return SQLITE_ERROR1; |
2813 | } |
2814 | first = strtol((char *)token.start, NULL((void*)0), 10); |
2815 | |
2816 | rc = npy_scanner_next(&scanner, &token); |
2817 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || |
2818 | (token.token_type != NPY_TOKEN_TYPE_COMMA)) { |
2819 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2820 | "Expected comma after first shape value"); |
2821 | return SQLITE_ERROR1; |
2822 | } |
2823 | |
2824 | rc = npy_scanner_next(&scanner, &token); |
2825 | if (rc != VEC0_TOKEN_RESULT_SOME2) { |
2826 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2827 | "unexpected header EOF while parsing shape"); |
2828 | return SQLITE_ERROR1; |
2829 | } |
2830 | if (token.token_type == NPY_TOKEN_TYPE_NUMBER) { |
2831 | *numElements = first; |
2832 | *numDimensions = strtol((char *)token.start, NULL((void*)0), 10); |
2833 | rc = npy_scanner_next(&scanner, &token); |
2834 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || |
2835 | (token.token_type != NPY_TOKEN_TYPE_RPAREN)) { |
2836 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " |
2837 | "expected right parenthesis after shape value"); |
2838 | return SQLITE_ERROR1; |
2839 | } |
2840 | } else if (token.token_type == NPY_TOKEN_TYPE_RPAREN) { |
2841 | // '(0,)' means an empty array! |
2842 | *numElements = first ? 1 : 0; |
2843 | *numDimensions = first; |
2844 | } else { |
2845 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown type in shape value"); |
2846 | return SQLITE_ERROR1; |
2847 | } |
2848 | } else { |
2849 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown key in numpy header"); |
2850 | return SQLITE_ERROR1; |
2851 | } |
2852 | |
2853 | rc = npy_scanner_next(&scanner, &token); |
2854 | if ((rc != VEC0_TOKEN_RESULT_SOME2) || |
2855 | (token.token_type != NPY_TOKEN_TYPE_COMMA)) { |
2856 | vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown extra token after value"); |
2857 | return SQLITE_ERROR1; |
2858 | } |
2859 | } |
2860 | |
2861 | return SQLITE_OK0; |
2862 | } |
2863 | |
// Virtual-table object for the vec_npy_each table function. It carries no
// state beyond the sqlite3_vtab base that SQLite requires.
typedef struct vec_npy_each_vtab vec_npy_each_vtab;
struct vec_npy_each_vtab {
  sqlite3_vtab base;
};
2868 | |
// Where a vec_npy_each cursor reads its numpy array from.
typedef enum {
  // The array was passed as an in-memory blob argument.
  VEC_NPY_EACH_INPUT_BUFFER,
  // The array is read in chunks from an opened file.
  VEC_NPY_EACH_INPUT_FILE,
} vec_npy_each_input_type;
2873 | |
// Cursor for the vec_npy_each table function. Depending on input_type it
// iterates either over an in-memory numpy buffer or over a numpy file that
// is read in chunks.
typedef struct vec_npy_each_cursor vec_npy_each_cursor;
struct vec_npy_each_cursor {
  sqlite3_vtab_cursor base;
  // index of the vector currently being yielded, starting at 0
  i64 iRowid;
  // sqlite-vec compatible type of vector
  enum VectorElementType elementType;
  // number of vectors in the npy array
  size_t nElements;
  // number of dimensions each vector has
  size_t nDimensions;

  vec_npy_each_input_type input_type;

  // when input_type == VEC_NPY_EACH_INPUT_BUFFER

  // Pointer to the vector data, when reading from an in-memory buffer.
  // Size: nElements * nDimensions * element_size
  // This points into the caller-supplied input (see parse_npy_buffer); the
  // cursor only resets it to NULL and never frees it.
  void *vector;

  // when input_type == VEC_NPY_EACH_INPUT_FILE

  // Opened npy file, when reading from a file.
  // fclose() when complete.
#ifndef SQLITE_VEC_OMIT_FS
  FILE *file;
#endif

  // an in-memory buffer containing a portion of the npy array.
  // Used for faster reading, instead of calling fread a lot.
  // Will have a byte-size of chunksBufferSize
  void *chunksBuffer;
  // size of allocated chunksBuffer in bytes
  size_t chunksBufferSize;
  //// Maximum length of the buffer, in terms of number of vectors.
  size_t maxChunks;

  // Counter index of the current vector inside chunksBuffer to yield.
  // Starts at 0 once chunksBuffer is read, and iterates to currentChunkSize.
  // Resets to 0 once that "buffer" is yielded and a new one is read.
  size_t currentChunkIndex;
  // Number of vectors the most recent fread() placed in chunksBuffer.
  size_t currentChunkSize;

  // 0 when there are still more elements to read/yield, 1 when complete.
  int eof;
};
2920 | |
// The six bytes every numpy array begins with. The array is sized to exactly
// 6, so no NUL terminator is stored; compare with memcmp() and
// sizeof(NPY_MAGIC), never with string functions.
static unsigned char NPY_MAGIC[6] = "\x93NUMPY";
2922 | |
2923 | #ifndef SQLITE_VEC_OMIT_FS |
2924 | int parse_npy_file(sqlite3_vtab *pVTab, FILE *file, vec_npy_each_cursor *pCur) { |
2925 | int n; |
2926 | fseek(file, 0, SEEK_END2); |
2927 | long fileSize = ftell(file); |
2928 | |
2929 | fseek(file, 0L, SEEK_SET0); |
2930 | |
2931 | unsigned char header[10]; |
2932 | n = fread(&header, sizeof(unsigned char), 10, file); |
2933 | if (n != 10) { |
2934 | vtab_set_error(pVTab, "numpy array file too short"); |
2935 | return SQLITE_ERROR1; |
2936 | } |
2937 | |
2938 | if (memcmp(NPY_MAGIC, header, sizeof(NPY_MAGIC)) != 0) { |
2939 | vtab_set_error(pVTab, |
2940 | "numpy array file does not contain the 'magic' header"); |
2941 | return SQLITE_ERROR1; |
2942 | } |
2943 | |
2944 | u8 major = header[6]; |
2945 | u8 minor = header[7]; |
2946 | uint16_t headerLength = 0; |
2947 | memcpy(&headerLength, &header[8], sizeof(uint16_t)); |
2948 | |
2949 | size_t totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) + |
2950 | sizeof(headerLength) + headerLength; |
2951 | i32 dataSize = fileSize - totalHeaderLength; |
2952 | if (dataSize < 0) { |
2953 | vtab_set_error(pVTab, "numpy array file header length is invalid"); |
2954 | return SQLITE_ERROR1; |
2955 | } |
2956 | |
2957 | unsigned char *headerX = sqlite3_mallocsqlite3_api->malloc(headerLength); |
2958 | if (headerLength && !headerX) { |
2959 | return SQLITE_NOMEM7; |
2960 | } |
2961 | |
2962 | n = fread(headerX, sizeof(char), headerLength, file); |
2963 | if (n != headerLength) { |
2964 | sqlite3_freesqlite3_api->free(headerX); |
2965 | vtab_set_error(pVTab, "numpy array file header length is invalid"); |
2966 | return SQLITE_ERROR1; |
2967 | } |
2968 | |
2969 | int fortran_order; |
2970 | enum VectorElementType element_type; |
2971 | size_t numElements; |
2972 | size_t numDimensions; |
2973 | int rc = parse_npy_header(pVTab, headerX, headerLength, &element_type, |
2974 | &fortran_order, &numElements, &numDimensions); |
2975 | sqlite3_freesqlite3_api->free(headerX); |
2976 | if (rc != SQLITE_OK0) { |
2977 | // parse_npy_header already attackes an error emssage |
2978 | return rc; |
2979 | } |
2980 | |
2981 | i32 expectedDataSize = |
2982 | numElements * vector_byte_size(element_type, numDimensions); |
2983 | if (expectedDataSize != dataSize) { |
2984 | vtab_set_error( |
2985 | pVTab, "numpy array file error: Expected a data size of %d, found %d", |
2986 | expectedDataSize, dataSize); |
2987 | return SQLITE_ERROR1; |
2988 | } |
2989 | |
2990 | pCur->maxChunks = 1024; |
2991 | pCur->chunksBufferSize = |
2992 | (vector_byte_size(element_type, numDimensions)) * pCur->maxChunks; |
2993 | pCur->chunksBuffer = sqlite3_mallocsqlite3_api->malloc(pCur->chunksBufferSize); |
2994 | if (pCur->chunksBufferSize && !pCur->chunksBuffer) { |
2995 | return SQLITE_NOMEM7; |
2996 | } |
2997 | |
2998 | pCur->currentChunkSize = |
2999 | fread(pCur->chunksBuffer, vector_byte_size(element_type, numDimensions), |
3000 | pCur->maxChunks, file); |
3001 | |
3002 | pCur->currentChunkIndex = 0; |
3003 | pCur->elementType = element_type; |
3004 | pCur->nElements = numElements; |
3005 | pCur->nDimensions = numDimensions; |
3006 | pCur->input_type = VEC_NPY_EACH_INPUT_FILE; |
3007 | |
3008 | pCur->eof = pCur->currentChunkSize == 0; |
3009 | pCur->file = file; |
3010 | return SQLITE_OK0; |
3011 | } |
3012 | #endif |
3013 | |
3014 | int parse_npy_buffer(sqlite3_vtab *pVTab, const unsigned char *buffer, |
3015 | int bufferLength, void **data, size_t *numElements, |
3016 | size_t *numDimensions, |
3017 | enum VectorElementType *element_type) { |
3018 | |
3019 | if (bufferLength < 10) { |
3020 | // IMP: V03312_20150 |
3021 | vtab_set_error(pVTab, "numpy array too short"); |
3022 | return SQLITE_ERROR1; |
3023 | } |
3024 | if (memcmp(NPY_MAGIC, buffer, sizeof(NPY_MAGIC)) != 0) { |
3025 | // V11954_28792 |
3026 | vtab_set_error(pVTab, "numpy array does not contain the 'magic' header"); |
3027 | return SQLITE_ERROR1; |
3028 | } |
3029 | |
3030 | u8 major = buffer[6]; |
3031 | u8 minor = buffer[7]; |
3032 | uint16_t headerLength = 0; |
3033 | memcpy(&headerLength, &buffer[8], sizeof(uint16_t)); |
3034 | |
3035 | i32 totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) + |
3036 | sizeof(headerLength) + headerLength; |
3037 | i32 dataSize = bufferLength - totalHeaderLength; |
3038 | |
3039 | if (dataSize < 0) { |
3040 | vtab_set_error(pVTab, "numpy array header length is invalid"); |
3041 | return SQLITE_ERROR1; |
3042 | } |
3043 | |
3044 | const unsigned char *header = &buffer[10]; |
3045 | int fortran_order; |
3046 | |
3047 | int rc = parse_npy_header(pVTab, header, headerLength, element_type, |
3048 | &fortran_order, numElements, numDimensions); |
3049 | if (rc != SQLITE_OK0) { |
3050 | return rc; |
3051 | } |
3052 | |
3053 | i32 expectedDataSize = |
3054 | (*numElements * vector_byte_size(*element_type, *numDimensions)); |
3055 | if (expectedDataSize != dataSize) { |
3056 | vtab_set_error(pVTab, |
3057 | "numpy array error: Expected a data size of %d, found %d", |
3058 | expectedDataSize, dataSize); |
3059 | return SQLITE_ERROR1; |
3060 | } |
3061 | |
3062 | *data = (void *)&buffer[totalHeaderLength]; |
3063 | return SQLITE_OK0; |
3064 | } |
3065 | |
3066 | static int vec_npy_eachConnect(sqlite3 *db, void *pAux, int argc, |
3067 | const char *const *argv, sqlite3_vtab **ppVtab, |
3068 | char **pzErr) { |
3069 | UNUSED_PARAMETER(pAux)(void)(pAux); |
3070 | UNUSED_PARAMETER(argc)(void)(argc); |
3071 | UNUSED_PARAMETER(argv)(void)(argv); |
3072 | UNUSED_PARAMETER(pzErr)(void)(pzErr); |
3073 | vec_npy_each_vtab *pNew; |
3074 | int rc; |
3075 | |
3076 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, "CREATE TABLE x(vector, input hidden)"); |
3077 | #define VEC_NPY_EACH_COLUMN_VECTOR0 0 |
3078 | #define VEC_NPY_EACH_COLUMN_INPUT1 1 |
3079 | if (rc == SQLITE_OK0) { |
3080 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); |
3081 | *ppVtab = (sqlite3_vtab *)pNew; |
3082 | if (pNew == 0) |
3083 | return SQLITE_NOMEM7; |
3084 | memset(pNew, 0, sizeof(*pNew)); |
3085 | } |
3086 | return rc; |
3087 | } |
3088 | |
3089 | static int vec_npy_eachDisconnect(sqlite3_vtab *pVtab) { |
3090 | vec_npy_each_vtab *p = (vec_npy_each_vtab *)pVtab; |
3091 | sqlite3_freesqlite3_api->free(p); |
3092 | return SQLITE_OK0; |
3093 | } |
3094 | |
3095 | static int vec_npy_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { |
3096 | UNUSED_PARAMETER(p)(void)(p); |
3097 | vec_npy_each_cursor *pCur; |
3098 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); |
3099 | if (pCur == 0) |
3100 | return SQLITE_NOMEM7; |
3101 | memset(pCur, 0, sizeof(*pCur)); |
3102 | *ppCursor = &pCur->base; |
3103 | return SQLITE_OK0; |
3104 | } |
3105 | |
3106 | static int vec_npy_eachClose(sqlite3_vtab_cursor *cur) { |
3107 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; |
3108 | #ifndef SQLITE_VEC_OMIT_FS |
3109 | if (pCur->file) { |
3110 | fclose(pCur->file); |
3111 | pCur->file = NULL((void*)0); |
3112 | } |
3113 | #endif |
3114 | if (pCur->chunksBuffer) { |
3115 | sqlite3_freesqlite3_api->free(pCur->chunksBuffer); |
3116 | pCur->chunksBuffer = NULL((void*)0); |
3117 | } |
3118 | if (pCur->vector) { |
3119 | pCur->vector = NULL((void*)0); |
3120 | } |
3121 | sqlite3_freesqlite3_api->free(pCur); |
3122 | return SQLITE_OK0; |
3123 | } |
3124 | |
3125 | static int vec_npy_eachBestIndex(sqlite3_vtab *pVTab, |
3126 | sqlite3_index_info *pIdxInfo) { |
3127 | int hasInput; |
3128 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { |
3129 | const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i]; |
3130 | // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn, |
3131 | // pCons->op, pCons->usable); |
3132 | switch (pCons->iColumn) { |
3133 | case VEC_NPY_EACH_COLUMN_INPUT1: { |
3134 | if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ2 && pCons->usable) { |
3135 | hasInput = 1; |
3136 | pIdxInfo->aConstraintUsage[i].argvIndex = 1; |
3137 | pIdxInfo->aConstraintUsage[i].omit = 1; |
3138 | } |
3139 | break; |
3140 | } |
3141 | } |
3142 | } |
3143 | if (!hasInput) { |
3144 | pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("input argument is required"); |
3145 | return SQLITE_ERROR1; |
3146 | } |
3147 | |
3148 | pIdxInfo->estimatedCost = (double)100000; |
3149 | pIdxInfo->estimatedRows = 100000; |
3150 | |
3151 | return SQLITE_OK0; |
3152 | } |
3153 | |
3154 | static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, |
3155 | const char *idxStr, int argc, |
3156 | sqlite3_value **argv) { |
3157 | UNUSED_PARAMETER(idxNum)(void)(idxNum); |
3158 | UNUSED_PARAMETER(idxStr)(void)(idxStr); |
3159 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 3159, __extension__ __PRETTY_FUNCTION__); })); |
3160 | int rc; |
3161 | |
3162 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)pVtabCursor; |
3163 | |
3164 | #ifndef SQLITE_VEC_OMIT_FS |
3165 | if (pCur->file) { |
3166 | fclose(pCur->file); |
3167 | pCur->file = NULL((void*)0); |
3168 | } |
3169 | #endif |
3170 | if (pCur->chunksBuffer) { |
3171 | sqlite3_freesqlite3_api->free(pCur->chunksBuffer); |
3172 | pCur->chunksBuffer = NULL((void*)0); |
3173 | } |
3174 | if (pCur->vector) { |
3175 | pCur->vector = NULL((void*)0); |
3176 | } |
3177 | |
3178 | #ifndef SQLITE_VEC_OMIT_FS |
3179 | struct VecNpyFile *f = NULL((void*)0); |
3180 | if ((f = sqlite3_value_pointersqlite3_api->value_pointer(argv[0], SQLITE_VEC_NPY_FILE_NAME"vec0-npy-file"))) { |
3181 | FILE *file = fopen(f->path, "r"); |
3182 | if (!file) { |
3183 | vtab_set_error(pVtabCursor->pVtab, "Could not open numpy file"); |
3184 | return SQLITE_ERROR1; |
3185 | } |
3186 | |
3187 | rc = parse_npy_file(pVtabCursor->pVtab, file, pCur); |
3188 | if (rc != SQLITE_OK0) { |
3189 | #ifndef SQLITE_VEC_OMIT_FS |
3190 | fclose(file); |
3191 | #endif |
3192 | return rc; |
3193 | } |
3194 | |
3195 | } else |
3196 | #endif |
3197 | { |
3198 | |
3199 | const unsigned char *input = sqlite3_value_blobsqlite3_api->value_blob(argv[0]); |
3200 | int inputLength = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]); |
3201 | void *data; |
3202 | size_t numElements; |
3203 | size_t numDimensions; |
3204 | enum VectorElementType element_type; |
3205 | |
3206 | rc = parse_npy_buffer(pVtabCursor->pVtab, input, inputLength, &data, |
3207 | &numElements, &numDimensions, &element_type); |
3208 | if (rc != SQLITE_OK0) { |
3209 | return rc; |
3210 | } |
3211 | |
3212 | pCur->vector = data; |
3213 | pCur->elementType = element_type; |
3214 | pCur->nElements = numElements; |
3215 | pCur->nDimensions = numDimensions; |
3216 | pCur->input_type = VEC_NPY_EACH_INPUT_BUFFER; |
3217 | } |
3218 | |
3219 | pCur->iRowid = 0; |
3220 | return SQLITE_OK0; |
3221 | } |
3222 | |
3223 | static int vec_npy_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { |
3224 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; |
3225 | *pRowid = pCur->iRowid; |
3226 | return SQLITE_OK0; |
3227 | } |
3228 | |
3229 | static int vec_npy_eachEof(sqlite3_vtab_cursor *cur) { |
3230 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; |
3231 | if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) { |
3232 | return (!pCur->nElements) || (size_t)pCur->iRowid >= pCur->nElements; |
3233 | } |
3234 | return pCur->eof; |
3235 | } |
3236 | |
3237 | static int vec_npy_eachNext(sqlite3_vtab_cursor *cur) { |
3238 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; |
3239 | pCur->iRowid++; |
3240 | if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) { |
3241 | return SQLITE_OK0; |
3242 | } |
3243 | |
3244 | #ifndef SQLITE_VEC_OMIT_FS |
3245 | // else: input is a file |
3246 | pCur->currentChunkIndex++; |
3247 | if (pCur->currentChunkIndex >= pCur->currentChunkSize) { |
3248 | pCur->currentChunkSize = |
3249 | fread(pCur->chunksBuffer, |
3250 | vector_byte_size(pCur->elementType, pCur->nDimensions), |
3251 | pCur->maxChunks, pCur->file); |
3252 | if (!pCur->currentChunkSize) { |
3253 | pCur->eof = 1; |
3254 | } |
3255 | pCur->currentChunkIndex = 0; |
3256 | } |
3257 | #endif |
3258 | return SQLITE_OK0; |
3259 | } |
3260 | |
3261 | static int vec_npy_eachColumnBuffer(vec_npy_each_cursor *pCur, |
3262 | sqlite3_context *context, int i) { |
3263 | switch (i) { |
3264 | case VEC_NPY_EACH_COLUMN_VECTOR0: { |
3265 | sqlite3_result_subtypesqlite3_api->result_subtype(context, pCur->elementType); |
3266 | switch (pCur->elementType) { |
3267 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
3268 | sqlite3_result_blobsqlite3_api->result_blob( |
3269 | context, |
3270 | &((unsigned char *) |
3271 | pCur->vector)[pCur->iRowid * pCur->nDimensions * sizeof(f32)], |
3272 | pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
3273 | |
3274 | break; |
3275 | } |
3276 | case SQLITE_VEC_ELEMENT_TYPE_INT8: |
3277 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
3278 | // https://github.com/asg017/sqlite-vec/issues/42 |
3279 | sqlite3_result_errorsqlite3_api->result_error(context, |
3280 | "vec_npy_each only supports float32 vectors", -1); |
3281 | break; |
3282 | } |
3283 | } |
3284 | |
3285 | break; |
3286 | } |
3287 | } |
3288 | return SQLITE_OK0; |
3289 | } |
3290 | static int vec_npy_eachColumnFile(vec_npy_each_cursor *pCur, |
3291 | sqlite3_context *context, int i) { |
3292 | switch (i) { |
3293 | case VEC_NPY_EACH_COLUMN_VECTOR0: { |
3294 | switch (pCur->elementType) { |
3295 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
3296 | sqlite3_result_blobsqlite3_api->result_blob( |
3297 | context, |
3298 | &((unsigned char *) |
3299 | pCur->chunksBuffer)[pCur->currentChunkIndex * |
3300 | pCur->nDimensions * sizeof(f32)], |
3301 | pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
3302 | break; |
3303 | } |
3304 | case SQLITE_VEC_ELEMENT_TYPE_INT8: |
3305 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
3306 | // https://github.com/asg017/sqlite-vec/issues/42 |
3307 | sqlite3_result_errorsqlite3_api->result_error(context, |
3308 | "vec_npy_each only supports float32 vectors", -1); |
3309 | break; |
3310 | } |
3311 | } |
3312 | break; |
3313 | } |
3314 | } |
3315 | return SQLITE_OK0; |
3316 | } |
3317 | static int vec_npy_eachColumn(sqlite3_vtab_cursor *cur, |
3318 | sqlite3_context *context, int i) { |
3319 | vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur; |
3320 | switch (pCur->input_type) { |
3321 | case VEC_NPY_EACH_INPUT_BUFFER: |
3322 | return vec_npy_eachColumnBuffer(pCur, context, i); |
3323 | case VEC_NPY_EACH_INPUT_FILE: |
3324 | return vec_npy_eachColumnFile(pCur, context, i); |
3325 | } |
3326 | return SQLITE_ERROR1; |
3327 | } |
3328 | |
3329 | static sqlite3_module vec_npy_eachModule = { |
3330 | /* iVersion */ 0, |
3331 | /* xCreate */ 0, |
3332 | /* xConnect */ vec_npy_eachConnect, |
3333 | /* xBestIndex */ vec_npy_eachBestIndex, |
3334 | /* xDisconnect */ vec_npy_eachDisconnect, |
3335 | /* xDestroy */ 0, |
3336 | /* xOpen */ vec_npy_eachOpen, |
3337 | /* xClose */ vec_npy_eachClose, |
3338 | /* xFilter */ vec_npy_eachFilter, |
3339 | /* xNext */ vec_npy_eachNext, |
3340 | /* xEof */ vec_npy_eachEof, |
3341 | /* xColumn */ vec_npy_eachColumn, |
3342 | /* xRowid */ vec_npy_eachRowid, |
3343 | /* xUpdate */ 0, |
3344 | /* xBegin */ 0, |
3345 | /* xSync */ 0, |
3346 | /* xCommit */ 0, |
3347 | /* xRollback */ 0, |
3348 | /* xFindMethod */ 0, |
3349 | /* xRename */ 0, |
3350 | /* xSavepoint */ 0, |
3351 | /* xRelease */ 0, |
3352 | /* xRollbackTo */ 0, |
3353 | /* xShadowName */ 0, |
3354 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 |
3355 | /* xIntegrity */ 0, |
3356 | #endif |
3357 | }; |
3358 | |
3359 | #pragma endregion |
3360 | |
3361 | #pragma region vec0 virtual table |
3362 | |
3363 | #define VEC0_COLUMN_ID0 0 |
3364 | #define VEC0_COLUMN_USERN_START1 1 |
3365 | #define VEC0_COLUMN_OFFSET_DISTANCE1 1 |
3366 | #define VEC0_COLUMN_OFFSET_K2 2 |
3367 | |
// printf-style templates (sqlite3_mprintf "%w" identifier quoting) for the
// names and CREATE statements of the vec0 shadow tables. Unless noted
// otherwise the arguments are: 1) schema name, 2) vec0 table name.

#define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""

// "_chunks" shadow table.
#define VEC0_SHADOW_CHUNKS_NAME "\"%w\".\"%w_chunks\""
#define VEC0_SHADOW_CHUNKS_CREATE                                              \
  "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME "("                                  \
  "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,"                                \
  "size INTEGER NOT NULL,"                                                     \
  "validity BLOB NOT NULL,"                                                    \
  "rowids BLOB NOT NULL"                                                       \
  ");"

// "_rowids" shadow table, default (non-text primary key) flavour.
#define VEC0_SHADOW_ROWIDS_NAME "\"%w\".\"%w_rowids\""
#define VEC0_SHADOW_ROWIDS_CREATE_BASIC                                        \
  "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME "("                                  \
  "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"                                   \
  "id,"                                                                        \
  "chunk_id INTEGER,"                                                          \
  "chunk_offset INTEGER"                                                       \
  ");"

// "_rowids" shadow table for vec0 tables declared with a TEXT primary key.
// The rows are still keyed by an int64 rowid, because vec0 chunks require a
// fixed-length rowid; the text key is emulated with an extra
// "id TEXT UNIQUE NOT NULL" column.
#define VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT                                      \
  "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME "("                                  \
  "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"                                   \
  "id TEXT UNIQUE NOT NULL,"                                                   \
  "chunk_id INTEGER,"                                                          \
  "chunk_offset INTEGER"                                                       \
  ");"

// Per-vector-column chunk tables.
// Arguments: 1) schema name, 2) vec0 table name, 3) two-digit column number.
#define VEC0_SHADOW_VECTOR_N_NAME "\"%w\".\"%w_vector_chunks%02d\""
#define VEC0_SHADOW_VECTOR_N_CREATE                                            \
  "CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME "("                                \
  "rowid PRIMARY KEY,"                                                         \
  "vectors BLOB NOT NULL"                                                      \
  ");"

#define VEC0_SHADOW_AUXILIARY_NAME "\"%w\".\"%w_auxiliary\""

// Per-metadata-column tables.
// Arguments: 1) schema name, 2) vec0 table name, 3) two-digit column number.
#define VEC0_SHADOW_METADATA_N_NAME "\"%w\".\"%w_metadatachunks%02d\""
#define VEC0_SHADOW_METADATA_TEXT_DATA_NAME "\"%w\".\"%w_metadatatext%02d\""

// Prefix for error messages that indicate a bug inside sqlite-vec itself, and
// the URL where such bugs should be reported.
#define VEC_INTERAL_ERROR "Internal sqlite-vec error: "
#define REPORT_URL "https://github.com/asg017/sqlite-vec/issues/new"
3418 | |
typedef struct vec0_vtab vec0_vtab;

// Upper bounds on how many user columns of each kind a single vec0 table may
// declare. They size the fixed arrays inside struct vec0_vtab.
#define VEC0_MAX_VECTOR_COLUMNS 16
#define VEC0_MAX_PARTITION_COLUMNS 4
#define VEC0_MAX_AUXILIARY_COLUMNS 16
#define VEC0_MAX_METADATA_COLUMNS 16

#define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192

// Size in bytes of one text-metadata "view" slot stored in a metadata chunk,
// and the longest text length that is served directly from that slot (longer
// values are looked up in the _metadatatextNN shadow table).
#define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16
#define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH 12
3429 | |
// Classifies every user-declared column of a vec0 table.
typedef enum {
  // Vector column, e.g. "contents_embedding float[1024]".
  SQLITE_VEC0_USER_COLUMN_KIND_VECTOR = 1,

  // Partition key column, e.g. "user_id integer partition key".
  SQLITE_VEC0_USER_COLUMN_KIND_PARTITION = 2,

  // Auxiliary column; its values are read from the _auxiliary shadow table.
  SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY = 3,

  // Metadata column that can be filtered on, e.g. "genre text".
  SQLITE_VEC0_USER_COLUMN_KIND_METADATA = 4,
} vec0_user_column_kind;
3443 | |
3444 | struct vec0_vtab { |
3445 | sqlite3_vtab base; |
3446 | |
3447 | // the SQLite connection of the host database |
3448 | sqlite3 *db; |
3449 | |
3450 | // True if the primary key of the vec0 table has a column type TEXT. |
3451 | // Will change the schema of the _rowids table, and insert/query logic. |
3452 | int pkIsText; |
3453 | |
3454 | // number of defined vector columns. |
3455 | int numVectorColumns; |
3456 | |
3457 | // number of defined PARTITION KEY columns. |
3458 | int numPartitionColumns; |
3459 | |
3460 | // number of defined auxiliary columns |
3461 | int numAuxiliaryColumns; |
3462 | |
3463 | // number of defined metadata columns |
3464 | int numMetadataColumns; |
3465 | |
3466 | |
3467 | // Name of the schema the table exists on. |
3468 | // Must be freed with sqlite3_free() |
3469 | char *schemaName; |
3470 | |
3471 | // Name of the table the table exists on. |
3472 | // Must be freed with sqlite3_free() |
3473 | char *tableName; |
3474 | |
3475 | // Name of the _rowids shadow table. |
3476 | // Must be freed with sqlite3_free() |
3477 | char *shadowRowidsName; |
3478 | |
3479 | // Name of the _chunks shadow table. |
3480 | // Must be freed with sqlite3_free() |
3481 | char *shadowChunksName; |
3482 | |
3483 | // contains enum vec0_user_column_kind values for up to |
3484 | // numVectorColumns + numPartitionColumns entries |
3485 | vec0_user_column_kind user_column_kinds[VEC0_MAX_VECTOR_COLUMNS16 + VEC0_MAX_PARTITION_COLUMNS4 + VEC0_MAX_AUXILIARY_COLUMNS16 + VEC0_MAX_METADATA_COLUMNS16]; |
3486 | |
3487 | uint8_t user_column_idxs[VEC0_MAX_VECTOR_COLUMNS16 + VEC0_MAX_PARTITION_COLUMNS4 + VEC0_MAX_AUXILIARY_COLUMNS16 + VEC0_MAX_METADATA_COLUMNS16]; |
3488 | |
3489 | |
3490 | // Name of all the vector chunk shadow tables. |
3491 | // Ex '_vector_chunks00' |
3492 | // Only the first numVectorColumns entries will be available. |
3493 | // The first numVectorColumns entries must be freed with sqlite3_free() |
3494 | char *shadowVectorChunksNames[VEC0_MAX_VECTOR_COLUMNS16]; |
3495 | |
3496 | // Name of all metadata chunk shadow tables, ie `_metadatachunks00` |
3497 | // Only the first numMetadataColumns entries will be available. |
3498 | // The first numMetadataColumns entries must be freed with sqlite3_free() |
3499 | char *shadowMetadataChunksNames[VEC0_MAX_METADATA_COLUMNS16]; |
3500 | |
3501 | struct VectorColumnDefinition vector_columns[VEC0_MAX_VECTOR_COLUMNS16]; |
3502 | struct Vec0PartitionColumnDefinition paritition_columns[VEC0_MAX_PARTITION_COLUMNS4]; |
3503 | struct Vec0AuxiliaryColumnDefinition auxiliary_columns[VEC0_MAX_AUXILIARY_COLUMNS16]; |
3504 | struct Vec0MetadataColumnDefinition metadata_columns[VEC0_MAX_METADATA_COLUMNS16]; |
3505 | |
3506 | int chunk_size; |
3507 | |
3508 | // select latest chunk from _chunks, getting chunk_id |
3509 | sqlite3_stmt *stmtLatestChunk; |
3510 | |
3511 | /** |
3512 | * Statement to insert a row into the _rowids table, with a rowid. |
3513 | * Parameters: |
3514 | * 1: int64, rowid to insert |
3515 | * Result columns: none |
3516 | * SQL: "INSERT INTO _rowids(rowid) VALUES (?)" |
3517 | * |
3518 | * Must be cleaned up with sqlite3_finalize(). |
3519 | */ |
3520 | sqlite3_stmt *stmtRowidsInsertRowid; |
3521 | |
3522 | /** |
3523 | * Statement to insert a row into the _rowids table, with an id. |
3524 | * The id column isn't a tradition primary key, but instead a unique |
3525 | * column to handle "text primary key" vec0 tables. The true int64 rowid |
3526 | * can be retrieved after inserting with sqlite3_last_rowid(). |
3527 | * |
3528 | * Parameters: |
3529 | * 1: text or null, id to insert |
3530 | * Result columns: none |
3531 | * |
3532 | * Must be cleaned up with sqlite3_finalize(). |
3533 | */ |
3534 | sqlite3_stmt *stmtRowidsInsertId; |
3535 | |
3536 | /** |
3537 | * Statement to update the "position" columns chunk_id and chunk_offset for |
3538 | * a given _rowids row. Used when the "next available" chunk position is found |
3539 | * for a vector. |
3540 | * |
3541 | * Parameters: |
3542 | * 1: int64, chunk_id value |
3543 | * 2: int64, chunk_offset value |
3544 | * 3: int64, rowid value |
3545 | * Result columns: none |
3546 | * |
3547 | * Must be cleaned up with sqlite3_finalize(). |
3548 | */ |
3549 | sqlite3_stmt *stmtRowidsUpdatePosition; |
3550 | |
3551 | /** |
3552 | * Statement to quickly find the chunk_id + chunk_offset of a given row. |
3553 | * Parameters: |
3554 | * 1: rowid of the row/vector to lookup |
3555 | * Result columns: |
3556 | * 0: chunk_id (i64) |
3557 | * 1: chunk_offset (i64) |
3558 | * SQL: "SELECT id, chunk_id, chunk_offset FROM _rowids WHERE rowid = ?"" |
3559 | * |
3560 | * Must be cleaned up with sqlite3_finalize(). |
3561 | */ |
3562 | sqlite3_stmt *stmtRowidsGetChunkPosition; |
3563 | }; |
3564 | |
3565 | /** |
3566 | * @brief Finalize all the sqlite3_stmt members in a vec0_vtab. |
3567 | * |
3568 | * @param p vec0_vtab pointer |
3569 | */ |
3570 | void vec0_free_resources(vec0_vtab *p) { |
3571 | sqlite3_finalizesqlite3_api->finalize(p->stmtLatestChunk); |
3572 | p->stmtLatestChunk = NULL((void*)0); |
3573 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertRowid); |
3574 | p->stmtRowidsInsertRowid = NULL((void*)0); |
3575 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertId); |
3576 | p->stmtRowidsInsertId = NULL((void*)0); |
3577 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsUpdatePosition); |
3578 | p->stmtRowidsUpdatePosition = NULL((void*)0); |
3579 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsGetChunkPosition); |
3580 | p->stmtRowidsGetChunkPosition = NULL((void*)0); |
3581 | } |
3582 | |
3583 | /** |
3584 | * @brief Free all memory and sqlite3_stmt members of a vec0_vtab |
3585 | * |
3586 | * @param p vec0_vtab pointer |
3587 | */ |
3588 | void vec0_free(vec0_vtab *p) { |
3589 | vec0_free_resources(p); |
3590 | |
3591 | sqlite3_freesqlite3_api->free(p->schemaName); |
3592 | p->schemaName = NULL((void*)0); |
3593 | sqlite3_freesqlite3_api->free(p->tableName); |
3594 | p->tableName = NULL((void*)0); |
3595 | sqlite3_freesqlite3_api->free(p->shadowChunksName); |
3596 | p->shadowChunksName = NULL((void*)0); |
3597 | sqlite3_freesqlite3_api->free(p->shadowRowidsName); |
3598 | p->shadowRowidsName = NULL((void*)0); |
3599 | |
3600 | for (int i = 0; i < p->numVectorColumns; i++) { |
3601 | sqlite3_freesqlite3_api->free(p->shadowVectorChunksNames[i]); |
3602 | p->shadowVectorChunksNames[i] = NULL((void*)0); |
3603 | |
3604 | sqlite3_freesqlite3_api->free(p->vector_columns[i].name); |
3605 | p->vector_columns[i].name = NULL((void*)0); |
3606 | } |
3607 | } |
3608 | |
3609 | int vec0_num_defined_user_columns(vec0_vtab *p) { |
3610 | return p->numVectorColumns + p->numPartitionColumns + p->numAuxiliaryColumns + p->numMetadataColumns; |
3611 | } |
3612 | |
3613 | /** |
3614 | * @brief Returns the index of the distance hidden column for the given vec0 |
3615 | * table. |
3616 | * |
3617 | * @param p vec0 table |
3618 | * @return int |
3619 | */ |
3620 | int vec0_column_distance_idx(vec0_vtab *p) { |
3621 | return VEC0_COLUMN_USERN_START1 + (vec0_num_defined_user_columns(p) - 1) + |
3622 | VEC0_COLUMN_OFFSET_DISTANCE1; |
3623 | } |
3624 | |
3625 | /** |
3626 | * @brief Returns the index of the k hidden column for the given vec0 table. |
3627 | * |
3628 | * @param p vec0 table |
3629 | * @return int k column index |
3630 | */ |
3631 | int vec0_column_k_idx(vec0_vtab *p) { |
3632 | return VEC0_COLUMN_USERN_START1 + (vec0_num_defined_user_columns(p) - 1) + |
3633 | VEC0_COLUMN_OFFSET_K2; |
3634 | } |
3635 | |
3636 | /** |
3637 | * Returns 1 if the given column-based index is a valid vector column, |
3638 | * 0 otherwise. |
3639 | */ |
3640 | int vec0_column_idx_is_vector(vec0_vtab *pVtab, int column_idx) { |
3641 | return column_idx >= VEC0_COLUMN_USERN_START1 && |
3642 | column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) && |
3643 | pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_VECTOR; |
3644 | } |
3645 | |
3646 | /** |
3647 | * Returns the vector index of the given user column index. |
3648 | * ONLY call if validated with vec0_column_idx_is_vector before |
3649 | */ |
3650 | int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) { |
3651 | UNUSED_PARAMETER(pVtab)(void)(pVtab); |
3652 | return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1]; |
3653 | } |
3654 | /** |
3655 | * Returns 1 if the given column-based index is a "partition key" column, |
3656 | * 0 otherwise. |
3657 | */ |
3658 | int vec0_column_idx_is_partition(vec0_vtab *pVtab, int column_idx) { |
3659 | return column_idx >= VEC0_COLUMN_USERN_START1 && |
3660 | column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) && |
3661 | pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_PARTITION; |
3662 | } |
3663 | |
3664 | /** |
3665 | * Returns the partition column index of the given user column index. |
3666 | * ONLY call if validated with vec0_column_idx_is_vector before |
3667 | */ |
3668 | int vec0_column_idx_to_partition_idx(vec0_vtab *pVtab, int column_idx) { |
3669 | UNUSED_PARAMETER(pVtab)(void)(pVtab); |
3670 | return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1]; |
3671 | } |
3672 | |
3673 | /** |
3674 | * Returns 1 if the given column-based index is a auxiliary column, |
3675 | * 0 otherwise. |
3676 | */ |
3677 | int vec0_column_idx_is_auxiliary(vec0_vtab *pVtab, int column_idx) { |
3678 | return column_idx >= VEC0_COLUMN_USERN_START1 && |
3679 | column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) && |
3680 | pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY; |
3681 | } |
3682 | |
3683 | /** |
3684 | * Returns the auxiliary column index of the given user column index. |
3685 | * ONLY call if validated with vec0_column_idx_to_partition_idx before |
3686 | */ |
3687 | int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) { |
3688 | UNUSED_PARAMETER(pVtab)(void)(pVtab); |
3689 | return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1]; |
3690 | } |
3691 | |
3692 | /** |
3693 | * Returns 1 if the given column-based index is a metadata column, |
3694 | * 0 otherwise. |
3695 | */ |
3696 | int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) { |
3697 | return column_idx >= VEC0_COLUMN_USERN_START1 && |
3698 | column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) && |
3699 | pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_METADATA; |
3700 | } |
3701 | |
3702 | /** |
3703 | * Returns the metadata column index of the given user column index. |
3704 | * ONLY call if validated with vec0_column_idx_is_metadata before |
3705 | */ |
3706 | int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) { |
3707 | UNUSED_PARAMETER(pVtab)(void)(pVtab); |
3708 | return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1]; |
3709 | } |
3710 | |
3711 | /** |
3712 | * @brief Retrieve the chunk_id, chunk_offset, and possible "id" value |
3713 | * of a vec0_vtab row with the provided rowid |
3714 | * |
3715 | * @param p vec0_vtab |
3716 | * @param rowid the rowid of the row to query |
3717 | * @param id output, optional sqlite3_value to provide the id. |
3718 | * Useful for text PK rows. Must be freed with sqlite3_value_free() |
3719 | * @param chunk_id output, the chunk_id the row belongs to |
3720 | * @param chunk_offset output, the offset within the chunk the row belongs to |
3721 | * @return SQLITE_ROW on success, error code otherwise. SQLITE_EMPTY if row DNE |
3722 | */ |
3723 | int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, sqlite3_value **id, |
3724 | i64 *chunk_id, i64 *chunk_offset) { |
3725 | int rc; |
3726 | |
3727 | if (!p->stmtRowidsGetChunkPosition) { |
3728 | const char *zSql = |
3729 | sqlite3_mprintfsqlite3_api->mprintf("SELECT id, chunk_id, chunk_offset " |
3730 | "FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE rowid = ?", |
3731 | p->schemaName, p->tableName); |
3732 | if (!zSql) { |
3733 | rc = SQLITE_NOMEM7; |
3734 | goto cleanup; |
3735 | } |
3736 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsGetChunkPosition, 0); |
3737 | sqlite3_freesqlite3_api->free((void *)zSql); |
3738 | if (rc != SQLITE_OK0) { |
3739 | vtab_set_error( |
3740 | &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
3741 | "could not initialize 'rowids get chunk position' statement"); |
3742 | goto cleanup; |
3743 | } |
3744 | } |
3745 | |
3746 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsGetChunkPosition, 1, rowid); |
3747 | rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsGetChunkPosition); |
3748 | // special case: when no results, return SQLITE_EMPTY to convey "that chunk |
3749 | // position doesnt exist" |
3750 | if (rc == SQLITE_DONE101) { |
3751 | rc = SQLITE_EMPTY16; |
3752 | goto cleanup; |
3753 | } |
3754 | if (rc != SQLITE_ROW100) { |
3755 | goto cleanup; |
3756 | } |
3757 | |
3758 | if (id) { |
3759 | sqlite3_value *value = |
3760 | sqlite3_column_valuesqlite3_api->column_value(p->stmtRowidsGetChunkPosition, 0); |
3761 | *id = sqlite3_value_dupsqlite3_api->value_dup(value); |
3762 | if (!*id) { |
3763 | rc = SQLITE_NOMEM7; |
3764 | goto cleanup; |
3765 | } |
3766 | } |
3767 | |
3768 | if (chunk_id) { |
3769 | *chunk_id = sqlite3_column_int64sqlite3_api->column_int64(p->stmtRowidsGetChunkPosition, 1); |
3770 | } |
3771 | if (chunk_offset) { |
3772 | *chunk_offset = sqlite3_column_int64sqlite3_api->column_int64(p->stmtRowidsGetChunkPosition, 2); |
3773 | } |
3774 | |
3775 | rc = SQLITE_OK0; |
3776 | |
3777 | cleanup: |
3778 | sqlite3_resetsqlite3_api->reset(p->stmtRowidsGetChunkPosition); |
3779 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsGetChunkPosition); |
3780 | return rc; |
3781 | } |
3782 | |
3783 | /** |
3784 | * @brief Return the id value from the _rowids table where _rowids.rowid = |
3785 | * rowid. |
3786 | * |
3787 | * @param pVtab: vec0 table to query |
3788 | * @param rowid: rowid of the row to query. |
3789 | * @param out: A dup'ed sqlite3_value of the id column. Might be null. |
3790 | * Must be cleaned up with sqlite3_value_free(). |
3791 | * @returns SQLITE_OK on success, error code on failure |
3792 | */ |
3793 | int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid, |
3794 | sqlite3_value **out) { |
3795 | // PERF: different strategy than get_chunk_position? |
3796 | return vec0_get_chunk_position((vec0_vtab *)pVtab, rowid, out, NULL((void*)0), NULL((void*)0)); |
3797 | } |
3798 | |
3799 | int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) { |
3800 | sqlite3_stmt *stmt = NULL((void*)0); |
3801 | int rc; |
3802 | char *zSql; |
3803 | zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT rowid" |
3804 | " FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE id = ?", |
3805 | p->schemaName, p->tableName); |
3806 | if (!zSql) { |
3807 | rc = SQLITE_NOMEM7; |
3808 | goto cleanup; |
3809 | } |
3810 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
3811 | sqlite3_freesqlite3_api->free(zSql); |
3812 | if (rc != SQLITE_OK0) { |
3813 | goto cleanup; |
3814 | } |
3815 | sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1, valueId); |
3816 | rc = sqlite3_stepsqlite3_api->step(stmt); |
3817 | if (rc == SQLITE_DONE101) { |
3818 | rc = SQLITE_EMPTY16; |
3819 | goto cleanup; |
3820 | } |
3821 | if (rc != SQLITE_ROW100) { |
3822 | goto cleanup; |
3823 | } |
3824 | *rowid = sqlite3_column_int64sqlite3_api->column_int64(stmt, 0); |
3825 | rc = sqlite3_stepsqlite3_api->step(stmt); |
3826 | if (rc != SQLITE_DONE101) { |
3827 | goto cleanup; |
3828 | } |
3829 | |
3830 | rc = SQLITE_OK0; |
3831 | |
3832 | cleanup: |
3833 | sqlite3_finalizesqlite3_api->finalize(stmt); |
3834 | return rc; |
3835 | } |
3836 | |
3837 | int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) { |
3838 | if (!p->pkIsText) { |
3839 | sqlite3_result_int64sqlite3_api->result_int64(context, rowid); |
3840 | return SQLITE_OK0; |
3841 | } |
3842 | sqlite3_value *valueId; |
3843 | int rc = vec0_get_id_value_from_rowid(p, rowid, &valueId); |
3844 | if (rc != SQLITE_OK0) { |
3845 | return rc; |
3846 | } |
3847 | if (!valueId) { |
3848 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
3849 | } else { |
3850 | sqlite3_result_valuesqlite3_api->result_value(context, valueId); |
3851 | sqlite3_value_freesqlite3_api->value_free(valueId); |
3852 | } |
3853 | return SQLITE_OK0; |
3854 | } |
3855 | |
3856 | /** |
3857 | * @brief |
3858 | * |
3859 | * @param pVtab: virtual table to query |
3860 | * @param rowid: row to lookup |
3861 | * @param vector_column_idx: which vector column to query |
3862 | * @param outVector: Output pointer to the vector buffer. |
3863 | * Must be sqlite3_free()'ed. |
3864 | * @param outVectorSize: Pointer to a int where the size of outVector |
3865 | * will be stored. |
3866 | * @return int SQLITE_OK on success. |
3867 | */ |
3868 | int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx, |
3869 | void **outVector, int *outVectorSize) { |
3870 | vec0_vtab *p = pVtab; |
3871 | int rc, brc; |
3872 | i64 chunk_id; |
3873 | i64 chunk_offset; |
3874 | size_t size; |
3875 | void *buf = NULL((void*)0); |
3876 | int blobOffset; |
3877 | sqlite3_blob *vectorBlob = NULL((void*)0); |
3878 | assert((vector_column_idx >= 0) &&((void) sizeof (((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)) ? 1 : 0), __extension__ ({ if ((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)) ; else __assert_fail ("(vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 3879, __extension__ __PRETTY_FUNCTION__); })) |
3879 | (vector_column_idx < pVtab->numVectorColumns))((void) sizeof (((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)) ? 1 : 0), __extension__ ({ if ((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)) ; else __assert_fail ("(vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 3879, __extension__ __PRETTY_FUNCTION__); })); |
3880 | |
3881 | rc = vec0_get_chunk_position(pVtab, rowid, NULL((void*)0), &chunk_id, &chunk_offset); |
3882 | if (rc == SQLITE_EMPTY16) { |
3883 | vtab_set_error(&pVtab->base, "Could not find a row with rowid %lld", rowid); |
3884 | goto cleanup; |
3885 | } |
3886 | if (rc != SQLITE_OK0) { |
3887 | goto cleanup; |
3888 | } |
3889 | |
3890 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, |
3891 | p->shadowVectorChunksNames[vector_column_idx], |
3892 | "vectors", chunk_id, 0, &vectorBlob); |
3893 | |
3894 | if (rc != SQLITE_OK0) { |
3895 | vtab_set_error(&pVtab->base, |
3896 | "Could not fetch vector data for %lld, opening blob failed", |
3897 | rowid); |
3898 | rc = SQLITE_ERROR1; |
3899 | goto cleanup; |
3900 | } |
3901 | |
3902 | size = vector_column_byte_size(pVtab->vector_columns[vector_column_idx]); |
3903 | blobOffset = chunk_offset * size; |
3904 | |
3905 | buf = sqlite3_mallocsqlite3_api->malloc(size); |
3906 | if (!buf) { |
3907 | rc = SQLITE_NOMEM7; |
3908 | goto cleanup; |
3909 | } |
3910 | |
3911 | rc = sqlite3_blob_readsqlite3_api->blob_read(vectorBlob, buf, size, blobOffset); |
3912 | if (rc != SQLITE_OK0) { |
3913 | sqlite3_freesqlite3_api->free(buf); |
3914 | buf = NULL((void*)0); |
3915 | vtab_set_error( |
3916 | &pVtab->base, |
3917 | "Could not fetch vector data for %lld, reading from blob failed", |
3918 | rowid); |
3919 | rc = SQLITE_ERROR1; |
3920 | goto cleanup; |
3921 | } |
3922 | |
3923 | *outVector = buf; |
3924 | if (outVectorSize) { |
3925 | *outVectorSize = size; |
3926 | } |
3927 | rc = SQLITE_OK0; |
3928 | |
3929 | cleanup: |
3930 | brc = sqlite3_blob_closesqlite3_api->blob_close(vectorBlob); |
3931 | if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) { |
3932 | vtab_set_error( |
3933 | &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
3934 | "unknown error, could not close vector blob, please file an issue"); |
3935 | return brc; |
3936 | } |
3937 | |
3938 | return rc; |
3939 | } |
3940 | |
3941 | /** |
3942 | * @brief Retrieve the sqlite3_value of the i'th partition value for the given row. |
3943 | * |
3944 | * @param pVtab - the vec0_vtab in questions |
3945 | * @param rowid - rowid of target row |
3946 | * @param partition_idx - which partition column to retrieve |
3947 | * @param outValue - output sqlite3_value |
3948 | * @return int - SQLITE_OK on success, otherwise error code |
3949 | */ |
3950 | int vec0_get_partition_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int partition_idx, sqlite3_value ** outValue) { |
3951 | int rc; |
3952 | i64 chunk_id; |
3953 | i64 chunk_offset; |
3954 | rc = vec0_get_chunk_position(pVtab, rowid, NULL((void*)0), &chunk_id, &chunk_offset); |
3955 | if(rc != SQLITE_OK0) { |
3956 | return rc; |
3957 | } |
3958 | sqlite3_stmt * stmt = NULL((void*)0); |
3959 | char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT partition%02d FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" " WHERE chunk_id = ?", partition_idx, pVtab->schemaName, pVtab->tableName); |
3960 | if(!zSql) { |
3961 | return SQLITE_NOMEM7; |
3962 | } |
3963 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pVtab->db, zSql, -1, &stmt, NULL((void*)0)); |
3964 | sqlite3_freesqlite3_api->free(zSql); |
3965 | if(rc != SQLITE_OK0) { |
3966 | return rc; |
3967 | } |
3968 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, chunk_id); |
3969 | rc = sqlite3_stepsqlite3_api->step(stmt); |
3970 | if(rc != SQLITE_ROW100) { |
3971 | rc = SQLITE_ERROR1; |
3972 | goto done; |
3973 | } |
3974 | *outValue = sqlite3_value_dupsqlite3_api->value_dup(sqlite3_column_valuesqlite3_api->column_value(stmt, 0)); |
3975 | if(!*outValue) { |
3976 | rc = SQLITE_NOMEM7; |
3977 | goto done; |
3978 | } |
3979 | rc = SQLITE_OK0; |
3980 | |
3981 | done: |
3982 | sqlite3_finalizesqlite3_api->finalize(stmt); |
3983 | return rc; |
3984 | |
3985 | } |
3986 | |
3987 | /** |
3988 | * @brief Get the value of an auxiliary column for the given rowid |
3989 | * |
3990 | * @param pVtab vec0_vtab |
3991 | * @param rowid the rowid of the row to lookup |
3992 | * @param auxiliary_idx aux index of the column we care about |
3993 | * @param outValue Output sqlite3_value to store |
3994 | * @return int SQLITE_OK on success, error code otherwise |
3995 | */ |
3996 | int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int auxiliary_idx, sqlite3_value ** outValue) { |
3997 | int rc; |
3998 | sqlite3_stmt * stmt = NULL((void*)0); |
3999 | char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT value%02d FROM " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " WHERE rowid = ?", auxiliary_idx, pVtab->schemaName, pVtab->tableName); |
4000 | if(!zSql) { |
4001 | return SQLITE_NOMEM7; |
4002 | } |
4003 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pVtab->db, zSql, -1, &stmt, NULL((void*)0)); |
4004 | sqlite3_freesqlite3_api->free(zSql); |
4005 | if(rc != SQLITE_OK0) { |
4006 | return rc; |
4007 | } |
4008 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
4009 | rc = sqlite3_stepsqlite3_api->step(stmt); |
4010 | if(rc != SQLITE_ROW100) { |
4011 | rc = SQLITE_ERROR1; |
4012 | goto done; |
4013 | } |
4014 | *outValue = sqlite3_value_dupsqlite3_api->value_dup(sqlite3_column_valuesqlite3_api->column_value(stmt, 0)); |
4015 | if(!*outValue) { |
4016 | rc = SQLITE_NOMEM7; |
4017 | goto done; |
4018 | } |
4019 | rc = SQLITE_OK0; |
4020 | |
4021 | done: |
4022 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4023 | return rc; |
4024 | } |
4025 | |
4026 | /** |
4027 | * @brief Result the given metadata value for the given row and metadata column index. |
4028 | * Will traverse the metadatachunksNN table with BLOB I/0 for the given rowid. |
4029 | * |
4030 | * @param p |
4031 | * @param rowid |
4032 | * @param metadata_idx |
4033 | * @param context |
4034 | * @return int |
4035 | */ |
4036 | int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_idx, sqlite3_context * context) { |
4037 | int rc; |
4038 | i64 chunk_id; |
4039 | i64 chunk_offset; |
4040 | rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset); |
4041 | if(rc != SQLITE_OK0) { |
4042 | return rc; |
4043 | } |
4044 | sqlite3_blob * blobValue; |
4045 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &blobValue); |
4046 | if(rc != SQLITE_OK0) { |
4047 | return rc; |
4048 | } |
4049 | |
4050 | switch(p->metadata_columns[metadata_idx].kind) { |
4051 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { |
4052 | u8 block; |
4053 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(block), chunk_offset / CHAR_BIT8); |
4054 | if(rc != SQLITE_OK0) { |
4055 | goto done; |
4056 | } |
4057 | int value = block >> ((chunk_offset % CHAR_BIT8)) & 1; |
4058 | sqlite3_result_intsqlite3_api->result_int(context, value); |
4059 | break; |
4060 | } |
4061 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { |
4062 | i64 value; |
4063 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64)); |
4064 | if(rc != SQLITE_OK0) { |
4065 | goto done; |
4066 | } |
4067 | sqlite3_result_int64sqlite3_api->result_int64(context, value); |
4068 | break; |
4069 | } |
4070 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { |
4071 | double value; |
4072 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double)); |
4073 | if(rc != SQLITE_OK0) { |
4074 | goto done; |
4075 | } |
4076 | sqlite3_result_doublesqlite3_api->result_double(context, value); |
4077 | break; |
4078 | } |
4079 | case VEC0_METADATA_COLUMN_KIND_TEXT: { |
4080 | u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
4081 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); |
4082 | if(rc != SQLITE_OK0) { |
4083 | goto done; |
4084 | } |
4085 | int length = ((int *)view)[0]; |
4086 | if(length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
4087 | sqlite3_result_textsqlite3_api->result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
4088 | } |
4089 | else { |
4090 | sqlite3_stmt * stmt; |
4091 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx); |
4092 | if(!zSql) { |
4093 | rc = SQLITE_ERROR1; |
4094 | goto done; |
4095 | } |
4096 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
4097 | sqlite3_freesqlite3_api->free((void *) zSql); |
4098 | if(rc != SQLITE_OK0) { |
4099 | goto done; |
4100 | } |
4101 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
4102 | rc = sqlite3_stepsqlite3_api->step(stmt); |
4103 | if(rc != SQLITE_ROW100) { |
4104 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4105 | rc = SQLITE_ERROR1; |
4106 | goto done; |
4107 | } |
4108 | sqlite3_result_valuesqlite3_api->result_value(context, sqlite3_column_valuesqlite3_api->column_value(stmt, 0)); |
4109 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4110 | rc = SQLITE_OK0; |
4111 | } |
4112 | break; |
4113 | } |
4114 | } |
4115 | done: |
4116 | // blobValue is read-only, will not fail on close |
4117 | sqlite3_blob_closesqlite3_api->blob_close(blobValue); |
4118 | return rc; |
4119 | |
4120 | } |
4121 | |
4122 | int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_value ** partitionKeyValues) { |
4123 | int rc; |
4124 | const char *zSql; |
4125 | // lazy initialize stmtLatestChunk when needed. May be cleared during xSync() |
4126 | if (!p->stmtLatestChunk) { |
4127 | if(p->numPartitionColumns > 0) { |
4128 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); |
4129 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" " WHERE ", |
4130 | p->schemaName, p->tableName); |
4131 | |
4132 | for(int i = 0; i < p->numPartitionColumns; i++) { |
4133 | if(i != 0) { |
4134 | sqlite3_str_appendallsqlite3_api->str_appendall(s, " AND "); |
4135 | } |
4136 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d = ? ", i); |
4137 | } |
4138 | zSql = sqlite3_str_finishsqlite3_api->str_finish(s); |
4139 | }else { |
4140 | zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", |
4141 | p->schemaName, p->tableName); |
4142 | } |
4143 | |
4144 | if (!zSql) { |
4145 | rc = SQLITE_NOMEM7; |
4146 | goto cleanup; |
4147 | } |
4148 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtLatestChunk, 0); |
4149 | sqlite3_freesqlite3_api->free((void *)zSql); |
4150 | if (rc != SQLITE_OK0) { |
4151 | // IMP: V21406_05476 |
4152 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
4153 | "could not initialize 'latest chunk' statement"); |
4154 | goto cleanup; |
4155 | } |
4156 | } |
4157 | |
4158 | for(int i = 0; i < p->numPartitionColumns; i++) { |
4159 | sqlite3_bind_valuesqlite3_api->bind_value(p->stmtLatestChunk, i+1, (partitionKeyValues[i])); |
4160 | } |
4161 | |
4162 | rc = sqlite3_stepsqlite3_api->step(p->stmtLatestChunk); |
4163 | if (rc != SQLITE_ROW100) { |
4164 | // IMP: V31559_15629 |
4165 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "Could not find latest chunk"); |
4166 | rc = SQLITE_ERROR1; |
4167 | goto cleanup; |
4168 | } |
4169 | if(sqlite3_column_typesqlite3_api->column_type(p->stmtLatestChunk, 0) == SQLITE_NULL5){ |
4170 | rc = SQLITE_EMPTY16; |
4171 | goto cleanup; |
4172 | } |
4173 | *chunk_rowid = sqlite3_column_int64sqlite3_api->column_int64(p->stmtLatestChunk, 0); |
4174 | rc = sqlite3_stepsqlite3_api->step(p->stmtLatestChunk); |
4175 | if (rc != SQLITE_DONE101) { |
4176 | vtab_set_error(&p->base, |
4177 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
4178 | "unknown result code when closing out stmtLatestChunk. " |
4179 | "Please file an issue: " REPORT_URL"https://github.com/asg017/sqlite-vec/issues/new", |
4180 | p->schemaName, p->shadowChunksName); |
4181 | goto cleanup; |
4182 | } |
4183 | rc = SQLITE_OK0; |
4184 | |
4185 | cleanup: |
4186 | if (p->stmtLatestChunk) { |
4187 | sqlite3_resetsqlite3_api->reset(p->stmtLatestChunk); |
4188 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtLatestChunk); |
4189 | } |
4190 | return rc; |
4191 | } |
4192 | |
4193 | int vec0_rowids_insert_rowid(vec0_vtab *p, i64 rowid) { |
4194 | int rc = SQLITE_OK0; |
4195 | int entered = 0; |
4196 | UNUSED_PARAMETER(entered)(void)(entered); // temporary |
4197 | if (!p->stmtRowidsInsertRowid) { |
4198 | const char *zSql = |
4199 | sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "(rowid)" |
4200 | "VALUES (?);", |
4201 | p->schemaName, p->tableName); |
4202 | if (!zSql) { |
4203 | rc = SQLITE_NOMEM7; |
4204 | goto cleanup; |
4205 | } |
4206 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertRowid, 0); |
4207 | sqlite3_freesqlite3_api->free((void *)zSql); |
4208 | if (rc != SQLITE_OK0) { |
4209 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
4210 | "could not initialize 'insert rowids' statement"); |
4211 | goto cleanup; |
4212 | } |
4213 | } |
4214 | |
4215 | #if SQLITE_THREADSAFE |
4216 | if (sqlite3_mutex_entersqlite3_api->mutex_enter) { |
4217 | sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); |
4218 | entered = 1; |
4219 | } |
4220 | #endif |
4221 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsInsertRowid, 1, rowid); |
4222 | rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsInsertRowid); |
4223 | |
4224 | if (rc != SQLITE_DONE101) { |
4225 | if (sqlite3_extended_errcodesqlite3_api->extended_errcode(p->db) == SQLITE_CONSTRAINT_PRIMARYKEY(19 | (6<<8))) { |
4226 | // IMP: V17090_01160 |
4227 | vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key", |
4228 | p->tableName); |
4229 | } else { |
4230 | // IMP: V04679_21517 |
4231 | vtab_set_error(&p->base, |
4232 | "Error inserting rowid into rowids shadow table: %s", |
4233 | sqlite3_errmsgsqlite3_api->errmsg(sqlite3_db_handlesqlite3_api->db_handle(p->stmtRowidsInsertId))); |
4234 | } |
4235 | rc = SQLITE_ERROR1; |
4236 | goto cleanup; |
4237 | } |
4238 | |
4239 | rc = SQLITE_OK0; |
4240 | |
4241 | cleanup: |
4242 | if (p->stmtRowidsInsertRowid) { |
4243 | sqlite3_resetsqlite3_api->reset(p->stmtRowidsInsertRowid); |
4244 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsInsertRowid); |
4245 | } |
4246 | |
4247 | #if SQLITE_THREADSAFE |
4248 | if (sqlite3_mutex_leavesqlite3_api->mutex_leave && entered) { |
4249 | sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); |
4250 | } |
4251 | #endif |
4252 | return rc; |
4253 | } |
4254 | |
4255 | int vec0_rowids_insert_id(vec0_vtab *p, sqlite3_value *idValue, i64 *rowid) { |
4256 | int rc = SQLITE_OK0; |
4257 | int entered = 0; |
4258 | UNUSED_PARAMETER(entered)(void)(entered); // temporary |
4259 | if (!p->stmtRowidsInsertId) { |
4260 | const char *zSql = |
4261 | sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "(id)" |
4262 | "VALUES (?);", |
4263 | p->schemaName, p->tableName); |
4264 | if (!zSql) { |
4265 | rc = SQLITE_NOMEM7; |
4266 | goto complete; |
4267 | } |
4268 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertId, 0); |
4269 | sqlite3_freesqlite3_api->free((void *)zSql); |
4270 | if (rc != SQLITE_OK0) { |
4271 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
4272 | "could not initialize 'insert rowids id' statement"); |
4273 | goto complete; |
4274 | } |
4275 | } |
4276 | |
4277 | #if SQLITE_THREADSAFE |
4278 | if (sqlite3_mutex_entersqlite3_api->mutex_enter) { |
4279 | sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); |
4280 | entered = 1; |
4281 | } |
4282 | #endif |
4283 | |
4284 | if (idValue) { |
4285 | sqlite3_bind_valuesqlite3_api->bind_value(p->stmtRowidsInsertId, 1, idValue); |
4286 | } |
4287 | rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsInsertId); |
4288 | |
4289 | if (rc != SQLITE_DONE101) { |
4290 | if (sqlite3_extended_errcodesqlite3_api->extended_errcode(p->db) == SQLITE_CONSTRAINT_UNIQUE(19 | (8<<8))) { |
4291 | // IMP: V20497_04568 |
4292 | vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key", |
4293 | p->tableName); |
4294 | } else { |
4295 | // IMP: V24016_08086 |
4296 | // IMP: V15177_32015 |
4297 | vtab_set_error(&p->base, |
4298 | "Error inserting id into rowids shadow table: %s", |
4299 | sqlite3_errmsgsqlite3_api->errmsg(sqlite3_db_handlesqlite3_api->db_handle(p->stmtRowidsInsertId))); |
4300 | } |
4301 | rc = SQLITE_ERROR1; |
4302 | goto complete; |
4303 | } |
4304 | |
4305 | *rowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->db); |
4306 | rc = SQLITE_OK0; |
4307 | |
4308 | complete: |
4309 | if (p->stmtRowidsInsertId) { |
4310 | sqlite3_resetsqlite3_api->reset(p->stmtRowidsInsertId); |
4311 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsInsertId); |
4312 | } |
4313 | |
4314 | #if SQLITE_THREADSAFE |
4315 | if (sqlite3_mutex_leavesqlite3_api->mutex_leave && entered) { |
4316 | sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); |
4317 | } |
4318 | #endif |
4319 | return rc; |
4320 | } |
4321 | |
4322 | int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_size) { |
4323 | switch(kind) { |
4324 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: |
4325 | return chunk_size / 8; |
4326 | case VEC0_METADATA_COLUMN_KIND_INTEGER: |
4327 | return chunk_size * sizeof(i64); |
4328 | case VEC0_METADATA_COLUMN_KIND_FLOAT: |
4329 | return chunk_size * sizeof(double); |
4330 | case VEC0_METADATA_COLUMN_KIND_TEXT: |
4331 | return chunk_size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16; |
4332 | } |
4333 | return 0; |
4334 | } |
4335 | |
4336 | int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid, |
4337 | i64 chunk_offset) { |
4338 | int rc = SQLITE_OK0; |
4339 | |
4340 | if (!p->stmtRowidsUpdatePosition) { |
4341 | const char *zSql = sqlite3_mprintfsqlite3_api->mprintf(" UPDATE " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" |
4342 | " SET chunk_id = ?, chunk_offset = ?" |
4343 | " WHERE rowid = ?", |
4344 | p->schemaName, p->tableName); |
4345 | if (!zSql) { |
4346 | rc = SQLITE_NOMEM7; |
4347 | goto cleanup; |
4348 | } |
4349 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsUpdatePosition, 0); |
4350 | sqlite3_freesqlite3_api->free((void *)zSql); |
4351 | if (rc != SQLITE_OK0) { |
4352 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
4353 | "could not initialize 'update rowids position' statement"); |
4354 | goto cleanup; |
4355 | } |
4356 | } |
4357 | |
4358 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 1, chunk_rowid); |
4359 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 2, chunk_offset); |
4360 | sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 3, rowid); |
4361 | |
4362 | rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsUpdatePosition); |
4363 | if (rc != SQLITE_DONE101) { |
4364 | // IMP: V21925_05995 |
4365 | vtab_set_error(&p->base, |
4366 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
4367 | "could not update rowids position for rowid=%lld, " |
4368 | "chunk_rowid=%lld, chunk_offset=%lld", |
4369 | rowid, chunk_rowid, chunk_offset); |
4370 | rc = SQLITE_ERROR1; |
4371 | goto cleanup; |
4372 | } |
4373 | rc = SQLITE_OK0; |
4374 | |
4375 | cleanup: |
4376 | if (p->stmtRowidsUpdatePosition) { |
4377 | sqlite3_resetsqlite3_api->reset(p->stmtRowidsUpdatePosition); |
4378 | sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsUpdatePosition); |
4379 | } |
4380 | |
4381 | return rc; |
4382 | } |
4383 | |
4384 | /** |
4385 | * @brief Adds a new chunk for the vec0 table, and the corresponding vector |
4386 | * chunks. |
4387 | * |
4388 | * Inserts a new row into the _chunks table, with blank data, and uses that new |
4389 | * rowid to insert new blank rows into _vector_chunksXX tables. |
4390 | * |
4391 | * @param p: vec0 table to add new chunk |
4392 | * @param paritionKeyValues: Array of partition key valeus for the new chunk, if available |
4393 | * @param chunk_rowid: Output pointer, if not NULL, then will be filled with the |
4394 | * new chunk rowid. |
4395 | * @return int SQLITE_OK on success, error code otherwise. |
4396 | */ |
4397 | int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk_rowid) { |
4398 | int rc; |
4399 | char *zSql; |
4400 | sqlite3_stmt *stmt; |
4401 | i64 rowid; |
4402 | |
4403 | // Step 1: Insert a new row in _chunks, capture that new rowid |
4404 | if(p->numPartitionColumns > 0) { |
4405 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); |
4406 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "INSERT INTO " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", p->schemaName, p->tableName); |
4407 | sqlite3_str_appendallsqlite3_api->str_appendall(s, "(size, validity, rowids"); |
4408 | for(int i = 0; i < p->numPartitionColumns; i++) { |
4409 | sqlite3_str_appendfsqlite3_api->str_appendf(s, ", partition%02d", i); |
4410 | } |
4411 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ") VALUES (?, ?, ?"); |
4412 | for(int i = 0; i < p->numPartitionColumns; i++) { |
4413 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ", ?"); |
4414 | } |
4415 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ")"); |
4416 | |
4417 | zSql = sqlite3_str_finishsqlite3_api->str_finish(s); |
4418 | }else { |
4419 | zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" |
4420 | "(size, validity, rowids) " |
4421 | "VALUES (?, ?, ?);", |
4422 | p->schemaName, p->tableName); |
4423 | } |
4424 | |
4425 | if (!zSql) { |
4426 | return SQLITE_NOMEM7; |
4427 | } |
4428 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
4429 | sqlite3_freesqlite3_api->free(zSql); |
4430 | if (rc != SQLITE_OK0) { |
4431 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4432 | return rc; |
4433 | } |
4434 | |
4435 | #if SQLITE_THREADSAFE |
4436 | if (sqlite3_mutex_entersqlite3_api->mutex_enter) { |
4437 | sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); |
4438 | } |
4439 | #endif |
4440 | |
4441 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, p->chunk_size); // size |
4442 | sqlite3_bind_zeroblobsqlite3_api->bind_zeroblob(stmt, 2, p->chunk_size / CHAR_BIT8); // validity bitmap |
4443 | sqlite3_bind_zeroblobsqlite3_api->bind_zeroblob(stmt, 3, p->chunk_size * sizeof(i64)); // rowids |
4444 | |
4445 | for(int i = 0; i < p->numPartitionColumns; i++) { |
4446 | sqlite3_bind_valuesqlite3_api->bind_value(stmt, 4 + i, partitionKeyValues[i]); |
4447 | } |
4448 | |
4449 | rc = sqlite3_stepsqlite3_api->step(stmt); |
4450 | int failed = rc != SQLITE_DONE101; |
4451 | rowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->db); |
4452 | #if SQLITE_THREADSAFE |
4453 | if (sqlite3_mutex_leavesqlite3_api->mutex_leave) { |
4454 | sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db)); |
4455 | } |
4456 | #endif |
4457 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4458 | if (failed) { |
4459 | return SQLITE_ERROR1; |
4460 | } |
4461 | |
4462 | // Step 2: Create new vector chunks for each vector column, with |
4463 | // that new chunk_rowid. |
4464 | |
4465 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
4466 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) { |
4467 | continue; |
4468 | } |
4469 | int vector_column_idx = p->user_column_idxs[i]; |
4470 | i64 vectorsSize = |
4471 | p->chunk_size * vector_column_byte_size(p->vector_columns[vector_column_idx]); |
4472 | |
4473 | zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_VECTOR_N_NAME"\"%w\".\"%w_vector_chunks%02d\"" |
4474 | "(rowid, vectors)" |
4475 | "VALUES (?, ?)", |
4476 | p->schemaName, p->tableName, vector_column_idx); |
4477 | if (!zSql) { |
4478 | return SQLITE_NOMEM7; |
4479 | } |
4480 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
4481 | sqlite3_freesqlite3_api->free(zSql); |
4482 | |
4483 | if (rc != SQLITE_OK0) { |
4484 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4485 | return rc; |
4486 | } |
4487 | |
4488 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
4489 | sqlite3_bind_zeroblob64sqlite3_api->bind_zeroblob64(stmt, 2, vectorsSize); |
4490 | |
4491 | rc = sqlite3_stepsqlite3_api->step(stmt); |
4492 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4493 | if (rc != SQLITE_DONE101) { |
4494 | return rc; |
4495 | } |
4496 | } |
4497 | |
4498 | // Step 3: Create new metadata chunks for each metadata column |
4499 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
4500 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { |
4501 | continue; |
4502 | } |
4503 | int metadata_column_idx = p->user_column_idxs[i]; |
4504 | zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"" |
4505 | "(rowid, data)" |
4506 | "VALUES (?, ?)", |
4507 | p->schemaName, p->tableName, metadata_column_idx); |
4508 | if (!zSql) { |
4509 | return SQLITE_NOMEM7; |
4510 | } |
4511 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
4512 | sqlite3_freesqlite3_api->free(zSql); |
4513 | |
4514 | if (rc != SQLITE_OK0) { |
4515 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4516 | return rc; |
4517 | } |
4518 | |
4519 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
4520 | sqlite3_bind_zeroblob64sqlite3_api->bind_zeroblob64(stmt, 2, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size)); |
4521 | |
4522 | rc = sqlite3_stepsqlite3_api->step(stmt); |
4523 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4524 | if (rc != SQLITE_DONE101) { |
4525 | return rc; |
4526 | } |
4527 | } |
4528 | |
4529 | |
4530 | if (chunk_rowid) { |
4531 | *chunk_rowid = rowid; |
4532 | } |
4533 | |
4534 | return SQLITE_OK0; |
4535 | } |
4536 | |
4537 | struct vec0_query_fullscan_data { |
4538 | sqlite3_stmt *rowids_stmt; |
4539 | i8 done; |
4540 | }; |
4541 | void vec0_query_fullscan_data_clear( |
4542 | struct vec0_query_fullscan_data *fullscan_data) { |
4543 | if (!fullscan_data) |
4544 | return; |
4545 | |
4546 | if (fullscan_data->rowids_stmt) { |
4547 | sqlite3_finalizesqlite3_api->finalize(fullscan_data->rowids_stmt); |
4548 | fullscan_data->rowids_stmt = NULL((void*)0); |
4549 | } |
4550 | } |
4551 | |
4552 | struct vec0_query_knn_data { |
4553 | i64 k; |
4554 | i64 k_used; |
4555 | // Array of rowids of size k. Must be freed with sqlite3_free(). |
4556 | i64 *rowids; |
4557 | // Array of distances of size k. Must be freed with sqlite3_free(). |
4558 | f32 *distances; |
4559 | i64 current_idx; |
4560 | }; |
4561 | void vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) { |
4562 | if (!knn_data) |
4563 | return; |
4564 | |
4565 | if (knn_data->rowids) { |
4566 | sqlite3_freesqlite3_api->free(knn_data->rowids); |
4567 | knn_data->rowids = NULL((void*)0); |
4568 | } |
4569 | if (knn_data->distances) { |
4570 | sqlite3_freesqlite3_api->free(knn_data->distances); |
4571 | knn_data->distances = NULL((void*)0); |
4572 | } |
4573 | } |
4574 | |
4575 | struct vec0_query_point_data { |
4576 | i64 rowid; |
4577 | void *vectors[VEC0_MAX_VECTOR_COLUMNS16]; |
4578 | int done; |
4579 | }; |
4580 | void vec0_query_point_data_clear(struct vec0_query_point_data *point_data) { |
4581 | if (!point_data) |
4582 | return; |
4583 | for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS16; i++) { |
4584 | sqlite3_freesqlite3_api->free(point_data->vectors[i]); |
4585 | point_data->vectors[i] = NULL((void*)0); |
4586 | } |
4587 | } |
4588 | |
// Query plan kinds for a vec0 cursor. Each value is an ASCII digit character.
// If any values are updated, please update the ARCHITECTURE.md docs accordingly!
typedef enum {
  VEC0_QUERY_PLAN_FULLSCAN = '1',
  VEC0_QUERY_PLAN_POINT = '2',
  VEC0_QUERY_PLAN_KNN = '3',
} vec0_query_plan;
4596 | |
4597 | typedef struct vec0_cursor vec0_cursor; |
4598 | struct vec0_cursor { |
4599 | sqlite3_vtab_cursor base; |
4600 | |
4601 | vec0_query_plan query_plan; |
4602 | struct vec0_query_fullscan_data *fullscan_data; |
4603 | struct vec0_query_knn_data *knn_data; |
4604 | struct vec0_query_point_data *point_data; |
4605 | }; |
4606 | |
4607 | void vec0_cursor_clear(vec0_cursor *pCur) { |
4608 | if (pCur->fullscan_data) { |
4609 | vec0_query_fullscan_data_clear(pCur->fullscan_data); |
4610 | sqlite3_freesqlite3_api->free(pCur->fullscan_data); |
4611 | pCur->fullscan_data = NULL((void*)0); |
4612 | } |
4613 | if (pCur->knn_data) { |
4614 | vec0_query_knn_data_clear(pCur->knn_data); |
4615 | sqlite3_freesqlite3_api->free(pCur->knn_data); |
4616 | pCur->knn_data = NULL((void*)0); |
4617 | } |
4618 | if (pCur->point_data) { |
4619 | vec0_query_point_data_clear(pCur->point_data); |
4620 | sqlite3_freesqlite3_api->free(pCur->point_data); |
4621 | pCur->point_data = NULL((void*)0); |
4622 | } |
4623 | } |
4624 | |
// Prefix for error messages reported by the vec0 constructor.
#define VEC_CONSTRUCTOR_ERROR "vec0 constructor error: "
4626 | static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, |
4627 | sqlite3_vtab **ppVtab, char **pzErr, bool_Bool isCreate) { |
4628 | UNUSED_PARAMETER(pAux)(void)(pAux); |
4629 | vec0_vtab *pNew; |
4630 | int rc; |
4631 | const char *zSql; |
4632 | |
4633 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); |
4634 | if (pNew == 0) |
4635 | return SQLITE_NOMEM7; |
4636 | memset(pNew, 0, sizeof(*pNew)); |
4637 | |
4638 | // Declared chunk_size=N for entire table. |
4639 | // -1 to use the defualt, otherwise will get re-assigned on `chunk_size=N` |
4640 | // option |
4641 | int chunk_size = -1; |
4642 | int numVectorColumns = 0; |
4643 | int numPartitionColumns = 0; |
4644 | int numAuxiliaryColumns = 0; |
4645 | int numMetadataColumns = 0; |
4646 | int user_column_idx = 0; |
4647 | |
4648 | // track if a "primary key" column is defined |
4649 | char *pkColumnName = NULL((void*)0); |
4650 | int pkColumnNameLength; |
4651 | int pkColumnType = SQLITE_INTEGER1; |
4652 | |
4653 | for (int i = 3; i < argc; i++) { |
4654 | struct VectorColumnDefinition vecColumn; |
4655 | struct Vec0PartitionColumnDefinition partitionColumn; |
4656 | struct Vec0AuxiliaryColumnDefinition auxColumn; |
4657 | struct Vec0MetadataColumnDefinition metadataColumn; |
4658 | char *cName = NULL((void*)0); |
4659 | int cNameLength; |
4660 | int cType; |
4661 | |
4662 | // Scenario #1: Constructor argument is a vector column definition, ie `foo float[1024]` |
4663 | rc = vec0_parse_vector_column(argv[i], strlen(argv[i]), &vecColumn); |
4664 | if (rc == SQLITE_ERROR1) { |
4665 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
4666 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "could not parse vector column '%s'", argv[i]); |
4667 | goto error; |
4668 | } |
4669 | if (rc == SQLITE_OK0) { |
4670 | if (numVectorColumns >= VEC0_MAX_VECTOR_COLUMNS16) { |
4671 | sqlite3_freesqlite3_api->free(vecColumn.name); |
4672 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4673 | "Too many provided vector columns, maximum %d", |
4674 | VEC0_MAX_VECTOR_COLUMNS16); |
4675 | goto error; |
4676 | } |
4677 | |
4678 | if (vecColumn.dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS8192) { |
4679 | sqlite3_freesqlite3_api->free(vecColumn.name); |
4680 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
4681 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4682 | "Dimension on vector column too large, provided %lld, maximum %lld", |
4683 | (i64)vecColumn.dimensions, SQLITE_VEC_VEC0_MAX_DIMENSIONS8192); |
4684 | goto error; |
4685 | } |
4686 | pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_VECTOR; |
4687 | pNew->user_column_idxs[user_column_idx] = numVectorColumns; |
4688 | memcpy(&pNew->vector_columns[numVectorColumns], &vecColumn, sizeof(vecColumn)); |
4689 | numVectorColumns++; |
4690 | user_column_idx++; |
4691 | |
4692 | continue; |
4693 | } |
4694 | |
4695 | // Scenario #2: Constructor argument is a partition key column definition, ie `user_id text partition key` |
4696 | rc = vec0_parse_partition_key_definition(argv[i], strlen(argv[i]), &cName, |
4697 | &cNameLength, &cType); |
4698 | if (rc == SQLITE_OK0) { |
4699 | if (numPartitionColumns >= VEC0_MAX_PARTITION_COLUMNS4) { |
4700 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
4701 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4702 | "More than %d partition key columns were provided", |
4703 | VEC0_MAX_PARTITION_COLUMNS4); |
4704 | goto error; |
4705 | } |
4706 | partitionColumn.type = cType; |
4707 | partitionColumn.name_length = cNameLength; |
4708 | partitionColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName); |
4709 | if(!partitionColumn.name) { |
4710 | rc = SQLITE_NOMEM7; |
4711 | goto error; |
4712 | } |
4713 | |
4714 | pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_PARTITION; |
4715 | pNew->user_column_idxs[user_column_idx] = numPartitionColumns; |
4716 | memcpy(&pNew->paritition_columns[numPartitionColumns], &partitionColumn, sizeof(partitionColumn)); |
4717 | numPartitionColumns++; |
4718 | user_column_idx++; |
4719 | continue; |
4720 | } |
4721 | |
4722 | // Scenario #3: Constructor argument is a primary key column definition, ie `article_id text primary key` |
4723 | rc = vec0_parse_primary_key_definition(argv[i], strlen(argv[i]), &cName, |
4724 | &cNameLength, &cType); |
4725 | if (rc == SQLITE_OK0) { |
4726 | if (pkColumnName) { |
4727 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
4728 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4729 | "More than one primary key definition was provided, vec0 only " |
4730 | "suports a single primary key column", |
4731 | argv[i]); |
4732 | goto error; |
4733 | } |
4734 | pkColumnName = cName; |
4735 | pkColumnNameLength = cNameLength; |
4736 | pkColumnType = cType; |
4737 | continue; |
4738 | } |
4739 | |
4740 | // Scenario #4: Constructor argument is a auxiliary column definition, ie `+contents text` |
4741 | rc = vec0_parse_auxiliary_column_definition(argv[i], strlen(argv[i]), &cName, |
4742 | &cNameLength, &cType); |
4743 | if(rc == SQLITE_OK0) { |
4744 | if (numAuxiliaryColumns >= VEC0_MAX_AUXILIARY_COLUMNS16) { |
4745 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
4746 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4747 | "More than %d auxiliary columns were provided", |
4748 | VEC0_MAX_AUXILIARY_COLUMNS16); |
4749 | goto error; |
4750 | } |
4751 | auxColumn.type = cType; |
4752 | auxColumn.name_length = cNameLength; |
4753 | auxColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName); |
4754 | if(!auxColumn.name) { |
4755 | rc = SQLITE_NOMEM7; |
4756 | goto error; |
4757 | } |
4758 | |
4759 | pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY; |
4760 | pNew->user_column_idxs[user_column_idx] = numAuxiliaryColumns; |
4761 | memcpy(&pNew->auxiliary_columns[numAuxiliaryColumns], &auxColumn, sizeof(auxColumn)); |
4762 | numAuxiliaryColumns++; |
4763 | user_column_idx++; |
4764 | continue; |
4765 | } |
4766 | |
4767 | vec0_metadata_column_kind kind; |
4768 | rc = vec0_parse_metadata_column_definition(argv[i], strlen(argv[i]), &cName, |
4769 | &cNameLength, &kind); |
4770 | if(rc == SQLITE_OK0) { |
4771 | if (numMetadataColumns >= VEC0_MAX_METADATA_COLUMNS16) { |
4772 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
4773 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4774 | "More than %d metadata columns were provided", |
4775 | VEC0_MAX_METADATA_COLUMNS16); |
4776 | goto error; |
4777 | } |
4778 | metadataColumn.kind = kind; |
4779 | metadataColumn.name_length = cNameLength; |
4780 | metadataColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName); |
4781 | if(!metadataColumn.name) { |
4782 | rc = SQLITE_NOMEM7; |
4783 | goto error; |
4784 | } |
4785 | |
4786 | pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_METADATA; |
4787 | pNew->user_column_idxs[user_column_idx] = numMetadataColumns; |
4788 | memcpy(&pNew->metadata_columns[numMetadataColumns], &metadataColumn, sizeof(metadataColumn)); |
4789 | numMetadataColumns++; |
4790 | user_column_idx++; |
4791 | continue; |
4792 | } |
4793 | |
4794 | // Scenario #4: Constructor argument is a table-level option, ie `chunk_size` |
4795 | |
4796 | char *key; |
4797 | char *value; |
4798 | int keyLength, valueLength; |
4799 | rc = vec0_parse_table_option(argv[i], strlen(argv[i]), &key, &keyLength, |
4800 | &value, &valueLength); |
4801 | if (rc == SQLITE_ERROR1) { |
4802 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
4803 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "could not parse table option '%s'", argv[i]); |
4804 | goto error; |
4805 | } |
4806 | if (rc == SQLITE_OK0) { |
4807 | if (sqlite3_strnicmpsqlite3_api->strnicmp(key, "chunk_size", keyLength) == 0) { |
4808 | chunk_size = atoi(value); |
4809 | if (chunk_size <= 0) { |
4810 | // IMP: V01931_18769 |
4811 | *pzErr = |
4812 | sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4813 | "chunk_size must be a non-zero positive integer"); |
4814 | goto error; |
4815 | } |
4816 | if ((chunk_size % 8) != 0) { |
4817 | // IMP: V14110_30948 |
4818 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4819 | "chunk_size must be divisible by 8"); |
4820 | goto error; |
4821 | } |
4822 | #define SQLITE_VEC_CHUNK_SIZE_MAX4096 4096 |
4823 | if (chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX4096) { |
4824 | *pzErr = |
4825 | sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "chunk_size too large"); |
4826 | goto error; |
4827 | } |
4828 | } else { |
4829 | // IMP: V27642_11712 |
4830 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
4831 | VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "Unknown table option: %.*s", keyLength, key); |
4832 | goto error; |
4833 | } |
4834 | continue; |
4835 | } |
4836 | |
4837 | // Scenario #5: Unknown constructor argument |
4838 | *pzErr = |
4839 | sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "Could not parse '%s'", argv[i]); |
4840 | goto error; |
4841 | } |
4842 | |
4843 | if (chunk_size < 0) { |
4844 | chunk_size = 1024; |
4845 | } |
4846 | |
4847 | if (numVectorColumns <= 0) { |
4848 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4849 | "At least one vector column is required"); |
4850 | goto error; |
4851 | } |
4852 | |
4853 | sqlite3_str *createStr = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); |
4854 | sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "CREATE TABLE x("); |
4855 | if (pkColumnName) { |
4856 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\" primary key, ", pkColumnNameLength, |
4857 | pkColumnName); |
4858 | } else { |
4859 | sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "rowid, "); |
4860 | } |
4861 | for (int i = 0; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns + numMetadataColumns; i++) { |
4862 | switch(pNew->user_column_kinds[i]) { |
4863 | case SQLITE_VEC0_USER_COLUMN_KIND_VECTOR: { |
4864 | int vector_idx = pNew->user_column_idxs[i]; |
4865 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ", |
4866 | pNew->vector_columns[vector_idx].name_length, |
4867 | pNew->vector_columns[vector_idx].name); |
4868 | break; |
4869 | } |
4870 | case SQLITE_VEC0_USER_COLUMN_KIND_PARTITION: { |
4871 | int partition_idx = pNew->user_column_idxs[i]; |
4872 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ", |
4873 | pNew->paritition_columns[partition_idx].name_length, |
4874 | pNew->paritition_columns[partition_idx].name); |
4875 | break; |
4876 | } |
4877 | case SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY: { |
4878 | int auxiliary_idx = pNew->user_column_idxs[i]; |
4879 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ", |
4880 | pNew->auxiliary_columns[auxiliary_idx].name_length, |
4881 | pNew->auxiliary_columns[auxiliary_idx].name); |
4882 | break; |
4883 | } |
4884 | case SQLITE_VEC0_USER_COLUMN_KIND_METADATA: { |
4885 | int metadata_idx = pNew->user_column_idxs[i]; |
4886 | sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ", |
4887 | pNew->metadata_columns[metadata_idx].name_length, |
4888 | pNew->metadata_columns[metadata_idx].name); |
4889 | break; |
4890 | } |
4891 | } |
4892 | |
4893 | } |
4894 | sqlite3_str_appendallsqlite3_api->str_appendall(createStr, " distance hidden, k hidden) "); |
4895 | if (pkColumnName) { |
4896 | sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "without rowid "); |
4897 | } |
4898 | zSql = sqlite3_str_finishsqlite3_api->str_finish(createStr); |
4899 | if (!zSql) { |
4900 | goto error; |
4901 | } |
4902 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, zSql); |
4903 | sqlite3_freesqlite3_api->free((void *)zSql); |
4904 | if (rc != SQLITE_OK0) { |
4905 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " |
4906 | "could not declare virtual table, '%s'", |
4907 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
4908 | goto error; |
4909 | } |
4910 | |
4911 | const char *schemaName = argv[1]; |
4912 | const char *tableName = argv[2]; |
4913 | |
4914 | pNew->db = db; |
4915 | pNew->pkIsText = pkColumnType == SQLITE_TEXT3; |
4916 | pNew->schemaName = sqlite3_mprintfsqlite3_api->mprintf("%s", schemaName); |
4917 | if (!pNew->schemaName) { |
4918 | goto error; |
4919 | } |
4920 | pNew->tableName = sqlite3_mprintfsqlite3_api->mprintf("%s", tableName); |
4921 | if (!pNew->tableName) { |
4922 | goto error; |
4923 | } |
4924 | pNew->shadowRowidsName = sqlite3_mprintfsqlite3_api->mprintf("%s_rowids", tableName); |
4925 | if (!pNew->shadowRowidsName) { |
4926 | goto error; |
4927 | } |
4928 | pNew->shadowChunksName = sqlite3_mprintfsqlite3_api->mprintf("%s_chunks", tableName); |
4929 | if (!pNew->shadowChunksName) { |
4930 | goto error; |
4931 | } |
4932 | pNew->numVectorColumns = numVectorColumns; |
4933 | pNew->numPartitionColumns = numPartitionColumns; |
4934 | pNew->numAuxiliaryColumns = numAuxiliaryColumns; |
4935 | pNew->numMetadataColumns = numMetadataColumns; |
4936 | |
4937 | for (int i = 0; i < pNew->numVectorColumns; i++) { |
4938 | pNew->shadowVectorChunksNames[i] = |
4939 | sqlite3_mprintfsqlite3_api->mprintf("%s_vector_chunks%02d", tableName, i); |
4940 | if (!pNew->shadowVectorChunksNames[i]) { |
4941 | goto error; |
4942 | } |
4943 | } |
4944 | for (int i = 0; i < pNew->numMetadataColumns; i++) { |
4945 | pNew->shadowMetadataChunksNames[i] = |
4946 | sqlite3_mprintfsqlite3_api->mprintf("%s_metadatachunks%02d", tableName, i); |
4947 | if (!pNew->shadowMetadataChunksNames[i]) { |
4948 | goto error; |
4949 | } |
4950 | } |
4951 | pNew->chunk_size = chunk_size; |
4952 | |
4953 | // if xCreate, then create the necessary shadow tables |
4954 | if (isCreate) { |
4955 | sqlite3_stmt *stmt; |
4956 | int rc; |
4957 | |
4958 | char * zCreateInfo = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE "VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"" " (key text primary key, value any)", pNew->schemaName, pNew->tableName); |
4959 | if(!zCreateInfo) { |
4960 | goto error; |
4961 | } |
4962 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateInfo, -1, &stmt, NULL((void*)0)); |
4963 | |
4964 | sqlite3_freesqlite3_api->free((void *) zCreateInfo); |
4965 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
4966 | // TODO(IMP) |
4967 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4968 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_info' shadow table: %s", |
4969 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
4970 | goto error; |
4971 | } |
4972 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4973 | |
4974 | char * zSeedInfo = sqlite3_mprintfsqlite3_api->mprintf( |
4975 | "INSERT INTO "VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"" "(key, value) VALUES " |
4976 | "(?1, ?2), (?3, ?4), (?5, ?6), (?7, ?8) ", |
4977 | pNew->schemaName, pNew->tableName |
4978 | ); |
4979 | if(!zSeedInfo) { |
4980 | goto error; |
4981 | } |
4982 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSeedInfo, -1, &stmt, NULL((void*)0)); |
4983 | sqlite3_freesqlite3_api->free((void *) zSeedInfo); |
4984 | if (rc != SQLITE_OK0) { |
4985 | // TODO(IMP) |
4986 | sqlite3_finalizesqlite3_api->finalize(stmt); |
4987 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not seed '_info' shadow table: %s", |
4988 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
4989 | goto error; |
4990 | } |
4991 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 1, "CREATE_VERSION", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); |
4992 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 2, SQLITE_VEC_VERSION"v0.1.7-alpha.2", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); |
4993 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 3, "CREATE_VERSION_MAJOR", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); |
4994 | sqlite3_bind_intsqlite3_api->bind_int(stmt, 4, SQLITE_VEC_VERSION_MAJOR0); |
4995 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 5, "CREATE_VERSION_MINOR", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); |
4996 | sqlite3_bind_intsqlite3_api->bind_int(stmt, 6, SQLITE_VEC_VERSION_MINOR1); |
4997 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 7, "CREATE_VERSION_PATCH", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); |
4998 | sqlite3_bind_intsqlite3_api->bind_int(stmt, 8, SQLITE_VEC_VERSION_PATCH7); |
4999 | |
5000 | if(sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101) { |
5001 | // TODO(IMP) |
5002 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5003 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not seed '_info' shadow table: %s", |
5004 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
5005 | goto error; |
5006 | } |
5007 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5008 | |
5009 | |
5010 | |
5011 | // create the _chunks shadow table |
5012 | char *zCreateShadowChunks = NULL((void*)0); |
5013 | if(pNew->numPartitionColumns) { |
5014 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); |
5015 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" "(", pNew->schemaName, pNew->tableName); |
5016 | sqlite3_str_appendallsqlite3_api->str_appendall(s, "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," "size INTEGER NOT NULL,"); |
5017 | sqlite3_str_appendallsqlite3_api->str_appendall(s, "sequence_id integer,"); |
5018 | for(int i = 0; i < pNew->numPartitionColumns;i++) { |
5019 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "partition%02d,", i); |
5020 | } |
5021 | sqlite3_str_appendallsqlite3_api->str_appendall(s, "validity BLOB NOT NULL, rowids BLOB NOT NULL);"); |
5022 | zCreateShadowChunks = sqlite3_str_finishsqlite3_api->str_finish(s); |
5023 | }else { |
5024 | zCreateShadowChunks = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_CHUNKS_CREATE"CREATE TABLE " "\"%w\".\"%w_chunks\"" "(" "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," "size INTEGER NOT NULL," "validity BLOB NOT NULL," "rowids BLOB NOT NULL" ");", |
5025 | pNew->schemaName, pNew->tableName); |
5026 | } |
5027 | if (!zCreateShadowChunks) { |
5028 | goto error; |
5029 | } |
5030 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateShadowChunks, -1, &stmt, 0); |
5031 | sqlite3_freesqlite3_api->free((void *)zCreateShadowChunks); |
5032 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5033 | // IMP: V17740_01811 |
5034 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5035 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_chunks' shadow table: %s", |
5036 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
5037 | goto error; |
5038 | } |
5039 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5040 | |
5041 | // create the _rowids shadow table |
5042 | char *zCreateShadowRowids; |
5043 | if (pNew->pkIsText) { |
5044 | // adds a "text unique not null" constraint to the id column |
5045 | zCreateShadowRowids = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT"CREATE TABLE " "\"%w\".\"%w_rowids\"" "(" "rowid INTEGER PRIMARY KEY AUTOINCREMENT," "id TEXT UNIQUE NOT NULL," "chunk_id INTEGER," "chunk_offset INTEGER" ");", |
5046 | pNew->schemaName, pNew->tableName); |
5047 | } else { |
5048 | zCreateShadowRowids = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_ROWIDS_CREATE_BASIC"CREATE TABLE " "\"%w\".\"%w_rowids\"" "(" "rowid INTEGER PRIMARY KEY AUTOINCREMENT," "id," "chunk_id INTEGER," "chunk_offset INTEGER" ");", |
5049 | pNew->schemaName, pNew->tableName); |
5050 | } |
5051 | if (!zCreateShadowRowids) { |
5052 | goto error; |
5053 | } |
5054 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateShadowRowids, -1, &stmt, 0); |
5055 | sqlite3_freesqlite3_api->free((void *)zCreateShadowRowids); |
5056 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5057 | // IMP: V11631_28470 |
5058 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5059 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_rowids' shadow table: %s", |
5060 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
5061 | goto error; |
5062 | } |
5063 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5064 | |
5065 | for (int i = 0; i < pNew->numVectorColumns; i++) { |
5066 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_VECTOR_N_CREATE"CREATE TABLE " "\"%w\".\"%w_vector_chunks%02d\"" "(" "rowid PRIMARY KEY," "vectors BLOB NOT NULL" ");", |
5067 | pNew->schemaName, pNew->tableName, i); |
5068 | if (!zSql) { |
5069 | goto error; |
5070 | } |
5071 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0); |
5072 | sqlite3_freesqlite3_api->free((void *)zSql); |
5073 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5074 | // IMP: V25919_09989 |
5075 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5076 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
5077 | "Could not create '_vector_chunks%02d' shadow table: %s", i, |
5078 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
5079 | goto error; |
5080 | } |
5081 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5082 | } |
5083 | |
5084 | for (int i = 0; i < pNew->numMetadataColumns; i++) { |
5085 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"" "(rowid PRIMARY KEY, data BLOB NOT NULL);", |
5086 | pNew->schemaName, pNew->tableName, i); |
5087 | if (!zSql) { |
5088 | goto error; |
5089 | } |
5090 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0); |
5091 | sqlite3_freesqlite3_api->free((void *)zSql); |
5092 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5093 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5094 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
5095 | "Could not create '_metata_chunks%02d' shadow table: %s", i, |
5096 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
5097 | goto error; |
5098 | } |
5099 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5100 | |
5101 | if(pNew->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { |
5102 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" "(rowid PRIMARY KEY, data TEXT);", |
5103 | pNew->schemaName, pNew->tableName, i); |
5104 | if (!zSql) { |
5105 | goto error; |
5106 | } |
5107 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0); |
5108 | sqlite3_freesqlite3_api->free((void *)zSql); |
5109 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5110 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5111 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
5112 | "Could not create '_metadatatext%02d' shadow table: %s", i, |
5113 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
5114 | goto error; |
5115 | } |
5116 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5117 | |
5118 | } |
5119 | } |
5120 | |
5121 | if(pNew->numAuxiliaryColumns > 0) { |
5122 | sqlite3_stmt * stmt; |
5123 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); |
5124 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "CREATE TABLE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" "( rowid integer PRIMARY KEY ", pNew->schemaName, pNew->tableName); |
5125 | for(int i = 0; i < pNew->numAuxiliaryColumns; i++) { |
5126 | sqlite3_str_appendfsqlite3_api->str_appendf(s, ", value%02d", i); |
5127 | } |
5128 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ")"); |
5129 | char *zSql = sqlite3_str_finishsqlite3_api->str_finish(s); |
5130 | if(!zSql) { |
5131 | goto error; |
5132 | } |
5133 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, NULL((void*)0)); |
5134 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5135 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5136 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( |
5137 | "Could not create auxiliary shadow table: %s", |
5138 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
5139 | |
5140 | goto error; |
5141 | } |
5142 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5143 | } |
5144 | } |
5145 | |
5146 | *ppVtab = (sqlite3_vtab *)pNew; |
5147 | return SQLITE_OK0; |
5148 | |
5149 | error: |
5150 | vec0_free(pNew); |
5151 | return SQLITE_ERROR1; |
5152 | } |
5153 | |
5154 | static int vec0Create(sqlite3 *db, void *pAux, int argc, |
5155 | const char *const *argv, sqlite3_vtab **ppVtab, |
5156 | char **pzErr) { |
5157 | return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, true1); |
5158 | } |
5159 | static int vec0Connect(sqlite3 *db, void *pAux, int argc, |
5160 | const char *const *argv, sqlite3_vtab **ppVtab, |
5161 | char **pzErr) { |
5162 | return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, false0); |
5163 | } |
5164 | |
5165 | static int vec0Disconnect(sqlite3_vtab *pVtab) { |
5166 | vec0_vtab *p = (vec0_vtab *)pVtab; |
5167 | vec0_free(p); |
5168 | sqlite3_freesqlite3_api->free(p); |
5169 | return SQLITE_OK0; |
5170 | } |
5171 | static int vec0Destroy(sqlite3_vtab *pVtab) { |
5172 | vec0_vtab *p = (vec0_vtab *)pVtab; |
5173 | sqlite3_stmt *stmt; |
5174 | int rc; |
5175 | const char *zSql; |
5176 | |
5177 | // Free up any sqlite3_stmt, otherwise DROPs on those tables will fail |
5178 | vec0_free_resources(p); |
5179 | |
5180 | // TODO(test) later: can't evidence-of here, bc always gives "SQL logic error" instead of |
5181 | // provided error |
5182 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", p->schemaName, |
5183 | p->tableName); |
5184 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); |
5185 | sqlite3_freesqlite3_api->free((void *)zSql); |
5186 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5187 | rc = SQLITE_ERROR1; |
5188 | vtab_set_error(pVtab, "could not drop chunks shadow table"); |
5189 | goto done; |
5190 | } |
5191 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5192 | |
5193 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"", p->schemaName, |
5194 | p->tableName); |
5195 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); |
5196 | sqlite3_freesqlite3_api->free((void *)zSql); |
5197 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5198 | rc = SQLITE_ERROR1; |
5199 | vtab_set_error(pVtab, "could not drop info shadow table"); |
5200 | goto done; |
5201 | } |
5202 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5203 | |
5204 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"", p->schemaName, |
5205 | p->tableName); |
5206 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); |
5207 | sqlite3_freesqlite3_api->free((void *)zSql); |
5208 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5209 | rc = SQLITE_ERROR1; |
5210 | goto done; |
5211 | } |
5212 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5213 | |
5214 | for (int i = 0; i < p->numVectorColumns; i++) { |
5215 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE \"%w\".\"%w\"", p->schemaName, |
5216 | p->shadowVectorChunksNames[i]); |
5217 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); |
5218 | sqlite3_freesqlite3_api->free((void *)zSql); |
5219 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5220 | rc = SQLITE_ERROR1; |
5221 | goto done; |
5222 | } |
5223 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5224 | } |
5225 | |
5226 | if(p->numAuxiliaryColumns > 0) { |
5227 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"", p->schemaName, p->tableName); |
5228 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); |
5229 | sqlite3_freesqlite3_api->free((void *)zSql); |
5230 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5231 | rc = SQLITE_ERROR1; |
5232 | goto done; |
5233 | } |
5234 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5235 | } |
5236 | |
5237 | |
5238 | for (int i = 0; i < p->numMetadataColumns; i++) { |
5239 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"", p->schemaName,p->tableName, i); |
5240 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); |
5241 | sqlite3_freesqlite3_api->free((void *)zSql); |
5242 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5243 | rc = SQLITE_ERROR1; |
5244 | goto done; |
5245 | } |
5246 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5247 | |
5248 | if(p->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { |
5249 | zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"", p->schemaName,p->tableName, i); |
5250 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0); |
5251 | sqlite3_freesqlite3_api->free((void *)zSql); |
5252 | if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) { |
5253 | rc = SQLITE_ERROR1; |
5254 | goto done; |
5255 | } |
5256 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5257 | } |
5258 | } |
5259 | |
5260 | stmt = NULL((void*)0); |
5261 | rc = SQLITE_OK0; |
5262 | |
5263 | done: |
5264 | sqlite3_finalizesqlite3_api->finalize(stmt); |
5265 | vec0_free(p); |
5266 | // If there was an error |
5267 | if (rc == SQLITE_OK0) { |
5268 | sqlite3_freesqlite3_api->free(p); |
5269 | } |
5270 | return rc; |
5271 | } |
5272 | |
5273 | static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { |
5274 | UNUSED_PARAMETER(p)(void)(p); |
5275 | vec0_cursor *pCur; |
5276 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); |
5277 | if (pCur == 0) |
5278 | return SQLITE_NOMEM7; |
5279 | memset(pCur, 0, sizeof(*pCur)); |
5280 | *ppCursor = &pCur->base; |
5281 | return SQLITE_OK0; |
5282 | } |
5283 | |
5284 | static int vec0Close(sqlite3_vtab_cursor *cur) { |
5285 | vec0_cursor *pCur = (vec0_cursor *)cur; |
5286 | vec0_cursor_clear(pCur); |
5287 | sqlite3_freesqlite3_api->free(pCur); |
5288 | return SQLITE_OK0; |
5289 | } |
5290 | |
// Tags for the different kinds of "values" passed through argv/argc to
// vec0Filter. vec0BestIndex writes one tag character into idxStr for every
// argv slot it assigns, so each tag records what the matching argument is
// for.
//
// If any values are updated, please update the ARCHITECTURE.md docs
// accordingly!
typedef enum {
  // KNN query: the MATCH constraint on a vector column
  VEC0_IDXSTR_KIND_KNN_MATCH = '{',
  // KNN query: the LIMIT or 'k = ?' constraint
  VEC0_IDXSTR_KIND_KNN_K = '}',
  // KNN query: a 'rowid in (...)' constraint
  VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',
  // KNN query: a constraint on a partition key column
  VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',
  // Point query: the EQ constraint on the rowid/id column
  VEC0_IDXSTR_KIND_POINT_ID = '!',
  // KNN query: a constraint on a metadata column
  VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
} vec0_idxstr_kind;
5303 | |
// The SQLITE_INDEX_CONSTRAINT_* operators that vec0 partition key columns
// support, re-encoded as single characters that fit nicely in idxStr.
//
// If any values are updated, please update the ARCHITECTURE.md docs
// accordingly!
typedef enum {
  VEC0_PARTITION_OPERATOR_EQ = 'a', // SQLITE_INDEX_CONSTRAINT_EQ
  VEC0_PARTITION_OPERATOR_GT = 'b', // SQLITE_INDEX_CONSTRAINT_GT
  VEC0_PARTITION_OPERATOR_LE = 'c', // SQLITE_INDEX_CONSTRAINT_LE
  VEC0_PARTITION_OPERATOR_LT = 'd', // SQLITE_INDEX_CONSTRAINT_LT
  VEC0_PARTITION_OPERATOR_GE = 'e', // SQLITE_INDEX_CONSTRAINT_GE
  VEC0_PARTITION_OPERATOR_NE = 'f', // SQLITE_INDEX_CONSTRAINT_NE
} vec0_partition_operator;
// Single-character idxStr encodings of the comparison operators accepted on
// vec0 metadata columns in KNN queries. The first six use the same
// characters as vec0_partition_operator; IN is the extra code vec0BestIndex
// emits when an EQ constraint is really an 'xxx in (...)' list.
typedef enum {
  VEC0_METADATA_OPERATOR_EQ = 'a', // SQLITE_INDEX_CONSTRAINT_EQ
  VEC0_METADATA_OPERATOR_GT = 'b', // SQLITE_INDEX_CONSTRAINT_GT
  VEC0_METADATA_OPERATOR_LE = 'c', // SQLITE_INDEX_CONSTRAINT_LE
  VEC0_METADATA_OPERATOR_LT = 'd', // SQLITE_INDEX_CONSTRAINT_LT
  VEC0_METADATA_OPERATOR_GE = 'e', // SQLITE_INDEX_CONSTRAINT_GE
  VEC0_METADATA_OPERATOR_NE = 'f', // SQLITE_INDEX_CONSTRAINT_NE
  VEC0_METADATA_OPERATOR_IN = 'g', // EQ constraint flagged by sqlite3_vtab_in()
} vec0_metadata_operator;
5325 | |
5326 | static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) { |
5327 | vec0_vtab *p = (vec0_vtab *)pVTab; |
5328 | /** |
5329 | * Possible query plans are: |
5330 | * 1. KNN when: |
5331 | * a) An `MATCH` op on vector column |
5332 | * b) ORDER BY on distance column |
5333 | * c) LIMIT |
5334 | * d) rowid in (...) OPTIONAL |
5335 | * 2. Point when: |
5336 | * a) An `EQ` op on rowid column |
5337 | * 3. else: fullscan |
5338 | * |
5339 | */ |
5340 | int iMatchTerm = -1; |
5341 | int iMatchVectorTerm = -1; |
5342 | int iLimitTerm = -1; |
5343 | int iRowidTerm = -1; |
5344 | int iKTerm = -1; |
5345 | int iRowidInTerm = -1; |
5346 | int hasAuxConstraint = 0; |
5347 | |
5348 | #ifdef SQLITE_VEC_DEBUG |
5349 | printf("pIdxInfo->nOrderBy=%d, pIdxInfo->nConstraint=%d\n", pIdxInfo->nOrderBy, pIdxInfo->nConstraint); |
5350 | #endif |
5351 | |
5352 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { |
5353 | u8 vtabIn = 0; |
5354 | |
5355 | #if COMPILER_SUPPORTS_VTAB_IN1 |
5356 | if (sqlite3_libversion_numbersqlite3_api->libversion_number() >= 3038000) { |
5357 | vtabIn = sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, -1); |
5358 | } |
5359 | #endif |
5360 | |
5361 | #ifdef SQLITE_VEC_DEBUG |
5362 | printf("xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d\n", i, |
5363 | pIdxInfo->aConstraint[i].usable, pIdxInfo->aConstraint[i].iColumn, |
5364 | pIdxInfo->aConstraint[i].op, vtabIn); |
5365 | #endif |
5366 | if (!pIdxInfo->aConstraint[i].usable) |
5367 | continue; |
5368 | |
5369 | int iColumn = pIdxInfo->aConstraint[i].iColumn; |
5370 | int op = pIdxInfo->aConstraint[i].op; |
5371 | |
5372 | if (op == SQLITE_INDEX_CONSTRAINT_LIMIT73) { |
5373 | iLimitTerm = i; |
5374 | } |
5375 | if (op == SQLITE_INDEX_CONSTRAINT_MATCH64 && |
5376 | vec0_column_idx_is_vector(p, iColumn)) { |
5377 | if (iMatchTerm > -1) { |
5378 | vtab_set_error( |
5379 | pVTab, "only 1 MATCH operator is allowed in a single vec0 query"); |
5380 | return SQLITE_ERROR1; |
5381 | } |
5382 | iMatchTerm = i; |
5383 | iMatchVectorTerm = vec0_column_idx_to_vector_idx(p, iColumn); |
5384 | } |
5385 | if (op == SQLITE_INDEX_CONSTRAINT_EQ2 && iColumn == VEC0_COLUMN_ID0) { |
5386 | if (vtabIn) { |
5387 | if (iRowidInTerm != -1) { |
5388 | vtab_set_error(pVTab, "only 1 'rowid in (..)' operator is allowed in " |
5389 | "a single vec0 query"); |
5390 | return SQLITE_ERROR1; |
5391 | } |
5392 | iRowidInTerm = i; |
5393 | |
5394 | } else { |
5395 | iRowidTerm = i; |
5396 | } |
5397 | } |
5398 | if (op == SQLITE_INDEX_CONSTRAINT_EQ2 && iColumn == vec0_column_k_idx(p)) { |
5399 | iKTerm = i; |
5400 | } |
5401 | if( |
5402 | (op != SQLITE_INDEX_CONSTRAINT_LIMIT73 && op != SQLITE_INDEX_CONSTRAINT_OFFSET74) |
5403 | && vec0_column_idx_is_auxiliary(p, iColumn)) { |
5404 | hasAuxConstraint = 1; |
5405 | } |
5406 | } |
5407 | |
5408 | sqlite3_str *idxStr = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); |
5409 | int rc; |
5410 | |
5411 | if (iMatchTerm >= 0) { |
5412 | if (iLimitTerm < 0 && iKTerm < 0) { |
5413 | vtab_set_error( |
5414 | pVTab, |
5415 | "A LIMIT or 'k = ?' constraint is required on vec0 knn queries."); |
5416 | rc = SQLITE_ERROR1; |
5417 | goto done; |
5418 | } |
5419 | if (iLimitTerm >= 0 && iKTerm >= 0) { |
5420 | vtab_set_error(pVTab, "Only LIMIT or 'k =?' can be provided, not both"); |
5421 | rc = SQLITE_ERROR1; |
5422 | goto done; |
5423 | } |
5424 | |
5425 | if (pIdxInfo->nOrderBy) { |
5426 | if (pIdxInfo->nOrderBy > 1) { |
5427 | vtab_set_error(pVTab, "Only a single 'ORDER BY distance' clause is " |
5428 | "allowed on vec0 KNN queries"); |
5429 | rc = SQLITE_ERROR1; |
5430 | goto done; |
5431 | } |
5432 | if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) { |
5433 | vtab_set_error(pVTab, |
5434 | "Only a single 'ORDER BY distance' clause is allowed on " |
5435 | "vec0 KNN queries, not on other columns"); |
5436 | rc = SQLITE_ERROR1; |
5437 | goto done; |
5438 | } |
5439 | if (pIdxInfo->aOrderBy[0].desc) { |
5440 | vtab_set_error( |
5441 | pVTab, "Only ascending in ORDER BY distance clause is supported, " |
5442 | "DESC is not supported yet."); |
5443 | rc = SQLITE_ERROR1; |
5444 | goto done; |
5445 | } |
5446 | } |
5447 | |
5448 | if(hasAuxConstraint) { |
5449 | // IMP: V25623_09693 |
5450 | vtab_set_error(pVTab, "An illegal WHERE constraint was provided on a vec0 auxiliary column in a KNN query."); |
5451 | rc = SQLITE_ERROR1; |
5452 | goto done; |
5453 | } |
5454 | |
5455 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_KNN); |
5456 | |
5457 | int argvIndex = 1; |
5458 | pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = argvIndex++; |
5459 | pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1; |
5460 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_MATCH); |
5461 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_'); |
5462 | |
5463 | if (iLimitTerm >= 0) { |
5464 | pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = argvIndex++; |
5465 | pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1; |
5466 | } else { |
5467 | pIdxInfo->aConstraintUsage[iKTerm].argvIndex = argvIndex++; |
5468 | pIdxInfo->aConstraintUsage[iKTerm].omit = 1; |
5469 | } |
5470 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_K); |
5471 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_'); |
5472 | |
5473 | #if COMPILER_SUPPORTS_VTAB_IN1 |
5474 | if (iRowidInTerm >= 0) { |
5475 | // already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when |
5476 | // vtabIn == 1 |
5477 | sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, iRowidInTerm, 1); |
5478 | pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = argvIndex++; |
5479 | pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1; |
5480 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_ROWID_IN); |
5481 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_'); |
5482 | } |
5483 | #endif |
5484 | |
5485 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { |
5486 | if (!pIdxInfo->aConstraint[i].usable) |
5487 | continue; |
5488 | |
5489 | int iColumn = pIdxInfo->aConstraint[i].iColumn; |
5490 | int op = pIdxInfo->aConstraint[i].op; |
5491 | if(op == SQLITE_INDEX_CONSTRAINT_LIMIT73 || op == SQLITE_INDEX_CONSTRAINT_OFFSET74) { |
5492 | continue; |
5493 | } |
5494 | if(!vec0_column_idx_is_partition(p, iColumn)) { |
5495 | continue; |
5496 | } |
5497 | |
5498 | int partition_idx = vec0_column_idx_to_partition_idx(p, iColumn); |
5499 | char value = 0; |
5500 | |
5501 | switch(op) { |
5502 | case SQLITE_INDEX_CONSTRAINT_EQ2: { |
5503 | value = VEC0_PARTITION_OPERATOR_EQ; |
5504 | break; |
5505 | } |
5506 | case SQLITE_INDEX_CONSTRAINT_GT4: { |
5507 | value = VEC0_PARTITION_OPERATOR_GT; |
5508 | break; |
5509 | } |
5510 | case SQLITE_INDEX_CONSTRAINT_LE8: { |
5511 | value = VEC0_PARTITION_OPERATOR_LE; |
5512 | break; |
5513 | } |
5514 | case SQLITE_INDEX_CONSTRAINT_LT16: { |
5515 | value = VEC0_PARTITION_OPERATOR_LT; |
5516 | break; |
5517 | } |
5518 | case SQLITE_INDEX_CONSTRAINT_GE32: { |
5519 | value = VEC0_PARTITION_OPERATOR_GE; |
5520 | break; |
5521 | } |
5522 | case SQLITE_INDEX_CONSTRAINT_NE68: { |
5523 | value = VEC0_PARTITION_OPERATOR_NE; |
5524 | break; |
5525 | } |
5526 | } |
5527 | |
5528 | if(value) { |
5529 | pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++; |
5530 | pIdxInfo->aConstraintUsage[i].omit = 1; |
5531 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT); |
5532 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, 'A' + partition_idx); |
5533 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, value); |
5534 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, '_'); |
5535 | } |
5536 | |
5537 | } |
5538 | |
5539 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { |
5540 | if (!pIdxInfo->aConstraint[i].usable) |
5541 | continue; |
5542 | |
5543 | int iColumn = pIdxInfo->aConstraint[i].iColumn; |
5544 | int op = pIdxInfo->aConstraint[i].op; |
5545 | if(op == SQLITE_INDEX_CONSTRAINT_LIMIT73 || op == SQLITE_INDEX_CONSTRAINT_OFFSET74) { |
5546 | continue; |
5547 | } |
5548 | if(!vec0_column_idx_is_metadata(p, iColumn)) { |
5549 | continue; |
5550 | } |
5551 | |
5552 | int metadata_idx = vec0_column_idx_to_metadata_idx(p, iColumn); |
5553 | char value = 0; |
5554 | |
5555 | switch(op) { |
5556 | case SQLITE_INDEX_CONSTRAINT_EQ2: { |
5557 | int vtabIn = 0; |
5558 | #if COMPILER_SUPPORTS_VTAB_IN1 |
5559 | if (sqlite3_libversion_numbersqlite3_api->libversion_number() >= 3038000) { |
5560 | vtabIn = sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, -1); |
5561 | } |
5562 | if(vtabIn) { |
5563 | switch(p->metadata_columns[metadata_idx].kind) { |
5564 | case VEC0_METADATA_COLUMN_KIND_FLOAT: |
5565 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { |
5566 | // IMP: V15248_32086 |
5567 | rc = SQLITE_ERROR1; |
5568 | vtab_set_error(pVTab, "'xxx in (...)' is only available on INTEGER or TEXT metadata columns."); |
5569 | goto done; |
5570 | break; |
5571 | } |
5572 | case VEC0_METADATA_COLUMN_KIND_INTEGER: |
5573 | case VEC0_METADATA_COLUMN_KIND_TEXT: { |
5574 | break; |
5575 | } |
5576 | } |
5577 | value = VEC0_METADATA_OPERATOR_IN; |
5578 | sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, 1); |
5579 | }else |
5580 | #endif |
5581 | { |
5582 | value = VEC0_PARTITION_OPERATOR_EQ; |
5583 | } |
5584 | break; |
5585 | } |
5586 | case SQLITE_INDEX_CONSTRAINT_GT4: { |
5587 | value = VEC0_METADATA_OPERATOR_GT; |
5588 | break; |
5589 | } |
5590 | case SQLITE_INDEX_CONSTRAINT_LE8: { |
5591 | value = VEC0_METADATA_OPERATOR_LE; |
5592 | break; |
5593 | } |
5594 | case SQLITE_INDEX_CONSTRAINT_LT16: { |
5595 | value = VEC0_METADATA_OPERATOR_LT; |
5596 | break; |
5597 | } |
5598 | case SQLITE_INDEX_CONSTRAINT_GE32: { |
5599 | value = VEC0_METADATA_OPERATOR_GE; |
5600 | break; |
5601 | } |
5602 | case SQLITE_INDEX_CONSTRAINT_NE68: { |
5603 | value = VEC0_METADATA_OPERATOR_NE; |
5604 | break; |
5605 | } |
5606 | default: { |
5607 | // IMP: V16511_00582 |
5608 | rc = SQLITE_ERROR1; |
5609 | vtab_set_error(pVTab, |
5610 | "An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. " |
5611 | "Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed." |
5612 | ); |
5613 | goto done; |
5614 | } |
5615 | } |
5616 | |
5617 | if(p->metadata_columns[metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) { |
5618 | if(!(value == VEC0_METADATA_OPERATOR_EQ || value == VEC0_METADATA_OPERATOR_NE)) { |
5619 | // IMP: V10145_26984 |
5620 | rc = SQLITE_ERROR1; |
5621 | vtab_set_error(pVTab, "ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns."); |
5622 | goto done; |
5623 | } |
5624 | } |
5625 | |
5626 | pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++; |
5627 | pIdxInfo->aConstraintUsage[i].omit = 1; |
5628 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_METADATA_CONSTRAINT); |
5629 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, 'A' + metadata_idx); |
5630 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, value); |
5631 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, '_'); |
5632 | |
5633 | } |
5634 | |
5635 | |
5636 | |
5637 | pIdxInfo->idxNum = iMatchVectorTerm; |
5638 | pIdxInfo->estimatedCost = 30.0; |
5639 | pIdxInfo->estimatedRows = 10; |
5640 | |
5641 | } else if (iRowidTerm >= 0) { |
5642 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_POINT); |
5643 | pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1; |
5644 | pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1; |
5645 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_POINT_ID); |
5646 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_'); |
5647 | pIdxInfo->idxNum = pIdxInfo->colUsed; |
5648 | pIdxInfo->estimatedCost = 10.0; |
5649 | pIdxInfo->estimatedRows = 1; |
5650 | } else { |
5651 | sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_FULLSCAN); |
5652 | pIdxInfo->estimatedCost = 3000000.0; |
5653 | pIdxInfo->estimatedRows = 100000; |
5654 | } |
5655 | pIdxInfo->idxStr = sqlite3_str_finishsqlite3_api->str_finish(idxStr); |
5656 | idxStr = NULL((void*)0); |
5657 | if (!pIdxInfo->idxStr) { |
5658 | rc = SQLITE_OK0; |
5659 | goto done; |
5660 | } |
5661 | pIdxInfo->needToFreeIdxStr = 1; |
5662 | |
5663 | |
5664 | rc = SQLITE_OK0; |
5665 | |
5666 | done: |
5667 | if(idxStr) { |
5668 | sqlite3_str_finishsqlite3_api->str_finish(idxStr); |
5669 | } |
5670 | return rc; |
5671 | } |
5672 | |
5673 | // forward declaration because vec0Filter uses it |
5674 | static int vec0Next(sqlite3_vtab_cursor *cur); |
5675 | |
5676 | void merge_sorted_lists(f32 *a, i64 *a_rowids, i64 a_length, f32 *b, |
5677 | i64 *b_rowids, i32 *b_top_idxs, i64 b_length, f32 *out, |
5678 | i64 *out_rowids, i64 out_length, i64 *out_used) { |
5679 | // assert((a_length >= out_length) || (b_length >= out_length)); |
5680 | i64 ptrA = 0; |
5681 | i64 ptrB = 0; |
5682 | for (int i = 0; i < out_length; i++) { |
5683 | if ((ptrA >= a_length) && (ptrB >= b_length)) { |
5684 | *out_used = i; |
5685 | return; |
5686 | } |
5687 | if (ptrA >= a_length) { |
5688 | out[i] = b[b_top_idxs[ptrB]]; |
5689 | out_rowids[i] = b_rowids[b_top_idxs[ptrB]]; |
5690 | ptrB++; |
5691 | } else if (ptrB >= b_length) { |
5692 | out[i] = a[ptrA]; |
5693 | out_rowids[i] = a_rowids[ptrA]; |
5694 | ptrA++; |
5695 | } else { |
5696 | if (a[ptrA] <= b[b_top_idxs[ptrB]]) { |
5697 | out[i] = a[ptrA]; |
5698 | out_rowids[i] = a_rowids[ptrA]; |
5699 | ptrA++; |
5700 | } else { |
5701 | out[i] = b[b_top_idxs[ptrB]]; |
5702 | out_rowids[i] = b_rowids[b_top_idxs[ptrB]]; |
5703 | ptrB++; |
5704 | } |
5705 | } |
5706 | } |
5707 | |
5708 | *out_used = out_length; |
5709 | } |
5710 | |
5711 | u8 *bitmap_new(i32 n) { |
5712 | assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n % 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5712, __extension__ __PRETTY_FUNCTION__); })); |
5713 | u8 *p = sqlite3_mallocsqlite3_api->malloc(n * sizeof(u8) / CHAR_BIT8); |
5714 | if (p) { |
5715 | memset(p, 0, n * sizeof(u8) / CHAR_BIT8); |
5716 | } |
5717 | return p; |
5718 | } |
5719 | u8 *bitmap_new_from(i32 n, u8 *from) { |
5720 | assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n % 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5720, __extension__ __PRETTY_FUNCTION__); })); |
5721 | u8 *p = sqlite3_mallocsqlite3_api->malloc(n * sizeof(u8) / CHAR_BIT8); |
5722 | if (p) { |
5723 | memcpy(p, from, n / CHAR_BIT8); |
5724 | } |
5725 | return p; |
5726 | } |
5727 | |
5728 | void bitmap_copy(u8 *base, u8 *from, i32 n) { |
5729 | assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n % 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5729, __extension__ __PRETTY_FUNCTION__); })); |
5730 | memcpy(base, from, n / CHAR_BIT8); |
5731 | } |
5732 | |
5733 | void bitmap_and_inplace(u8 *base, u8 *other, i32 n) { |
5734 | assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if ( (n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5734, __extension__ __PRETTY_FUNCTION__); })); |
5735 | for (int i = 0; i < n / CHAR_BIT8; i++) { |
5736 | base[i] = base[i] & other[i]; |
5737 | } |
5738 | } |
5739 | |
5740 | void bitmap_set(u8 *bitmap, i32 position, int value) { |
5741 | if (value) { |
5742 | bitmap[position / CHAR_BIT8] |= 1 << (position % CHAR_BIT8); |
5743 | } else { |
5744 | bitmap[position / CHAR_BIT8] &= ~(1 << (position % CHAR_BIT8)); |
5745 | } |
5746 | } |
5747 | |
5748 | int bitmap_get(u8 *bitmap, i32 position) { |
5749 | return (((bitmap[position / CHAR_BIT8]) >> (position % CHAR_BIT8)) & 1); |
5750 | } |
5751 | |
5752 | void bitmap_clear(u8 *bitmap, i32 n) { |
5753 | assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if ( (n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5753, __extension__ __PRETTY_FUNCTION__); })); |
5754 | memset(bitmap, 0, n / CHAR_BIT8); |
5755 | } |
5756 | |
5757 | void bitmap_fill(u8 *bitmap, i32 n) { |
5758 | assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if ( (n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5758, __extension__ __PRETTY_FUNCTION__); })); |
5759 | memset(bitmap, 0xFF, n / CHAR_BIT8); |
5760 | } |
5761 | |
5762 | /** |
5763 | * @brief Finds the minimum k items in distances, and writes the indicies to |
5764 | * out. |
5765 | * |
5766 | * @param distances input f32 array of size n, the items to consider. |
5767 | * @param n: size of distances array. |
5768 | * @param out: Output array of size k, will contain at most k element indicies |
5769 | * @param k: Size of output array |
5770 | * @return int |
5771 | */ |
5772 | int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k, |
5773 | u8 *bTaken, i32 *k_used) { |
5774 | assert(k > 0)((void) sizeof ((k > 0) ? 1 : 0), __extension__ ({ if (k > 0) ; else __assert_fail ("k > 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5774, __extension__ __PRETTY_FUNCTION__); })); |
5775 | assert(k <= n)((void) sizeof ((k <= n) ? 1 : 0), __extension__ ({ if (k <= n) ; else __assert_fail ("k <= n", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5775, __extension__ __PRETTY_FUNCTION__); })); |
5776 | |
5777 | bitmap_clear(bTaken, n); |
5778 | |
5779 | for (int ik = 0; ik < k; ik++) { |
5780 | int min_idx = 0; |
5781 | while (min_idx < n && |
5782 | (bitmap_get(bTaken, min_idx) || !bitmap_get(candidates, min_idx))) { |
5783 | min_idx++; |
5784 | } |
5785 | if (min_idx >= n) { |
5786 | *k_used = ik; |
5787 | return SQLITE_OK0; |
5788 | } |
5789 | |
5790 | for (int i = 0; i < n; i++) { |
5791 | if (distances[i] <= distances[min_idx] && !bitmap_get(bTaken, i) && |
5792 | (bitmap_get(candidates, i))) { |
5793 | min_idx = i; |
5794 | } |
5795 | } |
5796 | |
5797 | out[ik] = min_idx; |
5798 | bitmap_set(bTaken, min_idx, 1); |
5799 | } |
5800 | *k_used = k; |
5801 | return SQLITE_OK0; |
5802 | } |
5803 | |
5804 | int vec0_get_metadata_text_long_value( |
5805 | vec0_vtab * p, |
5806 | sqlite3_stmt ** stmt, |
5807 | int metadata_idx, |
5808 | i64 rowid, |
5809 | int *n, |
5810 | char ** s) { |
5811 | int rc; |
5812 | if(!(*stmt)) { |
5813 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("select data from " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " where rowid = ?", p->schemaName, p->tableName, metadata_idx); |
5814 | if(!zSql) { |
5815 | rc = SQLITE_NOMEM7; |
5816 | goto done; |
5817 | } |
5818 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, stmt, NULL((void*)0)); |
5819 | sqlite3_freesqlite3_api->free( (void *) zSql); |
5820 | if(rc != SQLITE_OK0) { |
5821 | goto done; |
5822 | } |
5823 | } |
5824 | |
5825 | sqlite3_resetsqlite3_api->reset(*stmt); |
5826 | sqlite3_bind_int64sqlite3_api->bind_int64(*stmt, 1, rowid); |
5827 | rc = sqlite3_stepsqlite3_api->step(*stmt); |
5828 | if(rc != SQLITE_ROW100) { |
5829 | rc = SQLITE_ERROR1; |
5830 | goto done; |
5831 | } |
5832 | *s = (char *) sqlite3_column_textsqlite3_api->column_text(*stmt, 0); |
5833 | *n = sqlite3_column_bytessqlite3_api->column_bytes(*stmt, 0); |
5834 | rc = SQLITE_OK0; |
5835 | done: |
5836 | return rc; |
5837 | } |
5838 | |
5839 | /** |
5840 | * @brief Crete at "iterator" (sqlite3_stmt) of chunks with the given constraints |
5841 | * |
5842 | * Any VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT values in idxStr/argv will be applied |
5843 | * as WHERE constraints in the underlying stmt SQL, and any consumer of the stmt |
5844 | * can freely step through the stmt with all constraints satisfied. |
5845 | * |
5846 | * @param p - vec0_vtab |
5847 | * @param idxStr - the xBestIndex/xFilter idxstr containing VEC0_IDXSTR values |
5848 | * @param argc - number of argv values from xFilter |
5849 | * @param argv - array of sqlite3_value from xFilter |
5850 | * @param outStmt - output sqlite3_stmt of chunks with all filters applied |
5851 | * @return int SQLITE_OK on success, error code otherwise |
5852 | */ |
5853 | int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value ** argv, sqlite3_stmt** outStmt) { |
5854 | // always null terminated, enforced by SQLite |
5855 | int idxStrLength = strlen(idxStr); |
5856 | // "1" refers to the initial vec0_query_plan char, 4 is the number of chars per "element" |
5857 | int numValueEntries = (idxStrLength-1) / 4; |
5858 | assert(argc == numValueEntries)((void) sizeof ((argc == numValueEntries) ? 1 : 0), __extension__ ({ if (argc == numValueEntries) ; else __assert_fail ("argc == numValueEntries" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5858, __extension__ __PRETTY_FUNCTION__); })); |
5859 | |
5860 | int rc; |
5861 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); |
5862 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "select chunk_id, validity, rowids " |
5863 | " from " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", |
5864 | p->schemaName, p->tableName); |
5865 | |
5866 | int appendedWhere = 0; |
5867 | for(int i = 0; i < numValueEntries; i++) { |
5868 | int idx = 1 + (i * 4); |
5869 | char kind = idxStr[idx + 0]; |
5870 | if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) { |
5871 | continue; |
5872 | } |
5873 | |
5874 | int partition_idx = idxStr[idx + 1] - 'A'; |
5875 | int operator = idxStr[idx + 2]; |
5876 | // idxStr[idx + 3] is just null, a '_' placeholder |
5877 | |
5878 | if(!appendedWhere) { |
5879 | sqlite3_str_appendallsqlite3_api->str_appendall(s, " WHERE "); |
5880 | appendedWhere = 1; |
5881 | }else { |
5882 | sqlite3_str_appendallsqlite3_api->str_appendall(s, " AND "); |
5883 | } |
5884 | switch(operator) { |
5885 | case VEC0_PARTITION_OPERATOR_EQ: |
5886 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d = ? ", partition_idx); |
5887 | break; |
5888 | case VEC0_PARTITION_OPERATOR_GT: |
5889 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d > ? ", partition_idx); |
5890 | break; |
5891 | case VEC0_PARTITION_OPERATOR_LE: |
5892 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d <= ? ", partition_idx); |
5893 | break; |
5894 | case VEC0_PARTITION_OPERATOR_LT: |
5895 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d < ? ", partition_idx); |
5896 | break; |
5897 | case VEC0_PARTITION_OPERATOR_GE: |
5898 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d >= ? ", partition_idx); |
5899 | break; |
5900 | case VEC0_PARTITION_OPERATOR_NE: |
5901 | sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d != ? ", partition_idx); |
5902 | break; |
5903 | default: { |
5904 | char * zSql = sqlite3_str_finishsqlite3_api->str_finish(s); |
5905 | sqlite3_freesqlite3_api->free(zSql); |
5906 | return SQLITE_ERROR1; |
5907 | } |
5908 | |
5909 | } |
5910 | |
5911 | } |
5912 | |
5913 | char *zSql = sqlite3_str_finishsqlite3_api->str_finish(s); |
5914 | if (!zSql) { |
5915 | return SQLITE_NOMEM7; |
5916 | } |
5917 | |
5918 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, outStmt, NULL((void*)0)); |
5919 | sqlite3_freesqlite3_api->free(zSql); |
5920 | if(rc != SQLITE_OK0) { |
5921 | return rc; |
5922 | } |
5923 | |
5924 | int n = 1; |
5925 | for(int i = 0; i < numValueEntries; i++) { |
5926 | int idx = 1 + (i * 4); |
5927 | char kind = idxStr[idx + 0]; |
5928 | if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) { |
5929 | continue; |
5930 | } |
5931 | sqlite3_bind_valuesqlite3_api->bind_value(*outStmt, n++, argv[i]); |
5932 | } |
5933 | |
5934 | return rc; |
5935 | } |
5936 | |
5937 | // a single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER only for now. |
5938 | struct Vec0MetadataIn{ |
5939 | // index of argv[i]` the constraint is on |
5940 | int argv_idx; |
5941 | // metadata column index of the constraint, derived from idxStr + argv_idx |
5942 | int metadata_idx; |
5943 | // array of the copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next() |
5944 | struct Array array; |
5945 | }; |
5946 | |
// Element type of Vec0MetadataIn.array when the `xxx in (...)` constraint is
// on a text column: a string together with its length.
struct Vec0MetadataInTextEntry {
  // length of zString; compared against the stored text length when filtering
  int n;
  // the string value
  char * zString;
};
5952 | |
5953 | |
5954 | int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid, struct Array * aMetadataIn, int argv_idx) { |
5955 | int rc; |
5956 | sqlite3_stmt * stmt = NULL((void*)0); |
5957 | i64 * rowids = NULL((void*)0); |
5958 | sqlite3_blob * rowidsBlob; |
5959 | const char * sTarget = (const char *) sqlite3_value_textsqlite3_api->value_text(value); |
5960 | int nTarget = sqlite3_value_bytessqlite3_api->value_bytes(value); |
5961 | |
5962 | |
5963 | // TODO(perf): only text metadata news the rowids BLOB. Make it so that |
5964 | // rowids BLOB is re-used when multiple fitlers on text columns, |
5965 | // ex "name BETWEEN 'a' and 'b'"" |
5966 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", chunk_rowid, 0, &rowidsBlob); |
5967 | if(rc != SQLITE_OK0) { |
5968 | return rc; |
5969 | } |
5970 | assert(sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0)((void) sizeof ((sqlite3_api->blob_bytes(rowidsBlob) % sizeof (i64) == 0) ? 1 : 0), __extension__ ({ if (sqlite3_api->blob_bytes (rowidsBlob) % sizeof(i64) == 0) ; else __assert_fail ("sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5970, __extension__ __PRETTY_FUNCTION__); })); |
5971 | assert((sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size)((void) sizeof (((sqlite3_api->blob_bytes(rowidsBlob) / sizeof (i64)) == size) ? 1 : 0), __extension__ ({ if ((sqlite3_api-> blob_bytes(rowidsBlob) / sizeof(i64)) == size) ; else __assert_fail ("(sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 5971, __extension__ __PRETTY_FUNCTION__); })); |
5972 | |
5973 | rowids = sqlite3_mallocsqlite3_api->malloc(sqlite3_blob_bytessqlite3_api->blob_bytes(rowidsBlob)); |
5974 | if(!rowids) { |
5975 | sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob); |
5976 | return SQLITE_NOMEM7; |
5977 | } |
5978 | |
5979 | rc = sqlite3_blob_readsqlite3_api->blob_read(rowidsBlob, rowids, sqlite3_blob_bytessqlite3_api->blob_bytes(rowidsBlob), 0); |
5980 | if(rc != SQLITE_OK0) { |
5981 | sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob); |
5982 | return rc; |
5983 | } |
5984 | sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob); |
5985 | |
5986 | switch(op) { |
5987 | int nPrefix; |
5988 | char * sPrefix; |
5989 | char *sFull; |
5990 | int nFull; |
5991 | u8 * view; |
5992 | case VEC0_METADATA_OPERATOR_EQ: { |
5993 | for(int i = 0; i < size; i++) { |
5994 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
5995 | nPrefix = ((int*) view)[0]; |
5996 | sPrefix = (char *) &view[4]; |
5997 | |
5998 | // for EQ the text lengths must match |
5999 | if(nPrefix != nTarget) { |
6000 | bitmap_set(b, i, 0); |
6001 | continue; |
6002 | } |
6003 | int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12))); |
6004 | |
6005 | // for short strings, use the prefix comparison direclty |
6006 | if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
6007 | bitmap_set(b, i, cmpPrefix == 0); |
6008 | continue; |
6009 | } |
6010 | // for EQ on longs strings, the prefix must match |
6011 | if(cmpPrefix) { |
6012 | bitmap_set(b, i, 0); |
6013 | continue; |
6014 | } |
6015 | // consult the full string |
6016 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); |
6017 | if(rc != SQLITE_OK0) { |
6018 | goto done; |
6019 | } |
6020 | if(nPrefix != nFull) { |
6021 | rc = SQLITE_ERROR1; |
6022 | goto done; |
6023 | } |
6024 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) == 0); |
6025 | } |
6026 | break; |
6027 | } |
6028 | case VEC0_METADATA_OPERATOR_NE: { |
6029 | for(int i = 0; i < size; i++) { |
6030 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
6031 | nPrefix = ((int*) view)[0]; |
6032 | sPrefix = (char *) &view[4]; |
6033 | |
6034 | // for NE if text lengths dont match, it never will |
6035 | if(nPrefix != nTarget) { |
6036 | bitmap_set(b, i, 1); |
6037 | continue; |
6038 | } |
6039 | |
6040 | int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12))); |
6041 | |
6042 | // for short strings, use the prefix comparison direclty |
6043 | if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
6044 | bitmap_set(b, i, cmpPrefix != 0); |
6045 | continue; |
6046 | } |
6047 | // for NE on longs strings, if prefixes dont match, then long string wont |
6048 | if(cmpPrefix) { |
6049 | bitmap_set(b, i, 1); |
6050 | continue; |
6051 | } |
6052 | // consult the full string |
6053 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); |
6054 | if(rc != SQLITE_OK0) { |
6055 | goto done; |
6056 | } |
6057 | if(nPrefix != nFull) { |
6058 | rc = SQLITE_ERROR1; |
6059 | goto done; |
6060 | } |
6061 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0); |
6062 | } |
6063 | break; |
6064 | } |
6065 | case VEC0_METADATA_OPERATOR_GT: { |
6066 | for(int i = 0; i < size; i++) { |
6067 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
6068 | nPrefix = ((int*) view)[0]; |
6069 | sPrefix = (char *) &view[4]; |
6070 | int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget )) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget ))); |
6071 | |
6072 | if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
6073 | // if prefix match, check which is longer |
6074 | if(cmpPrefix == 0) { |
6075 | bitmap_set(b, i, nPrefix > nTarget); |
6076 | } |
6077 | else { |
6078 | bitmap_set(b, i, cmpPrefix > 0); |
6079 | } |
6080 | continue; |
6081 | } |
6082 | // TODO(perf): may not need to compare full text in some cases |
6083 | |
6084 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); |
6085 | if(rc != SQLITE_OK0) { |
6086 | goto done; |
6087 | } |
6088 | if(nPrefix != nFull) { |
6089 | rc = SQLITE_ERROR1; |
6090 | goto done; |
6091 | } |
6092 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) > 0); |
6093 | } |
6094 | break; |
6095 | } |
6096 | case VEC0_METADATA_OPERATOR_GE: { |
6097 | for(int i = 0; i < size; i++) { |
6098 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
6099 | nPrefix = ((int*) view)[0]; |
6100 | sPrefix = (char *) &view[4]; |
6101 | int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget )) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget ))); |
6102 | |
6103 | if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
6104 | // if prefix match, check which is longer |
6105 | if(cmpPrefix == 0) { |
6106 | bitmap_set(b, i, nPrefix >= nTarget); |
6107 | } |
6108 | else { |
6109 | bitmap_set(b, i, cmpPrefix >= 0); |
6110 | } |
6111 | continue; |
6112 | } |
6113 | // TODO(perf): may not need to compare full text in some cases |
6114 | |
6115 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); |
6116 | if(rc != SQLITE_OK0) { |
6117 | goto done; |
6118 | } |
6119 | if(nPrefix != nFull) { |
6120 | rc = SQLITE_ERROR1; |
6121 | goto done; |
6122 | } |
6123 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) >= 0); |
6124 | } |
6125 | break; |
6126 | } |
6127 | case VEC0_METADATA_OPERATOR_LE: { |
6128 | for(int i = 0; i < size; i++) { |
6129 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
6130 | nPrefix = ((int*) view)[0]; |
6131 | sPrefix = (char *) &view[4]; |
6132 | int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget )) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget ))); |
6133 | |
6134 | if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
6135 | // if prefix match, check which is longer |
6136 | if(cmpPrefix == 0) { |
6137 | bitmap_set(b, i, nPrefix <= nTarget); |
6138 | } |
6139 | else { |
6140 | bitmap_set(b, i, cmpPrefix <= 0); |
6141 | } |
6142 | continue; |
6143 | } |
6144 | // TODO(perf): may not need to compare full text in some cases |
6145 | |
6146 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); |
6147 | if(rc != SQLITE_OK0) { |
6148 | goto done; |
6149 | } |
6150 | if(nPrefix != nFull) { |
6151 | rc = SQLITE_ERROR1; |
6152 | goto done; |
6153 | } |
6154 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) <= 0); |
6155 | } |
6156 | break; |
6157 | } |
6158 | case VEC0_METADATA_OPERATOR_LT: { |
6159 | for(int i = 0; i < size; i++) { |
6160 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
6161 | nPrefix = ((int*) view)[0]; |
6162 | sPrefix = (char *) &view[4]; |
6163 | int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget )) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget ))); |
6164 | |
6165 | if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
6166 | // if prefix match, check which is longer |
6167 | if(cmpPrefix == 0) { |
6168 | bitmap_set(b, i, nPrefix < nTarget); |
6169 | } |
6170 | else { |
6171 | bitmap_set(b, i, cmpPrefix < 0); |
6172 | } |
6173 | continue; |
6174 | } |
6175 | // TODO(perf): may not need to compare full text in some cases |
6176 | |
6177 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); |
6178 | if(rc != SQLITE_OK0) { |
6179 | goto done; |
6180 | } |
6181 | if(nPrefix != nFull) { |
6182 | rc = SQLITE_ERROR1; |
6183 | goto done; |
6184 | } |
6185 | bitmap_set(b, i, strncmp(sFull, sTarget, nFull) < 0); |
6186 | } |
6187 | break; |
6188 | } |
6189 | |
6190 | case VEC0_METADATA_OPERATOR_IN: { |
6191 | size_t metadataInIdx = -1; |
6192 | for(size_t i = 0; i < aMetadataIn->length; i++) { |
6193 | struct Vec0MetadataIn * metadataIn = &(((struct Vec0MetadataIn *) aMetadataIn->z)[i]); |
6194 | if(metadataIn->argv_idx == argv_idx) { |
6195 | metadataInIdx = i; |
6196 | break; |
6197 | } |
6198 | } |
6199 | if(metadataInIdx < 0) { |
6200 | rc = SQLITE_ERROR1; |
6201 | goto done; |
6202 | } |
6203 | |
6204 | struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx]; |
6205 | struct Array * aTarget = &(metadataIn->array); |
6206 | |
6207 | |
6208 | int nPrefix; |
6209 | char * sPrefix; |
6210 | char *sFull; |
6211 | int nFull; |
6212 | u8 * view; |
6213 | for(int i = 0; i < size; i++) { |
6214 | view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
6215 | nPrefix = ((int*) view)[0]; |
6216 | sPrefix = (char *) &view[4]; |
6217 | for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) { |
6218 | struct Vec0MetadataInTextEntry * entry = &(((struct Vec0MetadataInTextEntry*)aTarget->z)[target_idx]); |
6219 | if(entry->n != nPrefix) { |
6220 | continue; |
6221 | } |
6222 | int cmpPrefix = strncmp(sPrefix, entry->zString, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12))); |
6223 | if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
6224 | if(cmpPrefix == 0) { |
6225 | bitmap_set(b, i, 1); |
6226 | break; |
6227 | } |
6228 | continue; |
6229 | } |
6230 | if(cmpPrefix) { |
6231 | continue; |
6232 | } |
6233 | |
6234 | rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); |
6235 | if(rc != SQLITE_OK0) { |
6236 | goto done; |
6237 | } |
6238 | if(nPrefix != nFull) { |
6239 | rc = SQLITE_ERROR1; |
6240 | goto done; |
6241 | } |
6242 | if(strncmp(sFull, entry->zString, nFull) == 0) { |
6243 | bitmap_set(b, i, 1); |
6244 | break; |
6245 | } |
6246 | } |
6247 | } |
6248 | break; |
6249 | } |
6250 | |
6251 | } |
6252 | rc = SQLITE_OK0; |
6253 | |
6254 | done: |
6255 | sqlite3_finalizesqlite3_api->finalize(stmt); |
6256 | sqlite3_freesqlite3_api->free(rowids); |
6257 | return rc; |
6258 | |
6259 | } |
6260 | |
6261 | /** |
6262 | * @brief Fill in bitmap of chunk values, whether or not the values match a metadata constraint |
6263 | * |
6264 | * @param p vec0_vtab |
6265 | * @param metadata_idx index of the metatadata column to perfrom constraints on |
6266 | * @param value sqlite3_value of the constraints value |
6267 | * @param blob sqlite3_blob that is already opened on the metdata column's shadow chunk table |
6268 | * @param chunk_rowid rowid of the chunk to calculate on |
6269 | * @param b pre-allocated and zero'd out bitmap to write results to |
6270 | * @param size size of the chunk |
6271 | * @return int SQLITE_OK on success, error code otherwise |
6272 | */ |
6273 | int vec0_set_metadata_filter_bitmap( |
6274 | vec0_vtab *p, |
6275 | int metadata_idx, |
6276 | vec0_metadata_operator op, |
6277 | sqlite3_value * value, |
6278 | sqlite3_blob * blob, |
6279 | i64 chunk_rowid, |
6280 | u8* b, |
6281 | int size, |
6282 | struct Array * aMetadataIn, int argv_idx) { |
6283 | // TODO: shouldn't this skip in-valid entries from the chunk's validity bitmap? |
6284 | |
6285 | int rc; |
6286 | rc = sqlite3_blob_reopensqlite3_api->blob_reopen(blob, chunk_rowid); |
6287 | if(rc != SQLITE_OK0) { |
6288 | return rc; |
6289 | } |
6290 | |
6291 | vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind; |
6292 | int szMatch = 0; |
6293 | int blobSize = sqlite3_blob_bytessqlite3_api->blob_bytes(blob); |
6294 | switch(kind) { |
6295 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { |
6296 | szMatch = blobSize == size / CHAR_BIT8; |
6297 | break; |
6298 | } |
6299 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { |
6300 | szMatch = blobSize == size * sizeof(i64); |
6301 | break; |
6302 | } |
6303 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { |
6304 | szMatch = blobSize == size * sizeof(double); |
6305 | break; |
6306 | } |
6307 | case VEC0_METADATA_COLUMN_KIND_TEXT: { |
6308 | szMatch = blobSize == size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16; |
6309 | break; |
6310 | } |
6311 | } |
6312 | if(!szMatch) { |
6313 | return SQLITE_ERROR1; |
6314 | } |
6315 | void * buffer = sqlite3_mallocsqlite3_api->malloc(blobSize); |
6316 | if(!buffer) { |
6317 | return SQLITE_NOMEM7; |
6318 | } |
6319 | rc = sqlite3_blob_readsqlite3_api->blob_read(blob, buffer, blobSize, 0); |
6320 | if(rc != SQLITE_OK0) { |
6321 | goto done; |
6322 | } |
6323 | switch(kind) { |
6324 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { |
6325 | int target = sqlite3_value_intsqlite3_api->value_int(value); |
6326 | if( (target && op == VEC0_METADATA_OPERATOR_EQ) || (!target && op == VEC0_METADATA_OPERATOR_NE)) { |
6327 | for(int i = 0; i < size; i++) { bitmap_set(b, i, bitmap_get((u8*) buffer, i)); } |
6328 | } |
6329 | else { |
6330 | for(int i = 0; i < size; i++) { bitmap_set(b, i, !bitmap_get((u8*) buffer, i)); } |
6331 | } |
6332 | break; |
6333 | } |
6334 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { |
6335 | i64 * array = (i64*) buffer; |
6336 | i64 target = sqlite3_value_int64sqlite3_api->value_int64(value); |
6337 | switch(op) { |
6338 | case VEC0_METADATA_OPERATOR_EQ: { |
6339 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); } |
6340 | break; |
6341 | } |
6342 | case VEC0_METADATA_OPERATOR_GT: { |
6343 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); } |
6344 | break; |
6345 | } |
6346 | case VEC0_METADATA_OPERATOR_LE: { |
6347 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); } |
6348 | break; |
6349 | } |
6350 | case VEC0_METADATA_OPERATOR_LT: { |
6351 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); } |
6352 | break; |
6353 | } |
6354 | case VEC0_METADATA_OPERATOR_GE: { |
6355 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); } |
6356 | break; |
6357 | } |
6358 | case VEC0_METADATA_OPERATOR_NE: { |
6359 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); } |
6360 | break; |
6361 | } |
6362 | case VEC0_METADATA_OPERATOR_IN: { |
6363 | int metadataInIdx = -1; |
6364 | for(size_t i = 0; i < aMetadataIn->length; i++) { |
6365 | struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[i]; |
6366 | if(metadataIn->argv_idx == argv_idx) { |
6367 | metadataInIdx = i; |
6368 | break; |
6369 | } |
6370 | } |
6371 | if(metadataInIdx < 0) { |
6372 | rc = SQLITE_ERROR1; |
6373 | goto done; |
6374 | } |
6375 | struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx]; |
6376 | struct Array * aTarget = &(metadataIn->array); |
6377 | |
6378 | for(int i = 0; i < size; i++) { |
6379 | for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) { |
6380 | if( ((i64*)aTarget->z)[target_idx] == array[i]) { |
6381 | bitmap_set(b, i, 1); |
6382 | break; |
6383 | } |
6384 | } |
6385 | } |
6386 | break; |
6387 | } |
6388 | } |
6389 | break; |
6390 | } |
6391 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { |
6392 | double * array = (double*) buffer; |
6393 | double target = sqlite3_value_doublesqlite3_api->value_double(value); |
6394 | switch(op) { |
6395 | case VEC0_METADATA_OPERATOR_EQ: { |
6396 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); } |
6397 | break; |
6398 | } |
6399 | case VEC0_METADATA_OPERATOR_GT: { |
6400 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); } |
6401 | break; |
6402 | } |
6403 | case VEC0_METADATA_OPERATOR_LE: { |
6404 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); } |
6405 | break; |
6406 | } |
6407 | case VEC0_METADATA_OPERATOR_LT: { |
6408 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); } |
6409 | break; |
6410 | } |
6411 | case VEC0_METADATA_OPERATOR_GE: { |
6412 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); } |
6413 | break; |
6414 | } |
6415 | case VEC0_METADATA_OPERATOR_NE: { |
6416 | for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); } |
6417 | break; |
6418 | } |
6419 | case VEC0_METADATA_OPERATOR_IN: { |
6420 | // should never be reached |
6421 | break; |
6422 | } |
6423 | } |
6424 | break; |
6425 | } |
6426 | case VEC0_METADATA_COLUMN_KIND_TEXT: { |
6427 | rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, metadata_idx, chunk_rowid, aMetadataIn, argv_idx); |
6428 | if(rc != SQLITE_OK0) { |
6429 | goto done; |
6430 | } |
6431 | break; |
6432 | } |
6433 | } |
6434 | done: |
6435 | sqlite3_freesqlite3_api->free(buffer); |
6436 | return rc; |
6437 | } |
6438 | |
6439 | int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks, |
6440 | struct VectorColumnDefinition *vector_column, |
6441 | int vectorColumnIdx, struct Array *arrayRowidsIn, |
6442 | struct Array * aMetadataIn, |
6443 | const char * idxStr, int argc, sqlite3_value ** argv, |
6444 | void *queryVector, i64 k, i64 **out_topk_rowids, |
6445 | f32 **out_topk_distances, i64 *out_used) { |
6446 | // for each chunk, get top min(k, chunk_size) rowid + distances to query vec. |
6447 | // then reconcile all topk_chunks for a true top k. |
6448 | // output only rowids + distances for now |
6449 | |
6450 | int rc = SQLITE_OK0; |
6451 | sqlite3_blob *blobVectors = NULL((void*)0); |
6452 | |
6453 | void *baseVectors = NULL((void*)0); // memory: chunk_size * dimensions * element_size |
6454 | |
6455 | // OWNED BY CALLER ON SUCCESS |
6456 | i64 *topk_rowids = NULL((void*)0); // memory: k * 4 |
6457 | // OWNED BY CALLER ON SUCCESS |
6458 | f32 *topk_distances = NULL((void*)0); // memory: k * 4 |
6459 | |
6460 | i64 *tmp_topk_rowids = NULL((void*)0); // memory: k * 4 |
6461 | f32 *tmp_topk_distances = NULL((void*)0); // memory: k * 4 |
6462 | f32 *chunk_distances = NULL((void*)0); // memory: chunk_size * 4 |
6463 | u8 *b = NULL((void*)0); // memory: chunk_size / 8 |
6464 | u8 *bTaken = NULL((void*)0); // memory: chunk_size / 8 |
6465 | i32 *chunk_topk_idxs = NULL((void*)0); // memory: k * 4 |
6466 | u8 *bmRowids = NULL((void*)0); // memory: chunk_size / 8 |
6467 | u8 *bmMetadata = NULL((void*)0); // memory: chunk_size / 8 |
6468 | // // total: a lot??? |
6469 | |
6470 | // 6 * (k * 4) + (k * 2) + (chunk_size / 8) + (chunk_size * dimensions * 4) |
6471 | |
6472 | topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i64)); |
6473 | if (!topk_rowids) { |
6474 | rc = SQLITE_NOMEM7; |
6475 | goto cleanup; |
6476 | } |
6477 | memset(topk_rowids, 0, k * sizeof(i64)); |
6478 | |
6479 | topk_distances = sqlite3_mallocsqlite3_api->malloc(k * sizeof(f32)); |
6480 | if (!topk_distances) { |
6481 | rc = SQLITE_NOMEM7; |
6482 | goto cleanup; |
6483 | } |
6484 | memset(topk_distances, 0, k * sizeof(f32)); |
6485 | |
6486 | tmp_topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i64)); |
6487 | if (!tmp_topk_rowids) { |
6488 | rc = SQLITE_NOMEM7; |
6489 | goto cleanup; |
6490 | } |
6491 | memset(tmp_topk_rowids, 0, k * sizeof(i64)); |
6492 | |
6493 | tmp_topk_distances = sqlite3_mallocsqlite3_api->malloc(k * sizeof(f32)); |
6494 | if (!tmp_topk_distances) { |
6495 | rc = SQLITE_NOMEM7; |
6496 | goto cleanup; |
6497 | } |
6498 | memset(tmp_topk_distances, 0, k * sizeof(f32)); |
6499 | |
6500 | i64 k_used = 0; |
6501 | i64 baseVectorsSize = p->chunk_size * vector_column_byte_size(*vector_column); |
6502 | baseVectors = sqlite3_mallocsqlite3_api->malloc(baseVectorsSize); |
6503 | if (!baseVectors) { |
6504 | rc = SQLITE_NOMEM7; |
6505 | goto cleanup; |
6506 | } |
6507 | |
6508 | chunk_distances = sqlite3_mallocsqlite3_api->malloc(p->chunk_size * sizeof(f32)); |
6509 | if (!chunk_distances) { |
6510 | rc = SQLITE_NOMEM7; |
6511 | goto cleanup; |
6512 | } |
6513 | |
6514 | b = bitmap_new(p->chunk_size); |
6515 | if (!b) { |
6516 | rc = SQLITE_NOMEM7; |
6517 | goto cleanup; |
6518 | } |
6519 | |
6520 | bTaken = bitmap_new(p->chunk_size); |
6521 | if (!bTaken) { |
6522 | rc = SQLITE_NOMEM7; |
6523 | goto cleanup; |
6524 | } |
6525 | |
6526 | chunk_topk_idxs = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i32)); |
6527 | if (!chunk_topk_idxs) { |
6528 | rc = SQLITE_NOMEM7; |
6529 | goto cleanup; |
6530 | } |
6531 | |
6532 | bmRowids = arrayRowidsIn ? bitmap_new(p->chunk_size) : NULL((void*)0); |
6533 | if (arrayRowidsIn && !bmRowids) { |
6534 | rc = SQLITE_NOMEM7; |
6535 | goto cleanup; |
6536 | } |
6537 | |
6538 | sqlite3_blob * metadataBlobs[VEC0_MAX_METADATA_COLUMNS16]; |
6539 | memset(metadataBlobs, 0, sizeof(sqlite3_blob*) * VEC0_MAX_METADATA_COLUMNS16); |
6540 | |
6541 | bmMetadata = bitmap_new(p->chunk_size); |
6542 | if(!bmMetadata) { |
6543 | rc = SQLITE_NOMEM7; |
6544 | goto cleanup; |
6545 | } |
6546 | |
6547 | int idxStrLength = strlen(idxStr); |
6548 | int numValueEntries = (idxStrLength-1) / 4; |
6549 | assert(numValueEntries == argc)((void) sizeof ((numValueEntries == argc) ? 1 : 0), __extension__ ({ if (numValueEntries == argc) ; else __assert_fail ("numValueEntries == argc" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 6549, __extension__ __PRETTY_FUNCTION__); })); |
6550 | int hasMetadataFilters = 0; |
6551 | for(int i = 0; i < argc; i++) { |
6552 | int idx = 1 + (i * 4); |
6553 | char kind = idxStr[idx + 0]; |
6554 | if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) { |
6555 | hasMetadataFilters = 1; |
6556 | break; |
6557 | } |
6558 | } |
6559 | |
6560 | while (true1) { |
6561 | rc = sqlite3_stepsqlite3_api->step(stmtChunks); |
6562 | if (rc == SQLITE_DONE101) { |
6563 | break; |
6564 | } |
6565 | if (rc != SQLITE_ROW100) { |
6566 | vtab_set_error(&p->base, "chunks iter error"); |
6567 | rc = SQLITE_ERROR1; |
6568 | goto cleanup; |
6569 | } |
6570 | memset(chunk_distances, 0, p->chunk_size * sizeof(f32)); |
6571 | memset(chunk_topk_idxs, 0, k * sizeof(i32)); |
6572 | bitmap_clear(b, p->chunk_size); |
6573 | |
6574 | i64 chunk_id = sqlite3_column_int64sqlite3_api->column_int64(stmtChunks, 0); |
6575 | unsigned char *chunkValidity = |
6576 | (unsigned char *)sqlite3_column_blobsqlite3_api->column_blob(stmtChunks, 1); |
6577 | i64 validitySize = sqlite3_column_bytessqlite3_api->column_bytes(stmtChunks, 1); |
6578 | if (validitySize != p->chunk_size / CHAR_BIT8) { |
6579 | // IMP: V05271_22109 |
6580 | vtab_set_error( |
6581 | &p->base, |
6582 | "chunk validity size doesn't match - expected %lld, found %lld", |
6583 | p->chunk_size / CHAR_BIT8, validitySize); |
6584 | rc = SQLITE_ERROR1; |
6585 | goto cleanup; |
6586 | } |
6587 | |
6588 | i64 *chunkRowids = (i64 *)sqlite3_column_blobsqlite3_api->column_blob(stmtChunks, 2); |
6589 | i64 rowidsSize = sqlite3_column_bytessqlite3_api->column_bytes(stmtChunks, 2); |
6590 | if (rowidsSize != p->chunk_size * sizeof(i64)) { |
6591 | // IMP: V02796_19635 |
6592 | vtab_set_error(&p->base, "rowids size doesn't match"); |
6593 | vtab_set_error( |
6594 | &p->base, |
6595 | "chunk rowids size doesn't match - expected %lld, found %lld", |
6596 | p->chunk_size * sizeof(i64), rowidsSize); |
6597 | rc = SQLITE_ERROR1; |
6598 | goto cleanup; |
6599 | } |
6600 | |
6601 | // open the vector chunk blob for the current chunk |
6602 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, |
6603 | p->shadowVectorChunksNames[vectorColumnIdx], |
6604 | "vectors", chunk_id, 0, &blobVectors); |
6605 | if (rc != SQLITE_OK0) { |
6606 | vtab_set_error(&p->base, "could not open vectors blob for chunk %lld", |
6607 | chunk_id); |
6608 | rc = SQLITE_ERROR1; |
6609 | goto cleanup; |
6610 | } |
6611 | |
6612 | i64 currentBaseVectorsSize = sqlite3_blob_bytessqlite3_api->blob_bytes(blobVectors); |
6613 | i64 expectedBaseVectorsSize = |
6614 | p->chunk_size * vector_column_byte_size(*vector_column); |
6615 | if (currentBaseVectorsSize != expectedBaseVectorsSize) { |
6616 | // IMP: V16465_00535 |
6617 | vtab_set_error( |
6618 | &p->base, |
6619 | "vectors blob size doesn't match - expected %lld, found %lld", |
6620 | expectedBaseVectorsSize, currentBaseVectorsSize); |
6621 | rc = SQLITE_ERROR1; |
6622 | goto cleanup; |
6623 | } |
6624 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobVectors, baseVectors, currentBaseVectorsSize, 0); |
6625 | |
6626 | if (rc != SQLITE_OK0) { |
6627 | vtab_set_error(&p->base, "vectors blob read error for %lld", chunk_id); |
6628 | rc = SQLITE_ERROR1; |
6629 | goto cleanup; |
6630 | } |
6631 | |
6632 | bitmap_copy(b, chunkValidity, p->chunk_size); |
6633 | if (arrayRowidsIn) { |
6634 | bitmap_clear(bmRowids, p->chunk_size); |
6635 | |
6636 | for (int i = 0; i < p->chunk_size; i++) { |
6637 | if (!bitmap_get(chunkValidity, i)) { |
6638 | continue; |
6639 | } |
6640 | i64 rowid = chunkRowids[i]; |
6641 | void *in = bsearch(&rowid, arrayRowidsIn->z, arrayRowidsIn->length, |
6642 | sizeof(i64), _cmp); |
6643 | bitmap_set(bmRowids, i, in ? 1 : 0); |
6644 | } |
6645 | bitmap_and_inplace(b, bmRowids, p->chunk_size); |
6646 | } |
6647 | |
6648 | if(hasMetadataFilters) { |
6649 | for(int i = 0; i < argc; i++) { |
6650 | int idx = 1 + (i * 4); |
6651 | char kind = idxStr[idx + 0]; |
6652 | if(kind != VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) { |
6653 | continue; |
6654 | } |
6655 | int metadata_idx = idxStr[idx + 1] - 'A'; |
6656 | int operator = idxStr[idx + 2]; |
6657 | |
6658 | if(!metadataBlobs[metadata_idx]) { |
6659 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &metadataBlobs[metadata_idx]); |
6660 | vtab_set_error(&p->base, "Could not open metadata blob"); |
6661 | if(rc != SQLITE_OK0) { |
6662 | goto cleanup; |
6663 | } |
6664 | } |
6665 | |
6666 | bitmap_clear(bmMetadata, p->chunk_size); |
6667 | rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i); |
6668 | if(rc != SQLITE_OK0) { |
6669 | vtab_set_error(&p->base, "Could not filter metadata fields"); |
6670 | if(rc != SQLITE_OK0) { |
6671 | goto cleanup; |
6672 | } |
6673 | } |
6674 | bitmap_and_inplace(b, bmMetadata, p->chunk_size); |
6675 | } |
6676 | } |
6677 | |
6678 | |
6679 | for (int i = 0; i < p->chunk_size; i++) { |
6680 | if (!bitmap_get(b, i)) { |
6681 | continue; |
6682 | }; |
6683 | |
6684 | f32 result; |
6685 | switch (vector_column->element_type) { |
6686 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: { |
6687 | const f32 *base_i = |
6688 | ((f32 *)baseVectors) + (i * vector_column->dimensions); |
6689 | switch (vector_column->distance_metric) { |
6690 | case VEC0_DISTANCE_METRIC_L2: { |
6691 | result = distance_l2_sqr_float(base_i, (f32 *)queryVector, |
6692 | &vector_column->dimensions); |
6693 | break; |
6694 | } |
6695 | case VEC0_DISTANCE_METRIC_L1: { |
6696 | result = distance_l1_f32(base_i, (f32 *)queryVector, |
6697 | &vector_column->dimensions); |
6698 | break; |
6699 | } |
6700 | case VEC0_DISTANCE_METRIC_COSINE: { |
6701 | result = distance_cosine_float(base_i, (f32 *)queryVector, |
6702 | &vector_column->dimensions); |
6703 | break; |
6704 | } |
6705 | } |
6706 | break; |
6707 | } |
6708 | case SQLITE_VEC_ELEMENT_TYPE_INT8: { |
6709 | const i8 *base_i = |
6710 | ((i8 *)baseVectors) + (i * vector_column->dimensions); |
6711 | switch (vector_column->distance_metric) { |
6712 | case VEC0_DISTANCE_METRIC_L2: { |
6713 | result = distance_l2_sqr_int8(base_i, (i8 *)queryVector, |
6714 | &vector_column->dimensions); |
6715 | break; |
6716 | } |
6717 | case VEC0_DISTANCE_METRIC_L1: { |
6718 | result = distance_l1_int8(base_i, (i8 *)queryVector, |
6719 | &vector_column->dimensions); |
6720 | break; |
6721 | } |
6722 | case VEC0_DISTANCE_METRIC_COSINE: { |
6723 | result = distance_cosine_int8(base_i, (i8 *)queryVector, |
6724 | &vector_column->dimensions); |
6725 | break; |
6726 | } |
6727 | } |
6728 | |
6729 | break; |
6730 | } |
6731 | case SQLITE_VEC_ELEMENT_TYPE_BIT: { |
6732 | const u8 *base_i = |
6733 | ((u8 *)baseVectors) + (i * (vector_column->dimensions / CHAR_BIT8)); |
6734 | result = distance_hamming(base_i, (u8 *)queryVector, |
6735 | &vector_column->dimensions); |
6736 | break; |
6737 | } |
6738 | } |
6739 | |
6740 | chunk_distances[i] = result; |
6741 | } |
6742 | |
6743 | int used1; |
6744 | min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs, |
6745 | min(k, p->chunk_size)(((k) <= (p->chunk_size)) ? (k) : (p->chunk_size)), bTaken, &used1); |
6746 | |
6747 | i64 used; |
6748 | merge_sorted_lists(topk_distances, topk_rowids, k_used, chunk_distances, |
6749 | chunkRowids, chunk_topk_idxs, |
6750 | min(min(k, p->chunk_size), used1)((((((k) <= (p->chunk_size)) ? (k) : (p->chunk_size) )) <= (used1)) ? ((((k) <= (p->chunk_size)) ? (k) : ( p->chunk_size))) : (used1)), tmp_topk_distances, |
6751 | tmp_topk_rowids, k, &used); |
6752 | |
6753 | for (int i = 0; i < used; i++) { |
6754 | topk_rowids[i] = tmp_topk_rowids[i]; |
6755 | topk_distances[i] = tmp_topk_distances[i]; |
6756 | } |
6757 | k_used = used; |
6758 | // blobVectors is always opened with read-only permissions, so this never |
6759 | // fails. |
6760 | sqlite3_blob_closesqlite3_api->blob_close(blobVectors); |
6761 | blobVectors = NULL((void*)0); |
6762 | } |
6763 | |
6764 | *out_topk_rowids = topk_rowids; |
6765 | *out_topk_distances = topk_distances; |
6766 | *out_used = k_used; |
6767 | rc = SQLITE_OK0; |
6768 | |
6769 | cleanup: |
6770 | if (rc != SQLITE_OK0) { |
6771 | sqlite3_freesqlite3_api->free(topk_rowids); |
6772 | sqlite3_freesqlite3_api->free(topk_distances); |
6773 | } |
6774 | sqlite3_freesqlite3_api->free(chunk_topk_idxs); |
6775 | sqlite3_freesqlite3_api->free(tmp_topk_rowids); |
6776 | sqlite3_freesqlite3_api->free(tmp_topk_distances); |
6777 | sqlite3_freesqlite3_api->free(b); |
6778 | sqlite3_freesqlite3_api->free(bTaken); |
6779 | sqlite3_freesqlite3_api->free(bmRowids); |
6780 | sqlite3_freesqlite3_api->free(baseVectors); |
6781 | sqlite3_freesqlite3_api->free(chunk_distances); |
6782 | sqlite3_freesqlite3_api->free(bmMetadata); |
6783 | for(int i = 0; i < VEC0_MAX_METADATA_COLUMNS16; i++) { |
6784 | sqlite3_blob_closesqlite3_api->blob_close(metadataBlobs[i]); |
6785 | } |
6786 | // blobVectors is always opened with read-only permissions, so this never |
6787 | // fails. |
6788 | sqlite3_blob_closesqlite3_api->blob_close(blobVectors); |
6789 | return rc; |
6790 | } |
6791 | |
6792 | int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, |
6793 | const char *idxStr, int argc, sqlite3_value **argv) { |
6794 | assert(argc == (strlen(idxStr)-1) / 4)((void) sizeof ((argc == (strlen(idxStr)-1) / 4) ? 1 : 0), __extension__ ({ if (argc == (strlen(idxStr)-1) / 4) ; else __assert_fail ( "argc == (strlen(idxStr)-1) / 4", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 6794, __extension__ __PRETTY_FUNCTION__); })); |
6795 | int rc; |
6796 | struct vec0_query_knn_data *knn_data; |
6797 | |
6798 | int vectorColumnIdx = idxNum; |
6799 | struct VectorColumnDefinition *vector_column = |
6800 | &p->vector_columns[vectorColumnIdx]; |
6801 | |
6802 | struct Array *arrayRowidsIn = NULL((void*)0); |
6803 | sqlite3_stmt *stmtChunks = NULL((void*)0); |
6804 | void *queryVector; |
6805 | size_t dimensions; |
6806 | enum VectorElementType elementType; |
6807 | vector_cleanup queryVectorCleanup = vector_cleanup_noop; |
6808 | char *pzError; |
6809 | knn_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*knn_data)); |
6810 | if (!knn_data) { |
6811 | return SQLITE_NOMEM7; |
6812 | } |
6813 | memset(knn_data, 0, sizeof(*knn_data)); |
6814 | // array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints |
6815 | struct Array * aMetadataIn = NULL((void*)0); |
6816 | |
6817 | int query_idx =-1; |
6818 | int k_idx = -1; |
6819 | int rowid_in_idx = -1; |
6820 | for(int i = 0; i < argc; i++) { |
6821 | if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_MATCH) { |
6822 | query_idx = i; |
6823 | } |
6824 | if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_K) { |
6825 | k_idx = i; |
6826 | } |
6827 | if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_ROWID_IN) { |
6828 | rowid_in_idx = i; |
6829 | } |
6830 | } |
6831 | assert(query_idx >= 0)((void) sizeof ((query_idx >= 0) ? 1 : 0), __extension__ ( { if (query_idx >= 0) ; else __assert_fail ("query_idx >= 0" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 6831, __extension__ __PRETTY_FUNCTION__); })); |
6832 | assert(k_idx >= 0)((void) sizeof ((k_idx >= 0) ? 1 : 0), __extension__ ({ if (k_idx >= 0) ; else __assert_fail ("k_idx >= 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 6832, __extension__ __PRETTY_FUNCTION__); })); |
6833 | |
6834 | // make sure the query vector matches the vector column (type dimensions etc.) |
6835 | rc = vector_from_value(argv[query_idx], &queryVector, &dimensions, &elementType, |
6836 | &queryVectorCleanup, &pzError); |
6837 | |
6838 | if (rc != SQLITE_OK0) { |
6839 | vtab_set_error(&p->base, |
6840 | "Query vector on the \"%.*s\" column is invalid: %z", |
6841 | vector_column->name_length, vector_column->name, pzError); |
6842 | rc = SQLITE_ERROR1; |
6843 | goto cleanup; |
6844 | } |
6845 | if (elementType != vector_column->element_type) { |
6846 | vtab_set_error( |
6847 | &p->base, |
6848 | "Query vector for the \"%.*s\" column is expected to be of type " |
6849 | "%s, but a %s vector was provided.", |
6850 | vector_column->name_length, vector_column->name, |
6851 | vector_subtype_name(vector_column->element_type), |
6852 | vector_subtype_name(elementType)); |
6853 | rc = SQLITE_ERROR1; |
6854 | goto cleanup; |
6855 | } |
6856 | if (dimensions != vector_column->dimensions) { |
6857 | vtab_set_error( |
6858 | &p->base, |
6859 | "Dimension mismatch for query vector for the \"%.*s\" column. " |
6860 | "Expected %d dimensions but received %d.", |
6861 | vector_column->name_length, vector_column->name, |
6862 | vector_column->dimensions, dimensions); |
6863 | rc = SQLITE_ERROR1; |
6864 | goto cleanup; |
6865 | } |
6866 | |
6867 | i64 k = sqlite3_value_int64sqlite3_api->value_int64(argv[k_idx]); |
6868 | if (k < 0) { |
6869 | vtab_set_error( |
6870 | &p->base, "k value in knn queries must be greater than or equal to 0."); |
6871 | rc = SQLITE_ERROR1; |
6872 | goto cleanup; |
6873 | } |
6874 | #define SQLITE_VEC_VEC0_K_MAX4096 4096 |
6875 | if (k > SQLITE_VEC_VEC0_K_MAX4096) { |
6876 | vtab_set_error( |
6877 | &p->base, |
6878 | "k value in knn query too large, provided %lld and the limit is %lld", |
6879 | k, SQLITE_VEC_VEC0_K_MAX4096); |
6880 | rc = SQLITE_ERROR1; |
6881 | goto cleanup; |
6882 | } |
6883 | |
6884 | if (k == 0) { |
6885 | knn_data->k = 0; |
6886 | pCur->knn_data = knn_data; |
6887 | pCur->query_plan = VEC0_QUERY_PLAN_KNN; |
6888 | rc = SQLITE_OK0; |
6889 | goto cleanup; |
6890 | } |
6891 | |
6892 | // handle when a `rowid in (...)` operation was provided |
6893 | // Array of all the rowids that appear in any `rowid in (...)` constraint. |
6894 | // NULL if none were provided, which means a "full" scan. |
6895 | #if COMPILER_SUPPORTS_VTAB_IN1 |
6896 | if (rowid_in_idx >= 0) { |
6897 | sqlite3_value *item; |
6898 | int rc; |
6899 | arrayRowidsIn = sqlite3_mallocsqlite3_api->malloc(sizeof(*arrayRowidsIn)); |
6900 | if (!arrayRowidsIn) { |
6901 | rc = SQLITE_NOMEM7; |
Value stored to 'rc' is never read | |
6902 | goto cleanup; |
6903 | } |
6904 | memset(arrayRowidsIn, 0, sizeof(*arrayRowidsIn)); |
6905 | |
6906 | rc = array_init(arrayRowidsIn, sizeof(i64), 32); |
6907 | if (rc != SQLITE_OK0) { |
6908 | goto cleanup; |
6909 | } |
6910 | for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[rowid_in_idx], &item); rc == SQLITE_OK0 && item; |
6911 | rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[rowid_in_idx], &item)) { |
6912 | i64 rowid; |
6913 | if (p->pkIsText) { |
6914 | rc = vec0_rowid_from_id(p, item, &rowid); |
6915 | if (rc != SQLITE_OK0) { |
6916 | goto cleanup; |
6917 | } |
6918 | } else { |
6919 | rowid = sqlite3_value_int64sqlite3_api->value_int64(item); |
6920 | } |
6921 | rc = array_append(arrayRowidsIn, &rowid); |
6922 | if (rc != SQLITE_OK0) { |
6923 | goto cleanup; |
6924 | } |
6925 | } |
6926 | if (rc != SQLITE_DONE101) { |
6927 | vtab_set_error(&p->base, "error processing rowid in (...) array"); |
6928 | goto cleanup; |
6929 | } |
6930 | qsort(arrayRowidsIn->z, arrayRowidsIn->length, arrayRowidsIn->element_size, |
6931 | _cmp); |
6932 | } |
6933 | #endif |
6934 | |
6935 | #if COMPILER_SUPPORTS_VTAB_IN1 |
6936 | for(int i = 0; i < argc; i++) { |
6937 | if(!(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr[1 + (i*4) + 2] == VEC0_METADATA_OPERATOR_IN)) { |
6938 | continue; |
6939 | } |
6940 | int metadata_idx = idxStr[1 + (i*4) + 1] - 'A'; |
6941 | if(!aMetadataIn) { |
6942 | aMetadataIn = sqlite3_mallocsqlite3_api->malloc(sizeof(*aMetadataIn)); |
6943 | if(!aMetadataIn) { |
6944 | rc = SQLITE_NOMEM7; |
6945 | goto cleanup; |
6946 | } |
6947 | memset(aMetadataIn, 0, sizeof(*aMetadataIn)); |
6948 | rc = array_init(aMetadataIn, sizeof(struct Vec0MetadataIn), 8); |
6949 | if(rc != SQLITE_OK0) { |
6950 | goto cleanup; |
6951 | } |
6952 | } |
6953 | |
6954 | struct Vec0MetadataIn item; |
6955 | memset(&item, 0, sizeof(item)); |
6956 | item.metadata_idx=metadata_idx; |
6957 | item.argv_idx = i; |
6958 | |
6959 | switch(p->metadata_columns[metadata_idx].kind) { |
6960 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { |
6961 | rc = array_init(&item.array, sizeof(i64), 16); |
6962 | if(rc != SQLITE_OK0) { |
6963 | goto cleanup; |
6964 | } |
6965 | sqlite3_value *entry; |
6966 | for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[i], &entry); rc == SQLITE_OK0 && entry; rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[i], &entry)) { |
6967 | i64 v = sqlite3_value_int64sqlite3_api->value_int64(entry); |
6968 | rc = array_append(&item.array, &v); |
6969 | if (rc != SQLITE_OK0) { |
6970 | goto cleanup; |
6971 | } |
6972 | } |
6973 | |
6974 | if (rc != SQLITE_DONE101) { |
6975 | vtab_set_error(&p->base, "Error fetching next value in `x in (...)` integer expression"); |
6976 | goto cleanup; |
6977 | } |
6978 | |
6979 | break; |
6980 | } |
6981 | case VEC0_METADATA_COLUMN_KIND_TEXT: { |
6982 | rc = array_init(&item.array, sizeof(struct Vec0MetadataInTextEntry), 16); |
6983 | if(rc != SQLITE_OK0) { |
6984 | goto cleanup; |
6985 | } |
6986 | sqlite3_value *entry; |
6987 | for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[i], &entry); rc == SQLITE_OK0 && entry; rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[i], &entry)) { |
6988 | const char * s = (const char *) sqlite3_value_textsqlite3_api->value_text(entry); |
6989 | int n = sqlite3_value_bytessqlite3_api->value_bytes(entry); |
6990 | |
6991 | struct Vec0MetadataInTextEntry entry; |
6992 | entry.zString = sqlite3_mprintfsqlite3_api->mprintf("%.*s", n, s); |
6993 | if(!entry.zString) { |
6994 | rc = SQLITE_NOMEM7; |
6995 | goto cleanup; |
6996 | } |
6997 | entry.n = n; |
6998 | rc = array_append(&item.array, &entry); |
6999 | if (rc != SQLITE_OK0) { |
7000 | goto cleanup; |
7001 | } |
7002 | } |
7003 | |
7004 | if (rc != SQLITE_DONE101) { |
7005 | vtab_set_error(&p->base, "Error fetching next value in `x in (...)` text expression"); |
7006 | goto cleanup; |
7007 | } |
7008 | |
7009 | break; |
7010 | } |
7011 | default: { |
7012 | vtab_set_error(&p->base, "Internal sqlite-vec error"); |
7013 | goto cleanup; |
7014 | } |
7015 | } |
7016 | |
7017 | rc = array_append(aMetadataIn, &item); |
7018 | if(rc != SQLITE_OK0) { |
7019 | goto cleanup; |
7020 | } |
7021 | } |
7022 | #endif |
7023 | |
7024 | rc = vec0_chunks_iter(p, idxStr, argc, argv, &stmtChunks); |
7025 | if (rc != SQLITE_OK0) { |
7026 | // IMP: V06942_23781 |
7027 | vtab_set_error(&p->base, "Error preparing stmtChunk: %s", |
7028 | sqlite3_errmsgsqlite3_api->errmsg(p->db)); |
7029 | goto cleanup; |
7030 | } |
7031 | |
7032 | i64 *topk_rowids = NULL((void*)0); |
7033 | f32 *topk_distances = NULL((void*)0); |
7034 | i64 k_used = 0; |
7035 | rc = vec0Filter_knn_chunks_iter(p, stmtChunks, vector_column, vectorColumnIdx, |
7036 | arrayRowidsIn, aMetadataIn, idxStr, argc, argv, queryVector, k, &topk_rowids, |
7037 | &topk_distances, &k_used); |
7038 | if (rc != SQLITE_OK0) { |
7039 | goto cleanup; |
7040 | } |
7041 | |
7042 | knn_data->current_idx = 0; |
7043 | knn_data->k = k; |
7044 | knn_data->rowids = topk_rowids; |
7045 | knn_data->distances = topk_distances; |
7046 | knn_data->k_used = k_used; |
7047 | |
7048 | pCur->knn_data = knn_data; |
7049 | pCur->query_plan = VEC0_QUERY_PLAN_KNN; |
7050 | rc = SQLITE_OK0; |
7051 | |
7052 | cleanup: |
7053 | sqlite3_finalizesqlite3_api->finalize(stmtChunks); |
7054 | array_cleanup(arrayRowidsIn); |
7055 | sqlite3_freesqlite3_api->free(arrayRowidsIn); |
7056 | queryVectorCleanup(queryVector); |
7057 | if(aMetadataIn) { |
7058 | for(size_t i = 0; i < aMetadataIn->length; i++) { |
7059 | struct Vec0MetadataIn* item = &((struct Vec0MetadataIn *) aMetadataIn->z)[i]; |
7060 | for(size_t j = 0; j < item->array.length; j++) { |
7061 | if(p->metadata_columns[item->metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { |
7062 | struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry*)item->array.z)[j]; |
7063 | sqlite3_freesqlite3_api->free(entry.zString); |
7064 | } |
7065 | } |
7066 | array_cleanup(&item->array); |
7067 | } |
7068 | array_cleanup(aMetadataIn); |
7069 | } |
7070 | |
7071 | sqlite3_freesqlite3_api->free(aMetadataIn); |
7072 | |
7073 | return rc; |
7074 | } |
7075 | |
7076 | int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) { |
7077 | int rc; |
7078 | char *zSql; |
7079 | struct vec0_query_fullscan_data *fullscan_data; |
7080 | |
7081 | fullscan_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*fullscan_data)); |
7082 | if (!fullscan_data) { |
7083 | return SQLITE_NOMEM7; |
7084 | } |
7085 | memset(fullscan_data, 0, sizeof(*fullscan_data)); |
7086 | |
7087 | zSql = sqlite3_mprintfsqlite3_api->mprintf(" SELECT rowid " |
7088 | " FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" |
7089 | " ORDER by chunk_id, chunk_offset ", |
7090 | p->schemaName, p->tableName); |
7091 | if (!zSql) { |
7092 | rc = SQLITE_NOMEM7; |
7093 | goto error; |
7094 | } |
7095 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &fullscan_data->rowids_stmt, NULL((void*)0)); |
7096 | sqlite3_freesqlite3_api->free(zSql); |
7097 | if (rc != SQLITE_OK0) { |
7098 | // IMP: V09901_26739 |
7099 | vtab_set_error(&p->base, "Error preparing rowid scan: %s", |
7100 | sqlite3_errmsgsqlite3_api->errmsg(p->db)); |
7101 | goto error; |
7102 | } |
7103 | |
7104 | rc = sqlite3_stepsqlite3_api->step(fullscan_data->rowids_stmt); |
7105 | |
7106 | // DONE when there's no rowids, ROW when there are, both "success" |
7107 | if (!(rc == SQLITE_ROW100 || rc == SQLITE_DONE101)) { |
7108 | goto error; |
7109 | } |
7110 | |
7111 | fullscan_data->done = rc == SQLITE_DONE101; |
7112 | pCur->query_plan = VEC0_QUERY_PLAN_FULLSCAN; |
7113 | pCur->fullscan_data = fullscan_data; |
7114 | return SQLITE_OK0; |
7115 | |
7116 | error: |
7117 | vec0_query_fullscan_data_clear(fullscan_data); |
7118 | sqlite3_freesqlite3_api->free(fullscan_data); |
7119 | return rc; |
7120 | } |
7121 | |
7122 | int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc, |
7123 | sqlite3_value **argv) { |
7124 | int rc; |
7125 | assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc == 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 7125, __extension__ __PRETTY_FUNCTION__); })); |
7126 | i64 rowid; |
7127 | struct vec0_query_point_data *point_data = NULL((void*)0); |
7128 | |
7129 | point_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*point_data)); |
7130 | if (!point_data) { |
7131 | rc = SQLITE_NOMEM7; |
7132 | goto error; |
7133 | } |
7134 | memset(point_data, 0, sizeof(*point_data)); |
7135 | |
7136 | if (p->pkIsText) { |
7137 | rc = vec0_rowid_from_id(p, argv[0], &rowid); |
7138 | if (rc == SQLITE_EMPTY16) { |
7139 | goto eof; |
7140 | } |
7141 | if (rc != SQLITE_OK0) { |
7142 | goto error; |
7143 | } |
7144 | } else { |
7145 | rowid = sqlite3_value_int64sqlite3_api->value_int64(argv[0]); |
7146 | } |
7147 | |
7148 | for (int i = 0; i < p->numVectorColumns; i++) { |
7149 | rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL((void*)0)); |
7150 | if (rc == SQLITE_EMPTY16) { |
7151 | goto eof; |
7152 | } |
7153 | if (rc != SQLITE_OK0) { |
7154 | goto error; |
7155 | } |
7156 | } |
7157 | |
7158 | point_data->rowid = rowid; |
7159 | point_data->done = 0; |
7160 | pCur->point_data = point_data; |
7161 | pCur->query_plan = VEC0_QUERY_PLAN_POINT; |
7162 | return SQLITE_OK0; |
7163 | |
7164 | eof: |
7165 | point_data->rowid = rowid; |
7166 | point_data->done = 1; |
7167 | pCur->point_data = point_data; |
7168 | pCur->query_plan = VEC0_QUERY_PLAN_POINT; |
7169 | return SQLITE_OK0; |
7170 | |
7171 | error: |
7172 | vec0_query_point_data_clear(point_data); |
7173 | sqlite3_freesqlite3_api->free(point_data); |
7174 | return rc; |
7175 | } |
7176 | |
7177 | static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, |
7178 | const char *idxStr, int argc, sqlite3_value **argv) { |
7179 | vec0_vtab *p = (vec0_vtab *)pVtabCursor->pVtab; |
7180 | vec0_cursor *pCur = (vec0_cursor *)pVtabCursor; |
7181 | vec0_cursor_clear(pCur); |
7182 | |
7183 | int idxStrLength = strlen(idxStr); |
7184 | if(idxStrLength <= 0) { |
7185 | return SQLITE_ERROR1; |
7186 | } |
7187 | if((idxStrLength-1) % 4 != 0) { |
7188 | return SQLITE_ERROR1; |
7189 | } |
7190 | int numValueEntries = (idxStrLength-1) / 4; |
7191 | if(numValueEntries != argc) { |
7192 | return SQLITE_ERROR1; |
7193 | } |
7194 | |
7195 | char query_plan = idxStr[0]; |
7196 | switch(query_plan) { |
7197 | case VEC0_QUERY_PLAN_FULLSCAN: |
7198 | return vec0Filter_fullscan(p, pCur); |
7199 | case VEC0_QUERY_PLAN_KNN: |
7200 | return vec0Filter_knn(pCur, p, idxNum, idxStr, argc, argv); |
7201 | case VEC0_QUERY_PLAN_POINT: |
7202 | return vec0Filter_point(pCur, p, argc, argv); |
7203 | default: |
7204 | vtab_set_error(pVtabCursor->pVtab, "unknown idxStr '%s'", idxStr); |
7205 | return SQLITE_ERROR1; |
7206 | } |
7207 | } |
7208 | |
7209 | static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { |
7210 | vec0_cursor *pCur = (vec0_cursor *)cur; |
7211 | switch (pCur->query_plan) { |
7212 | case VEC0_QUERY_PLAN_FULLSCAN: { |
7213 | *pRowid = sqlite3_column_int64sqlite3_api->column_int64(pCur->fullscan_data->rowids_stmt, 0); |
7214 | return SQLITE_OK0; |
7215 | } |
7216 | case VEC0_QUERY_PLAN_POINT: { |
7217 | *pRowid = pCur->point_data->rowid; |
7218 | return SQLITE_OK0; |
7219 | } |
7220 | case VEC0_QUERY_PLAN_KNN: { |
7221 | vtab_set_error(cur->pVtab, |
7222 | "Internal sqlite-vec error: expected point query plan in " |
7223 | "vec0Rowid, found %d", |
7224 | pCur->query_plan); |
7225 | return SQLITE_ERROR1; |
7226 | } |
7227 | } |
7228 | return SQLITE_ERROR1; |
7229 | } |
7230 | |
7231 | static int vec0Next(sqlite3_vtab_cursor *cur) { |
7232 | vec0_cursor *pCur = (vec0_cursor *)cur; |
7233 | switch (pCur->query_plan) { |
7234 | case VEC0_QUERY_PLAN_FULLSCAN: { |
7235 | if (!pCur->fullscan_data) { |
7236 | return SQLITE_ERROR1; |
7237 | } |
7238 | int rc = sqlite3_stepsqlite3_api->step(pCur->fullscan_data->rowids_stmt); |
7239 | if (rc == SQLITE_DONE101) { |
7240 | pCur->fullscan_data->done = 1; |
7241 | return SQLITE_OK0; |
7242 | } |
7243 | if (rc == SQLITE_ROW100) { |
7244 | return SQLITE_OK0; |
7245 | } |
7246 | return SQLITE_ERROR1; |
7247 | } |
7248 | case VEC0_QUERY_PLAN_KNN: { |
7249 | if (!pCur->knn_data) { |
7250 | return SQLITE_ERROR1; |
7251 | } |
7252 | |
7253 | pCur->knn_data->current_idx++; |
7254 | return SQLITE_OK0; |
7255 | } |
7256 | case VEC0_QUERY_PLAN_POINT: { |
7257 | if (!pCur->point_data) { |
7258 | return SQLITE_ERROR1; |
7259 | } |
7260 | pCur->point_data->done = 1; |
7261 | return SQLITE_OK0; |
7262 | } |
7263 | } |
7264 | return SQLITE_ERROR1; |
7265 | } |
7266 | |
7267 | static int vec0Eof(sqlite3_vtab_cursor *cur) { |
7268 | vec0_cursor *pCur = (vec0_cursor *)cur; |
7269 | switch (pCur->query_plan) { |
7270 | case VEC0_QUERY_PLAN_FULLSCAN: { |
7271 | if (!pCur->fullscan_data) { |
7272 | return 1; |
7273 | } |
7274 | return pCur->fullscan_data->done; |
7275 | } |
7276 | case VEC0_QUERY_PLAN_KNN: { |
7277 | if (!pCur->knn_data) { |
7278 | return 1; |
7279 | } |
7280 | // return (pCur->knn_data->current_idx >= pCur->knn_data->k) || |
7281 | // (pCur->knn_data->distances[pCur->knn_data->current_idx] == FLT_MAX); |
7282 | return (pCur->knn_data->current_idx >= pCur->knn_data->k_used); |
7283 | } |
7284 | case VEC0_QUERY_PLAN_POINT: { |
7285 | if (!pCur->point_data) { |
7286 | return 1; |
7287 | } |
7288 | return pCur->point_data->done; |
7289 | } |
7290 | } |
7291 | return 1; |
7292 | } |
7293 | |
7294 | static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur, |
7295 | sqlite3_context *context, int i) { |
7296 | if (!pCur->fullscan_data) { |
7297 | sqlite3_result_errorsqlite3_api->result_error( |
7298 | context, "Internal sqlite-vec error: fullscan_data is NULL.", -1); |
7299 | return SQLITE_ERROR1; |
7300 | } |
7301 | i64 rowid = sqlite3_column_int64sqlite3_api->column_int64(pCur->fullscan_data->rowids_stmt, 0); |
7302 | if (i == VEC0_COLUMN_ID0) { |
7303 | return vec0_result_id(pVtab, context, rowid); |
7304 | } |
7305 | else if (vec0_column_idx_is_vector(pVtab, i)) { |
7306 | void *v; |
7307 | int sz; |
7308 | int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i); |
7309 | int rc = vec0_get_vector_data(pVtab, rowid, vector_idx, &v, &sz); |
7310 | if (rc != SQLITE_OK0) { |
7311 | return rc; |
7312 | } |
7313 | sqlite3_result_blobsqlite3_api->result_blob(context, v, sz, sqlite3_freesqlite3_api->free); |
7314 | sqlite3_result_subtypesqlite3_api->result_subtype(context, |
7315 | pVtab->vector_columns[vector_idx].element_type); |
7316 | |
7317 | } |
7318 | else if (i == vec0_column_distance_idx(pVtab)) { |
7319 | sqlite3_result_nullsqlite3_api->result_null(context); |
7320 | } |
7321 | else if(vec0_column_idx_is_partition(pVtab, i)) { |
7322 | int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i); |
7323 | sqlite3_value * v; |
7324 | int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v); |
7325 | if(rc == SQLITE_OK0) { |
7326 | sqlite3_result_valuesqlite3_api->result_value(context, v); |
7327 | sqlite3_value_freesqlite3_api->value_free(v); |
7328 | }else { |
7329 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); |
7330 | } |
7331 | } |
7332 | else if(vec0_column_idx_is_auxiliary(pVtab, i)) { |
7333 | int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i); |
7334 | sqlite3_value * v; |
7335 | int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v); |
7336 | if(rc == SQLITE_OK0) { |
7337 | sqlite3_result_valuesqlite3_api->result_value(context, v); |
7338 | sqlite3_value_freesqlite3_api->value_free(v); |
7339 | }else { |
7340 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); |
7341 | } |
7342 | } |
7343 | |
7344 | else if(vec0_column_idx_is_metadata(pVtab, i)) { |
7345 | if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { |
7346 | return SQLITE_OK0; |
7347 | } |
7348 | int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); |
7349 | int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); |
7350 | if(rc != SQLITE_OK0) { |
7351 | // IMP: V15466_32305 |
7352 | const char * zErr = sqlite3_mprintfsqlite3_api->mprintf( |
7353 | "Could not extract metadata value for column %.*s at rowid %lld", |
7354 | pVtab->metadata_columns[metadata_idx].name_length, |
7355 | pVtab->metadata_columns[metadata_idx].name, rowid |
7356 | ); |
7357 | if(zErr) { |
7358 | sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1); |
7359 | sqlite3_freesqlite3_api->free((void *) zErr); |
7360 | }else { |
7361 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
7362 | } |
7363 | } |
7364 | } |
7365 | |
7366 | return SQLITE_OK0; |
7367 | } |
7368 | |
7369 | static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur, |
7370 | sqlite3_context *context, int i) { |
7371 | if (!pCur->point_data) { |
7372 | sqlite3_result_errorsqlite3_api->result_error(context, |
7373 | "Internal sqlite-vec error: point_data is NULL.", -1); |
7374 | return SQLITE_ERROR1; |
7375 | } |
7376 | if (i == VEC0_COLUMN_ID0) { |
7377 | return vec0_result_id(pVtab, context, pCur->point_data->rowid); |
7378 | } |
7379 | else if (i == vec0_column_distance_idx(pVtab)) { |
7380 | sqlite3_result_nullsqlite3_api->result_null(context); |
7381 | return SQLITE_OK0; |
7382 | } |
7383 | else if (vec0_column_idx_is_vector(pVtab, i)) { |
7384 | if (sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { |
7385 | sqlite3_result_nullsqlite3_api->result_null(context); |
7386 | return SQLITE_OK0; |
7387 | } |
7388 | int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i); |
7389 | sqlite3_result_blobsqlite3_api->result_blob( |
7390 | context, pCur->point_data->vectors[vector_idx], |
7391 | vector_column_byte_size(pVtab->vector_columns[vector_idx]), |
7392 | SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
7393 | sqlite3_result_subtypesqlite3_api->result_subtype(context, |
7394 | pVtab->vector_columns[vector_idx].element_type); |
7395 | return SQLITE_OK0; |
7396 | } |
7397 | else if(vec0_column_idx_is_partition(pVtab, i)) { |
7398 | if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { |
7399 | return SQLITE_OK0; |
7400 | } |
7401 | int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i); |
7402 | i64 rowid = pCur->point_data->rowid; |
7403 | sqlite3_value * v; |
7404 | int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v); |
7405 | if(rc == SQLITE_OK0) { |
7406 | sqlite3_result_valuesqlite3_api->result_value(context, v); |
7407 | sqlite3_value_freesqlite3_api->value_free(v); |
7408 | }else { |
7409 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); |
7410 | } |
7411 | } |
7412 | else if(vec0_column_idx_is_auxiliary(pVtab, i)) { |
7413 | if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { |
7414 | return SQLITE_OK0; |
7415 | } |
7416 | i64 rowid = pCur->point_data->rowid; |
7417 | int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i); |
7418 | sqlite3_value * v; |
7419 | int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v); |
7420 | if(rc == SQLITE_OK0) { |
7421 | sqlite3_result_valuesqlite3_api->result_value(context, v); |
7422 | sqlite3_value_freesqlite3_api->value_free(v); |
7423 | }else { |
7424 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); |
7425 | } |
7426 | } |
7427 | |
7428 | else if(vec0_column_idx_is_metadata(pVtab, i)) { |
7429 | if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) { |
7430 | return SQLITE_OK0; |
7431 | } |
7432 | i64 rowid = pCur->point_data->rowid; |
7433 | int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); |
7434 | int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); |
7435 | if(rc != SQLITE_OK0) { |
7436 | const char * zErr = sqlite3_mprintfsqlite3_api->mprintf( |
7437 | "Could not extract metadata value for column %.*s at rowid %lld", |
7438 | pVtab->metadata_columns[metadata_idx].name_length, |
7439 | pVtab->metadata_columns[metadata_idx].name, rowid |
7440 | ); |
7441 | if(zErr) { |
7442 | sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1); |
7443 | sqlite3_freesqlite3_api->free((void *) zErr); |
7444 | }else { |
7445 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
7446 | } |
7447 | } |
7448 | } |
7449 | |
7450 | return SQLITE_OK0; |
7451 | } |
7452 | |
7453 | static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur, |
7454 | sqlite3_context *context, int i) { |
7455 | if (!pCur->knn_data) { |
7456 | sqlite3_result_errorsqlite3_api->result_error(context, |
7457 | "Internal sqlite-vec error: knn_data is NULL.", -1); |
7458 | return SQLITE_ERROR1; |
7459 | } |
7460 | if (i == VEC0_COLUMN_ID0) { |
7461 | i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; |
7462 | return vec0_result_id(pVtab, context, rowid); |
7463 | } |
7464 | else if (i == vec0_column_distance_idx(pVtab)) { |
7465 | sqlite3_result_doublesqlite3_api->result_double( |
7466 | context, pCur->knn_data->distances[pCur->knn_data->current_idx]); |
7467 | return SQLITE_OK0; |
7468 | } |
7469 | else if (vec0_column_idx_is_vector(pVtab, i)) { |
7470 | void *out; |
7471 | int sz; |
7472 | int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i); |
7473 | int rc = vec0_get_vector_data( |
7474 | pVtab, pCur->knn_data->rowids[pCur->knn_data->current_idx], vector_idx, |
7475 | &out, &sz); |
7476 | if (rc != SQLITE_OK0) { |
7477 | return rc; |
7478 | } |
7479 | sqlite3_result_blobsqlite3_api->result_blob(context, out, sz, sqlite3_freesqlite3_api->free); |
7480 | sqlite3_result_subtypesqlite3_api->result_subtype(context, |
7481 | pVtab->vector_columns[vector_idx].element_type); |
7482 | return SQLITE_OK0; |
7483 | } |
7484 | else if(vec0_column_idx_is_partition(pVtab, i)) { |
7485 | int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i); |
7486 | i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; |
7487 | sqlite3_value * v; |
7488 | int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v); |
7489 | if(rc == SQLITE_OK0) { |
7490 | sqlite3_result_valuesqlite3_api->result_value(context, v); |
7491 | sqlite3_value_freesqlite3_api->value_free(v); |
7492 | }else { |
7493 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); |
7494 | } |
7495 | } |
7496 | else if(vec0_column_idx_is_auxiliary(pVtab, i)) { |
7497 | int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i); |
7498 | i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; |
7499 | sqlite3_value * v; |
7500 | int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v); |
7501 | if(rc == SQLITE_OK0) { |
7502 | sqlite3_result_valuesqlite3_api->result_value(context, v); |
7503 | sqlite3_value_freesqlite3_api->value_free(v); |
7504 | }else { |
7505 | sqlite3_result_error_codesqlite3_api->result_error_code(context, rc); |
7506 | } |
7507 | } |
7508 | |
7509 | else if(vec0_column_idx_is_metadata(pVtab, i)) { |
7510 | int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); |
7511 | i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; |
7512 | int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); |
7513 | if(rc != SQLITE_OK0) { |
7514 | const char * zErr = sqlite3_mprintfsqlite3_api->mprintf( |
7515 | "Could not extract metadata value for column %.*s at rowid %lld", |
7516 | pVtab->metadata_columns[metadata_idx].name_length, |
7517 | pVtab->metadata_columns[metadata_idx].name, rowid |
7518 | ); |
7519 | if(zErr) { |
7520 | sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1); |
7521 | sqlite3_freesqlite3_api->free((void *) zErr); |
7522 | }else { |
7523 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
7524 | } |
7525 | } |
7526 | } |
7527 | |
7528 | return SQLITE_OK0; |
7529 | } |
7530 | |
7531 | static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context, |
7532 | int i) { |
7533 | vec0_cursor *pCur = (vec0_cursor *)cur; |
7534 | vec0_vtab *pVtab = (vec0_vtab *)cur->pVtab; |
7535 | switch (pCur->query_plan) { |
7536 | case VEC0_QUERY_PLAN_FULLSCAN: { |
7537 | return vec0Column_fullscan(pVtab, pCur, context, i); |
7538 | } |
7539 | case VEC0_QUERY_PLAN_KNN: { |
7540 | return vec0Column_knn(pVtab, pCur, context, i); |
7541 | } |
7542 | case VEC0_QUERY_PLAN_POINT: { |
7543 | return vec0Column_point(pVtab, pCur, context, i); |
7544 | } |
7545 | } |
7546 | return SQLITE_OK0; |
7547 | } |
7548 | |
7549 | /** |
7550 | * @brief Handles the "insert rowid" step of a row insert operation of a vec0 |
7551 | * table. |
7552 | * |
7553 | * This function will insert a new row into the _rowids vec0 shadow table. |
7554 | * |
7555 | * @param p: virtual table |
7556 | * @param idValue: Value containing the inserted rowid/id value. |
7557 | * @param rowid: Output rowid, will point to the "real" i64 rowid |
7558 | * value that was inserted |
7559 | * @return int SQLITE_OK on success, error code on failure |
7560 | */ |
7561 | int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue, |
7562 | i64 *rowid) { |
7563 | |
7564 | /** |
7565 | * An insert into a vec0 table can happen a few different ways: |
7566 | * 1) With default INTEGER primary key: With a supplied i64 rowid |
7567 | * 2) With default INTEGER primary key: WITHOUT a supplied rowid |
7568 | * 3) With TEXT primary key: supplied text rowid |
7569 | */ |
7570 | |
7571 | int rc; |
7572 | |
7573 | // Option 3: vtab has a user-defined TEXT primary key, so ensure a text value |
7574 | // is provided. |
7575 | if (p->pkIsText) { |
7576 | if (sqlite3_value_typesqlite3_api->value_type(idValue) != SQLITE_TEXT3) { |
7577 | // IMP: V04200_21039 |
7578 | vtab_set_error(&p->base, |
7579 | "The %s virtual table was declared with a TEXT primary " |
7580 | "key, but a non-TEXT value was provided in an INSERT.", |
7581 | p->tableName); |
7582 | return SQLITE_ERROR1; |
7583 | } |
7584 | |
7585 | return vec0_rowids_insert_id(p, idValue, rowid); |
7586 | } |
7587 | |
7588 | // Option 1: User supplied a i64 rowid |
7589 | if (sqlite3_value_typesqlite3_api->value_type(idValue) == SQLITE_INTEGER1) { |
7590 | i64 suppliedRowid = sqlite3_value_int64sqlite3_api->value_int64(idValue); |
7591 | rc = vec0_rowids_insert_rowid(p, suppliedRowid); |
7592 | if (rc == SQLITE_OK0) { |
7593 | *rowid = suppliedRowid; |
7594 | } |
7595 | return rc; |
7596 | } |
7597 | |
7598 | // Option 2: User did not suppled a rowid |
7599 | |
7600 | if (sqlite3_value_typesqlite3_api->value_type(idValue) != SQLITE_NULL5) { |
7601 | // IMP: V30855_14925 |
7602 | vtab_set_error(&p->base, |
7603 | "Only integers are allows for primary key values on %s", |
7604 | p->tableName); |
7605 | return SQLITE_ERROR1; |
7606 | } |
7607 | // NULL to get next auto-incremented value |
7608 | return vec0_rowids_insert_id(p, NULL((void*)0), rowid); |
7609 | } |
7610 | |
7611 | /** |
7612 | * @brief Determines the "next available" chunk position for a newly inserted |
7613 | * vec0 row. |
7614 | * |
7615 | * This operation may insert a new "blank" chunk the _chunks table, if there is |
7616 | * no more space in previous chunks. |
7617 | * |
7618 | * @param p: virtual table |
7619 | * @param partitionKeyValues: array of partition key column values, to constrain |
7620 | * against any partition key columns. |
7621 | * @param chunk_rowid: Output rowid of the chunk in the _chunks virtual table |
7622 | * that has the avialabiity. |
7623 | * @param chunk_offset: Output the index of the available space insert the |
7624 | * chunk, based on the index of the first available validity bit. |
7625 | * @param pBlobValidity: Output blob of the validity column of the available |
7626 | * chunk. Will be opened with read/write permissions. |
7627 | * @param pValidity: Output buffer of the original chunk's validity column. |
7628 | * Needs to be cleaned up with sqlite3_free(). |
7629 | * @return int SQLITE_OK on success, error code on failure |
7630 | */ |
7631 | int vec0Update_InsertNextAvailableStep( |
7632 | vec0_vtab *p, |
7633 | sqlite3_value ** partitionKeyValues, |
7634 | i64 *chunk_rowid, i64 *chunk_offset, |
7635 | sqlite3_blob **blobChunksValidity, |
7636 | const unsigned char **bufferChunksValidity) { |
7637 | |
7638 | int rc; |
7639 | i64 validitySize; |
7640 | *chunk_offset = -1; |
7641 | |
7642 | rc = vec0_get_latest_chunk_rowid(p, chunk_rowid, partitionKeyValues); |
7643 | if(rc == SQLITE_EMPTY16) { |
7644 | goto done; |
7645 | } |
7646 | if (rc != SQLITE_OK0) { |
7647 | goto cleanup; |
7648 | } |
7649 | |
7650 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "validity", |
7651 | *chunk_rowid, 1, blobChunksValidity); |
7652 | if (rc != SQLITE_OK0) { |
7653 | // IMP: V22053_06123 |
7654 | vtab_set_error(&p->base, |
7655 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7656 | "could not open validity blob on %s.%s.%lld", |
7657 | p->schemaName, p->shadowChunksName, *chunk_rowid); |
7658 | goto cleanup; |
7659 | } |
7660 | |
7661 | validitySize = sqlite3_blob_bytessqlite3_api->blob_bytes(*blobChunksValidity); |
7662 | if (validitySize != p->chunk_size / CHAR_BIT8) { |
7663 | // IMP: V29362_13432 |
7664 | vtab_set_error(&p->base, |
7665 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7666 | "validity blob size mismatch on " |
7667 | "%s.%s.%lld, expected %lld but received %lld.", |
7668 | p->schemaName, p->shadowChunksName, *chunk_rowid, |
7669 | (i64)(p->chunk_size / CHAR_BIT8), validitySize); |
7670 | rc = SQLITE_ERROR1; |
7671 | goto cleanup; |
7672 | } |
7673 | |
7674 | *bufferChunksValidity = sqlite3_mallocsqlite3_api->malloc(validitySize); |
7675 | if (!(*bufferChunksValidity)) { |
7676 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7677 | "Could not allocate memory for validity bitmap"); |
7678 | rc = SQLITE_NOMEM7; |
7679 | goto cleanup; |
7680 | } |
7681 | |
7682 | rc = sqlite3_blob_readsqlite3_api->blob_read(*blobChunksValidity, (void *)*bufferChunksValidity, |
7683 | validitySize, 0); |
7684 | |
7685 | if (rc != SQLITE_OK0) { |
7686 | vtab_set_error(&p->base, |
7687 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7688 | "Could not read validity bitmap for %s.%s.%lld", |
7689 | p->schemaName, p->shadowChunksName, *chunk_rowid); |
7690 | goto cleanup; |
7691 | } |
7692 | |
7693 | // find the next available offset, ie first `0` in the bitmap. |
7694 | for (int i = 0; i < validitySize; i++) { |
7695 | if ((*bufferChunksValidity)[i] == 0b11111111) |
7696 | continue; |
7697 | for (int j = 0; j < CHAR_BIT8; j++) { |
7698 | if (((((*bufferChunksValidity)[i] >> j) & 1) == 0)) { |
7699 | *chunk_offset = (i * CHAR_BIT8) + j; |
7700 | goto done; |
7701 | } |
7702 | } |
7703 | } |
7704 | |
7705 | done: |
7706 | // latest chunk was full, so need to create a new one |
7707 | if (*chunk_offset == -1) { |
7708 | rc = vec0_new_chunk(p, partitionKeyValues, chunk_rowid); |
7709 | if (rc != SQLITE_OK0) { |
7710 | // IMP: V08441_25279 |
7711 | vtab_set_error(&p->base, |
7712 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " "Could not insert a new vector chunk"); |
7713 | rc = SQLITE_ERROR1; // otherwise raises a DatabaseError and not operational |
7714 | // error? |
7715 | goto cleanup; |
7716 | } |
7717 | *chunk_offset = 0; |
7718 | |
7719 | // blobChunksValidity and pValidity are stale, pointing to the previous |
7720 | // (full) chunk. to re-assign them |
7721 | rc = sqlite3_blob_closesqlite3_api->blob_close(*blobChunksValidity); |
7722 | sqlite3_freesqlite3_api->free((void *)*bufferChunksValidity); |
7723 | *blobChunksValidity = NULL((void*)0); |
7724 | *bufferChunksValidity = NULL((void*)0); |
7725 | if (rc != SQLITE_OK0) { |
7726 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7727 | "unknown error, blobChunksValidity could not be closed, " |
7728 | "please file an issue."); |
7729 | rc = SQLITE_ERROR1; |
7730 | goto cleanup; |
7731 | } |
7732 | |
7733 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, |
7734 | "validity", *chunk_rowid, 1, blobChunksValidity); |
7735 | if (rc != SQLITE_OK0) { |
7736 | vtab_set_error( |
7737 | &p->base, |
7738 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7739 | "Could not open validity blob for newly created chunk %s.%s.%lld", |
7740 | p->schemaName, p->shadowChunksName, *chunk_rowid); |
7741 | goto cleanup; |
7742 | } |
7743 | validitySize = sqlite3_blob_bytessqlite3_api->blob_bytes(*blobChunksValidity); |
7744 | if (validitySize != p->chunk_size / CHAR_BIT8) { |
7745 | vtab_set_error(&p->base, |
7746 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7747 | "validity blob size mismatch for newly created chunk " |
7748 | "%s.%s.%lld. Exepcted %lld, got %lld", |
7749 | p->schemaName, p->shadowChunksName, *chunk_rowid, |
7750 | p->chunk_size / CHAR_BIT8, validitySize); |
7751 | goto cleanup; |
7752 | } |
7753 | *bufferChunksValidity = sqlite3_mallocsqlite3_api->malloc(validitySize); |
7754 | rc = sqlite3_blob_readsqlite3_api->blob_read(*blobChunksValidity, (void *)*bufferChunksValidity, |
7755 | validitySize, 0); |
7756 | if (rc != SQLITE_OK0) { |
7757 | vtab_set_error(&p->base, |
7758 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7759 | "could not read validity blob newly created chunk " |
7760 | "%s.%s.%lld", |
7761 | p->schemaName, p->shadowChunksName, *chunk_rowid); |
7762 | goto cleanup; |
7763 | } |
7764 | } |
7765 | |
7766 | rc = SQLITE_OK0; |
7767 | |
7768 | cleanup: |
7769 | return rc; |
7770 | } |
7771 | |
7772 | /** |
7773 | * @brief Write the vector data into the provided vector blob at the given |
7774 | * offset |
7775 | * |
7776 | * @param blobVectors SQLite BLOB to write to |
7777 | * @param chunk_offset the "offset" (ie validity bitmap position) to write the |
7778 | * vector to |
7779 | * @param bVector pointer to the vector containing data |
7780 | * @param dimensions how many dimensions the vector has |
7781 | * @param element_type the vector type |
7782 | * @return result of sqlite3_blob_write, SQLITE_OK on success, otherwise failure |
7783 | */ |
7784 | static int |
7785 | vec0_write_vector_to_vector_blob(sqlite3_blob *blobVectors, i64 chunk_offset, |
7786 | const void *bVector, size_t dimensions, |
7787 | enum VectorElementType element_type) { |
7788 | int n; |
7789 | int offset; |
7790 | |
7791 | switch (element_type) { |
7792 | case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: |
7793 | n = dimensions * sizeof(f32); |
7794 | offset = chunk_offset * dimensions * sizeof(f32); |
7795 | break; |
7796 | case SQLITE_VEC_ELEMENT_TYPE_INT8: |
7797 | n = dimensions * sizeof(i8); |
7798 | offset = chunk_offset * dimensions * sizeof(i8); |
7799 | break; |
7800 | case SQLITE_VEC_ELEMENT_TYPE_BIT: |
7801 | n = dimensions / CHAR_BIT8; |
7802 | offset = chunk_offset * dimensions / CHAR_BIT8; |
7803 | break; |
7804 | } |
7805 | |
7806 | return sqlite3_blob_writesqlite3_api->blob_write(blobVectors, bVector, n, offset); |
7807 | } |
7808 | |
7809 | /** |
7810 | * @brief |
7811 | * |
7812 | * @param p vec0 virtual table |
7813 | * @param chunk_rowid: which chunk to write to |
7814 | * @param chunk_offset: the offset inside the chunk to write the vector to. |
7815 | * @param rowid: the rowid of the inserting row |
7816 | * @param vectorDatas: array of the vector data to insert |
7817 | * @param blobValidity: writeable validity blob of the row's assigned chunk. |
7818 | * @param validity: snapshot buffer of the valdity column from the row's |
7819 | * assigned chunk. |
7820 | * @return int SQLITE_OK on success, error code on failure |
7821 | */ |
7822 | int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid, |
7823 | i64 chunk_offset, i64 rowid, |
7824 | void *vectorDatas[], |
7825 | sqlite3_blob *blobChunksValidity, |
7826 | const unsigned char *bufferChunksValidity) { |
7827 | int rc, brc; |
7828 | sqlite3_blob *blobChunksRowids = NULL((void*)0); |
7829 | |
7830 | // mark the validity bit for this row in the chunk's validity bitmap |
7831 | // Get the byte offset of the bitmap |
7832 | char unsigned bx = bufferChunksValidity[chunk_offset / CHAR_BIT8]; |
7833 | // set the bit at the chunk_offset position inside that byte |
7834 | bx = bx | (1 << (chunk_offset % CHAR_BIT8)); |
7835 | // write that 1 byte |
7836 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksValidity, &bx, 1, chunk_offset / CHAR_BIT8); |
7837 | if (rc != SQLITE_OK0) { |
7838 | vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not mark validity bit "); |
7839 | return rc; |
7840 | } |
7841 | |
7842 | // Go insert the vector data into the vector chunk shadow tables |
7843 | for (int i = 0; i < p->numVectorColumns; i++) { |
7844 | sqlite3_blob *blobVectors; |
7845 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i], |
7846 | "vectors", chunk_rowid, 1, &blobVectors); |
7847 | if (rc != SQLITE_OK0) { |
7848 | vtab_set_error(&p->base, "Error opening vector blob at %s.%s.%lld", |
7849 | p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid); |
7850 | goto cleanup; |
7851 | } |
7852 | |
7853 | i64 expected = |
7854 | p->chunk_size * vector_column_byte_size(p->vector_columns[i]); |
7855 | i64 actual = sqlite3_blob_bytessqlite3_api->blob_bytes(blobVectors); |
7856 | |
7857 | if (actual != expected) { |
7858 | // IMP: V16386_00456 |
7859 | vtab_set_error( |
7860 | &p->base, |
7861 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7862 | "vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld", |
7863 | p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid, expected, |
7864 | actual); |
7865 | rc = SQLITE_ERROR1; |
7866 | // already error, can ignore result code |
7867 | sqlite3_blob_closesqlite3_api->blob_close(blobVectors); |
7868 | goto cleanup; |
7869 | }; |
7870 | |
7871 | rc = vec0_write_vector_to_vector_blob( |
7872 | blobVectors, chunk_offset, vectorDatas[i], |
7873 | p->vector_columns[i].dimensions, p->vector_columns[i].element_type); |
7874 | if (rc != SQLITE_OK0) { |
7875 | vtab_set_error(&p->base, |
7876 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7877 | "could not write vector blob on %s.%s.%lld", |
7878 | p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid); |
7879 | rc = SQLITE_ERROR1; |
7880 | // already error, can ignore result code |
7881 | sqlite3_blob_closesqlite3_api->blob_close(blobVectors); |
7882 | goto cleanup; |
7883 | } |
7884 | rc = sqlite3_blob_closesqlite3_api->blob_close(blobVectors); |
7885 | if (rc != SQLITE_OK0) { |
7886 | vtab_set_error(&p->base, |
7887 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7888 | "could not close vector blob on %s.%s.%lld", |
7889 | p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid); |
7890 | rc = SQLITE_ERROR1; |
7891 | goto cleanup; |
7892 | } |
7893 | } |
7894 | |
7895 | // write the new rowid to the rowids column of the _chunks table |
7896 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", |
7897 | chunk_rowid, 1, &blobChunksRowids); |
7898 | if (rc != SQLITE_OK0) { |
7899 | // IMP: V09221_26060 |
7900 | vtab_set_error(&p->base, |
7901 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not open rowids blob on %s.%s.%lld", |
7902 | p->schemaName, p->shadowChunksName, chunk_rowid); |
7903 | goto cleanup; |
7904 | } |
7905 | i64 expected = p->chunk_size * sizeof(i64); |
7906 | i64 actual = sqlite3_blob_bytessqlite3_api->blob_bytes(blobChunksRowids); |
7907 | if (expected != actual) { |
7908 | // IMP: V12779_29618 |
7909 | vtab_set_error( |
7910 | &p->base, |
7911 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " |
7912 | "rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld", |
7913 | p->schemaName, p->shadowChunksName, chunk_rowid, expected, actual); |
7914 | rc = SQLITE_ERROR1; |
7915 | goto cleanup; |
7916 | } |
7917 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksRowids, &rowid, sizeof(i64), |
7918 | chunk_offset * sizeof(i64)); |
7919 | if (rc != SQLITE_OK0) { |
7920 | vtab_set_error( |
7921 | &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not write rowids blob on %s.%s.%lld", |
7922 | p->schemaName, p->shadowChunksName, chunk_rowid); |
7923 | rc = SQLITE_ERROR1; |
7924 | goto cleanup; |
7925 | } |
7926 | |
7927 | // Now with all the vectors inserted, go back and update the _rowids table |
7928 | // with the new chunk_rowid/chunk_offset values |
7929 | rc = vec0_rowids_update_position(p, rowid, chunk_rowid, chunk_offset); |
7930 | |
7931 | cleanup: |
7932 | brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksRowids); |
7933 | if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) { |
7934 | vtab_set_error( |
7935 | &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not close rowids blob on %s.%s.%lld", |
7936 | p->schemaName, p->shadowChunksName, chunk_rowid); |
7937 | return brc; |
7938 | } |
7939 | return rc; |
7940 | } |
7941 | |
7942 | int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid, i64 chunk_id, i64 chunk_offset, sqlite3_value * v, int isupdate) { |
7943 | int rc; |
7944 | struct Vec0MetadataColumnDefinition * metadata_column = &p->metadata_columns[metadata_column_idx]; |
7945 | vec0_metadata_column_kind kind = metadata_column->kind; |
7946 | |
7947 | // verify input value matches column type |
7948 | switch(kind) { |
7949 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { |
7950 | if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_INTEGER1 || ((sqlite3_value_intsqlite3_api->value_int(v) != 0) && (sqlite3_value_intsqlite3_api->value_int(v) != 1))) { |
7951 | rc = SQLITE_ERROR1; |
7952 | vtab_set_error(&p->base, "Expected 0 or 1 for BOOLEAN metadata column %.*s", metadata_column->name_length, metadata_column->name); |
7953 | goto done; |
7954 | } |
7955 | break; |
7956 | } |
7957 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { |
7958 | if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_INTEGER1) { |
7959 | rc = SQLITE_ERROR1; |
7960 | vtab_set_error(&p->base, "Expected integer for INTEGER metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v))); |
7961 | goto done; |
7962 | } |
7963 | break; |
7964 | } |
7965 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { |
7966 | if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_FLOAT2) { |
7967 | rc = SQLITE_ERROR1; |
7968 | vtab_set_error(&p->base, "Expected float for FLOAT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v))); |
7969 | goto done; |
7970 | } |
7971 | break; |
7972 | } |
7973 | case VEC0_METADATA_COLUMN_KIND_TEXT: { |
7974 | if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_TEXT3) { |
7975 | rc = SQLITE_ERROR1; |
7976 | vtab_set_error(&p->base, "Expected text for TEXT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v))); |
7977 | goto done; |
7978 | } |
7979 | break; |
7980 | } |
7981 | } |
7982 | |
7983 | sqlite3_blob * blobValue = NULL((void*)0); |
7984 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", chunk_id, 1, &blobValue); |
7985 | if(rc != SQLITE_OK0) { |
7986 | goto done; |
7987 | } |
7988 | |
7989 | switch(kind) { |
7990 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { |
7991 | u8 block; |
7992 | int value = sqlite3_value_intsqlite3_api->value_int(v); |
7993 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT8)); |
7994 | if(rc != SQLITE_OK0) { |
7995 | goto done; |
7996 | } |
7997 | |
7998 | if (value) { |
7999 | block |= 1 << (chunk_offset % CHAR_BIT8); |
8000 | } else { |
8001 | block &= ~(1 << (chunk_offset % CHAR_BIT8)); |
8002 | } |
8003 | |
8004 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT8); |
8005 | break; |
8006 | } |
8007 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { |
8008 | i64 value = sqlite3_value_int64sqlite3_api->value_int64(v); |
8009 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64)); |
8010 | break; |
8011 | } |
8012 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { |
8013 | double value = sqlite3_value_doublesqlite3_api->value_double(v); |
8014 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(double)); |
8015 | break; |
8016 | } |
8017 | case VEC0_METADATA_COLUMN_KIND_TEXT: { |
8018 | int prev_n; |
8019 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &prev_n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); |
8020 | if(rc != SQLITE_OK0) { |
8021 | goto done; |
8022 | } |
8023 | |
8024 | const char * s = (const char *) sqlite3_value_textsqlite3_api->value_text(v); |
8025 | int n = sqlite3_value_bytessqlite3_api->value_bytes(v); |
8026 | u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
8027 | memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); |
8028 | memcpy(view, &n, sizeof(int)); |
8029 | memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4)(((n) <= (16 -4)) ? (n) : (16 -4))); |
8030 | |
8031 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); |
8032 | if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
8033 | const char * zSql; |
8034 | |
8035 | if(isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12)) { |
8036 | zSql = sqlite3_mprintfsqlite3_api->mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " SET data = ?2 WHERE rowid = ?1", p->schemaName, p->tableName, metadata_column_idx); |
8037 | }else { |
8038 | zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx); |
8039 | } |
8040 | if(!zSql) { |
8041 | rc = SQLITE_NOMEM7; |
8042 | goto done; |
8043 | } |
8044 | sqlite3_stmt * stmt; |
8045 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
8046 | if(rc != SQLITE_OK0) { |
8047 | goto done; |
8048 | } |
8049 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
8050 | sqlite3_bind_textsqlite3_api->bind_text(stmt, 2, s, n, SQLITE_STATIC((sqlite3_destructor_type)0)); |
8051 | rc = sqlite3_stepsqlite3_api->step(stmt); |
8052 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8053 | |
8054 | if(rc != SQLITE_DONE101) { |
8055 | rc = SQLITE_ERROR1; |
8056 | goto done; |
8057 | } |
8058 | } |
8059 | else if(prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
8060 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx); |
8061 | if(!zSql) { |
8062 | rc = SQLITE_NOMEM7; |
8063 | goto done; |
8064 | } |
8065 | sqlite3_stmt * stmt; |
8066 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
8067 | if(rc != SQLITE_OK0) { |
8068 | goto done; |
8069 | } |
8070 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
8071 | rc = sqlite3_stepsqlite3_api->step(stmt); |
8072 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8073 | |
8074 | if(rc != SQLITE_DONE101) { |
8075 | rc = SQLITE_ERROR1; |
8076 | goto done; |
8077 | } |
8078 | } |
8079 | break; |
8080 | } |
8081 | } |
8082 | |
8083 | if(rc != SQLITE_OK0) { |
8084 | |
8085 | } |
8086 | rc = sqlite3_blob_closesqlite3_api->blob_close(blobValue); |
8087 | if(rc != SQLITE_OK0) { |
8088 | goto done; |
8089 | } |
8090 | |
8091 | done: |
8092 | return rc; |
8093 | } |
8094 | |
8095 | |
8096 | /** |
8097 | * @brief Handles INSERT INTO operations on a vec0 table. |
8098 | * |
8099 | * @return int SQLITE_OK on success, otherwise error code on failure |
8100 | */ |
8101 | int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, |
8102 | sqlite_int64 *pRowid) { |
8103 | UNUSED_PARAMETER(argc)(void)(argc); |
8104 | vec0_vtab *p = (vec0_vtab *)pVTab; |
8105 | int rc; |
8106 | // Rowid for the inserted row, deterimined by the inserted ID + _rowids shadow |
8107 | // table |
8108 | i64 rowid; |
8109 | |
8110 | // Array to hold the vector data of the inserted row. Individual elements will |
8111 | // have a lifetime bound to the argv[..] values. |
8112 | void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS16]; |
8113 | // Array to hold cleanup functions for vectorDatas[] |
8114 | vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS16]; |
8115 | |
8116 | sqlite3_value * partitionKeyValues[VEC0_MAX_PARTITION_COLUMNS4]; |
8117 | |
8118 | // Rowid of the chunk in the _chunks shadow table that the row will be a part |
8119 | // of. |
8120 | i64 chunk_rowid; |
8121 | // offset within the chunk where the rowid belongs |
8122 | i64 chunk_offset; |
8123 | |
8124 | // a write-able blob of the validity column for the given chunk. Used to mark |
8125 | // validity bit |
8126 | sqlite3_blob *blobChunksValidity = NULL((void*)0); |
8127 | // buffer for the valididty column for the given chunk. Maybe not needed here? |
8128 | const unsigned char *bufferChunksValidity = NULL((void*)0); |
8129 | int numReadVectors = 0; |
8130 | |
8131 | // Read all provided partition key values into partitionKeyValues |
8132 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
8133 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) { |
8134 | continue; |
8135 | } |
8136 | int partition_key_idx = p->user_column_idxs[i]; |
8137 | partitionKeyValues[partition_key_idx] = argv[2+VEC0_COLUMN_USERN_START1 + i]; |
8138 | |
8139 | int new_value_type = sqlite3_value_typesqlite3_api->value_type(partitionKeyValues[partition_key_idx]); |
8140 | if((new_value_type != SQLITE_NULL5) && (new_value_type != p->paritition_columns[partition_key_idx].type)) { |
8141 | // IMP: V11454_28292 |
8142 | vtab_set_error( |
8143 | pVTab, |
8144 | "Parition key type mismatch: The partition key column %.*s has type %s, but %s was provided.", |
8145 | p->paritition_columns[partition_key_idx].name_length, |
8146 | p->paritition_columns[partition_key_idx].name, |
8147 | type_name(p->paritition_columns[partition_key_idx].type), |
8148 | type_name(new_value_type) |
8149 | ); |
8150 | rc = SQLITE_ERROR1; |
8151 | goto cleanup; |
8152 | } |
8153 | } |
8154 | |
8155 | // read all the inserted vectors into vectorDatas, validate their lengths. |
8156 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
8157 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) { |
8158 | continue; |
8159 | } |
8160 | int vector_column_idx = p->user_column_idxs[i]; |
8161 | sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START1 + i]; |
8162 | size_t dimensions; |
8163 | |
8164 | char *pzError; |
8165 | enum VectorElementType elementType; |
8166 | rc = vector_from_value(valueVector, &vectorDatas[vector_column_idx], &dimensions, |
8167 | &elementType, &cleanups[vector_column_idx], &pzError); |
8168 | if (rc != SQLITE_OK0) { |
8169 | // IMP: V06519_23358 |
8170 | vtab_set_error( |
8171 | pVTab, "Inserted vector for the \"%.*s\" column is invalid: %z", |
8172 | p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name, pzError); |
8173 | rc = SQLITE_ERROR1; |
8174 | goto cleanup; |
8175 | } |
8176 | |
8177 | numReadVectors++; |
8178 | if (elementType != p->vector_columns[vector_column_idx].element_type) { |
8179 | // IMP: V08221_25059 |
8180 | vtab_set_error( |
8181 | pVTab, |
8182 | "Inserted vector for the \"%.*s\" column is expected to be of type " |
8183 | "%s, but a %s vector was provided.", |
8184 | p->vector_columns[i].name_length, p->vector_columns[i].name, |
8185 | vector_subtype_name(p->vector_columns[i].element_type), |
8186 | vector_subtype_name(elementType)); |
8187 | rc = SQLITE_ERROR1; |
8188 | goto cleanup; |
8189 | } |
8190 | |
8191 | if (dimensions != p->vector_columns[vector_column_idx].dimensions) { |
8192 | // IMP: V01145_17984 |
8193 | vtab_set_error( |
8194 | pVTab, |
8195 | "Dimension mismatch for inserted vector for the \"%.*s\" column. " |
8196 | "Expected %d dimensions but received %d.", |
8197 | p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name, |
8198 | p->vector_columns[vector_column_idx].dimensions, dimensions); |
8199 | rc = SQLITE_ERROR1; |
8200 | goto cleanup; |
8201 | } |
8202 | } |
8203 | |
8204 | // Cannot insert a value in the hidden "distance" column |
8205 | if (sqlite3_value_typesqlite3_api->value_type(argv[2 + vec0_column_distance_idx(p)]) != |
8206 | SQLITE_NULL5) { |
8207 | // IMP: V24228_08298 |
8208 | vtab_set_error(pVTab, |
8209 | "A value was provided for the hidden \"distance\" column."); |
8210 | rc = SQLITE_ERROR1; |
8211 | goto cleanup; |
8212 | } |
8213 | // Cannot insert a value in the hidden "k" column |
8214 | if (sqlite3_value_typesqlite3_api->value_type(argv[2 + vec0_column_k_idx(p)]) != SQLITE_NULL5) { |
8215 | // IMP: V11875_28713 |
8216 | vtab_set_error(pVTab, "A value was provided for the hidden \"k\" column."); |
8217 | rc = SQLITE_ERROR1; |
8218 | goto cleanup; |
8219 | } |
8220 | |
8221 | // Step #1: Insert/get a rowid for this row, from the _rowids table. |
8222 | rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID0], &rowid); |
8223 | if (rc != SQLITE_OK0) { |
8224 | goto cleanup; |
8225 | } |
8226 | |
8227 | // Step #2: Find the next "available" position in the _chunks table for this |
8228 | // row. |
8229 | rc = vec0Update_InsertNextAvailableStep(p, partitionKeyValues, |
8230 | &chunk_rowid, &chunk_offset, |
8231 | &blobChunksValidity, |
8232 | &bufferChunksValidity); |
8233 | if (rc != SQLITE_OK0) { |
8234 | goto cleanup; |
8235 | } |
8236 | |
8237 | // Step #3: With the next available chunk position, write out all the vectors |
8238 | // to their specified location. |
8239 | rc = vec0Update_InsertWriteFinalStep(p, chunk_rowid, chunk_offset, rowid, |
8240 | vectorDatas, blobChunksValidity, |
8241 | bufferChunksValidity); |
8242 | if (rc != SQLITE_OK0) { |
8243 | goto cleanup; |
8244 | } |
8245 | |
8246 | if(p->numAuxiliaryColumns > 0) { |
8247 | sqlite3_stmt *stmt; |
8248 | sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0)); |
8249 | sqlite3_str_appendfsqlite3_api->str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" "(rowid ", p->schemaName, p->tableName); |
8250 | for(int i = 0; i < p->numAuxiliaryColumns; i++) { |
8251 | sqlite3_str_appendfsqlite3_api->str_appendf(s, ", value%02d", i); |
8252 | } |
8253 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ") VALUES (? "); |
8254 | for(int i = 0; i < p->numAuxiliaryColumns; i++) { |
8255 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ", ?"); |
8256 | } |
8257 | sqlite3_str_appendallsqlite3_api->str_appendall(s, ")"); |
8258 | char * zSql = sqlite3_str_finishsqlite3_api->str_finish(s); |
8259 | // TODO double check error handling ehre |
8260 | if(!zSql) { |
8261 | rc = SQLITE_NOMEM7; |
8262 | goto cleanup; |
8263 | } |
8264 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
8265 | if(rc != SQLITE_OK0) { |
8266 | goto cleanup; |
8267 | } |
8268 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
8269 | |
8270 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
8271 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) { |
8272 | continue; |
8273 | } |
8274 | int auxiliary_key_idx = p->user_column_idxs[i]; |
8275 | sqlite3_value * v = argv[2+VEC0_COLUMN_USERN_START1 + i]; |
8276 | int v_type = sqlite3_value_typesqlite3_api->value_type(v); |
8277 | if(v_type != SQLITE_NULL5 && (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) { |
8278 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8279 | rc = SQLITE_CONSTRAINT19; |
8280 | vtab_set_error( |
8281 | pVTab, |
8282 | "Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.", |
8283 | p->auxiliary_columns[auxiliary_key_idx].name_length, |
8284 | p->auxiliary_columns[auxiliary_key_idx].name, |
8285 | type_name(p->auxiliary_columns[auxiliary_key_idx].type), |
8286 | type_name(v_type) |
8287 | ); |
8288 | goto cleanup; |
8289 | } |
8290 | // first 1 is for 1-based indexing on sqlite3_bind_*, second 1 is to account for initial rowid parameter |
8291 | sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1 + 1 + auxiliary_key_idx, v); |
8292 | } |
8293 | |
8294 | rc = sqlite3_stepsqlite3_api->step(stmt); |
8295 | if(rc != SQLITE_DONE101) { |
8296 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8297 | rc = SQLITE_ERROR1; |
8298 | goto cleanup; |
8299 | } |
8300 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8301 | } |
8302 | |
8303 | |
8304 | for(int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
8305 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { |
8306 | continue; |
8307 | } |
8308 | int metadata_idx = p->user_column_idxs[i]; |
8309 | sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START1 + i]; |
8310 | rc = vec0_write_metadata_value(p, metadata_idx, rowid, chunk_rowid, chunk_offset, v, 0); |
8311 | if(rc != SQLITE_OK0) { |
8312 | goto cleanup; |
8313 | } |
8314 | } |
8315 | |
8316 | *pRowid = rowid; |
8317 | rc = SQLITE_OK0; |
8318 | |
8319 | cleanup: |
8320 | for (int i = 0; i < numReadVectors; i++) { |
8321 | cleanups[i](vectorDatas[i]); |
8322 | } |
8323 | sqlite3_freesqlite3_api->free((void *)bufferChunksValidity); |
8324 | int brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksValidity); |
8325 | if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) { |
8326 | vtab_set_error(&p->base, |
8327 | VEC_INTERAL_ERROR"Internal sqlite-vec error: " "unknown error, blobChunksValidity could " |
8328 | "not be closed, please file an issue"); |
8329 | return brc; |
8330 | } |
8331 | return rc; |
8332 | } |
8333 | |
8334 | int vec0Update_Delete_ClearValidity(vec0_vtab *p, i64 chunk_id, |
8335 | u64 chunk_offset) { |
8336 | int rc, brc; |
8337 | sqlite3_blob *blobChunksValidity = NULL((void*)0); |
8338 | char unsigned bx; |
8339 | int validityOffset = chunk_offset / CHAR_BIT8; |
8340 | |
8341 | // 2. ensure chunks.validity bit is 1, then set to 0 |
8342 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "validity", |
8343 | chunk_id, 1, &blobChunksValidity); |
8344 | if (rc != SQLITE_OK0) { |
8345 | // IMP: V26002_10073 |
8346 | vtab_set_error(&p->base, "could not open validity blob for %s.%s.%lld", |
8347 | p->schemaName, p->shadowChunksName, chunk_id); |
8348 | return SQLITE_ERROR1; |
8349 | } |
8350 | // will skip the sqlite3_blob_bytes(blobChunksValidity) check for now, |
8351 | // the read below would catch it |
8352 | |
8353 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobChunksValidity, &bx, sizeof(bx), validityOffset); |
8354 | if (rc != SQLITE_OK0) { |
8355 | // IMP: V21193_05263 |
8356 | vtab_set_error( |
8357 | &p->base, "could not read validity blob for %s.%s.%lld at %d", |
8358 | p->schemaName, p->shadowChunksName, chunk_id, validityOffset); |
8359 | goto cleanup; |
8360 | } |
8361 | if (!(bx >> (chunk_offset % CHAR_BIT8))) { |
8362 | // IMP: V21193_05263 |
8363 | rc = SQLITE_ERROR1; |
8364 | vtab_set_error( |
8365 | &p->base, |
8366 | "vec0 deletion error: validity bit is not set for %s.%s.%lld at %d", |
8367 | p->schemaName, p->shadowChunksName, chunk_id, validityOffset); |
8368 | goto cleanup; |
8369 | } |
8370 | char unsigned mask = ~(1 << (chunk_offset % CHAR_BIT8)); |
8371 | char result = bx & mask; |
8372 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksValidity, &result, sizeof(bx), |
8373 | validityOffset); |
8374 | if (rc != SQLITE_OK0) { |
8375 | vtab_set_error( |
8376 | &p->base, "could not write to validity blob for %s.%s.%lld at %d", |
8377 | p->schemaName, p->shadowChunksName, chunk_id, validityOffset); |
8378 | goto cleanup; |
8379 | } |
8380 | |
8381 | cleanup: |
8382 | |
8383 | brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksValidity); |
8384 | if (rc != SQLITE_OK0) |
8385 | return rc; |
8386 | if (brc != SQLITE_OK0) { |
8387 | vtab_set_error(&p->base, |
8388 | "vec0 deletion error: Error commiting validity blob " |
8389 | "transaction on %s.%s.%lld at %d", |
8390 | p->schemaName, p->shadowChunksName, chunk_id, |
8391 | validityOffset); |
8392 | return brc; |
8393 | } |
8394 | return SQLITE_OK0; |
8395 | } |
8396 | |
8397 | int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) { |
8398 | int rc; |
8399 | sqlite3_stmt *stmt = NULL((void*)0); |
8400 | |
8401 | char *zSql = |
8402 | sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE rowid = ?", |
8403 | p->schemaName, p->tableName); |
8404 | if (!zSql) { |
8405 | return SQLITE_NOMEM7; |
8406 | } |
8407 | |
8408 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
8409 | sqlite3_freesqlite3_api->free(zSql); |
8410 | if (rc != SQLITE_OK0) { |
8411 | goto cleanup; |
8412 | } |
8413 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
8414 | rc = sqlite3_stepsqlite3_api->step(stmt); |
8415 | if (rc != SQLITE_DONE101) { |
8416 | goto cleanup; |
8417 | } |
8418 | rc = SQLITE_OK0; |
8419 | |
8420 | cleanup: |
8421 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8422 | return rc; |
8423 | } |
8424 | |
8425 | int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) { |
8426 | int rc; |
8427 | sqlite3_stmt *stmt = NULL((void*)0); |
8428 | |
8429 | char *zSql = |
8430 | sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " WHERE rowid = ?", |
8431 | p->schemaName, p->tableName); |
8432 | if (!zSql) { |
8433 | return SQLITE_NOMEM7; |
8434 | } |
8435 | |
8436 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
8437 | sqlite3_freesqlite3_api->free(zSql); |
8438 | if (rc != SQLITE_OK0) { |
8439 | goto cleanup; |
8440 | } |
8441 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
8442 | rc = sqlite3_stepsqlite3_api->step(stmt); |
8443 | if (rc != SQLITE_DONE101) { |
8444 | goto cleanup; |
8445 | } |
8446 | rc = SQLITE_OK0; |
8447 | |
8448 | cleanup: |
8449 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8450 | return rc; |
8451 | } |
8452 | |
8453 | int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i64 chunk_id, |
8454 | u64 chunk_offset) { |
8455 | int rc; |
8456 | sqlite3_blob * blobValue; |
8457 | vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind; |
8458 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 1, &blobValue); |
8459 | if(rc != SQLITE_OK0) { |
8460 | return rc; |
8461 | } |
8462 | |
8463 | switch(kind) { |
8464 | case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { |
8465 | u8 block; |
8466 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT8)); |
8467 | if(rc != SQLITE_OK0) { |
8468 | goto done; |
8469 | } |
8470 | |
8471 | block &= ~(1 << (chunk_offset % CHAR_BIT8)); |
8472 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT8); |
8473 | break; |
8474 | } |
8475 | case VEC0_METADATA_COLUMN_KIND_INTEGER: { |
8476 | i64 v = 0; |
8477 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(i64)); |
8478 | break; |
8479 | } |
8480 | case VEC0_METADATA_COLUMN_KIND_FLOAT: { |
8481 | double v = 0; |
8482 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(double)); |
8483 | break; |
8484 | } |
8485 | case VEC0_METADATA_COLUMN_KIND_TEXT: { |
8486 | int n; |
8487 | rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); |
8488 | if(rc != SQLITE_OK0) { |
8489 | goto done; |
8490 | } |
8491 | |
8492 | u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16]; |
8493 | memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); |
8494 | rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &view, sizeof(view), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16); |
8495 | if(rc != SQLITE_OK0) { |
8496 | goto done; |
8497 | } |
8498 | |
8499 | if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) { |
8500 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx); |
8501 | if(!zSql) { |
8502 | rc = SQLITE_NOMEM7; |
8503 | goto done; |
8504 | } |
8505 | sqlite3_stmt * stmt; |
8506 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
8507 | if(rc != SQLITE_OK0) { |
8508 | goto done; |
8509 | } |
8510 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid); |
8511 | rc = sqlite3_stepsqlite3_api->step(stmt); |
8512 | if(rc != SQLITE_DONE101) { |
8513 | rc = SQLITE_ERROR1; |
8514 | goto done; |
8515 | } |
8516 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8517 | } |
8518 | break; |
8519 | } |
8520 | } |
8521 | int rc2; |
8522 | done: |
8523 | rc2 = sqlite3_blob_closesqlite3_api->blob_close(blobValue); |
8524 | if(rc == SQLITE_OK0) { |
8525 | return rc2; |
8526 | } |
8527 | return rc; |
8528 | } |
8529 | |
8530 | int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) { |
8531 | vec0_vtab *p = (vec0_vtab *)pVTab; |
8532 | int rc; |
8533 | i64 rowid; |
8534 | i64 chunk_id; |
8535 | i64 chunk_offset; |
8536 | |
8537 | if (p->pkIsText) { |
8538 | rc = vec0_rowid_from_id(p, idValue, &rowid); |
8539 | if (rc != SQLITE_OK0) { |
8540 | return rc; |
8541 | } |
8542 | } else { |
8543 | rowid = sqlite3_value_int64sqlite3_api->value_int64(idValue); |
8544 | } |
8545 | |
8546 | // 1. Find chunk position for given rowid |
8547 | // 2. Ensure that validity bit for position is 1, then set to 0 |
8548 | // 3. Zero out rowid in chunks.rowid |
8549 | // 4. Zero out vector data in all vector column chunks |
8550 | // 5. Delete value in _rowids table |
8551 | |
8552 | // 1. get chunk_id and chunk_offset from _rowids |
8553 | rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset); |
8554 | if (rc != SQLITE_OK0) { |
8555 | return rc; |
8556 | } |
8557 | |
8558 | rc = vec0Update_Delete_ClearValidity(p, chunk_id, chunk_offset); |
8559 | if (rc != SQLITE_OK0) { |
8560 | return rc; |
8561 | } |
8562 | |
8563 | // 3. zero out rowid in chunks.rowids |
8564 | // https://github.com/asg017/sqlite-vec/issues/54 |
8565 | |
8566 | // 4. zero out any data in vector chunks tables |
8567 | // https://github.com/asg017/sqlite-vec/issues/54 |
8568 | |
8569 | // 5. delete from _rowids table |
8570 | rc = vec0Update_Delete_DeleteRowids(p, rowid); |
8571 | if (rc != SQLITE_OK0) { |
8572 | return rc; |
8573 | } |
8574 | |
8575 | // 6. delete any auxiliary rows |
8576 | if(p->numAuxiliaryColumns > 0) { |
8577 | rc = vec0Update_Delete_DeleteAux(p, rowid); |
8578 | if (rc != SQLITE_OK0) { |
8579 | return rc; |
8580 | } |
8581 | } |
8582 | |
8583 | // 6. delete metadata |
8584 | for(int i = 0; i < p->numMetadataColumns; i++) { |
8585 | rc = vec0Update_Delete_ClearMetadata(p, i, rowid, chunk_id, chunk_offset); |
8586 | } |
8587 | |
8588 | return SQLITE_OK0; |
8589 | } |
8590 | |
8591 | int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value * value, i64 rowid) { |
8592 | int rc; |
8593 | sqlite3_stmt *stmt; |
8594 | const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("UPDATE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " SET value%02d = ? WHERE rowid = ?", p->schemaName, p->tableName, auxiliary_column_idx); |
8595 | if(!zSql) { |
8596 | return SQLITE_NOMEM7; |
8597 | } |
8598 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0)); |
8599 | if(rc != SQLITE_OK0) { |
8600 | return rc; |
8601 | } |
8602 | sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1, value); |
8603 | sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 2, rowid); |
8604 | rc = sqlite3_stepsqlite3_api->step(stmt); |
8605 | if(rc != SQLITE_DONE101) { |
8606 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8607 | return SQLITE_ERROR1; |
8608 | } |
8609 | sqlite3_finalizesqlite3_api->finalize(stmt); |
8610 | return SQLITE_OK0; |
8611 | } |
8612 | |
8613 | int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_offset, |
8614 | int i, sqlite3_value *valueVector) { |
8615 | int rc; |
8616 | |
8617 | sqlite3_blob *blobVectors = NULL((void*)0); |
8618 | |
8619 | char *pzError; |
8620 | size_t dimensions; |
8621 | enum VectorElementType elementType; |
8622 | void *vector; |
8623 | vector_cleanup cleanup = vector_cleanup_noop; |
8624 | // https://github.com/asg017/sqlite-vec/issues/53 |
8625 | rc = vector_from_value(valueVector, &vector, &dimensions, &elementType, |
8626 | &cleanup, &pzError); |
8627 | if (rc != SQLITE_OK0) { |
8628 | // IMP: V15203_32042 |
8629 | vtab_set_error( |
8630 | &p->base, "Updated vector for the \"%.*s\" column is invalid: %z", |
8631 | p->vector_columns[i].name_length, p->vector_columns[i].name, pzError); |
8632 | rc = SQLITE_ERROR1; |
8633 | goto cleanup; |
8634 | } |
8635 | if (elementType != p->vector_columns[i].element_type) { |
8636 | // IMP: V03643_20481 |
8637 | vtab_set_error( |
8638 | &p->base, |
8639 | "Updated vector for the \"%.*s\" column is expected to be of type " |
8640 | "%s, but a %s vector was provided.", |
8641 | p->vector_columns[i].name_length, p->vector_columns[i].name, |
8642 | vector_subtype_name(p->vector_columns[i].element_type), |
8643 | vector_subtype_name(elementType)); |
8644 | rc = SQLITE_ERROR1; |
8645 | goto cleanup; |
8646 | } |
8647 | if (dimensions != p->vector_columns[i].dimensions) { |
8648 | // IMP: V25739_09810 |
8649 | vtab_set_error( |
8650 | &p->base, |
8651 | "Dimension mismatch for new updated vector for the \"%.*s\" column. " |
8652 | "Expected %d dimensions but received %d.", |
8653 | p->vector_columns[i].name_length, p->vector_columns[i].name, |
8654 | p->vector_columns[i].dimensions, dimensions); |
8655 | rc = SQLITE_ERROR1; |
8656 | goto cleanup; |
8657 | } |
8658 | |
8659 | rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i], |
8660 | "vectors", chunk_id, 1, &blobVectors); |
8661 | if (rc != SQLITE_OK0) { |
8662 | vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld", |
8663 | p->schemaName, p->shadowVectorChunksNames[i], chunk_id); |
8664 | goto cleanup; |
8665 | } |
8666 | rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, vector, |
8667 | p->vector_columns[i].dimensions, |
8668 | p->vector_columns[i].element_type); |
8669 | if (rc != SQLITE_OK0) { |
8670 | vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld", |
8671 | p->schemaName, p->shadowVectorChunksNames[i], chunk_id); |
8672 | goto cleanup; |
8673 | } |
8674 | |
8675 | cleanup: |
8676 | cleanup(vector); |
8677 | int brc = sqlite3_blob_closesqlite3_api->blob_close(blobVectors); |
8678 | if (rc != SQLITE_OK0) { |
8679 | return rc; |
8680 | } |
8681 | if (brc != SQLITE_OK0) { |
8682 | vtab_set_error( |
8683 | &p->base, |
8684 | "Could not commit blob transaction for vectors blob for %s.%s.%lld", |
8685 | p->schemaName, p->shadowVectorChunksNames[i], chunk_id); |
8686 | return brc; |
8687 | } |
8688 | return SQLITE_OK0; |
8689 | } |
8690 | |
8691 | int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) { |
8692 | UNUSED_PARAMETER(argc)(void)(argc); |
8693 | vec0_vtab *p = (vec0_vtab *)pVTab; |
8694 | int rc; |
8695 | i64 chunk_id; |
8696 | i64 chunk_offset; |
8697 | |
8698 | i64 rowid; |
8699 | if (p->pkIsText) { |
8700 | const char *a = (const char *)sqlite3_value_textsqlite3_api->value_text(argv[0]); |
8701 | const char *b = (const char *)sqlite3_value_textsqlite3_api->value_text(argv[1]); |
8702 | // IMP: V08886_25725 |
8703 | if ((sqlite3_value_bytessqlite3_api->value_bytes(argv[0]) != sqlite3_value_bytessqlite3_api->value_bytes(argv[1])) || |
8704 | strncmp(a, b, sqlite3_value_bytessqlite3_api->value_bytes(argv[0])) != 0) { |
8705 | vtab_set_error(pVTab, |
8706 | "UPDATEs on vec0 primary key values are not allowed."); |
8707 | return SQLITE_ERROR1; |
8708 | } |
8709 | rc = vec0_rowid_from_id(p, argv[0], &rowid); |
8710 | if (rc != SQLITE_OK0) { |
8711 | return rc; |
8712 | } |
8713 | } else { |
8714 | rowid = sqlite3_value_int64sqlite3_api->value_int64(argv[0]); |
8715 | } |
8716 | |
8717 | // 1) get chunk_id and chunk_offset from _rowids |
8718 | rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset); |
8719 | if (rc != SQLITE_OK0) { |
8720 | return rc; |
8721 | } |
8722 | |
8723 | // 2) update any partition key values |
8724 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
8725 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) { |
8726 | continue; |
8727 | } |
8728 | sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i]; |
8729 | if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) { |
8730 | continue; |
8731 | } |
8732 | vtab_set_error(pVTab, "UPDATE on partition key columns are not supported yet. "); |
8733 | return SQLITE_ERROR1; |
8734 | } |
8735 | |
8736 | // 3) handle auxiliary column updates |
8737 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
8738 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) { |
8739 | continue; |
8740 | } |
8741 | int auxiliary_column_idx = p->user_column_idxs[i]; |
8742 | sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i]; |
8743 | if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) { |
8744 | continue; |
8745 | } |
8746 | rc = vec0Update_UpdateAuxColumn(p, auxiliary_column_idx, value, rowid); |
8747 | if(rc != SQLITE_OK0) { |
8748 | return SQLITE_ERROR1; |
8749 | } |
8750 | } |
8751 | |
8752 | // 4) handle metadata column updates |
8753 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
8754 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { |
8755 | continue; |
8756 | } |
8757 | int metadata_column_idx = p->user_column_idxs[i]; |
8758 | sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i]; |
8759 | if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) { |
8760 | continue; |
8761 | } |
8762 | rc = vec0_write_metadata_value(p, metadata_column_idx, rowid, chunk_id, chunk_offset, value, 1); |
8763 | if(rc != SQLITE_OK0) { |
8764 | return rc; |
8765 | } |
8766 | } |
8767 | |
8768 | // 5) iterate over all new vectors, update the vectors |
8769 | for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { |
8770 | if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) { |
8771 | continue; |
8772 | } |
8773 | int vector_idx = p->user_column_idxs[i]; |
8774 | sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START1 + i]; |
8775 | // in vec0Column, we check sqlite3_vtab_nochange() on vector columns. |
8776 | // If the vector column isn't being changed, we return NULL; |
8777 | // That's not great, that means vector columns can never be NULLABLE |
8778 | // (bc we cant distinguish if an updated vector is truly NULL or nochange). |
8779 | // Also it means that if someone tries to run `UPDATE v SET X = NULL`, |
8780 | // we can't effectively detect and raise an error. |
8781 | // A better solution would be to use a custom result_type for "empty", |
8782 | // but subtypes don't appear to survive xColumn -> xUpdate, it's always 0. |
8783 | // So for now, we'll just use NULL and warn people to not SET X = NULL |
8784 | // in the docs. |
8785 | if (sqlite3_value_typesqlite3_api->value_type(valueVector) == SQLITE_NULL5) { |
8786 | continue; |
8787 | } |
8788 | |
8789 | rc = vec0Update_UpdateVectorColumn(p, chunk_id, chunk_offset, vector_idx, |
8790 | valueVector); |
8791 | if (rc != SQLITE_OK0) { |
8792 | return SQLITE_ERROR1; |
8793 | } |
8794 | } |
8795 | |
8796 | return SQLITE_OK0; |
8797 | } |
8798 | |
8799 | static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, |
8800 | sqlite_int64 *pRowid) { |
8801 | // DELETE operation |
8802 | if (argc == 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) { |
8803 | return vec0Update_Delete(pVTab, argv[0]); |
8804 | } |
8805 | // INSERT operation |
8806 | else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) == SQLITE_NULL5) { |
8807 | return vec0Update_Insert(pVTab, argc, argv, pRowid); |
8808 | } |
8809 | // UPDATE operation |
8810 | else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) { |
8811 | return vec0Update_Update(pVTab, argc, argv); |
8812 | } else { |
8813 | vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0."); |
8814 | return SQLITE_ERROR1; |
8815 | } |
8816 | } |
8817 | |
/**
 * Returns 1 when zName case-insensitively equals one of the fixed vec0
 * shadow-table suffixes listed below, 0 otherwise.
 */
static int vec0ShadowName(const char *zName) {
  static const char *kShadowSuffixes[] = {
      "rowids", "chunks", "auxiliary", "info",

      // One metadata-chunks table per possible metadata column (00..15).
      "metadatachunks00", "metadatachunks01", "metadatachunks02",
      "metadatachunks03", "metadatachunks04", "metadatachunks05",
      "metadatachunks06", "metadatachunks07", "metadatachunks08",
      "metadatachunks09", "metadatachunks10", "metadatachunks11",
      "metadatachunks12", "metadatachunks13", "metadatachunks14",
      "metadatachunks15",

      // One metadata-text table per possible metadata column (00..15).
      "metadatatext00", "metadatatext01", "metadatatext02",
      "metadatatext03", "metadatatext04", "metadatatext05",
      "metadatatext06", "metadatatext07", "metadatatext08",
      "metadatatext09", "metadatatext10", "metadatatext11",
      "metadatatext12", "metadatatext13", "metadatatext14",
      "metadatatext15",
  };
  const size_t suffixCount = sizeof(kShadowSuffixes) / sizeof(kShadowSuffixes[0]);

  size_t idx = 0;
  while (idx < suffixCount) {
    if (sqlite3_stricmp(zName, kShadowSuffixes[idx]) == 0) {
      return 1;
    }
    idx++;
  }
  return 0;
}
8867 | |
8868 | static int vec0Begin(sqlite3_vtab *pVTab) { |
8869 | UNUSED_PARAMETER(pVTab)(void)(pVTab); |
8870 | return SQLITE_OK0; |
8871 | } |
8872 | static int vec0Sync(sqlite3_vtab *pVTab) { |
8873 | UNUSED_PARAMETER(pVTab)(void)(pVTab); |
8874 | vec0_vtab *p = (vec0_vtab *)pVTab; |
8875 | if (p->stmtLatestChunk) { |
8876 | sqlite3_finalizesqlite3_api->finalize(p->stmtLatestChunk); |
8877 | p->stmtLatestChunk = NULL((void*)0); |
8878 | } |
8879 | if (p->stmtRowidsInsertRowid) { |
8880 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertRowid); |
8881 | p->stmtRowidsInsertRowid = NULL((void*)0); |
8882 | } |
8883 | if (p->stmtRowidsInsertId) { |
8884 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertId); |
8885 | p->stmtRowidsInsertId = NULL((void*)0); |
8886 | } |
8887 | if (p->stmtRowidsUpdatePosition) { |
8888 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsUpdatePosition); |
8889 | p->stmtRowidsUpdatePosition = NULL((void*)0); |
8890 | } |
8891 | if (p->stmtRowidsGetChunkPosition) { |
8892 | sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsGetChunkPosition); |
8893 | p->stmtRowidsGetChunkPosition = NULL((void*)0); |
8894 | } |
8895 | return SQLITE_OK0; |
8896 | } |
8897 | static int vec0Commit(sqlite3_vtab *pVTab) { |
8898 | UNUSED_PARAMETER(pVTab)(void)(pVTab); |
8899 | return SQLITE_OK0; |
8900 | } |
8901 | static int vec0Rollback(sqlite3_vtab *pVTab) { |
8902 | UNUSED_PARAMETER(pVTab)(void)(pVTab); |
8903 | return SQLITE_OK0; |
8904 | } |
8905 | |
8906 | static sqlite3_module vec0Module = { |
8907 | /* iVersion */ 3, |
8908 | /* xCreate */ vec0Create, |
8909 | /* xConnect */ vec0Connect, |
8910 | /* xBestIndex */ vec0BestIndex, |
8911 | /* xDisconnect */ vec0Disconnect, |
8912 | /* xDestroy */ vec0Destroy, |
8913 | /* xOpen */ vec0Open, |
8914 | /* xClose */ vec0Close, |
8915 | /* xFilter */ vec0Filter, |
8916 | /* xNext */ vec0Next, |
8917 | /* xEof */ vec0Eof, |
8918 | /* xColumn */ vec0Column, |
8919 | /* xRowid */ vec0Rowid, |
8920 | /* xUpdate */ vec0Update, |
8921 | /* xBegin */ vec0Begin, |
8922 | /* xSync */ vec0Sync, |
8923 | /* xCommit */ vec0Commit, |
8924 | /* xRollback */ vec0Rollback, |
8925 | /* xFindFunction */ 0, |
8926 | /* xRename */ 0, // https://github.com/asg017/sqlite-vec/issues/43 |
8927 | /* xSavepoint */ 0, |
8928 | /* xRelease */ 0, |
8929 | /* xRollbackTo */ 0, |
8930 | /* xShadowName */ vec0ShadowName, |
8931 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 |
8932 | /* xIntegrity */ 0, // https://github.com/asg017/sqlite-vec/issues/44 |
8933 | #endif |
8934 | }; |
8935 | #pragma endregion |
8936 | |
/* Pointer-type tag passed to sqlite3_result_pointer()/sqlite3_value_pointer()
 * for static blob definitions. It points at a string literal, so the pointee
 * is const-qualified. */
static const char *POINTER_NAME_STATIC_BLOB_DEF = "vec0-static_blob_def";
8938 | struct static_blob_definition { |
8939 | void *p; |
8940 | size_t dimensions; |
8941 | size_t nvectors; |
8942 | enum VectorElementType element_type; |
8943 | }; |
8944 | static void vec_static_blob_from_raw(sqlite3_context *context, int argc, |
8945 | sqlite3_value **argv) { |
8946 | |
8947 | assert(argc == 4)((void) sizeof ((argc == 4) ? 1 : 0), __extension__ ({ if (argc == 4) ; else __assert_fail ("argc == 4", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 8947, __extension__ __PRETTY_FUNCTION__); })); |
8948 | struct static_blob_definition *p; |
8949 | p = sqlite3_mallocsqlite3_api->malloc(sizeof(*p)); |
8950 | if (!p) { |
8951 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context); |
8952 | return; |
8953 | } |
8954 | memset(p, 0, sizeof(*p)); |
8955 | p->p = (void *)sqlite3_value_int64sqlite3_api->value_int64(argv[0]); |
8956 | p->element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32; |
8957 | p->dimensions = sqlite3_value_int64sqlite3_api->value_int64(argv[2]); |
8958 | p->nvectors = sqlite3_value_int64sqlite3_api->value_int64(argv[3]); |
8959 | sqlite3_result_pointersqlite3_api->result_pointer(context, p, POINTER_NAME_STATIC_BLOB_DEF, |
8960 | sqlite3_freesqlite3_api->free); |
8961 | } |
8962 | #pragma region vec_static_blobs() table function |
8963 | |
8964 | #define MAX_STATIC_BLOBS16 16 |
8965 | |
8966 | typedef struct static_blob static_blob; |
8967 | struct static_blob { |
8968 | char *name; |
8969 | void *p; |
8970 | size_t dimensions; |
8971 | size_t nvectors; |
8972 | enum VectorElementType element_type; |
8973 | }; |
8974 | |
8975 | typedef struct vec_static_blob_data vec_static_blob_data; |
8976 | struct vec_static_blob_data { |
8977 | static_blob static_blobs[MAX_STATIC_BLOBS16]; |
8978 | }; |
8979 | |
8980 | typedef struct vec_static_blobs_vtab vec_static_blobs_vtab; |
8981 | struct vec_static_blobs_vtab { |
8982 | sqlite3_vtab base; |
8983 | vec_static_blob_data *data; |
8984 | }; |
8985 | |
8986 | typedef struct vec_static_blobs_cursor vec_static_blobs_cursor; |
8987 | struct vec_static_blobs_cursor { |
8988 | sqlite3_vtab_cursor base; |
8989 | sqlite3_int64 iRowid; |
8990 | }; |
8991 | |
8992 | static int vec_static_blobsConnect(sqlite3 *db, void *pAux, int argc, |
8993 | const char *const *argv, |
8994 | sqlite3_vtab **ppVtab, char **pzErr) { |
8995 | UNUSED_PARAMETER(argc)(void)(argc); |
8996 | UNUSED_PARAMETER(argv)(void)(argv); |
8997 | UNUSED_PARAMETER(pzErr)(void)(pzErr); |
8998 | |
8999 | vec_static_blobs_vtab *pNew; |
9000 | #define VEC_STATIC_BLOBS_NAME0 0 |
9001 | #define VEC_STATIC_BLOBS_DATA1 1 |
9002 | #define VEC_STATIC_BLOBS_DIMENSIONS2 2 |
9003 | #define VEC_STATIC_BLOBS_COUNT3 3 |
9004 | int rc = sqlite3_declare_vtabsqlite3_api->declare_vtab( |
9005 | db, "CREATE TABLE x(name, data, dimensions hidden, count hidden)"); |
9006 | if (rc == SQLITE_OK0) { |
9007 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); |
9008 | *ppVtab = (sqlite3_vtab *)pNew; |
9009 | if (pNew == 0) |
9010 | return SQLITE_NOMEM7; |
9011 | memset(pNew, 0, sizeof(*pNew)); |
9012 | pNew->data = pAux; |
9013 | } |
9014 | return rc; |
9015 | } |
9016 | |
9017 | static int vec_static_blobsDisconnect(sqlite3_vtab *pVtab) { |
9018 | vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVtab; |
9019 | sqlite3_freesqlite3_api->free(p); |
9020 | return SQLITE_OK0; |
9021 | } |
9022 | |
9023 | static int vec_static_blobsUpdate(sqlite3_vtab *pVTab, int argc, |
9024 | sqlite3_value **argv, sqlite_int64 *pRowid) { |
9025 | UNUSED_PARAMETER(pRowid)(void)(pRowid); |
9026 | vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVTab; |
9027 | // DELETE operation |
9028 | if (argc == 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) { |
9029 | return SQLITE_ERROR1; |
9030 | } |
9031 | // INSERT operation |
9032 | else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) == SQLITE_NULL5) { |
9033 | const char *key = |
9034 | (const char *)sqlite3_value_textsqlite3_api->value_text(argv[2 + VEC_STATIC_BLOBS_NAME0]); |
9035 | int idx = -1; |
9036 | for (int i = 0; i < MAX_STATIC_BLOBS16; i++) { |
9037 | if (!p->data->static_blobs[i].name) { |
9038 | p->data->static_blobs[i].name = sqlite3_mprintfsqlite3_api->mprintf("%s", key); |
9039 | idx = i; |
9040 | break; |
9041 | } |
9042 | } |
9043 | if (idx < 0) |
9044 | abort(); |
9045 | struct static_blob_definition *def = sqlite3_value_pointersqlite3_api->value_pointer( |
9046 | argv[2 + VEC_STATIC_BLOBS_DATA1], POINTER_NAME_STATIC_BLOB_DEF); |
9047 | p->data->static_blobs[idx].p = def->p; |
9048 | p->data->static_blobs[idx].dimensions = def->dimensions; |
9049 | p->data->static_blobs[idx].nvectors = def->nvectors; |
9050 | p->data->static_blobs[idx].element_type = def->element_type; |
9051 | |
9052 | return SQLITE_OK0; |
9053 | } |
9054 | // UPDATE operation |
9055 | else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) { |
9056 | return SQLITE_ERROR1; |
9057 | } |
9058 | return SQLITE_ERROR1; |
9059 | } |
9060 | |
9061 | static int vec_static_blobsOpen(sqlite3_vtab *p, |
9062 | sqlite3_vtab_cursor **ppCursor) { |
9063 | UNUSED_PARAMETER(p)(void)(p); |
9064 | vec_static_blobs_cursor *pCur; |
9065 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); |
9066 | if (pCur == 0) |
9067 | return SQLITE_NOMEM7; |
9068 | memset(pCur, 0, sizeof(*pCur)); |
9069 | *ppCursor = &pCur->base; |
9070 | return SQLITE_OK0; |
9071 | } |
9072 | |
9073 | static int vec_static_blobsClose(sqlite3_vtab_cursor *cur) { |
9074 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; |
9075 | sqlite3_freesqlite3_api->free(pCur); |
9076 | return SQLITE_OK0; |
9077 | } |
9078 | |
9079 | static int vec_static_blobsBestIndex(sqlite3_vtab *pVTab, |
9080 | sqlite3_index_info *pIdxInfo) { |
9081 | UNUSED_PARAMETER(pVTab)(void)(pVTab); |
9082 | pIdxInfo->idxNum = 1; |
9083 | pIdxInfo->estimatedCost = (double)10; |
9084 | pIdxInfo->estimatedRows = 10; |
9085 | return SQLITE_OK0; |
9086 | } |
9087 | |
9088 | static int vec_static_blobsNext(sqlite3_vtab_cursor *cur); |
9089 | static int vec_static_blobsFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, |
9090 | const char *idxStr, int argc, |
9091 | sqlite3_value **argv) { |
9092 | UNUSED_PARAMETER(idxNum)(void)(idxNum); |
9093 | UNUSED_PARAMETER(idxStr)(void)(idxStr); |
9094 | UNUSED_PARAMETER(argc)(void)(argc); |
9095 | UNUSED_PARAMETER(argv)(void)(argv); |
9096 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)pVtabCursor; |
9097 | pCur->iRowid = -1; |
9098 | vec_static_blobsNext(pVtabCursor); |
9099 | return SQLITE_OK0; |
9100 | } |
9101 | |
9102 | static int vec_static_blobsRowid(sqlite3_vtab_cursor *cur, |
9103 | sqlite_int64 *pRowid) { |
9104 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; |
9105 | *pRowid = pCur->iRowid; |
9106 | return SQLITE_OK0; |
9107 | } |
9108 | |
9109 | static int vec_static_blobsNext(sqlite3_vtab_cursor *cur) { |
9110 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; |
9111 | vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pCur->base.pVtab; |
9112 | pCur->iRowid++; |
9113 | while (pCur->iRowid < MAX_STATIC_BLOBS16) { |
9114 | if (p->data->static_blobs[pCur->iRowid].name) { |
9115 | return SQLITE_OK0; |
9116 | } |
9117 | pCur->iRowid++; |
9118 | } |
9119 | return SQLITE_OK0; |
9120 | } |
9121 | |
9122 | static int vec_static_blobsEof(sqlite3_vtab_cursor *cur) { |
9123 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; |
9124 | return pCur->iRowid >= MAX_STATIC_BLOBS16; |
9125 | } |
9126 | |
9127 | static int vec_static_blobsColumn(sqlite3_vtab_cursor *cur, |
9128 | sqlite3_context *context, int i) { |
9129 | vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur; |
9130 | vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)cur->pVtab; |
9131 | switch (i) { |
9132 | case VEC_STATIC_BLOBS_NAME0: |
9133 | sqlite3_result_textsqlite3_api->result_text(context, p->data->static_blobs[pCur->iRowid].name, -1, |
9134 | SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
9135 | break; |
9136 | case VEC_STATIC_BLOBS_DATA1: |
9137 | sqlite3_result_nullsqlite3_api->result_null(context); |
9138 | break; |
9139 | case VEC_STATIC_BLOBS_DIMENSIONS2: |
9140 | sqlite3_result_int64sqlite3_api->result_int64(context, |
9141 | p->data->static_blobs[pCur->iRowid].dimensions); |
9142 | break; |
9143 | case VEC_STATIC_BLOBS_COUNT3: |
9144 | sqlite3_result_int64sqlite3_api->result_int64(context, p->data->static_blobs[pCur->iRowid].nvectors); |
9145 | break; |
9146 | } |
9147 | return SQLITE_OK0; |
9148 | } |
9149 | |
9150 | static sqlite3_module vec_static_blobsModule = { |
9151 | /* iVersion */ 3, |
9152 | /* xCreate */ 0, |
9153 | /* xConnect */ vec_static_blobsConnect, |
9154 | /* xBestIndex */ vec_static_blobsBestIndex, |
9155 | /* xDisconnect */ vec_static_blobsDisconnect, |
9156 | /* xDestroy */ 0, |
9157 | /* xOpen */ vec_static_blobsOpen, |
9158 | /* xClose */ vec_static_blobsClose, |
9159 | /* xFilter */ vec_static_blobsFilter, |
9160 | /* xNext */ vec_static_blobsNext, |
9161 | /* xEof */ vec_static_blobsEof, |
9162 | /* xColumn */ vec_static_blobsColumn, |
9163 | /* xRowid */ vec_static_blobsRowid, |
9164 | /* xUpdate */ vec_static_blobsUpdate, |
9165 | /* xBegin */ 0, |
9166 | /* xSync */ 0, |
9167 | /* xCommit */ 0, |
9168 | /* xRollback */ 0, |
9169 | /* xFindMethod */ 0, |
9170 | /* xRename */ 0, |
9171 | /* xSavepoint */ 0, |
9172 | /* xRelease */ 0, |
9173 | /* xRollbackTo */ 0, |
9174 | /* xShadowName */ 0, |
9175 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 |
9176 | /* xIntegrity */ 0 |
9177 | #endif |
9178 | }; |
9179 | #pragma endregion |
9180 | |
9181 | #pragma region vec_static_blob_entries() table function |
9182 | |
9183 | typedef struct vec_static_blob_entries_vtab vec_static_blob_entries_vtab; |
9184 | struct vec_static_blob_entries_vtab { |
9185 | sqlite3_vtab base; |
9186 | static_blob *blob; |
9187 | }; |
/* Query plans chosen in xBestIndex (as idxNum) and executed in xFilter. */
typedef enum {
  /* Visit every vector in storage order. */
  VEC_SBE__QUERYPLAN_FULLSCAN = 1,
  /* Return the k vectors nearest to a MATCH query vector. */
  VEC_SBE__QUERYPLAN_KNN = 2
} vec_sbe_query_plan;
9192 | |
9193 | struct sbe_query_knn_data { |
9194 | i64 k; |
9195 | i64 k_used; |
9196 | // Array of rowids of size k. Must be freed with sqlite3_free(). |
9197 | i32 *rowids; |
9198 | // Array of distances of size k. Must be freed with sqlite3_free(). |
9199 | f32 *distances; |
9200 | i64 current_idx; |
9201 | }; |
9202 | void sbe_query_knn_data_clear(struct sbe_query_knn_data *knn_data) { |
9203 | if (!knn_data) |
9204 | return; |
9205 | |
9206 | if (knn_data->rowids) { |
9207 | sqlite3_freesqlite3_api->free(knn_data->rowids); |
9208 | knn_data->rowids = NULL((void*)0); |
9209 | } |
9210 | if (knn_data->distances) { |
9211 | sqlite3_freesqlite3_api->free(knn_data->distances); |
9212 | knn_data->distances = NULL((void*)0); |
9213 | } |
9214 | } |
9215 | |
9216 | typedef struct vec_static_blob_entries_cursor vec_static_blob_entries_cursor; |
9217 | struct vec_static_blob_entries_cursor { |
9218 | sqlite3_vtab_cursor base; |
9219 | sqlite3_int64 iRowid; |
9220 | vec_sbe_query_plan query_plan; |
9221 | struct sbe_query_knn_data *knn_data; |
9222 | }; |
9223 | |
9224 | static int vec_static_blob_entriesConnect(sqlite3 *db, void *pAux, int argc, |
9225 | const char *const *argv, |
9226 | sqlite3_vtab **ppVtab, char **pzErr) { |
9227 | UNUSED_PARAMETER(argc)(void)(argc); |
9228 | UNUSED_PARAMETER(argv)(void)(argv); |
9229 | UNUSED_PARAMETER(pzErr)(void)(pzErr); |
9230 | vec_static_blob_data *blob_data = pAux; |
9231 | int idx = -1; |
9232 | for (int i = 0; i < MAX_STATIC_BLOBS16; i++) { |
9233 | if (!blob_data->static_blobs[i].name) |
9234 | continue; |
9235 | if (strncmp(blob_data->static_blobs[i].name, argv[3], |
9236 | strlen(blob_data->static_blobs[i].name)) == 0) { |
9237 | idx = i; |
9238 | break; |
9239 | } |
9240 | } |
9241 | if (idx < 0) |
9242 | abort(); |
9243 | vec_static_blob_entries_vtab *pNew; |
9244 | #define VEC_STATIC_BLOB_ENTRIES_VECTOR0 0 |
9245 | #define VEC_STATIC_BLOB_ENTRIES_DISTANCE1 1 |
9246 | #define VEC_STATIC_BLOB_ENTRIES_K2 2 |
9247 | int rc = sqlite3_declare_vtabsqlite3_api->declare_vtab( |
9248 | db, "CREATE TABLE x(vector, distance hidden, k hidden)"); |
9249 | if (rc == SQLITE_OK0) { |
9250 | pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); |
9251 | *ppVtab = (sqlite3_vtab *)pNew; |
9252 | if (pNew == 0) |
9253 | return SQLITE_NOMEM7; |
9254 | memset(pNew, 0, sizeof(*pNew)); |
9255 | pNew->blob = &blob_data->static_blobs[idx]; |
9256 | } |
9257 | return rc; |
9258 | } |
9259 | |
9260 | static int vec_static_blob_entriesCreate(sqlite3 *db, void *pAux, int argc, |
9261 | const char *const *argv, |
9262 | sqlite3_vtab **ppVtab, char **pzErr) { |
9263 | return vec_static_blob_entriesConnect(db, pAux, argc, argv, ppVtab, pzErr); |
9264 | } |
9265 | |
9266 | static int vec_static_blob_entriesDisconnect(sqlite3_vtab *pVtab) { |
9267 | vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVtab; |
9268 | sqlite3_freesqlite3_api->free(p); |
9269 | return SQLITE_OK0; |
9270 | } |
9271 | |
9272 | static int vec_static_blob_entriesOpen(sqlite3_vtab *p, |
9273 | sqlite3_vtab_cursor **ppCursor) { |
9274 | UNUSED_PARAMETER(p)(void)(p); |
9275 | vec_static_blob_entries_cursor *pCur; |
9276 | pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur)); |
9277 | if (pCur == 0) |
9278 | return SQLITE_NOMEM7; |
9279 | memset(pCur, 0, sizeof(*pCur)); |
9280 | *ppCursor = &pCur->base; |
9281 | return SQLITE_OK0; |
9282 | } |
9283 | |
9284 | static int vec_static_blob_entriesClose(sqlite3_vtab_cursor *cur) { |
9285 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; |
9286 | sqlite3_freesqlite3_api->free(pCur->knn_data); |
9287 | sqlite3_freesqlite3_api->free(pCur); |
9288 | return SQLITE_OK0; |
9289 | } |
9290 | |
9291 | static int vec_static_blob_entriesBestIndex(sqlite3_vtab *pVTab, |
9292 | sqlite3_index_info *pIdxInfo) { |
9293 | vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVTab; |
9294 | int iMatchTerm = -1; |
9295 | int iLimitTerm = -1; |
9296 | // int iRowidTerm = -1; // https://github.com/asg017/sqlite-vec/issues/47 |
9297 | int iKTerm = -1; |
9298 | |
9299 | for (int i = 0; i < pIdxInfo->nConstraint; i++) { |
9300 | if (!pIdxInfo->aConstraint[i].usable) |
9301 | continue; |
9302 | |
9303 | int iColumn = pIdxInfo->aConstraint[i].iColumn; |
9304 | int op = pIdxInfo->aConstraint[i].op; |
9305 | if (op == SQLITE_INDEX_CONSTRAINT_MATCH64 && |
9306 | iColumn == VEC_STATIC_BLOB_ENTRIES_VECTOR0) { |
9307 | if (iMatchTerm > -1) { |
9308 | // https://github.com/asg017/sqlite-vec/issues/51 |
9309 | return SQLITE_ERROR1; |
9310 | } |
9311 | iMatchTerm = i; |
9312 | } |
9313 | if (op == SQLITE_INDEX_CONSTRAINT_LIMIT73) { |
9314 | iLimitTerm = i; |
9315 | } |
9316 | if (op == SQLITE_INDEX_CONSTRAINT_EQ2 && |
9317 | iColumn == VEC_STATIC_BLOB_ENTRIES_K2) { |
9318 | iKTerm = i; |
9319 | } |
9320 | } |
9321 | if (iMatchTerm >= 0) { |
9322 | if (iLimitTerm < 0 && iKTerm < 0) { |
9323 | // https://github.com/asg017/sqlite-vec/issues/51 |
9324 | return SQLITE_ERROR1; |
9325 | } |
9326 | if (iLimitTerm >= 0 && iKTerm >= 0) { |
9327 | return SQLITE_ERROR1; // limit or k, not both |
9328 | } |
9329 | if (pIdxInfo->nOrderBy < 1) { |
9330 | vtab_set_error(pVTab, "ORDER BY distance required"); |
9331 | return SQLITE_CONSTRAINT19; |
9332 | } |
9333 | if (pIdxInfo->nOrderBy > 1) { |
9334 | // https://github.com/asg017/sqlite-vec/issues/51 |
9335 | vtab_set_error(pVTab, "more than 1 ORDER BY clause provided"); |
9336 | return SQLITE_CONSTRAINT19; |
9337 | } |
9338 | if (pIdxInfo->aOrderBy[0].iColumn != VEC_STATIC_BLOB_ENTRIES_DISTANCE1) { |
9339 | vtab_set_error(pVTab, "ORDER BY must be on the distance column"); |
9340 | return SQLITE_CONSTRAINT19; |
9341 | } |
9342 | if (pIdxInfo->aOrderBy[0].desc) { |
9343 | vtab_set_error(pVTab, |
9344 | "Only ascending in ORDER BY distance clause is supported, " |
9345 | "DESC is not supported yet."); |
9346 | return SQLITE_CONSTRAINT19; |
9347 | } |
9348 | |
9349 | pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_KNN; |
9350 | pIdxInfo->estimatedCost = (double)10; |
9351 | pIdxInfo->estimatedRows = 10; |
9352 | |
9353 | pIdxInfo->orderByConsumed = 1; |
9354 | pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = 1; |
9355 | pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1; |
9356 | if (iLimitTerm >= 0) { |
9357 | pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = 2; |
9358 | pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1; |
9359 | } else { |
9360 | pIdxInfo->aConstraintUsage[iKTerm].argvIndex = 2; |
9361 | pIdxInfo->aConstraintUsage[iKTerm].omit = 1; |
9362 | } |
9363 | |
9364 | } else { |
9365 | pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_FULLSCAN; |
9366 | pIdxInfo->estimatedCost = (double)p->blob->nvectors; |
9367 | pIdxInfo->estimatedRows = p->blob->nvectors; |
9368 | } |
9369 | return SQLITE_OK0; |
9370 | } |
9371 | |
9372 | static int vec_static_blob_entriesFilter(sqlite3_vtab_cursor *pVtabCursor, |
9373 | int idxNum, const char *idxStr, |
9374 | int argc, sqlite3_value **argv) { |
9375 | UNUSED_PARAMETER(idxStr)(void)(idxStr); |
9376 | assert(argc >= 0 && argc <= 3)((void) sizeof ((argc >= 0 && argc <= 3) ? 1 : 0 ), __extension__ ({ if (argc >= 0 && argc <= 3) ; else __assert_fail ("argc >= 0 && argc <= 3" , "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 9376, __extension__ __PRETTY_FUNCTION__); })); |
9377 | vec_static_blob_entries_cursor *pCur = |
9378 | (vec_static_blob_entries_cursor *)pVtabCursor; |
9379 | vec_static_blob_entries_vtab *p = |
9380 | (vec_static_blob_entries_vtab *)pCur->base.pVtab; |
9381 | |
9382 | if (idxNum == VEC_SBE__QUERYPLAN_KNN) { |
9383 | assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc == 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 9383, __extension__ __PRETTY_FUNCTION__); })); |
9384 | pCur->query_plan = VEC_SBE__QUERYPLAN_KNN; |
9385 | struct sbe_query_knn_data *knn_data; |
9386 | knn_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*knn_data)); |
9387 | if (!knn_data) { |
9388 | return SQLITE_NOMEM7; |
9389 | } |
9390 | memset(knn_data, 0, sizeof(*knn_data)); |
9391 | |
9392 | void *queryVector; |
9393 | size_t dimensions; |
9394 | enum VectorElementType elementType; |
9395 | vector_cleanup cleanup; |
9396 | char *err; |
9397 | int rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType, |
9398 | &cleanup, &err); |
9399 | if (rc != SQLITE_OK0) { |
9400 | return SQLITE_ERROR1; |
9401 | } |
9402 | if (elementType != p->blob->element_type) { |
9403 | return SQLITE_ERROR1; |
9404 | } |
9405 | if (dimensions != p->blob->dimensions) { |
9406 | return SQLITE_ERROR1; |
9407 | } |
9408 | |
9409 | i64 k = min(sqlite3_value_int64(argv[1]), (i64)p->blob->nvectors)(((sqlite3_api->value_int64(argv[1])) <= ((i64)p->blob ->nvectors)) ? (sqlite3_api->value_int64(argv[1])) : (( i64)p->blob->nvectors)); |
9410 | if (k < 0) { |
9411 | // HANDLE https://github.com/asg017/sqlite-vec/issues/55 |
9412 | return SQLITE_ERROR1; |
9413 | } |
9414 | if (k == 0) { |
9415 | knn_data->k = 0; |
9416 | pCur->knn_data = knn_data; |
9417 | return SQLITE_OK0; |
9418 | } |
9419 | |
9420 | size_t bsize = (p->blob->nvectors + 7) & ~7; |
9421 | |
9422 | i32 *topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i32)); |
9423 | if (!topk_rowids) { |
9424 | // HANDLE https://github.com/asg017/sqlite-vec/issues/55 |
9425 | return SQLITE_ERROR1; |
9426 | } |
9427 | f32 *distances = sqlite3_mallocsqlite3_api->malloc(bsize * sizeof(f32)); |
9428 | if (!distances) { |
9429 | // HANDLE https://github.com/asg017/sqlite-vec/issues/55 |
9430 | return SQLITE_ERROR1; |
9431 | } |
9432 | |
9433 | for (size_t i = 0; i < p->blob->nvectors; i++) { |
9434 | // https://github.com/asg017/sqlite-vec/issues/52 |
9435 | float *v = ((float *)p->blob->p) + (i * p->blob->dimensions); |
9436 | distances[i] = |
9437 | distance_l2_sqr_float(v, (float *)queryVector, &p->blob->dimensions); |
9438 | } |
9439 | u8 *candidates = bitmap_new(bsize); |
9440 | assert(candidates)((void) sizeof ((candidates) ? 1 : 0), __extension__ ({ if (candidates ) ; else __assert_fail ("candidates", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 9440, __extension__ __PRETTY_FUNCTION__); })); |
9441 | |
9442 | u8 *taken = bitmap_new(bsize); |
9443 | assert(taken)((void) sizeof ((taken) ? 1 : 0), __extension__ ({ if (taken) ; else __assert_fail ("taken", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c" , 9443, __extension__ __PRETTY_FUNCTION__); })); |
9444 | |
9445 | bitmap_fill(candidates, bsize); |
9446 | for (size_t i = bsize; i >= p->blob->nvectors; i--) { |
9447 | bitmap_set(candidates, i, 0); |
9448 | } |
9449 | i32 k_used = 0; |
9450 | min_idx(distances, bsize, candidates, topk_rowids, k, taken, &k_used); |
9451 | knn_data->current_idx = 0; |
9452 | knn_data->distances = distances; |
9453 | knn_data->k = k; |
9454 | knn_data->rowids = topk_rowids; |
9455 | |
9456 | pCur->knn_data = knn_data; |
9457 | } else { |
9458 | pCur->query_plan = VEC_SBE__QUERYPLAN_FULLSCAN; |
9459 | pCur->iRowid = 0; |
9460 | } |
9461 | |
9462 | return SQLITE_OK0; |
9463 | } |
9464 | |
9465 | static int vec_static_blob_entriesRowid(sqlite3_vtab_cursor *cur, |
9466 | sqlite_int64 *pRowid) { |
9467 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; |
9468 | switch (pCur->query_plan) { |
9469 | case VEC_SBE__QUERYPLAN_FULLSCAN: { |
9470 | *pRowid = pCur->iRowid; |
9471 | return SQLITE_OK0; |
9472 | } |
9473 | case VEC_SBE__QUERYPLAN_KNN: { |
9474 | i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx]; |
9475 | *pRowid = (sqlite3_int64)rowid; |
9476 | return SQLITE_OK0; |
9477 | } |
9478 | } |
9479 | return SQLITE_ERROR1; |
9480 | } |
9481 | |
9482 | static int vec_static_blob_entriesNext(sqlite3_vtab_cursor *cur) { |
9483 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; |
9484 | switch (pCur->query_plan) { |
9485 | case VEC_SBE__QUERYPLAN_FULLSCAN: { |
9486 | pCur->iRowid++; |
9487 | return SQLITE_OK0; |
9488 | } |
9489 | case VEC_SBE__QUERYPLAN_KNN: { |
9490 | pCur->knn_data->current_idx++; |
9491 | return SQLITE_OK0; |
9492 | } |
9493 | } |
9494 | return SQLITE_ERROR1; |
9495 | } |
9496 | |
9497 | static int vec_static_blob_entriesEof(sqlite3_vtab_cursor *cur) { |
9498 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; |
9499 | vec_static_blob_entries_vtab *p = |
9500 | (vec_static_blob_entries_vtab *)pCur->base.pVtab; |
9501 | switch (pCur->query_plan) { |
9502 | case VEC_SBE__QUERYPLAN_FULLSCAN: { |
9503 | return (size_t)pCur->iRowid >= p->blob->nvectors; |
9504 | } |
9505 | case VEC_SBE__QUERYPLAN_KNN: { |
9506 | return pCur->knn_data->current_idx >= pCur->knn_data->k; |
9507 | } |
9508 | } |
9509 | return SQLITE_ERROR1; |
9510 | } |
9511 | |
9512 | static int vec_static_blob_entriesColumn(sqlite3_vtab_cursor *cur, |
9513 | sqlite3_context *context, int i) { |
9514 | vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur; |
9515 | vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)cur->pVtab; |
9516 | |
9517 | switch (pCur->query_plan) { |
9518 | case VEC_SBE__QUERYPLAN_FULLSCAN: { |
9519 | switch (i) { |
9520 | case VEC_STATIC_BLOB_ENTRIES_VECTOR0: |
9521 | |
9522 | sqlite3_result_blobsqlite3_api->result_blob( |
9523 | context, |
9524 | ((unsigned char *)p->blob->p) + |
9525 | (pCur->iRowid * p->blob->dimensions * sizeof(float)), |
9526 | p->blob->dimensions * sizeof(float), SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
9527 | sqlite3_result_subtypesqlite3_api->result_subtype(context, p->blob->element_type); |
9528 | break; |
9529 | } |
9530 | return SQLITE_OK0; |
9531 | } |
9532 | case VEC_SBE__QUERYPLAN_KNN: { |
9533 | switch (i) { |
9534 | case VEC_STATIC_BLOB_ENTRIES_VECTOR0: { |
9535 | i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx]; |
9536 | sqlite3_result_blobsqlite3_api->result_blob(context, |
9537 | ((unsigned char *)p->blob->p) + |
9538 | (rowid * p->blob->dimensions * sizeof(float)), |
9539 | p->blob->dimensions * sizeof(float), |
9540 | SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); |
9541 | sqlite3_result_subtypesqlite3_api->result_subtype(context, p->blob->element_type); |
9542 | break; |
9543 | } |
9544 | } |
9545 | return SQLITE_OK0; |
9546 | } |
9547 | } |
9548 | return SQLITE_ERROR1; |
9549 | } |
9550 | |
9551 | static sqlite3_module vec_static_blob_entriesModule = { |
9552 | /* iVersion */ 3, |
9553 | /* xCreate */ |
9554 | vec_static_blob_entriesCreate, // handle rm? |
9555 | // https://github.com/asg017/sqlite-vec/issues/55 |
9556 | /* xConnect */ vec_static_blob_entriesConnect, |
9557 | /* xBestIndex */ vec_static_blob_entriesBestIndex, |
9558 | /* xDisconnect */ vec_static_blob_entriesDisconnect, |
9559 | /* xDestroy */ vec_static_blob_entriesDisconnect, |
9560 | /* xOpen */ vec_static_blob_entriesOpen, |
9561 | /* xClose */ vec_static_blob_entriesClose, |
9562 | /* xFilter */ vec_static_blob_entriesFilter, |
9563 | /* xNext */ vec_static_blob_entriesNext, |
9564 | /* xEof */ vec_static_blob_entriesEof, |
9565 | /* xColumn */ vec_static_blob_entriesColumn, |
9566 | /* xRowid */ vec_static_blob_entriesRowid, |
9567 | /* xUpdate */ 0, |
9568 | /* xBegin */ 0, |
9569 | /* xSync */ 0, |
9570 | /* xCommit */ 0, |
9571 | /* xRollback */ 0, |
9572 | /* xFindMethod */ 0, |
9573 | /* xRename */ 0, |
9574 | /* xSavepoint */ 0, |
9575 | /* xRelease */ 0, |
9576 | /* xRollbackTo */ 0, |
9577 | /* xShadowName */ 0, |
9578 | #if SQLITE_VERSION_NUMBER3050001 >= 3044000 |
9579 | /* xIntegrity */ 0 |
9580 | #endif |
9581 | }; |
9582 | #pragma endregion |
9583 | |
/* Build-flag fragments reported by vec_debug(): each is the flag's name when
 * the corresponding SIMD path is compiled in, and empty otherwise. */
#ifdef SQLITE_VEC_ENABLE_AVX
#define SQLITE_VEC_DEBUG_BUILD_AVX "avx"
#else
#define SQLITE_VEC_DEBUG_BUILD_AVX ""
#endif
#ifdef SQLITE_VEC_ENABLE_NEON
#define SQLITE_VEC_DEBUG_BUILD_NEON "neon"
#else
#define SQLITE_VEC_DEBUG_BUILD_NEON ""
#endif

#define SQLITE_VEC_DEBUG_BUILD                                                 \
  SQLITE_VEC_DEBUG_BUILD_AVX " " SQLITE_VEC_DEBUG_BUILD_NEON

/* Text returned by vec_debug(): version, build date, source commit and the
 * build flags assembled above. */
#define SQLITE_VEC_DEBUG_STRING                                                \
  "Version: " SQLITE_VEC_VERSION "\n"                                          \
  "Date: " SQLITE_VEC_DATE "\n"                                                \
  "Commit: " SQLITE_VEC_SOURCE "\n"                                            \
  "Build flags: " SQLITE_VEC_DEBUG_BUILD
9603 | |
9604 | SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg, |
9605 | const sqlite3_api_routines *pApi) { |
9606 | #ifndef SQLITE_CORE |
9607 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; |
9608 | #endif |
9609 | int rc = SQLITE_OK0; |
9610 | |
9611 | #define DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) (SQLITE_UTF81 | SQLITE_INNOCUOUS0x000200000 | SQLITE_DETERMINISTIC0x000000800) |
9612 | |
9613 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_version", 0, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800), |
9614 | SQLITE_VEC_VERSION"v0.1.7-alpha.2", _static_text_func, NULL((void*)0), |
9615 | NULL((void*)0), NULL((void*)0)); |
9616 | if (rc != SQLITE_OK0) { |
9617 | return rc; |
9618 | } |
9619 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_debug", 0, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800), |
9620 | SQLITE_VEC_DEBUG_STRING"Version: " "v0.1.7-alpha.2" "\n" "Date: " "2025-01-10T23:18:50Z+0000" "\n" "Commit: " "bdc336d1cf2a2222b6227784bd30c6631603279b" "\n" "Build flags: " "" " " "", _static_text_func, |
9621 | NULL((void*)0), NULL((void*)0), NULL((void*)0)); |
9622 | if (rc != SQLITE_OK0) { |
9623 | return rc; |
9624 | } |
9625 | static struct { |
9626 | const char *zFName; |
9627 | void (*xFunc)(sqlite3_context *, int, sqlite3_value **); |
9628 | int nArg; |
9629 | int flags; |
9630 | } aFunc[] = { |
9631 | // clang-format off |
9632 | //{"vec_version", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_VERSION }, |
9633 | //{"vec_debug", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_DEBUG_STRING }, |
9634 | {"vec_distance_l2", vec_distance_l2, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, |
9635 | {"vec_distance_l1", vec_distance_l1, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, |
9636 | {"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, |
9637 | {"vec_distance_cosine", vec_distance_cosine, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, |
9638 | {"vec_length", vec_length, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, }, |
9639 | {"vec_type", vec_type, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800), }, |
9640 | {"vec_to_json", vec_to_json, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9641 | {"vec_add", vec_add, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9642 | {"vec_sub", vec_sub, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9643 | {"vec_slice", vec_slice, 3, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9644 | {"vec_normalize", vec_normalize, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9645 | {"vec_f32", vec_f32, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9646 | {"vec_bit", vec_bit, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9647 | {"vec_int8", vec_int8, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9648 | {"vec_quantize_int8", vec_quantize_int8, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9649 | {"vec_quantize_binary", vec_quantize_binary, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, }, |
9650 | // clang-format on |
9651 | }; |
9652 | |
9653 | static struct { |
9654 | char *name; |
9655 | const sqlite3_module *module; |
9656 | void *p; |
9657 | void (*xDestroy)(void *); |
9658 | } aMod[] = { |
9659 | // clang-format off |
9660 | {"vec0", &vec0Module, NULL((void*)0), NULL((void*)0)}, |
9661 | {"vec_each", &vec_eachModule, NULL((void*)0), NULL((void*)0)}, |
9662 | // clang-format on |
9663 | }; |
9664 | |
9665 | for (unsigned long i = 0; i < countof(aFunc)(sizeof(aFunc) / sizeof((aFunc)[0])) && rc == SQLITE_OK0; i++) { |
9666 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, aFunc[i].zFName, aFunc[i].nArg, |
9667 | aFunc[i].flags, NULL((void*)0), aFunc[i].xFunc, NULL((void*)0), |
9668 | NULL((void*)0), NULL((void*)0)); |
9669 | if (rc != SQLITE_OK0) { |
9670 | *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("Error creating function %s: %s", |
9671 | aFunc[i].zFName, sqlite3_errmsgsqlite3_api->errmsg(db)); |
9672 | return rc; |
9673 | } |
9674 | } |
9675 | |
9676 | for (unsigned long i = 0; i < countof(aMod)(sizeof(aMod) / sizeof((aMod)[0])) && rc == SQLITE_OK0; i++) { |
9677 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, aMod[i].name, aMod[i].module, NULL((void*)0), NULL((void*)0)); |
9678 | if (rc != SQLITE_OK0) { |
9679 | *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("Error creating module %s: %s", aMod[i].name, |
9680 | sqlite3_errmsgsqlite3_api->errmsg(db)); |
9681 | return rc; |
9682 | } |
9683 | } |
9684 | |
9685 | return SQLITE_OK0; |
9686 | } |
9687 | |
9688 | #ifndef SQLITE_VEC_OMIT_FS |
9689 | SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg, |
9690 | const sqlite3_api_routines *pApi) { |
9691 | UNUSED_PARAMETER(pzErrMsg)(void)(pzErrMsg); |
9692 | #ifndef SQLITE_CORE |
9693 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; |
9694 | #endif |
9695 | int rc = SQLITE_OK0; |
9696 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE0x001000000, |
9697 | NULL((void*)0), vec_npy_file, NULL((void*)0), NULL((void*)0), NULL((void*)0)); |
9698 | if(rc != SQLITE_OK0) { |
9699 | return rc; |
9700 | } |
9701 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_npy_each", &vec_npy_eachModule, NULL((void*)0), NULL((void*)0)); |
9702 | return rc; |
9703 | } |
9704 | #endif |
9705 | |
9706 | SQLITE_VEC_API int |
9707 | sqlite3_vec_static_blobs_init(sqlite3 *db, char **pzErrMsg, |
9708 | const sqlite3_api_routines *pApi) { |
9709 | UNUSED_PARAMETER(pzErrMsg)(void)(pzErrMsg); |
9710 | #ifndef SQLITE_CORE |
9711 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; |
9712 | #endif |
9713 | |
9714 | int rc = SQLITE_OK0; |
9715 | vec_static_blob_data *static_blob_data; |
9716 | static_blob_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*static_blob_data)); |
9717 | if (!static_blob_data) { |
9718 | return SQLITE_NOMEM7; |
9719 | } |
9720 | memset(static_blob_data, 0, sizeof(*static_blob_data)); |
9721 | |
9722 | rc = sqlite3_create_function_v2sqlite3_api->create_function_v2( |
9723 | db, "vec_static_blob_from_raw", 4, |
9724 | DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, NULL((void*)0), |
9725 | vec_static_blob_from_raw, NULL((void*)0), NULL((void*)0), NULL((void*)0)); |
9726 | if (rc != SQLITE_OK0) |
9727 | return rc; |
9728 | |
9729 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_static_blobs", &vec_static_blobsModule, |
9730 | static_blob_data, sqlite3_freesqlite3_api->free); |
9731 | if (rc != SQLITE_OK0) |
9732 | return rc; |
9733 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_static_blob_entries", |
9734 | &vec_static_blob_entriesModule, |
9735 | static_blob_data, NULL((void*)0)); |
9736 | if (rc != SQLITE_OK0) |
9737 | return rc; |
9738 | return rc; |
9739 | } |