Bug Summary

File:root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c
Warning:line 2945, column 6
Value stored to 'minor' during its initialization is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name sqlite-vec.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/third_party/sqlite3/ext -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/third_party/sqlite3/ext -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -I /root/firefox-clang/third_party/sqlite3/ext -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/third_party/sqlite3/ext -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include 
-O2 -Wno-error=tautological-type-limit-compare -Wno-range-loop-analysis -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-unknown-warning-option -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c /root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c
1#include "sqlite-vec.h"
2
3#include <assert.h>
4#include <errno(*__errno_location ()).h>
5#include <float.h>
6#include <inttypes.h>
7#include <limits.h>
8#include <math.h>
9#include <stdbool.h>
10#include <stdint.h>
11#include <stdlib.h>
12#include <string.h>
13
14#ifndef SQLITE_VEC_OMIT_FS
15#include <stdio.h>
16#endif
17
18#ifndef SQLITE_CORE
19#include "sqlite3ext.h"
20SQLITE_EXTENSION_INIT3extern const sqlite3_api_routines *sqlite3_api;
21#else
22#include "sqlite3.h"
23#endif
24
25#ifndef UINT32_TYPEunsigned int
26#ifdef HAVE_UINT32_T
27#define UINT32_TYPEunsigned int uint32_t
28#else
29#define UINT32_TYPEunsigned int unsigned int
30#endif
31#endif
32#ifndef UINT16_TYPEunsigned short int
33#ifdef HAVE_UINT16_T
34#define UINT16_TYPEunsigned short int uint16_t
35#else
36#define UINT16_TYPEunsigned short int unsigned short int
37#endif
38#endif
39#ifndef INT16_TYPEshort int
40#ifdef HAVE_INT16_T
41#define INT16_TYPEshort int int16_t
42#else
43#define INT16_TYPEshort int short int
44#endif
45#endif
46#ifndef UINT8_TYPEunsigned char
47#ifdef HAVE_UINT8_T
48#define UINT8_TYPEunsigned char uint8_t
49#else
50#define UINT8_TYPEunsigned char unsigned char
51#endif
52#endif
53#ifndef INT8_TYPEsigned char
54#ifdef HAVE_INT8_T
55#define INT8_TYPEsigned char int8_t
56#else
57#define INT8_TYPEsigned char signed char
58#endif
59#endif
60#ifndef LONGDOUBLE_TYPElong double
61#define LONGDOUBLE_TYPElong double long double
62#endif
63
64typedef int8_t i8;
65typedef uint8_t u8;
66typedef int16_t i16;
67typedef int32_t i32;
68typedef sqlite3_int64 i64;
69typedef uint32_t u32;
70typedef uint64_t u64;
71typedef float f32;
72typedef size_t usize;
73
74#ifndef UNUSED_PARAMETER
75#define UNUSED_PARAMETER(X)(void)(X) (void)(X)
76#endif
77
78// sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22)
79// https://www.sqlite.org/changes.html#version_3_38_0
80#if SQLITE_VERSION_NUMBER3050001 >= 3038000
81#define COMPILER_SUPPORTS_VTAB_IN1 1
82#endif
83
84#ifndef SQLITE_SUBTYPE0x000100000
85#define SQLITE_SUBTYPE0x000100000 0x000100000
86#endif
87
88#ifndef SQLITE_RESULT_SUBTYPE0x001000000
89#define SQLITE_RESULT_SUBTYPE0x001000000 0x001000000
90#endif
91
92#ifndef SQLITE_INDEX_CONSTRAINT_LIMIT73
93#define SQLITE_INDEX_CONSTRAINT_LIMIT73 73
94#endif
95
96#ifndef SQLITE_INDEX_CONSTRAINT_OFFSET74
97#define SQLITE_INDEX_CONSTRAINT_OFFSET74 74
98#endif
99
100#define countof(x)(sizeof(x) / sizeof((x)[0])) (sizeof(x) / sizeof((x)[0]))
101#define min(a, b)(((a) <= (b)) ? (a) : (b)) (((a) <= (b)) ? (a) : (b))
102
/**
 * Element types a vector can carry. The three values are consecutive,
 * starting at 223. vector_from_value() compares the SQLite value subtype
 * against these constants.
 */
enum VectorElementType {
  // clang-format off
  SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223,
  SQLITE_VEC_ELEMENT_TYPE_BIT     = 224,
  SQLITE_VEC_ELEMENT_TYPE_INT8    = 225,
  // clang-format on
};
110
111#ifdef SQLITE_VEC_ENABLE_AVX
112#include <immintrin.h>
113#define PORTABLE_ALIGN32 __attribute__((aligned(32)))
114#define PORTABLE_ALIGN64 __attribute__((aligned(64)))
115
116static f32 l2_sqr_float_avx(const void *pVect1v, const void *pVect2v,
117 const void *qty_ptr) {
118 f32 *pVect1 = (f32 *)pVect1v;
119 f32 *pVect2 = (f32 *)pVect2v;
120 size_t qty = *((size_t *)qty_ptr);
121 f32 PORTABLE_ALIGN32 TmpRes[8];
122 size_t qty16 = qty >> 4;
123
124 const f32 *pEnd1 = pVect1 + (qty16 << 4);
125
126 __m256 diff, v1, v2;
127 __m256 sum = _mm256_set1_ps(0);
128
129 while (pVect1 < pEnd1) {
130 v1 = _mm256_loadu_ps(pVect1);
131 pVect1 += 8;
132 v2 = _mm256_loadu_ps(pVect2);
133 pVect2 += 8;
134 diff = _mm256_sub_ps(v1, v2);
135 sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
136
137 v1 = _mm256_loadu_ps(pVect1);
138 pVect1 += 8;
139 v2 = _mm256_loadu_ps(pVect2);
140 pVect2 += 8;
141 diff = _mm256_sub_ps(v1, v2);
142 sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
143 }
144
145 _mm256_store_ps(TmpRes, sum);
146 return sqrt(TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] +
147 TmpRes[5] + TmpRes[6] + TmpRes[7]);
148}
149#endif
150
151#ifdef SQLITE_VEC_ENABLE_NEON
152#include <arm_neon.h>
153
154#define PORTABLE_ALIGN32 __attribute__((aligned(32)))
155
156// thx https://github.com/nmslib/hnswlib/pull/299/files
/**
 * NEON L2 (Euclidean) distance between two float32 vectors.
 *
 * Full blocks of 16 elements are accumulated in four independent 4-lane
 * accumulators; the remaining (qty % 16) elements are handled by a scalar
 * loop.
 *
 * @param pVect1v first vector (f32 elements)
 * @param pVect2v second vector (f32 elements)
 * @param qty_ptr pointer to a size_t holding the element count
 * @return square root of the summed squared differences
 */
static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v,
                             const void *qty_ptr) {
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;
  const size_t count = *((const size_t *)qty_ptr);
  const size_t blocks = count >> 4;

  const f32 *const simd_stop = lhs + (blocks << 4);

  float32x4_t acc[4];
  for (int lane = 0; lane < 4; lane++) {
    acc[lane] = vdupq_n_f32(0);
  }

  while (lhs < simd_stop) {
    // One 4-float step per accumulator, 16 floats per iteration.
    for (int lane = 0; lane < 4; lane++) {
      const float32x4_t va = vld1q_f32(lhs);
      const float32x4_t vb = vld1q_f32(rhs);
      const float32x4_t delta = vsubq_f32(va, vb);
      acc[lane] = vfmaq_f32(acc[lane], delta, delta);
      lhs += 4;
      rhs += 4;
    }
  }

  f32 total =
      vaddvq_f32(vaddq_f32(vaddq_f32(acc[0], acc[1]), vaddq_f32(acc[2], acc[3])));

  // Scalar tail for the elements that did not fill a block of 16.
  const f32 *const tail_stop = lhs + (count - (blocks << 4));
  for (; lhs < tail_stop; lhs++, rhs++) {
    const f32 delta = *lhs - *rhs;
    total += delta * delta;
  }

  return sqrt(total);
}
214
/**
 * NEON L2 (Euclidean) distance between two int8 vectors.
 *
 * Eight elements are processed per SIMD step; leftovers are handled by a
 * scalar loop.
 *
 * @param pVect1v first vector (i8 elements)
 * @param pVect2v second vector (i8 elements)
 * @param qty_ptr pointer to a size_t holding the element count
 * @return square root of the summed squared differences
 */
static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v,
                            const void *qty_ptr) {
  const i8 *pVect1 = (const i8 *)pVect1v;
  const i8 *pVect2 = (const i8 *)pVect2v;
  size_t remaining = *((const size_t *)qty_ptr);

  i32 sum_scalar = 0;

  // Counting remaining elements (instead of comparing against `end - 7`)
  // avoids forming a pointer before the start of a short buffer.
  while (remaining >= 8) {
    // loading 8 at a time
    int8x8_t v1 = vld1_s8(pVect1);
    int8x8_t v2 = vld1_s8(pVect2);
    pVect1 += 8;
    pVect2 += 8;
    remaining -= 8;

    // Widening subtract: int8 differences span [-255, 255], which fits
    // in int16.
    int16x8_t diff = vsubl_s8(v1, v2);

    // The square of a difference can reach 65025, which does NOT fit in
    // int16, so the multiply must widen to 32 bits.
    int32x4_t sq_lo = vmull_s16(vget_low_s16(diff), vget_low_s16(diff));
    int32x4_t sq_hi = vmull_s16(vget_high_s16(diff), vget_high_s16(diff));

    sum_scalar += vaddvq_s32(vaddq_s32(sq_lo, sq_hi));
  }

  // handle leftovers
  while (remaining > 0) {
    i16 diff = (i16)*pVect1 - (i16)*pVect2;
    sum_scalar += diff * diff;
    pVect1++;
    pVect2++;
    remaining--;
  }

  return sqrtf(sum_scalar);
}
253
/**
 * Adds the sixteen absolute differences |a[i] - b[i]| to the lanes of acc.
 *
 * vabdq_s8 yields int8 lanes whose bit pattern is the (unsigned) absolute
 * difference, so the lanes are reinterpreted as uint8 before the pairwise
 * widening adds. Each call adds at most 4 * 255 to a 32-bit lane.
 */
static int32x4_t l1_int8_neon_step(int32x4_t acc, int8x16_t a, int8x16_t b) {
  uint8x16_t abs_diff = vreinterpretq_u8_s8(vabdq_s8(a, b));
  uint32x4_t widened = vpaddlq_u16(vpaddlq_u8(abs_diff));
  return vaddq_s32(acc, vreinterpretq_s32_u32(widened));
}

/**
 * NEON L1 (Manhattan) distance between two int8 vectors.
 *
 * Processes 64 elements per iteration, then 16 per iteration, then the
 * remaining elements one at a time.
 *
 * @param pVect1v first vector (i8 elements)
 * @param pVect2v second vector (i8 elements)
 * @param qty_ptr pointer to a size_t holding the element count
 * @return sum of absolute element differences
 */
static i32 l1_int8_neon(const void *pVect1v, const void *pVect2v,
                        const void *qty_ptr) {
  const i8 *pVect1 = (const i8 *)pVect1v;
  const i8 *pVect2 = (const i8 *)pVect2v;
  // Counting remaining elements avoids forming `end - 63` / `end - 15`,
  // which would point before the buffer for short inputs.
  size_t remaining = *((const size_t *)qty_ptr);

  int32x4_t acc1 = vdupq_n_s32(0);
  int32x4_t acc2 = vdupq_n_s32(0);
  int32x4_t acc3 = vdupq_n_s32(0);
  int32x4_t acc4 = vdupq_n_s32(0);

  while (remaining >= 64) {
    acc1 = l1_int8_neon_step(acc1, vld1q_s8(pVect1), vld1q_s8(pVect2));
    acc2 = l1_int8_neon_step(acc2, vld1q_s8(pVect1 + 16),
                             vld1q_s8(pVect2 + 16));
    acc3 = l1_int8_neon_step(acc3, vld1q_s8(pVect1 + 32),
                             vld1q_s8(pVect2 + 32));
    acc4 = l1_int8_neon_step(acc4, vld1q_s8(pVect1 + 48),
                             vld1q_s8(pVect2 + 48));

    pVect1 += 64;
    pVect2 += 64;
    remaining -= 64;
  }

  while (remaining >= 16) {
    acc1 = l1_int8_neon_step(acc1, vld1q_s8(pVect1), vld1q_s8(pVect2));
    pVect1 += 16;
    pVect2 += 16;
    remaining -= 16;
  }

  int32x4_t acc = vaddq_s32(vaddq_s32(acc1, acc2), vaddq_s32(acc3, acc4));

  // Scalar tail.
  i32 sum = 0;
  while (remaining > 0) {
    sum += abs((i32)*pVect1 - (i32)*pVect2);
    pVect1++;
    pVect2++;
    remaining--;
  }

  return vaddvq_s32(acc) + sum;
}
313
/**
 * NEON L1 (Manhattan) distance between two float32 vectors.
 *
 * Four elements are processed per SIMD step, with differences taken and
 * accumulated in float64; leftovers are handled by a scalar loop.
 *
 * @param pVect1v first vector (f32 elements)
 * @param pVect2v second vector (f32 elements)
 * @param qty_ptr pointer to a size_t holding the element count
 * @return sum of absolute element differences, as a double
 */
static double l1_f32_neon(const void *pVect1v, const void *pVect2v,
                          const void *qty_ptr) {
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;
  const size_t count = *((const size_t *)qty_ptr);

  const f32 *const stop = lhs + count;
  float64x2_t wide_acc = vdupq_n_f64(0);

  for (; lhs < stop - 3; lhs += 4, rhs += 4) {
    const float32x4_t va = vld1q_f32(lhs);
    const float32x4_t vb = vld1q_f32(rhs);

    // f32x4 -> two f64x2 halves before taking the absolute difference
    const float64x2_t lo = vabdq_f64(vcvt_f64_f32(vget_low_f32(va)),
                                     vcvt_f64_f32(vget_low_f32(vb)));
    const float64x2_t hi =
        vabdq_f64(vcvt_high_f64_f32(va), vcvt_high_f64_f32(vb));

    wide_acc = vaddq_f64(wide_acc, vaddq_f64(lo, hi));
  }

  double tail = 0;
  for (; lhs < stop; lhs++, rhs++) {
    tail += fabs((double)*lhs - (double)*rhs);
  }

  return vaddvq_f64(wide_acc) + tail;
}
347#endif
348
349static f32 l2_sqr_float(const void *pVect1v, const void *pVect2v,
350 const void *qty_ptr) {
351 f32 *pVect1 = (f32 *)pVect1v;
352 f32 *pVect2 = (f32 *)pVect2v;
353 size_t qty = *((size_t *)qty_ptr);
354
355 f32 res = 0;
356 for (size_t i = 0; i < qty; i++) {
357 f32 t = *pVect1 - *pVect2;
358 pVect1++;
359 pVect2++;
360 res += t * t;
361 }
362 return sqrt(res);
363}
364
365static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) {
366 i8 *a = (i8 *)pA;
367 i8 *b = (i8 *)pB;
368 size_t d = *((size_t *)pD);
369
370 f32 res = 0;
371 for (size_t i = 0; i < d; i++) {
372 f32 t = *a - *b;
373 a++;
374 b++;
375 res += t * t;
376 }
377 return sqrt(res);
378}
379
380static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) {
381#ifdef SQLITE_VEC_ENABLE_NEON
382 if ((*(const size_t *)d) > 16) {
383 return l2_sqr_float_neon(a, b, d);
384 }
385#endif
386#ifdef SQLITE_VEC_ENABLE_AVX
387 if (((*(const size_t *)d) % 16 == 0)) {
388 return l2_sqr_float_avx(a, b, d);
389 }
390#endif
391 return l2_sqr_float(a, b, d);
392}
393
394static f32 distance_l2_sqr_int8(const void *a, const void *b, const void *d) {
395#ifdef SQLITE_VEC_ENABLE_NEON
396 if ((*(const size_t *)d) > 7) {
397 return l2_sqr_int8_neon(a, b, d);
398 }
399#endif
400 return l2_sqr_int8(a, b, d);
401}
402
403static i32 l1_int8(const void *pA, const void *pB, const void *pD) {
404 i8 *a = (i8 *)pA;
405 i8 *b = (i8 *)pB;
406 size_t d = *((size_t *)pD);
407
408 i32 res = 0;
409 for (size_t i = 0; i < d; i++) {
410 res += abs(*a - *b);
411 a++;
412 b++;
413 }
414
415 return res;
416}
417
418static i32 distance_l1_int8(const void *a, const void *b, const void *d) {
419#ifdef SQLITE_VEC_ENABLE_NEON
420 if ((*(const size_t *)d) > 15) {
421 return l1_int8_neon(a, b, d);
422 }
423#endif
424 return l1_int8(a, b, d);
425}
426
427static double l1_f32(const void *pA, const void *pB, const void *pD) {
428 f32 *a = (f32 *)pA;
429 f32 *b = (f32 *)pB;
430 size_t d = *((size_t *)pD);
431
432 double res = 0;
433 for (size_t i = 0; i < d; i++) {
434 res += fabs((double)*a - (double)*b);
435 a++;
436 b++;
437 }
438
439 return res;
440}
441
/**
 * L1 distance between two float32 vectors. Uses the NEON version when it
 * is enabled and the vector has more than 3 elements, otherwise the scalar
 * version.
 *
 * @param a first vector (f32 elements)
 * @param b second vector (f32 elements)
 * @param d pointer to a size_t holding the element count
 */
static double distance_l1_f32(const void *a, const void *b, const void *d) {
#ifdef SQLITE_VEC_ENABLE_NEON
  {
    const size_t dims = *(const size_t *)d;
    if (dims > 3) {
      return l1_f32_neon(a, b, d);
    }
  }
#endif
  return l1_f32(a, b, d);
}
450
451static f32 distance_cosine_float(const void *pVect1v, const void *pVect2v,
452 const void *qty_ptr) {
453 f32 *pVect1 = (f32 *)pVect1v;
454 f32 *pVect2 = (f32 *)pVect2v;
455 size_t qty = *((size_t *)qty_ptr);
456
457 f32 dot = 0;
458 f32 aMag = 0;
459 f32 bMag = 0;
460 for (size_t i = 0; i < qty; i++) {
461 dot += *pVect1 * *pVect2;
462 aMag += *pVect1 * *pVect1;
463 bMag += *pVect2 * *pVect2;
464 pVect1++;
465 pVect2++;
466 }
467 return 1 - (dot / (sqrt(aMag) * sqrt(bMag)));
468}
469static f32 distance_cosine_int8(const void *pA, const void *pB,
470 const void *pD) {
471 i8 *a = (i8 *)pA;
472 i8 *b = (i8 *)pB;
473 size_t d = *((size_t *)pD);
474
475 f32 dot = 0;
476 f32 aMag = 0;
477 f32 bMag = 0;
478 for (size_t i = 0; i < d; i++) {
479 dot += *a * *b;
480 aMag += *a * *a;
481 bMag += *b * *b;
482 a++;
483 b++;
484 }
485 return 1 - (dot / (sqrt(aMag) * sqrt(bMag)));
486}
487
488// https://github.com/facebookresearch/faiss/blob/77e2e79cd0a680adc343b9840dd865da724c579e/faiss/utils/hamming_distance/common.h#L34
489static u8 hamdist_table[256] = {
490 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
491 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
492 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
493 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
494 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
495 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
496 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
497 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
498 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
499 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
500 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
501
502static f32 distance_hamming_u8(u8 *a, u8 *b, size_t n) {
503 int same = 0;
504 for (unsigned long i = 0; i < n; i++) {
505 same += hamdist_table[a[i] ^ b[i]];
506 }
507 return (f32)same;
508}
509
510#ifdef _MSC_VER
511#if !defined(__clang__1) && (defined(_M_ARM) || defined(_M_ARM64))
512// From
513// https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c,
514// line 34-43
// Fallback population count for MSVC on ARM/ARM64, where the intrinsic
// used in the #else branch is not selected. The parameter is 64 bits wide
// because the caller passes u64 words; a 32-bit parameter would silently
// drop the upper half of every word.
static unsigned int __builtin_popcountl(unsigned long long x) {
  unsigned int c = 0;
  // Each iteration clears the lowest set bit.
  for (; x; ++c) {
    x &= x - 1;
  }
  return c;
}
522#else
523#include <intrin.h>
524#define __builtin_popcountl __popcnt64
525#endif
526#endif
527
528static f32 distance_hamming_u64(u64 *a, u64 *b, size_t n) {
529 int same = 0;
530 for (unsigned long i = 0; i < n; i++) {
531 same += __builtin_popcountl(a[i] ^ b[i]);
532 }
533 return (f32)same;
534}
535
536/**
537 * @brief Calculate the hamming distance between two bitvectors.
538 *
539 * @param a - first bitvector, MUST have d dimensions
540 * @param b - second bitvector, MUST have d dimensions
541 * @param d - pointer to size_t, MUST be divisible by CHAR_BIT
542 * @return f32
543 */
544static f32 distance_hamming(const void *a, const void *b, const void *d) {
545 size_t dimensions = *((size_t *)d);
546
547 if ((dimensions % 64) == 0) {
548 return distance_hamming_u64((u64 *)a, (u64 *)b, dimensions / 8 / CHAR_BIT8);
549 }
550 return distance_hamming_u8((u8 *)a, (u8 *)b, dimensions / CHAR_BIT8);
551}
552
553// from SQLite source:
554// https://github.com/sqlite/sqlite/blob/a509a90958ddb234d1785ed7801880ccb18b497e/src/json.c#L153
// 256-entry table indexed by byte value: 1 for tab, line feed, carriage
// return and space; 0 for every other byte.
static const char vecJsonIsSpaceX[256] = {
    ['\t'] = 1,
    ['\n'] = 1,
    ['\r'] = 1,
    [' '] = 1,
};
570
571#define vecJsonIsspace(x)(vecJsonIsSpaceX[(unsigned char)x]) (vecJsonIsSpaceX[(unsigned char)x])
572
573typedef void (*vector_cleanup)(void *p);
574
/** Cleanup callback that does nothing; the argument is ignored. */
void vector_cleanup_noop(void *_) {
  (void)(_);
}
576
577#define JSON_SUBTYPE74 74
578
579void vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) {
580 va_list args;
581 sqlite3_freesqlite3_api->free(pVTab->zErrMsg);
582 va_start(args, zFormat)__builtin_va_start(args, zFormat);
583 pVTab->zErrMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, args);
584 va_end(args)__builtin_va_end(args);
585}
/** Growable array of fixed-size elements stored in one contiguous buffer. */
struct Array {
  size_t element_size; // size in bytes of a single element
  size_t length;       // number of elements currently stored
  size_t capacity;     // number of elements the buffer can hold
  void *z;             // element storage
};
592
593/**
594 * @brief Initial an array with the given element size and capacity.
595 *
596 * @param array
597 * @param element_size
598 * @param init_capacity
599 * @return SQLITE_OK on success, error code on failure. Only error is
600 * SQLITE_NOMEM
601 */
602int array_init(struct Array *array, size_t element_size, size_t init_capacity) {
603 int sz = element_size * init_capacity;
604 void *z = sqlite3_mallocsqlite3_api->malloc(sz);
605 if (!z) {
606 return SQLITE_NOMEM7;
607 }
608 memset(z, 0, sz);
609
610 array->element_size = element_size;
611 array->length = 0;
612 array->capacity = init_capacity;
613 array->z = z;
614 return SQLITE_OK0;
615}
616
617int array_append(struct Array *array, const void *element) {
618 if (array->length == array->capacity) {
619 size_t new_capacity = array->capacity * 2 + 100;
620 void *z = sqlite3_realloc64sqlite3_api->realloc64(array->z, array->element_size * new_capacity);
621 if (z) {
622 array->capacity = new_capacity;
623 array->z = z;
624 } else {
625 return SQLITE_NOMEM7;
626 }
627 }
628 memcpy(&((unsigned char *)array->z)[array->length * array->element_size],
629 element, array->element_size);
630 array->length++;
631 return SQLITE_OK0;
632}
633
634void array_cleanup(struct Array *array) {
635 if (!array)
636 return;
637 array->element_size = 0;
638 array->length = 0;
639 array->capacity = 0;
640 sqlite3_freesqlite3_api->free(array->z);
641 array->z = NULL((void*)0);
642}
643
644char *vector_subtype_name(int subtype) {
645 switch (subtype) {
646 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
647 return "float32";
648 case SQLITE_VEC_ELEMENT_TYPE_INT8:
649 return "int8";
650 case SQLITE_VEC_ELEMENT_TYPE_BIT:
651 return "bit";
652 }
653 return "";
654}
655char *type_name(int type) {
656 switch (type) {
657 case SQLITE_INTEGER1:
658 return "INTEGER";
659 case SQLITE_BLOB4:
660 return "BLOB";
661 case SQLITE_TEXT3:
662 return "TEXT";
663 case SQLITE_FLOAT2:
664 return "FLOAT";
665 case SQLITE_NULL5:
666 return "NULL";
667 }
668 return "";
669}
670
671typedef void (*fvec_cleanup)(f32 *vector);
672
673void fvec_cleanup_noop(f32 *_) { UNUSED_PARAMETER(_)(void)(_); }
674
675static int fvec_from_value(sqlite3_value *value, f32 **vector,
676 size_t *dimensions, fvec_cleanup *cleanup,
677 char **pzErr) {
678 int value_type = sqlite3_value_typesqlite3_api->value_type(value);
679
680 if (value_type == SQLITE_BLOB4) {
681 const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value);
682 int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value);
683 if (bytes == 0) {
684 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported.");
685 return SQLITE_ERROR1;
686 }
687 if ((bytes % sizeof(f32)) != 0) {
688 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("invalid float32 vector BLOB length. Must be "
689 "divisible by %d, found %d",
690 sizeof(f32), bytes);
691 return SQLITE_ERROR1;
692 }
693 *vector = (f32 *)blob;
694 *dimensions = bytes / sizeof(f32);
695 *cleanup = fvec_cleanup_noop;
696 return SQLITE_OK0;
697 }
698
699 if (value_type == SQLITE_TEXT3) {
700 const char *source = (const char *)sqlite3_value_textsqlite3_api->value_text(value);
701 int source_len = sqlite3_value_bytessqlite3_api->value_bytes(value);
702 if (source_len == 0) {
703 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported.");
704 return SQLITE_ERROR1;
705 }
706 int i = 0;
707
708 struct Array x;
709 int rc = array_init(&x, sizeof(f32), ceil(source_len / 2.0));
710 if (rc != SQLITE_OK0) {
711 return rc;
712 }
713
714 // advance leading whitespace to first '['
715 while (i < source_len) {
716 if (vecJsonIsspace(source[i])(vecJsonIsSpaceX[(unsigned char)source[i]])) {
717 i++;
718 continue;
719 }
720 if (source[i] == '[') {
721 break;
722 }
723
724 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
725 "JSON array parsing error: Input does not start with '['");
726 array_cleanup(&x);
727 return SQLITE_ERROR1;
728 }
729 if (source[i] != '[') {
730 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
731 "JSON array parsing error: Input does not start with '['");
732 array_cleanup(&x);
733 return SQLITE_ERROR1;
734 }
735 int offset = i + 1;
736
737 while (offset < source_len) {
738 char *ptr = (char *)&source[offset];
739 char *endptr;
740
741 errno(*__errno_location ()) = 0;
742 double result = strtod(ptr, &endptr);
743 if ((errno(*__errno_location ()) != 0 && result == 0) // some interval error?
744 || (errno(*__errno_location ()) == ERANGE34 &&
745 (result == HUGE_VAL(__builtin_huge_val ()) || result == -HUGE_VAL(__builtin_huge_val ()))) // too big / smalls
746 ) {
747 sqlite3_freesqlite3_api->free(x.z);
748 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error");
749 return SQLITE_ERROR1;
750 }
751
752 if (endptr == ptr) {
753 if (*ptr != ']') {
754 sqlite3_freesqlite3_api->free(x.z);
755 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error");
756 return SQLITE_ERROR1;
757 }
758 goto done;
759 }
760
761 f32 res = (f32)result;
762 array_append(&x, (const void *)&res);
763
764 offset += (endptr - ptr);
765 while (offset < source_len) {
766 if (vecJsonIsspace(source[offset])(vecJsonIsSpaceX[(unsigned char)source[offset]])) {
767 offset++;
768 continue;
769 }
770 if (source[offset] == ',') {
771 offset++;
772 continue;
773 }
774 if (source[offset] == ']')
775 goto done;
776 break;
777 }
778 }
779
780 done:
781
782 if (x.length > 0) {
783 *vector = (f32 *)x.z;
784 *dimensions = x.length;
785 *cleanup = (fvec_cleanup)sqlite3_freesqlite3_api->free;
786 return SQLITE_OK0;
787 }
788 sqlite3_freesqlite3_api->free(x.z);
789 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported.");
790 return SQLITE_ERROR1;
791 }
792
793 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
794 "Input must have type BLOB (compact format) or TEXT (JSON), found %s",
795 type_name(value_type));
796 return SQLITE_ERROR1;
797}
798
799static int bitvec_from_value(sqlite3_value *value, u8 **vector,
800 size_t *dimensions, vector_cleanup *cleanup,
801 char **pzErr) {
802 int value_type = sqlite3_value_typesqlite3_api->value_type(value);
803 if (value_type == SQLITE_BLOB4) {
804 const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value);
805 int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value);
806 if (bytes == 0) {
807 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported.");
808 return SQLITE_ERROR1;
809 }
810 *vector = (u8 *)blob;
811 *dimensions = bytes * CHAR_BIT8;
812 *cleanup = vector_cleanup_noop;
813 return SQLITE_OK0;
814 }
815 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Unknown type for bitvector.");
816 return SQLITE_ERROR1;
817}
818
819static int int8_vec_from_value(sqlite3_value *value, i8 **vector,
820 size_t *dimensions, vector_cleanup *cleanup,
821 char **pzErr) {
822 int value_type = sqlite3_value_typesqlite3_api->value_type(value);
823 if (value_type == SQLITE_BLOB4) {
824 const void *blob = sqlite3_value_blobsqlite3_api->value_blob(value);
825 int bytes = sqlite3_value_bytessqlite3_api->value_bytes(value);
826 if (bytes == 0) {
827 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported.");
828 return SQLITE_ERROR1;
829 }
830 *vector = (i8 *)blob;
831 *dimensions = bytes;
832 *cleanup = vector_cleanup_noop;
833 return SQLITE_OK0;
834 }
835
836 if (value_type == SQLITE_TEXT3) {
837 const char *source = (const char *)sqlite3_value_textsqlite3_api->value_text(value);
838 int source_len = sqlite3_value_bytessqlite3_api->value_bytes(value);
839 int i = 0;
840
841 if (source_len == 0) {
842 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported.");
843 return SQLITE_ERROR1;
844 }
845
846 struct Array x;
847 int rc = array_init(&x, sizeof(i8), ceil(source_len / 2.0));
848 if (rc != SQLITE_OK0) {
849 return rc;
850 }
851
852 // advance leading whitespace to first '['
853 while (i < source_len) {
854 if (vecJsonIsspace(source[i])(vecJsonIsSpaceX[(unsigned char)source[i]])) {
855 i++;
856 continue;
857 }
858 if (source[i] == '[') {
859 break;
860 }
861
862 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
863 "JSON array parsing error: Input does not start with '['");
864 array_cleanup(&x);
865 return SQLITE_ERROR1;
866 }
867 if (source[i] != '[') {
868 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
869 "JSON array parsing error: Input does not start with '['");
870 array_cleanup(&x);
871 return SQLITE_ERROR1;
872 }
873 int offset = i + 1;
874
875 while (offset < source_len) {
876 char *ptr = (char *)&source[offset];
877 char *endptr;
878
879 errno(*__errno_location ()) = 0;
880 long result = strtol(ptr, &endptr, 10);
881 if ((errno(*__errno_location ()) != 0 && result == 0) ||
882 (errno(*__errno_location ()) == ERANGE34 && (result == LONG_MAX9223372036854775807L || result == LONG_MIN(-9223372036854775807L -1L)))) {
883 sqlite3_freesqlite3_api->free(x.z);
884 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error");
885 return SQLITE_ERROR1;
886 }
887
888 if (endptr == ptr) {
889 if (*ptr != ']') {
890 sqlite3_freesqlite3_api->free(x.z);
891 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error");
892 return SQLITE_ERROR1;
893 }
894 goto done;
895 }
896
897 if (result < INT8_MIN(-128) || result > INT8_MAX(127)) {
898 sqlite3_freesqlite3_api->free(x.z);
899 *pzErr =
900 sqlite3_mprintfsqlite3_api->mprintf("JSON parsing error: value out of range for int8");
901 return SQLITE_ERROR1;
902 }
903
904 i8 res = (i8)result;
905 array_append(&x, (const void *)&res);
906
907 offset += (endptr - ptr);
908 while (offset < source_len) {
909 if (vecJsonIsspace(source[offset])(vecJsonIsSpaceX[(unsigned char)source[offset]])) {
910 offset++;
911 continue;
912 }
913 if (source[offset] == ',') {
914 offset++;
915 continue;
916 }
917 if (source[offset] == ']')
918 goto done;
919 break;
920 }
921 }
922
923 done:
924
925 if (x.length > 0) {
926 *vector = (i8 *)x.z;
927 *dimensions = x.length;
928 *cleanup = (vector_cleanup)sqlite3_freesqlite3_api->free;
929 return SQLITE_OK0;
930 }
931 sqlite3_freesqlite3_api->free(x.z);
932 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("zero-length vectors are not supported.");
933 return SQLITE_ERROR1;
934 }
935
936 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Unknown type for int8 vector.");
937 return SQLITE_ERROR1;
938}
939
940/**
941 * @brief Extract a vector from a sqlite3_value. Can be a float32, int8, or bit
942 * vector.
943 *
944 * @param value: the sqlite3_value to read from.
945 * @param vector: Output pointer to vector data.
946 * @param dimensions: Output number of dimensions
947 * @param dimensions: Output vector element type
948 * @param cleanup
949 * @param pzErrorMessage
950 * @return int SQLITE_OK on success, error code otherwise
951 */
952int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions,
953 enum VectorElementType *element_type,
954 vector_cleanup *cleanup, char **pzErrorMessage) {
955 int subtype = sqlite3_value_subtypesqlite3_api->value_subtype(value);
956 if (!subtype || (subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) ||
957 (subtype == JSON_SUBTYPE74)) {
958 int rc = fvec_from_value(value, (f32 **)vector, dimensions,
959 (fvec_cleanup *)cleanup, pzErrorMessage);
960 if (rc == SQLITE_OK0) {
961 *element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
962 }
963 return rc;
964 }
965
966 if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) {
967 int rc = bitvec_from_value(value, (u8 **)vector, dimensions, cleanup,
968 pzErrorMessage);
969 if (rc == SQLITE_OK0) {
970 *element_type = SQLITE_VEC_ELEMENT_TYPE_BIT;
971 }
972 return rc;
973 }
974 if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) {
975 int rc = int8_vec_from_value(value, (i8 **)vector, dimensions, cleanup,
976 pzErrorMessage);
977 if (rc == SQLITE_OK0) {
978 *element_type = SQLITE_VEC_ELEMENT_TYPE_INT8;
979 }
980 return rc;
981 }
982 *pzErrorMessage = sqlite3_mprintfsqlite3_api->mprintf("Unknown subtype: %d", subtype);
983 return SQLITE_ERROR1;
984}
985
986int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, void **a,
987 void **b, enum VectorElementType *element_type,
988 size_t *dimensions, vector_cleanup *outACleanup,
989 vector_cleanup *outBCleanup, char **outError) {
990 int rc;
991 enum VectorElementType aType, bType;
992 size_t aDims, bDims;
993 char *error = NULL((void*)0);
994 vector_cleanup aCleanup, bCleanup;
995
996 rc = vector_from_value(aValue, a, &aDims, &aType, &aCleanup, &error);
997 if (rc != SQLITE_OK0) {
998 *outError = sqlite3_mprintfsqlite3_api->mprintf("Error reading 1st vector: %s", error);
999 sqlite3_freesqlite3_api->free(error);
1000 return SQLITE_ERROR1;
1001 }
1002
1003 rc = vector_from_value(bValue, b, &bDims, &bType, &bCleanup, &error);
1004 if (rc != SQLITE_OK0) {
1005 *outError = sqlite3_mprintfsqlite3_api->mprintf("Error reading 2nd vector: %s", error);
1006 sqlite3_freesqlite3_api->free(error);
1007 aCleanup(a);
1008 return SQLITE_ERROR1;
1009 }
1010
1011 if (aType != bType) {
1012 *outError =
1013 sqlite3_mprintfsqlite3_api->mprintf("Vector type mistmatch. First vector has type %s, "
1014 "while the second has type %s.",
1015 vector_subtype_name(aType), vector_subtype_name(bType));
1016 aCleanup(*a);
1017 bCleanup(*b);
1018 return SQLITE_ERROR1;
1019 }
1020 if (aDims != bDims) {
1021 *outError = sqlite3_mprintfsqlite3_api->mprintf(
1022 "Vector dimension mistmatch. First vector has %ld dimensions, "
1023 "while the second has %ld dimensions.",
1024 aDims, bDims);
1025 aCleanup(*a);
1026 bCleanup(*b);
1027 return SQLITE_ERROR1;
1028 }
1029 *element_type = aType;
1030 *dimensions = aDims;
1031 *outACleanup = aCleanup;
1032 *outBCleanup = bCleanup;
1033 return SQLITE_OK0;
1034}
1035
1036int _cmp(const void *a, const void *b) { return (*(i64 *)a - *(i64 *)b); }
1037
/* Payload of the pointer value produced by vec_npy_file(): a file path and
 * its length. */
struct VecNpyFile {
  char *path;        /* path text, as read from the SQL argument */
  size_t pathLength; /* length of `path` in bytes */
};
/* Pointer-type tag passed to sqlite3_result_pointer() for VecNpyFile values. */
#define SQLITE_VEC_NPY_FILE_NAME "vec0-npy-file"
1043
1044#ifndef SQLITE_VEC_OMIT_FS
1045static void vec_npy_file(sqlite3_context *context, int argc,
1046 sqlite3_value **argv) {
1047 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1047, __extension__ __PRETTY_FUNCTION__); }))
;
1048 char *path = (char *)sqlite3_value_textsqlite3_api->value_text(argv[0]);
1049 size_t pathLength = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]);
1050 struct VecNpyFile *f;
1051
1052 f = sqlite3_mallocsqlite3_api->malloc(sizeof(*f));
1053 if (!f) {
1054 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1055 return;
1056 }
1057 memset(f, 0, sizeof(*f));
1058
1059 f->path = path;
1060 f->pathLength = pathLength;
1061 sqlite3_result_pointersqlite3_api->result_pointer(context, f, SQLITE_VEC_NPY_FILE_NAME"vec0-npy-file", sqlite3_freesqlite3_api->free);
1062}
1063#endif
1064
1065#pragma region scalar functions
1066static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) {
1067 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1067, __extension__ __PRETTY_FUNCTION__); }))
;
1068 int rc;
1069 f32 *vector = NULL((void*)0);
1070 size_t dimensions;
1071 fvec_cleanup cleanup;
1072 char *errmsg;
1073 rc = fvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
1074 if (rc != SQLITE_OK0) {
1075 sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1);
1076 sqlite3_freesqlite3_api->free(errmsg);
1077 return;
1078 }
1079 sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions * sizeof(f32),
1080 (void (*)(void *))cleanup);
1081 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
1082}
1083
1084static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) {
1085 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1085, __extension__ __PRETTY_FUNCTION__); }))
;
1086 int rc;
1087 u8 *vector;
1088 size_t dimensions;
1089 vector_cleanup cleanup;
1090 char *errmsg;
1091 rc = bitvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
1092 if (rc != SQLITE_OK0) {
1093 sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1);
1094 sqlite3_freesqlite3_api->free(errmsg);
1095 return;
1096 }
1097 sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions / CHAR_BIT8, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
1098 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
1099 cleanup(vector);
1100}
1101static void vec_int8(sqlite3_context *context, int argc, sqlite3_value **argv) {
1102 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1102, __extension__ __PRETTY_FUNCTION__); }))
;
1103 int rc;
1104 i8 *vector;
1105 size_t dimensions;
1106 vector_cleanup cleanup;
1107 char *errmsg;
1108 rc = int8_vec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
1109 if (rc != SQLITE_OK0) {
1110 sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1);
1111 sqlite3_freesqlite3_api->free(errmsg);
1112 return;
1113 }
1114 sqlite3_result_blobsqlite3_api->result_blob(context, vector, dimensions, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
1115 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
1116 cleanup(vector);
1117}
1118
1119static void vec_length(sqlite3_context *context, int argc,
1120 sqlite3_value **argv) {
1121 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1121, __extension__ __PRETTY_FUNCTION__); }))
;
1122 int rc;
1123 void *vector;
1124 size_t dimensions;
1125 vector_cleanup cleanup;
1126 char *errmsg;
1127 enum VectorElementType elementType;
1128 rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, &cleanup,
1129 &errmsg);
1130 if (rc != SQLITE_OK0) {
1131 sqlite3_result_errorsqlite3_api->result_error(context, errmsg, -1);
1132 sqlite3_freesqlite3_api->free(errmsg);
1133 return;
1134 }
1135 sqlite3_result_int64sqlite3_api->result_int64(context, dimensions);
1136 cleanup(vector);
1137}
1138
1139static void vec_distance_cosine(sqlite3_context *context, int argc,
1140 sqlite3_value **argv) {
1141 assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc
== 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1141, __extension__ __PRETTY_FUNCTION__); }))
;
1142 int rc;
1143 void *a = NULL((void*)0), *b = NULL((void*)0);
1144 size_t dimensions;
1145 vector_cleanup aCleanup, bCleanup;
1146 char *error;
1147 enum VectorElementType elementType;
1148 rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
1149 &aCleanup, &bCleanup, &error);
1150 if (rc != SQLITE_OK0) {
1151 sqlite3_result_errorsqlite3_api->result_error(context, error, -1);
1152 sqlite3_freesqlite3_api->free(error);
1153 return;
1154 }
1155
1156 switch (elementType) {
1157 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
1158 sqlite3_result_errorsqlite3_api->result_error(
1159 context, "Cannot calculate cosine distance between two bitvectors.",
1160 -1);
1161 goto finish;
1162 }
1163 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
1164 f32 result = distance_cosine_float(a, b, &dimensions);
1165 sqlite3_result_doublesqlite3_api->result_double(context, result);
1166 goto finish;
1167 }
1168 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
1169 f32 result = distance_cosine_int8(a, b, &dimensions);
1170 sqlite3_result_doublesqlite3_api->result_double(context, result);
1171 goto finish;
1172 }
1173 }
1174
1175finish:
1176 aCleanup(a);
1177 bCleanup(b);
1178 return;
1179}
1180
1181static void vec_distance_l2(sqlite3_context *context, int argc,
1182 sqlite3_value **argv) {
1183 assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc
== 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1183, __extension__ __PRETTY_FUNCTION__); }))
;
1184 int rc;
1185 void *a = NULL((void*)0), *b = NULL((void*)0);
1186 size_t dimensions;
1187 vector_cleanup aCleanup, bCleanup;
1188 char *error;
1189 enum VectorElementType elementType;
1190 rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
1191 &aCleanup, &bCleanup, &error);
1192 if (rc != SQLITE_OK0) {
1193 sqlite3_result_errorsqlite3_api->result_error(context, error, -1);
1194 sqlite3_freesqlite3_api->free(error);
1195 return;
1196 }
1197
1198 switch (elementType) {
1199 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
1200 sqlite3_result_errorsqlite3_api->result_error(
1201 context, "Cannot calculate L2 distance between two bitvectors.", -1);
1202 goto finish;
1203 }
1204 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
1205 f32 result = distance_l2_sqr_float(a, b, &dimensions);
1206 sqlite3_result_doublesqlite3_api->result_double(context, result);
1207 goto finish;
1208 }
1209 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
1210 f32 result = distance_l2_sqr_int8(a, b, &dimensions);
1211 sqlite3_result_doublesqlite3_api->result_double(context, result);
1212 goto finish;
1213 }
1214 }
1215
1216finish:
1217 aCleanup(a);
1218 bCleanup(b);
1219 return;
1220}
1221
1222static void vec_distance_l1(sqlite3_context *context, int argc,
1223 sqlite3_value **argv) {
1224 assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc
== 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1224, __extension__ __PRETTY_FUNCTION__); }))
;
1225 int rc;
1226 void *a, *b;
1227 size_t dimensions;
1228 vector_cleanup aCleanup, bCleanup;
1229 char *error;
1230 enum VectorElementType elementType;
1231 rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
1232 &aCleanup, &bCleanup, &error);
1233 if (rc != SQLITE_OK0) {
1234 sqlite3_result_errorsqlite3_api->result_error(context, error, -1);
1235 sqlite3_freesqlite3_api->free(error);
1236 return;
1237 }
1238
1239 switch (elementType) {
1240 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
1241 sqlite3_result_errorsqlite3_api->result_error(
1242 context, "Cannot calculate L1 distance between two bitvectors.", -1);
1243 goto finish;
1244 }
1245 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
1246 double result = distance_l1_f32(a, b, &dimensions);
1247 sqlite3_result_doublesqlite3_api->result_double(context, result);
1248 goto finish;
1249 }
1250 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
1251 i64 result = distance_l1_int8(a, b, &dimensions);
1252 sqlite3_result_intsqlite3_api->result_int(context, result);
1253 goto finish;
1254 }
1255 }
1256
1257finish:
1258 aCleanup(a);
1259 bCleanup(b);
1260 return;
1261}
1262
1263static void vec_distance_hamming(sqlite3_context *context, int argc,
1264 sqlite3_value **argv) {
1265 assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc
== 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1265, __extension__ __PRETTY_FUNCTION__); }))
;
1266 int rc;
1267 void *a = NULL((void*)0), *b = NULL((void*)0);
1268 size_t dimensions;
1269 vector_cleanup aCleanup, bCleanup;
1270 char *error;
1271 enum VectorElementType elementType;
1272 rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
1273 &aCleanup, &bCleanup, &error);
1274 if (rc != SQLITE_OK0) {
1275 sqlite3_result_errorsqlite3_api->result_error(context, error, -1);
1276 sqlite3_freesqlite3_api->free(error);
1277 return;
1278 }
1279
1280 switch (elementType) {
1281 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
1282 sqlite3_result_doublesqlite3_api->result_double(context, distance_hamming(a, b, &dimensions));
1283 goto finish;
1284 }
1285 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
1286 sqlite3_result_errorsqlite3_api->result_error(
1287 context,
1288 "Cannot calculate hamming distance between two float32 vectors.", -1);
1289 goto finish;
1290 }
1291 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
1292 sqlite3_result_errorsqlite3_api->result_error(
1293 context, "Cannot calculate hamming distance between two int8 vectors.",
1294 -1);
1295 goto finish;
1296 }
1297 }
1298
1299finish:
1300 aCleanup(a);
1301 bCleanup(b);
1302 return;
1303}
1304
1305char *vec_type_name(enum VectorElementType elementType) {
1306 switch (elementType) {
1307 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
1308 return "float32";
1309 case SQLITE_VEC_ELEMENT_TYPE_INT8:
1310 return "int8";
1311 case SQLITE_VEC_ELEMENT_TYPE_BIT:
1312 return "bit";
1313 }
1314 return "";
1315}
1316
1317static void vec_type(sqlite3_context *context, int argc, sqlite3_value **argv) {
1318 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1318, __extension__ __PRETTY_FUNCTION__); }))
;
1319 void *vector;
1320 size_t dimensions;
1321 vector_cleanup cleanup;
1322 char *pzError;
1323 enum VectorElementType elementType;
1324 int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
1325 &cleanup, &pzError);
1326 if (rc != SQLITE_OK0) {
1327 sqlite3_result_errorsqlite3_api->result_error(context, pzError, -1);
1328 sqlite3_freesqlite3_api->free(pzError);
1329 return;
1330 }
1331 sqlite3_result_textsqlite3_api->result_text(context, vec_type_name(elementType), -1, SQLITE_STATIC((sqlite3_destructor_type)0));
1332 cleanup(vector);
1333}
1334static void vec_quantize_binary(sqlite3_context *context, int argc,
1335 sqlite3_value **argv) {
1336 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1336, __extension__ __PRETTY_FUNCTION__); }))
;
1337 void *vector;
1338 size_t dimensions;
1339 vector_cleanup vectorCleanup;
1340 char *pzError;
1341 enum VectorElementType elementType;
1342 int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
1343 &vectorCleanup, &pzError);
1344 if (rc != SQLITE_OK0) {
1345 sqlite3_result_errorsqlite3_api->result_error(context, pzError, -1);
1346 sqlite3_freesqlite3_api->free(pzError);
1347 return;
1348 }
1349
1350 if (dimensions <= 0) {
1351 sqlite3_result_errorsqlite3_api->result_error(context, "Zero length vectors are not supported.", -1);
1352 goto cleanup;
1353 return;
1354 }
1355 if ((dimensions % CHAR_BIT8) != 0) {
1356 sqlite3_result_errorsqlite3_api->result_error(
1357 context,
1358 "Binary quantization requires vectors with a length divisible by 8",
1359 -1);
1360 goto cleanup;
1361 return;
1362 }
1363
1364 int sz = dimensions / CHAR_BIT8;
1365 u8 *out = sqlite3_mallocsqlite3_api->malloc(sz);
1366 if (!out) {
1367 sqlite3_result_error_codesqlite3_api->result_error_code(context, SQLITE_NOMEM7);
1368 goto cleanup;
1369 return;
1370 }
1371 memset(out, 0, sz);
1372
1373 switch (elementType) {
1374 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
1375
1376 for (size_t i = 0; i < dimensions; i++) {
1377 int res = ((f32 *)vector)[i] > 0.0;
1378 out[i / 8] |= (res << (i % 8));
1379 }
1380 break;
1381 }
1382 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
1383 for (size_t i = 0; i < dimensions; i++) {
1384 int res = ((i8 *)vector)[i] > 0;
1385 out[i / 8] |= (res << (i % 8));
1386 }
1387 break;
1388 }
1389 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
1390 sqlite3_result_errorsqlite3_api->result_error(context,
1391 "Can only binary quantize float or int8 vectors", -1);
1392 sqlite3_freesqlite3_api->free(out);
1393 return;
1394 }
1395 }
1396 sqlite3_result_blobsqlite3_api->result_blob(context, out, sz, sqlite3_freesqlite3_api->free);
1397 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
1398
1399cleanup:
1400 vectorCleanup(vector);
1401}
1402
1403static void vec_quantize_int8(sqlite3_context *context, int argc,
1404 sqlite3_value **argv) {
1405 assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc
== 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1405, __extension__ __PRETTY_FUNCTION__); }))
;
1406 f32 *srcVector;
1407 size_t dimensions;
1408 fvec_cleanup srcCleanup;
1409 char *err;
1410 i8 *out = NULL((void*)0);
1411 int rc = fvec_from_value(argv[0], &srcVector, &dimensions, &srcCleanup, &err);
1412 if (rc != SQLITE_OK0) {
1413 sqlite3_result_errorsqlite3_api->result_error(context, err, -1);
1414 sqlite3_freesqlite3_api->free(err);
1415 return;
1416 }
1417
1418 int sz = dimensions * sizeof(i8);
1419 out = sqlite3_mallocsqlite3_api->malloc(sz);
1420 if (!out) {
1421 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1422 goto cleanup;
1423 }
1424 memset(out, 0, sz);
1425
1426 if ((sqlite3_value_typesqlite3_api->value_type(argv[1]) != SQLITE_TEXT3) ||
1427 (sqlite3_value_bytessqlite3_api->value_bytes(argv[1]) != strlen("unit")) ||
1428 (sqlite3_stricmpsqlite3_api->stricmp((const char *)sqlite3_value_textsqlite3_api->value_text(argv[1]), "unit") !=
1429 0)) {
1430 sqlite3_result_errorsqlite3_api->result_error(
1431 context, "2nd argument to vec_quantize_int8() must be 'unit'.", -1);
1432 sqlite3_freesqlite3_api->free(out);
1433 goto cleanup;
1434 }
1435 f32 step = (1.0 - (-1.0)) / 255;
1436 for (size_t i = 0; i < dimensions; i++) {
1437 out[i] = ((srcVector[i] - (-1.0)) / step) - 128;
1438 }
1439
1440 sqlite3_result_blobsqlite3_api->result_blob(context, out, dimensions * sizeof(i8), sqlite3_freesqlite3_api->free);
1441 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
1442
1443cleanup:
1444 srcCleanup(srcVector);
1445}
1446
1447static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) {
1448 assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc
== 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1448, __extension__ __PRETTY_FUNCTION__); }))
;
1449 int rc;
1450 void *a = NULL((void*)0), *b = NULL((void*)0);
1451 size_t dimensions;
1452 vector_cleanup aCleanup, bCleanup;
1453 char *error;
1454 enum VectorElementType elementType;
1455 rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
1456 &aCleanup, &bCleanup, &error);
1457 if (rc != SQLITE_OK0) {
1458 sqlite3_result_errorsqlite3_api->result_error(context, error, -1);
1459 sqlite3_freesqlite3_api->free(error);
1460 return;
1461 }
1462
1463 switch (elementType) {
1464 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
1465 sqlite3_result_errorsqlite3_api->result_error(context, "Cannot add two bitvectors together.", -1);
1466 goto finish;
1467 }
1468 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
1469 size_t outSize = dimensions * sizeof(f32);
1470 f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize);
1471 if (!out) {
1472 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1473 goto finish;
1474 }
1475 memset(out, 0, outSize);
1476 for (size_t i = 0; i < dimensions; i++) {
1477 out[i] = ((f32 *)a)[i] + ((f32 *)b)[i];
1478 }
1479 sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free);
1480 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
1481 goto finish;
1482 }
1483 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
1484 size_t outSize = dimensions * sizeof(i8);
1485 i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize);
1486 if (!out) {
1487 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1488 goto finish;
1489 }
1490 memset(out, 0, outSize);
1491 for (size_t i = 0; i < dimensions; i++) {
1492 out[i] = ((i8 *)a)[i] + ((i8 *)b)[i];
1493 }
1494 sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free);
1495 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
1496 goto finish;
1497 }
1498 }
1499finish:
1500 aCleanup(a);
1501 bCleanup(b);
1502 return;
1503}
1504static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **argv) {
1505 assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc
== 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1505, __extension__ __PRETTY_FUNCTION__); }))
;
1506 int rc;
1507 void *a = NULL((void*)0), *b = NULL((void*)0);
1508 size_t dimensions;
1509 vector_cleanup aCleanup, bCleanup;
1510 char *error;
1511 enum VectorElementType elementType;
1512 rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
1513 &aCleanup, &bCleanup, &error);
1514 if (rc != SQLITE_OK0) {
1515 sqlite3_result_errorsqlite3_api->result_error(context, error, -1);
1516 sqlite3_freesqlite3_api->free(error);
1517 return;
1518 }
1519
1520 switch (elementType) {
1521 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
1522 sqlite3_result_errorsqlite3_api->result_error(context, "Cannot subtract two bitvectors together.",
1523 -1);
1524 goto finish;
1525 }
1526 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
1527 size_t outSize = dimensions * sizeof(f32);
1528 f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize);
1529 if (!out) {
1530 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1531 goto finish;
1532 }
1533 memset(out, 0, outSize);
1534 for (size_t i = 0; i < dimensions; i++) {
1535 out[i] = ((f32 *)a)[i] - ((f32 *)b)[i];
1536 }
1537 sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free);
1538 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
1539 goto finish;
1540 }
1541 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
1542 size_t outSize = dimensions * sizeof(i8);
1543 i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize);
1544 if (!out) {
1545 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1546 goto finish;
1547 }
1548 memset(out, 0, outSize);
1549 for (size_t i = 0; i < dimensions; i++) {
1550 out[i] = ((i8 *)a)[i] - ((i8 *)b)[i];
1551 }
1552 sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free);
1553 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
1554 goto finish;
1555 }
1556 }
1557finish:
1558 aCleanup(a);
1559 bCleanup(b);
1560 return;
1561}
1562static void vec_slice(sqlite3_context *context, int argc,
1563 sqlite3_value **argv) {
1564 assert(argc == 3)((void) sizeof ((argc == 3) ? 1 : 0), __extension__ ({ if (argc
== 3) ; else __assert_fail ("argc == 3", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1564, __extension__ __PRETTY_FUNCTION__); }))
;
1565
1566 void *vector;
1567 size_t dimensions;
1568 vector_cleanup cleanup;
1569 char *err;
1570 enum VectorElementType elementType;
1571
1572 int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
1573 &cleanup, &err);
1574 if (rc != SQLITE_OK0) {
1575 sqlite3_result_errorsqlite3_api->result_error(context, err, -1);
1576 sqlite3_freesqlite3_api->free(err);
1577 return;
1578 }
1579
1580 int start = sqlite3_value_intsqlite3_api->value_int(argv[1]);
1581 int end = sqlite3_value_intsqlite3_api->value_int(argv[2]);
1582
1583 if (start < 0) {
1584 sqlite3_result_errorsqlite3_api->result_error(context,
1585 "slice 'start' index must be a postive number.", -1);
1586 goto done;
1587 }
1588 if (end < 0) {
1589 sqlite3_result_errorsqlite3_api->result_error(context, "slice 'end' index must be a postive number.",
1590 -1);
1591 goto done;
1592 }
1593 if (((size_t)start) > dimensions) {
1594 sqlite3_result_errorsqlite3_api->result_error(
1595 context, "slice 'start' index is greater than the number of dimensions",
1596 -1);
1597 goto done;
1598 }
1599 if (((size_t)end) > dimensions) {
1600 sqlite3_result_errorsqlite3_api->result_error(
1601 context, "slice 'end' index is greater than the number of dimensions",
1602 -1);
1603 goto done;
1604 }
1605 if (start > end) {
1606 sqlite3_result_errorsqlite3_api->result_error(context,
1607 "slice 'start' index is greater than 'end' index", -1);
1608 goto done;
1609 }
1610 if (start == end) {
1611 sqlite3_result_errorsqlite3_api->result_error(context,
1612 "slice 'start' index is equal to the 'end' index, "
1613 "vectors must have non-zero length",
1614 -1);
1615 goto done;
1616 }
1617 size_t n = end - start;
1618
1619 switch (elementType) {
1620 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
1621 int outSize = n * sizeof(f32);
1622 f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize);
1623 if (!out) {
1624 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1625 goto done;
1626 }
1627 memset(out, 0, outSize);
1628 for (size_t i = 0; i < n; i++) {
1629 out[i] = ((f32 *)vector)[start + i];
1630 }
1631 sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free);
1632 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
1633 goto done;
1634 }
1635 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
1636 int outSize = n * sizeof(i8);
1637 i8 *out = sqlite3_mallocsqlite3_api->malloc(outSize);
1638 if (!out) {
1639 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1640 return;
1641 }
1642 memset(out, 0, outSize);
1643 for (size_t i = 0; i < n; i++) {
1644 out[i] = ((i8 *)vector)[start + i];
1645 }
1646 sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free);
1647 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
1648 goto done;
1649 }
1650 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
1651 if ((start % CHAR_BIT8) != 0) {
1652 sqlite3_result_errorsqlite3_api->result_error(context, "start index must be divisible by 8.", -1);
1653 goto done;
1654 }
1655 if ((end % CHAR_BIT8) != 0) {
1656 sqlite3_result_errorsqlite3_api->result_error(context, "end index must be divisible by 8.", -1);
1657 goto done;
1658 }
1659 int outSize = n / CHAR_BIT8;
1660 u8 *out = sqlite3_mallocsqlite3_api->malloc(outSize);
1661 if (!out) {
1662 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1663 return;
1664 }
1665 memset(out, 0, outSize);
1666 for (size_t i = 0; i < n / CHAR_BIT8; i++) {
1667 out[i] = ((u8 *)vector)[(start / CHAR_BIT8) + i];
1668 }
1669 sqlite3_result_blobsqlite3_api->result_blob(context, out, outSize, sqlite3_freesqlite3_api->free);
1670 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
1671 goto done;
1672 }
1673 }
1674done:
1675 cleanup(vector);
1676}
1677
1678static void vec_to_json(sqlite3_context *context, int argc,
1679 sqlite3_value **argv) {
1680 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1680, __extension__ __PRETTY_FUNCTION__); }))
;
1681 void *vector;
1682 size_t dimensions;
1683 vector_cleanup cleanup;
1684 char *err;
1685 enum VectorElementType elementType;
1686
1687 int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
1688 &cleanup, &err);
1689 if (rc != SQLITE_OK0) {
1690 sqlite3_result_errorsqlite3_api->result_error(context, err, -1);
1691 sqlite3_freesqlite3_api->free(err);
1692 return;
1693 }
1694
1695 sqlite3_str *str = sqlite3_str_newsqlite3_api->str_new(sqlite3_context_db_handlesqlite3_api->context_db_handle(context));
1696 sqlite3_str_appendallsqlite3_api->str_appendall(str, "[");
1697 for (size_t i = 0; i < dimensions; i++) {
1698 if (i != 0) {
1699 sqlite3_str_appendallsqlite3_api->str_appendall(str, ",");
1700 }
1701 if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
1702 f32 value = ((f32 *)vector)[i];
1703 if (isnan(value)__builtin_isnan (value)) {
1704 sqlite3_str_appendallsqlite3_api->str_appendall(str, "null");
1705 } else {
1706 sqlite3_str_appendfsqlite3_api->str_appendf(str, "%f", value);
1707 }
1708
1709 } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) {
1710 sqlite3_str_appendfsqlite3_api->str_appendf(str, "%d", ((i8 *)vector)[i]);
1711 } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
1712 u8 b = (((u8 *)vector)[i / 8] >> (i % CHAR_BIT8)) & 1;
1713 sqlite3_str_appendfsqlite3_api->str_appendf(str, "%d", b);
1714 }
1715 }
1716 sqlite3_str_appendallsqlite3_api->str_appendall(str, "]");
1717 int len = sqlite3_str_lengthsqlite3_api->str_length(str);
1718 char *s = sqlite3_str_finishsqlite3_api->str_finish(str);
1719 if (s) {
1720 sqlite3_result_textsqlite3_api->result_text(context, s, len, sqlite3_freesqlite3_api->free);
1721 sqlite3_result_subtypesqlite3_api->result_subtype(context, JSON_SUBTYPE74);
1722 } else {
1723 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
1724 }
1725 cleanup(vector);
1726}
1727
1728static void vec_normalize(sqlite3_context *context, int argc,
1729 sqlite3_value **argv) {
1730 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 1730, __extension__ __PRETTY_FUNCTION__); }))
;
1731 void *vector;
1732 size_t dimensions;
1733 vector_cleanup cleanup;
1734 char *err;
1735 enum VectorElementType elementType;
1736
1737 int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
1738 &cleanup, &err);
1739 if (rc != SQLITE_OK0) {
1740 sqlite3_result_errorsqlite3_api->result_error(context, err, -1);
1741 sqlite3_freesqlite3_api->free(err);
1742 return;
1743 }
1744
1745 if (elementType != SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
1746 sqlite3_result_errorsqlite3_api->result_error(
1747 context, "only float32 vectors are supported when normalizing", -1);
1748 cleanup(vector);
1749 return;
1750 }
1751
1752 int outSize = dimensions * sizeof(f32);
1753 f32 *out = sqlite3_mallocsqlite3_api->malloc(outSize);
1754 if (!out) {
1755 cleanup(vector);
1756 sqlite3_result_error_codesqlite3_api->result_error_code(context, SQLITE_NOMEM7);
1757 return;
1758 }
1759 memset(out, 0, outSize);
1760
1761 f32 *v = (f32 *)vector;
1762
1763 f32 norm = 0;
1764 for (size_t i = 0; i < dimensions; i++) {
1765 norm += v[i] * v[i];
1766 }
1767 norm = sqrt(norm);
1768 for (size_t i = 0; i < dimensions; i++) {
1769 out[i] = v[i] / norm;
1770 }
1771
1772 sqlite3_result_blobsqlite3_api->result_blob(context, out, dimensions * sizeof(f32), sqlite3_freesqlite3_api->free);
1773 sqlite3_result_subtypesqlite3_api->result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
1774 cleanup(vector);
1775}
1776
1777static void _static_text_func(sqlite3_context *context, int argc,
1778 sqlite3_value **argv) {
1779 UNUSED_PARAMETER(argc)(void)(argc);
1780 UNUSED_PARAMETER(argv)(void)(argv);
1781 sqlite3_result_textsqlite3_api->result_text(context, sqlite3_user_datasqlite3_api->user_data(context), -1, SQLITE_STATIC((sqlite3_destructor_type)0));
1782}
1783
1784#pragma endregion
1785
/* Kinds of token produced by vec0_token_next(). */
enum Vec0TokenType {
  TOKEN_TYPE_IDENTIFIER,
  TOKEN_TYPE_DIGIT,
  TOKEN_TYPE_LBRACKET,
  TOKEN_TYPE_RBRACKET,
  TOKEN_TYPE_PLUS,
  TOKEN_TYPE_EQ,
};

/* One token: its kind and the [start, end) span it refers to in the input. */
struct Vec0Token {
  enum Vec0TokenType token_type;
  char *start;
  char *end;
};

/* Non-zero when x is an ASCII letter. */
int is_alpha(char x) {
  int lower = (x >= 'a' && x <= 'z');
  int upper = (x >= 'A' && x <= 'Z');
  return lower || upper;
}

/* Non-zero when x is an ASCII decimal digit. */
int is_digit(char x) { return !(x < '0' || x > '9'); }

/* Non-zero when x is a space, tab, line feed or carriage return. */
int is_whitespace(char x) {
  switch (x) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}

/* Result codes of vec0_token_next(). */
#define VEC0_TOKEN_RESULT_EOF 1
#define VEC0_TOKEN_RESULT_SOME 2
#define VEC0_TOKEN_RESULT_ERROR 3

/* Classify a single-character token. Returns 1 and stores its type in *type
 * when c is one of + [ ] =, otherwise returns 0. */
static int vec0_punctuation_type(char c, enum Vec0TokenType *type) {
  switch (c) {
  case '+':
    *type = TOKEN_TYPE_PLUS;
    return 1;
  case '[':
    *type = TOKEN_TYPE_LBRACKET;
    return 1;
  case ']':
    *type = TOKEN_TYPE_RBRACKET;
    return 1;
  case '=':
    *type = TOKEN_TYPE_EQ;
    return 1;
  default:
    return 0;
  }
}

/**
 * Read the next token from the character range [start, end).
 *
 * Leading whitespace is skipped. Identifiers start with a letter and continue
 * with letters, digits and underscores; digit tokens are runs of digits. For
 * those, out->start / out->end delimit the token text. For the
 * single-character tokens (+ [ ] =), out->start and out->end both point just
 * past the character.
 *
 * @return VEC0_TOKEN_RESULT_SOME when a token was written to *out,
 * VEC0_TOKEN_RESULT_EOF when only whitespace (or nothing) remained, and
 * VEC0_TOKEN_RESULT_ERROR on any other character.
 */
int vec0_token_next(char *start, char *end, struct Vec0Token *out) {
  char *cursor = start;
  enum Vec0TokenType punctuation;

  while (cursor < end && is_whitespace(*cursor)) {
    cursor++;
  }
  if (cursor >= end) {
    return VEC0_TOKEN_RESULT_EOF;
  }

  char first = *cursor;

  if (vec0_punctuation_type(first, &punctuation)) {
    cursor++;
    out->start = cursor;
    out->end = cursor;
    out->token_type = punctuation;
    return VEC0_TOKEN_RESULT_SOME;
  }

  if (is_alpha(first)) {
    char *begin = cursor;
    do {
      cursor++;
    } while (cursor < end &&
             (is_alpha(*cursor) || is_digit(*cursor) || *cursor == '_'));
    out->start = begin;
    out->end = cursor;
    out->token_type = TOKEN_TYPE_IDENTIFIER;
    return VEC0_TOKEN_RESULT_SOME;
  }

  if (is_digit(first)) {
    char *begin = cursor;
    do {
      cursor++;
    } while (cursor < end && is_digit(*cursor));
    out->start = begin;
    out->end = cursor;
    out->token_type = TOKEN_TYPE_DIGIT;
    return VEC0_TOKEN_RESULT_SOME;
  }

  return VEC0_TOKEN_RESULT_ERROR;
}
1867
/*
 * Tokenizer state over a source buffer. vec0_scanner_next() reads from
 * `start` up to `end` and advances `start` past each token it returns.
 */
struct Vec0Scanner {
  char *start; /* next position to scan from */
  char *end;   /* one past the last byte of the source */
  char *ptr;   /* set to the beginning of the source by vec0_scanner_init() */
};
1873
1874void vec0_scanner_init(struct Vec0Scanner *scanner, const char *source,
1875 int source_length) {
1876 scanner->start = (char *)source;
1877 scanner->end = (char *)source + source_length;
1878 scanner->ptr = (char *)source;
1879}
1880int vec0_scanner_next(struct Vec0Scanner *scanner, struct Vec0Token *out) {
1881 int rc = vec0_token_next(scanner->start, scanner->end, out);
1882 if (rc == VEC0_TOKEN_RESULT_SOME2) {
1883 scanner->start = out->end;
1884 }
1885 return rc;
1886}
1887
1888int vec0_parse_table_option(const char *source, int source_length,
1889 char **out_key, int *out_key_length,
1890 char **out_value, int *out_value_length) {
1891 int rc;
1892 struct Vec0Scanner scanner;
1893 struct Vec0Token token;
1894 char *key;
1895 char *value;
1896 int keyLength, valueLength;
1897
1898 vec0_scanner_init(&scanner, source, source_length);
1899
1900 rc = vec0_scanner_next(&scanner, &token);
1901 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
1902 token.token_type != TOKEN_TYPE_IDENTIFIER) {
1903 return SQLITE_EMPTY16;
1904 }
1905 key = token.start;
1906 keyLength = token.end - token.start;
1907
1908 rc = vec0_scanner_next(&scanner, &token);
1909 if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_EQ) {
1910 return SQLITE_EMPTY16;
1911 }
1912
1913 rc = vec0_scanner_next(&scanner, &token);
1914 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
1915 !((token.token_type == TOKEN_TYPE_IDENTIFIER) ||
1916 (token.token_type == TOKEN_TYPE_DIGIT))) {
1917 return SQLITE_ERROR1;
1918 }
1919 value = token.start;
1920 valueLength = token.end - token.start;
1921
1922 rc = vec0_scanner_next(&scanner, &token);
1923 if (rc == VEC0_TOKEN_RESULT_EOF1) {
1924 *out_key = key;
1925 *out_key_length = keyLength;
1926 *out_value = value;
1927 *out_value_length = valueLength;
1928 return SQLITE_OK0;
1929 }
1930 return SQLITE_ERROR1;
1931}
1932/**
1933 * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
1934 * it's a PARTITION KEY definition.
1935 *
1936 * @param source: argv[i] source string
1937 * @param source_length: length of the source string
1938 * @param out_column_name: If it is a partition key, the output column name. Same lifetime
1939 * as source, points to specific char *
1940 * @param out_column_name_length: Length of out_column_name in bytes
1941 * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER.
1942 * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is.
1943 */
1944int vec0_parse_partition_key_definition(const char *source, int source_length,
1945 char **out_column_name,
1946 int *out_column_name_length,
1947 int *out_column_type) {
1948 struct Vec0Scanner scanner;
1949 struct Vec0Token token;
1950 char *column_name;
1951 int column_name_length;
1952 int column_type;
1953 vec0_scanner_init(&scanner, source, source_length);
1954
1955 // Check first token is identifier, will be the column name
1956 int rc = vec0_scanner_next(&scanner, &token);
1957 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
1958 token.token_type != TOKEN_TYPE_IDENTIFIER) {
1959 return SQLITE_EMPTY16;
1960 }
1961
1962 column_name = token.start;
1963 column_name_length = token.end - token.start;
1964
1965 // Check the next token matches "text" or "integer", as column type
1966 rc = vec0_scanner_next(&scanner, &token);
1967 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
1968 token.token_type != TOKEN_TYPE_IDENTIFIER) {
1969 return SQLITE_EMPTY16;
1970 }
1971 if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) {
1972 column_type = SQLITE_TEXT3;
1973 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) ==
1974 0 ||
1975 sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer",
1976 token.end - token.start) == 0) {
1977 column_type = SQLITE_INTEGER1;
1978 } else {
1979 return SQLITE_EMPTY16;
1980 }
1981
1982 // Check the next token is identifier and matches "partition"
1983 rc = vec0_scanner_next(&scanner, &token);
1984 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
1985 token.token_type != TOKEN_TYPE_IDENTIFIER) {
1986 return SQLITE_EMPTY16;
1987 }
1988 if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "partition", token.end - token.start) != 0) {
1989 return SQLITE_EMPTY16;
1990 }
1991
1992 // Check the next token is identifier and matches "key"
1993 rc = vec0_scanner_next(&scanner, &token);
1994 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
1995 token.token_type != TOKEN_TYPE_IDENTIFIER) {
1996 return SQLITE_EMPTY16;
1997 }
1998 if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "key", token.end - token.start) != 0) {
1999 return SQLITE_EMPTY16;
2000 }
2001
2002 *out_column_name = column_name;
2003 *out_column_name_length = column_name_length;
2004 *out_column_type = column_type;
2005
2006 return SQLITE_OK0;
2007}
2008
2009/**
2010 * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
2011 * it's an auxiliar column definition, ie `+[name] [type]` like `+contents text`
2012 *
2013 * @param source: argv[i] source string
2014 * @param source_length: length of the source string
2015 * @param out_column_name: If it is a partition key, the output column name. Same lifetime
2016 * as source, points to specific char *
2017 * @param out_column_name_length: Length of out_column_name in bytes
2018 * @param out_column_type: SQLITE_TEXT, SQLITE_INTEGER, SQLITE_FLOAT, or SQLITE_BLOB.
2019 * @return int: SQLITE_EMPTY if not an aux column, SQLITE_OK if it is.
2020 */
2021int vec0_parse_auxiliary_column_definition(const char *source, int source_length,
2022 char **out_column_name,
2023 int *out_column_name_length,
2024 int *out_column_type) {
2025 struct Vec0Scanner scanner;
2026 struct Vec0Token token;
2027 char *column_name;
2028 int column_name_length;
2029 int column_type;
2030 vec0_scanner_init(&scanner, source, source_length);
2031
2032 // Check first token is '+', which denotes aux columns
2033 int rc = vec0_scanner_next(&scanner, &token);
2034 if (rc != VEC0_TOKEN_RESULT_SOME2 ||
2035 token.token_type != TOKEN_TYPE_PLUS) {
2036 return SQLITE_EMPTY16;
2037 }
2038
2039 rc = vec0_scanner_next(&scanner, &token);
2040 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2041 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2042 return SQLITE_EMPTY16;
2043 }
2044
2045 column_name = token.start;
2046 column_name_length = token.end - token.start;
2047
2048 // Check the next token matches "text" or "integer", as column type
2049 rc = vec0_scanner_next(&scanner, &token);
2050 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2051 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2052 return SQLITE_EMPTY16;
2053 }
2054 if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) {
2055 column_type = SQLITE_TEXT3;
2056 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) ==
2057 0 ||
2058 sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer",
2059 token.end - token.start) == 0) {
2060 column_type = SQLITE_INTEGER1;
2061 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "float", token.end - token.start) ==
2062 0 ||
2063 sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "double",
2064 token.end - token.start) == 0) {
2065 column_type = SQLITE_FLOAT2;
2066 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "blob", token.end - token.start) ==0) {
2067 column_type = SQLITE_BLOB4;
2068 } else {
2069 return SQLITE_EMPTY16;
2070 }
2071
2072 *out_column_name = column_name;
2073 *out_column_name_length = column_name_length;
2074 *out_column_type = column_type;
2075
2076 return SQLITE_OK0;
2077}
2078
/* Value kinds a vec0 metadata column can be declared with. */
typedef enum {
  VEC0_METADATA_COLUMN_KIND_BOOLEAN,
  VEC0_METADATA_COLUMN_KIND_INTEGER,
  VEC0_METADATA_COLUMN_KIND_FLOAT,
  VEC0_METADATA_COLUMN_KIND_TEXT,
  /* future: blob, date, datetime */
} vec0_metadata_column_kind;
2086
2087/**
2088 * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
2089 * it's an metadata column definition, ie `[name] [type]` like `is_released boolean`
2090 *
2091 * @param source: argv[i] source string
2092 * @param source_length: length of the source string
2093 * @param out_column_name: If it is a metadata column, the output column name. Same lifetime
2094 * as source, points to specific char *
2095 * @param out_column_name_length: Length of out_column_name in bytes
2096 * @param out_column_type: one of vec0_metadata_column_kind
2097 * @return int: SQLITE_EMPTY if not an metadata column, SQLITE_OK if it is.
2098 */
2099int vec0_parse_metadata_column_definition(const char *source, int source_length,
2100 char **out_column_name,
2101 int *out_column_name_length,
2102 vec0_metadata_column_kind *out_column_type) {
2103 struct Vec0Scanner scanner;
2104 struct Vec0Token token;
2105 char *column_name;
2106 int column_name_length;
2107 vec0_metadata_column_kind column_type;
2108 int rc;
2109 vec0_scanner_init(&scanner, source, source_length);
2110
2111 rc = vec0_scanner_next(&scanner, &token);
2112 if (rc != VEC0_TOKEN_RESULT_SOME2 ||
2113 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2114 return SQLITE_EMPTY16;
2115 }
2116
2117 column_name = token.start;
2118 column_name_length = token.end - token.start;
2119
2120 // Check the next token matches a valid metadata type
2121 rc = vec0_scanner_next(&scanner, &token);
2122 if (rc != VEC0_TOKEN_RESULT_SOME2 ||
2123 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2124 return SQLITE_EMPTY16;
2125 }
2126 char * t = token.start;
2127 int n = token.end - token.start;
2128 if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "boolean", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "bool", n) == 0) {
2129 column_type = VEC0_METADATA_COLUMN_KIND_BOOLEAN;
2130 }else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "int64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "integer64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "integer", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "int", n) == 0) {
2131 column_type = VEC0_METADATA_COLUMN_KIND_INTEGER;
2132 }else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "float", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "double", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "float64", n) == 0 || sqlite3_strnicmpsqlite3_api->strnicmp(t, "f64", n) == 0) {
2133 column_type = VEC0_METADATA_COLUMN_KIND_FLOAT;
2134 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(t, "text", n) == 0) {
2135 column_type = VEC0_METADATA_COLUMN_KIND_TEXT;
2136 } else {
2137 return SQLITE_EMPTY16;
2138 }
2139
2140 *out_column_name = column_name;
2141 *out_column_name_length = column_name_length;
2142 *out_column_type = column_type;
2143
2144 return SQLITE_OK0;
2145}
2146
2147/**
2148 * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
2149 * it's a PRIMARY KEY definition.
2150 *
2151 * @param source: argv[i] source string
2152 * @param source_length: length of the source string
2153 * @param out_column_name: If it is a PK, the output column name. Same lifetime
2154 * as source, points to specific char *
2155 * @param out_column_name_length: Length of out_column_name in bytes
2156 * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER.
2157 * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is.
2158 */
2159int vec0_parse_primary_key_definition(const char *source, int source_length,
2160 char **out_column_name,
2161 int *out_column_name_length,
2162 int *out_column_type) {
2163 struct Vec0Scanner scanner;
2164 struct Vec0Token token;
2165 char *column_name;
2166 int column_name_length;
2167 int column_type;
2168 vec0_scanner_init(&scanner, source, source_length);
2169
2170 // Check first token is identifier, will be the column name
2171 int rc = vec0_scanner_next(&scanner, &token);
2172 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2173 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2174 return SQLITE_EMPTY16;
2175 }
2176
2177 column_name = token.start;
2178 column_name_length = token.end - token.start;
2179
2180 // Check the next token matches "text" or "integer", as column type
2181 rc = vec0_scanner_next(&scanner, &token);
2182 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2183 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2184 return SQLITE_EMPTY16;
2185 }
2186 if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "text", token.end - token.start) == 0) {
2187 column_type = SQLITE_TEXT3;
2188 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int", token.end - token.start) ==
2189 0 ||
2190 sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "integer",
2191 token.end - token.start) == 0) {
2192 column_type = SQLITE_INTEGER1;
2193 } else {
2194 return SQLITE_EMPTY16;
2195 }
2196
2197 // Check the next token is identifier and matches "primary"
2198 rc = vec0_scanner_next(&scanner, &token);
2199 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2200 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2201 return SQLITE_EMPTY16;
2202 }
2203 if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "primary", token.end - token.start) != 0) {
2204 return SQLITE_EMPTY16;
2205 }
2206
2207 // Check the next token is identifier and matches "key"
2208 rc = vec0_scanner_next(&scanner, &token);
2209 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2210 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2211 return SQLITE_EMPTY16;
2212 }
2213 if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "key", token.end - token.start) != 0) {
2214 return SQLITE_EMPTY16;
2215 }
2216
2217 *out_column_name = column_name;
2218 *out_column_name_length = column_name_length;
2219 *out_column_type = column_type;
2220
2221 return SQLITE_OK0;
2222}
2223
/* Distance metrics selectable per vector column via `distance_metric=`. */
enum Vec0DistanceMetrics {
  VEC0_DISTANCE_METRIC_L2 = 1,     /* `l2` */
  VEC0_DISTANCE_METRIC_COSINE = 2, /* `cosine` */
  VEC0_DISTANCE_METRIC_L1 = 3,     /* `l1` */
};
2229
2230struct VectorColumnDefinition {
2231 char *name;
2232 int name_length;
2233 size_t dimensions;
2234 enum VectorElementType element_type;
2235 enum Vec0DistanceMetrics distance_metric;
2236};
2237
/* Parsed description of one partition key column of a vec0 table. */
struct Vec0PartitionColumnDefinition {
  int type;        /* column type code */
  char *name;      /* column name */
  int name_length; /* length of `name` in bytes */
};
2243
/* Parsed description of one auxiliary (`+name type`) column of a vec0 table. */
struct Vec0AuxiliaryColumnDefinition {
  int type;        /* column type code */
  char *name;      /* column name */
  int name_length; /* length of `name` in bytes */
};
2249struct Vec0MetadataColumnDefinition {
2250 vec0_metadata_column_kind kind;
2251 char * name;
2252 int name_length;
2253};
2254
2255size_t vector_byte_size(enum VectorElementType element_type,
2256 size_t dimensions) {
2257 switch (element_type) {
2258 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
2259 return dimensions * sizeof(f32);
2260 case SQLITE_VEC_ELEMENT_TYPE_INT8:
2261 return dimensions * sizeof(i8);
2262 case SQLITE_VEC_ELEMENT_TYPE_BIT:
2263 return dimensions / CHAR_BIT8;
2264 }
2265 return 0;
2266}
2267
2268size_t vector_column_byte_size(struct VectorColumnDefinition column) {
2269 return vector_byte_size(column.element_type, column.dimensions);
2270}
2271
2272/**
2273 * @brief Parse an vec0 vtab argv[i] column definition and see if
2274 * it's a vector column defintion, ex `contents_embedding float[768]`.
2275 *
2276 * @param source vec0 argv[i] item
2277 * @param source_length length of source in bytes
2278 * @param outColumn Output the parse vector column to this struct, if success
2279 * @return int SQLITE_OK on success, SQLITE_EMPTY is it's not a vector column
2280 * definition, SQLITE_ERROR on error.
2281 */
2282int vec0_parse_vector_column(const char *source, int source_length,
2283 struct VectorColumnDefinition *outColumn) {
2284 // parses a vector column definition like so:
2285 // "abc float[123]", "abc_123 bit[1234]", eetc.
2286 // https://github.com/asg017/sqlite-vec/issues/46
2287 int rc;
2288 struct Vec0Scanner scanner;
2289 struct Vec0Token token;
2290
2291 char *name;
2292 int nameLength;
2293 enum VectorElementType elementType;
2294 enum Vec0DistanceMetrics distanceMetric = VEC0_DISTANCE_METRIC_L2;
2295 int dimensions;
2296
2297 vec0_scanner_init(&scanner, source, source_length);
2298
2299 // starts with an identifier
2300 rc = vec0_scanner_next(&scanner, &token);
2301
2302 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2303 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2304 return SQLITE_EMPTY16;
2305 }
2306
2307 name = token.start;
2308 nameLength = token.end - token.start;
2309
2310 // vector column type comes next: float, int, or bit
2311 rc = vec0_scanner_next(&scanner, &token);
2312
2313 if (rc != VEC0_TOKEN_RESULT_SOME2 ||
2314 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2315 return SQLITE_EMPTY16;
2316 }
2317 if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "float", 5) == 0 ||
2318 sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "f32", 3) == 0) {
2319 elementType = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
2320 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "int8", 4) == 0 ||
2321 sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "i8", 2) == 0) {
2322 elementType = SQLITE_VEC_ELEMENT_TYPE_INT8;
2323 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(token.start, "bit", 3) == 0) {
2324 elementType = SQLITE_VEC_ELEMENT_TYPE_BIT;
2325 } else {
2326 return SQLITE_EMPTY16;
2327 }
2328
2329 // left '[' bracket
2330 rc = vec0_scanner_next(&scanner, &token);
2331 if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_LBRACKET) {
2332 return SQLITE_EMPTY16;
2333 }
2334
2335 // digit, for vector dimension length
2336 rc = vec0_scanner_next(&scanner, &token);
2337 if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_DIGIT) {
2338 return SQLITE_ERROR1;
2339 }
2340 dimensions = atoi(token.start);
2341 if (dimensions <= 0) {
2342 return SQLITE_ERROR1;
2343 }
2344
2345 // // right ']' bracket
2346 rc = vec0_scanner_next(&scanner, &token);
2347 if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_RBRACKET) {
2348 return SQLITE_ERROR1;
2349 }
2350
2351 // any other tokens left should be column-level options , ex `key=value`
2352 // ex `distance_metric=L2 distance_metric=cosine` should error
2353 while (1) {
2354 // should be EOF or identifier (option key)
2355 rc = vec0_scanner_next(&scanner, &token);
2356 if (rc == VEC0_TOKEN_RESULT_EOF1) {
2357 break;
2358 }
2359
2360 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2361 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2362 return SQLITE_ERROR1;
2363 }
2364
2365 char *key = token.start;
2366 int keyLength = token.end - token.start;
2367
2368 if (sqlite3_strnicmpsqlite3_api->strnicmp(key, "distance_metric", keyLength) == 0) {
2369
2370 if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
2371 return SQLITE_ERROR1;
2372 }
2373 // ensure equal sign after distance_metric
2374 rc = vec0_scanner_next(&scanner, &token);
2375 if (rc != VEC0_TOKEN_RESULT_SOME2 && token.token_type != TOKEN_TYPE_EQ) {
2376 return SQLITE_ERROR1;
2377 }
2378
2379 // distance_metric value, an identifier (L2, cosine, etc)
2380 rc = vec0_scanner_next(&scanner, &token);
2381 if (rc != VEC0_TOKEN_RESULT_SOME2 &&
2382 token.token_type != TOKEN_TYPE_IDENTIFIER) {
2383 return SQLITE_ERROR1;
2384 }
2385
2386 char *value = token.start;
2387 int valueLength = token.end - token.start;
2388 if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "l2", valueLength) == 0) {
2389 distanceMetric = VEC0_DISTANCE_METRIC_L2;
2390 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "l1", valueLength) == 0) {
2391 distanceMetric = VEC0_DISTANCE_METRIC_L1;
2392 } else if (sqlite3_strnicmpsqlite3_api->strnicmp(value, "cosine", valueLength) == 0) {
2393 distanceMetric = VEC0_DISTANCE_METRIC_COSINE;
2394 } else {
2395 return SQLITE_ERROR1;
2396 }
2397 }
2398 // unknown key
2399 else {
2400 return SQLITE_ERROR1;
2401 }
2402 }
2403
2404 outColumn->name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", nameLength, name);
2405 if (!outColumn->name) {
2406 return SQLITE_ERROR1;
2407 }
2408 outColumn->name_length = nameLength;
2409 outColumn->distance_metric = distanceMetric;
2410 outColumn->element_type = elementType;
2411 outColumn->dimensions = dimensions;
2412 return SQLITE_OK0;
2413}
2414
2415#pragma region vec_each table function
2416
2417typedef struct vec_each_vtab vec_each_vtab;
2418struct vec_each_vtab {
2419 sqlite3_vtab base;
2420};
2421
2422typedef struct vec_each_cursor vec_each_cursor;
2423struct vec_each_cursor {
2424 sqlite3_vtab_cursor base;
2425 i64 iRowid;
2426 enum VectorElementType vector_type;
2427 void *vector;
2428 size_t dimensions;
2429 vector_cleanup cleanup;
2430};
2431
2432static int vec_eachConnect(sqlite3 *db, void *pAux, int argc,
2433 const char *const *argv, sqlite3_vtab **ppVtab,
2434 char **pzErr) {
2435 UNUSED_PARAMETER(pAux)(void)(pAux);
2436 UNUSED_PARAMETER(argc)(void)(argc);
2437 UNUSED_PARAMETER(argv)(void)(argv);
2438 UNUSED_PARAMETER(pzErr)(void)(pzErr);
2439 vec_each_vtab *pNew;
2440 int rc;
2441
2442 rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, "CREATE TABLE x(value, vector hidden)");
2443#define VEC_EACH_COLUMN_VALUE0 0
2444#define VEC_EACH_COLUMN_VECTOR1 1
2445 if (rc == SQLITE_OK0) {
2446 pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew));
2447 *ppVtab = (sqlite3_vtab *)pNew;
2448 if (pNew == 0)
2449 return SQLITE_NOMEM7;
2450 memset(pNew, 0, sizeof(*pNew));
2451 }
2452 return rc;
2453}
2454
2455static int vec_eachDisconnect(sqlite3_vtab *pVtab) {
2456 vec_each_vtab *p = (vec_each_vtab *)pVtab;
2457 sqlite3_freesqlite3_api->free(p);
2458 return SQLITE_OK0;
2459}
2460
2461static int vec_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
2462 UNUSED_PARAMETER(p)(void)(p);
2463 vec_each_cursor *pCur;
2464 pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur));
2465 if (pCur == 0)
2466 return SQLITE_NOMEM7;
2467 memset(pCur, 0, sizeof(*pCur));
2468 *ppCursor = &pCur->base;
2469 return SQLITE_OK0;
2470}
2471
2472static int vec_eachClose(sqlite3_vtab_cursor *cur) {
2473 vec_each_cursor *pCur = (vec_each_cursor *)cur;
2474 if(pCur->vector) {
2475 pCur->cleanup(pCur->vector);
2476 }
2477 sqlite3_freesqlite3_api->free(pCur);
2478 return SQLITE_OK0;
2479}
2480
2481static int vec_eachBestIndex(sqlite3_vtab *pVTab,
2482 sqlite3_index_info *pIdxInfo) {
2483 UNUSED_PARAMETER(pVTab)(void)(pVTab);
2484 int hasVector = 0;
2485 for (int i = 0; i < pIdxInfo->nConstraint; i++) {
2486 const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i];
2487 // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn,
2488 // pCons->op, pCons->usable);
2489 switch (pCons->iColumn) {
2490 case VEC_EACH_COLUMN_VECTOR1: {
2491 if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ2 && pCons->usable) {
2492 hasVector = 1;
2493 pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2494 pIdxInfo->aConstraintUsage[i].omit = 1;
2495 }
2496 break;
2497 }
2498 }
2499 }
2500 if (!hasVector) {
2501 return SQLITE_CONSTRAINT19;
2502 }
2503
2504 pIdxInfo->estimatedCost = (double)100000;
2505 pIdxInfo->estimatedRows = 100000;
2506
2507 return SQLITE_OK0;
2508}
2509
2510static int vec_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
2511 const char *idxStr, int argc, sqlite3_value **argv) {
2512 UNUSED_PARAMETER(idxNum)(void)(idxNum);
2513 UNUSED_PARAMETER(idxStr)(void)(idxStr);
2514 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 2514, __extension__ __PRETTY_FUNCTION__); }))
;
2515 vec_each_cursor *pCur = (vec_each_cursor *)pVtabCursor;
2516
2517 if (pCur->vector) {
2518 pCur->cleanup(pCur->vector);
2519 pCur->vector = NULL((void*)0);
2520 }
2521
2522 char *pzErrMsg;
2523 int rc = vector_from_value(argv[0], &pCur->vector, &pCur->dimensions,
2524 &pCur->vector_type, &pCur->cleanup, &pzErrMsg);
2525 if (rc != SQLITE_OK0) {
2526 return SQLITE_ERROR1;
2527 }
2528 pCur->iRowid = 0;
2529 return SQLITE_OK0;
2530}
2531
2532static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
2533 vec_each_cursor *pCur = (vec_each_cursor *)cur;
2534 *pRowid = pCur->iRowid;
2535 return SQLITE_OK0;
2536}
2537
2538static int vec_eachEof(sqlite3_vtab_cursor *cur) {
2539 vec_each_cursor *pCur = (vec_each_cursor *)cur;
2540 return pCur->iRowid >= (i64)pCur->dimensions;
2541}
2542
2543static int vec_eachNext(sqlite3_vtab_cursor *cur) {
2544 vec_each_cursor *pCur = (vec_each_cursor *)cur;
2545 pCur->iRowid++;
2546 return SQLITE_OK0;
2547}
2548
2549static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *context,
2550 int i) {
2551 vec_each_cursor *pCur = (vec_each_cursor *)cur;
2552 switch (i) {
2553 case VEC_EACH_COLUMN_VALUE0:
2554 switch (pCur->vector_type) {
2555 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
2556 sqlite3_result_doublesqlite3_api->result_double(context, ((f32 *)pCur->vector)[pCur->iRowid]);
2557 break;
2558 }
2559 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
2560 u8 x = ((u8 *)pCur->vector)[pCur->iRowid / CHAR_BIT8];
2561 sqlite3_result_intsqlite3_api->result_int(context,
2562 (x & (0b10000000 >> ((pCur->iRowid % CHAR_BIT8)))) > 0);
2563 break;
2564 }
2565 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
2566 sqlite3_result_intsqlite3_api->result_int(context, ((i8 *)pCur->vector)[pCur->iRowid]);
2567 break;
2568 }
2569 }
2570
2571 break;
2572 }
2573 return SQLITE_OK0;
2574}
2575
2576static sqlite3_module vec_eachModule = {
2577 /* iVersion */ 0,
2578 /* xCreate */ 0,
2579 /* xConnect */ vec_eachConnect,
2580 /* xBestIndex */ vec_eachBestIndex,
2581 /* xDisconnect */ vec_eachDisconnect,
2582 /* xDestroy */ 0,
2583 /* xOpen */ vec_eachOpen,
2584 /* xClose */ vec_eachClose,
2585 /* xFilter */ vec_eachFilter,
2586 /* xNext */ vec_eachNext,
2587 /* xEof */ vec_eachEof,
2588 /* xColumn */ vec_eachColumn,
2589 /* xRowid */ vec_eachRowid,
2590 /* xUpdate */ 0,
2591 /* xBegin */ 0,
2592 /* xSync */ 0,
2593 /* xCommit */ 0,
2594 /* xRollback */ 0,
2595 /* xFindMethod */ 0,
2596 /* xRename */ 0,
2597 /* xSavepoint */ 0,
2598 /* xRelease */ 0,
2599 /* xRollbackTo */ 0,
2600 /* xShadowName */ 0,
2601#if SQLITE_VERSION_NUMBER3050001 >= 3044000
2602 /* xIntegrity */ 0
2603#endif
2604};
2605
2606#pragma endregion
2607
2608#pragma region vec_npy_each table function
2609
/* Kinds of tokens produced by npy_token_next(). */
enum NpyTokenType {
  NPY_TOKEN_TYPE_IDENTIFIER,
  NPY_TOKEN_TYPE_NUMBER, /* run of decimal digits */
  NPY_TOKEN_TYPE_LPAREN, /* '(' */
  NPY_TOKEN_TYPE_RPAREN, /* ')' */
  NPY_TOKEN_TYPE_LBRACE, /* '{' */
  NPY_TOKEN_TYPE_RBRACE, /* '}' */
  NPY_TOKEN_TYPE_COLON,  /* ':' */
  NPY_TOKEN_TYPE_COMMA,  /* ',' */
  NPY_TOKEN_TYPE_STRING, /* single-quoted string, quotes included */
  NPY_TOKEN_TYPE_FALSE,  /* the literal `False` */
};
2622
2623struct NpyToken {
2624 enum NpyTokenType token_type;
2625 unsigned char *start;
2626 unsigned char *end;
2627};
2628
2629int npy_token_next(unsigned char *start, unsigned char *end,
2630 struct NpyToken *out) {
2631 unsigned char *ptr = start;
2632 while (ptr < end) {
2633 unsigned char curr = *ptr;
2634 if (is_whitespace(curr)) {
2635 ptr++;
2636 continue;
2637 } else if (curr == '(') {
2638 out->start = ptr++;
2639 out->end = ptr;
2640 out->token_type = NPY_TOKEN_TYPE_LPAREN;
2641 return VEC0_TOKEN_RESULT_SOME2;
2642 } else if (curr == ')') {
2643 out->start = ptr++;
2644 out->end = ptr;
2645 out->token_type = NPY_TOKEN_TYPE_RPAREN;
2646 return VEC0_TOKEN_RESULT_SOME2;
2647 } else if (curr == '{') {
2648 out->start = ptr++;
2649 out->end = ptr;
2650 out->token_type = NPY_TOKEN_TYPE_LBRACE;
2651 return VEC0_TOKEN_RESULT_SOME2;
2652 } else if (curr == '}') {
2653 out->start = ptr++;
2654 out->end = ptr;
2655 out->token_type = NPY_TOKEN_TYPE_RBRACE;
2656 return VEC0_TOKEN_RESULT_SOME2;
2657 } else if (curr == ':') {
2658 out->start = ptr++;
2659 out->end = ptr;
2660 out->token_type = NPY_TOKEN_TYPE_COLON;
2661 return VEC0_TOKEN_RESULT_SOME2;
2662 } else if (curr == ',') {
2663 out->start = ptr++;
2664 out->end = ptr;
2665 out->token_type = NPY_TOKEN_TYPE_COMMA;
2666 return VEC0_TOKEN_RESULT_SOME2;
2667 } else if (curr == '\'') {
2668 unsigned char *start = ptr;
2669 ptr++;
2670 while (ptr < end) {
2671 if ((*ptr) == '\'') {
2672 break;
2673 }
2674 ptr++;
2675 }
2676 if ((*ptr) != '\'') {
2677 return VEC0_TOKEN_RESULT_ERROR3;
2678 }
2679 out->start = start;
2680 out->end = ++ptr;
2681 out->token_type = NPY_TOKEN_TYPE_STRING;
2682 return VEC0_TOKEN_RESULT_SOME2;
2683 } else if (curr == 'F' &&
2684 strncmp((char *)ptr, "False", strlen("False")) == 0) {
2685 out->start = ptr;
2686 out->end = (ptr + (int)strlen("False"));
2687 ptr = out->end;
2688 out->token_type = NPY_TOKEN_TYPE_FALSE;
2689 return VEC0_TOKEN_RESULT_SOME2;
2690 } else if (is_digit(curr)) {
2691 unsigned char *start = ptr;
2692 while (ptr < end && (is_digit(*ptr))) {
2693 ptr++;
2694 }
2695 out->start = start;
2696 out->end = ptr;
2697 out->token_type = NPY_TOKEN_TYPE_NUMBER;
2698 return VEC0_TOKEN_RESULT_SOME2;
2699 } else {
2700 return VEC0_TOKEN_RESULT_ERROR3;
2701 }
2702 }
2703 return VEC0_TOKEN_RESULT_ERROR3;
2704}
2705
/*
 * Tokenizer state over a numpy header buffer. npy_scanner_next() reads from
 * `start` up to `end` and advances `start` past each token it returns.
 */
struct NpyScanner {
  unsigned char *start; /* next position to scan from */
  unsigned char *end;   /* one past the last byte of the source */
  unsigned char *ptr;   /* set to the beginning of the source on init */
};
2711
2712void npy_scanner_init(struct NpyScanner *scanner, const unsigned char *source,
2713 int source_length) {
2714 scanner->start = (unsigned char *)source;
2715 scanner->end = (unsigned char *)source + source_length;
2716 scanner->ptr = (unsigned char *)source;
2717}
2718
2719int npy_scanner_next(struct NpyScanner *scanner, struct NpyToken *out) {
2720 int rc = npy_token_next(scanner->start, scanner->end, out);
2721 if (rc == VEC0_TOKEN_RESULT_SOME2) {
2722 scanner->start = out->end;
2723 }
2724 return rc;
2725}
2726
/* Common prefix for numpy parsing error messages; concatenate a detail string
 * literal after it. */
#define NPY_PARSE_ERROR "Error parsing numpy array: "
2728int parse_npy_header(sqlite3_vtab *pVTab, const unsigned char *header,
2729 size_t headerLength,
2730 enum VectorElementType *out_element_type,
2731 int *fortran_order, size_t *numElements,
2732 size_t *numDimensions) {
2733
2734 struct NpyScanner scanner;
2735 struct NpyToken token;
2736 int rc;
2737 npy_scanner_init(&scanner, header, headerLength);
2738
2739 if (npy_scanner_next(&scanner, &token) != VEC0_TOKEN_RESULT_SOME2 &&
2740 token.token_type != NPY_TOKEN_TYPE_LBRACE) {
2741 vtab_set_error(pVTab,
2742 NPY_PARSE_ERROR"Error parsing numpy array: " "numpy header did not start with '{'");
2743 return SQLITE_ERROR1;
2744 }
2745 while (1) {
2746 rc = npy_scanner_next(&scanner, &token);
2747 if (rc != VEC0_TOKEN_RESULT_SOME2) {
2748 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "expected key in numpy header");
2749 return SQLITE_ERROR1;
2750 }
2751
2752 if (token.token_type == NPY_TOKEN_TYPE_RBRACE) {
2753 break;
2754 }
2755 if (token.token_type != NPY_TOKEN_TYPE_STRING) {
2756 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2757 "expected a string as key in numpy header");
2758 return SQLITE_ERROR1;
2759 }
2760 unsigned char *key = token.start;
2761
2762 rc = npy_scanner_next(&scanner, &token);
2763 if ((rc != VEC0_TOKEN_RESULT_SOME2) ||
2764 (token.token_type != NPY_TOKEN_TYPE_COLON)) {
2765 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2766 "expected a ':' after key in numpy header");
2767 return SQLITE_ERROR1;
2768 }
2769
2770 if (strncmp((char *)key, "'descr'", strlen("'descr'")) == 0) {
2771 rc = npy_scanner_next(&scanner, &token);
2772 if ((rc != VEC0_TOKEN_RESULT_SOME2) ||
2773 (token.token_type != NPY_TOKEN_TYPE_STRING)) {
2774 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2775 "expected a string value after 'descr' key");
2776 return SQLITE_ERROR1;
2777 }
2778 if (strncmp((char *)token.start, "'<f4'", strlen("'<f4'")) != 0) {
2779 vtab_set_error(
2780 pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2781 "Only '<f4' values are supported in sqlite-vec numpy functions");
2782 return SQLITE_ERROR1;
2783 }
2784 *out_element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
2785 } else if (strncmp((char *)key, "'fortran_order'",
2786 strlen("'fortran_order'")) == 0) {
2787 rc = npy_scanner_next(&scanner, &token);
2788 if (rc != VEC0_TOKEN_RESULT_SOME2 ||
2789 token.token_type != NPY_TOKEN_TYPE_FALSE) {
2790 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2791 "Only fortran_order = False is supported in sqlite-vec "
2792 "numpy functions");
2793 return SQLITE_ERROR1;
2794 }
2795 *fortran_order = 0;
2796 } else if (strncmp((char *)key, "'shape'", strlen("'shape'")) == 0) {
2797 // "(xxx, xxx)" OR (xxx,)
2798 size_t first;
2799 rc = npy_scanner_next(&scanner, &token);
2800 if ((rc != VEC0_TOKEN_RESULT_SOME2) ||
2801 (token.token_type != NPY_TOKEN_TYPE_LPAREN)) {
2802 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2803 "Expected left parenthesis '(' after shape key");
2804 return SQLITE_ERROR1;
2805 }
2806
2807 rc = npy_scanner_next(&scanner, &token);
2808 if ((rc != VEC0_TOKEN_RESULT_SOME2) ||
2809 (token.token_type != NPY_TOKEN_TYPE_NUMBER)) {
2810 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2811 "Expected an initial number in shape value");
2812 return SQLITE_ERROR1;
2813 }
2814 first = strtol((char *)token.start, NULL((void*)0), 10);
2815
2816 rc = npy_scanner_next(&scanner, &token);
2817 if ((rc != VEC0_TOKEN_RESULT_SOME2) ||
2818 (token.token_type != NPY_TOKEN_TYPE_COMMA)) {
2819 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2820 "Expected comma after first shape value");
2821 return SQLITE_ERROR1;
2822 }
2823
2824 rc = npy_scanner_next(&scanner, &token);
2825 if (rc != VEC0_TOKEN_RESULT_SOME2) {
2826 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2827 "unexpected header EOF while parsing shape");
2828 return SQLITE_ERROR1;
2829 }
2830 if (token.token_type == NPY_TOKEN_TYPE_NUMBER) {
2831 *numElements = first;
2832 *numDimensions = strtol((char *)token.start, NULL((void*)0), 10);
2833 rc = npy_scanner_next(&scanner, &token);
2834 if ((rc != VEC0_TOKEN_RESULT_SOME2) ||
2835 (token.token_type != NPY_TOKEN_TYPE_RPAREN)) {
2836 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: "
2837 "expected right parenthesis after shape value");
2838 return SQLITE_ERROR1;
2839 }
2840 } else if (token.token_type == NPY_TOKEN_TYPE_RPAREN) {
2841 // '(0,)' means an empty array!
2842 *numElements = first ? 1 : 0;
2843 *numDimensions = first;
2844 } else {
2845 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown type in shape value");
2846 return SQLITE_ERROR1;
2847 }
2848 } else {
2849 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown key in numpy header");
2850 return SQLITE_ERROR1;
2851 }
2852
2853 rc = npy_scanner_next(&scanner, &token);
2854 if ((rc != VEC0_TOKEN_RESULT_SOME2) ||
2855 (token.token_type != NPY_TOKEN_TYPE_COMMA)) {
2856 vtab_set_error(pVTab, NPY_PARSE_ERROR"Error parsing numpy array: " "unknown extra token after value");
2857 return SQLITE_ERROR1;
2858 }
2859 }
2860
2861 return SQLITE_OK0;
2862}
2863
2864typedef struct vec_npy_each_vtab vec_npy_each_vtab;
2865struct vec_npy_each_vtab {
2866 sqlite3_vtab base;
2867};
2868
/* Where a vec_npy_each cursor reads its vectors from. */
typedef enum {
  VEC_NPY_EACH_INPUT_BUFFER = 0, /* in-memory blob */
  VEC_NPY_EACH_INPUT_FILE = 1,   /* opened FILE, read in chunks */
} vec_npy_each_input_type;
2873
2874typedef struct vec_npy_each_cursor vec_npy_each_cursor;
2875struct vec_npy_each_cursor {
2876 sqlite3_vtab_cursor base;
2877 i64 iRowid;
2878 // sqlite-vec compatible type of vector
2879 enum VectorElementType elementType;
2880 // number of vectors in the npy array
2881 size_t nElements;
2882 // number of dimensions each vector has
2883 size_t nDimensions;
2884
2885 vec_npy_each_input_type input_type;
2886
2887 // when input_type == VEC_NPY_EACH_INPUT_BUFFER
2888
2889 // Buffer containing the vector data, when reading from an in-memory buffer.
2890 // Size: nElements * nDimensions * element_size
2891 // Clean up with sqlite3_free() once complete
2892 void *vector;
2893
2894 // when input_type == VEC_NPY_EACH_INPUT_FILE
2895
2896 // Opened npy file, when reading from a file.
2897 // fclose() when complete.
2898#ifndef SQLITE_VEC_OMIT_FS
2899 FILE *file;
2900#endif
2901
2902 // an in-memory buffer containing a portion of the npy array.
2903 // Used for faster reading, instead of calling fread a lot.
2904 // Will have a byte-size of fileBufferSize
2905 void *chunksBuffer;
2906 // size of allocated fileBuffer in bytes
2907 size_t chunksBufferSize;
2908 //// Maximum length of the buffer, in terms of number of vectors.
2909 size_t maxChunks;
2910
2911 // Counter index of the current vector into of fileBuffer to yield.
2912 // Starts at 0 once fileBuffer is read, and iterates to bufferLength.
2913 // Resets to 0 once that "buffer" is yielded and a new one is read.
2914 size_t currentChunkIndex;
2915 size_t currentChunkSize;
2916
2917 // 0 when there are still more elements to read/yield, 1 when complete.
2918 int eof;
2919};
2920
/* The six magic bytes every .npy payload starts with: 0x93 then "NUMPY". */
static unsigned char NPY_MAGIC[6] = {0x93, 'N', 'U', 'M', 'P', 'Y'};
2922
2923#ifndef SQLITE_VEC_OMIT_FS
2924int parse_npy_file(sqlite3_vtab *pVTab, FILE *file, vec_npy_each_cursor *pCur) {
2925 int n;
2926 fseek(file, 0, SEEK_END2);
2927 long fileSize = ftell(file);
2928
2929 fseek(file, 0L, SEEK_SET0);
2930
2931 unsigned char header[10];
2932 n = fread(&header, sizeof(unsigned char), 10, file);
2933 if (n != 10) {
2934 vtab_set_error(pVTab, "numpy array file too short");
2935 return SQLITE_ERROR1;
2936 }
2937
2938 if (memcmp(NPY_MAGIC, header, sizeof(NPY_MAGIC)) != 0) {
2939 vtab_set_error(pVTab,
2940 "numpy array file does not contain the 'magic' header");
2941 return SQLITE_ERROR1;
2942 }
2943
2944 u8 major = header[6];
2945 u8 minor = header[7];
Value stored to 'minor' during its initialization is never read
2946 uint16_t headerLength = 0;
2947 memcpy(&headerLength, &header[8], sizeof(uint16_t));
2948
2949 size_t totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) +
2950 sizeof(headerLength) + headerLength;
2951 i32 dataSize = fileSize - totalHeaderLength;
2952 if (dataSize < 0) {
2953 vtab_set_error(pVTab, "numpy array file header length is invalid");
2954 return SQLITE_ERROR1;
2955 }
2956
2957 unsigned char *headerX = sqlite3_mallocsqlite3_api->malloc(headerLength);
2958 if (headerLength && !headerX) {
2959 return SQLITE_NOMEM7;
2960 }
2961
2962 n = fread(headerX, sizeof(char), headerLength, file);
2963 if (n != headerLength) {
2964 sqlite3_freesqlite3_api->free(headerX);
2965 vtab_set_error(pVTab, "numpy array file header length is invalid");
2966 return SQLITE_ERROR1;
2967 }
2968
2969 int fortran_order;
2970 enum VectorElementType element_type;
2971 size_t numElements;
2972 size_t numDimensions;
2973 int rc = parse_npy_header(pVTab, headerX, headerLength, &element_type,
2974 &fortran_order, &numElements, &numDimensions);
2975 sqlite3_freesqlite3_api->free(headerX);
2976 if (rc != SQLITE_OK0) {
2977 // parse_npy_header already attackes an error emssage
2978 return rc;
2979 }
2980
2981 i32 expectedDataSize =
2982 numElements * vector_byte_size(element_type, numDimensions);
2983 if (expectedDataSize != dataSize) {
2984 vtab_set_error(
2985 pVTab, "numpy array file error: Expected a data size of %d, found %d",
2986 expectedDataSize, dataSize);
2987 return SQLITE_ERROR1;
2988 }
2989
2990 pCur->maxChunks = 1024;
2991 pCur->chunksBufferSize =
2992 (vector_byte_size(element_type, numDimensions)) * pCur->maxChunks;
2993 pCur->chunksBuffer = sqlite3_mallocsqlite3_api->malloc(pCur->chunksBufferSize);
2994 if (pCur->chunksBufferSize && !pCur->chunksBuffer) {
2995 return SQLITE_NOMEM7;
2996 }
2997
2998 pCur->currentChunkSize =
2999 fread(pCur->chunksBuffer, vector_byte_size(element_type, numDimensions),
3000 pCur->maxChunks, file);
3001
3002 pCur->currentChunkIndex = 0;
3003 pCur->elementType = element_type;
3004 pCur->nElements = numElements;
3005 pCur->nDimensions = numDimensions;
3006 pCur->input_type = VEC_NPY_EACH_INPUT_FILE;
3007
3008 pCur->eof = pCur->currentChunkSize == 0;
3009 pCur->file = file;
3010 return SQLITE_OK0;
3011}
3012#endif
3013
3014int parse_npy_buffer(sqlite3_vtab *pVTab, const unsigned char *buffer,
3015 int bufferLength, void **data, size_t *numElements,
3016 size_t *numDimensions,
3017 enum VectorElementType *element_type) {
3018
3019 if (bufferLength < 10) {
3020 // IMP: V03312_20150
3021 vtab_set_error(pVTab, "numpy array too short");
3022 return SQLITE_ERROR1;
3023 }
3024 if (memcmp(NPY_MAGIC, buffer, sizeof(NPY_MAGIC)) != 0) {
3025 // V11954_28792
3026 vtab_set_error(pVTab, "numpy array does not contain the 'magic' header");
3027 return SQLITE_ERROR1;
3028 }
3029
3030 u8 major = buffer[6];
3031 u8 minor = buffer[7];
3032 uint16_t headerLength = 0;
3033 memcpy(&headerLength, &buffer[8], sizeof(uint16_t));
3034
3035 i32 totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) +
3036 sizeof(headerLength) + headerLength;
3037 i32 dataSize = bufferLength - totalHeaderLength;
3038
3039 if (dataSize < 0) {
3040 vtab_set_error(pVTab, "numpy array header length is invalid");
3041 return SQLITE_ERROR1;
3042 }
3043
3044 const unsigned char *header = &buffer[10];
3045 int fortran_order;
3046
3047 int rc = parse_npy_header(pVTab, header, headerLength, element_type,
3048 &fortran_order, numElements, numDimensions);
3049 if (rc != SQLITE_OK0) {
3050 return rc;
3051 }
3052
3053 i32 expectedDataSize =
3054 (*numElements * vector_byte_size(*element_type, *numDimensions));
3055 if (expectedDataSize != dataSize) {
3056 vtab_set_error(pVTab,
3057 "numpy array error: Expected a data size of %d, found %d",
3058 expectedDataSize, dataSize);
3059 return SQLITE_ERROR1;
3060 }
3061
3062 *data = (void *)&buffer[totalHeaderLength];
3063 return SQLITE_OK0;
3064}
3065
3066static int vec_npy_eachConnect(sqlite3 *db, void *pAux, int argc,
3067 const char *const *argv, sqlite3_vtab **ppVtab,
3068 char **pzErr) {
3069 UNUSED_PARAMETER(pAux)(void)(pAux);
3070 UNUSED_PARAMETER(argc)(void)(argc);
3071 UNUSED_PARAMETER(argv)(void)(argv);
3072 UNUSED_PARAMETER(pzErr)(void)(pzErr);
3073 vec_npy_each_vtab *pNew;
3074 int rc;
3075
3076 rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, "CREATE TABLE x(vector, input hidden)");
3077#define VEC_NPY_EACH_COLUMN_VECTOR0 0
3078#define VEC_NPY_EACH_COLUMN_INPUT1 1
3079 if (rc == SQLITE_OK0) {
3080 pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew));
3081 *ppVtab = (sqlite3_vtab *)pNew;
3082 if (pNew == 0)
3083 return SQLITE_NOMEM7;
3084 memset(pNew, 0, sizeof(*pNew));
3085 }
3086 return rc;
3087}
3088
3089static int vec_npy_eachDisconnect(sqlite3_vtab *pVtab) {
3090 vec_npy_each_vtab *p = (vec_npy_each_vtab *)pVtab;
3091 sqlite3_freesqlite3_api->free(p);
3092 return SQLITE_OK0;
3093}
3094
3095static int vec_npy_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
3096 UNUSED_PARAMETER(p)(void)(p);
3097 vec_npy_each_cursor *pCur;
3098 pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur));
3099 if (pCur == 0)
3100 return SQLITE_NOMEM7;
3101 memset(pCur, 0, sizeof(*pCur));
3102 *ppCursor = &pCur->base;
3103 return SQLITE_OK0;
3104}
3105
3106static int vec_npy_eachClose(sqlite3_vtab_cursor *cur) {
3107 vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
3108#ifndef SQLITE_VEC_OMIT_FS
3109 if (pCur->file) {
3110 fclose(pCur->file);
3111 pCur->file = NULL((void*)0);
3112 }
3113#endif
3114 if (pCur->chunksBuffer) {
3115 sqlite3_freesqlite3_api->free(pCur->chunksBuffer);
3116 pCur->chunksBuffer = NULL((void*)0);
3117 }
3118 if (pCur->vector) {
3119 pCur->vector = NULL((void*)0);
3120 }
3121 sqlite3_freesqlite3_api->free(pCur);
3122 return SQLITE_OK0;
3123}
3124
3125static int vec_npy_eachBestIndex(sqlite3_vtab *pVTab,
3126 sqlite3_index_info *pIdxInfo) {
3127 int hasInput;
3128 for (int i = 0; i < pIdxInfo->nConstraint; i++) {
3129 const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i];
3130 // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn,
3131 // pCons->op, pCons->usable);
3132 switch (pCons->iColumn) {
3133 case VEC_NPY_EACH_COLUMN_INPUT1: {
3134 if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ2 && pCons->usable) {
3135 hasInput = 1;
3136 pIdxInfo->aConstraintUsage[i].argvIndex = 1;
3137 pIdxInfo->aConstraintUsage[i].omit = 1;
3138 }
3139 break;
3140 }
3141 }
3142 }
3143 if (!hasInput) {
3144 pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("input argument is required");
3145 return SQLITE_ERROR1;
3146 }
3147
3148 pIdxInfo->estimatedCost = (double)100000;
3149 pIdxInfo->estimatedRows = 100000;
3150
3151 return SQLITE_OK0;
3152}
3153
3154static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
3155 const char *idxStr, int argc,
3156 sqlite3_value **argv) {
3157 UNUSED_PARAMETER(idxNum)(void)(idxNum);
3158 UNUSED_PARAMETER(idxStr)(void)(idxStr);
3159 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 3159, __extension__ __PRETTY_FUNCTION__); }))
;
3160 int rc;
3161
3162 vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)pVtabCursor;
3163
3164#ifndef SQLITE_VEC_OMIT_FS
3165 if (pCur->file) {
3166 fclose(pCur->file);
3167 pCur->file = NULL((void*)0);
3168 }
3169#endif
3170 if (pCur->chunksBuffer) {
3171 sqlite3_freesqlite3_api->free(pCur->chunksBuffer);
3172 pCur->chunksBuffer = NULL((void*)0);
3173 }
3174 if (pCur->vector) {
3175 pCur->vector = NULL((void*)0);
3176 }
3177
3178#ifndef SQLITE_VEC_OMIT_FS
3179 struct VecNpyFile *f = NULL((void*)0);
3180 if ((f = sqlite3_value_pointersqlite3_api->value_pointer(argv[0], SQLITE_VEC_NPY_FILE_NAME"vec0-npy-file"))) {
3181 FILE *file = fopen(f->path, "r");
3182 if (!file) {
3183 vtab_set_error(pVtabCursor->pVtab, "Could not open numpy file");
3184 return SQLITE_ERROR1;
3185 }
3186
3187 rc = parse_npy_file(pVtabCursor->pVtab, file, pCur);
3188 if (rc != SQLITE_OK0) {
3189#ifndef SQLITE_VEC_OMIT_FS
3190 fclose(file);
3191#endif
3192 return rc;
3193 }
3194
3195 } else
3196#endif
3197 {
3198
3199 const unsigned char *input = sqlite3_value_blobsqlite3_api->value_blob(argv[0]);
3200 int inputLength = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]);
3201 void *data;
3202 size_t numElements;
3203 size_t numDimensions;
3204 enum VectorElementType element_type;
3205
3206 rc = parse_npy_buffer(pVtabCursor->pVtab, input, inputLength, &data,
3207 &numElements, &numDimensions, &element_type);
3208 if (rc != SQLITE_OK0) {
3209 return rc;
3210 }
3211
3212 pCur->vector = data;
3213 pCur->elementType = element_type;
3214 pCur->nElements = numElements;
3215 pCur->nDimensions = numDimensions;
3216 pCur->input_type = VEC_NPY_EACH_INPUT_BUFFER;
3217 }
3218
3219 pCur->iRowid = 0;
3220 return SQLITE_OK0;
3221}
3222
3223static int vec_npy_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
3224 vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
3225 *pRowid = pCur->iRowid;
3226 return SQLITE_OK0;
3227}
3228
3229static int vec_npy_eachEof(sqlite3_vtab_cursor *cur) {
3230 vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
3231 if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) {
3232 return (!pCur->nElements) || (size_t)pCur->iRowid >= pCur->nElements;
3233 }
3234 return pCur->eof;
3235}
3236
3237static int vec_npy_eachNext(sqlite3_vtab_cursor *cur) {
3238 vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
3239 pCur->iRowid++;
3240 if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) {
3241 return SQLITE_OK0;
3242 }
3243
3244#ifndef SQLITE_VEC_OMIT_FS
3245 // else: input is a file
3246 pCur->currentChunkIndex++;
3247 if (pCur->currentChunkIndex >= pCur->currentChunkSize) {
3248 pCur->currentChunkSize =
3249 fread(pCur->chunksBuffer,
3250 vector_byte_size(pCur->elementType, pCur->nDimensions),
3251 pCur->maxChunks, pCur->file);
3252 if (!pCur->currentChunkSize) {
3253 pCur->eof = 1;
3254 }
3255 pCur->currentChunkIndex = 0;
3256 }
3257#endif
3258 return SQLITE_OK0;
3259}
3260
3261static int vec_npy_eachColumnBuffer(vec_npy_each_cursor *pCur,
3262 sqlite3_context *context, int i) {
3263 switch (i) {
3264 case VEC_NPY_EACH_COLUMN_VECTOR0: {
3265 sqlite3_result_subtypesqlite3_api->result_subtype(context, pCur->elementType);
3266 switch (pCur->elementType) {
3267 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
3268 sqlite3_result_blobsqlite3_api->result_blob(
3269 context,
3270 &((unsigned char *)
3271 pCur->vector)[pCur->iRowid * pCur->nDimensions * sizeof(f32)],
3272 pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
3273
3274 break;
3275 }
3276 case SQLITE_VEC_ELEMENT_TYPE_INT8:
3277 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
3278 // https://github.com/asg017/sqlite-vec/issues/42
3279 sqlite3_result_errorsqlite3_api->result_error(context,
3280 "vec_npy_each only supports float32 vectors", -1);
3281 break;
3282 }
3283 }
3284
3285 break;
3286 }
3287 }
3288 return SQLITE_OK0;
3289}
3290static int vec_npy_eachColumnFile(vec_npy_each_cursor *pCur,
3291 sqlite3_context *context, int i) {
3292 switch (i) {
3293 case VEC_NPY_EACH_COLUMN_VECTOR0: {
3294 switch (pCur->elementType) {
3295 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
3296 sqlite3_result_blobsqlite3_api->result_blob(
3297 context,
3298 &((unsigned char *)
3299 pCur->chunksBuffer)[pCur->currentChunkIndex *
3300 pCur->nDimensions * sizeof(f32)],
3301 pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
3302 break;
3303 }
3304 case SQLITE_VEC_ELEMENT_TYPE_INT8:
3305 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
3306 // https://github.com/asg017/sqlite-vec/issues/42
3307 sqlite3_result_errorsqlite3_api->result_error(context,
3308 "vec_npy_each only supports float32 vectors", -1);
3309 break;
3310 }
3311 }
3312 break;
3313 }
3314 }
3315 return SQLITE_OK0;
3316}
3317static int vec_npy_eachColumn(sqlite3_vtab_cursor *cur,
3318 sqlite3_context *context, int i) {
3319 vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
3320 switch (pCur->input_type) {
3321 case VEC_NPY_EACH_INPUT_BUFFER:
3322 return vec_npy_eachColumnBuffer(pCur, context, i);
3323 case VEC_NPY_EACH_INPUT_FILE:
3324 return vec_npy_eachColumnFile(pCur, context, i);
3325 }
3326 return SQLITE_ERROR1;
3327}
3328
3329static sqlite3_module vec_npy_eachModule = {
3330 /* iVersion */ 0,
3331 /* xCreate */ 0,
3332 /* xConnect */ vec_npy_eachConnect,
3333 /* xBestIndex */ vec_npy_eachBestIndex,
3334 /* xDisconnect */ vec_npy_eachDisconnect,
3335 /* xDestroy */ 0,
3336 /* xOpen */ vec_npy_eachOpen,
3337 /* xClose */ vec_npy_eachClose,
3338 /* xFilter */ vec_npy_eachFilter,
3339 /* xNext */ vec_npy_eachNext,
3340 /* xEof */ vec_npy_eachEof,
3341 /* xColumn */ vec_npy_eachColumn,
3342 /* xRowid */ vec_npy_eachRowid,
3343 /* xUpdate */ 0,
3344 /* xBegin */ 0,
3345 /* xSync */ 0,
3346 /* xCommit */ 0,
3347 /* xRollback */ 0,
3348 /* xFindMethod */ 0,
3349 /* xRename */ 0,
3350 /* xSavepoint */ 0,
3351 /* xRelease */ 0,
3352 /* xRollbackTo */ 0,
3353 /* xShadowName */ 0,
3354#if SQLITE_VERSION_NUMBER3050001 >= 3044000
3355 /* xIntegrity */ 0,
3356#endif
3357};
3358
3359#pragma endregion
3360
3361#pragma region vec0 virtual table
3362
3363#define VEC0_COLUMN_ID0 0
3364#define VEC0_COLUMN_USERN_START1 1
3365#define VEC0_COLUMN_OFFSET_DISTANCE1 1
3366#define VEC0_COLUMN_OFFSET_K2 2
3367
3368#define VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"" "\"%w\".\"%w_info\""
3369
3370#define VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" "\"%w\".\"%w_chunks\""
3371/// 1) schema, 2) original vtab table name
3372#define VEC0_SHADOW_CHUNKS_CREATE"CREATE TABLE " "\"%w\".\"%w_chunks\"" "(" "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,"
"size INTEGER NOT NULL," "validity BLOB NOT NULL," "rowids BLOB NOT NULL"
");"
\
3373 "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" "(" \
3374 "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," \
3375 "size INTEGER NOT NULL," \
3376 "validity BLOB NOT NULL," \
3377 "rowids BLOB NOT NULL" \
3378 ");"
3379
3380#define VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "\"%w\".\"%w_rowids\""
3381/// 1) schema, 2) original vtab table name
3382#define VEC0_SHADOW_ROWIDS_CREATE_BASIC"CREATE TABLE " "\"%w\".\"%w_rowids\"" "(" "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
"id," "chunk_id INTEGER," "chunk_offset INTEGER" ");"
\
3383 "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "(" \
3384 "rowid INTEGER PRIMARY KEY AUTOINCREMENT," \
3385 "id," \
3386 "chunk_id INTEGER," \
3387 "chunk_offset INTEGER" \
3388 ");"
3389
3390// vec0 tables with a text primary keys are still backed by int64 primary keys,
3391// since a fixed-length rowid is required for vec0 chunks. But we add a new 'id
3392// text unique' column to emulate a text primary key interface.
3393#define VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT"CREATE TABLE " "\"%w\".\"%w_rowids\"" "(" "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
"id TEXT UNIQUE NOT NULL," "chunk_id INTEGER," "chunk_offset INTEGER"
");"
\
3394 "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "(" \
3395 "rowid INTEGER PRIMARY KEY AUTOINCREMENT," \
3396 "id TEXT UNIQUE NOT NULL," \
3397 "chunk_id INTEGER," \
3398 "chunk_offset INTEGER" \
3399 ");"
3400
3401/// 1) schema, 2) original vtab table name
3402#define VEC0_SHADOW_VECTOR_N_NAME"\"%w\".\"%w_vector_chunks%02d\"" "\"%w\".\"%w_vector_chunks%02d\""
3403
3404/// 1) schema, 2) original vtab table name
3405#define VEC0_SHADOW_VECTOR_N_CREATE"CREATE TABLE " "\"%w\".\"%w_vector_chunks%02d\"" "(" "rowid PRIMARY KEY,"
"vectors BLOB NOT NULL" ");"
\
3406 "CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME"\"%w\".\"%w_vector_chunks%02d\"" "(" \
3407 "rowid PRIMARY KEY," \
3408 "vectors BLOB NOT NULL" \
3409 ");"
3410
3411#define VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" "\"%w\".\"%w_auxiliary\""
3412
3413#define VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"" "\"%w\".\"%w_metadatachunks%02d\""
3414#define VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" "\"%w\".\"%w_metadatatext%02d\""
3415
3416#define VEC_INTERAL_ERROR"Internal sqlite-vec error: " "Internal sqlite-vec error: "
3417#define REPORT_URL"https://github.com/asg017/sqlite-vec/issues/new" "https://github.com/asg017/sqlite-vec/issues/new"
3418
3419typedef struct vec0_vtab vec0_vtab;
3420
3421#define VEC0_MAX_VECTOR_COLUMNS16 16
3422#define VEC0_MAX_PARTITION_COLUMNS4 4
3423#define VEC0_MAX_AUXILIARY_COLUMNS16 16
3424#define VEC0_MAX_METADATA_COLUMNS16 16
3425
3426#define SQLITE_VEC_VEC0_MAX_DIMENSIONS8192 8192
3427#define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16 16
3428#define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12 12
3429
// Kind tag of a user-defined vec0 column; one value is stored per column in
// vec0_vtab.user_column_kinds.
3430typedef enum {
3431 // vector column, ie "contents_embedding float[1024]"
3432 SQLITE_VEC0_USER_COLUMN_KIND_VECTOR = 1,
3433
3434 // partition key column, ie "user_id integer partition key"
3435 SQLITE_VEC0_USER_COLUMN_KIND_PARTITION = 2,
3436
3437 // auxiliary column (counted by vec0_vtab.numAuxiliaryColumns)
3438 SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY = 3,
3439
3440 // metadata column that can be filtered, ie "genre text"
3441 SQLITE_VEC0_USER_COLUMN_KIND_METADATA = 4,
3442} vec0_user_column_kind;
3443
// State of one vec0 virtual table: column layout, shadow table names and the
// prepared statements used against the shadow tables.
3444struct vec0_vtab {
3445 sqlite3_vtab base;
3446
3447 // the SQLite connection of the host database
3448 sqlite3 *db;
3449
3450 // True if the primary key of the vec0 table has a column type TEXT.
3451 // Will change the schema of the _rowids table, and insert/query logic.
3452 int pkIsText;
3453
3454 // number of defined vector columns.
3455 int numVectorColumns;
3456
3457 // number of defined PARTITION KEY columns.
3458 int numPartitionColumns;
3459
3460 // number of defined auxiliary columns
3461 int numAuxiliaryColumns;
3462
3463 // number of defined metadata columns
3464 int numMetadataColumns;
3465
3466
3467 // Name of the schema the table exists on.
3468 // Must be freed with sqlite3_free()
3469 char *schemaName;
3470
3471 // Name of the table the table exists on.
3472 // Must be freed with sqlite3_free()
3473 char *tableName;
3474
3475 // Name of the _rowids shadow table.
3476 // Must be freed with sqlite3_free()
3477 char *shadowRowidsName;
3478
3479 // Name of the _chunks shadow table.
3480 // Must be freed with sqlite3_free()
3481 char *shadowChunksName;
3482
3483 // contains enum vec0_user_column_kind values, one per user column; sized
3484 // for the maximum number of vector + partition + auxiliary + metadata columns
3485 vec0_user_column_kind user_column_kinds[VEC0_MAX_VECTOR_COLUMNS16 + VEC0_MAX_PARTITION_COLUMNS4 + VEC0_MAX_AUXILIARY_COLUMNS16 + VEC0_MAX_METADATA_COLUMNS16];
3486
 // per user column, the value handed back by the vec0_column_idx_to_*_idx()
 // helpers; indexed like user_column_kinds
3487 uint8_t user_column_idxs[VEC0_MAX_VECTOR_COLUMNS16 + VEC0_MAX_PARTITION_COLUMNS4 + VEC0_MAX_AUXILIARY_COLUMNS16 + VEC0_MAX_METADATA_COLUMNS16];
3488
3489
3490 // Name of all the vector chunk shadow tables.
3491 // Ex '_vector_chunks00'
3492 // Only the first numVectorColumns entries will be available.
3493 // The first numVectorColumns entries must be freed with sqlite3_free()
3494 char *shadowVectorChunksNames[VEC0_MAX_VECTOR_COLUMNS16];
3495
3496 // Name of all metadata chunk shadow tables, ie `_metadatachunks00`
3497 // Only the first numMetadataColumns entries will be available.
3498 // The first numMetadataColumns entries must be freed with sqlite3_free()
3499 char *shadowMetadataChunksNames[VEC0_MAX_METADATA_COLUMNS16];
3500
 // definitions of the user columns, grouped by kind
3501 struct VectorColumnDefinition vector_columns[VEC0_MAX_VECTOR_COLUMNS16];
3502 struct Vec0PartitionColumnDefinition paritition_columns[VEC0_MAX_PARTITION_COLUMNS4];
3503 struct Vec0AuxiliaryColumnDefinition auxiliary_columns[VEC0_MAX_AUXILIARY_COLUMNS16];
3504 struct Vec0MetadataColumnDefinition metadata_columns[VEC0_MAX_METADATA_COLUMNS16];
3505
3506 int chunk_size;
3507
3508 // select latest chunk from _chunks, getting chunk_id
 // Must be cleaned up with sqlite3_finalize().
3509 sqlite3_stmt *stmtLatestChunk;
3510
3511 /**
3512 * Statement to insert a row into the _rowids table, with a rowid.
3513 * Parameters:
3514 * 1: int64, rowid to insert
3515 * Result columns: none
3516 * SQL: "INSERT INTO _rowids(rowid) VALUES (?)"
3517 *
3518 * Must be cleaned up with sqlite3_finalize().
3519 */
3520 sqlite3_stmt *stmtRowidsInsertRowid;
3521
3522 /**
3523 * Statement to insert a row into the _rowids table, with an id.
3524 * The id column isn't a traditional primary key, but instead a unique
3525 * column to handle "text primary key" vec0 tables. The true int64 rowid
3526 * can be retrieved after inserting with sqlite3_last_insert_rowid().
3527 *
3528 * Parameters:
3529 * 1: text or null, id to insert
3530 * Result columns: none
3531 *
3532 * Must be cleaned up with sqlite3_finalize().
3533 */
3534 sqlite3_stmt *stmtRowidsInsertId;
3535
3536 /**
3537 * Statement to update the "position" columns chunk_id and chunk_offset for
3538 * a given _rowids row. Used when the "next available" chunk position is found
3539 * for a vector.
3540 *
3541 * Parameters:
3542 * 1: int64, chunk_id value
3543 * 2: int64, chunk_offset value
3544 * 3: int64, rowid value
3545 * Result columns: none
3546 *
3547 * Must be cleaned up with sqlite3_finalize().
3548 */
3549 sqlite3_stmt *stmtRowidsUpdatePosition;
3550
3551 /**
3552 * Statement to quickly find the chunk_id + chunk_offset of a given row.
3553 * Parameters:
3554 * 1: rowid of the row/vector to lookup
3555 * Result columns:
3556 * 0: chunk_id (i64)
3557 * 1: chunk_offset (i64)
3558 * SQL: "SELECT id, chunk_id, chunk_offset FROM _rowids WHERE rowid = ?"
 *
 * NOTE(review): the SQL above selects three columns (id first), which does
 * not match the two result columns listed - confirm against the code that
 * prepares and reads this statement.
3559 *
3560 * Must be cleaned up with sqlite3_finalize().
3561 */
3562 sqlite3_stmt *stmtRowidsGetChunkPosition;
3563};
3564
3565/**
3566 * @brief Finalize all the sqlite3_stmt members in a vec0_vtab.
3567 *
3568 * @param p vec0_vtab pointer
3569 */
3570void vec0_free_resources(vec0_vtab *p) {
3571 sqlite3_finalizesqlite3_api->finalize(p->stmtLatestChunk);
3572 p->stmtLatestChunk = NULL((void*)0);
3573 sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertRowid);
3574 p->stmtRowidsInsertRowid = NULL((void*)0);
3575 sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertId);
3576 p->stmtRowidsInsertId = NULL((void*)0);
3577 sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsUpdatePosition);
3578 p->stmtRowidsUpdatePosition = NULL((void*)0);
3579 sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsGetChunkPosition);
3580 p->stmtRowidsGetChunkPosition = NULL((void*)0);
3581}
3582
3583/**
3584 * @brief Free all memory and sqlite3_stmt members of a vec0_vtab
3585 *
3586 * @param p vec0_vtab pointer
3587 */
3588void vec0_free(vec0_vtab *p) {
3589 vec0_free_resources(p);
3590
3591 sqlite3_freesqlite3_api->free(p->schemaName);
3592 p->schemaName = NULL((void*)0);
3593 sqlite3_freesqlite3_api->free(p->tableName);
3594 p->tableName = NULL((void*)0);
3595 sqlite3_freesqlite3_api->free(p->shadowChunksName);
3596 p->shadowChunksName = NULL((void*)0);
3597 sqlite3_freesqlite3_api->free(p->shadowRowidsName);
3598 p->shadowRowidsName = NULL((void*)0);
3599
3600 for (int i = 0; i < p->numVectorColumns; i++) {
3601 sqlite3_freesqlite3_api->free(p->shadowVectorChunksNames[i]);
3602 p->shadowVectorChunksNames[i] = NULL((void*)0);
3603
3604 sqlite3_freesqlite3_api->free(p->vector_columns[i].name);
3605 p->vector_columns[i].name = NULL((void*)0);
3606 }
3607}
3608
3609int vec0_num_defined_user_columns(vec0_vtab *p) {
3610 return p->numVectorColumns + p->numPartitionColumns + p->numAuxiliaryColumns + p->numMetadataColumns;
3611}
3612
3613/**
3614 * @brief Returns the index of the distance hidden column for the given vec0
3615 * table.
3616 *
3617 * @param p vec0 table
3618 * @return int
3619 */
3620int vec0_column_distance_idx(vec0_vtab *p) {
3621 return VEC0_COLUMN_USERN_START1 + (vec0_num_defined_user_columns(p) - 1) +
3622 VEC0_COLUMN_OFFSET_DISTANCE1;
3623}
3624
3625/**
3626 * @brief Returns the index of the k hidden column for the given vec0 table.
3627 *
3628 * @param p vec0 table
3629 * @return int k column index
3630 */
3631int vec0_column_k_idx(vec0_vtab *p) {
3632 return VEC0_COLUMN_USERN_START1 + (vec0_num_defined_user_columns(p) - 1) +
3633 VEC0_COLUMN_OFFSET_K2;
3634}
3635
3636/**
3637 * Returns 1 if the given column-based index is a valid vector column,
3638 * 0 otherwise.
3639 */
3640int vec0_column_idx_is_vector(vec0_vtab *pVtab, int column_idx) {
3641 return column_idx >= VEC0_COLUMN_USERN_START1 &&
3642 column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) &&
3643 pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
3644}
3645
3646/**
3647 * Returns the vector index of the given user column index.
3648 * ONLY call if validated with vec0_column_idx_is_vector before
3649 */
3650int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) {
3651 UNUSED_PARAMETER(pVtab)(void)(pVtab);
3652 return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1];
3653}
3654/**
3655 * Returns 1 if the given column-based index is a "partition key" column,
3656 * 0 otherwise.
3657 */
3658int vec0_column_idx_is_partition(vec0_vtab *pVtab, int column_idx) {
3659 return column_idx >= VEC0_COLUMN_USERN_START1 &&
3660 column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) &&
3661 pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
3662}
3663
3664/**
3665 * Returns the partition column index of the given user column index.
3666 * ONLY call if validated with vec0_column_idx_is_vector before
3667 */
3668int vec0_column_idx_to_partition_idx(vec0_vtab *pVtab, int column_idx) {
3669 UNUSED_PARAMETER(pVtab)(void)(pVtab);
3670 return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1];
3671}
3672
3673/**
3674 * Returns 1 if the given column-based index is a auxiliary column,
3675 * 0 otherwise.
3676 */
3677int vec0_column_idx_is_auxiliary(vec0_vtab *pVtab, int column_idx) {
3678 return column_idx >= VEC0_COLUMN_USERN_START1 &&
3679 column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) &&
3680 pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
3681}
3682
3683/**
3684 * Returns the auxiliary column index of the given user column index.
3685 * ONLY call if validated with vec0_column_idx_to_partition_idx before
3686 */
3687int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) {
3688 UNUSED_PARAMETER(pVtab)(void)(pVtab);
3689 return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1];
3690}
3691
3692/**
3693 * Returns 1 if the given column-based index is a metadata column,
3694 * 0 otherwise.
3695 */
3696int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) {
3697 return column_idx >= VEC0_COLUMN_USERN_START1 &&
3698 column_idx <= (VEC0_COLUMN_USERN_START1 + vec0_num_defined_user_columns(pVtab) - 1) &&
3699 pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START1] == SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
3700}
3701
3702/**
3703 * Returns the metadata column index of the given user column index.
3704 * ONLY call if validated with vec0_column_idx_is_metadata before
3705 */
3706int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) {
3707 UNUSED_PARAMETER(pVtab)(void)(pVtab);
3708 return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START1];
3709}
3710
3711/**
3712 * @brief Retrieve the chunk_id, chunk_offset, and possible "id" value
3713 * of a vec0_vtab row with the provided rowid
3714 *
3715 * @param p vec0_vtab
3716 * @param rowid the rowid of the row to query
3717 * @param id output, optional sqlite3_value to provide the id.
3718 * Useful for text PK rows. Must be freed with sqlite3_value_free()
3719 * @param chunk_id output, the chunk_id the row belongs to
3720 * @param chunk_offset output, the offset within the chunk the row belongs to
3721 * @return SQLITE_ROW on success, error code otherwise. SQLITE_EMPTY if row DNE
3722 */
3723int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, sqlite3_value **id,
3724 i64 *chunk_id, i64 *chunk_offset) {
3725 int rc;
3726
3727 if (!p->stmtRowidsGetChunkPosition) {
3728 const char *zSql =
3729 sqlite3_mprintfsqlite3_api->mprintf("SELECT id, chunk_id, chunk_offset "
3730 "FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE rowid = ?",
3731 p->schemaName, p->tableName);
3732 if (!zSql) {
3733 rc = SQLITE_NOMEM7;
3734 goto cleanup;
3735 }
3736 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsGetChunkPosition, 0);
3737 sqlite3_freesqlite3_api->free((void *)zSql);
3738 if (rc != SQLITE_OK0) {
3739 vtab_set_error(
3740 &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: "
3741 "could not initialize 'rowids get chunk position' statement");
3742 goto cleanup;
3743 }
3744 }
3745
3746 sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsGetChunkPosition, 1, rowid);
3747 rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsGetChunkPosition);
3748 // special case: when no results, return SQLITE_EMPTY to convey "that chunk
3749 // position doesnt exist"
3750 if (rc == SQLITE_DONE101) {
3751 rc = SQLITE_EMPTY16;
3752 goto cleanup;
3753 }
3754 if (rc != SQLITE_ROW100) {
3755 goto cleanup;
3756 }
3757
3758 if (id) {
3759 sqlite3_value *value =
3760 sqlite3_column_valuesqlite3_api->column_value(p->stmtRowidsGetChunkPosition, 0);
3761 *id = sqlite3_value_dupsqlite3_api->value_dup(value);
3762 if (!*id) {
3763 rc = SQLITE_NOMEM7;
3764 goto cleanup;
3765 }
3766 }
3767
3768 if (chunk_id) {
3769 *chunk_id = sqlite3_column_int64sqlite3_api->column_int64(p->stmtRowidsGetChunkPosition, 1);
3770 }
3771 if (chunk_offset) {
3772 *chunk_offset = sqlite3_column_int64sqlite3_api->column_int64(p->stmtRowidsGetChunkPosition, 2);
3773 }
3774
3775 rc = SQLITE_OK0;
3776
3777cleanup:
3778 sqlite3_resetsqlite3_api->reset(p->stmtRowidsGetChunkPosition);
3779 sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsGetChunkPosition);
3780 return rc;
3781}
3782
3783/**
3784 * @brief Return the id value from the _rowids table where _rowids.rowid =
3785 * rowid.
3786 *
3787 * @param pVtab: vec0 table to query
3788 * @param rowid: rowid of the row to query.
3789 * @param out: A dup'ed sqlite3_value of the id column. Might be null.
3790 * Must be cleaned up with sqlite3_value_free().
3791 * @returns SQLITE_OK on success, error code on failure
3792 */
3793int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid,
3794 sqlite3_value **out) {
3795 // PERF: different strategy than get_chunk_position?
3796 return vec0_get_chunk_position((vec0_vtab *)pVtab, rowid, out, NULL((void*)0), NULL((void*)0));
3797}
3798
3799int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) {
3800 sqlite3_stmt *stmt = NULL((void*)0);
3801 int rc;
3802 char *zSql;
3803 zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT rowid"
3804 " FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE id = ?",
3805 p->schemaName, p->tableName);
3806 if (!zSql) {
3807 rc = SQLITE_NOMEM7;
3808 goto cleanup;
3809 }
3810 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
3811 sqlite3_freesqlite3_api->free(zSql);
3812 if (rc != SQLITE_OK0) {
3813 goto cleanup;
3814 }
3815 sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1, valueId);
3816 rc = sqlite3_stepsqlite3_api->step(stmt);
3817 if (rc == SQLITE_DONE101) {
3818 rc = SQLITE_EMPTY16;
3819 goto cleanup;
3820 }
3821 if (rc != SQLITE_ROW100) {
3822 goto cleanup;
3823 }
3824 *rowid = sqlite3_column_int64sqlite3_api->column_int64(stmt, 0);
3825 rc = sqlite3_stepsqlite3_api->step(stmt);
3826 if (rc != SQLITE_DONE101) {
3827 goto cleanup;
3828 }
3829
3830 rc = SQLITE_OK0;
3831
3832cleanup:
3833 sqlite3_finalizesqlite3_api->finalize(stmt);
3834 return rc;
3835}
3836
3837int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) {
3838 if (!p->pkIsText) {
3839 sqlite3_result_int64sqlite3_api->result_int64(context, rowid);
3840 return SQLITE_OK0;
3841 }
3842 sqlite3_value *valueId;
3843 int rc = vec0_get_id_value_from_rowid(p, rowid, &valueId);
3844 if (rc != SQLITE_OK0) {
3845 return rc;
3846 }
3847 if (!valueId) {
3848 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
3849 } else {
3850 sqlite3_result_valuesqlite3_api->result_value(context, valueId);
3851 sqlite3_value_freesqlite3_api->value_free(valueId);
3852 }
3853 return SQLITE_OK0;
3854}
3855
3856/**
3857 * @brief
3858 *
3859 * @param pVtab: virtual table to query
3860 * @param rowid: row to lookup
3861 * @param vector_column_idx: which vector column to query
3862 * @param outVector: Output pointer to the vector buffer.
3863 * Must be sqlite3_free()'ed.
3864 * @param outVectorSize: Pointer to a int where the size of outVector
3865 * will be stored.
3866 * @return int SQLITE_OK on success.
3867 */
3868int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx,
3869 void **outVector, int *outVectorSize) {
3870 vec0_vtab *p = pVtab;
3871 int rc, brc;
3872 i64 chunk_id;
3873 i64 chunk_offset;
3874 size_t size;
3875 void *buf = NULL((void*)0);
3876 int blobOffset;
3877 sqlite3_blob *vectorBlob = NULL((void*)0);
3878 assert((vector_column_idx >= 0) &&((void) sizeof (((vector_column_idx >= 0) && (vector_column_idx
< pVtab->numVectorColumns)) ? 1 : 0), __extension__ ({
if ((vector_column_idx >= 0) && (vector_column_idx
< pVtab->numVectorColumns)) ; else __assert_fail ("(vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)"
, "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 3879, __extension__ __PRETTY_FUNCTION__); }))
3879 (vector_column_idx < pVtab->numVectorColumns))((void) sizeof (((vector_column_idx >= 0) && (vector_column_idx
< pVtab->numVectorColumns)) ? 1 : 0), __extension__ ({
if ((vector_column_idx >= 0) && (vector_column_idx
< pVtab->numVectorColumns)) ; else __assert_fail ("(vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)"
, "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 3879, __extension__ __PRETTY_FUNCTION__); }))
;
3880
3881 rc = vec0_get_chunk_position(pVtab, rowid, NULL((void*)0), &chunk_id, &chunk_offset);
3882 if (rc == SQLITE_EMPTY16) {
3883 vtab_set_error(&pVtab->base, "Could not find a row with rowid %lld", rowid);
3884 goto cleanup;
3885 }
3886 if (rc != SQLITE_OK0) {
3887 goto cleanup;
3888 }
3889
3890 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName,
3891 p->shadowVectorChunksNames[vector_column_idx],
3892 "vectors", chunk_id, 0, &vectorBlob);
3893
3894 if (rc != SQLITE_OK0) {
3895 vtab_set_error(&pVtab->base,
3896 "Could not fetch vector data for %lld, opening blob failed",
3897 rowid);
3898 rc = SQLITE_ERROR1;
3899 goto cleanup;
3900 }
3901
3902 size = vector_column_byte_size(pVtab->vector_columns[vector_column_idx]);
3903 blobOffset = chunk_offset * size;
3904
3905 buf = sqlite3_mallocsqlite3_api->malloc(size);
3906 if (!buf) {
3907 rc = SQLITE_NOMEM7;
3908 goto cleanup;
3909 }
3910
3911 rc = sqlite3_blob_readsqlite3_api->blob_read(vectorBlob, buf, size, blobOffset);
3912 if (rc != SQLITE_OK0) {
3913 sqlite3_freesqlite3_api->free(buf);
3914 buf = NULL((void*)0);
3915 vtab_set_error(
3916 &pVtab->base,
3917 "Could not fetch vector data for %lld, reading from blob failed",
3918 rowid);
3919 rc = SQLITE_ERROR1;
3920 goto cleanup;
3921 }
3922
3923 *outVector = buf;
3924 if (outVectorSize) {
3925 *outVectorSize = size;
3926 }
3927 rc = SQLITE_OK0;
3928
3929cleanup:
3930 brc = sqlite3_blob_closesqlite3_api->blob_close(vectorBlob);
3931 if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) {
3932 vtab_set_error(
3933 &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: "
3934 "unknown error, could not close vector blob, please file an issue");
3935 return brc;
3936 }
3937
3938 return rc;
3939}
3940
3941/**
3942 * @brief Retrieve the sqlite3_value of the i'th partition value for the given row.
3943 *
3944 * @param pVtab - the vec0_vtab in questions
3945 * @param rowid - rowid of target row
3946 * @param partition_idx - which partition column to retrieve
3947 * @param outValue - output sqlite3_value
3948 * @return int - SQLITE_OK on success, otherwise error code
3949 */
3950int vec0_get_partition_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int partition_idx, sqlite3_value ** outValue) {
3951 int rc;
3952 i64 chunk_id;
3953 i64 chunk_offset;
3954 rc = vec0_get_chunk_position(pVtab, rowid, NULL((void*)0), &chunk_id, &chunk_offset);
3955 if(rc != SQLITE_OK0) {
3956 return rc;
3957 }
3958 sqlite3_stmt * stmt = NULL((void*)0);
3959 char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT partition%02d FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" " WHERE chunk_id = ?", partition_idx, pVtab->schemaName, pVtab->tableName);
3960 if(!zSql) {
3961 return SQLITE_NOMEM7;
3962 }
3963 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pVtab->db, zSql, -1, &stmt, NULL((void*)0));
3964 sqlite3_freesqlite3_api->free(zSql);
3965 if(rc != SQLITE_OK0) {
3966 return rc;
3967 }
3968 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, chunk_id);
3969 rc = sqlite3_stepsqlite3_api->step(stmt);
3970 if(rc != SQLITE_ROW100) {
3971 rc = SQLITE_ERROR1;
3972 goto done;
3973 }
3974 *outValue = sqlite3_value_dupsqlite3_api->value_dup(sqlite3_column_valuesqlite3_api->column_value(stmt, 0));
3975 if(!*outValue) {
3976 rc = SQLITE_NOMEM7;
3977 goto done;
3978 }
3979 rc = SQLITE_OK0;
3980
3981 done:
3982 sqlite3_finalizesqlite3_api->finalize(stmt);
3983 return rc;
3984
3985}
3986
3987/**
3988 * @brief Get the value of an auxiliary column for the given rowid
3989 *
3990 * @param pVtab vec0_vtab
3991 * @param rowid the rowid of the row to lookup
3992 * @param auxiliary_idx aux index of the column we care about
3993 * @param outValue Output sqlite3_value to store
3994 * @return int SQLITE_OK on success, error code otherwise
3995 */
3996int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int auxiliary_idx, sqlite3_value ** outValue) {
3997 int rc;
3998 sqlite3_stmt * stmt = NULL((void*)0);
3999 char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT value%02d FROM " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " WHERE rowid = ?", auxiliary_idx, pVtab->schemaName, pVtab->tableName);
4000 if(!zSql) {
4001 return SQLITE_NOMEM7;
4002 }
4003 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pVtab->db, zSql, -1, &stmt, NULL((void*)0));
4004 sqlite3_freesqlite3_api->free(zSql);
4005 if(rc != SQLITE_OK0) {
4006 return rc;
4007 }
4008 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
4009 rc = sqlite3_stepsqlite3_api->step(stmt);
4010 if(rc != SQLITE_ROW100) {
4011 rc = SQLITE_ERROR1;
4012 goto done;
4013 }
4014 *outValue = sqlite3_value_dupsqlite3_api->value_dup(sqlite3_column_valuesqlite3_api->column_value(stmt, 0));
4015 if(!*outValue) {
4016 rc = SQLITE_NOMEM7;
4017 goto done;
4018 }
4019 rc = SQLITE_OK0;
4020
4021 done:
4022 sqlite3_finalizesqlite3_api->finalize(stmt);
4023 return rc;
4024}
4025
4026/**
4027 * @brief Result the given metadata value for the given row and metadata column index.
4028 * Will traverse the metadatachunksNN table with BLOB I/0 for the given rowid.
4029 *
4030 * @param p
4031 * @param rowid
4032 * @param metadata_idx
4033 * @param context
4034 * @return int
4035 */
4036int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_idx, sqlite3_context * context) {
4037 int rc;
4038 i64 chunk_id;
4039 i64 chunk_offset;
4040 rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset);
4041 if(rc != SQLITE_OK0) {
4042 return rc;
4043 }
4044 sqlite3_blob * blobValue;
4045 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &blobValue);
4046 if(rc != SQLITE_OK0) {
4047 return rc;
4048 }
4049
4050 switch(p->metadata_columns[metadata_idx].kind) {
4051 case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
4052 u8 block;
4053 rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(block), chunk_offset / CHAR_BIT8);
4054 if(rc != SQLITE_OK0) {
4055 goto done;
4056 }
4057 int value = block >> ((chunk_offset % CHAR_BIT8)) & 1;
4058 sqlite3_result_intsqlite3_api->result_int(context, value);
4059 break;
4060 }
4061 case VEC0_METADATA_COLUMN_KIND_INTEGER: {
4062 i64 value;
4063 rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
4064 if(rc != SQLITE_OK0) {
4065 goto done;
4066 }
4067 sqlite3_result_int64sqlite3_api->result_int64(context, value);
4068 break;
4069 }
4070 case VEC0_METADATA_COLUMN_KIND_FLOAT: {
4071 double value;
4072 rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
4073 if(rc != SQLITE_OK0) {
4074 goto done;
4075 }
4076 sqlite3_result_doublesqlite3_api->result_double(context, value);
4077 break;
4078 }
4079 case VEC0_METADATA_COLUMN_KIND_TEXT: {
4080 u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
4081 rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16);
4082 if(rc != SQLITE_OK0) {
4083 goto done;
4084 }
4085 int length = ((int *)view)[0];
4086 if(length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
4087 sqlite3_result_textsqlite3_api->result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
4088 }
4089 else {
4090 sqlite3_stmt * stmt;
4091 const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
4092 if(!zSql) {
4093 rc = SQLITE_ERROR1;
4094 goto done;
4095 }
4096 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
4097 sqlite3_freesqlite3_api->free((void *) zSql);
4098 if(rc != SQLITE_OK0) {
4099 goto done;
4100 }
4101 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
4102 rc = sqlite3_stepsqlite3_api->step(stmt);
4103 if(rc != SQLITE_ROW100) {
4104 sqlite3_finalizesqlite3_api->finalize(stmt);
4105 rc = SQLITE_ERROR1;
4106 goto done;
4107 }
4108 sqlite3_result_valuesqlite3_api->result_value(context, sqlite3_column_valuesqlite3_api->column_value(stmt, 0));
4109 sqlite3_finalizesqlite3_api->finalize(stmt);
4110 rc = SQLITE_OK0;
4111 }
4112 break;
4113 }
4114 }
4115 done:
4116 // blobValue is read-only, will not fail on close
4117 sqlite3_blob_closesqlite3_api->blob_close(blobValue);
4118 return rc;
4119
4120}
4121
4122int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_value ** partitionKeyValues) {
4123 int rc;
4124 const char *zSql;
4125 // lazy initialize stmtLatestChunk when needed. May be cleared during xSync()
4126 if (!p->stmtLatestChunk) {
4127 if(p->numPartitionColumns > 0) {
4128 sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0));
4129 sqlite3_str_appendfsqlite3_api->str_appendf(s, "SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" " WHERE ",
4130 p->schemaName, p->tableName);
4131
4132 for(int i = 0; i < p->numPartitionColumns; i++) {
4133 if(i != 0) {
4134 sqlite3_str_appendallsqlite3_api->str_appendall(s, " AND ");
4135 }
4136 sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d = ? ", i);
4137 }
4138 zSql = sqlite3_str_finishsqlite3_api->str_finish(s);
4139 }else {
4140 zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"",
4141 p->schemaName, p->tableName);
4142 }
4143
4144 if (!zSql) {
4145 rc = SQLITE_NOMEM7;
4146 goto cleanup;
4147 }
4148 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtLatestChunk, 0);
4149 sqlite3_freesqlite3_api->free((void *)zSql);
4150 if (rc != SQLITE_OK0) {
4151 // IMP: V21406_05476
4152 vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: "
4153 "could not initialize 'latest chunk' statement");
4154 goto cleanup;
4155 }
4156 }
4157
4158 for(int i = 0; i < p->numPartitionColumns; i++) {
4159 sqlite3_bind_valuesqlite3_api->bind_value(p->stmtLatestChunk, i+1, (partitionKeyValues[i]));
4160 }
4161
4162 rc = sqlite3_stepsqlite3_api->step(p->stmtLatestChunk);
4163 if (rc != SQLITE_ROW100) {
4164 // IMP: V31559_15629
4165 vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "Could not find latest chunk");
4166 rc = SQLITE_ERROR1;
4167 goto cleanup;
4168 }
4169 if(sqlite3_column_typesqlite3_api->column_type(p->stmtLatestChunk, 0) == SQLITE_NULL5){
4170 rc = SQLITE_EMPTY16;
4171 goto cleanup;
4172 }
4173 *chunk_rowid = sqlite3_column_int64sqlite3_api->column_int64(p->stmtLatestChunk, 0);
4174 rc = sqlite3_stepsqlite3_api->step(p->stmtLatestChunk);
4175 if (rc != SQLITE_DONE101) {
4176 vtab_set_error(&p->base,
4177 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
4178 "unknown result code when closing out stmtLatestChunk. "
4179 "Please file an issue: " REPORT_URL"https://github.com/asg017/sqlite-vec/issues/new",
4180 p->schemaName, p->shadowChunksName);
4181 goto cleanup;
4182 }
4183 rc = SQLITE_OK0;
4184
4185cleanup:
4186 if (p->stmtLatestChunk) {
4187 sqlite3_resetsqlite3_api->reset(p->stmtLatestChunk);
4188 sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtLatestChunk);
4189 }
4190 return rc;
4191}
4192
4193int vec0_rowids_insert_rowid(vec0_vtab *p, i64 rowid) {
4194 int rc = SQLITE_OK0;
4195 int entered = 0;
4196 UNUSED_PARAMETER(entered)(void)(entered); // temporary
4197 if (!p->stmtRowidsInsertRowid) {
4198 const char *zSql =
4199 sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "(rowid)"
4200 "VALUES (?);",
4201 p->schemaName, p->tableName);
4202 if (!zSql) {
4203 rc = SQLITE_NOMEM7;
4204 goto cleanup;
4205 }
4206 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertRowid, 0);
4207 sqlite3_freesqlite3_api->free((void *)zSql);
4208 if (rc != SQLITE_OK0) {
4209 vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: "
4210 "could not initialize 'insert rowids' statement");
4211 goto cleanup;
4212 }
4213 }
4214
4215#if SQLITE_THREADSAFE
4216 if (sqlite3_mutex_entersqlite3_api->mutex_enter) {
4217 sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db));
4218 entered = 1;
4219 }
4220#endif
4221 sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsInsertRowid, 1, rowid);
4222 rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsInsertRowid);
4223
4224 if (rc != SQLITE_DONE101) {
4225 if (sqlite3_extended_errcodesqlite3_api->extended_errcode(p->db) == SQLITE_CONSTRAINT_PRIMARYKEY(19 | (6<<8))) {
4226 // IMP: V17090_01160
4227 vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
4228 p->tableName);
4229 } else {
4230 // IMP: V04679_21517
4231 vtab_set_error(&p->base,
4232 "Error inserting rowid into rowids shadow table: %s",
4233 sqlite3_errmsgsqlite3_api->errmsg(sqlite3_db_handlesqlite3_api->db_handle(p->stmtRowidsInsertId)));
4234 }
4235 rc = SQLITE_ERROR1;
4236 goto cleanup;
4237 }
4238
4239 rc = SQLITE_OK0;
4240
4241cleanup:
4242 if (p->stmtRowidsInsertRowid) {
4243 sqlite3_resetsqlite3_api->reset(p->stmtRowidsInsertRowid);
4244 sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsInsertRowid);
4245 }
4246
4247#if SQLITE_THREADSAFE
4248 if (sqlite3_mutex_leavesqlite3_api->mutex_leave && entered) {
4249 sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db));
4250 }
4251#endif
4252 return rc;
4253}
4254
4255int vec0_rowids_insert_id(vec0_vtab *p, sqlite3_value *idValue, i64 *rowid) {
4256 int rc = SQLITE_OK0;
4257 int entered = 0;
4258 UNUSED_PARAMETER(entered)(void)(entered); // temporary
4259 if (!p->stmtRowidsInsertId) {
4260 const char *zSql =
4261 sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" "(id)"
4262 "VALUES (?);",
4263 p->schemaName, p->tableName);
4264 if (!zSql) {
4265 rc = SQLITE_NOMEM7;
4266 goto complete;
4267 }
4268 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertId, 0);
4269 sqlite3_freesqlite3_api->free((void *)zSql);
4270 if (rc != SQLITE_OK0) {
4271 vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: "
4272 "could not initialize 'insert rowids id' statement");
4273 goto complete;
4274 }
4275 }
4276
4277#if SQLITE_THREADSAFE
4278 if (sqlite3_mutex_entersqlite3_api->mutex_enter) {
4279 sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db));
4280 entered = 1;
4281 }
4282#endif
4283
4284 if (idValue) {
4285 sqlite3_bind_valuesqlite3_api->bind_value(p->stmtRowidsInsertId, 1, idValue);
4286 }
4287 rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsInsertId);
4288
4289 if (rc != SQLITE_DONE101) {
4290 if (sqlite3_extended_errcodesqlite3_api->extended_errcode(p->db) == SQLITE_CONSTRAINT_UNIQUE(19 | (8<<8))) {
4291 // IMP: V20497_04568
4292 vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
4293 p->tableName);
4294 } else {
4295 // IMP: V24016_08086
4296 // IMP: V15177_32015
4297 vtab_set_error(&p->base,
4298 "Error inserting id into rowids shadow table: %s",
4299 sqlite3_errmsgsqlite3_api->errmsg(sqlite3_db_handlesqlite3_api->db_handle(p->stmtRowidsInsertId)));
4300 }
4301 rc = SQLITE_ERROR1;
4302 goto complete;
4303 }
4304
4305 *rowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->db);
4306 rc = SQLITE_OK0;
4307
4308complete:
4309 if (p->stmtRowidsInsertId) {
4310 sqlite3_resetsqlite3_api->reset(p->stmtRowidsInsertId);
4311 sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsInsertId);
4312 }
4313
4314#if SQLITE_THREADSAFE
4315 if (sqlite3_mutex_leavesqlite3_api->mutex_leave && entered) {
4316 sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db));
4317 }
4318#endif
4319 return rc;
4320}
4321
4322int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_size) {
4323 switch(kind) {
4324 case VEC0_METADATA_COLUMN_KIND_BOOLEAN:
4325 return chunk_size / 8;
4326 case VEC0_METADATA_COLUMN_KIND_INTEGER:
4327 return chunk_size * sizeof(i64);
4328 case VEC0_METADATA_COLUMN_KIND_FLOAT:
4329 return chunk_size * sizeof(double);
4330 case VEC0_METADATA_COLUMN_KIND_TEXT:
4331 return chunk_size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16;
4332 }
4333 return 0;
4334}
4335
4336int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid,
4337 i64 chunk_offset) {
4338 int rc = SQLITE_OK0;
4339
4340 if (!p->stmtRowidsUpdatePosition) {
4341 const char *zSql = sqlite3_mprintfsqlite3_api->mprintf(" UPDATE " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\""
4342 " SET chunk_id = ?, chunk_offset = ?"
4343 " WHERE rowid = ?",
4344 p->schemaName, p->tableName);
4345 if (!zSql) {
4346 rc = SQLITE_NOMEM7;
4347 goto cleanup;
4348 }
4349 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &p->stmtRowidsUpdatePosition, 0);
4350 sqlite3_freesqlite3_api->free((void *)zSql);
4351 if (rc != SQLITE_OK0) {
4352 vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: "
4353 "could not initialize 'update rowids position' statement");
4354 goto cleanup;
4355 }
4356 }
4357
4358 sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 1, chunk_rowid);
4359 sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 2, chunk_offset);
4360 sqlite3_bind_int64sqlite3_api->bind_int64(p->stmtRowidsUpdatePosition, 3, rowid);
4361
4362 rc = sqlite3_stepsqlite3_api->step(p->stmtRowidsUpdatePosition);
4363 if (rc != SQLITE_DONE101) {
4364 // IMP: V21925_05995
4365 vtab_set_error(&p->base,
4366 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
4367 "could not update rowids position for rowid=%lld, "
4368 "chunk_rowid=%lld, chunk_offset=%lld",
4369 rowid, chunk_rowid, chunk_offset);
4370 rc = SQLITE_ERROR1;
4371 goto cleanup;
4372 }
4373 rc = SQLITE_OK0;
4374
4375cleanup:
4376 if (p->stmtRowidsUpdatePosition) {
4377 sqlite3_resetsqlite3_api->reset(p->stmtRowidsUpdatePosition);
4378 sqlite3_clear_bindingssqlite3_api->clear_bindings(p->stmtRowidsUpdatePosition);
4379 }
4380
4381 return rc;
4382}
4383
4384/**
4385 * @brief Adds a new chunk for the vec0 table, and the corresponding vector
4386 * chunks.
4387 *
4388 * Inserts a new row into the _chunks table, with blank data, and uses that new
4389 * rowid to insert new blank rows into _vector_chunksXX tables.
4390 *
4391 * @param p: vec0 table to add new chunk
4392 * @param paritionKeyValues: Array of partition key valeus for the new chunk, if available
4393 * @param chunk_rowid: Output pointer, if not NULL, then will be filled with the
4394 * new chunk rowid.
4395 * @return int SQLITE_OK on success, error code otherwise.
4396 */
4397int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk_rowid) {
4398 int rc;
4399 char *zSql;
4400 sqlite3_stmt *stmt;
4401 i64 rowid;
4402
4403 // Step 1: Insert a new row in _chunks, capture that new rowid
4404 if(p->numPartitionColumns > 0) {
4405 sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0));
4406 sqlite3_str_appendfsqlite3_api->str_appendf(s, "INSERT INTO " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", p->schemaName, p->tableName);
4407 sqlite3_str_appendallsqlite3_api->str_appendall(s, "(size, validity, rowids");
4408 for(int i = 0; i < p->numPartitionColumns; i++) {
4409 sqlite3_str_appendfsqlite3_api->str_appendf(s, ", partition%02d", i);
4410 }
4411 sqlite3_str_appendallsqlite3_api->str_appendall(s, ") VALUES (?, ?, ?");
4412 for(int i = 0; i < p->numPartitionColumns; i++) {
4413 sqlite3_str_appendallsqlite3_api->str_appendall(s, ", ?");
4414 }
4415 sqlite3_str_appendallsqlite3_api->str_appendall(s, ")");
4416
4417 zSql = sqlite3_str_finishsqlite3_api->str_finish(s);
4418 }else {
4419 zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\""
4420 "(size, validity, rowids) "
4421 "VALUES (?, ?, ?);",
4422 p->schemaName, p->tableName);
4423 }
4424
4425 if (!zSql) {
4426 return SQLITE_NOMEM7;
4427 }
4428 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
4429 sqlite3_freesqlite3_api->free(zSql);
4430 if (rc != SQLITE_OK0) {
4431 sqlite3_finalizesqlite3_api->finalize(stmt);
4432 return rc;
4433 }
4434
4435#if SQLITE_THREADSAFE
4436 if (sqlite3_mutex_entersqlite3_api->mutex_enter) {
4437 sqlite3_mutex_entersqlite3_api->mutex_enter(sqlite3_db_mutexsqlite3_api->db_mutex(p->db));
4438 }
4439#endif
4440
4441 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, p->chunk_size); // size
4442 sqlite3_bind_zeroblobsqlite3_api->bind_zeroblob(stmt, 2, p->chunk_size / CHAR_BIT8); // validity bitmap
4443 sqlite3_bind_zeroblobsqlite3_api->bind_zeroblob(stmt, 3, p->chunk_size * sizeof(i64)); // rowids
4444
4445 for(int i = 0; i < p->numPartitionColumns; i++) {
4446 sqlite3_bind_valuesqlite3_api->bind_value(stmt, 4 + i, partitionKeyValues[i]);
4447 }
4448
4449 rc = sqlite3_stepsqlite3_api->step(stmt);
4450 int failed = rc != SQLITE_DONE101;
4451 rowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->db);
4452#if SQLITE_THREADSAFE
4453 if (sqlite3_mutex_leavesqlite3_api->mutex_leave) {
4454 sqlite3_mutex_leavesqlite3_api->mutex_leave(sqlite3_db_mutexsqlite3_api->db_mutex(p->db));
4455 }
4456#endif
4457 sqlite3_finalizesqlite3_api->finalize(stmt);
4458 if (failed) {
4459 return SQLITE_ERROR1;
4460 }
4461
4462 // Step 2: Create new vector chunks for each vector column, with
4463 // that new chunk_rowid.
4464
4465 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
4466 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
4467 continue;
4468 }
4469 int vector_column_idx = p->user_column_idxs[i];
4470 i64 vectorsSize =
4471 p->chunk_size * vector_column_byte_size(p->vector_columns[vector_column_idx]);
4472
4473 zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_VECTOR_N_NAME"\"%w\".\"%w_vector_chunks%02d\""
4474 "(rowid, vectors)"
4475 "VALUES (?, ?)",
4476 p->schemaName, p->tableName, vector_column_idx);
4477 if (!zSql) {
4478 return SQLITE_NOMEM7;
4479 }
4480 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
4481 sqlite3_freesqlite3_api->free(zSql);
4482
4483 if (rc != SQLITE_OK0) {
4484 sqlite3_finalizesqlite3_api->finalize(stmt);
4485 return rc;
4486 }
4487
4488 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
4489 sqlite3_bind_zeroblob64sqlite3_api->bind_zeroblob64(stmt, 2, vectorsSize);
4490
4491 rc = sqlite3_stepsqlite3_api->step(stmt);
4492 sqlite3_finalizesqlite3_api->finalize(stmt);
4493 if (rc != SQLITE_DONE101) {
4494 return rc;
4495 }
4496 }
4497
4498 // Step 3: Create new metadata chunks for each metadata column
4499 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
4500 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
4501 continue;
4502 }
4503 int metadata_column_idx = p->user_column_idxs[i];
4504 zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\""
4505 "(rowid, data)"
4506 "VALUES (?, ?)",
4507 p->schemaName, p->tableName, metadata_column_idx);
4508 if (!zSql) {
4509 return SQLITE_NOMEM7;
4510 }
4511 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
4512 sqlite3_freesqlite3_api->free(zSql);
4513
4514 if (rc != SQLITE_OK0) {
4515 sqlite3_finalizesqlite3_api->finalize(stmt);
4516 return rc;
4517 }
4518
4519 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
4520 sqlite3_bind_zeroblob64sqlite3_api->bind_zeroblob64(stmt, 2, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size));
4521
4522 rc = sqlite3_stepsqlite3_api->step(stmt);
4523 sqlite3_finalizesqlite3_api->finalize(stmt);
4524 if (rc != SQLITE_DONE101) {
4525 return rc;
4526 }
4527 }
4528
4529
4530 if (chunk_rowid) {
4531 *chunk_rowid = rowid;
4532 }
4533
4534 return SQLITE_OK0;
4535}
4536
4537struct vec0_query_fullscan_data {
4538 sqlite3_stmt *rowids_stmt;
4539 i8 done;
4540};
4541void vec0_query_fullscan_data_clear(
4542 struct vec0_query_fullscan_data *fullscan_data) {
4543 if (!fullscan_data)
4544 return;
4545
4546 if (fullscan_data->rowids_stmt) {
4547 sqlite3_finalizesqlite3_api->finalize(fullscan_data->rowids_stmt);
4548 fullscan_data->rowids_stmt = NULL((void*)0);
4549 }
4550}
4551
4552struct vec0_query_knn_data {
4553 i64 k;
4554 i64 k_used;
4555 // Array of rowids of size k. Must be freed with sqlite3_free().
4556 i64 *rowids;
4557 // Array of distances of size k. Must be freed with sqlite3_free().
4558 f32 *distances;
4559 i64 current_idx;
4560};
4561void vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) {
4562 if (!knn_data)
4563 return;
4564
4565 if (knn_data->rowids) {
4566 sqlite3_freesqlite3_api->free(knn_data->rowids);
4567 knn_data->rowids = NULL((void*)0);
4568 }
4569 if (knn_data->distances) {
4570 sqlite3_freesqlite3_api->free(knn_data->distances);
4571 knn_data->distances = NULL((void*)0);
4572 }
4573}
4574
4575struct vec0_query_point_data {
4576 i64 rowid;
4577 void *vectors[VEC0_MAX_VECTOR_COLUMNS16];
4578 int done;
4579};
4580void vec0_query_point_data_clear(struct vec0_query_point_data *point_data) {
4581 if (!point_data)
4582 return;
4583 for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS16; i++) {
4584 sqlite3_freesqlite3_api->free(point_data->vectors[i]);
4585 point_data->vectors[i] = NULL((void*)0);
4586 }
4587}
4588
// The query plan selected for a cursor. Each plan is encoded as a printable
// character.
typedef enum {
  // If any values are updated, please update the ARCHITECTURE.md docs
  // accordingly!

  VEC0_QUERY_PLAN_FULLSCAN = '1',
  VEC0_QUERY_PLAN_POINT = '2',
  VEC0_QUERY_PLAN_KNN = '3',
} vec0_query_plan;
4596
4597typedef struct vec0_cursor vec0_cursor;
4598struct vec0_cursor {
4599 sqlite3_vtab_cursor base;
4600
4601 vec0_query_plan query_plan;
4602 struct vec0_query_fullscan_data *fullscan_data;
4603 struct vec0_query_knn_data *knn_data;
4604 struct vec0_query_point_data *point_data;
4605};
4606
4607void vec0_cursor_clear(vec0_cursor *pCur) {
4608 if (pCur->fullscan_data) {
4609 vec0_query_fullscan_data_clear(pCur->fullscan_data);
4610 sqlite3_freesqlite3_api->free(pCur->fullscan_data);
4611 pCur->fullscan_data = NULL((void*)0);
4612 }
4613 if (pCur->knn_data) {
4614 vec0_query_knn_data_clear(pCur->knn_data);
4615 sqlite3_freesqlite3_api->free(pCur->knn_data);
4616 pCur->knn_data = NULL((void*)0);
4617 }
4618 if (pCur->point_data) {
4619 vec0_query_point_data_clear(pCur->point_data);
4620 sqlite3_freesqlite3_api->free(pCur->point_data);
4621 pCur->point_data = NULL((void*)0);
4622 }
4623}
4624
4625#define VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "vec0 constructor error: "
4626static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
4627 sqlite3_vtab **ppVtab, char **pzErr, bool_Bool isCreate) {
4628 UNUSED_PARAMETER(pAux)(void)(pAux);
4629 vec0_vtab *pNew;
4630 int rc;
4631 const char *zSql;
4632
4633 pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew));
4634 if (pNew == 0)
4635 return SQLITE_NOMEM7;
4636 memset(pNew, 0, sizeof(*pNew));
4637
4638 // Declared chunk_size=N for entire table.
4639 // -1 to use the defualt, otherwise will get re-assigned on `chunk_size=N`
4640 // option
4641 int chunk_size = -1;
4642 int numVectorColumns = 0;
4643 int numPartitionColumns = 0;
4644 int numAuxiliaryColumns = 0;
4645 int numMetadataColumns = 0;
4646 int user_column_idx = 0;
4647
4648 // track if a "primary key" column is defined
4649 char *pkColumnName = NULL((void*)0);
4650 int pkColumnNameLength;
4651 int pkColumnType = SQLITE_INTEGER1;
4652
4653 for (int i = 3; i < argc; i++) {
4654 struct VectorColumnDefinition vecColumn;
4655 struct Vec0PartitionColumnDefinition partitionColumn;
4656 struct Vec0AuxiliaryColumnDefinition auxColumn;
4657 struct Vec0MetadataColumnDefinition metadataColumn;
4658 char *cName = NULL((void*)0);
4659 int cNameLength;
4660 int cType;
4661
4662 // Scenario #1: Constructor argument is a vector column definition, ie `foo float[1024]`
4663 rc = vec0_parse_vector_column(argv[i], strlen(argv[i]), &vecColumn);
4664 if (rc == SQLITE_ERROR1) {
4665 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4666 VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "could not parse vector column '%s'", argv[i]);
4667 goto error;
4668 }
4669 if (rc == SQLITE_OK0) {
4670 if (numVectorColumns >= VEC0_MAX_VECTOR_COLUMNS16) {
4671 sqlite3_freesqlite3_api->free(vecColumn.name);
4672 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4673 "Too many provided vector columns, maximum %d",
4674 VEC0_MAX_VECTOR_COLUMNS16);
4675 goto error;
4676 }
4677
4678 if (vecColumn.dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS8192) {
4679 sqlite3_freesqlite3_api->free(vecColumn.name);
4680 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4681 VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4682 "Dimension on vector column too large, provided %lld, maximum %lld",
4683 (i64)vecColumn.dimensions, SQLITE_VEC_VEC0_MAX_DIMENSIONS8192);
4684 goto error;
4685 }
4686 pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
4687 pNew->user_column_idxs[user_column_idx] = numVectorColumns;
4688 memcpy(&pNew->vector_columns[numVectorColumns], &vecColumn, sizeof(vecColumn));
4689 numVectorColumns++;
4690 user_column_idx++;
4691
4692 continue;
4693 }
4694
4695 // Scenario #2: Constructor argument is a partition key column definition, ie `user_id text partition key`
4696 rc = vec0_parse_partition_key_definition(argv[i], strlen(argv[i]), &cName,
4697 &cNameLength, &cType);
4698 if (rc == SQLITE_OK0) {
4699 if (numPartitionColumns >= VEC0_MAX_PARTITION_COLUMNS4) {
4700 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4701 VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4702 "More than %d partition key columns were provided",
4703 VEC0_MAX_PARTITION_COLUMNS4);
4704 goto error;
4705 }
4706 partitionColumn.type = cType;
4707 partitionColumn.name_length = cNameLength;
4708 partitionColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName);
4709 if(!partitionColumn.name) {
4710 rc = SQLITE_NOMEM7;
4711 goto error;
4712 }
4713
4714 pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
4715 pNew->user_column_idxs[user_column_idx] = numPartitionColumns;
4716 memcpy(&pNew->paritition_columns[numPartitionColumns], &partitionColumn, sizeof(partitionColumn));
4717 numPartitionColumns++;
4718 user_column_idx++;
4719 continue;
4720 }
4721
4722 // Scenario #3: Constructor argument is a primary key column definition, ie `article_id text primary key`
4723 rc = vec0_parse_primary_key_definition(argv[i], strlen(argv[i]), &cName,
4724 &cNameLength, &cType);
4725 if (rc == SQLITE_OK0) {
4726 if (pkColumnName) {
4727 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4728 VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4729 "More than one primary key definition was provided, vec0 only "
4730 "suports a single primary key column",
4731 argv[i]);
4732 goto error;
4733 }
4734 pkColumnName = cName;
4735 pkColumnNameLength = cNameLength;
4736 pkColumnType = cType;
4737 continue;
4738 }
4739
4740 // Scenario #4: Constructor argument is a auxiliary column definition, ie `+contents text`
4741 rc = vec0_parse_auxiliary_column_definition(argv[i], strlen(argv[i]), &cName,
4742 &cNameLength, &cType);
4743 if(rc == SQLITE_OK0) {
4744 if (numAuxiliaryColumns >= VEC0_MAX_AUXILIARY_COLUMNS16) {
4745 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4746 VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4747 "More than %d auxiliary columns were provided",
4748 VEC0_MAX_AUXILIARY_COLUMNS16);
4749 goto error;
4750 }
4751 auxColumn.type = cType;
4752 auxColumn.name_length = cNameLength;
4753 auxColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName);
4754 if(!auxColumn.name) {
4755 rc = SQLITE_NOMEM7;
4756 goto error;
4757 }
4758
4759 pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
4760 pNew->user_column_idxs[user_column_idx] = numAuxiliaryColumns;
4761 memcpy(&pNew->auxiliary_columns[numAuxiliaryColumns], &auxColumn, sizeof(auxColumn));
4762 numAuxiliaryColumns++;
4763 user_column_idx++;
4764 continue;
4765 }
4766
4767 vec0_metadata_column_kind kind;
4768 rc = vec0_parse_metadata_column_definition(argv[i], strlen(argv[i]), &cName,
4769 &cNameLength, &kind);
4770 if(rc == SQLITE_OK0) {
4771 if (numMetadataColumns >= VEC0_MAX_METADATA_COLUMNS16) {
4772 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4773 VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4774 "More than %d metadata columns were provided",
4775 VEC0_MAX_METADATA_COLUMNS16);
4776 goto error;
4777 }
4778 metadataColumn.kind = kind;
4779 metadataColumn.name_length = cNameLength;
4780 metadataColumn.name = sqlite3_mprintfsqlite3_api->mprintf("%.*s", cNameLength, cName);
4781 if(!metadataColumn.name) {
4782 rc = SQLITE_NOMEM7;
4783 goto error;
4784 }
4785
4786 pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
4787 pNew->user_column_idxs[user_column_idx] = numMetadataColumns;
4788 memcpy(&pNew->metadata_columns[numMetadataColumns], &metadataColumn, sizeof(metadataColumn));
4789 numMetadataColumns++;
4790 user_column_idx++;
4791 continue;
4792 }
4793
4794 // Scenario #4: Constructor argument is a table-level option, ie `chunk_size`
4795
4796 char *key;
4797 char *value;
4798 int keyLength, valueLength;
4799 rc = vec0_parse_table_option(argv[i], strlen(argv[i]), &key, &keyLength,
4800 &value, &valueLength);
4801 if (rc == SQLITE_ERROR1) {
4802 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4803 VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "could not parse table option '%s'", argv[i]);
4804 goto error;
4805 }
4806 if (rc == SQLITE_OK0) {
4807 if (sqlite3_strnicmpsqlite3_api->strnicmp(key, "chunk_size", keyLength) == 0) {
4808 chunk_size = atoi(value);
4809 if (chunk_size <= 0) {
4810 // IMP: V01931_18769
4811 *pzErr =
4812 sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4813 "chunk_size must be a non-zero positive integer");
4814 goto error;
4815 }
4816 if ((chunk_size % 8) != 0) {
4817 // IMP: V14110_30948
4818 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4819 "chunk_size must be divisible by 8");
4820 goto error;
4821 }
4822#define SQLITE_VEC_CHUNK_SIZE_MAX4096 4096
4823 if (chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX4096) {
4824 *pzErr =
4825 sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "chunk_size too large");
4826 goto error;
4827 }
4828 } else {
4829 // IMP: V27642_11712
4830 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4831 VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "Unknown table option: %.*s", keyLength, key);
4832 goto error;
4833 }
4834 continue;
4835 }
4836
4837 // Scenario #5: Unknown constructor argument
4838 *pzErr =
4839 sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: " "Could not parse '%s'", argv[i]);
4840 goto error;
4841 }
4842
4843 if (chunk_size < 0) {
4844 chunk_size = 1024;
4845 }
4846
4847 if (numVectorColumns <= 0) {
4848 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4849 "At least one vector column is required");
4850 goto error;
4851 }
4852
4853 sqlite3_str *createStr = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0));
4854 sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "CREATE TABLE x(");
4855 if (pkColumnName) {
4856 sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\" primary key, ", pkColumnNameLength,
4857 pkColumnName);
4858 } else {
4859 sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "rowid, ");
4860 }
4861 for (int i = 0; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns + numMetadataColumns; i++) {
4862 switch(pNew->user_column_kinds[i]) {
4863 case SQLITE_VEC0_USER_COLUMN_KIND_VECTOR: {
4864 int vector_idx = pNew->user_column_idxs[i];
4865 sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ",
4866 pNew->vector_columns[vector_idx].name_length,
4867 pNew->vector_columns[vector_idx].name);
4868 break;
4869 }
4870 case SQLITE_VEC0_USER_COLUMN_KIND_PARTITION: {
4871 int partition_idx = pNew->user_column_idxs[i];
4872 sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ",
4873 pNew->paritition_columns[partition_idx].name_length,
4874 pNew->paritition_columns[partition_idx].name);
4875 break;
4876 }
4877 case SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY: {
4878 int auxiliary_idx = pNew->user_column_idxs[i];
4879 sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ",
4880 pNew->auxiliary_columns[auxiliary_idx].name_length,
4881 pNew->auxiliary_columns[auxiliary_idx].name);
4882 break;
4883 }
4884 case SQLITE_VEC0_USER_COLUMN_KIND_METADATA: {
4885 int metadata_idx = pNew->user_column_idxs[i];
4886 sqlite3_str_appendfsqlite3_api->str_appendf(createStr, "\"%.*w\", ",
4887 pNew->metadata_columns[metadata_idx].name_length,
4888 pNew->metadata_columns[metadata_idx].name);
4889 break;
4890 }
4891 }
4892
4893 }
4894 sqlite3_str_appendallsqlite3_api->str_appendall(createStr, " distance hidden, k hidden) ");
4895 if (pkColumnName) {
4896 sqlite3_str_appendallsqlite3_api->str_appendall(createStr, "without rowid ");
4897 }
4898 zSql = sqlite3_str_finishsqlite3_api->str_finish(createStr);
4899 if (!zSql) {
4900 goto error;
4901 }
4902 rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, zSql);
4903 sqlite3_freesqlite3_api->free((void *)zSql);
4904 if (rc != SQLITE_OK0) {
4905 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(VEC_CONSTRUCTOR_ERROR"vec0 constructor error: "
4906 "could not declare virtual table, '%s'",
4907 sqlite3_errmsgsqlite3_api->errmsg(db));
4908 goto error;
4909 }
4910
4911 const char *schemaName = argv[1];
4912 const char *tableName = argv[2];
4913
4914 pNew->db = db;
4915 pNew->pkIsText = pkColumnType == SQLITE_TEXT3;
4916 pNew->schemaName = sqlite3_mprintfsqlite3_api->mprintf("%s", schemaName);
4917 if (!pNew->schemaName) {
4918 goto error;
4919 }
4920 pNew->tableName = sqlite3_mprintfsqlite3_api->mprintf("%s", tableName);
4921 if (!pNew->tableName) {
4922 goto error;
4923 }
4924 pNew->shadowRowidsName = sqlite3_mprintfsqlite3_api->mprintf("%s_rowids", tableName);
4925 if (!pNew->shadowRowidsName) {
4926 goto error;
4927 }
4928 pNew->shadowChunksName = sqlite3_mprintfsqlite3_api->mprintf("%s_chunks", tableName);
4929 if (!pNew->shadowChunksName) {
4930 goto error;
4931 }
4932 pNew->numVectorColumns = numVectorColumns;
4933 pNew->numPartitionColumns = numPartitionColumns;
4934 pNew->numAuxiliaryColumns = numAuxiliaryColumns;
4935 pNew->numMetadataColumns = numMetadataColumns;
4936
4937 for (int i = 0; i < pNew->numVectorColumns; i++) {
4938 pNew->shadowVectorChunksNames[i] =
4939 sqlite3_mprintfsqlite3_api->mprintf("%s_vector_chunks%02d", tableName, i);
4940 if (!pNew->shadowVectorChunksNames[i]) {
4941 goto error;
4942 }
4943 }
4944 for (int i = 0; i < pNew->numMetadataColumns; i++) {
4945 pNew->shadowMetadataChunksNames[i] =
4946 sqlite3_mprintfsqlite3_api->mprintf("%s_metadatachunks%02d", tableName, i);
4947 if (!pNew->shadowMetadataChunksNames[i]) {
4948 goto error;
4949 }
4950 }
4951 pNew->chunk_size = chunk_size;
4952
4953 // if xCreate, then create the necessary shadow tables
4954 if (isCreate) {
4955 sqlite3_stmt *stmt;
4956 int rc;
4957
4958 char * zCreateInfo = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE "VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"" " (key text primary key, value any)", pNew->schemaName, pNew->tableName);
4959 if(!zCreateInfo) {
4960 goto error;
4961 }
4962 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateInfo, -1, &stmt, NULL((void*)0));
4963
4964 sqlite3_freesqlite3_api->free((void *) zCreateInfo);
4965 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
4966 // TODO(IMP)
4967 sqlite3_finalizesqlite3_api->finalize(stmt);
4968 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_info' shadow table: %s",
4969 sqlite3_errmsgsqlite3_api->errmsg(db));
4970 goto error;
4971 }
4972 sqlite3_finalizesqlite3_api->finalize(stmt);
4973
4974 char * zSeedInfo = sqlite3_mprintfsqlite3_api->mprintf(
4975 "INSERT INTO "VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"" "(key, value) VALUES "
4976 "(?1, ?2), (?3, ?4), (?5, ?6), (?7, ?8) ",
4977 pNew->schemaName, pNew->tableName
4978 );
4979 if(!zSeedInfo) {
4980 goto error;
4981 }
4982 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSeedInfo, -1, &stmt, NULL((void*)0));
4983 sqlite3_freesqlite3_api->free((void *) zSeedInfo);
4984 if (rc != SQLITE_OK0) {
4985 // TODO(IMP)
4986 sqlite3_finalizesqlite3_api->finalize(stmt);
4987 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not seed '_info' shadow table: %s",
4988 sqlite3_errmsgsqlite3_api->errmsg(db));
4989 goto error;
4990 }
4991 sqlite3_bind_textsqlite3_api->bind_text(stmt, 1, "CREATE_VERSION", -1, SQLITE_STATIC((sqlite3_destructor_type)0));
4992 sqlite3_bind_textsqlite3_api->bind_text(stmt, 2, SQLITE_VEC_VERSION"v0.1.7-alpha.2", -1, SQLITE_STATIC((sqlite3_destructor_type)0));
4993 sqlite3_bind_textsqlite3_api->bind_text(stmt, 3, "CREATE_VERSION_MAJOR", -1, SQLITE_STATIC((sqlite3_destructor_type)0));
4994 sqlite3_bind_intsqlite3_api->bind_int(stmt, 4, SQLITE_VEC_VERSION_MAJOR0);
4995 sqlite3_bind_textsqlite3_api->bind_text(stmt, 5, "CREATE_VERSION_MINOR", -1, SQLITE_STATIC((sqlite3_destructor_type)0));
4996 sqlite3_bind_intsqlite3_api->bind_int(stmt, 6, SQLITE_VEC_VERSION_MINOR1);
4997 sqlite3_bind_textsqlite3_api->bind_text(stmt, 7, "CREATE_VERSION_PATCH", -1, SQLITE_STATIC((sqlite3_destructor_type)0));
4998 sqlite3_bind_intsqlite3_api->bind_int(stmt, 8, SQLITE_VEC_VERSION_PATCH7);
4999
5000 if(sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101) {
5001 // TODO(IMP)
5002 sqlite3_finalizesqlite3_api->finalize(stmt);
5003 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not seed '_info' shadow table: %s",
5004 sqlite3_errmsgsqlite3_api->errmsg(db));
5005 goto error;
5006 }
5007 sqlite3_finalizesqlite3_api->finalize(stmt);
5008
5009
5010
5011 // create the _chunks shadow table
5012 char *zCreateShadowChunks = NULL((void*)0);
5013 if(pNew->numPartitionColumns) {
5014 sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0));
5015 sqlite3_str_appendfsqlite3_api->str_appendf(s, "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"" "(", pNew->schemaName, pNew->tableName);
5016 sqlite3_str_appendallsqlite3_api->str_appendall(s, "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," "size INTEGER NOT NULL,");
5017 sqlite3_str_appendallsqlite3_api->str_appendall(s, "sequence_id integer,");
5018 for(int i = 0; i < pNew->numPartitionColumns;i++) {
5019 sqlite3_str_appendfsqlite3_api->str_appendf(s, "partition%02d,", i);
5020 }
5021 sqlite3_str_appendallsqlite3_api->str_appendall(s, "validity BLOB NOT NULL, rowids BLOB NOT NULL);");
5022 zCreateShadowChunks = sqlite3_str_finishsqlite3_api->str_finish(s);
5023 }else {
5024 zCreateShadowChunks = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_CHUNKS_CREATE"CREATE TABLE " "\"%w\".\"%w_chunks\"" "(" "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,"
"size INTEGER NOT NULL," "validity BLOB NOT NULL," "rowids BLOB NOT NULL"
");"
,
5025 pNew->schemaName, pNew->tableName);
5026 }
5027 if (!zCreateShadowChunks) {
5028 goto error;
5029 }
5030 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateShadowChunks, -1, &stmt, 0);
5031 sqlite3_freesqlite3_api->free((void *)zCreateShadowChunks);
5032 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5033 // IMP: V17740_01811
5034 sqlite3_finalizesqlite3_api->finalize(stmt);
5035 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_chunks' shadow table: %s",
5036 sqlite3_errmsgsqlite3_api->errmsg(db));
5037 goto error;
5038 }
5039 sqlite3_finalizesqlite3_api->finalize(stmt);
5040
5041 // create the _rowids shadow table
5042 char *zCreateShadowRowids;
5043 if (pNew->pkIsText) {
5044 // adds a "text unique not null" constraint to the id column
5045 zCreateShadowRowids = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT"CREATE TABLE " "\"%w\".\"%w_rowids\"" "(" "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
"id TEXT UNIQUE NOT NULL," "chunk_id INTEGER," "chunk_offset INTEGER"
");"
,
5046 pNew->schemaName, pNew->tableName);
5047 } else {
5048 zCreateShadowRowids = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_ROWIDS_CREATE_BASIC"CREATE TABLE " "\"%w\".\"%w_rowids\"" "(" "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
"id," "chunk_id INTEGER," "chunk_offset INTEGER" ");"
,
5049 pNew->schemaName, pNew->tableName);
5050 }
5051 if (!zCreateShadowRowids) {
5052 goto error;
5053 }
5054 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zCreateShadowRowids, -1, &stmt, 0);
5055 sqlite3_freesqlite3_api->free((void *)zCreateShadowRowids);
5056 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5057 // IMP: V11631_28470
5058 sqlite3_finalizesqlite3_api->finalize(stmt);
5059 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("Could not create '_rowids' shadow table: %s",
5060 sqlite3_errmsgsqlite3_api->errmsg(db));
5061 goto error;
5062 }
5063 sqlite3_finalizesqlite3_api->finalize(stmt);
5064
5065 for (int i = 0; i < pNew->numVectorColumns; i++) {
5066 char *zSql = sqlite3_mprintfsqlite3_api->mprintf(VEC0_SHADOW_VECTOR_N_CREATE"CREATE TABLE " "\"%w\".\"%w_vector_chunks%02d\"" "(" "rowid PRIMARY KEY,"
"vectors BLOB NOT NULL" ");"
,
5067 pNew->schemaName, pNew->tableName, i);
5068 if (!zSql) {
5069 goto error;
5070 }
5071 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0);
5072 sqlite3_freesqlite3_api->free((void *)zSql);
5073 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5074 // IMP: V25919_09989
5075 sqlite3_finalizesqlite3_api->finalize(stmt);
5076 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
5077 "Could not create '_vector_chunks%02d' shadow table: %s", i,
5078 sqlite3_errmsgsqlite3_api->errmsg(db));
5079 goto error;
5080 }
5081 sqlite3_finalizesqlite3_api->finalize(stmt);
5082 }
5083
5084 for (int i = 0; i < pNew->numMetadataColumns; i++) {
5085 char *zSql = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"" "(rowid PRIMARY KEY, data BLOB NOT NULL);",
5086 pNew->schemaName, pNew->tableName, i);
5087 if (!zSql) {
5088 goto error;
5089 }
5090 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0);
5091 sqlite3_freesqlite3_api->free((void *)zSql);
5092 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5093 sqlite3_finalizesqlite3_api->finalize(stmt);
5094 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
5095 "Could not create '_metata_chunks%02d' shadow table: %s", i,
5096 sqlite3_errmsgsqlite3_api->errmsg(db));
5097 goto error;
5098 }
5099 sqlite3_finalizesqlite3_api->finalize(stmt);
5100
5101 if(pNew->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
5102 char *zSql = sqlite3_mprintfsqlite3_api->mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" "(rowid PRIMARY KEY, data TEXT);",
5103 pNew->schemaName, pNew->tableName, i);
5104 if (!zSql) {
5105 goto error;
5106 }
5107 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, 0);
5108 sqlite3_freesqlite3_api->free((void *)zSql);
5109 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5110 sqlite3_finalizesqlite3_api->finalize(stmt);
5111 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
5112 "Could not create '_metadatatext%02d' shadow table: %s", i,
5113 sqlite3_errmsgsqlite3_api->errmsg(db));
5114 goto error;
5115 }
5116 sqlite3_finalizesqlite3_api->finalize(stmt);
5117
5118 }
5119 }
5120
5121 if(pNew->numAuxiliaryColumns > 0) {
5122 sqlite3_stmt * stmt;
5123 sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0));
5124 sqlite3_str_appendfsqlite3_api->str_appendf(s, "CREATE TABLE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" "( rowid integer PRIMARY KEY ", pNew->schemaName, pNew->tableName);
5125 for(int i = 0; i < pNew->numAuxiliaryColumns; i++) {
5126 sqlite3_str_appendfsqlite3_api->str_appendf(s, ", value%02d", i);
5127 }
5128 sqlite3_str_appendallsqlite3_api->str_appendall(s, ")");
5129 char *zSql = sqlite3_str_finishsqlite3_api->str_finish(s);
5130 if(!zSql) {
5131 goto error;
5132 }
5133 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(db, zSql, -1, &stmt, NULL((void*)0));
5134 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5135 sqlite3_finalizesqlite3_api->finalize(stmt);
5136 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
5137 "Could not create auxiliary shadow table: %s",
5138 sqlite3_errmsgsqlite3_api->errmsg(db));
5139
5140 goto error;
5141 }
5142 sqlite3_finalizesqlite3_api->finalize(stmt);
5143 }
5144 }
5145
5146 *ppVtab = (sqlite3_vtab *)pNew;
5147 return SQLITE_OK0;
5148
5149error:
5150 vec0_free(pNew);
5151 return SQLITE_ERROR1;
5152}
5153
5154static int vec0Create(sqlite3 *db, void *pAux, int argc,
5155 const char *const *argv, sqlite3_vtab **ppVtab,
5156 char **pzErr) {
5157 return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, true1);
5158}
5159static int vec0Connect(sqlite3 *db, void *pAux, int argc,
5160 const char *const *argv, sqlite3_vtab **ppVtab,
5161 char **pzErr) {
5162 return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, false0);
5163}
5164
5165static int vec0Disconnect(sqlite3_vtab *pVtab) {
5166 vec0_vtab *p = (vec0_vtab *)pVtab;
5167 vec0_free(p);
5168 sqlite3_freesqlite3_api->free(p);
5169 return SQLITE_OK0;
5170}
5171static int vec0Destroy(sqlite3_vtab *pVtab) {
5172 vec0_vtab *p = (vec0_vtab *)pVtab;
5173 sqlite3_stmt *stmt;
5174 int rc;
5175 const char *zSql;
5176
5177 // Free up any sqlite3_stmt, otherwise DROPs on those tables will fail
5178 vec0_free_resources(p);
5179
5180 // TODO(test) later: can't evidence-of here, bc always gives "SQL logic error" instead of
5181 // provided error
5182 zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"", p->schemaName,
5183 p->tableName);
5184 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0);
5185 sqlite3_freesqlite3_api->free((void *)zSql);
5186 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5187 rc = SQLITE_ERROR1;
5188 vtab_set_error(pVtab, "could not drop chunks shadow table");
5189 goto done;
5190 }
5191 sqlite3_finalizesqlite3_api->finalize(stmt);
5192
5193 zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_INFO_NAME"\"%w\".\"%w_info\"", p->schemaName,
5194 p->tableName);
5195 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0);
5196 sqlite3_freesqlite3_api->free((void *)zSql);
5197 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5198 rc = SQLITE_ERROR1;
5199 vtab_set_error(pVtab, "could not drop info shadow table");
5200 goto done;
5201 }
5202 sqlite3_finalizesqlite3_api->finalize(stmt);
5203
5204 zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"", p->schemaName,
5205 p->tableName);
5206 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0);
5207 sqlite3_freesqlite3_api->free((void *)zSql);
5208 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5209 rc = SQLITE_ERROR1;
5210 goto done;
5211 }
5212 sqlite3_finalizesqlite3_api->finalize(stmt);
5213
5214 for (int i = 0; i < p->numVectorColumns; i++) {
5215 zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE \"%w\".\"%w\"", p->schemaName,
5216 p->shadowVectorChunksNames[i]);
5217 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0);
5218 sqlite3_freesqlite3_api->free((void *)zSql);
5219 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5220 rc = SQLITE_ERROR1;
5221 goto done;
5222 }
5223 sqlite3_finalizesqlite3_api->finalize(stmt);
5224 }
5225
5226 if(p->numAuxiliaryColumns > 0) {
5227 zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"", p->schemaName, p->tableName);
5228 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0);
5229 sqlite3_freesqlite3_api->free((void *)zSql);
5230 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5231 rc = SQLITE_ERROR1;
5232 goto done;
5233 }
5234 sqlite3_finalizesqlite3_api->finalize(stmt);
5235 }
5236
5237
5238 for (int i = 0; i < p->numMetadataColumns; i++) {
5239 zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_METADATA_N_NAME"\"%w\".\"%w_metadatachunks%02d\"", p->schemaName,p->tableName, i);
5240 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0);
5241 sqlite3_freesqlite3_api->free((void *)zSql);
5242 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5243 rc = SQLITE_ERROR1;
5244 goto done;
5245 }
5246 sqlite3_finalizesqlite3_api->finalize(stmt);
5247
5248 if(p->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
5249 zSql = sqlite3_mprintfsqlite3_api->mprintf("DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"", p->schemaName,p->tableName, i);
5250 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, 0);
5251 sqlite3_freesqlite3_api->free((void *)zSql);
5252 if ((rc != SQLITE_OK0) || (sqlite3_stepsqlite3_api->step(stmt) != SQLITE_DONE101)) {
5253 rc = SQLITE_ERROR1;
5254 goto done;
5255 }
5256 sqlite3_finalizesqlite3_api->finalize(stmt);
5257 }
5258 }
5259
5260 stmt = NULL((void*)0);
5261 rc = SQLITE_OK0;
5262
5263done:
5264 sqlite3_finalizesqlite3_api->finalize(stmt);
5265 vec0_free(p);
5266 // If there was an error
5267 if (rc == SQLITE_OK0) {
5268 sqlite3_freesqlite3_api->free(p);
5269 }
5270 return rc;
5271}
5272
5273static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
5274 UNUSED_PARAMETER(p)(void)(p);
5275 vec0_cursor *pCur;
5276 pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur));
5277 if (pCur == 0)
5278 return SQLITE_NOMEM7;
5279 memset(pCur, 0, sizeof(*pCur));
5280 *ppCursor = &pCur->base;
5281 return SQLITE_OK0;
5282}
5283
5284static int vec0Close(sqlite3_vtab_cursor *cur) {
5285 vec0_cursor *pCur = (vec0_cursor *)cur;
5286 vec0_cursor_clear(pCur);
5287 sqlite3_freesqlite3_api->free(pCur);
5288 return SQLITE_OK0;
5289}
5290
// All the different types of "values" provided to argv/argc in vec0Filter.
// These enums denote the use and purpose of each of them; every kind is
// encoded as a single printable character.
typedef enum {
  // If any values are updated, please update the ARCHITECTURE.md docs
  // accordingly!

  VEC0_IDXSTR_KIND_KNN_MATCH = '{',
  VEC0_IDXSTR_KIND_KNN_K = '}',
  VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',
  VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',
  VEC0_IDXSTR_KIND_POINT_ID = '!',
  VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
} vec0_idxstr_kind;
5303
// The different SQLITE_INDEX_CONSTRAINT values that vec0 partition key columns
// support, but as characters that fit nicely in idxstr.
typedef enum {
  // If any values are updated, please update the ARCHITECTURE.md docs
  // accordingly!

  VEC0_PARTITION_OPERATOR_EQ = 'a',
  VEC0_PARTITION_OPERATOR_GT = 'b',
  VEC0_PARTITION_OPERATOR_LE = 'c',
  VEC0_PARTITION_OPERATOR_LT = 'd',
  VEC0_PARTITION_OPERATOR_GE = 'e',
  VEC0_PARTITION_OPERATOR_NE = 'f',
} vec0_partition_operator;
// Constraint operators for metadata columns, encoded as single characters.
// The first six use the same characters as vec0_partition_operator; IN is
// additional.
typedef enum {
  VEC0_METADATA_OPERATOR_EQ = 'a',
  VEC0_METADATA_OPERATOR_GT = 'b',
  VEC0_METADATA_OPERATOR_LE = 'c',
  VEC0_METADATA_OPERATOR_LT = 'd',
  VEC0_METADATA_OPERATOR_GE = 'e',
  VEC0_METADATA_OPERATOR_NE = 'f',
  VEC0_METADATA_OPERATOR_IN = 'g',
} vec0_metadata_operator;
5325
5326static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
5327 vec0_vtab *p = (vec0_vtab *)pVTab;
5328 /**
5329 * Possible query plans are:
5330 * 1. KNN when:
5331 * a) An `MATCH` op on vector column
5332 * b) ORDER BY on distance column
5333 * c) LIMIT
5334 * d) rowid in (...) OPTIONAL
5335 * 2. Point when:
5336 * a) An `EQ` op on rowid column
5337 * 3. else: fullscan
5338 *
5339 */
5340 int iMatchTerm = -1;
5341 int iMatchVectorTerm = -1;
5342 int iLimitTerm = -1;
5343 int iRowidTerm = -1;
5344 int iKTerm = -1;
5345 int iRowidInTerm = -1;
5346 int hasAuxConstraint = 0;
5347
5348#ifdef SQLITE_VEC_DEBUG
5349 printf("pIdxInfo->nOrderBy=%d, pIdxInfo->nConstraint=%d\n", pIdxInfo->nOrderBy, pIdxInfo->nConstraint);
5350#endif
5351
5352 for (int i = 0; i < pIdxInfo->nConstraint; i++) {
5353 u8 vtabIn = 0;
5354
5355#if COMPILER_SUPPORTS_VTAB_IN1
5356 if (sqlite3_libversion_numbersqlite3_api->libversion_number() >= 3038000) {
5357 vtabIn = sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, -1);
5358 }
5359#endif
5360
5361#ifdef SQLITE_VEC_DEBUG
5362 printf("xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d\n", i,
5363 pIdxInfo->aConstraint[i].usable, pIdxInfo->aConstraint[i].iColumn,
5364 pIdxInfo->aConstraint[i].op, vtabIn);
5365#endif
5366 if (!pIdxInfo->aConstraint[i].usable)
5367 continue;
5368
5369 int iColumn = pIdxInfo->aConstraint[i].iColumn;
5370 int op = pIdxInfo->aConstraint[i].op;
5371
5372 if (op == SQLITE_INDEX_CONSTRAINT_LIMIT73) {
5373 iLimitTerm = i;
5374 }
5375 if (op == SQLITE_INDEX_CONSTRAINT_MATCH64 &&
5376 vec0_column_idx_is_vector(p, iColumn)) {
5377 if (iMatchTerm > -1) {
5378 vtab_set_error(
5379 pVTab, "only 1 MATCH operator is allowed in a single vec0 query");
5380 return SQLITE_ERROR1;
5381 }
5382 iMatchTerm = i;
5383 iMatchVectorTerm = vec0_column_idx_to_vector_idx(p, iColumn);
5384 }
5385 if (op == SQLITE_INDEX_CONSTRAINT_EQ2 && iColumn == VEC0_COLUMN_ID0) {
5386 if (vtabIn) {
5387 if (iRowidInTerm != -1) {
5388 vtab_set_error(pVTab, "only 1 'rowid in (..)' operator is allowed in "
5389 "a single vec0 query");
5390 return SQLITE_ERROR1;
5391 }
5392 iRowidInTerm = i;
5393
5394 } else {
5395 iRowidTerm = i;
5396 }
5397 }
5398 if (op == SQLITE_INDEX_CONSTRAINT_EQ2 && iColumn == vec0_column_k_idx(p)) {
5399 iKTerm = i;
5400 }
5401 if(
5402 (op != SQLITE_INDEX_CONSTRAINT_LIMIT73 && op != SQLITE_INDEX_CONSTRAINT_OFFSET74)
5403 && vec0_column_idx_is_auxiliary(p, iColumn)) {
5404 hasAuxConstraint = 1;
5405 }
5406 }
5407
5408 sqlite3_str *idxStr = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0));
5409 int rc;
5410
5411 if (iMatchTerm >= 0) {
5412 if (iLimitTerm < 0 && iKTerm < 0) {
5413 vtab_set_error(
5414 pVTab,
5415 "A LIMIT or 'k = ?' constraint is required on vec0 knn queries.");
5416 rc = SQLITE_ERROR1;
5417 goto done;
5418 }
5419 if (iLimitTerm >= 0 && iKTerm >= 0) {
5420 vtab_set_error(pVTab, "Only LIMIT or 'k =?' can be provided, not both");
5421 rc = SQLITE_ERROR1;
5422 goto done;
5423 }
5424
5425 if (pIdxInfo->nOrderBy) {
5426 if (pIdxInfo->nOrderBy > 1) {
5427 vtab_set_error(pVTab, "Only a single 'ORDER BY distance' clause is "
5428 "allowed on vec0 KNN queries");
5429 rc = SQLITE_ERROR1;
5430 goto done;
5431 }
5432 if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) {
5433 vtab_set_error(pVTab,
5434 "Only a single 'ORDER BY distance' clause is allowed on "
5435 "vec0 KNN queries, not on other columns");
5436 rc = SQLITE_ERROR1;
5437 goto done;
5438 }
5439 if (pIdxInfo->aOrderBy[0].desc) {
5440 vtab_set_error(
5441 pVTab, "Only ascending in ORDER BY distance clause is supported, "
5442 "DESC is not supported yet.");
5443 rc = SQLITE_ERROR1;
5444 goto done;
5445 }
5446 }
5447
5448 if(hasAuxConstraint) {
5449 // IMP: V25623_09693
5450 vtab_set_error(pVTab, "An illegal WHERE constraint was provided on a vec0 auxiliary column in a KNN query.");
5451 rc = SQLITE_ERROR1;
5452 goto done;
5453 }
5454
5455 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_KNN);
5456
5457 int argvIndex = 1;
5458 pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = argvIndex++;
5459 pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
5460 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_MATCH);
5461 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_');
5462
5463 if (iLimitTerm >= 0) {
5464 pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = argvIndex++;
5465 pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1;
5466 } else {
5467 pIdxInfo->aConstraintUsage[iKTerm].argvIndex = argvIndex++;
5468 pIdxInfo->aConstraintUsage[iKTerm].omit = 1;
5469 }
5470 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_K);
5471 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_');
5472
5473#if COMPILER_SUPPORTS_VTAB_IN1
5474 if (iRowidInTerm >= 0) {
5475 // already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when
5476 // vtabIn == 1
5477 sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, iRowidInTerm, 1);
5478 pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = argvIndex++;
5479 pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1;
5480 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_ROWID_IN);
5481 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_');
5482 }
5483#endif
5484
5485 for (int i = 0; i < pIdxInfo->nConstraint; i++) {
5486 if (!pIdxInfo->aConstraint[i].usable)
5487 continue;
5488
5489 int iColumn = pIdxInfo->aConstraint[i].iColumn;
5490 int op = pIdxInfo->aConstraint[i].op;
5491 if(op == SQLITE_INDEX_CONSTRAINT_LIMIT73 || op == SQLITE_INDEX_CONSTRAINT_OFFSET74) {
5492 continue;
5493 }
5494 if(!vec0_column_idx_is_partition(p, iColumn)) {
5495 continue;
5496 }
5497
5498 int partition_idx = vec0_column_idx_to_partition_idx(p, iColumn);
5499 char value = 0;
5500
5501 switch(op) {
5502 case SQLITE_INDEX_CONSTRAINT_EQ2: {
5503 value = VEC0_PARTITION_OPERATOR_EQ;
5504 break;
5505 }
5506 case SQLITE_INDEX_CONSTRAINT_GT4: {
5507 value = VEC0_PARTITION_OPERATOR_GT;
5508 break;
5509 }
5510 case SQLITE_INDEX_CONSTRAINT_LE8: {
5511 value = VEC0_PARTITION_OPERATOR_LE;
5512 break;
5513 }
5514 case SQLITE_INDEX_CONSTRAINT_LT16: {
5515 value = VEC0_PARTITION_OPERATOR_LT;
5516 break;
5517 }
5518 case SQLITE_INDEX_CONSTRAINT_GE32: {
5519 value = VEC0_PARTITION_OPERATOR_GE;
5520 break;
5521 }
5522 case SQLITE_INDEX_CONSTRAINT_NE68: {
5523 value = VEC0_PARTITION_OPERATOR_NE;
5524 break;
5525 }
5526 }
5527
5528 if(value) {
5529 pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
5530 pIdxInfo->aConstraintUsage[i].omit = 1;
5531 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT);
5532 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, 'A' + partition_idx);
5533 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, value);
5534 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, '_');
5535 }
5536
5537 }
5538
5539 for (int i = 0; i < pIdxInfo->nConstraint; i++) {
5540 if (!pIdxInfo->aConstraint[i].usable)
5541 continue;
5542
5543 int iColumn = pIdxInfo->aConstraint[i].iColumn;
5544 int op = pIdxInfo->aConstraint[i].op;
5545 if(op == SQLITE_INDEX_CONSTRAINT_LIMIT73 || op == SQLITE_INDEX_CONSTRAINT_OFFSET74) {
5546 continue;
5547 }
5548 if(!vec0_column_idx_is_metadata(p, iColumn)) {
5549 continue;
5550 }
5551
5552 int metadata_idx = vec0_column_idx_to_metadata_idx(p, iColumn);
5553 char value = 0;
5554
5555 switch(op) {
5556 case SQLITE_INDEX_CONSTRAINT_EQ2: {
5557 int vtabIn = 0;
5558 #if COMPILER_SUPPORTS_VTAB_IN1
5559 if (sqlite3_libversion_numbersqlite3_api->libversion_number() >= 3038000) {
5560 vtabIn = sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, -1);
5561 }
5562 if(vtabIn) {
5563 switch(p->metadata_columns[metadata_idx].kind) {
5564 case VEC0_METADATA_COLUMN_KIND_FLOAT:
5565 case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
5566 // IMP: V15248_32086
5567 rc = SQLITE_ERROR1;
5568 vtab_set_error(pVTab, "'xxx in (...)' is only available on INTEGER or TEXT metadata columns.");
5569 goto done;
5570 break;
5571 }
5572 case VEC0_METADATA_COLUMN_KIND_INTEGER:
5573 case VEC0_METADATA_COLUMN_KIND_TEXT: {
5574 break;
5575 }
5576 }
5577 value = VEC0_METADATA_OPERATOR_IN;
5578 sqlite3_vtab_insqlite3_api->vtab_in(pIdxInfo, i, 1);
5579 }else
5580 #endif
5581 {
5582 value = VEC0_PARTITION_OPERATOR_EQ;
5583 }
5584 break;
5585 }
5586 case SQLITE_INDEX_CONSTRAINT_GT4: {
5587 value = VEC0_METADATA_OPERATOR_GT;
5588 break;
5589 }
5590 case SQLITE_INDEX_CONSTRAINT_LE8: {
5591 value = VEC0_METADATA_OPERATOR_LE;
5592 break;
5593 }
5594 case SQLITE_INDEX_CONSTRAINT_LT16: {
5595 value = VEC0_METADATA_OPERATOR_LT;
5596 break;
5597 }
5598 case SQLITE_INDEX_CONSTRAINT_GE32: {
5599 value = VEC0_METADATA_OPERATOR_GE;
5600 break;
5601 }
5602 case SQLITE_INDEX_CONSTRAINT_NE68: {
5603 value = VEC0_METADATA_OPERATOR_NE;
5604 break;
5605 }
5606 default: {
5607 // IMP: V16511_00582
5608 rc = SQLITE_ERROR1;
5609 vtab_set_error(pVTab,
5610 "An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. "
5611 "Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed."
5612 );
5613 goto done;
5614 }
5615 }
5616
5617 if(p->metadata_columns[metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) {
5618 if(!(value == VEC0_METADATA_OPERATOR_EQ || value == VEC0_METADATA_OPERATOR_NE)) {
5619 // IMP: V10145_26984
5620 rc = SQLITE_ERROR1;
5621 vtab_set_error(pVTab, "ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns.");
5622 goto done;
5623 }
5624 }
5625
5626 pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
5627 pIdxInfo->aConstraintUsage[i].omit = 1;
5628 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_METADATA_CONSTRAINT);
5629 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, 'A' + metadata_idx);
5630 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, value);
5631 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, '_');
5632
5633 }
5634
5635
5636
5637 pIdxInfo->idxNum = iMatchVectorTerm;
5638 pIdxInfo->estimatedCost = 30.0;
5639 pIdxInfo->estimatedRows = 10;
5640
5641 } else if (iRowidTerm >= 0) {
5642 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_POINT);
5643 pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
5644 pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
5645 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_POINT_ID);
5646 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 3, '_');
5647 pIdxInfo->idxNum = pIdxInfo->colUsed;
5648 pIdxInfo->estimatedCost = 10.0;
5649 pIdxInfo->estimatedRows = 1;
5650 } else {
5651 sqlite3_str_appendcharsqlite3_api->str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_FULLSCAN);
5652 pIdxInfo->estimatedCost = 3000000.0;
5653 pIdxInfo->estimatedRows = 100000;
5654 }
5655 pIdxInfo->idxStr = sqlite3_str_finishsqlite3_api->str_finish(idxStr);
5656 idxStr = NULL((void*)0);
5657 if (!pIdxInfo->idxStr) {
5658 rc = SQLITE_OK0;
5659 goto done;
5660 }
5661 pIdxInfo->needToFreeIdxStr = 1;
5662
5663
5664 rc = SQLITE_OK0;
5665
5666 done:
5667 if(idxStr) {
5668 sqlite3_str_finishsqlite3_api->str_finish(idxStr);
5669 }
5670 return rc;
5671}
5672
5673// forward declaration because vec0Filter uses it
5674static int vec0Next(sqlite3_vtab_cursor *cur);
5675
5676void merge_sorted_lists(f32 *a, i64 *a_rowids, i64 a_length, f32 *b,
5677 i64 *b_rowids, i32 *b_top_idxs, i64 b_length, f32 *out,
5678 i64 *out_rowids, i64 out_length, i64 *out_used) {
5679 // assert((a_length >= out_length) || (b_length >= out_length));
5680 i64 ptrA = 0;
5681 i64 ptrB = 0;
5682 for (int i = 0; i < out_length; i++) {
5683 if ((ptrA >= a_length) && (ptrB >= b_length)) {
5684 *out_used = i;
5685 return;
5686 }
5687 if (ptrA >= a_length) {
5688 out[i] = b[b_top_idxs[ptrB]];
5689 out_rowids[i] = b_rowids[b_top_idxs[ptrB]];
5690 ptrB++;
5691 } else if (ptrB >= b_length) {
5692 out[i] = a[ptrA];
5693 out_rowids[i] = a_rowids[ptrA];
5694 ptrA++;
5695 } else {
5696 if (a[ptrA] <= b[b_top_idxs[ptrB]]) {
5697 out[i] = a[ptrA];
5698 out_rowids[i] = a_rowids[ptrA];
5699 ptrA++;
5700 } else {
5701 out[i] = b[b_top_idxs[ptrB]];
5702 out_rowids[i] = b_rowids[b_top_idxs[ptrB]];
5703 ptrB++;
5704 }
5705 }
5706 }
5707
5708 *out_used = out_length;
5709}
5710
5711u8 *bitmap_new(i32 n) {
5712 assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n
% 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5712, __extension__ __PRETTY_FUNCTION__); }))
;
5713 u8 *p = sqlite3_mallocsqlite3_api->malloc(n * sizeof(u8) / CHAR_BIT8);
5714 if (p) {
5715 memset(p, 0, n * sizeof(u8) / CHAR_BIT8);
5716 }
5717 return p;
5718}
5719u8 *bitmap_new_from(i32 n, u8 *from) {
5720 assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n
% 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5720, __extension__ __PRETTY_FUNCTION__); }))
;
5721 u8 *p = sqlite3_mallocsqlite3_api->malloc(n * sizeof(u8) / CHAR_BIT8);
5722 if (p) {
5723 memcpy(p, from, n / CHAR_BIT8);
5724 }
5725 return p;
5726}
5727
5728void bitmap_copy(u8 *base, u8 *from, i32 n) {
5729 assert(n % 8 == 0)((void) sizeof ((n % 8 == 0) ? 1 : 0), __extension__ ({ if (n
% 8 == 0) ; else __assert_fail ("n % 8 == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5729, __extension__ __PRETTY_FUNCTION__); }))
;
5730 memcpy(base, from, n / CHAR_BIT8);
5731}
5732
5733void bitmap_and_inplace(u8 *base, u8 *other, i32 n) {
5734 assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if (
(n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5734, __extension__ __PRETTY_FUNCTION__); }))
;
5735 for (int i = 0; i < n / CHAR_BIT8; i++) {
5736 base[i] = base[i] & other[i];
5737 }
5738}
5739
5740void bitmap_set(u8 *bitmap, i32 position, int value) {
5741 if (value) {
5742 bitmap[position / CHAR_BIT8] |= 1 << (position % CHAR_BIT8);
5743 } else {
5744 bitmap[position / CHAR_BIT8] &= ~(1 << (position % CHAR_BIT8));
5745 }
5746}
5747
5748int bitmap_get(u8 *bitmap, i32 position) {
5749 return (((bitmap[position / CHAR_BIT8]) >> (position % CHAR_BIT8)) & 1);
5750}
5751
5752void bitmap_clear(u8 *bitmap, i32 n) {
5753 assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if (
(n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5753, __extension__ __PRETTY_FUNCTION__); }))
;
5754 memset(bitmap, 0, n / CHAR_BIT8);
5755}
5756
5757void bitmap_fill(u8 *bitmap, i32 n) {
5758 assert((n % 8) == 0)((void) sizeof (((n % 8) == 0) ? 1 : 0), __extension__ ({ if (
(n % 8) == 0) ; else __assert_fail ("(n % 8) == 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5758, __extension__ __PRETTY_FUNCTION__); }))
;
5759 memset(bitmap, 0xFF, n / CHAR_BIT8);
5760}
5761
5762/**
5763 * @brief Finds the minimum k items in distances, and writes the indicies to
5764 * out.
5765 *
5766 * @param distances input f32 array of size n, the items to consider.
5767 * @param n: size of distances array.
5768 * @param out: Output array of size k, will contain at most k element indicies
5769 * @param k: Size of output array
5770 * @return int
5771 */
5772int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k,
5773 u8 *bTaken, i32 *k_used) {
5774 assert(k > 0)((void) sizeof ((k > 0) ? 1 : 0), __extension__ ({ if (k >
0) ; else __assert_fail ("k > 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5774, __extension__ __PRETTY_FUNCTION__); }))
;
5775 assert(k <= n)((void) sizeof ((k <= n) ? 1 : 0), __extension__ ({ if (k <=
n) ; else __assert_fail ("k <= n", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5775, __extension__ __PRETTY_FUNCTION__); }))
;
5776
5777 bitmap_clear(bTaken, n);
5778
5779 for (int ik = 0; ik < k; ik++) {
5780 int min_idx = 0;
5781 while (min_idx < n &&
5782 (bitmap_get(bTaken, min_idx) || !bitmap_get(candidates, min_idx))) {
5783 min_idx++;
5784 }
5785 if (min_idx >= n) {
5786 *k_used = ik;
5787 return SQLITE_OK0;
5788 }
5789
5790 for (int i = 0; i < n; i++) {
5791 if (distances[i] <= distances[min_idx] && !bitmap_get(bTaken, i) &&
5792 (bitmap_get(candidates, i))) {
5793 min_idx = i;
5794 }
5795 }
5796
5797 out[ik] = min_idx;
5798 bitmap_set(bTaken, min_idx, 1);
5799 }
5800 *k_used = k;
5801 return SQLITE_OK0;
5802}
5803
5804int vec0_get_metadata_text_long_value(
5805 vec0_vtab * p,
5806 sqlite3_stmt ** stmt,
5807 int metadata_idx,
5808 i64 rowid,
5809 int *n,
5810 char ** s) {
5811 int rc;
5812 if(!(*stmt)) {
5813 const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("select data from " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " where rowid = ?", p->schemaName, p->tableName, metadata_idx);
5814 if(!zSql) {
5815 rc = SQLITE_NOMEM7;
5816 goto done;
5817 }
5818 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, stmt, NULL((void*)0));
5819 sqlite3_freesqlite3_api->free( (void *) zSql);
5820 if(rc != SQLITE_OK0) {
5821 goto done;
5822 }
5823 }
5824
5825 sqlite3_resetsqlite3_api->reset(*stmt);
5826 sqlite3_bind_int64sqlite3_api->bind_int64(*stmt, 1, rowid);
5827 rc = sqlite3_stepsqlite3_api->step(*stmt);
5828 if(rc != SQLITE_ROW100) {
5829 rc = SQLITE_ERROR1;
5830 goto done;
5831 }
5832 *s = (char *) sqlite3_column_textsqlite3_api->column_text(*stmt, 0);
5833 *n = sqlite3_column_bytessqlite3_api->column_bytes(*stmt, 0);
5834 rc = SQLITE_OK0;
5835 done:
5836 return rc;
5837}
5838
5839/**
5840 * @brief Crete at "iterator" (sqlite3_stmt) of chunks with the given constraints
5841 *
5842 * Any VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT values in idxStr/argv will be applied
5843 * as WHERE constraints in the underlying stmt SQL, and any consumer of the stmt
5844 * can freely step through the stmt with all constraints satisfied.
5845 *
5846 * @param p - vec0_vtab
5847 * @param idxStr - the xBestIndex/xFilter idxstr containing VEC0_IDXSTR values
5848 * @param argc - number of argv values from xFilter
5849 * @param argv - array of sqlite3_value from xFilter
5850 * @param outStmt - output sqlite3_stmt of chunks with all filters applied
5851 * @return int SQLITE_OK on success, error code otherwise
5852 */
5853int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value ** argv, sqlite3_stmt** outStmt) {
5854 // always null terminated, enforced by SQLite
5855 int idxStrLength = strlen(idxStr);
5856 // "1" refers to the initial vec0_query_plan char, 4 is the number of chars per "element"
5857 int numValueEntries = (idxStrLength-1) / 4;
5858 assert(argc == numValueEntries)((void) sizeof ((argc == numValueEntries) ? 1 : 0), __extension__
({ if (argc == numValueEntries) ; else __assert_fail ("argc == numValueEntries"
, "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5858, __extension__ __PRETTY_FUNCTION__); }))
;
5859
5860 int rc;
5861 sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0));
5862 sqlite3_str_appendfsqlite3_api->str_appendf(s, "select chunk_id, validity, rowids "
5863 " from " VEC0_SHADOW_CHUNKS_NAME"\"%w\".\"%w_chunks\"",
5864 p->schemaName, p->tableName);
5865
5866 int appendedWhere = 0;
5867 for(int i = 0; i < numValueEntries; i++) {
5868 int idx = 1 + (i * 4);
5869 char kind = idxStr[idx + 0];
5870 if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
5871 continue;
5872 }
5873
5874 int partition_idx = idxStr[idx + 1] - 'A';
5875 int operator = idxStr[idx + 2];
5876 // idxStr[idx + 3] is just null, a '_' placeholder
5877
5878 if(!appendedWhere) {
5879 sqlite3_str_appendallsqlite3_api->str_appendall(s, " WHERE ");
5880 appendedWhere = 1;
5881 }else {
5882 sqlite3_str_appendallsqlite3_api->str_appendall(s, " AND ");
5883 }
5884 switch(operator) {
5885 case VEC0_PARTITION_OPERATOR_EQ:
5886 sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d = ? ", partition_idx);
5887 break;
5888 case VEC0_PARTITION_OPERATOR_GT:
5889 sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d > ? ", partition_idx);
5890 break;
5891 case VEC0_PARTITION_OPERATOR_LE:
5892 sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d <= ? ", partition_idx);
5893 break;
5894 case VEC0_PARTITION_OPERATOR_LT:
5895 sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d < ? ", partition_idx);
5896 break;
5897 case VEC0_PARTITION_OPERATOR_GE:
5898 sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d >= ? ", partition_idx);
5899 break;
5900 case VEC0_PARTITION_OPERATOR_NE:
5901 sqlite3_str_appendfsqlite3_api->str_appendf(s, " partition%02d != ? ", partition_idx);
5902 break;
5903 default: {
5904 char * zSql = sqlite3_str_finishsqlite3_api->str_finish(s);
5905 sqlite3_freesqlite3_api->free(zSql);
5906 return SQLITE_ERROR1;
5907 }
5908
5909 }
5910
5911 }
5912
5913 char *zSql = sqlite3_str_finishsqlite3_api->str_finish(s);
5914 if (!zSql) {
5915 return SQLITE_NOMEM7;
5916 }
5917
5918 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, outStmt, NULL((void*)0));
5919 sqlite3_freesqlite3_api->free(zSql);
5920 if(rc != SQLITE_OK0) {
5921 return rc;
5922 }
5923
5924 int n = 1;
5925 for(int i = 0; i < numValueEntries; i++) {
5926 int idx = 1 + (i * 4);
5927 char kind = idxStr[idx + 0];
5928 if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
5929 continue;
5930 }
5931 sqlite3_bind_valuesqlite3_api->bind_value(*outStmt, n++, argv[i]);
5932 }
5933
5934 return rc;
5935}
5936
5937// A single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER only for now.
5938struct Vec0MetadataIn{
5939 // index into argv of the value this constraint is on
5940 int argv_idx;
5941 // metadata column index of the constraint, derived from idxStr + argv_idx
5942 int metadata_idx;
5943 // array of the copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next(); for TEXT columns the elements are struct Vec0MetadataInTextEntry
5944 struct Array array;
5945};
5946
5947// Array element for one `xxx in (...)` value on a text column. Basically just a string with an explicit length.
5948struct Vec0MetadataInTextEntry {
 // length of zString; compared against the stored value's length before any text comparison
5949 int n;
 // the text of the value
5950 char * zString;
5951};
5952
5953
5954int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid, struct Array * aMetadataIn, int argv_idx) {
5955 int rc;
5956 sqlite3_stmt * stmt = NULL((void*)0);
5957 i64 * rowids = NULL((void*)0);
5958 sqlite3_blob * rowidsBlob;
5959 const char * sTarget = (const char *) sqlite3_value_textsqlite3_api->value_text(value);
5960 int nTarget = sqlite3_value_bytessqlite3_api->value_bytes(value);
5961
5962
5963 // TODO(perf): only text metadata news the rowids BLOB. Make it so that
5964 // rowids BLOB is re-used when multiple fitlers on text columns,
5965 // ex "name BETWEEN 'a' and 'b'""
5966 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", chunk_rowid, 0, &rowidsBlob);
5967 if(rc != SQLITE_OK0) {
5968 return rc;
5969 }
5970 assert(sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0)((void) sizeof ((sqlite3_api->blob_bytes(rowidsBlob) % sizeof
(i64) == 0) ? 1 : 0), __extension__ ({ if (sqlite3_api->blob_bytes
(rowidsBlob) % sizeof(i64) == 0) ; else __assert_fail ("sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0"
, "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5970, __extension__ __PRETTY_FUNCTION__); }))
;
5971 assert((sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size)((void) sizeof (((sqlite3_api->blob_bytes(rowidsBlob) / sizeof
(i64)) == size) ? 1 : 0), __extension__ ({ if ((sqlite3_api->
blob_bytes(rowidsBlob) / sizeof(i64)) == size) ; else __assert_fail
("(sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 5971, __extension__ __PRETTY_FUNCTION__); }))
;
5972
5973 rowids = sqlite3_mallocsqlite3_api->malloc(sqlite3_blob_bytessqlite3_api->blob_bytes(rowidsBlob));
5974 if(!rowids) {
5975 sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob);
5976 return SQLITE_NOMEM7;
5977 }
5978
5979 rc = sqlite3_blob_readsqlite3_api->blob_read(rowidsBlob, rowids, sqlite3_blob_bytessqlite3_api->blob_bytes(rowidsBlob), 0);
5980 if(rc != SQLITE_OK0) {
5981 sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob);
5982 return rc;
5983 }
5984 sqlite3_blob_closesqlite3_api->blob_close(rowidsBlob);
5985
5986 switch(op) {
5987 int nPrefix;
5988 char * sPrefix;
5989 char *sFull;
5990 int nFull;
5991 u8 * view;
5992 case VEC0_METADATA_OPERATOR_EQ: {
5993 for(int i = 0; i < size; i++) {
5994 view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
5995 nPrefix = ((int*) view)[0];
5996 sPrefix = (char *) &view[4];
5997
5998 // for EQ the text lengths must match
5999 if(nPrefix != nTarget) {
6000 bitmap_set(b, i, 0);
6001 continue;
6002 }
6003 int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12)));
6004
6005 // for short strings, use the prefix comparison direclty
6006 if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
6007 bitmap_set(b, i, cmpPrefix == 0);
6008 continue;
6009 }
6010 // for EQ on longs strings, the prefix must match
6011 if(cmpPrefix) {
6012 bitmap_set(b, i, 0);
6013 continue;
6014 }
6015 // consult the full string
6016 rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
6017 if(rc != SQLITE_OK0) {
6018 goto done;
6019 }
6020 if(nPrefix != nFull) {
6021 rc = SQLITE_ERROR1;
6022 goto done;
6023 }
6024 bitmap_set(b, i, strncmp(sFull, sTarget, nFull) == 0);
6025 }
6026 break;
6027 }
6028 case VEC0_METADATA_OPERATOR_NE: {
6029 for(int i = 0; i < size; i++) {
6030 view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
6031 nPrefix = ((int*) view)[0];
6032 sPrefix = (char *) &view[4];
6033
6034 // for NE if text lengths dont match, it never will
6035 if(nPrefix != nTarget) {
6036 bitmap_set(b, i, 1);
6037 continue;
6038 }
6039
6040 int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12)));
6041
6042 // for short strings, use the prefix comparison direclty
6043 if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
6044 bitmap_set(b, i, cmpPrefix != 0);
6045 continue;
6046 }
6047 // for NE on longs strings, if prefixes dont match, then long string wont
6048 if(cmpPrefix) {
6049 bitmap_set(b, i, 1);
6050 continue;
6051 }
6052 // consult the full string
6053 rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
6054 if(rc != SQLITE_OK0) {
6055 goto done;
6056 }
6057 if(nPrefix != nFull) {
6058 rc = SQLITE_ERROR1;
6059 goto done;
6060 }
6061 bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0);
6062 }
6063 break;
6064 }
6065 case VEC0_METADATA_OPERATOR_GT: {
6066 for(int i = 0; i < size; i++) {
6067 view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
6068 nPrefix = ((int*) view)[0];
6069 sPrefix = (char *) &view[4];
6070 int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget
)) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget
))
);
6071
6072 if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
6073 // if prefix match, check which is longer
6074 if(cmpPrefix == 0) {
6075 bitmap_set(b, i, nPrefix > nTarget);
6076 }
6077 else {
6078 bitmap_set(b, i, cmpPrefix > 0);
6079 }
6080 continue;
6081 }
6082 // TODO(perf): may not need to compare full text in some cases
6083
6084 rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
6085 if(rc != SQLITE_OK0) {
6086 goto done;
6087 }
6088 if(nPrefix != nFull) {
6089 rc = SQLITE_ERROR1;
6090 goto done;
6091 }
6092 bitmap_set(b, i, strncmp(sFull, sTarget, nFull) > 0);
6093 }
6094 break;
6095 }
6096 case VEC0_METADATA_OPERATOR_GE: {
6097 for(int i = 0; i < size; i++) {
6098 view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
6099 nPrefix = ((int*) view)[0];
6100 sPrefix = (char *) &view[4];
6101 int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget
)) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget
))
);
6102
6103 if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
6104 // if prefix match, check which is longer
6105 if(cmpPrefix == 0) {
6106 bitmap_set(b, i, nPrefix >= nTarget);
6107 }
6108 else {
6109 bitmap_set(b, i, cmpPrefix >= 0);
6110 }
6111 continue;
6112 }
6113 // TODO(perf): may not need to compare full text in some cases
6114
6115 rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
6116 if(rc != SQLITE_OK0) {
6117 goto done;
6118 }
6119 if(nPrefix != nFull) {
6120 rc = SQLITE_ERROR1;
6121 goto done;
6122 }
6123 bitmap_set(b, i, strncmp(sFull, sTarget, nFull) >= 0);
6124 }
6125 break;
6126 }
6127 case VEC0_METADATA_OPERATOR_LE: {
6128 for(int i = 0; i < size; i++) {
6129 view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
6130 nPrefix = ((int*) view)[0];
6131 sPrefix = (char *) &view[4];
6132 int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget
)) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget
))
);
6133
6134 if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
6135 // if prefix match, check which is longer
6136 if(cmpPrefix == 0) {
6137 bitmap_set(b, i, nPrefix <= nTarget);
6138 }
6139 else {
6140 bitmap_set(b, i, cmpPrefix <= 0);
6141 }
6142 continue;
6143 }
6144 // TODO(perf): may not need to compare full text in some cases
6145
6146 rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
6147 if(rc != SQLITE_OK0) {
6148 goto done;
6149 }
6150 if(nPrefix != nFull) {
6151 rc = SQLITE_ERROR1;
6152 goto done;
6153 }
6154 bitmap_set(b, i, strncmp(sFull, sTarget, nFull) <= 0);
6155 }
6156 break;
6157 }
6158 case VEC0_METADATA_OPERATOR_LT: {
6159 for(int i = 0; i < size; i++) {
6160 view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
6161 nPrefix = ((int*) view)[0];
6162 sPrefix = (char *) &view[4];
6163 int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)((((((nPrefix) <= (12)) ? (nPrefix) : (12))) <= (nTarget
)) ? ((((nPrefix) <= (12)) ? (nPrefix) : (12))) : (nTarget
))
);
6164
6165 if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
6166 // if prefix match, check which is longer
6167 if(cmpPrefix == 0) {
6168 bitmap_set(b, i, nPrefix < nTarget);
6169 }
6170 else {
6171 bitmap_set(b, i, cmpPrefix < 0);
6172 }
6173 continue;
6174 }
6175 // TODO(perf): may not need to compare full text in some cases
6176
6177 rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
6178 if(rc != SQLITE_OK0) {
6179 goto done;
6180 }
6181 if(nPrefix != nFull) {
6182 rc = SQLITE_ERROR1;
6183 goto done;
6184 }
6185 bitmap_set(b, i, strncmp(sFull, sTarget, nFull) < 0);
6186 }
6187 break;
6188 }
6189
6190 case VEC0_METADATA_OPERATOR_IN: {
6191 size_t metadataInIdx = -1;
6192 for(size_t i = 0; i < aMetadataIn->length; i++) {
6193 struct Vec0MetadataIn * metadataIn = &(((struct Vec0MetadataIn *) aMetadataIn->z)[i]);
6194 if(metadataIn->argv_idx == argv_idx) {
6195 metadataInIdx = i;
6196 break;
6197 }
6198 }
6199 if(metadataInIdx < 0) {
6200 rc = SQLITE_ERROR1;
6201 goto done;
6202 }
6203
6204 struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
6205 struct Array * aTarget = &(metadataIn->array);
6206
6207
6208 int nPrefix;
6209 char * sPrefix;
6210 char *sFull;
6211 int nFull;
6212 u8 * view;
6213 for(int i = 0; i < size; i++) {
6214 view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
6215 nPrefix = ((int*) view)[0];
6216 sPrefix = (char *) &view[4];
6217 for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
6218 struct Vec0MetadataInTextEntry * entry = &(((struct Vec0MetadataInTextEntry*)aTarget->z)[target_idx]);
6219 if(entry->n != nPrefix) {
6220 continue;
6221 }
6222 int cmpPrefix = strncmp(sPrefix, entry->zString, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)(((nPrefix) <= (12)) ? (nPrefix) : (12)));
6223 if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
6224 if(cmpPrefix == 0) {
6225 bitmap_set(b, i, 1);
6226 break;
6227 }
6228 continue;
6229 }
6230 if(cmpPrefix) {
6231 continue;
6232 }
6233
6234 rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
6235 if(rc != SQLITE_OK0) {
6236 goto done;
6237 }
6238 if(nPrefix != nFull) {
6239 rc = SQLITE_ERROR1;
6240 goto done;
6241 }
6242 if(strncmp(sFull, entry->zString, nFull) == 0) {
6243 bitmap_set(b, i, 1);
6244 break;
6245 }
6246 }
6247 }
6248 break;
6249 }
6250
6251 }
6252 rc = SQLITE_OK0;
6253
6254 done:
6255 sqlite3_finalizesqlite3_api->finalize(stmt);
6256 sqlite3_freesqlite3_api->free(rowids);
6257 return rc;
6258
6259}
6260
6261/**
6262 * @brief Fill in bitmap of chunk values, whether or not the values match a metadata constraint
6263 *
6264 * @param p vec0_vtab
6265 * @param metadata_idx index of the metatadata column to perfrom constraints on
6266 * @param value sqlite3_value of the constraints value
6267 * @param blob sqlite3_blob that is already opened on the metdata column's shadow chunk table
6268 * @param chunk_rowid rowid of the chunk to calculate on
6269 * @param b pre-allocated and zero'd out bitmap to write results to
6270 * @param size size of the chunk
6271 * @return int SQLITE_OK on success, error code otherwise
6272 */
6273int vec0_set_metadata_filter_bitmap(
6274 vec0_vtab *p,
6275 int metadata_idx,
6276 vec0_metadata_operator op,
6277 sqlite3_value * value,
6278 sqlite3_blob * blob,
6279 i64 chunk_rowid,
6280 u8* b,
6281 int size,
6282 struct Array * aMetadataIn, int argv_idx) {
6283 // TODO: shouldn't this skip in-valid entries from the chunk's validity bitmap?
6284
6285 int rc;
6286 rc = sqlite3_blob_reopensqlite3_api->blob_reopen(blob, chunk_rowid);
6287 if(rc != SQLITE_OK0) {
6288 return rc;
6289 }
6290
6291 vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;
6292 int szMatch = 0;
6293 int blobSize = sqlite3_blob_bytessqlite3_api->blob_bytes(blob);
6294 switch(kind) {
6295 case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
6296 szMatch = blobSize == size / CHAR_BIT8;
6297 break;
6298 }
6299 case VEC0_METADATA_COLUMN_KIND_INTEGER: {
6300 szMatch = blobSize == size * sizeof(i64);
6301 break;
6302 }
6303 case VEC0_METADATA_COLUMN_KIND_FLOAT: {
6304 szMatch = blobSize == size * sizeof(double);
6305 break;
6306 }
6307 case VEC0_METADATA_COLUMN_KIND_TEXT: {
6308 szMatch = blobSize == size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16;
6309 break;
6310 }
6311 }
6312 if(!szMatch) {
6313 return SQLITE_ERROR1;
6314 }
6315 void * buffer = sqlite3_mallocsqlite3_api->malloc(blobSize);
6316 if(!buffer) {
6317 return SQLITE_NOMEM7;
6318 }
6319 rc = sqlite3_blob_readsqlite3_api->blob_read(blob, buffer, blobSize, 0);
6320 if(rc != SQLITE_OK0) {
6321 goto done;
6322 }
6323 switch(kind) {
6324 case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
6325 int target = sqlite3_value_intsqlite3_api->value_int(value);
6326 if( (target && op == VEC0_METADATA_OPERATOR_EQ) || (!target && op == VEC0_METADATA_OPERATOR_NE)) {
6327 for(int i = 0; i < size; i++) { bitmap_set(b, i, bitmap_get((u8*) buffer, i)); }
6328 }
6329 else {
6330 for(int i = 0; i < size; i++) { bitmap_set(b, i, !bitmap_get((u8*) buffer, i)); }
6331 }
6332 break;
6333 }
6334 case VEC0_METADATA_COLUMN_KIND_INTEGER: {
6335 i64 * array = (i64*) buffer;
6336 i64 target = sqlite3_value_int64sqlite3_api->value_int64(value);
6337 switch(op) {
6338 case VEC0_METADATA_OPERATOR_EQ: {
6339 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); }
6340 break;
6341 }
6342 case VEC0_METADATA_OPERATOR_GT: {
6343 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); }
6344 break;
6345 }
6346 case VEC0_METADATA_OPERATOR_LE: {
6347 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); }
6348 break;
6349 }
6350 case VEC0_METADATA_OPERATOR_LT: {
6351 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); }
6352 break;
6353 }
6354 case VEC0_METADATA_OPERATOR_GE: {
6355 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); }
6356 break;
6357 }
6358 case VEC0_METADATA_OPERATOR_NE: {
6359 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
6360 break;
6361 }
6362 case VEC0_METADATA_OPERATOR_IN: {
6363 int metadataInIdx = -1;
6364 for(size_t i = 0; i < aMetadataIn->length; i++) {
6365 struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
6366 if(metadataIn->argv_idx == argv_idx) {
6367 metadataInIdx = i;
6368 break;
6369 }
6370 }
6371 if(metadataInIdx < 0) {
6372 rc = SQLITE_ERROR1;
6373 goto done;
6374 }
6375 struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
6376 struct Array * aTarget = &(metadataIn->array);
6377
6378 for(int i = 0; i < size; i++) {
6379 for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
6380 if( ((i64*)aTarget->z)[target_idx] == array[i]) {
6381 bitmap_set(b, i, 1);
6382 break;
6383 }
6384 }
6385 }
6386 break;
6387 }
6388 }
6389 break;
6390 }
6391 case VEC0_METADATA_COLUMN_KIND_FLOAT: {
6392 double * array = (double*) buffer;
6393 double target = sqlite3_value_doublesqlite3_api->value_double(value);
6394 switch(op) {
6395 case VEC0_METADATA_OPERATOR_EQ: {
6396 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); }
6397 break;
6398 }
6399 case VEC0_METADATA_OPERATOR_GT: {
6400 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); }
6401 break;
6402 }
6403 case VEC0_METADATA_OPERATOR_LE: {
6404 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); }
6405 break;
6406 }
6407 case VEC0_METADATA_OPERATOR_LT: {
6408 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); }
6409 break;
6410 }
6411 case VEC0_METADATA_OPERATOR_GE: {
6412 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); }
6413 break;
6414 }
6415 case VEC0_METADATA_OPERATOR_NE: {
6416 for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
6417 break;
6418 }
6419 case VEC0_METADATA_OPERATOR_IN: {
6420 // should never be reached
6421 break;
6422 }
6423 }
6424 break;
6425 }
6426 case VEC0_METADATA_COLUMN_KIND_TEXT: {
6427 rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, metadata_idx, chunk_rowid, aMetadataIn, argv_idx);
6428 if(rc != SQLITE_OK0) {
6429 goto done;
6430 }
6431 break;
6432 }
6433 }
6434 done:
6435 sqlite3_freesqlite3_api->free(buffer);
6436 return rc;
6437}
6438
6439int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
6440 struct VectorColumnDefinition *vector_column,
6441 int vectorColumnIdx, struct Array *arrayRowidsIn,
6442 struct Array * aMetadataIn,
6443 const char * idxStr, int argc, sqlite3_value ** argv,
6444 void *queryVector, i64 k, i64 **out_topk_rowids,
6445 f32 **out_topk_distances, i64 *out_used) {
6446 // for each chunk, get top min(k, chunk_size) rowid + distances to query vec.
6447 // then reconcile all topk_chunks for a true top k.
6448 // output only rowids + distances for now
6449
6450 int rc = SQLITE_OK0;
6451 sqlite3_blob *blobVectors = NULL((void*)0);
6452
6453 void *baseVectors = NULL((void*)0); // memory: chunk_size * dimensions * element_size
6454
6455 // OWNED BY CALLER ON SUCCESS
6456 i64 *topk_rowids = NULL((void*)0); // memory: k * 4
6457 // OWNED BY CALLER ON SUCCESS
6458 f32 *topk_distances = NULL((void*)0); // memory: k * 4
6459
6460 i64 *tmp_topk_rowids = NULL((void*)0); // memory: k * 4
6461 f32 *tmp_topk_distances = NULL((void*)0); // memory: k * 4
6462 f32 *chunk_distances = NULL((void*)0); // memory: chunk_size * 4
6463 u8 *b = NULL((void*)0); // memory: chunk_size / 8
6464 u8 *bTaken = NULL((void*)0); // memory: chunk_size / 8
6465 i32 *chunk_topk_idxs = NULL((void*)0); // memory: k * 4
6466 u8 *bmRowids = NULL((void*)0); // memory: chunk_size / 8
6467 u8 *bmMetadata = NULL((void*)0); // memory: chunk_size / 8
6468 // // total: a lot???
6469
6470 // 6 * (k * 4) + (k * 2) + (chunk_size / 8) + (chunk_size * dimensions * 4)
6471
6472 topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i64));
6473 if (!topk_rowids) {
6474 rc = SQLITE_NOMEM7;
6475 goto cleanup;
6476 }
6477 memset(topk_rowids, 0, k * sizeof(i64));
6478
6479 topk_distances = sqlite3_mallocsqlite3_api->malloc(k * sizeof(f32));
6480 if (!topk_distances) {
6481 rc = SQLITE_NOMEM7;
6482 goto cleanup;
6483 }
6484 memset(topk_distances, 0, k * sizeof(f32));
6485
6486 tmp_topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i64));
6487 if (!tmp_topk_rowids) {
6488 rc = SQLITE_NOMEM7;
6489 goto cleanup;
6490 }
6491 memset(tmp_topk_rowids, 0, k * sizeof(i64));
6492
6493 tmp_topk_distances = sqlite3_mallocsqlite3_api->malloc(k * sizeof(f32));
6494 if (!tmp_topk_distances) {
6495 rc = SQLITE_NOMEM7;
6496 goto cleanup;
6497 }
6498 memset(tmp_topk_distances, 0, k * sizeof(f32));
6499
6500 i64 k_used = 0;
6501 i64 baseVectorsSize = p->chunk_size * vector_column_byte_size(*vector_column);
6502 baseVectors = sqlite3_mallocsqlite3_api->malloc(baseVectorsSize);
6503 if (!baseVectors) {
6504 rc = SQLITE_NOMEM7;
6505 goto cleanup;
6506 }
6507
6508 chunk_distances = sqlite3_mallocsqlite3_api->malloc(p->chunk_size * sizeof(f32));
6509 if (!chunk_distances) {
6510 rc = SQLITE_NOMEM7;
6511 goto cleanup;
6512 }
6513
6514 b = bitmap_new(p->chunk_size);
6515 if (!b) {
6516 rc = SQLITE_NOMEM7;
6517 goto cleanup;
6518 }
6519
6520 bTaken = bitmap_new(p->chunk_size);
6521 if (!bTaken) {
6522 rc = SQLITE_NOMEM7;
6523 goto cleanup;
6524 }
6525
6526 chunk_topk_idxs = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i32));
6527 if (!chunk_topk_idxs) {
6528 rc = SQLITE_NOMEM7;
6529 goto cleanup;
6530 }
6531
6532 bmRowids = arrayRowidsIn ? bitmap_new(p->chunk_size) : NULL((void*)0);
6533 if (arrayRowidsIn && !bmRowids) {
6534 rc = SQLITE_NOMEM7;
6535 goto cleanup;
6536 }
6537
6538 sqlite3_blob * metadataBlobs[VEC0_MAX_METADATA_COLUMNS16];
6539 memset(metadataBlobs, 0, sizeof(sqlite3_blob*) * VEC0_MAX_METADATA_COLUMNS16);
6540
6541 bmMetadata = bitmap_new(p->chunk_size);
6542 if(!bmMetadata) {
6543 rc = SQLITE_NOMEM7;
6544 goto cleanup;
6545 }
6546
6547 int idxStrLength = strlen(idxStr);
6548 int numValueEntries = (idxStrLength-1) / 4;
6549 assert(numValueEntries == argc)((void) sizeof ((numValueEntries == argc) ? 1 : 0), __extension__
({ if (numValueEntries == argc) ; else __assert_fail ("numValueEntries == argc"
, "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 6549, __extension__ __PRETTY_FUNCTION__); }))
;
6550 int hasMetadataFilters = 0;
6551 for(int i = 0; i < argc; i++) {
6552 int idx = 1 + (i * 4);
6553 char kind = idxStr[idx + 0];
6554 if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
6555 hasMetadataFilters = 1;
6556 break;
6557 }
6558 }
6559
6560 while (true1) {
6561 rc = sqlite3_stepsqlite3_api->step(stmtChunks);
6562 if (rc == SQLITE_DONE101) {
6563 break;
6564 }
6565 if (rc != SQLITE_ROW100) {
6566 vtab_set_error(&p->base, "chunks iter error");
6567 rc = SQLITE_ERROR1;
6568 goto cleanup;
6569 }
6570 memset(chunk_distances, 0, p->chunk_size * sizeof(f32));
6571 memset(chunk_topk_idxs, 0, k * sizeof(i32));
6572 bitmap_clear(b, p->chunk_size);
6573
6574 i64 chunk_id = sqlite3_column_int64sqlite3_api->column_int64(stmtChunks, 0);
6575 unsigned char *chunkValidity =
6576 (unsigned char *)sqlite3_column_blobsqlite3_api->column_blob(stmtChunks, 1);
6577 i64 validitySize = sqlite3_column_bytessqlite3_api->column_bytes(stmtChunks, 1);
6578 if (validitySize != p->chunk_size / CHAR_BIT8) {
6579 // IMP: V05271_22109
6580 vtab_set_error(
6581 &p->base,
6582 "chunk validity size doesn't match - expected %lld, found %lld",
6583 p->chunk_size / CHAR_BIT8, validitySize);
6584 rc = SQLITE_ERROR1;
6585 goto cleanup;
6586 }
6587
6588 i64 *chunkRowids = (i64 *)sqlite3_column_blobsqlite3_api->column_blob(stmtChunks, 2);
6589 i64 rowidsSize = sqlite3_column_bytessqlite3_api->column_bytes(stmtChunks, 2);
6590 if (rowidsSize != p->chunk_size * sizeof(i64)) {
6591 // IMP: V02796_19635
6592 vtab_set_error(&p->base, "rowids size doesn't match");
6593 vtab_set_error(
6594 &p->base,
6595 "chunk rowids size doesn't match - expected %lld, found %lld",
6596 p->chunk_size * sizeof(i64), rowidsSize);
6597 rc = SQLITE_ERROR1;
6598 goto cleanup;
6599 }
6600
6601 // open the vector chunk blob for the current chunk
6602 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName,
6603 p->shadowVectorChunksNames[vectorColumnIdx],
6604 "vectors", chunk_id, 0, &blobVectors);
6605 if (rc != SQLITE_OK0) {
6606 vtab_set_error(&p->base, "could not open vectors blob for chunk %lld",
6607 chunk_id);
6608 rc = SQLITE_ERROR1;
6609 goto cleanup;
6610 }
6611
6612 i64 currentBaseVectorsSize = sqlite3_blob_bytessqlite3_api->blob_bytes(blobVectors);
6613 i64 expectedBaseVectorsSize =
6614 p->chunk_size * vector_column_byte_size(*vector_column);
6615 if (currentBaseVectorsSize != expectedBaseVectorsSize) {
6616 // IMP: V16465_00535
6617 vtab_set_error(
6618 &p->base,
6619 "vectors blob size doesn't match - expected %lld, found %lld",
6620 expectedBaseVectorsSize, currentBaseVectorsSize);
6621 rc = SQLITE_ERROR1;
6622 goto cleanup;
6623 }
6624 rc = sqlite3_blob_readsqlite3_api->blob_read(blobVectors, baseVectors, currentBaseVectorsSize, 0);
6625
6626 if (rc != SQLITE_OK0) {
6627 vtab_set_error(&p->base, "vectors blob read error for %lld", chunk_id);
6628 rc = SQLITE_ERROR1;
6629 goto cleanup;
6630 }
6631
6632 bitmap_copy(b, chunkValidity, p->chunk_size);
6633 if (arrayRowidsIn) {
6634 bitmap_clear(bmRowids, p->chunk_size);
6635
6636 for (int i = 0; i < p->chunk_size; i++) {
6637 if (!bitmap_get(chunkValidity, i)) {
6638 continue;
6639 }
6640 i64 rowid = chunkRowids[i];
6641 void *in = bsearch(&rowid, arrayRowidsIn->z, arrayRowidsIn->length,
6642 sizeof(i64), _cmp);
6643 bitmap_set(bmRowids, i, in ? 1 : 0);
6644 }
6645 bitmap_and_inplace(b, bmRowids, p->chunk_size);
6646 }
6647
6648 if(hasMetadataFilters) {
6649 for(int i = 0; i < argc; i++) {
6650 int idx = 1 + (i * 4);
6651 char kind = idxStr[idx + 0];
6652 if(kind != VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
6653 continue;
6654 }
6655 int metadata_idx = idxStr[idx + 1] - 'A';
6656 int operator = idxStr[idx + 2];
6657
6658 if(!metadataBlobs[metadata_idx]) {
6659 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &metadataBlobs[metadata_idx]);
6660 vtab_set_error(&p->base, "Could not open metadata blob");
6661 if(rc != SQLITE_OK0) {
6662 goto cleanup;
6663 }
6664 }
6665
6666 bitmap_clear(bmMetadata, p->chunk_size);
6667 rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i);
6668 if(rc != SQLITE_OK0) {
6669 vtab_set_error(&p->base, "Could not filter metadata fields");
6670 if(rc != SQLITE_OK0) {
6671 goto cleanup;
6672 }
6673 }
6674 bitmap_and_inplace(b, bmMetadata, p->chunk_size);
6675 }
6676 }
6677
6678
6679 for (int i = 0; i < p->chunk_size; i++) {
6680 if (!bitmap_get(b, i)) {
6681 continue;
6682 };
6683
6684 f32 result;
6685 switch (vector_column->element_type) {
6686 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
6687 const f32 *base_i =
6688 ((f32 *)baseVectors) + (i * vector_column->dimensions);
6689 switch (vector_column->distance_metric) {
6690 case VEC0_DISTANCE_METRIC_L2: {
6691 result = distance_l2_sqr_float(base_i, (f32 *)queryVector,
6692 &vector_column->dimensions);
6693 break;
6694 }
6695 case VEC0_DISTANCE_METRIC_L1: {
6696 result = distance_l1_f32(base_i, (f32 *)queryVector,
6697 &vector_column->dimensions);
6698 break;
6699 }
6700 case VEC0_DISTANCE_METRIC_COSINE: {
6701 result = distance_cosine_float(base_i, (f32 *)queryVector,
6702 &vector_column->dimensions);
6703 break;
6704 }
6705 }
6706 break;
6707 }
6708 case SQLITE_VEC_ELEMENT_TYPE_INT8: {
6709 const i8 *base_i =
6710 ((i8 *)baseVectors) + (i * vector_column->dimensions);
6711 switch (vector_column->distance_metric) {
6712 case VEC0_DISTANCE_METRIC_L2: {
6713 result = distance_l2_sqr_int8(base_i, (i8 *)queryVector,
6714 &vector_column->dimensions);
6715 break;
6716 }
6717 case VEC0_DISTANCE_METRIC_L1: {
6718 result = distance_l1_int8(base_i, (i8 *)queryVector,
6719 &vector_column->dimensions);
6720 break;
6721 }
6722 case VEC0_DISTANCE_METRIC_COSINE: {
6723 result = distance_cosine_int8(base_i, (i8 *)queryVector,
6724 &vector_column->dimensions);
6725 break;
6726 }
6727 }
6728
6729 break;
6730 }
6731 case SQLITE_VEC_ELEMENT_TYPE_BIT: {
6732 const u8 *base_i =
6733 ((u8 *)baseVectors) + (i * (vector_column->dimensions / CHAR_BIT8));
6734 result = distance_hamming(base_i, (u8 *)queryVector,
6735 &vector_column->dimensions);
6736 break;
6737 }
6738 }
6739
6740 chunk_distances[i] = result;
6741 }
6742
6743 int used1;
6744 min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs,
6745 min(k, p->chunk_size)(((k) <= (p->chunk_size)) ? (k) : (p->chunk_size)), bTaken, &used1);
6746
6747 i64 used;
6748 merge_sorted_lists(topk_distances, topk_rowids, k_used, chunk_distances,
6749 chunkRowids, chunk_topk_idxs,
6750 min(min(k, p->chunk_size), used1)((((((k) <= (p->chunk_size)) ? (k) : (p->chunk_size)
)) <= (used1)) ? ((((k) <= (p->chunk_size)) ? (k) : (
p->chunk_size))) : (used1))
, tmp_topk_distances,
6751 tmp_topk_rowids, k, &used);
6752
6753 for (int i = 0; i < used; i++) {
6754 topk_rowids[i] = tmp_topk_rowids[i];
6755 topk_distances[i] = tmp_topk_distances[i];
6756 }
6757 k_used = used;
6758 // blobVectors is always opened with read-only permissions, so this never
6759 // fails.
6760 sqlite3_blob_closesqlite3_api->blob_close(blobVectors);
6761 blobVectors = NULL((void*)0);
6762 }
6763
6764 *out_topk_rowids = topk_rowids;
6765 *out_topk_distances = topk_distances;
6766 *out_used = k_used;
6767 rc = SQLITE_OK0;
6768
6769cleanup:
6770 if (rc != SQLITE_OK0) {
6771 sqlite3_freesqlite3_api->free(topk_rowids);
6772 sqlite3_freesqlite3_api->free(topk_distances);
6773 }
6774 sqlite3_freesqlite3_api->free(chunk_topk_idxs);
6775 sqlite3_freesqlite3_api->free(tmp_topk_rowids);
6776 sqlite3_freesqlite3_api->free(tmp_topk_distances);
6777 sqlite3_freesqlite3_api->free(b);
6778 sqlite3_freesqlite3_api->free(bTaken);
6779 sqlite3_freesqlite3_api->free(bmRowids);
6780 sqlite3_freesqlite3_api->free(baseVectors);
6781 sqlite3_freesqlite3_api->free(chunk_distances);
6782 sqlite3_freesqlite3_api->free(bmMetadata);
6783 for(int i = 0; i < VEC0_MAX_METADATA_COLUMNS16; i++) {
6784 sqlite3_blob_closesqlite3_api->blob_close(metadataBlobs[i]);
6785 }
6786 // blobVectors is always opened with read-only permissions, so this never
6787 // fails.
6788 sqlite3_blob_closesqlite3_api->blob_close(blobVectors);
6789 return rc;
6790}
6791
6792int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
6793 const char *idxStr, int argc, sqlite3_value **argv) {
6794 assert(argc == (strlen(idxStr)-1) / 4)((void) sizeof ((argc == (strlen(idxStr)-1) / 4) ? 1 : 0), __extension__
({ if (argc == (strlen(idxStr)-1) / 4) ; else __assert_fail (
"argc == (strlen(idxStr)-1) / 4", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 6794, __extension__ __PRETTY_FUNCTION__); }))
;
6795 int rc;
6796 struct vec0_query_knn_data *knn_data;
6797
6798 int vectorColumnIdx = idxNum;
6799 struct VectorColumnDefinition *vector_column =
6800 &p->vector_columns[vectorColumnIdx];
6801
6802 struct Array *arrayRowidsIn = NULL((void*)0);
6803 sqlite3_stmt *stmtChunks = NULL((void*)0);
6804 void *queryVector;
6805 size_t dimensions;
6806 enum VectorElementType elementType;
6807 vector_cleanup queryVectorCleanup = vector_cleanup_noop;
6808 char *pzError;
6809 knn_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*knn_data));
6810 if (!knn_data) {
6811 return SQLITE_NOMEM7;
6812 }
6813 memset(knn_data, 0, sizeof(*knn_data));
6814 // array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints
6815 struct Array * aMetadataIn = NULL((void*)0);
6816
6817 int query_idx =-1;
6818 int k_idx = -1;
6819 int rowid_in_idx = -1;
6820 for(int i = 0; i < argc; i++) {
6821 if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_MATCH) {
6822 query_idx = i;
6823 }
6824 if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_K) {
6825 k_idx = i;
6826 }
6827 if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_ROWID_IN) {
6828 rowid_in_idx = i;
6829 }
6830 }
6831 assert(query_idx >= 0)((void) sizeof ((query_idx >= 0) ? 1 : 0), __extension__ (
{ if (query_idx >= 0) ; else __assert_fail ("query_idx >= 0"
, "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 6831, __extension__ __PRETTY_FUNCTION__); }))
;
6832 assert(k_idx >= 0)((void) sizeof ((k_idx >= 0) ? 1 : 0), __extension__ ({ if
(k_idx >= 0) ; else __assert_fail ("k_idx >= 0", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 6832, __extension__ __PRETTY_FUNCTION__); }))
;
6833
6834 // make sure the query vector matches the vector column (type dimensions etc.)
6835 rc = vector_from_value(argv[query_idx], &queryVector, &dimensions, &elementType,
6836 &queryVectorCleanup, &pzError);
6837
6838 if (rc != SQLITE_OK0) {
6839 vtab_set_error(&p->base,
6840 "Query vector on the \"%.*s\" column is invalid: %z",
6841 vector_column->name_length, vector_column->name, pzError);
6842 rc = SQLITE_ERROR1;
6843 goto cleanup;
6844 }
6845 if (elementType != vector_column->element_type) {
6846 vtab_set_error(
6847 &p->base,
6848 "Query vector for the \"%.*s\" column is expected to be of type "
6849 "%s, but a %s vector was provided.",
6850 vector_column->name_length, vector_column->name,
6851 vector_subtype_name(vector_column->element_type),
6852 vector_subtype_name(elementType));
6853 rc = SQLITE_ERROR1;
6854 goto cleanup;
6855 }
6856 if (dimensions != vector_column->dimensions) {
6857 vtab_set_error(
6858 &p->base,
6859 "Dimension mismatch for query vector for the \"%.*s\" column. "
6860 "Expected %d dimensions but received %d.",
6861 vector_column->name_length, vector_column->name,
6862 vector_column->dimensions, dimensions);
6863 rc = SQLITE_ERROR1;
6864 goto cleanup;
6865 }
6866
6867 i64 k = sqlite3_value_int64sqlite3_api->value_int64(argv[k_idx]);
6868 if (k < 0) {
6869 vtab_set_error(
6870 &p->base, "k value in knn queries must be greater than or equal to 0.");
6871 rc = SQLITE_ERROR1;
6872 goto cleanup;
6873 }
6874#define SQLITE_VEC_VEC0_K_MAX4096 4096
6875 if (k > SQLITE_VEC_VEC0_K_MAX4096) {
6876 vtab_set_error(
6877 &p->base,
6878 "k value in knn query too large, provided %lld and the limit is %lld",
6879 k, SQLITE_VEC_VEC0_K_MAX4096);
6880 rc = SQLITE_ERROR1;
6881 goto cleanup;
6882 }
6883
6884 if (k == 0) {
6885 knn_data->k = 0;
6886 pCur->knn_data = knn_data;
6887 pCur->query_plan = VEC0_QUERY_PLAN_KNN;
6888 rc = SQLITE_OK0;
6889 goto cleanup;
6890 }
6891
6892// handle when a `rowid in (...)` operation was provided
6893// Array of all the rowids that appear in any `rowid in (...)` constraint.
6894// NULL if none were provided, which means a "full" scan.
6895#if COMPILER_SUPPORTS_VTAB_IN1
6896 if (rowid_in_idx >= 0) {
6897 sqlite3_value *item;
6898 int rc;
6899 arrayRowidsIn = sqlite3_mallocsqlite3_api->malloc(sizeof(*arrayRowidsIn));
6900 if (!arrayRowidsIn) {
6901 rc = SQLITE_NOMEM7;
6902 goto cleanup;
6903 }
6904 memset(arrayRowidsIn, 0, sizeof(*arrayRowidsIn));
6905
6906 rc = array_init(arrayRowidsIn, sizeof(i64), 32);
6907 if (rc != SQLITE_OK0) {
6908 goto cleanup;
6909 }
6910 for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[rowid_in_idx], &item); rc == SQLITE_OK0 && item;
6911 rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[rowid_in_idx], &item)) {
6912 i64 rowid;
6913 if (p->pkIsText) {
6914 rc = vec0_rowid_from_id(p, item, &rowid);
6915 if (rc != SQLITE_OK0) {
6916 goto cleanup;
6917 }
6918 } else {
6919 rowid = sqlite3_value_int64sqlite3_api->value_int64(item);
6920 }
6921 rc = array_append(arrayRowidsIn, &rowid);
6922 if (rc != SQLITE_OK0) {
6923 goto cleanup;
6924 }
6925 }
6926 if (rc != SQLITE_DONE101) {
6927 vtab_set_error(&p->base, "error processing rowid in (...) array");
6928 goto cleanup;
6929 }
6930 qsort(arrayRowidsIn->z, arrayRowidsIn->length, arrayRowidsIn->element_size,
6931 _cmp);
6932 }
6933#endif
6934
6935 #if COMPILER_SUPPORTS_VTAB_IN1
6936 for(int i = 0; i < argc; i++) {
6937 if(!(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr[1 + (i*4) + 2] == VEC0_METADATA_OPERATOR_IN)) {
6938 continue;
6939 }
6940 int metadata_idx = idxStr[1 + (i*4) + 1] - 'A';
6941 if(!aMetadataIn) {
6942 aMetadataIn = sqlite3_mallocsqlite3_api->malloc(sizeof(*aMetadataIn));
6943 if(!aMetadataIn) {
6944 rc = SQLITE_NOMEM7;
6945 goto cleanup;
6946 }
6947 memset(aMetadataIn, 0, sizeof(*aMetadataIn));
6948 rc = array_init(aMetadataIn, sizeof(struct Vec0MetadataIn), 8);
6949 if(rc != SQLITE_OK0) {
6950 goto cleanup;
6951 }
6952 }
6953
6954 struct Vec0MetadataIn item;
6955 memset(&item, 0, sizeof(item));
6956 item.metadata_idx=metadata_idx;
6957 item.argv_idx = i;
6958
6959 switch(p->metadata_columns[metadata_idx].kind) {
6960 case VEC0_METADATA_COLUMN_KIND_INTEGER: {
6961 rc = array_init(&item.array, sizeof(i64), 16);
6962 if(rc != SQLITE_OK0) {
6963 goto cleanup;
6964 }
6965 sqlite3_value *entry;
6966 for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[i], &entry); rc == SQLITE_OK0 && entry; rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[i], &entry)) {
6967 i64 v = sqlite3_value_int64sqlite3_api->value_int64(entry);
6968 rc = array_append(&item.array, &v);
6969 if (rc != SQLITE_OK0) {
6970 goto cleanup;
6971 }
6972 }
6973
6974 if (rc != SQLITE_DONE101) {
6975 vtab_set_error(&p->base, "Error fetching next value in `x in (...)` integer expression");
6976 goto cleanup;
6977 }
6978
6979 break;
6980 }
6981 case VEC0_METADATA_COLUMN_KIND_TEXT: {
6982 rc = array_init(&item.array, sizeof(struct Vec0MetadataInTextEntry), 16);
6983 if(rc != SQLITE_OK0) {
6984 goto cleanup;
6985 }
6986 sqlite3_value *entry;
6987 for (rc = sqlite3_vtab_in_firstsqlite3_api->vtab_in_first(argv[i], &entry); rc == SQLITE_OK0 && entry; rc = sqlite3_vtab_in_nextsqlite3_api->vtab_in_next(argv[i], &entry)) {
6988 const char * s = (const char *) sqlite3_value_textsqlite3_api->value_text(entry);
6989 int n = sqlite3_value_bytessqlite3_api->value_bytes(entry);
6990
6991 struct Vec0MetadataInTextEntry entry;
6992 entry.zString = sqlite3_mprintfsqlite3_api->mprintf("%.*s", n, s);
6993 if(!entry.zString) {
6994 rc = SQLITE_NOMEM7;
6995 goto cleanup;
6996 }
6997 entry.n = n;
6998 rc = array_append(&item.array, &entry);
6999 if (rc != SQLITE_OK0) {
7000 goto cleanup;
7001 }
7002 }
7003
7004 if (rc != SQLITE_DONE101) {
7005 vtab_set_error(&p->base, "Error fetching next value in `x in (...)` text expression");
7006 goto cleanup;
7007 }
7008
7009 break;
7010 }
7011 default: {
7012 vtab_set_error(&p->base, "Internal sqlite-vec error");
7013 goto cleanup;
7014 }
7015 }
7016
7017 rc = array_append(aMetadataIn, &item);
7018 if(rc != SQLITE_OK0) {
7019 goto cleanup;
7020 }
7021 }
7022 #endif
7023
7024 rc = vec0_chunks_iter(p, idxStr, argc, argv, &stmtChunks);
7025 if (rc != SQLITE_OK0) {
7026 // IMP: V06942_23781
7027 vtab_set_error(&p->base, "Error preparing stmtChunk: %s",
7028 sqlite3_errmsgsqlite3_api->errmsg(p->db));
7029 goto cleanup;
7030 }
7031
7032 i64 *topk_rowids = NULL((void*)0);
7033 f32 *topk_distances = NULL((void*)0);
7034 i64 k_used = 0;
7035 rc = vec0Filter_knn_chunks_iter(p, stmtChunks, vector_column, vectorColumnIdx,
7036 arrayRowidsIn, aMetadataIn, idxStr, argc, argv, queryVector, k, &topk_rowids,
7037 &topk_distances, &k_used);
7038 if (rc != SQLITE_OK0) {
7039 goto cleanup;
7040 }
7041
7042 knn_data->current_idx = 0;
7043 knn_data->k = k;
7044 knn_data->rowids = topk_rowids;
7045 knn_data->distances = topk_distances;
7046 knn_data->k_used = k_used;
7047
7048 pCur->knn_data = knn_data;
7049 pCur->query_plan = VEC0_QUERY_PLAN_KNN;
7050 rc = SQLITE_OK0;
7051
7052cleanup:
7053 sqlite3_finalizesqlite3_api->finalize(stmtChunks);
7054 array_cleanup(arrayRowidsIn);
7055 sqlite3_freesqlite3_api->free(arrayRowidsIn);
7056 queryVectorCleanup(queryVector);
7057 if(aMetadataIn) {
7058 for(size_t i = 0; i < aMetadataIn->length; i++) {
7059 struct Vec0MetadataIn* item = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
7060 for(size_t j = 0; j < item->array.length; j++) {
7061 if(p->metadata_columns[item->metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
7062 struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry*)item->array.z)[j];
7063 sqlite3_freesqlite3_api->free(entry.zString);
7064 }
7065 }
7066 array_cleanup(&item->array);
7067 }
7068 array_cleanup(aMetadataIn);
7069 }
7070
7071 sqlite3_freesqlite3_api->free(aMetadataIn);
7072
7073 return rc;
7074}
7075
7076int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) {
7077 int rc;
7078 char *zSql;
7079 struct vec0_query_fullscan_data *fullscan_data;
7080
7081 fullscan_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*fullscan_data));
7082 if (!fullscan_data) {
7083 return SQLITE_NOMEM7;
7084 }
7085 memset(fullscan_data, 0, sizeof(*fullscan_data));
7086
7087 zSql = sqlite3_mprintfsqlite3_api->mprintf(" SELECT rowid "
7088 " FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\""
7089 " ORDER by chunk_id, chunk_offset ",
7090 p->schemaName, p->tableName);
7091 if (!zSql) {
7092 rc = SQLITE_NOMEM7;
7093 goto error;
7094 }
7095 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &fullscan_data->rowids_stmt, NULL((void*)0));
7096 sqlite3_freesqlite3_api->free(zSql);
7097 if (rc != SQLITE_OK0) {
7098 // IMP: V09901_26739
7099 vtab_set_error(&p->base, "Error preparing rowid scan: %s",
7100 sqlite3_errmsgsqlite3_api->errmsg(p->db));
7101 goto error;
7102 }
7103
7104 rc = sqlite3_stepsqlite3_api->step(fullscan_data->rowids_stmt);
7105
7106 // DONE when there's no rowids, ROW when there are, both "success"
7107 if (!(rc == SQLITE_ROW100 || rc == SQLITE_DONE101)) {
7108 goto error;
7109 }
7110
7111 fullscan_data->done = rc == SQLITE_DONE101;
7112 pCur->query_plan = VEC0_QUERY_PLAN_FULLSCAN;
7113 pCur->fullscan_data = fullscan_data;
7114 return SQLITE_OK0;
7115
7116error:
7117 vec0_query_fullscan_data_clear(fullscan_data);
7118 sqlite3_freesqlite3_api->free(fullscan_data);
7119 return rc;
7120}
7121
7122int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc,
7123 sqlite3_value **argv) {
7124 int rc;
7125 assert(argc == 1)((void) sizeof ((argc == 1) ? 1 : 0), __extension__ ({ if (argc
== 1) ; else __assert_fail ("argc == 1", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 7125, __extension__ __PRETTY_FUNCTION__); }))
;
7126 i64 rowid;
7127 struct vec0_query_point_data *point_data = NULL((void*)0);
7128
7129 point_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*point_data));
7130 if (!point_data) {
7131 rc = SQLITE_NOMEM7;
7132 goto error;
7133 }
7134 memset(point_data, 0, sizeof(*point_data));
7135
7136 if (p->pkIsText) {
7137 rc = vec0_rowid_from_id(p, argv[0], &rowid);
7138 if (rc == SQLITE_EMPTY16) {
7139 goto eof;
7140 }
7141 if (rc != SQLITE_OK0) {
7142 goto error;
7143 }
7144 } else {
7145 rowid = sqlite3_value_int64sqlite3_api->value_int64(argv[0]);
7146 }
7147
7148 for (int i = 0; i < p->numVectorColumns; i++) {
7149 rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL((void*)0));
7150 if (rc == SQLITE_EMPTY16) {
7151 goto eof;
7152 }
7153 if (rc != SQLITE_OK0) {
7154 goto error;
7155 }
7156 }
7157
7158 point_data->rowid = rowid;
7159 point_data->done = 0;
7160 pCur->point_data = point_data;
7161 pCur->query_plan = VEC0_QUERY_PLAN_POINT;
7162 return SQLITE_OK0;
7163
7164eof:
7165 point_data->rowid = rowid;
7166 point_data->done = 1;
7167 pCur->point_data = point_data;
7168 pCur->query_plan = VEC0_QUERY_PLAN_POINT;
7169 return SQLITE_OK0;
7170
7171error:
7172 vec0_query_point_data_clear(point_data);
7173 sqlite3_freesqlite3_api->free(point_data);
7174 return rc;
7175}
7176
7177static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
7178 const char *idxStr, int argc, sqlite3_value **argv) {
7179 vec0_vtab *p = (vec0_vtab *)pVtabCursor->pVtab;
7180 vec0_cursor *pCur = (vec0_cursor *)pVtabCursor;
7181 vec0_cursor_clear(pCur);
7182
7183 int idxStrLength = strlen(idxStr);
7184 if(idxStrLength <= 0) {
7185 return SQLITE_ERROR1;
7186 }
7187 if((idxStrLength-1) % 4 != 0) {
7188 return SQLITE_ERROR1;
7189 }
7190 int numValueEntries = (idxStrLength-1) / 4;
7191 if(numValueEntries != argc) {
7192 return SQLITE_ERROR1;
7193 }
7194
7195 char query_plan = idxStr[0];
7196 switch(query_plan) {
7197 case VEC0_QUERY_PLAN_FULLSCAN:
7198 return vec0Filter_fullscan(p, pCur);
7199 case VEC0_QUERY_PLAN_KNN:
7200 return vec0Filter_knn(pCur, p, idxNum, idxStr, argc, argv);
7201 case VEC0_QUERY_PLAN_POINT:
7202 return vec0Filter_point(pCur, p, argc, argv);
7203 default:
7204 vtab_set_error(pVtabCursor->pVtab, "unknown idxStr '%s'", idxStr);
7205 return SQLITE_ERROR1;
7206 }
7207}
7208
7209static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
7210 vec0_cursor *pCur = (vec0_cursor *)cur;
7211 switch (pCur->query_plan) {
7212 case VEC0_QUERY_PLAN_FULLSCAN: {
7213 *pRowid = sqlite3_column_int64sqlite3_api->column_int64(pCur->fullscan_data->rowids_stmt, 0);
7214 return SQLITE_OK0;
7215 }
7216 case VEC0_QUERY_PLAN_POINT: {
7217 *pRowid = pCur->point_data->rowid;
7218 return SQLITE_OK0;
7219 }
7220 case VEC0_QUERY_PLAN_KNN: {
7221 vtab_set_error(cur->pVtab,
7222 "Internal sqlite-vec error: expected point query plan in "
7223 "vec0Rowid, found %d",
7224 pCur->query_plan);
7225 return SQLITE_ERROR1;
7226 }
7227 }
7228 return SQLITE_ERROR1;
7229}
7230
7231static int vec0Next(sqlite3_vtab_cursor *cur) {
7232 vec0_cursor *pCur = (vec0_cursor *)cur;
7233 switch (pCur->query_plan) {
7234 case VEC0_QUERY_PLAN_FULLSCAN: {
7235 if (!pCur->fullscan_data) {
7236 return SQLITE_ERROR1;
7237 }
7238 int rc = sqlite3_stepsqlite3_api->step(pCur->fullscan_data->rowids_stmt);
7239 if (rc == SQLITE_DONE101) {
7240 pCur->fullscan_data->done = 1;
7241 return SQLITE_OK0;
7242 }
7243 if (rc == SQLITE_ROW100) {
7244 return SQLITE_OK0;
7245 }
7246 return SQLITE_ERROR1;
7247 }
7248 case VEC0_QUERY_PLAN_KNN: {
7249 if (!pCur->knn_data) {
7250 return SQLITE_ERROR1;
7251 }
7252
7253 pCur->knn_data->current_idx++;
7254 return SQLITE_OK0;
7255 }
7256 case VEC0_QUERY_PLAN_POINT: {
7257 if (!pCur->point_data) {
7258 return SQLITE_ERROR1;
7259 }
7260 pCur->point_data->done = 1;
7261 return SQLITE_OK0;
7262 }
7263 }
7264 return SQLITE_ERROR1;
7265}
7266
7267static int vec0Eof(sqlite3_vtab_cursor *cur) {
7268 vec0_cursor *pCur = (vec0_cursor *)cur;
7269 switch (pCur->query_plan) {
7270 case VEC0_QUERY_PLAN_FULLSCAN: {
7271 if (!pCur->fullscan_data) {
7272 return 1;
7273 }
7274 return pCur->fullscan_data->done;
7275 }
7276 case VEC0_QUERY_PLAN_KNN: {
7277 if (!pCur->knn_data) {
7278 return 1;
7279 }
7280 // return (pCur->knn_data->current_idx >= pCur->knn_data->k) ||
7281 // (pCur->knn_data->distances[pCur->knn_data->current_idx] == FLT_MAX);
7282 return (pCur->knn_data->current_idx >= pCur->knn_data->k_used);
7283 }
7284 case VEC0_QUERY_PLAN_POINT: {
7285 if (!pCur->point_data) {
7286 return 1;
7287 }
7288 return pCur->point_data->done;
7289 }
7290 }
7291 return 1;
7292}
7293
7294static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
7295 sqlite3_context *context, int i) {
7296 if (!pCur->fullscan_data) {
7297 sqlite3_result_errorsqlite3_api->result_error(
7298 context, "Internal sqlite-vec error: fullscan_data is NULL.", -1);
7299 return SQLITE_ERROR1;
7300 }
7301 i64 rowid = sqlite3_column_int64sqlite3_api->column_int64(pCur->fullscan_data->rowids_stmt, 0);
7302 if (i == VEC0_COLUMN_ID0) {
7303 return vec0_result_id(pVtab, context, rowid);
7304 }
7305 else if (vec0_column_idx_is_vector(pVtab, i)) {
7306 void *v;
7307 int sz;
7308 int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
7309 int rc = vec0_get_vector_data(pVtab, rowid, vector_idx, &v, &sz);
7310 if (rc != SQLITE_OK0) {
7311 return rc;
7312 }
7313 sqlite3_result_blobsqlite3_api->result_blob(context, v, sz, sqlite3_freesqlite3_api->free);
7314 sqlite3_result_subtypesqlite3_api->result_subtype(context,
7315 pVtab->vector_columns[vector_idx].element_type);
7316
7317 }
7318 else if (i == vec0_column_distance_idx(pVtab)) {
7319 sqlite3_result_nullsqlite3_api->result_null(context);
7320 }
7321 else if(vec0_column_idx_is_partition(pVtab, i)) {
7322 int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
7323 sqlite3_value * v;
7324 int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
7325 if(rc == SQLITE_OK0) {
7326 sqlite3_result_valuesqlite3_api->result_value(context, v);
7327 sqlite3_value_freesqlite3_api->value_free(v);
7328 }else {
7329 sqlite3_result_error_codesqlite3_api->result_error_code(context, rc);
7330 }
7331 }
7332 else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
7333 int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
7334 sqlite3_value * v;
7335 int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
7336 if(rc == SQLITE_OK0) {
7337 sqlite3_result_valuesqlite3_api->result_value(context, v);
7338 sqlite3_value_freesqlite3_api->value_free(v);
7339 }else {
7340 sqlite3_result_error_codesqlite3_api->result_error_code(context, rc);
7341 }
7342 }
7343
7344 else if(vec0_column_idx_is_metadata(pVtab, i)) {
7345 if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) {
7346 return SQLITE_OK0;
7347 }
7348 int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
7349 int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
7350 if(rc != SQLITE_OK0) {
7351 // IMP: V15466_32305
7352 const char * zErr = sqlite3_mprintfsqlite3_api->mprintf(
7353 "Could not extract metadata value for column %.*s at rowid %lld",
7354 pVtab->metadata_columns[metadata_idx].name_length,
7355 pVtab->metadata_columns[metadata_idx].name, rowid
7356 );
7357 if(zErr) {
7358 sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1);
7359 sqlite3_freesqlite3_api->free((void *) zErr);
7360 }else {
7361 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
7362 }
7363 }
7364 }
7365
7366 return SQLITE_OK0;
7367}
7368
7369static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
7370 sqlite3_context *context, int i) {
7371 if (!pCur->point_data) {
7372 sqlite3_result_errorsqlite3_api->result_error(context,
7373 "Internal sqlite-vec error: point_data is NULL.", -1);
7374 return SQLITE_ERROR1;
7375 }
7376 if (i == VEC0_COLUMN_ID0) {
7377 return vec0_result_id(pVtab, context, pCur->point_data->rowid);
7378 }
7379 else if (i == vec0_column_distance_idx(pVtab)) {
7380 sqlite3_result_nullsqlite3_api->result_null(context);
7381 return SQLITE_OK0;
7382 }
7383 else if (vec0_column_idx_is_vector(pVtab, i)) {
7384 if (sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) {
7385 sqlite3_result_nullsqlite3_api->result_null(context);
7386 return SQLITE_OK0;
7387 }
7388 int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
7389 sqlite3_result_blobsqlite3_api->result_blob(
7390 context, pCur->point_data->vectors[vector_idx],
7391 vector_column_byte_size(pVtab->vector_columns[vector_idx]),
7392 SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
7393 sqlite3_result_subtypesqlite3_api->result_subtype(context,
7394 pVtab->vector_columns[vector_idx].element_type);
7395 return SQLITE_OK0;
7396 }
7397 else if(vec0_column_idx_is_partition(pVtab, i)) {
7398 if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) {
7399 return SQLITE_OK0;
7400 }
7401 int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
7402 i64 rowid = pCur->point_data->rowid;
7403 sqlite3_value * v;
7404 int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
7405 if(rc == SQLITE_OK0) {
7406 sqlite3_result_valuesqlite3_api->result_value(context, v);
7407 sqlite3_value_freesqlite3_api->value_free(v);
7408 }else {
7409 sqlite3_result_error_codesqlite3_api->result_error_code(context, rc);
7410 }
7411 }
7412 else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
7413 if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) {
7414 return SQLITE_OK0;
7415 }
7416 i64 rowid = pCur->point_data->rowid;
7417 int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
7418 sqlite3_value * v;
7419 int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
7420 if(rc == SQLITE_OK0) {
7421 sqlite3_result_valuesqlite3_api->result_value(context, v);
7422 sqlite3_value_freesqlite3_api->value_free(v);
7423 }else {
7424 sqlite3_result_error_codesqlite3_api->result_error_code(context, rc);
7425 }
7426 }
7427
7428 else if(vec0_column_idx_is_metadata(pVtab, i)) {
7429 if(sqlite3_vtab_nochangesqlite3_api->vtab_nochange(context)) {
7430 return SQLITE_OK0;
7431 }
7432 i64 rowid = pCur->point_data->rowid;
7433 int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
7434 int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
7435 if(rc != SQLITE_OK0) {
7436 const char * zErr = sqlite3_mprintfsqlite3_api->mprintf(
7437 "Could not extract metadata value for column %.*s at rowid %lld",
7438 pVtab->metadata_columns[metadata_idx].name_length,
7439 pVtab->metadata_columns[metadata_idx].name, rowid
7440 );
7441 if(zErr) {
7442 sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1);
7443 sqlite3_freesqlite3_api->free((void *) zErr);
7444 }else {
7445 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
7446 }
7447 }
7448 }
7449
7450 return SQLITE_OK0;
7451}
7452
7453static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
7454 sqlite3_context *context, int i) {
7455 if (!pCur->knn_data) {
7456 sqlite3_result_errorsqlite3_api->result_error(context,
7457 "Internal sqlite-vec error: knn_data is NULL.", -1);
7458 return SQLITE_ERROR1;
7459 }
7460 if (i == VEC0_COLUMN_ID0) {
7461 i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
7462 return vec0_result_id(pVtab, context, rowid);
7463 }
7464 else if (i == vec0_column_distance_idx(pVtab)) {
7465 sqlite3_result_doublesqlite3_api->result_double(
7466 context, pCur->knn_data->distances[pCur->knn_data->current_idx]);
7467 return SQLITE_OK0;
7468 }
7469 else if (vec0_column_idx_is_vector(pVtab, i)) {
7470 void *out;
7471 int sz;
7472 int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
7473 int rc = vec0_get_vector_data(
7474 pVtab, pCur->knn_data->rowids[pCur->knn_data->current_idx], vector_idx,
7475 &out, &sz);
7476 if (rc != SQLITE_OK0) {
7477 return rc;
7478 }
7479 sqlite3_result_blobsqlite3_api->result_blob(context, out, sz, sqlite3_freesqlite3_api->free);
7480 sqlite3_result_subtypesqlite3_api->result_subtype(context,
7481 pVtab->vector_columns[vector_idx].element_type);
7482 return SQLITE_OK0;
7483 }
7484 else if(vec0_column_idx_is_partition(pVtab, i)) {
7485 int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
7486 i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
7487 sqlite3_value * v;
7488 int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
7489 if(rc == SQLITE_OK0) {
7490 sqlite3_result_valuesqlite3_api->result_value(context, v);
7491 sqlite3_value_freesqlite3_api->value_free(v);
7492 }else {
7493 sqlite3_result_error_codesqlite3_api->result_error_code(context, rc);
7494 }
7495 }
7496 else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
7497 int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
7498 i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
7499 sqlite3_value * v;
7500 int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
7501 if(rc == SQLITE_OK0) {
7502 sqlite3_result_valuesqlite3_api->result_value(context, v);
7503 sqlite3_value_freesqlite3_api->value_free(v);
7504 }else {
7505 sqlite3_result_error_codesqlite3_api->result_error_code(context, rc);
7506 }
7507 }
7508
7509 else if(vec0_column_idx_is_metadata(pVtab, i)) {
7510 int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
7511 i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
7512 int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
7513 if(rc != SQLITE_OK0) {
7514 const char * zErr = sqlite3_mprintfsqlite3_api->mprintf(
7515 "Could not extract metadata value for column %.*s at rowid %lld",
7516 pVtab->metadata_columns[metadata_idx].name_length,
7517 pVtab->metadata_columns[metadata_idx].name, rowid
7518 );
7519 if(zErr) {
7520 sqlite3_result_errorsqlite3_api->result_error(context, zErr, -1);
7521 sqlite3_freesqlite3_api->free((void *) zErr);
7522 }else {
7523 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
7524 }
7525 }
7526 }
7527
7528 return SQLITE_OK0;
7529}
7530
7531static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context,
7532 int i) {
7533 vec0_cursor *pCur = (vec0_cursor *)cur;
7534 vec0_vtab *pVtab = (vec0_vtab *)cur->pVtab;
7535 switch (pCur->query_plan) {
7536 case VEC0_QUERY_PLAN_FULLSCAN: {
7537 return vec0Column_fullscan(pVtab, pCur, context, i);
7538 }
7539 case VEC0_QUERY_PLAN_KNN: {
7540 return vec0Column_knn(pVtab, pCur, context, i);
7541 }
7542 case VEC0_QUERY_PLAN_POINT: {
7543 return vec0Column_point(pVtab, pCur, context, i);
7544 }
7545 }
7546 return SQLITE_OK0;
7547}
7548
7549/**
7550 * @brief Handles the "insert rowid" step of a row insert operation of a vec0
7551 * table.
7552 *
7553 * This function will insert a new row into the _rowids vec0 shadow table.
7554 *
7555 * @param p: virtual table
7556 * @param idValue: Value containing the inserted rowid/id value.
7557 * @param rowid: Output rowid, will point to the "real" i64 rowid
7558 * value that was inserted
7559 * @return int SQLITE_OK on success, error code on failure
7560 */
7561int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue,
7562 i64 *rowid) {
7563
7564 /**
7565 * An insert into a vec0 table can happen a few different ways:
7566 * 1) With default INTEGER primary key: With a supplied i64 rowid
7567 * 2) With default INTEGER primary key: WITHOUT a supplied rowid
7568 * 3) With TEXT primary key: supplied text rowid
7569 */
7570
7571 int rc;
7572
7573 // Option 3: vtab has a user-defined TEXT primary key, so ensure a text value
7574 // is provided.
7575 if (p->pkIsText) {
7576 if (sqlite3_value_typesqlite3_api->value_type(idValue) != SQLITE_TEXT3) {
7577 // IMP: V04200_21039
7578 vtab_set_error(&p->base,
7579 "The %s virtual table was declared with a TEXT primary "
7580 "key, but a non-TEXT value was provided in an INSERT.",
7581 p->tableName);
7582 return SQLITE_ERROR1;
7583 }
7584
7585 return vec0_rowids_insert_id(p, idValue, rowid);
7586 }
7587
7588 // Option 1: User supplied a i64 rowid
7589 if (sqlite3_value_typesqlite3_api->value_type(idValue) == SQLITE_INTEGER1) {
7590 i64 suppliedRowid = sqlite3_value_int64sqlite3_api->value_int64(idValue);
7591 rc = vec0_rowids_insert_rowid(p, suppliedRowid);
7592 if (rc == SQLITE_OK0) {
7593 *rowid = suppliedRowid;
7594 }
7595 return rc;
7596 }
7597
7598 // Option 2: User did not suppled a rowid
7599
7600 if (sqlite3_value_typesqlite3_api->value_type(idValue) != SQLITE_NULL5) {
7601 // IMP: V30855_14925
7602 vtab_set_error(&p->base,
7603 "Only integers are allows for primary key values on %s",
7604 p->tableName);
7605 return SQLITE_ERROR1;
7606 }
7607 // NULL to get next auto-incremented value
7608 return vec0_rowids_insert_id(p, NULL((void*)0), rowid);
7609}
7610
7611/**
7612 * @brief Determines the "next available" chunk position for a newly inserted
7613 * vec0 row.
7614 *
7615 * This operation may insert a new "blank" chunk the _chunks table, if there is
7616 * no more space in previous chunks.
7617 *
7618 * @param p: virtual table
7619 * @param partitionKeyValues: array of partition key column values, to constrain
7620 * against any partition key columns.
7621 * @param chunk_rowid: Output rowid of the chunk in the _chunks virtual table
7622 * that has the avialabiity.
7623 * @param chunk_offset: Output the index of the available space insert the
7624 * chunk, based on the index of the first available validity bit.
7625 * @param pBlobValidity: Output blob of the validity column of the available
7626 * chunk. Will be opened with read/write permissions.
7627 * @param pValidity: Output buffer of the original chunk's validity column.
7628 * Needs to be cleaned up with sqlite3_free().
7629 * @return int SQLITE_OK on success, error code on failure
7630 */
7631int vec0Update_InsertNextAvailableStep(
7632 vec0_vtab *p,
7633 sqlite3_value ** partitionKeyValues,
7634 i64 *chunk_rowid, i64 *chunk_offset,
7635 sqlite3_blob **blobChunksValidity,
7636 const unsigned char **bufferChunksValidity) {
7637
7638 int rc;
7639 i64 validitySize;
7640 *chunk_offset = -1;
7641
7642 rc = vec0_get_latest_chunk_rowid(p, chunk_rowid, partitionKeyValues);
7643 if(rc == SQLITE_EMPTY16) {
7644 goto done;
7645 }
7646 if (rc != SQLITE_OK0) {
7647 goto cleanup;
7648 }
7649
7650 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
7651 *chunk_rowid, 1, blobChunksValidity);
7652 if (rc != SQLITE_OK0) {
7653 // IMP: V22053_06123
7654 vtab_set_error(&p->base,
7655 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7656 "could not open validity blob on %s.%s.%lld",
7657 p->schemaName, p->shadowChunksName, *chunk_rowid);
7658 goto cleanup;
7659 }
7660
7661 validitySize = sqlite3_blob_bytessqlite3_api->blob_bytes(*blobChunksValidity);
7662 if (validitySize != p->chunk_size / CHAR_BIT8) {
7663 // IMP: V29362_13432
7664 vtab_set_error(&p->base,
7665 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7666 "validity blob size mismatch on "
7667 "%s.%s.%lld, expected %lld but received %lld.",
7668 p->schemaName, p->shadowChunksName, *chunk_rowid,
7669 (i64)(p->chunk_size / CHAR_BIT8), validitySize);
7670 rc = SQLITE_ERROR1;
7671 goto cleanup;
7672 }
7673
7674 *bufferChunksValidity = sqlite3_mallocsqlite3_api->malloc(validitySize);
7675 if (!(*bufferChunksValidity)) {
7676 vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7677 "Could not allocate memory for validity bitmap");
7678 rc = SQLITE_NOMEM7;
7679 goto cleanup;
7680 }
7681
7682 rc = sqlite3_blob_readsqlite3_api->blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
7683 validitySize, 0);
7684
7685 if (rc != SQLITE_OK0) {
7686 vtab_set_error(&p->base,
7687 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7688 "Could not read validity bitmap for %s.%s.%lld",
7689 p->schemaName, p->shadowChunksName, *chunk_rowid);
7690 goto cleanup;
7691 }
7692
7693 // find the next available offset, ie first `0` in the bitmap.
7694 for (int i = 0; i < validitySize; i++) {
7695 if ((*bufferChunksValidity)[i] == 0b11111111)
7696 continue;
7697 for (int j = 0; j < CHAR_BIT8; j++) {
7698 if (((((*bufferChunksValidity)[i] >> j) & 1) == 0)) {
7699 *chunk_offset = (i * CHAR_BIT8) + j;
7700 goto done;
7701 }
7702 }
7703 }
7704
7705done:
7706 // latest chunk was full, so need to create a new one
7707 if (*chunk_offset == -1) {
7708 rc = vec0_new_chunk(p, partitionKeyValues, chunk_rowid);
7709 if (rc != SQLITE_OK0) {
7710 // IMP: V08441_25279
7711 vtab_set_error(&p->base,
7712 VEC_INTERAL_ERROR"Internal sqlite-vec error: " "Could not insert a new vector chunk");
7713 rc = SQLITE_ERROR1; // otherwise raises a DatabaseError and not operational
7714 // error?
7715 goto cleanup;
7716 }
7717 *chunk_offset = 0;
7718
7719 // blobChunksValidity and pValidity are stale, pointing to the previous
7720 // (full) chunk. to re-assign them
7721 rc = sqlite3_blob_closesqlite3_api->blob_close(*blobChunksValidity);
7722 sqlite3_freesqlite3_api->free((void *)*bufferChunksValidity);
7723 *blobChunksValidity = NULL((void*)0);
7724 *bufferChunksValidity = NULL((void*)0);
7725 if (rc != SQLITE_OK0) {
7726 vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7727 "unknown error, blobChunksValidity could not be closed, "
7728 "please file an issue.");
7729 rc = SQLITE_ERROR1;
7730 goto cleanup;
7731 }
7732
7733 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName,
7734 "validity", *chunk_rowid, 1, blobChunksValidity);
7735 if (rc != SQLITE_OK0) {
7736 vtab_set_error(
7737 &p->base,
7738 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7739 "Could not open validity blob for newly created chunk %s.%s.%lld",
7740 p->schemaName, p->shadowChunksName, *chunk_rowid);
7741 goto cleanup;
7742 }
7743 validitySize = sqlite3_blob_bytessqlite3_api->blob_bytes(*blobChunksValidity);
7744 if (validitySize != p->chunk_size / CHAR_BIT8) {
7745 vtab_set_error(&p->base,
7746 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7747 "validity blob size mismatch for newly created chunk "
7748 "%s.%s.%lld. Exepcted %lld, got %lld",
7749 p->schemaName, p->shadowChunksName, *chunk_rowid,
7750 p->chunk_size / CHAR_BIT8, validitySize);
7751 goto cleanup;
7752 }
7753 *bufferChunksValidity = sqlite3_mallocsqlite3_api->malloc(validitySize);
7754 rc = sqlite3_blob_readsqlite3_api->blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
7755 validitySize, 0);
7756 if (rc != SQLITE_OK0) {
7757 vtab_set_error(&p->base,
7758 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7759 "could not read validity blob newly created chunk "
7760 "%s.%s.%lld",
7761 p->schemaName, p->shadowChunksName, *chunk_rowid);
7762 goto cleanup;
7763 }
7764 }
7765
7766 rc = SQLITE_OK0;
7767
7768cleanup:
7769 return rc;
7770}
7771
7772/**
7773 * @brief Write the vector data into the provided vector blob at the given
7774 * offset
7775 *
7776 * @param blobVectors SQLite BLOB to write to
7777 * @param chunk_offset the "offset" (ie validity bitmap position) to write the
7778 * vector to
7779 * @param bVector pointer to the vector containing data
7780 * @param dimensions how many dimensions the vector has
7781 * @param element_type the vector type
7782 * @return result of sqlite3_blob_write, SQLITE_OK on success, otherwise failure
7783 */
7784static int
7785vec0_write_vector_to_vector_blob(sqlite3_blob *blobVectors, i64 chunk_offset,
7786 const void *bVector, size_t dimensions,
7787 enum VectorElementType element_type) {
7788 int n;
7789 int offset;
7790
7791 switch (element_type) {
7792 case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
7793 n = dimensions * sizeof(f32);
7794 offset = chunk_offset * dimensions * sizeof(f32);
7795 break;
7796 case SQLITE_VEC_ELEMENT_TYPE_INT8:
7797 n = dimensions * sizeof(i8);
7798 offset = chunk_offset * dimensions * sizeof(i8);
7799 break;
7800 case SQLITE_VEC_ELEMENT_TYPE_BIT:
7801 n = dimensions / CHAR_BIT8;
7802 offset = chunk_offset * dimensions / CHAR_BIT8;
7803 break;
7804 }
7805
7806 return sqlite3_blob_writesqlite3_api->blob_write(blobVectors, bVector, n, offset);
7807}
7808
7809/**
7810 * @brief
7811 *
7812 * @param p vec0 virtual table
7813 * @param chunk_rowid: which chunk to write to
7814 * @param chunk_offset: the offset inside the chunk to write the vector to.
7815 * @param rowid: the rowid of the inserting row
7816 * @param vectorDatas: array of the vector data to insert
7817 * @param blobValidity: writeable validity blob of the row's assigned chunk.
7818 * @param validity: snapshot buffer of the valdity column from the row's
7819 * assigned chunk.
7820 * @return int SQLITE_OK on success, error code on failure
7821 */
7822int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid,
7823 i64 chunk_offset, i64 rowid,
7824 void *vectorDatas[],
7825 sqlite3_blob *blobChunksValidity,
7826 const unsigned char *bufferChunksValidity) {
7827 int rc, brc;
7828 sqlite3_blob *blobChunksRowids = NULL((void*)0);
7829
7830 // mark the validity bit for this row in the chunk's validity bitmap
7831 // Get the byte offset of the bitmap
7832 char unsigned bx = bufferChunksValidity[chunk_offset / CHAR_BIT8];
7833 // set the bit at the chunk_offset position inside that byte
7834 bx = bx | (1 << (chunk_offset % CHAR_BIT8));
7835 // write that 1 byte
7836 rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksValidity, &bx, 1, chunk_offset / CHAR_BIT8);
7837 if (rc != SQLITE_OK0) {
7838 vtab_set_error(&p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not mark validity bit ");
7839 return rc;
7840 }
7841
7842 // Go insert the vector data into the vector chunk shadow tables
7843 for (int i = 0; i < p->numVectorColumns; i++) {
7844 sqlite3_blob *blobVectors;
7845 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
7846 "vectors", chunk_rowid, 1, &blobVectors);
7847 if (rc != SQLITE_OK0) {
7848 vtab_set_error(&p->base, "Error opening vector blob at %s.%s.%lld",
7849 p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
7850 goto cleanup;
7851 }
7852
7853 i64 expected =
7854 p->chunk_size * vector_column_byte_size(p->vector_columns[i]);
7855 i64 actual = sqlite3_blob_bytessqlite3_api->blob_bytes(blobVectors);
7856
7857 if (actual != expected) {
7858 // IMP: V16386_00456
7859 vtab_set_error(
7860 &p->base,
7861 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7862 "vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
7863 p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid, expected,
7864 actual);
7865 rc = SQLITE_ERROR1;
7866 // already error, can ignore result code
7867 sqlite3_blob_closesqlite3_api->blob_close(blobVectors);
7868 goto cleanup;
7869 };
7870
7871 rc = vec0_write_vector_to_vector_blob(
7872 blobVectors, chunk_offset, vectorDatas[i],
7873 p->vector_columns[i].dimensions, p->vector_columns[i].element_type);
7874 if (rc != SQLITE_OK0) {
7875 vtab_set_error(&p->base,
7876 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7877 "could not write vector blob on %s.%s.%lld",
7878 p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
7879 rc = SQLITE_ERROR1;
7880 // already error, can ignore result code
7881 sqlite3_blob_closesqlite3_api->blob_close(blobVectors);
7882 goto cleanup;
7883 }
7884 rc = sqlite3_blob_closesqlite3_api->blob_close(blobVectors);
7885 if (rc != SQLITE_OK0) {
7886 vtab_set_error(&p->base,
7887 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7888 "could not close vector blob on %s.%s.%lld",
7889 p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
7890 rc = SQLITE_ERROR1;
7891 goto cleanup;
7892 }
7893 }
7894
7895 // write the new rowid to the rowids column of the _chunks table
7896 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
7897 chunk_rowid, 1, &blobChunksRowids);
7898 if (rc != SQLITE_OK0) {
7899 // IMP: V09221_26060
7900 vtab_set_error(&p->base,
7901 VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not open rowids blob on %s.%s.%lld",
7902 p->schemaName, p->shadowChunksName, chunk_rowid);
7903 goto cleanup;
7904 }
7905 i64 expected = p->chunk_size * sizeof(i64);
7906 i64 actual = sqlite3_blob_bytessqlite3_api->blob_bytes(blobChunksRowids);
7907 if (expected != actual) {
7908 // IMP: V12779_29618
7909 vtab_set_error(
7910 &p->base,
7911 VEC_INTERAL_ERROR"Internal sqlite-vec error: "
7912 "rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
7913 p->schemaName, p->shadowChunksName, chunk_rowid, expected, actual);
7914 rc = SQLITE_ERROR1;
7915 goto cleanup;
7916 }
7917 rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksRowids, &rowid, sizeof(i64),
7918 chunk_offset * sizeof(i64));
7919 if (rc != SQLITE_OK0) {
7920 vtab_set_error(
7921 &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not write rowids blob on %s.%s.%lld",
7922 p->schemaName, p->shadowChunksName, chunk_rowid);
7923 rc = SQLITE_ERROR1;
7924 goto cleanup;
7925 }
7926
7927 // Now with all the vectors inserted, go back and update the _rowids table
7928 // with the new chunk_rowid/chunk_offset values
7929 rc = vec0_rowids_update_position(p, rowid, chunk_rowid, chunk_offset);
7930
7931cleanup:
7932 brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksRowids);
7933 if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) {
7934 vtab_set_error(
7935 &p->base, VEC_INTERAL_ERROR"Internal sqlite-vec error: " "could not close rowids blob on %s.%s.%lld",
7936 p->schemaName, p->shadowChunksName, chunk_rowid);
7937 return brc;
7938 }
7939 return rc;
7940}
7941
7942int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid, i64 chunk_id, i64 chunk_offset, sqlite3_value * v, int isupdate) {
7943 int rc;
7944 struct Vec0MetadataColumnDefinition * metadata_column = &p->metadata_columns[metadata_column_idx];
7945 vec0_metadata_column_kind kind = metadata_column->kind;
7946
7947 // verify input value matches column type
7948 switch(kind) {
7949 case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
7950 if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_INTEGER1 || ((sqlite3_value_intsqlite3_api->value_int(v) != 0) && (sqlite3_value_intsqlite3_api->value_int(v) != 1))) {
7951 rc = SQLITE_ERROR1;
7952 vtab_set_error(&p->base, "Expected 0 or 1 for BOOLEAN metadata column %.*s", metadata_column->name_length, metadata_column->name);
7953 goto done;
7954 }
7955 break;
7956 }
7957 case VEC0_METADATA_COLUMN_KIND_INTEGER: {
7958 if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_INTEGER1) {
7959 rc = SQLITE_ERROR1;
7960 vtab_set_error(&p->base, "Expected integer for INTEGER metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v)));
7961 goto done;
7962 }
7963 break;
7964 }
7965 case VEC0_METADATA_COLUMN_KIND_FLOAT: {
7966 if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_FLOAT2) {
7967 rc = SQLITE_ERROR1;
7968 vtab_set_error(&p->base, "Expected float for FLOAT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v)));
7969 goto done;
7970 }
7971 break;
7972 }
7973 case VEC0_METADATA_COLUMN_KIND_TEXT: {
7974 if(sqlite3_value_typesqlite3_api->value_type(v) != SQLITE_TEXT3) {
7975 rc = SQLITE_ERROR1;
7976 vtab_set_error(&p->base, "Expected text for TEXT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_typesqlite3_api->value_type(v)));
7977 goto done;
7978 }
7979 break;
7980 }
7981 }
7982
7983 sqlite3_blob * blobValue = NULL((void*)0);
7984 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", chunk_id, 1, &blobValue);
7985 if(rc != SQLITE_OK0) {
7986 goto done;
7987 }
7988
7989 switch(kind) {
7990 case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
7991 u8 block;
7992 int value = sqlite3_value_intsqlite3_api->value_int(v);
7993 rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT8));
7994 if(rc != SQLITE_OK0) {
7995 goto done;
7996 }
7997
7998 if (value) {
7999 block |= 1 << (chunk_offset % CHAR_BIT8);
8000 } else {
8001 block &= ~(1 << (chunk_offset % CHAR_BIT8));
8002 }
8003
8004 rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT8);
8005 break;
8006 }
8007 case VEC0_METADATA_COLUMN_KIND_INTEGER: {
8008 i64 value = sqlite3_value_int64sqlite3_api->value_int64(v);
8009 rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
8010 break;
8011 }
8012 case VEC0_METADATA_COLUMN_KIND_FLOAT: {
8013 double value = sqlite3_value_doublesqlite3_api->value_double(v);
8014 rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
8015 break;
8016 }
8017 case VEC0_METADATA_COLUMN_KIND_TEXT: {
8018 int prev_n;
8019 rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &prev_n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16);
8020 if(rc != SQLITE_OK0) {
8021 goto done;
8022 }
8023
8024 const char * s = (const char *) sqlite3_value_textsqlite3_api->value_text(v);
8025 int n = sqlite3_value_bytessqlite3_api->value_bytes(v);
8026 u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
8027 memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16);
8028 memcpy(view, &n, sizeof(int));
8029 memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4)(((n) <= (16 -4)) ? (n) : (16 -4)));
8030
8031 rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16);
8032 if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
8033 const char * zSql;
8034
8035 if(isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12)) {
8036 zSql = sqlite3_mprintfsqlite3_api->mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " SET data = ?2 WHERE rowid = ?1", p->schemaName, p->tableName, metadata_column_idx);
8037 }else {
8038 zSql = sqlite3_mprintfsqlite3_api->mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx);
8039 }
8040 if(!zSql) {
8041 rc = SQLITE_NOMEM7;
8042 goto done;
8043 }
8044 sqlite3_stmt * stmt;
8045 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
8046 if(rc != SQLITE_OK0) {
8047 goto done;
8048 }
8049 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
8050 sqlite3_bind_textsqlite3_api->bind_text(stmt, 2, s, n, SQLITE_STATIC((sqlite3_destructor_type)0));
8051 rc = sqlite3_stepsqlite3_api->step(stmt);
8052 sqlite3_finalizesqlite3_api->finalize(stmt);
8053
8054 if(rc != SQLITE_DONE101) {
8055 rc = SQLITE_ERROR1;
8056 goto done;
8057 }
8058 }
8059 else if(prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
8060 const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx);
8061 if(!zSql) {
8062 rc = SQLITE_NOMEM7;
8063 goto done;
8064 }
8065 sqlite3_stmt * stmt;
8066 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
8067 if(rc != SQLITE_OK0) {
8068 goto done;
8069 }
8070 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
8071 rc = sqlite3_stepsqlite3_api->step(stmt);
8072 sqlite3_finalizesqlite3_api->finalize(stmt);
8073
8074 if(rc != SQLITE_DONE101) {
8075 rc = SQLITE_ERROR1;
8076 goto done;
8077 }
8078 }
8079 break;
8080 }
8081 }
8082
8083 if(rc != SQLITE_OK0) {
8084
8085 }
8086 rc = sqlite3_blob_closesqlite3_api->blob_close(blobValue);
8087 if(rc != SQLITE_OK0) {
8088 goto done;
8089 }
8090
8091 done:
8092 return rc;
8093}
8094
8095
8096/**
8097 * @brief Handles INSERT INTO operations on a vec0 table.
8098 *
8099 * @return int SQLITE_OK on success, otherwise error code on failure
8100 */
8101int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
8102 sqlite_int64 *pRowid) {
8103 UNUSED_PARAMETER(argc)(void)(argc);
8104 vec0_vtab *p = (vec0_vtab *)pVTab;
8105 int rc;
8106 // Rowid for the inserted row, deterimined by the inserted ID + _rowids shadow
8107 // table
8108 i64 rowid;
8109
8110 // Array to hold the vector data of the inserted row. Individual elements will
8111 // have a lifetime bound to the argv[..] values.
8112 void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS16];
8113 // Array to hold cleanup functions for vectorDatas[]
8114 vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS16];
8115
8116 sqlite3_value * partitionKeyValues[VEC0_MAX_PARTITION_COLUMNS4];
8117
8118 // Rowid of the chunk in the _chunks shadow table that the row will be a part
8119 // of.
8120 i64 chunk_rowid;
8121 // offset within the chunk where the rowid belongs
8122 i64 chunk_offset;
8123
8124 // a write-able blob of the validity column for the given chunk. Used to mark
8125 // validity bit
8126 sqlite3_blob *blobChunksValidity = NULL((void*)0);
8127 // buffer for the valididty column for the given chunk. Maybe not needed here?
8128 const unsigned char *bufferChunksValidity = NULL((void*)0);
8129 int numReadVectors = 0;
8130
8131 // Read all provided partition key values into partitionKeyValues
8132 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
8133 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
8134 continue;
8135 }
8136 int partition_key_idx = p->user_column_idxs[i];
8137 partitionKeyValues[partition_key_idx] = argv[2+VEC0_COLUMN_USERN_START1 + i];
8138
8139 int new_value_type = sqlite3_value_typesqlite3_api->value_type(partitionKeyValues[partition_key_idx]);
8140 if((new_value_type != SQLITE_NULL5) && (new_value_type != p->paritition_columns[partition_key_idx].type)) {
8141 // IMP: V11454_28292
8142 vtab_set_error(
8143 pVTab,
8144 "Parition key type mismatch: The partition key column %.*s has type %s, but %s was provided.",
8145 p->paritition_columns[partition_key_idx].name_length,
8146 p->paritition_columns[partition_key_idx].name,
8147 type_name(p->paritition_columns[partition_key_idx].type),
8148 type_name(new_value_type)
8149 );
8150 rc = SQLITE_ERROR1;
8151 goto cleanup;
8152 }
8153 }
8154
8155 // read all the inserted vectors into vectorDatas, validate their lengths.
8156 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
8157 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
8158 continue;
8159 }
8160 int vector_column_idx = p->user_column_idxs[i];
8161 sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START1 + i];
8162 size_t dimensions;
8163
8164 char *pzError;
8165 enum VectorElementType elementType;
8166 rc = vector_from_value(valueVector, &vectorDatas[vector_column_idx], &dimensions,
8167 &elementType, &cleanups[vector_column_idx], &pzError);
8168 if (rc != SQLITE_OK0) {
8169 // IMP: V06519_23358
8170 vtab_set_error(
8171 pVTab, "Inserted vector for the \"%.*s\" column is invalid: %z",
8172 p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name, pzError);
8173 rc = SQLITE_ERROR1;
8174 goto cleanup;
8175 }
8176
8177 numReadVectors++;
8178 if (elementType != p->vector_columns[vector_column_idx].element_type) {
8179 // IMP: V08221_25059
8180 vtab_set_error(
8181 pVTab,
8182 "Inserted vector for the \"%.*s\" column is expected to be of type "
8183 "%s, but a %s vector was provided.",
8184 p->vector_columns[i].name_length, p->vector_columns[i].name,
8185 vector_subtype_name(p->vector_columns[i].element_type),
8186 vector_subtype_name(elementType));
8187 rc = SQLITE_ERROR1;
8188 goto cleanup;
8189 }
8190
8191 if (dimensions != p->vector_columns[vector_column_idx].dimensions) {
8192 // IMP: V01145_17984
8193 vtab_set_error(
8194 pVTab,
8195 "Dimension mismatch for inserted vector for the \"%.*s\" column. "
8196 "Expected %d dimensions but received %d.",
8197 p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name,
8198 p->vector_columns[vector_column_idx].dimensions, dimensions);
8199 rc = SQLITE_ERROR1;
8200 goto cleanup;
8201 }
8202 }
8203
8204 // Cannot insert a value in the hidden "distance" column
8205 if (sqlite3_value_typesqlite3_api->value_type(argv[2 + vec0_column_distance_idx(p)]) !=
8206 SQLITE_NULL5) {
8207 // IMP: V24228_08298
8208 vtab_set_error(pVTab,
8209 "A value was provided for the hidden \"distance\" column.");
8210 rc = SQLITE_ERROR1;
8211 goto cleanup;
8212 }
8213 // Cannot insert a value in the hidden "k" column
8214 if (sqlite3_value_typesqlite3_api->value_type(argv[2 + vec0_column_k_idx(p)]) != SQLITE_NULL5) {
8215 // IMP: V11875_28713
8216 vtab_set_error(pVTab, "A value was provided for the hidden \"k\" column.");
8217 rc = SQLITE_ERROR1;
8218 goto cleanup;
8219 }
8220
8221 // Step #1: Insert/get a rowid for this row, from the _rowids table.
8222 rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID0], &rowid);
8223 if (rc != SQLITE_OK0) {
8224 goto cleanup;
8225 }
8226
8227 // Step #2: Find the next "available" position in the _chunks table for this
8228 // row.
8229 rc = vec0Update_InsertNextAvailableStep(p, partitionKeyValues,
8230 &chunk_rowid, &chunk_offset,
8231 &blobChunksValidity,
8232 &bufferChunksValidity);
8233 if (rc != SQLITE_OK0) {
8234 goto cleanup;
8235 }
8236
8237 // Step #3: With the next available chunk position, write out all the vectors
8238 // to their specified location.
8239 rc = vec0Update_InsertWriteFinalStep(p, chunk_rowid, chunk_offset, rowid,
8240 vectorDatas, blobChunksValidity,
8241 bufferChunksValidity);
8242 if (rc != SQLITE_OK0) {
8243 goto cleanup;
8244 }
8245
8246 if(p->numAuxiliaryColumns > 0) {
8247 sqlite3_stmt *stmt;
8248 sqlite3_str * s = sqlite3_str_newsqlite3_api->str_new(NULL((void*)0));
8249 sqlite3_str_appendfsqlite3_api->str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" "(rowid ", p->schemaName, p->tableName);
8250 for(int i = 0; i < p->numAuxiliaryColumns; i++) {
8251 sqlite3_str_appendfsqlite3_api->str_appendf(s, ", value%02d", i);
8252 }
8253 sqlite3_str_appendallsqlite3_api->str_appendall(s, ") VALUES (? ");
8254 for(int i = 0; i < p->numAuxiliaryColumns; i++) {
8255 sqlite3_str_appendallsqlite3_api->str_appendall(s, ", ?");
8256 }
8257 sqlite3_str_appendallsqlite3_api->str_appendall(s, ")");
8258 char * zSql = sqlite3_str_finishsqlite3_api->str_finish(s);
8259 // TODO double check error handling ehre
8260 if(!zSql) {
8261 rc = SQLITE_NOMEM7;
8262 goto cleanup;
8263 }
8264 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
8265 if(rc != SQLITE_OK0) {
8266 goto cleanup;
8267 }
8268 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
8269
8270 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
8271 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
8272 continue;
8273 }
8274 int auxiliary_key_idx = p->user_column_idxs[i];
8275 sqlite3_value * v = argv[2+VEC0_COLUMN_USERN_START1 + i];
8276 int v_type = sqlite3_value_typesqlite3_api->value_type(v);
8277 if(v_type != SQLITE_NULL5 && (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) {
8278 sqlite3_finalizesqlite3_api->finalize(stmt);
8279 rc = SQLITE_CONSTRAINT19;
8280 vtab_set_error(
8281 pVTab,
8282 "Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.",
8283 p->auxiliary_columns[auxiliary_key_idx].name_length,
8284 p->auxiliary_columns[auxiliary_key_idx].name,
8285 type_name(p->auxiliary_columns[auxiliary_key_idx].type),
8286 type_name(v_type)
8287 );
8288 goto cleanup;
8289 }
8290 // first 1 is for 1-based indexing on sqlite3_bind_*, second 1 is to account for initial rowid parameter
8291 sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1 + 1 + auxiliary_key_idx, v);
8292 }
8293
8294 rc = sqlite3_stepsqlite3_api->step(stmt);
8295 if(rc != SQLITE_DONE101) {
8296 sqlite3_finalizesqlite3_api->finalize(stmt);
8297 rc = SQLITE_ERROR1;
8298 goto cleanup;
8299 }
8300 sqlite3_finalizesqlite3_api->finalize(stmt);
8301 }
8302
8303
8304 for(int i = 0; i < vec0_num_defined_user_columns(p); i++) {
8305 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
8306 continue;
8307 }
8308 int metadata_idx = p->user_column_idxs[i];
8309 sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START1 + i];
8310 rc = vec0_write_metadata_value(p, metadata_idx, rowid, chunk_rowid, chunk_offset, v, 0);
8311 if(rc != SQLITE_OK0) {
8312 goto cleanup;
8313 }
8314 }
8315
8316 *pRowid = rowid;
8317 rc = SQLITE_OK0;
8318
8319cleanup:
8320 for (int i = 0; i < numReadVectors; i++) {
8321 cleanups[i](vectorDatas[i]);
8322 }
8323 sqlite3_freesqlite3_api->free((void *)bufferChunksValidity);
8324 int brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksValidity);
8325 if ((rc == SQLITE_OK0) && (brc != SQLITE_OK0)) {
8326 vtab_set_error(&p->base,
8327 VEC_INTERAL_ERROR"Internal sqlite-vec error: " "unknown error, blobChunksValidity could "
8328 "not be closed, please file an issue");
8329 return brc;
8330 }
8331 return rc;
8332}
8333
8334int vec0Update_Delete_ClearValidity(vec0_vtab *p, i64 chunk_id,
8335 u64 chunk_offset) {
8336 int rc, brc;
8337 sqlite3_blob *blobChunksValidity = NULL((void*)0);
8338 char unsigned bx;
8339 int validityOffset = chunk_offset / CHAR_BIT8;
8340
8341 // 2. ensure chunks.validity bit is 1, then set to 0
8342 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
8343 chunk_id, 1, &blobChunksValidity);
8344 if (rc != SQLITE_OK0) {
8345 // IMP: V26002_10073
8346 vtab_set_error(&p->base, "could not open validity blob for %s.%s.%lld",
8347 p->schemaName, p->shadowChunksName, chunk_id);
8348 return SQLITE_ERROR1;
8349 }
8350 // will skip the sqlite3_blob_bytes(blobChunksValidity) check for now,
8351 // the read below would catch it
8352
8353 rc = sqlite3_blob_readsqlite3_api->blob_read(blobChunksValidity, &bx, sizeof(bx), validityOffset);
8354 if (rc != SQLITE_OK0) {
8355 // IMP: V21193_05263
8356 vtab_set_error(
8357 &p->base, "could not read validity blob for %s.%s.%lld at %d",
8358 p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
8359 goto cleanup;
8360 }
8361 if (!(bx >> (chunk_offset % CHAR_BIT8))) {
8362 // IMP: V21193_05263
8363 rc = SQLITE_ERROR1;
8364 vtab_set_error(
8365 &p->base,
8366 "vec0 deletion error: validity bit is not set for %s.%s.%lld at %d",
8367 p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
8368 goto cleanup;
8369 }
8370 char unsigned mask = ~(1 << (chunk_offset % CHAR_BIT8));
8371 char result = bx & mask;
8372 rc = sqlite3_blob_writesqlite3_api->blob_write(blobChunksValidity, &result, sizeof(bx),
8373 validityOffset);
8374 if (rc != SQLITE_OK0) {
8375 vtab_set_error(
8376 &p->base, "could not write to validity blob for %s.%s.%lld at %d",
8377 p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
8378 goto cleanup;
8379 }
8380
8381cleanup:
8382
8383 brc = sqlite3_blob_closesqlite3_api->blob_close(blobChunksValidity);
8384 if (rc != SQLITE_OK0)
8385 return rc;
8386 if (brc != SQLITE_OK0) {
8387 vtab_set_error(&p->base,
8388 "vec0 deletion error: Error commiting validity blob "
8389 "transaction on %s.%s.%lld at %d",
8390 p->schemaName, p->shadowChunksName, chunk_id,
8391 validityOffset);
8392 return brc;
8393 }
8394 return SQLITE_OK0;
8395}
8396
8397int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
8398 int rc;
8399 sqlite3_stmt *stmt = NULL((void*)0);
8400
8401 char *zSql =
8402 sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_ROWIDS_NAME"\"%w\".\"%w_rowids\"" " WHERE rowid = ?",
8403 p->schemaName, p->tableName);
8404 if (!zSql) {
8405 return SQLITE_NOMEM7;
8406 }
8407
8408 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
8409 sqlite3_freesqlite3_api->free(zSql);
8410 if (rc != SQLITE_OK0) {
8411 goto cleanup;
8412 }
8413 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
8414 rc = sqlite3_stepsqlite3_api->step(stmt);
8415 if (rc != SQLITE_DONE101) {
8416 goto cleanup;
8417 }
8418 rc = SQLITE_OK0;
8419
8420cleanup:
8421 sqlite3_finalizesqlite3_api->finalize(stmt);
8422 return rc;
8423}
8424
8425int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) {
8426 int rc;
8427 sqlite3_stmt *stmt = NULL((void*)0);
8428
8429 char *zSql =
8430 sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " WHERE rowid = ?",
8431 p->schemaName, p->tableName);
8432 if (!zSql) {
8433 return SQLITE_NOMEM7;
8434 }
8435
8436 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
8437 sqlite3_freesqlite3_api->free(zSql);
8438 if (rc != SQLITE_OK0) {
8439 goto cleanup;
8440 }
8441 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
8442 rc = sqlite3_stepsqlite3_api->step(stmt);
8443 if (rc != SQLITE_DONE101) {
8444 goto cleanup;
8445 }
8446 rc = SQLITE_OK0;
8447
8448cleanup:
8449 sqlite3_finalizesqlite3_api->finalize(stmt);
8450 return rc;
8451}
8452
8453int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i64 chunk_id,
8454 u64 chunk_offset) {
8455 int rc;
8456 sqlite3_blob * blobValue;
8457 vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;
8458 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 1, &blobValue);
8459 if(rc != SQLITE_OK0) {
8460 return rc;
8461 }
8462
8463 switch(kind) {
8464 case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
8465 u8 block;
8466 rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT8));
8467 if(rc != SQLITE_OK0) {
8468 goto done;
8469 }
8470
8471 block &= ~(1 << (chunk_offset % CHAR_BIT8));
8472 rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT8);
8473 break;
8474 }
8475 case VEC0_METADATA_COLUMN_KIND_INTEGER: {
8476 i64 v = 0;
8477 rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(i64));
8478 break;
8479 }
8480 case VEC0_METADATA_COLUMN_KIND_FLOAT: {
8481 double v = 0;
8482 rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(double));
8483 break;
8484 }
8485 case VEC0_METADATA_COLUMN_KIND_TEXT: {
8486 int n;
8487 rc = sqlite3_blob_readsqlite3_api->blob_read(blobValue, &n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16);
8488 if(rc != SQLITE_OK0) {
8489 goto done;
8490 }
8491
8492 u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16];
8493 memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16);
8494 rc = sqlite3_blob_writesqlite3_api->blob_write(blobValue, &view, sizeof(view), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH16);
8495 if(rc != SQLITE_OK0) {
8496 goto done;
8497 }
8498
8499 if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH12) {
8500 const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME"\"%w\".\"%w_metadatatext%02d\"" " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
8501 if(!zSql) {
8502 rc = SQLITE_NOMEM7;
8503 goto done;
8504 }
8505 sqlite3_stmt * stmt;
8506 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
8507 if(rc != SQLITE_OK0) {
8508 goto done;
8509 }
8510 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 1, rowid);
8511 rc = sqlite3_stepsqlite3_api->step(stmt);
8512 if(rc != SQLITE_DONE101) {
8513 rc = SQLITE_ERROR1;
8514 goto done;
8515 }
8516 sqlite3_finalizesqlite3_api->finalize(stmt);
8517 }
8518 break;
8519 }
8520 }
8521 int rc2;
8522 done:
8523 rc2 = sqlite3_blob_closesqlite3_api->blob_close(blobValue);
8524 if(rc == SQLITE_OK0) {
8525 return rc2;
8526 }
8527 return rc;
8528}
8529
8530int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
8531 vec0_vtab *p = (vec0_vtab *)pVTab;
8532 int rc;
8533 i64 rowid;
8534 i64 chunk_id;
8535 i64 chunk_offset;
8536
8537 if (p->pkIsText) {
8538 rc = vec0_rowid_from_id(p, idValue, &rowid);
8539 if (rc != SQLITE_OK0) {
8540 return rc;
8541 }
8542 } else {
8543 rowid = sqlite3_value_int64sqlite3_api->value_int64(idValue);
8544 }
8545
8546 // 1. Find chunk position for given rowid
8547 // 2. Ensure that validity bit for position is 1, then set to 0
8548 // 3. Zero out rowid in chunks.rowid
8549 // 4. Zero out vector data in all vector column chunks
8550 // 5. Delete value in _rowids table
8551
8552 // 1. get chunk_id and chunk_offset from _rowids
8553 rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset);
8554 if (rc != SQLITE_OK0) {
8555 return rc;
8556 }
8557
8558 rc = vec0Update_Delete_ClearValidity(p, chunk_id, chunk_offset);
8559 if (rc != SQLITE_OK0) {
8560 return rc;
8561 }
8562
8563 // 3. zero out rowid in chunks.rowids
8564 // https://github.com/asg017/sqlite-vec/issues/54
8565
8566 // 4. zero out any data in vector chunks tables
8567 // https://github.com/asg017/sqlite-vec/issues/54
8568
8569 // 5. delete from _rowids table
8570 rc = vec0Update_Delete_DeleteRowids(p, rowid);
8571 if (rc != SQLITE_OK0) {
8572 return rc;
8573 }
8574
8575 // 6. delete any auxiliary rows
8576 if(p->numAuxiliaryColumns > 0) {
8577 rc = vec0Update_Delete_DeleteAux(p, rowid);
8578 if (rc != SQLITE_OK0) {
8579 return rc;
8580 }
8581 }
8582
8583 // 6. delete metadata
8584 for(int i = 0; i < p->numMetadataColumns; i++) {
8585 rc = vec0Update_Delete_ClearMetadata(p, i, rowid, chunk_id, chunk_offset);
8586 }
8587
8588 return SQLITE_OK0;
8589}
8590
8591int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value * value, i64 rowid) {
8592 int rc;
8593 sqlite3_stmt *stmt;
8594 const char * zSql = sqlite3_mprintfsqlite3_api->mprintf("UPDATE " VEC0_SHADOW_AUXILIARY_NAME"\"%w\".\"%w_auxiliary\"" " SET value%02d = ? WHERE rowid = ?", p->schemaName, p->tableName, auxiliary_column_idx);
8595 if(!zSql) {
8596 return SQLITE_NOMEM7;
8597 }
8598 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(p->db, zSql, -1, &stmt, NULL((void*)0));
8599 if(rc != SQLITE_OK0) {
8600 return rc;
8601 }
8602 sqlite3_bind_valuesqlite3_api->bind_value(stmt, 1, value);
8603 sqlite3_bind_int64sqlite3_api->bind_int64(stmt, 2, rowid);
8604 rc = sqlite3_stepsqlite3_api->step(stmt);
8605 if(rc != SQLITE_DONE101) {
8606 sqlite3_finalizesqlite3_api->finalize(stmt);
8607 return SQLITE_ERROR1;
8608 }
8609 sqlite3_finalizesqlite3_api->finalize(stmt);
8610 return SQLITE_OK0;
8611}
8612
8613int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_offset,
8614 int i, sqlite3_value *valueVector) {
8615 int rc;
8616
8617 sqlite3_blob *blobVectors = NULL((void*)0);
8618
8619 char *pzError;
8620 size_t dimensions;
8621 enum VectorElementType elementType;
8622 void *vector;
8623 vector_cleanup cleanup = vector_cleanup_noop;
8624 // https://github.com/asg017/sqlite-vec/issues/53
8625 rc = vector_from_value(valueVector, &vector, &dimensions, &elementType,
8626 &cleanup, &pzError);
8627 if (rc != SQLITE_OK0) {
8628 // IMP: V15203_32042
8629 vtab_set_error(
8630 &p->base, "Updated vector for the \"%.*s\" column is invalid: %z",
8631 p->vector_columns[i].name_length, p->vector_columns[i].name, pzError);
8632 rc = SQLITE_ERROR1;
8633 goto cleanup;
8634 }
8635 if (elementType != p->vector_columns[i].element_type) {
8636 // IMP: V03643_20481
8637 vtab_set_error(
8638 &p->base,
8639 "Updated vector for the \"%.*s\" column is expected to be of type "
8640 "%s, but a %s vector was provided.",
8641 p->vector_columns[i].name_length, p->vector_columns[i].name,
8642 vector_subtype_name(p->vector_columns[i].element_type),
8643 vector_subtype_name(elementType));
8644 rc = SQLITE_ERROR1;
8645 goto cleanup;
8646 }
8647 if (dimensions != p->vector_columns[i].dimensions) {
8648 // IMP: V25739_09810
8649 vtab_set_error(
8650 &p->base,
8651 "Dimension mismatch for new updated vector for the \"%.*s\" column. "
8652 "Expected %d dimensions but received %d.",
8653 p->vector_columns[i].name_length, p->vector_columns[i].name,
8654 p->vector_columns[i].dimensions, dimensions);
8655 rc = SQLITE_ERROR1;
8656 goto cleanup;
8657 }
8658
8659 rc = sqlite3_blob_opensqlite3_api->blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
8660 "vectors", chunk_id, 1, &blobVectors);
8661 if (rc != SQLITE_OK0) {
8662 vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld",
8663 p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
8664 goto cleanup;
8665 }
8666 rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, vector,
8667 p->vector_columns[i].dimensions,
8668 p->vector_columns[i].element_type);
8669 if (rc != SQLITE_OK0) {
8670 vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld",
8671 p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
8672 goto cleanup;
8673 }
8674
8675cleanup:
8676 cleanup(vector);
8677 int brc = sqlite3_blob_closesqlite3_api->blob_close(blobVectors);
8678 if (rc != SQLITE_OK0) {
8679 return rc;
8680 }
8681 if (brc != SQLITE_OK0) {
8682 vtab_set_error(
8683 &p->base,
8684 "Could not commit blob transaction for vectors blob for %s.%s.%lld",
8685 p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
8686 return brc;
8687 }
8688 return SQLITE_OK0;
8689}
8690
8691int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) {
8692 UNUSED_PARAMETER(argc)(void)(argc);
8693 vec0_vtab *p = (vec0_vtab *)pVTab;
8694 int rc;
8695 i64 chunk_id;
8696 i64 chunk_offset;
8697
8698 i64 rowid;
8699 if (p->pkIsText) {
8700 const char *a = (const char *)sqlite3_value_textsqlite3_api->value_text(argv[0]);
8701 const char *b = (const char *)sqlite3_value_textsqlite3_api->value_text(argv[1]);
8702 // IMP: V08886_25725
8703 if ((sqlite3_value_bytessqlite3_api->value_bytes(argv[0]) != sqlite3_value_bytessqlite3_api->value_bytes(argv[1])) ||
8704 strncmp(a, b, sqlite3_value_bytessqlite3_api->value_bytes(argv[0])) != 0) {
8705 vtab_set_error(pVTab,
8706 "UPDATEs on vec0 primary key values are not allowed.");
8707 return SQLITE_ERROR1;
8708 }
8709 rc = vec0_rowid_from_id(p, argv[0], &rowid);
8710 if (rc != SQLITE_OK0) {
8711 return rc;
8712 }
8713 } else {
8714 rowid = sqlite3_value_int64sqlite3_api->value_int64(argv[0]);
8715 }
8716
8717 // 1) get chunk_id and chunk_offset from _rowids
8718 rc = vec0_get_chunk_position(p, rowid, NULL((void*)0), &chunk_id, &chunk_offset);
8719 if (rc != SQLITE_OK0) {
8720 return rc;
8721 }
8722
8723 // 2) update any partition key values
8724 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
8725 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
8726 continue;
8727 }
8728 sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i];
8729 if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) {
8730 continue;
8731 }
8732 vtab_set_error(pVTab, "UPDATE on partition key columns are not supported yet. ");
8733 return SQLITE_ERROR1;
8734 }
8735
8736 // 3) handle auxiliary column updates
8737 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
8738 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
8739 continue;
8740 }
8741 int auxiliary_column_idx = p->user_column_idxs[i];
8742 sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i];
8743 if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) {
8744 continue;
8745 }
8746 rc = vec0Update_UpdateAuxColumn(p, auxiliary_column_idx, value, rowid);
8747 if(rc != SQLITE_OK0) {
8748 return SQLITE_ERROR1;
8749 }
8750 }
8751
8752 // 4) handle metadata column updates
8753 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
8754 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
8755 continue;
8756 }
8757 int metadata_column_idx = p->user_column_idxs[i];
8758 sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START1 + i];
8759 if(sqlite3_value_nochangesqlite3_api->value_nochange(value)) {
8760 continue;
8761 }
8762 rc = vec0_write_metadata_value(p, metadata_column_idx, rowid, chunk_id, chunk_offset, value, 1);
8763 if(rc != SQLITE_OK0) {
8764 return rc;
8765 }
8766 }
8767
8768 // 5) iterate over all new vectors, update the vectors
8769 for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
8770 if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
8771 continue;
8772 }
8773 int vector_idx = p->user_column_idxs[i];
8774 sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START1 + i];
8775 // in vec0Column, we check sqlite3_vtab_nochange() on vector columns.
8776 // If the vector column isn't being changed, we return NULL;
8777 // That's not great, that means vector columns can never be NULLABLE
8778 // (bc we cant distinguish if an updated vector is truly NULL or nochange).
8779 // Also it means that if someone tries to run `UPDATE v SET X = NULL`,
8780 // we can't effectively detect and raise an error.
8781 // A better solution would be to use a custom result_type for "empty",
8782 // but subtypes don't appear to survive xColumn -> xUpdate, it's always 0.
8783 // So for now, we'll just use NULL and warn people to not SET X = NULL
8784 // in the docs.
8785 if (sqlite3_value_typesqlite3_api->value_type(valueVector) == SQLITE_NULL5) {
8786 continue;
8787 }
8788
8789 rc = vec0Update_UpdateVectorColumn(p, chunk_id, chunk_offset, vector_idx,
8790 valueVector);
8791 if (rc != SQLITE_OK0) {
8792 return SQLITE_ERROR1;
8793 }
8794 }
8795
8796 return SQLITE_OK0;
8797}
8798
8799static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
8800 sqlite_int64 *pRowid) {
8801 // DELETE operation
8802 if (argc == 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) {
8803 return vec0Update_Delete(pVTab, argv[0]);
8804 }
8805 // INSERT operation
8806 else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) == SQLITE_NULL5) {
8807 return vec0Update_Insert(pVTab, argc, argv, pRowid);
8808 }
8809 // UPDATE operation
8810 else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) {
8811 return vec0Update_Update(pVTab, argc, argv);
8812 } else {
8813 vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0.");
8814 return SQLITE_ERROR1;
8815 }
8816}
8817
/**
 * @brief xShadowName callback: reports whether zName is one of the shadow
 * table name suffixes listed below.
 *
 * The comparison is case-insensitive (sqlite3_stricmp).
 *
 * @return 1 if zName matches a listed suffix, 0 otherwise.
 */
static int vec0ShadowName(const char *zName) {
  static const char *kShadowSuffixes[] = {
      "rowids", "chunks", "auxiliary", "info",

      // Up to VEC0_MAX_METADATA_COLUMNS
      // TODO be smarter about this man
      "metadatachunks00", "metadatachunks01", "metadatachunks02",
      "metadatachunks03", "metadatachunks04", "metadatachunks05",
      "metadatachunks06", "metadatachunks07", "metadatachunks08",
      "metadatachunks09", "metadatachunks10", "metadatachunks11",
      "metadatachunks12", "metadatachunks13", "metadatachunks14",
      "metadatachunks15",

      // Same count as the metadatachunks entries above
      "metadatatext00", "metadatatext01", "metadatatext02",
      "metadatatext03", "metadatatext04", "metadatatext05",
      "metadatatext06", "metadatatext07", "metadatatext08",
      "metadatatext09", "metadatatext10", "metadatatext11",
      "metadatatext12", "metadatatext13", "metadatatext14",
      "metadatatext15",
  };
  const size_t suffixCount = sizeof(kShadowSuffixes) / sizeof(kShadowSuffixes[0]);

  size_t idx = 0;
  while (idx < suffixCount) {
    if (sqlite3_stricmp(zName, kShadowSuffixes[idx]) == 0) {
      return 1;
    }
    idx++;
  }
  // NOTE(review): vector chunk shadow tables are not listed here -- the
  // original carried an unfinished note about "vector_chunks"; confirm
  // whether they should be.
  return 0;
}
8867
8868static int vec0Begin(sqlite3_vtab *pVTab) {
8869 UNUSED_PARAMETER(pVTab)(void)(pVTab);
8870 return SQLITE_OK0;
8871}
8872static int vec0Sync(sqlite3_vtab *pVTab) {
8873 UNUSED_PARAMETER(pVTab)(void)(pVTab);
8874 vec0_vtab *p = (vec0_vtab *)pVTab;
8875 if (p->stmtLatestChunk) {
8876 sqlite3_finalizesqlite3_api->finalize(p->stmtLatestChunk);
8877 p->stmtLatestChunk = NULL((void*)0);
8878 }
8879 if (p->stmtRowidsInsertRowid) {
8880 sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertRowid);
8881 p->stmtRowidsInsertRowid = NULL((void*)0);
8882 }
8883 if (p->stmtRowidsInsertId) {
8884 sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsInsertId);
8885 p->stmtRowidsInsertId = NULL((void*)0);
8886 }
8887 if (p->stmtRowidsUpdatePosition) {
8888 sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsUpdatePosition);
8889 p->stmtRowidsUpdatePosition = NULL((void*)0);
8890 }
8891 if (p->stmtRowidsGetChunkPosition) {
8892 sqlite3_finalizesqlite3_api->finalize(p->stmtRowidsGetChunkPosition);
8893 p->stmtRowidsGetChunkPosition = NULL((void*)0);
8894 }
8895 return SQLITE_OK0;
8896}
8897static int vec0Commit(sqlite3_vtab *pVTab) {
8898 UNUSED_PARAMETER(pVTab)(void)(pVTab);
8899 return SQLITE_OK0;
8900}
8901static int vec0Rollback(sqlite3_vtab *pVTab) {
8902 UNUSED_PARAMETER(pVTab)(void)(pVTab);
8903 return SQLITE_OK0;
8904}
8905
8906static sqlite3_module vec0Module = {
8907 /* iVersion */ 3,
8908 /* xCreate */ vec0Create,
8909 /* xConnect */ vec0Connect,
8910 /* xBestIndex */ vec0BestIndex,
8911 /* xDisconnect */ vec0Disconnect,
8912 /* xDestroy */ vec0Destroy,
8913 /* xOpen */ vec0Open,
8914 /* xClose */ vec0Close,
8915 /* xFilter */ vec0Filter,
8916 /* xNext */ vec0Next,
8917 /* xEof */ vec0Eof,
8918 /* xColumn */ vec0Column,
8919 /* xRowid */ vec0Rowid,
8920 /* xUpdate */ vec0Update,
8921 /* xBegin */ vec0Begin,
8922 /* xSync */ vec0Sync,
8923 /* xCommit */ vec0Commit,
8924 /* xRollback */ vec0Rollback,
8925 /* xFindFunction */ 0,
8926 /* xRename */ 0, // https://github.com/asg017/sqlite-vec/issues/43
8927 /* xSavepoint */ 0,
8928 /* xRelease */ 0,
8929 /* xRollbackTo */ 0,
8930 /* xShadowName */ vec0ShadowName,
8931#if SQLITE_VERSION_NUMBER3050001 >= 3044000
8932 /* xIntegrity */ 0, // https://github.com/asg017/sqlite-vec/issues/44
8933#endif
8934};
8935#pragma endregion
8936
8937static char *POINTER_NAME_STATIC_BLOB_DEF = "vec0-static_blob_def";
8938struct static_blob_definition {
8939 void *p;
8940 size_t dimensions;
8941 size_t nvectors;
8942 enum VectorElementType element_type;
8943};
8944static void vec_static_blob_from_raw(sqlite3_context *context, int argc,
8945 sqlite3_value **argv) {
8946
8947 assert(argc == 4)((void) sizeof ((argc == 4) ? 1 : 0), __extension__ ({ if (argc
== 4) ; else __assert_fail ("argc == 4", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 8947, __extension__ __PRETTY_FUNCTION__); }))
;
8948 struct static_blob_definition *p;
8949 p = sqlite3_mallocsqlite3_api->malloc(sizeof(*p));
8950 if (!p) {
8951 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(context);
8952 return;
8953 }
8954 memset(p, 0, sizeof(*p));
8955 p->p = (void *)sqlite3_value_int64sqlite3_api->value_int64(argv[0]);
8956 p->element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
8957 p->dimensions = sqlite3_value_int64sqlite3_api->value_int64(argv[2]);
8958 p->nvectors = sqlite3_value_int64sqlite3_api->value_int64(argv[3]);
8959 sqlite3_result_pointersqlite3_api->result_pointer(context, p, POINTER_NAME_STATIC_BLOB_DEF,
8960 sqlite3_freesqlite3_api->free);
8961}
8962#pragma region vec_static_blobs() table function
8963
8964#define MAX_STATIC_BLOBS16 16
8965
8966typedef struct static_blob static_blob;
8967struct static_blob {
8968 char *name;
8969 void *p;
8970 size_t dimensions;
8971 size_t nvectors;
8972 enum VectorElementType element_type;
8973};
8974
8975typedef struct vec_static_blob_data vec_static_blob_data;
8976struct vec_static_blob_data {
8977 static_blob static_blobs[MAX_STATIC_BLOBS16];
8978};
8979
8980typedef struct vec_static_blobs_vtab vec_static_blobs_vtab;
8981struct vec_static_blobs_vtab {
8982 sqlite3_vtab base;
8983 vec_static_blob_data *data;
8984};
8985
8986typedef struct vec_static_blobs_cursor vec_static_blobs_cursor;
8987struct vec_static_blobs_cursor {
8988 sqlite3_vtab_cursor base;
8989 sqlite3_int64 iRowid;
8990};
8991
8992static int vec_static_blobsConnect(sqlite3 *db, void *pAux, int argc,
8993 const char *const *argv,
8994 sqlite3_vtab **ppVtab, char **pzErr) {
8995 UNUSED_PARAMETER(argc)(void)(argc);
8996 UNUSED_PARAMETER(argv)(void)(argv);
8997 UNUSED_PARAMETER(pzErr)(void)(pzErr);
8998
8999 vec_static_blobs_vtab *pNew;
9000#define VEC_STATIC_BLOBS_NAME0 0
9001#define VEC_STATIC_BLOBS_DATA1 1
9002#define VEC_STATIC_BLOBS_DIMENSIONS2 2
9003#define VEC_STATIC_BLOBS_COUNT3 3
9004 int rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(
9005 db, "CREATE TABLE x(name, data, dimensions hidden, count hidden)");
9006 if (rc == SQLITE_OK0) {
9007 pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew));
9008 *ppVtab = (sqlite3_vtab *)pNew;
9009 if (pNew == 0)
9010 return SQLITE_NOMEM7;
9011 memset(pNew, 0, sizeof(*pNew));
9012 pNew->data = pAux;
9013 }
9014 return rc;
9015}
9016
9017static int vec_static_blobsDisconnect(sqlite3_vtab *pVtab) {
9018 vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVtab;
9019 sqlite3_freesqlite3_api->free(p);
9020 return SQLITE_OK0;
9021}
9022
9023static int vec_static_blobsUpdate(sqlite3_vtab *pVTab, int argc,
9024 sqlite3_value **argv, sqlite_int64 *pRowid) {
9025 UNUSED_PARAMETER(pRowid)(void)(pRowid);
9026 vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVTab;
9027 // DELETE operation
9028 if (argc == 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) {
9029 return SQLITE_ERROR1;
9030 }
9031 // INSERT operation
9032 else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) == SQLITE_NULL5) {
9033 const char *key =
9034 (const char *)sqlite3_value_textsqlite3_api->value_text(argv[2 + VEC_STATIC_BLOBS_NAME0]);
9035 int idx = -1;
9036 for (int i = 0; i < MAX_STATIC_BLOBS16; i++) {
9037 if (!p->data->static_blobs[i].name) {
9038 p->data->static_blobs[i].name = sqlite3_mprintfsqlite3_api->mprintf("%s", key);
9039 idx = i;
9040 break;
9041 }
9042 }
9043 if (idx < 0)
9044 abort();
9045 struct static_blob_definition *def = sqlite3_value_pointersqlite3_api->value_pointer(
9046 argv[2 + VEC_STATIC_BLOBS_DATA1], POINTER_NAME_STATIC_BLOB_DEF);
9047 p->data->static_blobs[idx].p = def->p;
9048 p->data->static_blobs[idx].dimensions = def->dimensions;
9049 p->data->static_blobs[idx].nvectors = def->nvectors;
9050 p->data->static_blobs[idx].element_type = def->element_type;
9051
9052 return SQLITE_OK0;
9053 }
9054 // UPDATE operation
9055 else if (argc > 1 && sqlite3_value_typesqlite3_api->value_type(argv[0]) != SQLITE_NULL5) {
9056 return SQLITE_ERROR1;
9057 }
9058 return SQLITE_ERROR1;
9059}
9060
9061static int vec_static_blobsOpen(sqlite3_vtab *p,
9062 sqlite3_vtab_cursor **ppCursor) {
9063 UNUSED_PARAMETER(p)(void)(p);
9064 vec_static_blobs_cursor *pCur;
9065 pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur));
9066 if (pCur == 0)
9067 return SQLITE_NOMEM7;
9068 memset(pCur, 0, sizeof(*pCur));
9069 *ppCursor = &pCur->base;
9070 return SQLITE_OK0;
9071}
9072
9073static int vec_static_blobsClose(sqlite3_vtab_cursor *cur) {
9074 vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
9075 sqlite3_freesqlite3_api->free(pCur);
9076 return SQLITE_OK0;
9077}
9078
9079static int vec_static_blobsBestIndex(sqlite3_vtab *pVTab,
9080 sqlite3_index_info *pIdxInfo) {
9081 UNUSED_PARAMETER(pVTab)(void)(pVTab);
9082 pIdxInfo->idxNum = 1;
9083 pIdxInfo->estimatedCost = (double)10;
9084 pIdxInfo->estimatedRows = 10;
9085 return SQLITE_OK0;
9086}
9087
9088static int vec_static_blobsNext(sqlite3_vtab_cursor *cur);
9089static int vec_static_blobsFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
9090 const char *idxStr, int argc,
9091 sqlite3_value **argv) {
9092 UNUSED_PARAMETER(idxNum)(void)(idxNum);
9093 UNUSED_PARAMETER(idxStr)(void)(idxStr);
9094 UNUSED_PARAMETER(argc)(void)(argc);
9095 UNUSED_PARAMETER(argv)(void)(argv);
9096 vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)pVtabCursor;
9097 pCur->iRowid = -1;
9098 vec_static_blobsNext(pVtabCursor);
9099 return SQLITE_OK0;
9100}
9101
9102static int vec_static_blobsRowid(sqlite3_vtab_cursor *cur,
9103 sqlite_int64 *pRowid) {
9104 vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
9105 *pRowid = pCur->iRowid;
9106 return SQLITE_OK0;
9107}
9108
9109static int vec_static_blobsNext(sqlite3_vtab_cursor *cur) {
9110 vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
9111 vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pCur->base.pVtab;
9112 pCur->iRowid++;
9113 while (pCur->iRowid < MAX_STATIC_BLOBS16) {
9114 if (p->data->static_blobs[pCur->iRowid].name) {
9115 return SQLITE_OK0;
9116 }
9117 pCur->iRowid++;
9118 }
9119 return SQLITE_OK0;
9120}
9121
9122static int vec_static_blobsEof(sqlite3_vtab_cursor *cur) {
9123 vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
9124 return pCur->iRowid >= MAX_STATIC_BLOBS16;
9125}
9126
9127static int vec_static_blobsColumn(sqlite3_vtab_cursor *cur,
9128 sqlite3_context *context, int i) {
9129 vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
9130 vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)cur->pVtab;
9131 switch (i) {
9132 case VEC_STATIC_BLOBS_NAME0:
9133 sqlite3_result_textsqlite3_api->result_text(context, p->data->static_blobs[pCur->iRowid].name, -1,
9134 SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
9135 break;
9136 case VEC_STATIC_BLOBS_DATA1:
9137 sqlite3_result_nullsqlite3_api->result_null(context);
9138 break;
9139 case VEC_STATIC_BLOBS_DIMENSIONS2:
9140 sqlite3_result_int64sqlite3_api->result_int64(context,
9141 p->data->static_blobs[pCur->iRowid].dimensions);
9142 break;
9143 case VEC_STATIC_BLOBS_COUNT3:
9144 sqlite3_result_int64sqlite3_api->result_int64(context, p->data->static_blobs[pCur->iRowid].nvectors);
9145 break;
9146 }
9147 return SQLITE_OK0;
9148}
9149
9150static sqlite3_module vec_static_blobsModule = {
9151 /* iVersion */ 3,
9152 /* xCreate */ 0,
9153 /* xConnect */ vec_static_blobsConnect,
9154 /* xBestIndex */ vec_static_blobsBestIndex,
9155 /* xDisconnect */ vec_static_blobsDisconnect,
9156 /* xDestroy */ 0,
9157 /* xOpen */ vec_static_blobsOpen,
9158 /* xClose */ vec_static_blobsClose,
9159 /* xFilter */ vec_static_blobsFilter,
9160 /* xNext */ vec_static_blobsNext,
9161 /* xEof */ vec_static_blobsEof,
9162 /* xColumn */ vec_static_blobsColumn,
9163 /* xRowid */ vec_static_blobsRowid,
9164 /* xUpdate */ vec_static_blobsUpdate,
9165 /* xBegin */ 0,
9166 /* xSync */ 0,
9167 /* xCommit */ 0,
9168 /* xRollback */ 0,
9169 /* xFindMethod */ 0,
9170 /* xRename */ 0,
9171 /* xSavepoint */ 0,
9172 /* xRelease */ 0,
9173 /* xRollbackTo */ 0,
9174 /* xShadowName */ 0,
9175#if SQLITE_VERSION_NUMBER3050001 >= 3044000
9176 /* xIntegrity */ 0
9177#endif
9178};
9179#pragma endregion
9180
9181#pragma region vec_static_blob_entries() table function
9182
9183typedef struct vec_static_blob_entries_vtab vec_static_blob_entries_vtab;
9184struct vec_static_blob_entries_vtab {
9185 sqlite3_vtab base;
9186 static_blob *blob;
9187};
9188typedef enum {
9189 VEC_SBE__QUERYPLAN_FULLSCAN = 1,
9190 VEC_SBE__QUERYPLAN_KNN = 2
9191} vec_sbe_query_plan;
9192
9193struct sbe_query_knn_data {
9194 i64 k;
9195 i64 k_used;
9196 // Array of rowids of size k. Must be freed with sqlite3_free().
9197 i32 *rowids;
9198 // Array of distances of size k. Must be freed with sqlite3_free().
9199 f32 *distances;
9200 i64 current_idx;
9201};
9202void sbe_query_knn_data_clear(struct sbe_query_knn_data *knn_data) {
9203 if (!knn_data)
9204 return;
9205
9206 if (knn_data->rowids) {
9207 sqlite3_freesqlite3_api->free(knn_data->rowids);
9208 knn_data->rowids = NULL((void*)0);
9209 }
9210 if (knn_data->distances) {
9211 sqlite3_freesqlite3_api->free(knn_data->distances);
9212 knn_data->distances = NULL((void*)0);
9213 }
9214}
9215
9216typedef struct vec_static_blob_entries_cursor vec_static_blob_entries_cursor;
9217struct vec_static_blob_entries_cursor {
9218 sqlite3_vtab_cursor base;
9219 sqlite3_int64 iRowid;
9220 vec_sbe_query_plan query_plan;
9221 struct sbe_query_knn_data *knn_data;
9222};
9223
9224static int vec_static_blob_entriesConnect(sqlite3 *db, void *pAux, int argc,
9225 const char *const *argv,
9226 sqlite3_vtab **ppVtab, char **pzErr) {
9227 UNUSED_PARAMETER(argc)(void)(argc);
9228 UNUSED_PARAMETER(argv)(void)(argv);
9229 UNUSED_PARAMETER(pzErr)(void)(pzErr);
9230 vec_static_blob_data *blob_data = pAux;
9231 int idx = -1;
9232 for (int i = 0; i < MAX_STATIC_BLOBS16; i++) {
9233 if (!blob_data->static_blobs[i].name)
9234 continue;
9235 if (strncmp(blob_data->static_blobs[i].name, argv[3],
9236 strlen(blob_data->static_blobs[i].name)) == 0) {
9237 idx = i;
9238 break;
9239 }
9240 }
9241 if (idx < 0)
9242 abort();
9243 vec_static_blob_entries_vtab *pNew;
9244#define VEC_STATIC_BLOB_ENTRIES_VECTOR0 0
9245#define VEC_STATIC_BLOB_ENTRIES_DISTANCE1 1
9246#define VEC_STATIC_BLOB_ENTRIES_K2 2
9247 int rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(
9248 db, "CREATE TABLE x(vector, distance hidden, k hidden)");
9249 if (rc == SQLITE_OK0) {
9250 pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew));
9251 *ppVtab = (sqlite3_vtab *)pNew;
9252 if (pNew == 0)
9253 return SQLITE_NOMEM7;
9254 memset(pNew, 0, sizeof(*pNew));
9255 pNew->blob = &blob_data->static_blobs[idx];
9256 }
9257 return rc;
9258}
9259
9260static int vec_static_blob_entriesCreate(sqlite3 *db, void *pAux, int argc,
9261 const char *const *argv,
9262 sqlite3_vtab **ppVtab, char **pzErr) {
9263 return vec_static_blob_entriesConnect(db, pAux, argc, argv, ppVtab, pzErr);
9264}
9265
9266static int vec_static_blob_entriesDisconnect(sqlite3_vtab *pVtab) {
9267 vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVtab;
9268 sqlite3_freesqlite3_api->free(p);
9269 return SQLITE_OK0;
9270}
9271
9272static int vec_static_blob_entriesOpen(sqlite3_vtab *p,
9273 sqlite3_vtab_cursor **ppCursor) {
9274 UNUSED_PARAMETER(p)(void)(p);
9275 vec_static_blob_entries_cursor *pCur;
9276 pCur = sqlite3_mallocsqlite3_api->malloc(sizeof(*pCur));
9277 if (pCur == 0)
9278 return SQLITE_NOMEM7;
9279 memset(pCur, 0, sizeof(*pCur));
9280 *ppCursor = &pCur->base;
9281 return SQLITE_OK0;
9282}
9283
9284static int vec_static_blob_entriesClose(sqlite3_vtab_cursor *cur) {
9285 vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
9286 sqlite3_freesqlite3_api->free(pCur->knn_data);
9287 sqlite3_freesqlite3_api->free(pCur);
9288 return SQLITE_OK0;
9289}
9290
9291static int vec_static_blob_entriesBestIndex(sqlite3_vtab *pVTab,
9292 sqlite3_index_info *pIdxInfo) {
9293 vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVTab;
9294 int iMatchTerm = -1;
9295 int iLimitTerm = -1;
9296 // int iRowidTerm = -1; // https://github.com/asg017/sqlite-vec/issues/47
9297 int iKTerm = -1;
9298
9299 for (int i = 0; i < pIdxInfo->nConstraint; i++) {
9300 if (!pIdxInfo->aConstraint[i].usable)
9301 continue;
9302
9303 int iColumn = pIdxInfo->aConstraint[i].iColumn;
9304 int op = pIdxInfo->aConstraint[i].op;
9305 if (op == SQLITE_INDEX_CONSTRAINT_MATCH64 &&
9306 iColumn == VEC_STATIC_BLOB_ENTRIES_VECTOR0) {
9307 if (iMatchTerm > -1) {
9308 // https://github.com/asg017/sqlite-vec/issues/51
9309 return SQLITE_ERROR1;
9310 }
9311 iMatchTerm = i;
9312 }
9313 if (op == SQLITE_INDEX_CONSTRAINT_LIMIT73) {
9314 iLimitTerm = i;
9315 }
9316 if (op == SQLITE_INDEX_CONSTRAINT_EQ2 &&
9317 iColumn == VEC_STATIC_BLOB_ENTRIES_K2) {
9318 iKTerm = i;
9319 }
9320 }
9321 if (iMatchTerm >= 0) {
9322 if (iLimitTerm < 0 && iKTerm < 0) {
9323 // https://github.com/asg017/sqlite-vec/issues/51
9324 return SQLITE_ERROR1;
9325 }
9326 if (iLimitTerm >= 0 && iKTerm >= 0) {
9327 return SQLITE_ERROR1; // limit or k, not both
9328 }
9329 if (pIdxInfo->nOrderBy < 1) {
9330 vtab_set_error(pVTab, "ORDER BY distance required");
9331 return SQLITE_CONSTRAINT19;
9332 }
9333 if (pIdxInfo->nOrderBy > 1) {
9334 // https://github.com/asg017/sqlite-vec/issues/51
9335 vtab_set_error(pVTab, "more than 1 ORDER BY clause provided");
9336 return SQLITE_CONSTRAINT19;
9337 }
9338 if (pIdxInfo->aOrderBy[0].iColumn != VEC_STATIC_BLOB_ENTRIES_DISTANCE1) {
9339 vtab_set_error(pVTab, "ORDER BY must be on the distance column");
9340 return SQLITE_CONSTRAINT19;
9341 }
9342 if (pIdxInfo->aOrderBy[0].desc) {
9343 vtab_set_error(pVTab,
9344 "Only ascending in ORDER BY distance clause is supported, "
9345 "DESC is not supported yet.");
9346 return SQLITE_CONSTRAINT19;
9347 }
9348
9349 pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_KNN;
9350 pIdxInfo->estimatedCost = (double)10;
9351 pIdxInfo->estimatedRows = 10;
9352
9353 pIdxInfo->orderByConsumed = 1;
9354 pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = 1;
9355 pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
9356 if (iLimitTerm >= 0) {
9357 pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = 2;
9358 pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1;
9359 } else {
9360 pIdxInfo->aConstraintUsage[iKTerm].argvIndex = 2;
9361 pIdxInfo->aConstraintUsage[iKTerm].omit = 1;
9362 }
9363
9364 } else {
9365 pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_FULLSCAN;
9366 pIdxInfo->estimatedCost = (double)p->blob->nvectors;
9367 pIdxInfo->estimatedRows = p->blob->nvectors;
9368 }
9369 return SQLITE_OK0;
9370}
9371
9372static int vec_static_blob_entriesFilter(sqlite3_vtab_cursor *pVtabCursor,
9373 int idxNum, const char *idxStr,
9374 int argc, sqlite3_value **argv) {
9375 UNUSED_PARAMETER(idxStr)(void)(idxStr);
9376 assert(argc >= 0 && argc <= 3)((void) sizeof ((argc >= 0 && argc <= 3) ? 1 : 0
), __extension__ ({ if (argc >= 0 && argc <= 3)
; else __assert_fail ("argc >= 0 && argc <= 3"
, "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 9376, __extension__ __PRETTY_FUNCTION__); }))
;
9377 vec_static_blob_entries_cursor *pCur =
9378 (vec_static_blob_entries_cursor *)pVtabCursor;
9379 vec_static_blob_entries_vtab *p =
9380 (vec_static_blob_entries_vtab *)pCur->base.pVtab;
9381
9382 if (idxNum == VEC_SBE__QUERYPLAN_KNN) {
9383 assert(argc == 2)((void) sizeof ((argc == 2) ? 1 : 0), __extension__ ({ if (argc
== 2) ; else __assert_fail ("argc == 2", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 9383, __extension__ __PRETTY_FUNCTION__); }))
;
9384 pCur->query_plan = VEC_SBE__QUERYPLAN_KNN;
9385 struct sbe_query_knn_data *knn_data;
9386 knn_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*knn_data));
9387 if (!knn_data) {
9388 return SQLITE_NOMEM7;
9389 }
9390 memset(knn_data, 0, sizeof(*knn_data));
9391
9392 void *queryVector;
9393 size_t dimensions;
9394 enum VectorElementType elementType;
9395 vector_cleanup cleanup;
9396 char *err;
9397 int rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType,
9398 &cleanup, &err);
9399 if (rc != SQLITE_OK0) {
9400 return SQLITE_ERROR1;
9401 }
9402 if (elementType != p->blob->element_type) {
9403 return SQLITE_ERROR1;
9404 }
9405 if (dimensions != p->blob->dimensions) {
9406 return SQLITE_ERROR1;
9407 }
9408
9409 i64 k = min(sqlite3_value_int64(argv[1]), (i64)p->blob->nvectors)(((sqlite3_api->value_int64(argv[1])) <= ((i64)p->blob
->nvectors)) ? (sqlite3_api->value_int64(argv[1])) : ((
i64)p->blob->nvectors))
;
9410 if (k < 0) {
9411 // HANDLE https://github.com/asg017/sqlite-vec/issues/55
9412 return SQLITE_ERROR1;
9413 }
9414 if (k == 0) {
9415 knn_data->k = 0;
9416 pCur->knn_data = knn_data;
9417 return SQLITE_OK0;
9418 }
9419
9420 size_t bsize = (p->blob->nvectors + 7) & ~7;
9421
9422 i32 *topk_rowids = sqlite3_mallocsqlite3_api->malloc(k * sizeof(i32));
9423 if (!topk_rowids) {
9424 // HANDLE https://github.com/asg017/sqlite-vec/issues/55
9425 return SQLITE_ERROR1;
9426 }
9427 f32 *distances = sqlite3_mallocsqlite3_api->malloc(bsize * sizeof(f32));
9428 if (!distances) {
9429 // HANDLE https://github.com/asg017/sqlite-vec/issues/55
9430 return SQLITE_ERROR1;
9431 }
9432
9433 for (size_t i = 0; i < p->blob->nvectors; i++) {
9434 // https://github.com/asg017/sqlite-vec/issues/52
9435 float *v = ((float *)p->blob->p) + (i * p->blob->dimensions);
9436 distances[i] =
9437 distance_l2_sqr_float(v, (float *)queryVector, &p->blob->dimensions);
9438 }
9439 u8 *candidates = bitmap_new(bsize);
9440 assert(candidates)((void) sizeof ((candidates) ? 1 : 0), __extension__ ({ if (candidates
) ; else __assert_fail ("candidates", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 9440, __extension__ __PRETTY_FUNCTION__); }))
;
9441
9442 u8 *taken = bitmap_new(bsize);
9443 assert(taken)((void) sizeof ((taken) ? 1 : 0), __extension__ ({ if (taken)
; else __assert_fail ("taken", "/root/firefox-clang/third_party/sqlite3/ext/sqlite-vec/sqlite-vec.c"
, 9443, __extension__ __PRETTY_FUNCTION__); }))
;
9444
9445 bitmap_fill(candidates, bsize);
9446 for (size_t i = bsize; i >= p->blob->nvectors; i--) {
9447 bitmap_set(candidates, i, 0);
9448 }
9449 i32 k_used = 0;
9450 min_idx(distances, bsize, candidates, topk_rowids, k, taken, &k_used);
9451 knn_data->current_idx = 0;
9452 knn_data->distances = distances;
9453 knn_data->k = k;
9454 knn_data->rowids = topk_rowids;
9455
9456 pCur->knn_data = knn_data;
9457 } else {
9458 pCur->query_plan = VEC_SBE__QUERYPLAN_FULLSCAN;
9459 pCur->iRowid = 0;
9460 }
9461
9462 return SQLITE_OK0;
9463}
9464
9465static int vec_static_blob_entriesRowid(sqlite3_vtab_cursor *cur,
9466 sqlite_int64 *pRowid) {
9467 vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
9468 switch (pCur->query_plan) {
9469 case VEC_SBE__QUERYPLAN_FULLSCAN: {
9470 *pRowid = pCur->iRowid;
9471 return SQLITE_OK0;
9472 }
9473 case VEC_SBE__QUERYPLAN_KNN: {
9474 i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx];
9475 *pRowid = (sqlite3_int64)rowid;
9476 return SQLITE_OK0;
9477 }
9478 }
9479 return SQLITE_ERROR1;
9480}
9481
9482static int vec_static_blob_entriesNext(sqlite3_vtab_cursor *cur) {
9483 vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
9484 switch (pCur->query_plan) {
9485 case VEC_SBE__QUERYPLAN_FULLSCAN: {
9486 pCur->iRowid++;
9487 return SQLITE_OK0;
9488 }
9489 case VEC_SBE__QUERYPLAN_KNN: {
9490 pCur->knn_data->current_idx++;
9491 return SQLITE_OK0;
9492 }
9493 }
9494 return SQLITE_ERROR1;
9495}
9496
9497static int vec_static_blob_entriesEof(sqlite3_vtab_cursor *cur) {
9498 vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
9499 vec_static_blob_entries_vtab *p =
9500 (vec_static_blob_entries_vtab *)pCur->base.pVtab;
9501 switch (pCur->query_plan) {
9502 case VEC_SBE__QUERYPLAN_FULLSCAN: {
9503 return (size_t)pCur->iRowid >= p->blob->nvectors;
9504 }
9505 case VEC_SBE__QUERYPLAN_KNN: {
9506 return pCur->knn_data->current_idx >= pCur->knn_data->k;
9507 }
9508 }
9509 return SQLITE_ERROR1;
9510}
9511
9512static int vec_static_blob_entriesColumn(sqlite3_vtab_cursor *cur,
9513 sqlite3_context *context, int i) {
9514 vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
9515 vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)cur->pVtab;
9516
9517 switch (pCur->query_plan) {
9518 case VEC_SBE__QUERYPLAN_FULLSCAN: {
9519 switch (i) {
9520 case VEC_STATIC_BLOB_ENTRIES_VECTOR0:
9521
9522 sqlite3_result_blobsqlite3_api->result_blob(
9523 context,
9524 ((unsigned char *)p->blob->p) +
9525 (pCur->iRowid * p->blob->dimensions * sizeof(float)),
9526 p->blob->dimensions * sizeof(float), SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
9527 sqlite3_result_subtypesqlite3_api->result_subtype(context, p->blob->element_type);
9528 break;
9529 }
9530 return SQLITE_OK0;
9531 }
9532 case VEC_SBE__QUERYPLAN_KNN: {
9533 switch (i) {
9534 case VEC_STATIC_BLOB_ENTRIES_VECTOR0: {
9535 i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx];
9536 sqlite3_result_blobsqlite3_api->result_blob(context,
9537 ((unsigned char *)p->blob->p) +
9538 (rowid * p->blob->dimensions * sizeof(float)),
9539 p->blob->dimensions * sizeof(float),
9540 SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
9541 sqlite3_result_subtypesqlite3_api->result_subtype(context, p->blob->element_type);
9542 break;
9543 }
9544 }
9545 return SQLITE_OK0;
9546 }
9547 }
9548 return SQLITE_ERROR1;
9549}
9550
9551static sqlite3_module vec_static_blob_entriesModule = {
9552 /* iVersion */ 3,
9553 /* xCreate */
9554 vec_static_blob_entriesCreate, // handle rm?
9555 // https://github.com/asg017/sqlite-vec/issues/55
9556 /* xConnect */ vec_static_blob_entriesConnect,
9557 /* xBestIndex */ vec_static_blob_entriesBestIndex,
9558 /* xDisconnect */ vec_static_blob_entriesDisconnect,
9559 /* xDestroy */ vec_static_blob_entriesDisconnect,
9560 /* xOpen */ vec_static_blob_entriesOpen,
9561 /* xClose */ vec_static_blob_entriesClose,
9562 /* xFilter */ vec_static_blob_entriesFilter,
9563 /* xNext */ vec_static_blob_entriesNext,
9564 /* xEof */ vec_static_blob_entriesEof,
9565 /* xColumn */ vec_static_blob_entriesColumn,
9566 /* xRowid */ vec_static_blob_entriesRowid,
9567 /* xUpdate */ 0,
9568 /* xBegin */ 0,
9569 /* xSync */ 0,
9570 /* xCommit */ 0,
9571 /* xRollback */ 0,
9572 /* xFindMethod */ 0,
9573 /* xRename */ 0,
9574 /* xSavepoint */ 0,
9575 /* xRelease */ 0,
9576 /* xRollbackTo */ 0,
9577 /* xShadowName */ 0,
9578#if SQLITE_VERSION_NUMBER3050001 >= 3044000
9579 /* xIntegrity */ 0
9580#endif
9581};
9582#pragma endregion
9583
// Build-flag fragments for vec_debug(): each expands to the feature name when
// the matching SQLITE_VEC_ENABLE_* macro is defined, and to "" otherwise.
#ifdef SQLITE_VEC_ENABLE_AVX
#define SQLITE_VEC_DEBUG_BUILD_AVX "avx"
#else
#define SQLITE_VEC_DEBUG_BUILD_AVX ""
#endif
#ifdef SQLITE_VEC_ENABLE_NEON
#define SQLITE_VEC_DEBUG_BUILD_NEON "neon"
#else
#define SQLITE_VEC_DEBUG_BUILD_NEON ""
#endif

// Space-separated list of the fragments above.
#define SQLITE_VEC_DEBUG_BUILD                                                 \
  SQLITE_VEC_DEBUG_BUILD_AVX " " SQLITE_VEC_DEBUG_BUILD_NEON

// Text returned by the vec_debug() SQL function: version, build date, source
// commit and build flags, one per line.
#define SQLITE_VEC_DEBUG_STRING                                                \
  "Version: " SQLITE_VEC_VERSION "\n"                                          \
  "Date: " SQLITE_VEC_DATE "\n"                                                \
  "Commit: " SQLITE_VEC_SOURCE "\n"                                            \
  "Build flags: " SQLITE_VEC_DEBUG_BUILD
9603
9604SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
9605 const sqlite3_api_routines *pApi) {
9606#ifndef SQLITE_CORE
9607 SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;;
9608#endif
9609 int rc = SQLITE_OK0;
9610
9611#define DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) (SQLITE_UTF81 | SQLITE_INNOCUOUS0x000200000 | SQLITE_DETERMINISTIC0x000000800)
9612
9613 rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_version", 0, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800),
9614 SQLITE_VEC_VERSION"v0.1.7-alpha.2", _static_text_func, NULL((void*)0),
9615 NULL((void*)0), NULL((void*)0));
9616 if (rc != SQLITE_OK0) {
9617 return rc;
9618 }
9619 rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_debug", 0, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800),
9620 SQLITE_VEC_DEBUG_STRING"Version: " "v0.1.7-alpha.2" "\n" "Date: " "2025-01-10T23:18:50Z+0000"
"\n" "Commit: " "bdc336d1cf2a2222b6227784bd30c6631603279b" "\n"
"Build flags: " "" " " ""
, _static_text_func,
9621 NULL((void*)0), NULL((void*)0), NULL((void*)0));
9622 if (rc != SQLITE_OK0) {
9623 return rc;
9624 }
9625 static struct {
9626 const char *zFName;
9627 void (*xFunc)(sqlite3_context *, int, sqlite3_value **);
9628 int nArg;
9629 int flags;
9630 } aFunc[] = {
9631 // clang-format off
9632 //{"vec_version", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_VERSION },
9633 //{"vec_debug", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_DEBUG_STRING },
9634 {"vec_distance_l2", vec_distance_l2, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, },
9635 {"vec_distance_l1", vec_distance_l1, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, },
9636 {"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, },
9637 {"vec_distance_cosine", vec_distance_cosine, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, },
9638 {"vec_length", vec_length, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000, },
9639 {"vec_type", vec_type, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800), },
9640 {"vec_to_json", vec_to_json, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9641 {"vec_add", vec_add, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9642 {"vec_sub", vec_sub, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9643 {"vec_slice", vec_slice, 3, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9644 {"vec_normalize", vec_normalize, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9645 {"vec_f32", vec_f32, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9646 {"vec_bit", vec_bit, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9647 {"vec_int8", vec_int8, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9648 {"vec_quantize_int8", vec_quantize_int8, 2, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9649 {"vec_quantize_binary", vec_quantize_binary, 1, DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, },
9650 // clang-format on
9651 };
9652
9653 static struct {
9654 char *name;
9655 const sqlite3_module *module;
9656 void *p;
9657 void (*xDestroy)(void *);
9658 } aMod[] = {
9659 // clang-format off
9660 {"vec0", &vec0Module, NULL((void*)0), NULL((void*)0)},
9661 {"vec_each", &vec_eachModule, NULL((void*)0), NULL((void*)0)},
9662 // clang-format on
9663 };
9664
9665 for (unsigned long i = 0; i < countof(aFunc)(sizeof(aFunc) / sizeof((aFunc)[0])) && rc == SQLITE_OK0; i++) {
9666 rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, aFunc[i].zFName, aFunc[i].nArg,
9667 aFunc[i].flags, NULL((void*)0), aFunc[i].xFunc, NULL((void*)0),
9668 NULL((void*)0), NULL((void*)0));
9669 if (rc != SQLITE_OK0) {
9670 *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("Error creating function %s: %s",
9671 aFunc[i].zFName, sqlite3_errmsgsqlite3_api->errmsg(db));
9672 return rc;
9673 }
9674 }
9675
9676 for (unsigned long i = 0; i < countof(aMod)(sizeof(aMod) / sizeof((aMod)[0])) && rc == SQLITE_OK0; i++) {
9677 rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, aMod[i].name, aMod[i].module, NULL((void*)0), NULL((void*)0));
9678 if (rc != SQLITE_OK0) {
9679 *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("Error creating module %s: %s", aMod[i].name,
9680 sqlite3_errmsgsqlite3_api->errmsg(db));
9681 return rc;
9682 }
9683 }
9684
9685 return SQLITE_OK0;
9686}
9687
9688#ifndef SQLITE_VEC_OMIT_FS
9689SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg,
9690 const sqlite3_api_routines *pApi) {
9691 UNUSED_PARAMETER(pzErrMsg)(void)(pzErrMsg);
9692#ifndef SQLITE_CORE
9693 SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;;
9694#endif
9695 int rc = SQLITE_OK0;
9696 rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE0x001000000,
9697 NULL((void*)0), vec_npy_file, NULL((void*)0), NULL((void*)0), NULL((void*)0));
9698 if(rc != SQLITE_OK0) {
9699 return rc;
9700 }
9701 rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_npy_each", &vec_npy_eachModule, NULL((void*)0), NULL((void*)0));
9702 return rc;
9703}
9704#endif
9705
9706SQLITE_VEC_API int
9707sqlite3_vec_static_blobs_init(sqlite3 *db, char **pzErrMsg,
9708 const sqlite3_api_routines *pApi) {
9709 UNUSED_PARAMETER(pzErrMsg)(void)(pzErrMsg);
9710#ifndef SQLITE_CORE
9711 SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;;
9712#endif
9713
9714 int rc = SQLITE_OK0;
9715 vec_static_blob_data *static_blob_data;
9716 static_blob_data = sqlite3_mallocsqlite3_api->malloc(sizeof(*static_blob_data));
9717 if (!static_blob_data) {
9718 return SQLITE_NOMEM7;
9719 }
9720 memset(static_blob_data, 0, sizeof(*static_blob_data));
9721
9722 rc = sqlite3_create_function_v2sqlite3_api->create_function_v2(
9723 db, "vec_static_blob_from_raw", 4,
9724 DEFAULT_FLAGS(1 | 0x000200000 | 0x000000800) | SQLITE_SUBTYPE0x000100000 | SQLITE_RESULT_SUBTYPE0x001000000, NULL((void*)0),
9725 vec_static_blob_from_raw, NULL((void*)0), NULL((void*)0), NULL((void*)0));
9726 if (rc != SQLITE_OK0)
9727 return rc;
9728
9729 rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_static_blobs", &vec_static_blobsModule,
9730 static_blob_data, sqlite3_freesqlite3_api->free);
9731 if (rc != SQLITE_OK0)
9732 return rc;
9733 rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "vec_static_blob_entries",
9734 &vec_static_blob_entriesModule,
9735 static_blob_data, NULL((void*)0));
9736 if (rc != SQLITE_OK0)
9737 return rc;
9738 return rc;
9739}