File: | root/firefox-clang/third_party/sqlite3/ext/fts5.c |
Warning: | line 27552, column 9 Access to field 'bEof' results in a dereference of a null pointer (loaded from field 'pIter') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | |||||
2 | /* | ||||
3 | ** This, the "fts5.c" source file, is a composite file that is itself | ||||
4 | ** assembled from the following files: | ||||
5 | ** | ||||
6 | ** fts5.h | ||||
7 | ** fts5Int.h | ||||
8 | ** fts5parse.h <--- Generated from fts5parse.y by Lemon | ||||
9 | ** fts5parse.c <--- Generated from fts5parse.y by Lemon | ||||
10 | ** fts5_aux.c | ||||
11 | ** fts5_buffer.c | ||||
12 | ** fts5_config.c | ||||
13 | ** fts5_expr.c | ||||
14 | ** fts5_hash.c | ||||
15 | ** fts5_index.c | ||||
16 | ** fts5_main.c | ||||
17 | ** fts5_storage.c | ||||
18 | ** fts5_tokenize.c | ||||
19 | ** fts5_unicode2.c | ||||
20 | ** fts5_varint.c | ||||
21 | ** fts5_vocab.c | ||||
22 | */ | ||||
23 | #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) | ||||
24 | |||||
/* Define NDEBUG (the macro that disables assert()) unless this is an
** SQLITE_DEBUG build, and make sure it is not left defined when
** SQLITE_DEBUG is set. */
25 | #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) | ||||
26 | # define NDEBUG 1 | ||||
27 | #endif | ||||
28 | #if defined(NDEBUG) && defined(SQLITE_DEBUG) | ||||
29 | # undef NDEBUG | ||||
30 | #endif | ||||
31 | |||||
/* Include the integer-type headers only when the build configuration
** reports that they are available. */
32 | #ifdef HAVE_STDINT_H | ||||
33 | #include <stdint.h> | ||||
34 | #endif | ||||
35 | #ifdef HAVE_INTTYPES_H | ||||
36 | #include <inttypes.h> | ||||
37 | #endif | ||||
38 | #line 1 "fts5.h" | ||||
39 | /* | ||||
40 | ** 2014 May 31 | ||||
41 | ** | ||||
42 | ** The author disclaims copyright to this source code. In place of | ||||
43 | ** a legal notice, here is a blessing: | ||||
44 | ** | ||||
45 | ** May you do good and not evil. | ||||
46 | ** May you find forgiveness for yourself and forgive others. | ||||
47 | ** May you share freely, never taking more than you give. | ||||
48 | ** | ||||
49 | ****************************************************************************** | ||||
50 | ** | ||||
51 | ** Interfaces to extend FTS5. Using the interfaces defined in this file, | ||||
52 | ** FTS5 may be extended with: | ||||
53 | ** | ||||
54 | ** * custom tokenizers, and | ||||
55 | ** * custom auxiliary functions. | ||||
56 | */ | ||||
57 | |||||
58 | |||||
59 | #ifndef _FTS5_H | ||||
60 | #define _FTS5_H | ||||
61 | |||||
62 | #include "sqlite3.h" | ||||
63 | |||||
64 | #ifdef __cplusplus | ||||
65 | extern "C" { | ||||
66 | #endif | ||||
67 | |||||
68 | /************************************************************************* | ||||
69 | ** CUSTOM AUXILIARY FUNCTIONS | ||||
70 | ** | ||||
71 | ** Virtual table implementations may overload SQL functions by implementing | ||||
72 | ** the sqlite3_module.xFindFunction() method. | ||||
73 | */ | ||||
74 | |||||
/* Forward declarations of the types used by the extension API.
** Fts5ExtensionApi and Fts5PhraseIter are defined further down in this
** header. No definition of Fts5Context appears in this part of the file;
** here it is only ever handled through a pointer. */
75 | typedef struct Fts5ExtensionApi Fts5ExtensionApi; | ||||
76 | typedef struct Fts5Context Fts5Context; | ||||
77 | typedef struct Fts5PhraseIter Fts5PhraseIter; | ||||
78 | |||||
/* Signature of a custom auxiliary function implementation. In addition to
** the SQL function context (pCtx) and the trailing SQL arguments
** (nVal/apVal), the function receives the extension API table (pApi) and
** a handle (pFts) that is passed as the first argument to the pApi
** methods. */
79 | typedef void (*fts5_extension_function)( | ||||
80 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | ||||
81 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | ||||
82 | sqlite3_context *pCtx, /* Context for returning result/error */ | ||||
83 | int nVal, /* Number of values in apVal[] array */ | ||||
84 | sqlite3_value **apVal /* Array of trailing arguments */ | ||||
85 | ); | ||||
86 | |||||
/* Iterator state used with xPhraseFirst()/xPhraseNext() and
** xPhraseFirstColumn()/xPhraseNextColumn(). Applications should not modify
** this structure directly. The meaning of members a and b is not described
** in this header; treat them as opaque. */
87 | struct Fts5PhraseIter { | ||||
88 | const unsigned char *a; | ||||
89 | const unsigned char *b; | ||||
90 | }; | ||||
91 | |||||
92 | /* | ||||
93 | ** EXTENSION API FUNCTIONS | ||||
94 | ** | ||||
95 | ** xUserData(pFts): | ||||
96 | ** Return a copy of the pUserData pointer passed to the xCreateFunction() | ||||
97 | ** API when the extension function was registered. | ||||
98 | ** | ||||
99 | ** xColumnTotalSize(pFts, iCol, pnToken): | ||||
100 | ** If parameter iCol is less than zero, set output variable *pnToken | ||||
101 | ** to the total number of tokens in the FTS5 table. Or, if iCol is | ||||
102 | ** non-negative but less than the number of columns in the table, set | ||||
103 | ** *pnToken to the total number of tokens in column iCol, considering all rows in | ||||
104 | ** the FTS5 table. | ||||
105 | ** | ||||
106 | ** If parameter iCol is greater than or equal to the number of columns | ||||
107 | ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. | ||||
108 | ** an OOM condition or IO error), an appropriate SQLite error code is | ||||
109 | ** returned. | ||||
110 | ** | ||||
111 | ** xColumnCount(pFts): | ||||
112 | ** Return the number of columns in the table. | ||||
113 | ** | ||||
114 | ** xColumnSize(pFts, iCol, pnToken): | ||||
115 | ** If parameter iCol is less than zero, set output variable *pnToken | ||||
116 | ** to the total number of tokens in the current row. Or, if iCol is | ||||
117 | ** non-negative but less than the number of columns in the table, set | ||||
118 | ** *pnToken to the number of tokens in column iCol of the current row. | ||||
119 | ** | ||||
120 | ** If parameter iCol is greater than or equal to the number of columns | ||||
121 | ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. | ||||
122 | ** an OOM condition or IO error), an appropriate SQLite error code is | ||||
123 | ** returned. | ||||
124 | ** | ||||
125 | ** This function may be quite inefficient if used with an FTS5 table | ||||
126 | ** created with the "columnsize=0" option. | ||||
127 | ** | ||||
128 | ** xColumnText: | ||||
129 | ** If parameter iCol is less than zero, or greater than or equal to the | ||||
130 | ** number of columns in the table, SQLITE_RANGE is returned. | ||||
131 | ** | ||||
132 | ** Otherwise, this function attempts to retrieve the text of column iCol of | ||||
133 | ** the current document. If successful, (*pz) is set to point to a buffer | ||||
134 | ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes | ||||
135 | ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, | ||||
136 | ** if an error occurs, an SQLite error code is returned and the final values | ||||
137 | ** of (*pz) and (*pn) are undefined. | ||||
138 | ** | ||||
139 | ** xPhraseCount: | ||||
140 | ** Returns the number of phrases in the current query expression. | ||||
141 | ** | ||||
142 | ** xPhraseSize: | ||||
143 | ** If parameter iPhrase is less than zero, or greater than or equal to the | ||||
144 | ** number of phrases in the current query, as returned by xPhraseCount, | ||||
145 | ** 0 is returned. Otherwise, this function returns the number of tokens in | ||||
146 | ** phrase iPhrase of the query. Phrases are numbered starting from zero. | ||||
147 | ** | ||||
148 | ** xInstCount: | ||||
149 | ** Set *pnInst to the total number of occurrences of all phrases within | ||||
150 | ** the query within the current row. Return SQLITE_OK if successful, or | ||||
151 | ** an error code (e.g. SQLITE_NOMEM) if an error occurs. | ||||
152 | ** | ||||
153 | ** This API can be quite slow if used with an FTS5 table created with the | ||||
154 | ** "detail=none" or "detail=column" option. If the FTS5 table is created | ||||
155 | ** with either "detail=none" or "detail=column" and "content=" option | ||||
156 | ** (i.e. if it is a contentless table), then this API always returns 0. | ||||
157 | ** | ||||
158 | ** xInst: | ||||
159 | ** Query for the details of phrase match iIdx within the current row. | ||||
160 | ** Phrase matches are numbered starting from zero, so the iIdx argument | ||||
161 | ** should be greater than or equal to zero and smaller than the value | ||||
162 | ** output by xInstCount(). If iIdx is less than zero or greater than | ||||
163 | ** or equal to the value returned by xInstCount(), SQLITE_RANGE is returned. | ||||
164 | ** | ||||
165 | ** Otherwise, output parameter *piPhrase is set to the phrase number, *piCol | ||||
166 | ** to the column in which it occurs and *piOff the token offset of the | ||||
167 | ** first token of the phrase. SQLITE_OK is returned if successful, or an | ||||
168 | ** error code (e.g. SQLITE_NOMEM) if an error occurs. | ||||
169 | ** | ||||
170 | ** This API can be quite slow if used with an FTS5 table created with the | ||||
171 | ** "detail=none" or "detail=column" option. | ||||
172 | ** | ||||
173 | ** xRowid: | ||||
174 | ** Returns the rowid of the current row. | ||||
175 | ** | ||||
176 | ** xTokenize: | ||||
177 | ** Tokenize text using the tokenizer belonging to the FTS5 table. | ||||
178 | ** | ||||
179 | ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): | ||||
180 | ** This API function is used to query the FTS table for phrase iPhrase | ||||
181 | ** of the current query. Specifically, a query equivalent to: | ||||
182 | ** | ||||
183 | ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid | ||||
184 | ** | ||||
185 | ** with $p set to a phrase equivalent to the phrase iPhrase of the | ||||
186 | ** current query is executed. Any column filter that applies to | ||||
187 | ** phrase iPhrase of the current query is included in $p. For each | ||||
188 | ** row visited, the callback function passed as the fourth argument | ||||
189 | ** is invoked. The context and API objects passed to the callback | ||||
190 | ** function may be used to access the properties of each matched row. | ||||
191 | ** Invoking Api.xUserData() returns a copy of the pointer passed as | ||||
192 | ** the third argument to pUserData. | ||||
193 | ** | ||||
194 | ** If parameter iPhrase is less than zero, or greater than or equal to | ||||
195 | ** the number of phrases in the query, as returned by xPhraseCount(), | ||||
196 | ** this function returns SQLITE_RANGE. | ||||
197 | ** | ||||
198 | ** If the callback function returns any value other than SQLITE_OK, the | ||||
199 | ** query is abandoned and the xQueryPhrase function returns immediately. | ||||
200 | ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. | ||||
201 | ** Otherwise, the error code is propagated upwards. | ||||
202 | ** | ||||
203 | ** If the query runs to completion without incident, SQLITE_OK is returned. | ||||
204 | ** Or, if some error occurs before the query completes or is aborted by | ||||
205 | ** the callback, an SQLite error code is returned. | ||||
206 | ** | ||||
207 | ** | ||||
208 | ** xSetAuxdata(pFts5, pAux, xDelete) | ||||
209 | ** | ||||
210 | ** Save the pointer passed as the second argument as the extension function's | ||||
211 | ** "auxiliary data". The pointer may then be retrieved by the current or any | ||||
212 | ** future invocation of the same fts5 extension function made as part of | ||||
213 | ** the same MATCH query using the xGetAuxdata() API. | ||||
214 | ** | ||||
215 | ** Each extension function is allocated a single auxiliary data slot for | ||||
216 | ** each FTS query (MATCH expression). If the extension function is invoked | ||||
217 | ** more than once for a single FTS query, then all invocations share a | ||||
218 | ** single auxiliary data context. | ||||
219 | ** | ||||
220 | ** If there is already an auxiliary data pointer when this function is | ||||
221 | ** invoked, then it is replaced by the new pointer. If an xDelete callback | ||||
222 | ** was specified along with the original pointer, it is invoked at this | ||||
223 | ** point. | ||||
224 | ** | ||||
225 | ** The xDelete callback, if one is specified, is also invoked on the | ||||
226 | ** auxiliary data pointer after the FTS5 query has finished. | ||||
227 | ** | ||||
228 | ** If an error (e.g. an OOM condition) occurs within this function, | ||||
229 | ** the auxiliary data is set to NULL and an error code returned. If the | ||||
230 | ** xDelete parameter was not NULL, it is invoked on the auxiliary data | ||||
231 | ** pointer before returning. | ||||
232 | ** | ||||
233 | ** | ||||
234 | ** xGetAuxdata(pFts5, bClear) | ||||
235 | ** | ||||
236 | ** Returns the current auxiliary data pointer for the fts5 extension | ||||
237 | ** function. See the xSetAuxdata() method for details. | ||||
238 | ** | ||||
239 | ** If the bClear argument is non-zero, then the auxiliary data is cleared | ||||
240 | ** (set to NULL) before this function returns. In this case the xDelete, | ||||
241 | ** if any, is not invoked. | ||||
242 | ** | ||||
243 | ** | ||||
244 | ** xRowCount(pFts5, pnRow) | ||||
245 | ** | ||||
246 | ** This function is used to retrieve the total number of rows in the table. | ||||
247 | ** In other words, the same value that would be returned by: | ||||
248 | ** | ||||
249 | ** SELECT count(*) FROM ftstable; | ||||
250 | ** | ||||
251 | ** xPhraseFirst() | ||||
252 | ** This function is used, along with type Fts5PhraseIter and the xPhraseNext | ||||
253 | ** method, to iterate through all instances of a single query phrase within | ||||
254 | ** the current row. This is the same information as is accessible via the | ||||
255 | ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient | ||||
256 | ** to use, this API may be faster under some circumstances. To iterate | ||||
257 | ** through instances of phrase iPhrase, use the following code: | ||||
258 | ** | ||||
259 | ** Fts5PhraseIter iter; | ||||
260 | ** int iCol, iOff; | ||||
261 | ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); | ||||
262 | ** iCol>=0; | ||||
263 | ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) | ||||
264 | ** ){ | ||||
265 | ** // An instance of phrase iPhrase at offset iOff of column iCol | ||||
266 | ** } | ||||
267 | ** | ||||
268 | ** The Fts5PhraseIter structure is defined above. Applications should not | ||||
269 | ** modify this structure directly - it should only be used as shown above | ||||
270 | ** with the xPhraseFirst() and xPhraseNext() API methods (and by | ||||
271 | ** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below). | ||||
272 | ** | ||||
273 | ** This API can be quite slow if used with an FTS5 table created with the | ||||
274 | ** "detail=none" or "detail=column" option. If the FTS5 table is created | ||||
275 | ** with either "detail=none" or "detail=column" and "content=" option | ||||
276 | ** (i.e. if it is a contentless table), then this API always iterates | ||||
277 | ** through an empty set (all calls to xPhraseFirst() set iCol to -1). | ||||
278 | ** | ||||
279 | ** In all cases, matches are visited in (column ASC, offset ASC) order. | ||||
280 | ** i.e. all those in column 0, sorted by offset, followed by those in | ||||
281 | ** column 1, etc. | ||||
282 | ** | ||||
283 | ** xPhraseNext() | ||||
284 | ** See xPhraseFirst above. | ||||
285 | ** | ||||
286 | ** xPhraseFirstColumn() | ||||
287 | ** This function and xPhraseNextColumn() are similar to the xPhraseFirst() | ||||
288 | ** and xPhraseNext() APIs described above. The difference is that instead | ||||
289 | ** of iterating through all instances of a phrase in the current row, these | ||||
290 | ** APIs are used to iterate through the set of columns in the current row | ||||
291 | ** that contain one or more instances of a specified phrase. For example: | ||||
292 | ** | ||||
293 | ** Fts5PhraseIter iter; | ||||
294 | ** int iCol; | ||||
295 | ** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); | ||||
296 | ** iCol>=0; | ||||
297 | ** pApi->xPhraseNextColumn(pFts, &iter, &iCol) | ||||
298 | ** ){ | ||||
299 | ** // Column iCol contains at least one instance of phrase iPhrase | ||||
300 | ** } | ||||
301 | ** | ||||
302 | ** This API can be quite slow if used with an FTS5 table created with the | ||||
303 | ** "detail=none" option. If the FTS5 table is created with both the | ||||
304 | ** "detail=none" and "content=" options (i.e. if it is a contentless table), | ||||
305 | ** then this API always iterates through an empty set (all calls to | ||||
306 | ** xPhraseFirstColumn() set iCol to -1). | ||||
307 | ** | ||||
308 | ** The information accessed using this API and its companion | ||||
309 | ** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext | ||||
310 | ** (or xInst/xInstCount). The chief advantage of this API is that it is | ||||
311 | ** significantly more efficient than those alternatives when used with | ||||
312 | ** "detail=column" tables. | ||||
313 | ** | ||||
314 | ** xPhraseNextColumn() | ||||
315 | ** See xPhraseFirstColumn above. | ||||
316 | ** | ||||
317 | ** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken) | ||||
318 | ** This is used to access token iToken of phrase iPhrase of the current | ||||
319 | ** query. Before returning, output parameter *ppToken is set to point | ||||
320 | ** to a buffer containing the requested token, and *pnToken to the | ||||
321 | ** size of this buffer in bytes. | ||||
322 | ** | ||||
323 | ** If iPhrase or iToken are less than zero, or if iPhrase is greater than | ||||
324 | ** or equal to the number of phrases in the query as reported by | ||||
325 | ** xPhraseCount(), or if iToken is equal to or greater than the number of | ||||
326 | ** tokens in the phrase, SQLITE_RANGE is returned and *ppToken and *pnToken | ||||
327 | ** are both zeroed. | ||||
328 | ** | ||||
329 | ** The output text is not a copy of the query text that specified the | ||||
330 | ** token. It is the output of the tokenizer module. For tokendata=1 | ||||
331 | ** tables, this includes any embedded 0x00 and trailing data. | ||||
332 | ** | ||||
333 | ** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken) | ||||
334 | ** This is used to access token iToken of phrase hit iIdx within the | ||||
335 | ** current row. If iIdx is less than zero or greater than or equal to the | ||||
336 | ** value returned by xInstCount(), SQLITE_RANGE is returned. Otherwise, | ||||
337 | ** output variable (*ppToken) is set to point to a buffer containing the | ||||
338 | ** matching document token, and (*pnToken) to the size of that buffer in | ||||
339 | ** bytes. | ||||
340 | ** | ||||
341 | ** The output text is not a copy of the document text that was tokenized. | ||||
342 | ** It is the output of the tokenizer module. For tokendata=1 tables, this | ||||
343 | ** includes any embedded 0x00 and trailing data. | ||||
344 | ** | ||||
345 | ** This API may be slow in some cases if the token identified by parameters | ||||
346 | ** iIdx and iToken matched a prefix token in the query. In most cases, the | ||||
347 | ** first call to this API for each prefix token in the query is forced | ||||
348 | ** to scan the portion of the full-text index that matches the prefix | ||||
349 | ** token to collect the extra data required by this API. If the prefix | ||||
350 | ** token matches a large number of token instances in the document set, | ||||
351 | ** this may be a performance problem. | ||||
352 | ** | ||||
353 | ** If the user knows in advance that a query may use this API for a | ||||
354 | ** prefix token, FTS5 may be configured to collect all required data as part | ||||
355 | ** of the initial querying of the full-text index, avoiding the second scan | ||||
356 | ** entirely. This also causes prefix queries that do not use this API to | ||||
357 | ** run more slowly and use more memory. FTS5 may be configured in this way | ||||
358 | ** either on a per-table basis using the [FTS5 insttoken | 'insttoken'] | ||||
359 | ** option, or on a per-query basis using the | ||||
360 | ** [fts5_insttoken | fts5_insttoken()] user function. | ||||
361 | ** | ||||
362 | ** This API can be quite slow if used with an FTS5 table created with the | ||||
363 | ** "detail=none" or "detail=column" option. | ||||
364 | ** | ||||
365 | ** xColumnLocale(pFts5, iCol, pzLocale, pnLocale) | ||||
366 | ** If parameter iCol is less than zero, or greater than or equal to the | ||||
367 | ** number of columns in the table, SQLITE_RANGE is returned. | ||||
368 | ** | ||||
369 | ** Otherwise, this function attempts to retrieve the locale associated | ||||
370 | ** with column iCol of the current row. Usually, there is no associated | ||||
371 | ** locale, and output parameters (*pzLocale) and (*pnLocale) are set | ||||
372 | ** to NULL and 0, respectively. However, if the fts5_locale() function | ||||
373 | ** was used to associate a locale with the value when it was inserted | ||||
374 | ** into the fts5 table, then (*pzLocale) is set to point to a nul-terminated | ||||
375 | ** buffer containing the name of the locale in utf-8 encoding. (*pnLocale) | ||||
376 | ** is set to the size in bytes of the buffer, not including the | ||||
377 | ** nul-terminator. | ||||
378 | ** | ||||
379 | ** If successful, SQLITE_OK is returned. Or, if an error occurs, an | ||||
380 | ** SQLite error code is returned. The final value of the output parameters | ||||
381 | ** is undefined in this case. | ||||
382 | ** | ||||
383 | ** xTokenize_v2: | ||||
384 | ** Tokenize text using the tokenizer belonging to the FTS5 table. This | ||||
385 | ** API is the same as the xTokenize() API, except that it allows a tokenizer | ||||
386 | ** locale to be specified. | ||||
387 | */ | ||||
/* Table of function pointers through which an auxiliary function accesses
** FTS5. Each method is described in the "EXTENSION API FUNCTIONS" comment
** preceding this struct, and each takes the Fts5Context handle as its first
** argument. The marker comments inside the struct give the minimum iVersion
** at which the members that follow them are present. */
388 | struct Fts5ExtensionApi { | ||||
389 | int iVersion; /* Currently always set to 4 */ | ||||
390 | |||||
391 | void *(*xUserData)(Fts5Context*); | ||||
392 | |||||
393 | int (*xColumnCount)(Fts5Context*); | ||||
394 | int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); | ||||
395 | int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); | ||||
396 | |||||
397 | int (*xTokenize)(Fts5Context*, | ||||
398 | const char *pText, int nText, /* Text to tokenize */ | ||||
399 | void *pCtx, /* Context passed to xToken() */ | ||||
400 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | ||||
401 | ); | ||||
402 | |||||
403 | int (*xPhraseCount)(Fts5Context*); | ||||
404 | int (*xPhraseSize)(Fts5Context*, int iPhrase); | ||||
405 | |||||
406 | int (*xInstCount)(Fts5Context*, int *pnInst); | ||||
407 | int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); | ||||
408 | |||||
409 | sqlite3_int64 (*xRowid)(Fts5Context*); | ||||
410 | int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); | ||||
411 | int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); | ||||
412 | |||||
413 | int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, | ||||
414 | int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) | ||||
415 | ); | ||||
416 | int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); | ||||
417 | void *(*xGetAuxdata)(Fts5Context*, int bClear); | ||||
418 | |||||
419 | int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); | ||||
420 | void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); | ||||
421 | |||||
422 | int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); | ||||
423 | void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); | ||||
424 | |||||
425 | /* Below this point are iVersion>=3 only */ | ||||
426 | int (*xQueryToken)(Fts5Context*, | ||||
427 | int iPhrase, int iToken, | ||||
428 | const char **ppToken, int *pnToken | ||||
429 | ); | ||||
430 | int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*); | ||||
431 | |||||
432 | /* Below this point are iVersion>=4 only */ | ||||
433 | int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn); | ||||
434 | int (*xTokenize_v2)(Fts5Context*, | ||||
435 | const char *pText, int nText, /* Text to tokenize */ | ||||
436 | const char *pLocale, int nLocale, /* Locale to pass to tokenizer */ | ||||
437 | void *pCtx, /* Context passed to xToken() */ | ||||
438 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | ||||
439 | ); | ||||
440 | }; | ||||
441 | |||||
442 | /* | ||||
443 | ** CUSTOM AUXILIARY FUNCTIONS | ||||
444 | *************************************************************************/ | ||||
445 | |||||
446 | /************************************************************************* | ||||
447 | ** CUSTOM TOKENIZERS | ||||
448 | ** | ||||
449 | ** Applications may also register custom tokenizer types. A tokenizer | ||||
450 | ** is registered by providing fts5 with a populated instance of the | ||||
451 | ** following structure. All structure methods must be defined, setting | ||||
452 | ** any member of the fts5_tokenizer struct to NULL leads to undefined | ||||
453 | ** behaviour. The structure methods are expected to function as follows: | ||||
454 | ** | ||||
455 | ** xCreate: | ||||
456 | ** This function is used to allocate and initialize a tokenizer instance. | ||||
457 | ** A tokenizer instance is required to actually tokenize text. | ||||
458 | ** | ||||
459 | ** The first argument passed to this function is a copy of the (void*) | ||||
460 | ** pointer provided by the application when the fts5_tokenizer_v2 object | ||||
461 | ** was registered with FTS5 (the third argument to xCreateTokenizer()). | ||||
462 | ** The second and third arguments are an array of nul-terminated strings | ||||
463 | ** containing the tokenizer arguments, if any, specified following the | ||||
464 | ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used | ||||
465 | ** to create the FTS5 table. | ||||
466 | ** | ||||
467 | ** The final argument is an output variable. If successful, (*ppOut) | ||||
468 | ** should be set to point to the new tokenizer handle and SQLITE_OK | ||||
469 | ** returned. If an error occurs, some value other than SQLITE_OK should | ||||
470 | ** be returned. In this case, fts5 assumes that the final value of *ppOut | ||||
471 | ** is undefined. | ||||
472 | ** | ||||
473 | ** xDelete: | ||||
474 | ** This function is invoked to delete a tokenizer handle previously | ||||
475 | ** allocated using xCreate(). Fts5 guarantees that this function will | ||||
476 | ** be invoked exactly once for each successful call to xCreate(). | ||||
477 | ** | ||||
478 | ** xTokenize: | ||||
479 | ** This function is expected to tokenize the nText byte string indicated | ||||
480 | ** by argument pText. pText may or may not be nul-terminated. The first | ||||
481 | ** argument passed to this function is a pointer to an Fts5Tokenizer object | ||||
482 | ** returned by an earlier call to xCreate(). | ||||
483 | ** | ||||
484 | ** The third argument indicates the reason that FTS5 is requesting | ||||
485 | ** tokenization of the supplied text. This is always one of the following | ||||
486 | ** four values: | ||||
487 | ** | ||||
488 | ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into | ||||
489 | ** or removed from the FTS table. The tokenizer is being invoked to | ||||
490 | ** determine the set of tokens to add to (or delete from) the | ||||
491 | ** FTS index. | ||||
492 | ** | ||||
493 | ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed | ||||
494 | ** against the FTS index. The tokenizer is being called to tokenize | ||||
495 | ** a bareword or quoted string specified as part of the query. | ||||
496 | ** | ||||
497 | ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as | ||||
498 | ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is | ||||
499 | ** followed by a "*" character, indicating that the last token | ||||
500 | ** returned by the tokenizer will be treated as a token prefix. | ||||
501 | ** | ||||
502 | ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to | ||||
503 | ** satisfy an fts5_api.xTokenize() request made by an auxiliary | ||||
504 | ** function. Or an fts5_api.xColumnSize() request made by the same | ||||
505 | ** on a columnsize=0 database. | ||||
506 | ** </ul> | ||||
507 | ** | ||||
508 | ** The sixth and seventh arguments passed to xTokenize() - pLocale and | ||||
509 | ** nLocale - are a pointer to a buffer containing the locale to use for | ||||
510 | ** tokenization (e.g. "en_US") and its size in bytes, respectively. The | ||||
511 | ** pLocale buffer is not nul-terminated. pLocale may be passed NULL (in | ||||
512 | ** which case nLocale is always 0) to indicate that the tokenizer should | ||||
513 | ** use its default locale. | ||||
514 | ** | ||||
515 | ** For each token in the input string, the supplied callback xToken() must | ||||
516 | ** be invoked. The first argument to it should be a copy of the pointer | ||||
517 | ** passed as the second argument to xTokenize(). The third and fourth | ||||
518 | ** arguments are a pointer to a buffer containing the token text, and the | ||||
519 | ** size of the token in bytes. The fifth and sixth arguments are the byte offsets | ||||
520 | ** of the first byte of and first byte immediately following the text from | ||||
521 | ** which the token is derived within the input. | ||||
522 | ** | ||||
523 | ** The second argument passed to the xToken() callback ("tflags") should | ||||
524 | ** normally be set to 0. The exception is if the tokenizer supports | ||||
525 | ** synonyms. In this case see the discussion below for details. | ||||
526 | ** | ||||
527 | ** FTS5 assumes the xToken() callback is invoked for each token in the | ||||
528 | ** order that they occur within the input text. | ||||
529 | ** | ||||
530 | ** If an xToken() callback returns any value other than SQLITE_OK, then | ||||
531 | ** the tokenization should be abandoned and the xTokenize() method should | ||||
532 | ** immediately return a copy of the xToken() return value. Or, if the | ||||
533 | ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, | ||||
534 | ** if an error occurs with the xTokenize() implementation itself, it | ||||
535 | ** may abandon the tokenization and return any error code other than | ||||
536 | ** SQLITE_OK or SQLITE_DONE. | ||||
537 | ** | ||||
538 | ** If the tokenizer is registered using an fts5_tokenizer_v2 object, | ||||
539 | ** then the xTokenize() method has two additional arguments - pLocale | ||||
540 | ** and nLocale. These specify the locale that the tokenizer should use | ||||
541 | ** for the current request. If pLocale and nLocale are both 0, then the | ||||
542 | ** tokenizer should use its default locale. Otherwise, pLocale points to | ||||
543 | ** an nLocale byte buffer containing the name of the locale to use as utf-8 | ||||
544 | ** text. pLocale is not nul-terminated. | ||||
545 | ** | ||||
546 | ** FTS5_TOKENIZER | ||||
547 | ** | ||||
548 | ** There is also an fts5_tokenizer object. This is an older, deprecated, | ||||
549 | ** version of fts5_tokenizer_v2. It is similar except that: | ||||
550 | ** | ||||
551 | ** <ul> | ||||
552 | ** <li> There is no "iVersion" field, and | ||||
553 | ** <li> The xTokenize() method does not take a locale argument. | ||||
554 | ** </ul> | ||||
555 | ** | ||||
556 | ** Legacy fts5_tokenizer tokenizers must be registered using the | ||||
557 | ** legacy xCreateTokenizer() function, instead of xCreateTokenizer_v2(). | ||||
558 | ** | ||||
559 | ** Tokenizer implementations registered using either API may be retrieved | ||||
560 | ** using both xFindTokenizer() and xFindTokenizer_v2(). | ||||
561 | ** | ||||
562 | ** SYNONYM SUPPORT | ||||
563 | ** | ||||
564 | ** Custom tokenizers may also support synonyms. Consider a case in which a | ||||
565 | ** user wishes to query for a phrase such as "first place". Using the | ||||
566 | ** built-in tokenizers, the FTS5 query 'first + place' will match instances | ||||
567 | ** of "first place" within the document set, but not alternative forms | ||||
568 | ** such as "1st place". In some applications, it would be better to match | ||||
569 | ** all instances of "first place" or "1st place" regardless of which form | ||||
570 | ** the user specified in the MATCH query text. | ||||
571 | ** | ||||
572 | ** There are several ways to approach this in FTS5: | ||||
573 | ** | ||||
574 | ** <ol><li> By mapping all synonyms to a single token. In this case, using | ||||
575 | ** the above example, this means that the tokenizer returns the | ||||
576 | ** same token for inputs "first" and "1st". Say that token is in | ||||
577 | ** fact "first", so that when the user inserts the document "I won | ||||
578 | ** 1st place" entries are added to the index for tokens "i", "won", | ||||
579 | ** "first" and "place". If the user then queries for '1st + place', | ||||
580 | ** the tokenizer substitutes "first" for "1st" and the query works | ||||
581 | ** as expected. | ||||
582 | ** | ||||
583 | ** <li> By querying the index for all synonyms of each query term | ||||
584 | ** separately. In this case, when tokenizing query text, the | ||||
585 | ** tokenizer may provide multiple synonyms for a single term | ||||
586 | ** within the document. FTS5 then queries the index for each | ||||
587 | ** synonym individually. For example, faced with the query: | ||||
588 | ** | ||||
589 | ** <codeblock> | ||||
590 | ** ... MATCH 'first place'</codeblock> | ||||
591 | ** | ||||
592 | ** the tokenizer offers both "1st" and "first" as synonyms for the | ||||
593 | ** first token in the MATCH query and FTS5 effectively runs a query | ||||
594 | ** similar to: | ||||
595 | ** | ||||
596 | ** <codeblock> | ||||
597 | ** ... MATCH '(first OR 1st) place'</codeblock> | ||||
598 | ** | ||||
599 | ** except that, for the purposes of auxiliary functions, the query | ||||
600 | ** still appears to contain just two phrases - "(first OR 1st)" | ||||
601 | ** being treated as a single phrase. | ||||
602 | ** | ||||
603 | ** <li> By adding multiple synonyms for a single term to the FTS index. | ||||
604 | ** Using this method, when tokenizing document text, the tokenizer | ||||
605 | ** provides multiple synonyms for each token. So that when a | ||||
606 | ** document such as "I won first place" is tokenized, entries are | ||||
607 | ** added to the FTS index for "i", "won", "first", "1st" and | ||||
608 | ** "place". | ||||
609 | ** | ||||
610 | ** This way, even if the tokenizer does not provide synonyms | ||||
611 | ** when tokenizing query text (it should not - to do so would be | ||||
612 | ** inefficient), it doesn't matter if the user queries for | ||||
613 | ** 'first + place' or '1st + place', as there are entries in the | ||||
614 | ** FTS index corresponding to both forms of the first token. | ||||
615 | ** </ol> | ||||
616 | ** | ||||
617 | ** Whether it is parsing document or query text, any call to xToken that | ||||
618 | ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit | ||||
619 | ** is considered to supply a synonym for the previous token. For example, | ||||
620 | ** when parsing the document "I won first place", a tokenizer that supports | ||||
621 | ** synonyms would call xToken() 5 times, as follows: | ||||
622 | ** | ||||
623 | ** <codeblock> | ||||
624 | ** xToken(pCtx, 0, "i", 1, 0, 1); | ||||
625 | ** xToken(pCtx, 0, "won", 3, 2, 5); | ||||
626 | ** xToken(pCtx, 0, "first", 5, 6, 11); | ||||
627 | ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); | ||||
628 | ** xToken(pCtx, 0, "place", 5, 12, 17); | ||||
629 | **</codeblock> | ||||
630 | ** | ||||
631 | ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time | ||||
632 | ** xToken() is called. Multiple synonyms may be specified for a single token | ||||
633 | ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. | ||||
634 | ** There is no limit to the number of synonyms that may be provided for a | ||||
635 | ** single token. | ||||
636 | ** | ||||
637 | ** In many cases, method (1) above is the best approach. It does not add | ||||
638 | ** extra data to the FTS index or require FTS5 to query for multiple terms, | ||||
639 | ** so it is efficient in terms of disk space and query speed. However, it | ||||
640 | ** does not support prefix queries very well. If, as suggested above, the | ||||
641 | ** token "first" is substituted for "1st" by the tokenizer, then the query: | ||||
642 | ** | ||||
643 | ** <codeblock> | ||||
644 | ** ... MATCH '1s*'</codeblock> | ||||
645 | ** | ||||
646 | ** will not match documents that contain the token "1st" (as the tokenizer | ||||
647 | ** will probably not map "1s" to any prefix of "first"). | ||||
648 | ** | ||||
649 | ** For full prefix support, method (3) may be preferred. In this case, | ||||
650 | ** because the index contains entries for both "first" and "1st", prefix | ||||
651 | ** queries such as 'fi*' or '1s*' will match correctly. However, because | ||||
652 | ** extra entries are added to the FTS index, this method uses more space | ||||
653 | ** within the database. | ||||
654 | ** | ||||
655 | ** Method (2) offers a midpoint between (1) and (3). Using this method, | ||||
656 | ** a query such as '1s*' will match documents that contain the literal | ||||
657 | ** token "1st", but not "first" (assuming the tokenizer is not able to | ||||
658 | ** provide synonyms for prefixes). However, a non-prefix query like '1st' | ||||
659 | ** will match against "1st" and "first". This method does not require | ||||
660 | ** extra disk space, as no extra entries are added to the FTS index. | ||||
661 | ** On the other hand, it may require more CPU cycles to run MATCH queries, | ||||
662 | ** as separate queries of the FTS index are required for each synonym. | ||||
663 | ** | ||||
664 | ** When using methods (2) or (3), it is important that the tokenizer only | ||||
665 | ** provide synonyms when tokenizing document text (method (3)) or query | ||||
666 | ** text (method (2)), not both. Doing so will not cause any errors, but is | ||||
667 | ** inefficient. | ||||
668 | */ | ||||
669 | typedef struct Fts5Tokenizer Fts5Tokenizer; | ||||
670 | typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2; | ||||
671 | struct fts5_tokenizer_v2 { | ||||
672 | int iVersion; /* Currently always 2 */ | ||||
673 | |||||
674 | int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); | ||||
675 | void (*xDelete)(Fts5Tokenizer*); | ||||
676 | int (*xTokenize)(Fts5Tokenizer*, | ||||
677 | void *pCtx, | ||||
678 | int flags, /* Mask of FTS5_TOKENIZE_* flags */ | ||||
679 | const char *pText, int nText, | ||||
680 | const char *pLocale, int nLocale, | ||||
681 | int (*xToken)( | ||||
682 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | ||||
683 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | ||||
684 | const char *pToken, /* Pointer to buffer containing token */ | ||||
685 | int nToken, /* Size of token in bytes */ | ||||
686 | int iStart, /* Byte offset of token within input text */ | ||||
687 | int iEnd /* Byte offset of end of token within input text */ | ||||
688 | ) | ||||
689 | ); | ||||
690 | }; | ||||
691 | |||||
692 | /* | ||||
693 | ** New code should use the fts5_tokenizer_v2 type to define tokenizer | ||||
694 | ** implementations. The following type is included for legacy applications | ||||
695 | ** that still use it. | ||||
696 | */ | ||||
697 | typedef struct fts5_tokenizer fts5_tokenizer; | ||||
698 | struct fts5_tokenizer { | ||||
699 | int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); | ||||
700 | void (*xDelete)(Fts5Tokenizer*); | ||||
701 | int (*xTokenize)(Fts5Tokenizer*, | ||||
702 | void *pCtx, | ||||
703 | int flags, /* Mask of FTS5_TOKENIZE_* flags */ | ||||
704 | const char *pText, int nText, | ||||
705 | int (*xToken)( | ||||
706 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | ||||
707 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | ||||
708 | const char *pToken, /* Pointer to buffer containing token */ | ||||
709 | int nToken, /* Size of token in bytes */ | ||||
710 | int iStart, /* Byte offset of token within input text */ | ||||
711 | int iEnd /* Byte offset of end of token within input text */ | ||||
712 | ) | ||||
713 | ); | ||||
714 | }; | ||||
715 | |||||
716 | |||||
717 | /* Flags that may be passed as the third argument to xTokenize() */ | ||||
718 | #define FTS5_TOKENIZE_QUERY 0x0001 | ||||
719 | #define FTS5_TOKENIZE_PREFIX 0x0002 | ||||
720 | #define FTS5_TOKENIZE_DOCUMENT 0x0004 | ||||
721 | #define FTS5_TOKENIZE_AUX 0x0008 | ||||
722 | |||||
723 | /* Flags that may be passed by the tokenizer implementation back to FTS5 | ||||
724 | ** as the second argument to the supplied xToken callback. */ | ||||
725 | #define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */ | ||||
726 | |||||
727 | /* | ||||
728 | ** END OF CUSTOM TOKENIZERS | ||||
729 | *************************************************************************/ | ||||
730 | |||||
731 | /************************************************************************* | ||||
732 | ** FTS5 EXTENSION REGISTRATION API | ||||
733 | */ | ||||
734 | typedef struct fts5_api fts5_api; | ||||
735 | struct fts5_api { | ||||
736 | int iVersion; /* Currently always set to 3 */ | ||||
737 | |||||
738 | /* Create a new tokenizer */ | ||||
739 | int (*xCreateTokenizer)( | ||||
740 | fts5_api *pApi, | ||||
741 | const char *zName, | ||||
742 | void *pUserData, | ||||
743 | fts5_tokenizer *pTokenizer, | ||||
744 | void (*xDestroy)(void*) | ||||
745 | ); | ||||
746 | |||||
747 | /* Find an existing tokenizer */ | ||||
748 | int (*xFindTokenizer)( | ||||
749 | fts5_api *pApi, | ||||
750 | const char *zName, | ||||
751 | void **ppUserData, | ||||
752 | fts5_tokenizer *pTokenizer | ||||
753 | ); | ||||
754 | |||||
755 | /* Create a new auxiliary function */ | ||||
756 | int (*xCreateFunction)( | ||||
757 | fts5_api *pApi, | ||||
758 | const char *zName, | ||||
759 | void *pUserData, | ||||
760 | fts5_extension_function xFunction, | ||||
761 | void (*xDestroy)(void*) | ||||
762 | ); | ||||
763 | |||||
764 | /* APIs below this point are only available if iVersion>=3 */ | ||||
765 | |||||
766 | /* Create a new tokenizer */ | ||||
767 | int (*xCreateTokenizer_v2)( | ||||
768 | fts5_api *pApi, | ||||
769 | const char *zName, | ||||
770 | void *pUserData, | ||||
771 | fts5_tokenizer_v2 *pTokenizer, | ||||
772 | void (*xDestroy)(void*) | ||||
773 | ); | ||||
774 | |||||
775 | /* Find an existing tokenizer */ | ||||
776 | int (*xFindTokenizer_v2)( | ||||
777 | fts5_api *pApi, | ||||
778 | const char *zName, | ||||
779 | void **ppUserData, | ||||
780 | fts5_tokenizer_v2 **ppTokenizer | ||||
781 | ); | ||||
782 | }; | ||||
783 | |||||
784 | /* | ||||
785 | ** END OF REGISTRATION API | ||||
786 | *************************************************************************/ | ||||
787 | |||||
788 | #ifdef __cplusplus | ||||
789 | } /* end of the 'extern "C"' block */ | ||||
790 | #endif | ||||
791 | |||||
792 | #endif /* _FTS5_H */ | ||||
793 | |||||
794 | #line 1 "fts5Int.h" | ||||
795 | /* | ||||
796 | ** 2014 May 31 | ||||
797 | ** | ||||
798 | ** The author disclaims copyright to this source code. In place of | ||||
799 | ** a legal notice, here is a blessing: | ||||
800 | ** | ||||
801 | ** May you do good and not evil. | ||||
802 | ** May you find forgiveness for yourself and forgive others. | ||||
803 | ** May you share freely, never taking more than you give. | ||||
804 | ** | ||||
805 | ****************************************************************************** | ||||
806 | ** | ||||
807 | */ | ||||
808 | #ifndef _FTS5INT_H | ||||
809 | #define _FTS5INT_H | ||||
810 | |||||
811 | /* #include "fts5.h" */ | ||||
812 | #include "sqlite3ext.h" | ||||
813 | SQLITE_EXTENSION_INIT1const sqlite3_api_routines *sqlite3_api=0; | ||||
814 | |||||
815 | #include <string.h> | ||||
816 | #include <assert.h> | ||||
817 | #include <stddef.h> | ||||
818 | |||||
819 | #ifndef SQLITE_AMALGAMATION | ||||
820 | |||||
821 | typedef unsigned char u8; | ||||
822 | typedef unsigned int u32; | ||||
823 | typedef unsigned short u16; | ||||
824 | typedef short i16; | ||||
825 | typedef sqlite3_int64 i64; | ||||
826 | typedef sqlite3_uint64 u64; | ||||
827 | |||||
828 | #ifndef ArraySize | ||||
829 | # define ArraySize(x)((int)(sizeof(x) / sizeof(x[0]))) ((int)(sizeof(x) / sizeof(x[0]))) | ||||
830 | #endif | ||||
831 | |||||
832 | #define testcase(x) | ||||
833 | |||||
834 | #if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST) | ||||
835 | # define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1 | ||||
836 | #endif | ||||
837 | #if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS) | ||||
838 | # define ALWAYS(X)(X) (1) | ||||
839 | # define NEVER(X)(X) (0) | ||||
840 | #elif !defined(NDEBUG1) | ||||
841 | # define ALWAYS(X)(X) ((X)?1:(assert(0)((void) (0)),0)) | ||||
842 | # define NEVER(X)(X) ((X)?(assert(0)((void) (0)),1):0) | ||||
843 | #else | ||||
844 | # define ALWAYS(X)(X) (X) | ||||
845 | # define NEVER(X)(X) (X) | ||||
846 | #endif | ||||
847 | |||||
848 | #define MIN(x,y)(((x) < (y)) ? (x) : (y)) (((x) < (y)) ? (x) : (y)) | ||||
849 | #define MAX(x,y)(((x) > (y)) ? (x) : (y)) (((x) > (y)) ? (x) : (y)) | ||||
850 | |||||
851 | /* | ||||
852 | ** Constants for the largest and smallest possible 64-bit signed integers. | ||||
853 | */ | ||||
854 | # define LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) (0xffffffff|(((i64)0x7fffffff)<<32)) | ||||
855 | # define SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))) (((i64)-1) - LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))) | ||||
856 | |||||
857 | /* The uptr type is an unsigned integer large enough to hold a pointer | ||||
858 | */ | ||||
859 | #if defined(HAVE_STDINT_H1) | ||||
860 | typedef uintptr_t uptr; | ||||
861 | #elif SQLITE_PTRSIZE==4 | ||||
862 | typedef u32 uptr; | ||||
863 | #else | ||||
864 | typedef u64 uptr; | ||||
865 | #endif | ||||
866 | |||||
867 | #ifdef SQLITE_4_BYTE_ALIGNED_MALLOC | ||||
868 | # define EIGHT_BYTE_ALIGNMENT(X)((((uptr)(X) - (uptr)0)&7)==0) ((((uptr)(X) - (uptr)0)&3)==0) | ||||
869 | #else | ||||
870 | # define EIGHT_BYTE_ALIGNMENT(X)((((uptr)(X) - (uptr)0)&7)==0) ((((uptr)(X) - (uptr)0)&7)==0) | ||||
871 | #endif | ||||
872 | |||||
873 | /* | ||||
874 | ** Macros needed to provide flexible arrays in a portable way | ||||
875 | */ | ||||
876 | #ifndef offsetof | ||||
877 | # define offsetof(STRUCTURE,FIELD)__builtin_offsetof(STRUCTURE, FIELD) ((size_t)((char*)&((STRUCTURE*)0)->FIELD)) | ||||
878 | #endif | ||||
879 | #if defined(__STDC_VERSION__201710L) && (__STDC_VERSION__201710L >= 199901L) | ||||
880 | # define FLEXARRAY | ||||
881 | #else | ||||
882 | # define FLEXARRAY 1 | ||||
883 | #endif | ||||
884 | |||||
885 | #endif | ||||
886 | |||||
887 | /* Truncate very long tokens to this many bytes. Hard limit is | ||||
888 | ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset | ||||
889 | ** field that occurs at the start of each leaf page (see fts5_index.c). */ | ||||
890 | #define FTS5_MAX_TOKEN_SIZE 32768 | ||||
891 | |||||
892 | /* | ||||
893 | ** Maximum number of prefix indexes on single FTS5 table. This must be | ||||
894 | ** less than 32. If it is set to anything larger than that, an #error | ||||
895 | ** directive in fts5_index.c will cause the build to fail. | ||||
896 | */ | ||||
897 | #define FTS5_MAX_PREFIX_INDEXES 31 | ||||
898 | |||||
899 | /* | ||||
900 | ** Maximum segments permitted in a single index | ||||
901 | */ | ||||
902 | #define FTS5_MAX_SEGMENT 2000 | ||||
903 | |||||
904 | #define FTS5_DEFAULT_NEARDIST 10 | ||||
905 | #define FTS5_DEFAULT_RANK "bm25" | ||||
906 | |||||
907 | /* Name of rank and rowid columns */ | ||||
908 | #define FTS5_RANK_NAME "rank" | ||||
909 | #define FTS5_ROWID_NAME "rowid" | ||||
910 | |||||
911 | #ifdef SQLITE_DEBUG | ||||
912 | # define FTS5_CORRUPT(11 | (1<<8)) sqlite3Fts5Corrupt() | ||||
913 | static int sqlite3Fts5Corrupt(void); | ||||
914 | #else | ||||
915 | # define FTS5_CORRUPT(11 | (1<<8)) SQLITE_CORRUPT_VTAB(11 | (1<<8)) | ||||
916 | #endif | ||||
917 | |||||
918 | /* | ||||
919 | ** The assert_nc() macro is similar to the assert() macro, except that it | ||||
920 | ** is used for assert() conditions that are true only if it can be | ||||
921 | ** guaranteed that the database is not corrupt. | ||||
922 | */ | ||||
923 | #ifdef SQLITE_DEBUG | ||||
924 | extern int sqlite3_fts5_may_be_corrupt; | ||||
925 | # define assert_nc(x)((void) (0)) assert(sqlite3_fts5_may_be_corrupt || (x))((void) (0)) | ||||
926 | #else | ||||
927 | # define assert_nc(x)((void) (0)) assert(x)((void) (0)) | ||||
928 | #endif | ||||
929 | |||||
930 | /* | ||||
931 | ** A version of memcmp() that does not cause asan errors if one of the pointer | ||||
932 | ** parameters is NULL and the number of bytes to compare is zero. | ||||
933 | */ | ||||
934 | #define fts5Memcmp(s1, s2, n)((n)<=0 ? 0 : memcmp((s1), (s2), (n))) ((n)<=0 ? 0 : memcmp((s1), (s2), (n))) | ||||
935 | |||||
936 | /* Mark a function parameter as unused, to suppress nuisance compiler | ||||
937 | ** warnings. */ | ||||
938 | #ifndef UNUSED_PARAM | ||||
939 | # define UNUSED_PARAM(X)(void)(X) (void)(X) | ||||
940 | #endif | ||||
941 | |||||
942 | #ifndef UNUSED_PARAM2 | ||||
943 | # define UNUSED_PARAM2(X, Y)(void)(X), (void)(Y) (void)(X), (void)(Y) | ||||
944 | #endif | ||||
945 | |||||
946 | typedef struct Fts5Global Fts5Global; | ||||
947 | typedef struct Fts5Colset Fts5Colset; | ||||
948 | |||||
949 | /* If a NEAR() clump or phrase may only match a specific set of columns, | ||||
950 | ** then an object of the following type is used to record the set of columns. | ||||
951 | ** Each entry in the aiCol[] array is a column that may be matched. | ||||
952 | ** | ||||
953 | ** This object is used by fts5_expr.c and fts5_index.c. | ||||
954 | */ | ||||
955 | struct Fts5Colset { | ||||
956 | int nCol; /* Presumably the number of valid entries in aiCol[] - confirm against users */ | ||||
957 | int aiCol[FLEXARRAY]; /* Columns that may be matched; sized via the FLEXARRAY macro */ | ||||
958 | }; | ||||
959 | |||||
960 | /* Size (in bytes) of a complete Fts5Colset object with N columns. */ | ||||
961 | #define SZ_FTS5COLSET(N)(sizeof(i64)*((N+2)/2)) (sizeof(i64)*((N+2)/2)) | ||||
962 | |||||
963 | /************************************************************************** | ||||
964 | ** Interface to code in fts5_config.c. fts5_config.c contains code | ||||
965 | ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. | ||||
966 | */ | ||||
967 | |||||
968 | typedef struct Fts5Config Fts5Config; | ||||
969 | typedef struct Fts5TokenizerConfig Fts5TokenizerConfig; | ||||
970 | |||||
971 | struct Fts5TokenizerConfig { | ||||
972 | Fts5Tokenizer *pTok; /* Tokenizer handle; the type xCreate() writes to *ppOut */ | ||||
973 | fts5_tokenizer_v2 *pApi2; /* Methods of a v2 tokenizer (presumably NULL for a legacy one - confirm) */ | ||||
974 | fts5_tokenizer *pApi1; /* Methods of a legacy (deprecated API) tokenizer */ | ||||
975 | const char **azArg; /* Presumably the arguments handed to xCreate() - TODO confirm */ | ||||
976 | int nArg; /* Presumably the number of entries in azArg[] - TODO confirm */ | ||||
977 | int ePattern; /* FTS5_PATTERN_XXX constant */ | ||||
978 | const char *pLocale; /* Current locale to use */ | ||||
979 | int nLocale; /* Size of pLocale in bytes */ | ||||
980 | }; | ||||
981 | |||||
982 | /* | ||||
983 | ** An instance of the following structure encodes all information that can | ||||
984 | ** be gleaned from the CREATE VIRTUAL TABLE statement. | ||||
985 | ** | ||||
986 | ** And all information loaded from the %_config table. | ||||
987 | ** | ||||
988 | ** nAutomerge: | ||||
989 | ** The minimum number of segments that an auto-merge operation should | ||||
990 | ** attempt to merge together. A value of 1 sets the object to use the | ||||
991 | ** compile time default. Zero disables auto-merge altogether. | ||||
992 | ** | ||||
993 | ** bContentlessDelete: | ||||
994 | ** True if the contentless_delete option was present in the CREATE | ||||
995 | ** VIRTUAL TABLE statement. | ||||
996 | ** | ||||
997 | ** zContent: | ||||
998 | ** | ||||
999 | ** zContentRowid: | ||||
1000 | ** The value of the content_rowid= option, if one was specified. Or | ||||
1001 | ** the string "rowid" otherwise. This text is not quoted - if it is | ||||
1002 | ** used as part of an SQL statement it needs to be quoted appropriately. | ||||
1003 | ** | ||||
1004 | ** zContentExprlist: | ||||
1005 | ** | ||||
1006 | ** pzErrmsg: | ||||
1007 | ** This exists in order to allow the fts5_index.c module to return a | ||||
1008 | ** decent error message if it encounters a file-format version it does | ||||
1009 | ** not understand. | ||||
1010 | ** | ||||
1011 | ** bColumnsize: | ||||
1012 | ** True if the %_docsize table is created. | ||||
1013 | ** | ||||
1014 | ** bPrefixIndex: | ||||
1015 | ** This is only used for debugging. If set to false, any prefix indexes | ||||
1016 | ** are ignored. This value is configured using: | ||||
1017 | ** | ||||
1018 | ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); | ||||
1019 | ** | ||||
1020 | ** bLocale: | ||||
1021 | ** Set to true if locale=1 was specified when the table was created. | ||||
1022 | */ | ||||
1023 | struct Fts5Config { | ||||
1024 | sqlite3 *db; /* Database handle */ | ||||
1025 | Fts5Global *pGlobal; /* Global fts5 object for handle db */ | ||||
1026 | char *zDb; /* Database holding FTS index (e.g. "main") */ | ||||
1027 | char *zName; /* Name of FTS index */ | ||||
1028 | int nCol; /* Number of columns */ | ||||
1029 | char **azCol; /* Column names */ | ||||
1030 | u8 *abUnindexed; /* True for unindexed columns */ | ||||
1031 | int nPrefix; /* Number of prefix indexes */ | ||||
1032 | int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ | ||||
1033 | int eContent; /* An FTS5_CONTENT value */ | ||||
1034 | int bContentlessDelete; /* "contentless_delete=" option (dflt==0) */ | ||||
1035 | int bContentlessUnindexed; /* "contentless_unindexed=" option (dflt=0) */ | ||||
1036 | char *zContent; /* content table */ | ||||
1037 | char *zContentRowid; /* "content_rowid=" option value */ | ||||
1038 | int bColumnsize; /* "columnsize=" option value (dflt==1) */ | ||||
1039 | int bTokendata; /* "tokendata=" option value (dflt==0) */ | ||||
1040 | int bLocale; /* "locale=" option value (dflt==0) */ | ||||
1041 | int eDetail; /* FTS5_DETAIL_XXX value */ | ||||
1042 | char *zContentExprlist; | ||||
1043 | Fts5TokenizerConfig t; | ||||
1044 | int bLock; /* True when table is preparing statement */ | ||||
1045 | |||||
1046 | |||||
1047 | /* Values loaded from the %_config table */ | ||||
1048 | int iVersion; /* fts5 file format 'version' */ | ||||
1049 | int iCookie; /* Incremented when %_config is modified */ | ||||
1050 | int pgsz; /* Approximate page size used in %_data */ | ||||
1051 | int nAutomerge; /* 'automerge' setting */ | ||||
1052 | int nCrisisMerge; /* Maximum allowed segments per level */ | ||||
1053 | int nUsermerge; /* 'usermerge' setting */ | ||||
1054 | int nHashSize; /* Bytes of memory for in-memory hash */ | ||||
1055 | char *zRank; /* Name of rank function */ | ||||
1056 | char *zRankArgs; /* Arguments to rank function */ | ||||
1057 | int bSecureDelete; /* 'secure-delete' */ | ||||
1058 | int nDeleteMerge; /* 'deletemerge' */ | ||||
1059 | int bPrefixInsttoken; /* 'prefix-insttoken' */ | ||||
1060 | |||||
1061 | /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ | ||||
1062 | char **pzErrmsg; | ||||
1063 | |||||
1064 | #ifdef SQLITE_DEBUG | ||||
1065 | int bPrefixIndex; /* True to use prefix-indexes */ | ||||
1066 | #endif | ||||
1067 | }; | ||||
1068 | |||||
1069 | /* Current expected value of %_config table 'version' field. And | ||||
1070 | ** the expected version if the 'secure-delete' option has ever been | ||||
1071 | ** set on the table. */ | ||||
1072 | #define FTS5_CURRENT_VERSION 4 | ||||
1073 | #define FTS5_CURRENT_VERSION_SECUREDELETE 5 | ||||
1074 | |||||
1075 | #define FTS5_CONTENT_NORMAL 0 | ||||
1076 | #define FTS5_CONTENT_NONE 1 | ||||
1077 | #define FTS5_CONTENT_EXTERNAL 2 | ||||
1078 | #define FTS5_CONTENT_UNINDEXED 3 | ||||
1079 | |||||
1080 | #define FTS5_DETAIL_FULL 0 | ||||
1081 | #define FTS5_DETAIL_NONE 1 | ||||
1082 | #define FTS5_DETAIL_COLUMNS 2 | ||||
1083 | |||||
1084 | #define FTS5_PATTERN_NONE 0 | ||||
1085 | #define FTS5_PATTERN_LIKE 65 /* matches SQLITE_INDEX_CONSTRAINT_LIKE */ | ||||
1086 | #define FTS5_PATTERN_GLOB 66 /* matches SQLITE_INDEX_CONSTRAINT_GLOB */ | ||||
1087 | |||||
1088 | static int sqlite3Fts5ConfigParse( | ||||
1089 | Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** | ||||
1090 | ); | ||||
1091 | static void sqlite3Fts5ConfigFree(Fts5Config*); | ||||
1092 | |||||
1093 | static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); | ||||
1094 | |||||
1095 | static int sqlite3Fts5Tokenize( | ||||
1096 | Fts5Config *pConfig, /* FTS5 Configuration object */ | ||||
1097 | int flags, /* FTS5_TOKENIZE_* flags */ | ||||
1098 | const char *pText, int nText, /* Text to tokenize */ | ||||
1099 | void *pCtx, /* Context passed to xToken() */ | ||||
1100 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | ||||
1101 | ); | ||||
1102 | |||||
1103 | static void sqlite3Fts5Dequote(char *z); | ||||
1104 | |||||
1105 | /* Load the contents of the %_config table */ | ||||
1106 | static int sqlite3Fts5ConfigLoad(Fts5Config*, int); | ||||
1107 | |||||
1108 | /* Set the value of a single config attribute */ | ||||
1109 | static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); | ||||
1110 | |||||
1111 | static int sqlite3Fts5ConfigParseRank(const char*, char**, char**); | ||||
1112 | |||||
1113 | static void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...); | ||||
1114 | |||||
1115 | /* | ||||
1116 | ** End of interface to code in fts5_config.c. | ||||
1117 | **************************************************************************/ | ||||
1118 | |||||
1119 | /************************************************************************** | ||||
1120 | ** Interface to code in fts5_buffer.c. | ||||
1121 | */ | ||||
1122 | |||||
1123 | /* | ||||
1124 | ** Buffer object for the incremental building of string data. | ||||
1125 | */ | ||||
1126 | typedef struct Fts5Buffer Fts5Buffer; | ||||
1127 | struct Fts5Buffer { | ||||
1128 | u8 *p; /* Pointer to the buffer data */ | ||||
1129 | int n; /* Bytes in use; fts5BufferGrow() checks n+nn against nSpace */ | ||||
1130 | int nSpace; /* Space available at p; presumably the allocated size in bytes */ | ||||
1131 | }; | ||||
1132 | |||||
1133 | static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32); | ||||
1134 | static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64); | ||||
1135 | static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*); | ||||
1136 | static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*); | ||||
1137 | static void sqlite3Fts5BufferFree(Fts5Buffer*); | ||||
1138 | static void sqlite3Fts5BufferZero(Fts5Buffer*); | ||||
1139 | static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); | ||||
1140 | static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); | ||||
1141 | |||||
1142 | static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); | ||||
1143 | |||||
1144 | #define fts5BufferZero(x)sqlite3Fts5BufferZero(x) sqlite3Fts5BufferZero(x) | ||||
1145 | #define fts5BufferAppendVarint(a,b,c)sqlite3Fts5BufferAppendVarint(a,b,(i64)c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c) | ||||
1146 | #define fts5BufferFree(a)sqlite3Fts5BufferFree(a) sqlite3Fts5BufferFree(a) | ||||
1147 | #define fts5BufferAppendBlob(a,b,c,d)sqlite3Fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) | ||||
1148 | #define fts5BufferSet(a,b,c,d)sqlite3Fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) | ||||
1149 | |||||
1150 | #define fts5BufferGrow(pRc,pBuf,nn)( (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) ) ( \ | ||||
1151 | (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \ | ||||
1152 | sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \ | ||||
1153 | ) | ||||
1154 | |||||
1155 | /* Write and decode big-endian 32-bit integer values */ | ||||
1156 | static void sqlite3Fts5Put32(u8*, int); | ||||
1157 | static int sqlite3Fts5Get32(const u8*); | ||||
1158 | |||||
1159 | #define FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF) (int)((iPos >> 32) & 0x7FFFFFFF) | ||||
1160 | #define FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF) (int)(iPos & 0x7FFFFFFF) | ||||
1161 | |||||
1162 | typedef struct Fts5PoslistReader Fts5PoslistReader; | ||||
1163 | struct Fts5PoslistReader { | ||||
1164 | /* Variables used only by sqlite3Fts5PoslistReaderXXX() functions. */ | ||||
1165 | const u8 *a; /* Position list to iterate through */ | ||||
1166 | int n; /* Size of buffer at a[] in bytes */ | ||||
1167 | int i; /* Current offset in a[] */ | ||||
1168 | |||||
1169 | u8 bFlag; /* For client use (any custom purpose) */ | ||||
1170 | |||||
1171 | /* Output variables */ | ||||
1172 | u8 bEof; /* Set to true at EOF */ | ||||
1173 | i64 iPos; /* (iCol<<32) + iOff; decode with FTS5_POS2COLUMN / FTS5_POS2OFFSET */ | ||||
1174 | }; | ||||
1175 | static int sqlite3Fts5PoslistReaderInit( | ||||
1176 | const u8 *a, int n, /* Poslist buffer to iterate through */ | ||||
1177 | Fts5PoslistReader *pIter /* Iterator object to initialize */ | ||||
1178 | ); | ||||
1179 | static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*); | ||||
1180 | |||||
1181 | typedef struct Fts5PoslistWriter Fts5PoslistWriter; | ||||
1182 | struct Fts5PoslistWriter { | ||||
1183 | i64 iPrev; | ||||
1184 | }; | ||||
1185 | static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); | ||||
1186 | static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64); | ||||
1187 | |||||
1188 | static int sqlite3Fts5PoslistNext64( | ||||
1189 | const u8 *a, int n, /* Buffer containing poslist */ | ||||
1190 | int *pi, /* IN/OUT: Offset within a[] */ | ||||
1191 | i64 *piOff /* IN/OUT: Current offset */ | ||||
1192 | ); | ||||
1193 | |||||
1194 | /* Malloc utility */ | ||||
1195 | static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte); | ||||
1196 | static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); | ||||
1197 | |||||
1198 | /* Character set tests (like isspace(), isalpha() etc.) */ | ||||
1199 | static int sqlite3Fts5IsBareword(char t); | ||||
1200 | |||||
1201 | |||||
1202 | /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ | ||||
1203 | typedef struct Fts5Termset Fts5Termset; | ||||
1204 | static int sqlite3Fts5TermsetNew(Fts5Termset**); | ||||
1205 | static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent); | ||||
1206 | static void sqlite3Fts5TermsetFree(Fts5Termset*); | ||||
1207 | |||||
1208 | /* | ||||
1209 | ** End of interface to code in fts5_buffer.c. | ||||
1210 | **************************************************************************/ | ||||
1211 | |||||
1212 | /************************************************************************** | ||||
1213 | ** Interface to code in fts5_index.c. fts5_index.c contains code | ||||
1214 | ** to access the data stored in the %_data table. | ||||
1215 | */ | ||||
1216 | |||||
1217 | typedef struct Fts5Index Fts5Index; | ||||
1218 | typedef struct Fts5IndexIter Fts5IndexIter; | ||||
1219 | |||||
1220 | struct Fts5IndexIter { | ||||
1221 | i64 iRowid; /* Presumably the rowid of the current entry - confirm in fts5_index.c */ | ||||
1222 | const u8 *pData; /* Presumably the data for the current entry - confirm in fts5_index.c */ | ||||
1223 | int nData; /* Presumably the size of pData in bytes - confirm in fts5_index.c */ | ||||
1224 | u8 bEof; /* Value tested by the sqlite3Fts5IterEof() macro */ | ||||
1225 | }; | ||||
1226 | |||||
1227 | #define sqlite3Fts5IterEof(x)((x)->bEof) ((x)->bEof) | ||||
1228 | |||||
1229 | /* | ||||
1230 | ** Values used as part of the flags argument passed to IndexQuery(). | ||||
1231 | */ | ||||
1232 | #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ | ||||
1233 | #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ | ||||
1234 | #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ | ||||
1235 | #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ | ||||
1236 | |||||
1237 | /* The following are used internally by the fts5_index.c module. They are | ||||
1238 | ** defined here only to make it easier to avoid clashes with the flags | ||||
1239 | ** above. */ | ||||
1240 | #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 | ||||
1241 | #define FTS5INDEX_QUERY_NOOUTPUT 0x0020 | ||||
1242 | #define FTS5INDEX_QUERY_SKIPHASH 0x0040 | ||||
1243 | #define FTS5INDEX_QUERY_NOTOKENDATA 0x0080 | ||||
1244 | #define FTS5INDEX_QUERY_SCANONETERM 0x0100 | ||||
1245 | |||||
1246 | /* | ||||
1247 | ** Create/destroy an Fts5Index object. | ||||
1248 | */ | ||||
1249 | static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); | ||||
1250 | static int sqlite3Fts5IndexClose(Fts5Index *p); | ||||
1251 | |||||
1252 | /* | ||||
1253 | ** Return a simple checksum value based on the arguments. | ||||
1254 | */ | ||||
1255 | static u64 sqlite3Fts5IndexEntryCksum( | ||||
1256 | i64 iRowid, | ||||
1257 | int iCol, | ||||
1258 | int iPos, | ||||
1259 | int iIdx, | ||||
1260 | const char *pTerm, | ||||
1261 | int nTerm | ||||
1262 | ); | ||||
1263 | |||||
1264 | /* | ||||
1265 | ** Argument p points to a buffer containing utf-8 text that is n bytes in | ||||
1266 | ** size. Return the number of bytes in the nChar character prefix of the | ||||
1267 | ** buffer, or 0 if there are less than nChar characters in total. | ||||
1268 | */ | ||||
1269 | static int sqlite3Fts5IndexCharlenToBytelen( | ||||
1270 | const char *p, | ||||
1271 | int nByte, | ||||
1272 | int nChar | ||||
1273 | ); | ||||
1274 | |||||
1275 | /* | ||||
1276 | ** Open a new iterator to iterate through all rowids that match the | ||||
1277 | ** specified token or token prefix. | ||||
1278 | */ | ||||
1279 | static int sqlite3Fts5IndexQuery( | ||||
1280 | Fts5Index *p, /* FTS index to query */ | ||||
1281 | const char *pToken, int nToken, /* Token (or prefix) to query for */ | ||||
1282 | int flags, /* Mask of FTS5INDEX_QUERY_X flags */ | ||||
1283 | Fts5Colset *pColset, /* Match these columns only */ | ||||
1284 | Fts5IndexIter **ppIter /* OUT: New iterator object */ | ||||
1285 | ); | ||||
1286 | |||||
1287 | /* | ||||
1288 | ** The various operations on open token or token prefix iterators opened | ||||
1289 | ** using sqlite3Fts5IndexQuery(). | ||||
1290 | */ | ||||
1291 | static int sqlite3Fts5IterNext(Fts5IndexIter*); | ||||
1292 | static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); | ||||
1293 | |||||
1294 | /* | ||||
1295 | ** Close an iterator opened by sqlite3Fts5IndexQuery(). | ||||
1296 | */ | ||||
1297 | static void sqlite3Fts5IterClose(Fts5IndexIter*); | ||||
1298 | |||||
1299 | /* | ||||
1300 | ** Close the reader blob handle, if it is open. | ||||
1301 | */ | ||||
1302 | static void sqlite3Fts5IndexCloseReader(Fts5Index*); | ||||
1303 | |||||
1304 | /* | ||||
1305 | ** This interface is used by the fts5vocab module. | ||||
1306 | */ | ||||
1307 | static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*); | ||||
1308 | static int sqlite3Fts5IterNextScan(Fts5IndexIter*); | ||||
1309 | static void *sqlite3Fts5StructureRef(Fts5Index*); | ||||
1310 | static void sqlite3Fts5StructureRelease(void*); | ||||
1311 | static int sqlite3Fts5StructureTest(Fts5Index*, void*); | ||||
1312 | |||||
1313 | /* | ||||
1314 | ** Used by xInstToken(): | ||||
1315 | */ | ||||
1316 | static int sqlite3Fts5IterToken( | ||||
1317 | Fts5IndexIter *pIndexIter, | ||||
1318 | const char *pToken, int nToken, | ||||
1319 | i64 iRowid, | ||||
1320 | int iCol, | ||||
1321 | int iOff, | ||||
1322 | const char **ppOut, int *pnOut | ||||
1323 | ); | ||||
1324 | |||||
1325 | /* | ||||
1326 | ** Insert or remove data to or from the index. Each time a document is | ||||
1327 | ** added to or removed from the index, this function is called one or more | ||||
1328 | ** times. | ||||
1329 | ** | ||||
1330 | ** For an insert, it must be called once for each token in the new document. | ||||
1331 | ** If the operation is a delete, it must be called (at least) once for each | ||||
1332 | ** unique token in the document with an iCol value less than zero. The iPos | ||||
1333 | ** argument is ignored for a delete. | ||||
1334 | */ | ||||
1335 | static int sqlite3Fts5IndexWrite( | ||||
1336 | Fts5Index *p, /* Index to write to */ | ||||
1337 | int iCol, /* Column token appears in (-ve -> delete) */ | ||||
1338 | int iPos, /* Position of token within column */ | ||||
1339 | const char *pToken, int nToken /* Token to add or remove to or from index */ | ||||
1340 | ); | ||||
1341 | |||||
1342 | /* | ||||
1343 | ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to | ||||
1344 | ** document iDocid. | ||||
1345 | */ | ||||
1346 | static int sqlite3Fts5IndexBeginWrite( | ||||
1347 | Fts5Index *p, /* Index to write to */ | ||||
1348 | int bDelete, /* True if current operation is a delete */ | ||||
1349 | i64 iDocid /* Docid to add or remove data from */ | ||||
1350 | ); | ||||
1351 | |||||
1352 | /* | ||||
1353 | ** Flush any data stored in the in-memory hash tables to the database. | ||||
1354 | ** Also close any open blob handles. | ||||
1355 | */ | ||||
1356 | static int sqlite3Fts5IndexSync(Fts5Index *p); | ||||
1357 | |||||
1358 | /* | ||||
1359 | ** Discard any data stored in the in-memory hash tables. Do not write it | ||||
1360 | ** to the database. Additionally, assume that the contents of the %_data | ||||
1361 | ** table may have changed on disk. So any in-memory caches of %_data | ||||
1362 | ** records must be invalidated. | ||||
1363 | */ | ||||
1364 | static int sqlite3Fts5IndexRollback(Fts5Index *p); | ||||
1365 | |||||
1366 | /* | ||||
1367 | ** Get or set the "averages" values. | ||||
1368 | */ | ||||
1369 | static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize); | ||||
1370 | static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); | ||||
1371 | |||||
1372 | /* | ||||
1373 | ** Functions called by the storage module as part of integrity-check. | ||||
1374 | */ | ||||
1375 | static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum); | ||||
1376 | |||||
1377 | /* | ||||
1378 | ** Called during virtual module initialization to register UDF | ||||
1379 | ** fts5_decode() with SQLite | ||||
1380 | */ | ||||
1381 | static int sqlite3Fts5IndexInit(sqlite3*); | ||||
1382 | |||||
1383 | static int sqlite3Fts5IndexSetCookie(Fts5Index*, int); | ||||
1384 | |||||
1385 | /* | ||||
1386 | ** Return the total number of entries read from the %_data table by | ||||
1387 | ** this connection since it was created. | ||||
1388 | */ | ||||
1389 | static int sqlite3Fts5IndexReads(Fts5Index *p); | ||||
1390 | |||||
1391 | static int sqlite3Fts5IndexReinit(Fts5Index *p); | ||||
1392 | static int sqlite3Fts5IndexOptimize(Fts5Index *p); | ||||
1393 | static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); | ||||
1394 | static int sqlite3Fts5IndexReset(Fts5Index *p); | ||||
1395 | |||||
1396 | static int sqlite3Fts5IndexLoadConfig(Fts5Index *p); | ||||
1397 | |||||
1398 | static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin); | ||||
1399 | static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); | ||||
1400 | |||||
1401 | static void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); | ||||
1402 | |||||
1403 | /* Used to populate hash tables for xInstToken in detail=none/column mode. */ | ||||
1404 | static int sqlite3Fts5IndexIterWriteTokendata( | ||||
1405 | Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff | ||||
1406 | ); | ||||
1407 | |||||
1408 | /* | ||||
1409 | ** End of interface to code in fts5_index.c. | ||||
1410 | **************************************************************************/ | ||||
1411 | |||||
1412 | /************************************************************************** | ||||
1413 | ** Interface to code in fts5_varint.c. | ||||
1414 | */ | ||||
1415 | static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); | ||||
1416 | static int sqlite3Fts5GetVarintLen(u32 iVal); | ||||
1417 | static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*); | ||||
1418 | static int sqlite3Fts5PutVarint(unsigned char *p, u64 v); | ||||
1419 | |||||
1420 | #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&(b)) | ||||
1421 | #define fts5GetVarint sqlite3Fts5GetVarint | ||||
1422 | |||||
1423 | #define fts5FastGetVarint32(a, iOff, nVal) { \ | ||||
1424 | nVal = (a)[iOff++]; \ | ||||
1425 | if( nVal & 0x80 ){ \ | ||||
1426 | iOff--; \ | ||||
1427 | iOff += fts5GetVarint32(&(a)[iOff], nVal); \ | ||||
1428 | } \ | ||||
1429 | } | ||||
1430 | |||||
1431 | |||||
1432 | /* | ||||
1433 | ** End of interface to code in fts5_varint.c. | ||||
1434 | **************************************************************************/ | ||||
1435 | |||||
1436 | |||||
1437 | /************************************************************************** | ||||
1438 | ** Interface to code in fts5_main.c. | ||||
1439 | */ | ||||
1440 | |||||
1441 | /* | ||||
1442 | ** Virtual-table object. | ||||
1443 | */ | ||||
1444 | typedef struct Fts5Table Fts5Table; | ||||
1445 | struct Fts5Table { /* Type returned by sqlite3Fts5TableFromCsrid() and accepted by sqlite3Fts5FlushToDisk() */ | ||||
1446 | sqlite3_vtab base; /* Base class used by SQLite core */ | ||||
1447 | Fts5Config *pConfig; /* Virtual table configuration */ | ||||
1448 | Fts5Index *pIndex; /* Full-text index */ | ||||
1449 | }; | ||||
1450 | |||||
1451 | static int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig); | ||||
1452 | |||||
1453 | static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64); | ||||
1454 | |||||
1455 | static int sqlite3Fts5FlushToDisk(Fts5Table*); | ||||
1456 | |||||
1457 | static void sqlite3Fts5ClearLocale(Fts5Config *pConfig); | ||||
1458 | static void sqlite3Fts5SetLocale(Fts5Config *pConfig, const char *pLoc, int nLoc); | ||||
1459 | |||||
1460 | static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal); | ||||
1461 | static int sqlite3Fts5DecodeLocaleValue(sqlite3_value *pVal, | ||||
1462 | const char **ppText, int *pnText, const char **ppLoc, int *pnLoc | ||||
1463 | ); | ||||
1464 | |||||
1465 | /* | ||||
1466 | ** End of interface to code in fts5_main.c. | ||||
1467 | **************************************************************************/ | ||||
1468 | |||||
1469 | /************************************************************************** | ||||
1470 | ** Interface to code in fts5_hash.c. | ||||
1471 | */ | ||||
1472 | typedef struct Fts5Hash Fts5Hash; | ||||
1473 | |||||
1474 | /* | ||||
1475 | ** Create a hash table, free a hash table. | ||||
1476 | */ | ||||
1477 | static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize); | ||||
1478 | static void sqlite3Fts5HashFree(Fts5Hash*); | ||||
1479 | |||||
1480 | static int sqlite3Fts5HashWrite( | ||||
1481 | Fts5Hash*, | ||||
1482 | i64 iRowid, /* Rowid for this entry */ | ||||
1483 | int iCol, /* Column token appears in (-ve -> delete) */ | ||||
1484 | int iPos, /* Position of token within column */ | ||||
1485 | char bByte, | ||||
1486 | const char *pToken, int nToken /* Token to add or remove to or from index */ | ||||
1487 | ); | ||||
1488 | |||||
1489 | /* | ||||
1490 | ** Empty (but do not delete) a hash table. | ||||
1491 | */ | ||||
1492 | static void sqlite3Fts5HashClear(Fts5Hash*); | ||||
1493 | |||||
1494 | /* | ||||
1495 | ** Return true if the hash is empty, false otherwise. | ||||
1496 | */ | ||||
1497 | static int sqlite3Fts5HashIsEmpty(Fts5Hash*); | ||||
1498 | |||||
1499 | static int sqlite3Fts5HashQuery( | ||||
1500 | Fts5Hash*, /* Hash table to query */ | ||||
1501 | int nPre, | ||||
1502 | const char *pTerm, int nTerm, /* Query term */ | ||||
1503 | void **ppObj, /* OUT: Pointer to doclist for pTerm */ | ||||
1504 | int *pnDoclist /* OUT: Size of doclist in bytes */ | ||||
1505 | ); | ||||
1506 | |||||
1507 | static int sqlite3Fts5HashScanInit( | ||||
1508 | Fts5Hash*, /* Hash table to query */ | ||||
1509 | const char *pTerm, int nTerm /* Query prefix */ | ||||
1510 | ); | ||||
1511 | static void sqlite3Fts5HashScanNext(Fts5Hash*); | ||||
1512 | static int sqlite3Fts5HashScanEof(Fts5Hash*); | ||||
1513 | static void sqlite3Fts5HashScanEntry(Fts5Hash *, | ||||
1514 | const char **pzTerm, /* OUT: term (nul-terminated) */ | ||||
1515 | int *pnTerm, /* OUT: Size of term in bytes */ | ||||
1516 | const u8 **ppDoclist, /* OUT: pointer to doclist */ | ||||
1517 | int *pnDoclist /* OUT: size of doclist in bytes */ | ||||
1518 | ); | ||||
1519 | |||||
1520 | |||||
1521 | |||||
1522 | /* | ||||
1523 | ** End of interface to code in fts5_hash.c. | ||||
1524 | **************************************************************************/ | ||||
1525 | |||||
1526 | /************************************************************************** | ||||
1527 | ** Interface to code in fts5_storage.c. fts5_storage.c contains | ||||
1528 | ** code to access the data stored in the %_content and %_docsize tables. | ||||
1529 | */ | ||||
1530 | |||||
1531 | #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ | ||||
1532 | #define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ | ||||
1533 | #define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */ | ||||
1534 | |||||
1535 | typedef struct Fts5Storage Fts5Storage; | ||||
1536 | |||||
1537 | static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); | ||||
1538 | static int sqlite3Fts5StorageClose(Fts5Storage *p); | ||||
1539 | static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName); | ||||
1540 | |||||
1541 | static int sqlite3Fts5DropAll(Fts5Config*); | ||||
1542 | static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); | ||||
1543 | |||||
1544 | static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int); | ||||
1545 | static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, int, sqlite3_value**, i64*); | ||||
1546 | static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64); | ||||
1547 | |||||
1548 | static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg); | ||||
1549 | |||||
1550 | static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**); | ||||
1551 | static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); | ||||
1552 | |||||
1553 | static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); | ||||
1554 | static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); | ||||
1555 | static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); | ||||
1556 | |||||
1557 | static int sqlite3Fts5StorageSync(Fts5Storage *p); | ||||
1558 | static int sqlite3Fts5StorageRollback(Fts5Storage *p); | ||||
1559 | |||||
1560 | static int sqlite3Fts5StorageConfigValue( | ||||
1561 | Fts5Storage *p, const char*, sqlite3_value*, int | ||||
1562 | ); | ||||
1563 | |||||
1564 | static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); | ||||
1565 | static int sqlite3Fts5StorageRebuild(Fts5Storage *p); | ||||
1566 | static int sqlite3Fts5StorageOptimize(Fts5Storage *p); | ||||
1567 | static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge); | ||||
1568 | static int sqlite3Fts5StorageReset(Fts5Storage *p); | ||||
1569 | |||||
1570 | static void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage*); | ||||
1571 | static int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel); | ||||
1572 | |||||
1573 | /* | ||||
1574 | ** End of interface to code in fts5_storage.c. | ||||
1575 | **************************************************************************/ | ||||
1576 | |||||
1577 | |||||
1578 | /************************************************************************** | ||||
1579 | ** Interface to code in fts5_expr.c. | ||||
1580 | */ | ||||
1581 | typedef struct Fts5Expr Fts5Expr; | ||||
1582 | typedef struct Fts5ExprNode Fts5ExprNode; | ||||
1583 | typedef struct Fts5Parse Fts5Parse; | ||||
1584 | typedef struct Fts5Token Fts5Token; | ||||
1585 | typedef struct Fts5ExprPhrase Fts5ExprPhrase; | ||||
1586 | typedef struct Fts5ExprNearset Fts5ExprNearset; | ||||
1587 | |||||
1588 | struct Fts5Token { /* Token text span; passed by pointer to the sqlite3Fts5Parse*() parser interfaces declared in this section */ | ||||
1589 | const char *p; /* Token text (not NULL terminated) */ | ||||
1590 | int n; /* Size of buffer p in bytes */ | ||||
1591 | }; | ||||
1592 | |||||
1593 | /* Parse a MATCH expression. */ | ||||
1594 | static int sqlite3Fts5ExprNew( | ||||
1595 | Fts5Config *pConfig, | ||||
1596 | int bPhraseToAnd, | ||||
1597 | int iCol, /* Column on LHS of MATCH operator */ | ||||
1598 | const char *zExpr, | ||||
1599 | Fts5Expr **ppNew, | ||||
1600 | char **pzErr | ||||
1601 | ); | ||||
1602 | static int sqlite3Fts5ExprPattern( | ||||
1603 | Fts5Config *pConfig, | ||||
1604 | int bGlob, | ||||
1605 | int iCol, | ||||
1606 | const char *zText, | ||||
1607 | Fts5Expr **pp | ||||
1608 | ); | ||||
1609 | |||||
1610 | /* | ||||
1611 | ** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bDesc); | ||||
1612 | ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); | ||||
1613 | ** rc = sqlite3Fts5ExprNext(pExpr) | ||||
1614 | ** ){ | ||||
1615 | ** // The document with rowid iRowid matches the expression! | ||||
1616 | ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); | ||||
1617 | ** } | ||||
1618 | */ | ||||
1619 | static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc); | ||||
1620 | static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax); | ||||
1621 | static int sqlite3Fts5ExprEof(Fts5Expr*); | ||||
1622 | static i64 sqlite3Fts5ExprRowid(Fts5Expr*); | ||||
1623 | |||||
1624 | static void sqlite3Fts5ExprFree(Fts5Expr*); | ||||
1625 | static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2); | ||||
1626 | |||||
1627 | /* Called during startup to register a UDF with SQLite */ | ||||
1628 | static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); | ||||
1629 | |||||
1630 | static int sqlite3Fts5ExprPhraseCount(Fts5Expr*); | ||||
1631 | static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); | ||||
1632 | static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); | ||||
1633 | |||||
1634 | typedef struct Fts5PoslistPopulator Fts5PoslistPopulator; | ||||
1635 | static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int); | ||||
1636 | static int sqlite3Fts5ExprPopulatePoslists( | ||||
1637 | Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int | ||||
1638 | ); | ||||
1639 | static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64); | ||||
1640 | |||||
1641 | static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); | ||||
1642 | |||||
1643 | static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); | ||||
1644 | |||||
1645 | static int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*); | ||||
1646 | static int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*); | ||||
1647 | static void sqlite3Fts5ExprClearTokens(Fts5Expr*); | ||||
1648 | |||||
1649 | /******************************************* | ||||
1650 | ** The fts5_expr.c API above this point is used by the other hand-written | ||||
1651 | ** C code in this module. The interfaces below this point are called by | ||||
1652 | ** the parser code in fts5parse.y. */ | ||||
1653 | |||||
1654 | static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); | ||||
1655 | |||||
1656 | static Fts5ExprNode *sqlite3Fts5ParseNode( | ||||
1657 | Fts5Parse *pParse, | ||||
1658 | int eType, | ||||
1659 | Fts5ExprNode *pLeft, | ||||
1660 | Fts5ExprNode *pRight, | ||||
1661 | Fts5ExprNearset *pNear | ||||
1662 | ); | ||||
1663 | |||||
1664 | static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( | ||||
1665 | Fts5Parse *pParse, | ||||
1666 | Fts5ExprNode *pLeft, | ||||
1667 | Fts5ExprNode *pRight | ||||
1668 | ); | ||||
1669 | |||||
1670 | static Fts5ExprPhrase *sqlite3Fts5ParseTerm( | ||||
1671 | Fts5Parse *pParse, | ||||
1672 | Fts5ExprPhrase *pPhrase, | ||||
1673 | Fts5Token *pToken, | ||||
1674 | int bPrefix | ||||
1675 | ); | ||||
1676 | |||||
1677 | static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*); | ||||
1678 | |||||
1679 | static Fts5ExprNearset *sqlite3Fts5ParseNearset( | ||||
1680 | Fts5Parse*, | ||||
1681 | Fts5ExprNearset*, | ||||
1682 | Fts5ExprPhrase* | ||||
1683 | ); | ||||
1684 | |||||
1685 | static Fts5Colset *sqlite3Fts5ParseColset( | ||||
1686 | Fts5Parse*, | ||||
1687 | Fts5Colset*, | ||||
1688 | Fts5Token * | ||||
1689 | ); | ||||
1690 | |||||
1691 | static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); | ||||
1692 | static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); | ||||
1693 | static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); | ||||
1694 | |||||
1695 | static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); | ||||
1696 | static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*); | ||||
1697 | static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*); | ||||
1698 | static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); | ||||
1699 | static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); | ||||
1700 | |||||
1701 | /* | ||||
1702 | ** End of interface to code in fts5_expr.c. | ||||
1703 | **************************************************************************/ | ||||
1704 | |||||
1705 | |||||
1706 | |||||
1707 | /************************************************************************** | ||||
1708 | ** Interface to code in fts5_aux.c. | ||||
1709 | */ | ||||
1710 | |||||
1711 | static int sqlite3Fts5AuxInit(fts5_api*); | ||||
1712 | /* | ||||
1713 | ** End of interface to code in fts5_aux.c. | ||||
1714 | **************************************************************************/ | ||||
1715 | |||||
1716 | /************************************************************************** | ||||
1717 | ** Interface to code in fts5_tokenize.c. | ||||
1718 | */ | ||||
1719 | |||||
1720 | static int sqlite3Fts5TokenizerInit(fts5_api*); | ||||
1721 | static int sqlite3Fts5TokenizerPattern( | ||||
1722 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), | ||||
1723 | Fts5Tokenizer *pTok | ||||
1724 | ); | ||||
1725 | static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig*); | ||||
1726 | /* | ||||
1727 | ** End of interface to code in fts5_tokenize.c. | ||||
1728 | **************************************************************************/ | ||||
1729 | |||||
1730 | /************************************************************************** | ||||
1731 | ** Interface to code in fts5_vocab.c. | ||||
1732 | */ | ||||
1733 | |||||
1734 | static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*); | ||||
1735 | |||||
1736 | /* | ||||
1737 | ** End of interface to code in fts5_vocab.c. | ||||
1738 | **************************************************************************/ | ||||
1739 | |||||
1740 | |||||
1741 | /************************************************************************** | ||||
1742 | ** Interface to automatically generated code in fts5_unicode2.c. | ||||
1743 | */ | ||||
1744 | static int sqlite3Fts5UnicodeIsdiacritic(int c); | ||||
1745 | static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); | ||||
1746 | |||||
1747 | static int sqlite3Fts5UnicodeCatParse(const char*, u8*); | ||||
1748 | static int sqlite3Fts5UnicodeCategory(u32 iCode); | ||||
1749 | static void sqlite3Fts5UnicodeAscii(u8*, u8*); | ||||
1750 | /* | ||||
1751 | ** End of interface to code in fts5_unicode2.c. | ||||
1752 | **************************************************************************/ | ||||
1753 | |||||
1754 | #endif | ||||
1755 | |||||
1756 | #line 1 "fts5parse.h" | ||||
1757 | #define FTS5_OR 1 | ||||
1758 | #define FTS5_AND 2 | ||||
1759 | #define FTS5_NOT 3 | ||||
1760 | #define FTS5_TERM 4 | ||||
1761 | #define FTS5_COLON 5 | ||||
1762 | #define FTS5_MINUS 6 | ||||
1763 | #define FTS5_LCP 7 | ||||
1764 | #define FTS5_RCP 8 | ||||
1765 | #define FTS5_STRING 9 | ||||
1766 | #define FTS5_LP 10 | ||||
1767 | #define FTS5_RP 11 | ||||
1768 | #define FTS5_CARET 12 | ||||
1769 | #define FTS5_COMMA 13 | ||||
1770 | #define FTS5_PLUS 14 | ||||
1771 | #define FTS5_STAR 15 | ||||
1772 | |||||
1773 | #line 1 "fts5parse.c" | ||||
1774 | /* This file is automatically generated by Lemon from input grammar | ||||
1775 | ** source file "fts5parse.y". | ||||
1776 | */ | ||||
1777 | /* | ||||
1778 | ** 2000-05-29 | ||||
1779 | ** | ||||
1780 | ** The author disclaims copyright to this source code. In place of | ||||
1781 | ** a legal notice, here is a blessing: | ||||
1782 | ** | ||||
1783 | ** May you do good and not evil. | ||||
1784 | ** May you find forgiveness for yourself and forgive others. | ||||
1785 | ** May you share freely, never taking more than you give. | ||||
1786 | ** | ||||
1787 | ************************************************************************* | ||||
1788 | ** Driver template for the LEMON parser generator. | ||||
1789 | ** | ||||
1790 | ** The "lemon" program processes an LALR(1) input grammar file, then uses | ||||
1791 | ** this template to construct a parser. The "lemon" program inserts text | ||||
1792 | ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the | ||||
1793 | ** interstitial "-" characters) contained in this template is changed into | ||||
1794 | ** the value of the %name directive from the grammar. Otherwise, the content | ||||
1795 | ** of this template is copied straight through into the generated parser | ||||
1796 | ** source file. | ||||
1797 | ** | ||||
1798 | ** The following is the concatenation of all %include directives from the | ||||
1799 | ** input grammar file: | ||||
1800 | */ | ||||
1801 | /************ Begin %include sections from the grammar ************************/ | ||||
1802 | #line 47 "fts5parse.y" | ||||
1803 | |||||
1804 | /* #include "fts5Int.h" */ | ||||
1805 | /* #include "fts5parse.h" */ | ||||
1806 | |||||
1807 | /* | ||||
1808 | ** Disable all error recovery processing in the parser push-down | ||||
1809 | ** automaton. | ||||
1810 | */ | ||||
1811 | #define fts5YYNOERRORRECOVERY 1 | ||||
1812 | |||||
1813 | /* | ||||
1814 | ** Make fts5yytestcase() the same as testcase() | ||||
1815 | */ | ||||
1816 | #define fts5yytestcase(X) testcase(X) | ||||
1817 | |||||
1818 | /* | ||||
1819 | ** Indicate that sqlite3ParserFree() will never be called with a null | ||||
1820 | ** pointer. | ||||
1821 | */ | ||||
1822 | #define fts5YYPARSEFREENOTNULL 1 | ||||
1823 | |||||
1824 | /* | ||||
1825 | ** Alternative datatype for the argument to the malloc() routine passed | ||||
1826 | ** into sqlite3ParserAlloc(). The default is size_t. | ||||
1827 | */ | ||||
1828 | #define fts5YYMALLOCARGTYPE u64 | ||||
1829 | |||||
1830 | #line 58 "fts5parse.sql" | ||||
1831 | /**************** End of %include directives **********************************/ | ||||
1832 | /* These constants specify the various numeric values for terminal symbols. | ||||
1833 | ***************** Begin token definitions *************************************/ | ||||
1834 | #ifndef FTS5_OR | ||||
1835 | #define FTS5_OR 1 | ||||
1836 | #define FTS5_AND 2 | ||||
1837 | #define FTS5_NOT 3 | ||||
1838 | #define FTS5_TERM 4 | ||||
1839 | #define FTS5_COLON 5 | ||||
1840 | #define FTS5_MINUS 6 | ||||
1841 | #define FTS5_LCP 7 | ||||
1842 | #define FTS5_RCP 8 | ||||
1843 | #define FTS5_STRING 9 | ||||
1844 | #define FTS5_LP 10 | ||||
1845 | #define FTS5_RP 11 | ||||
1846 | #define FTS5_CARET 12 | ||||
1847 | #define FTS5_COMMA 13 | ||||
1848 | #define FTS5_PLUS 14 | ||||
1849 | #define FTS5_STAR 15 | ||||
1850 | #endif | ||||
1851 | /**************** End token definitions ***************************************/ | ||||
1852 | |||||
1853 | /* The next section is a series of control #defines that determine | ||||
1854 | ** various aspects of the generated parser. | ||||
1855 | ** fts5YYCODETYPE is the data type used to store the integer codes | ||||
1856 | ** that represent terminal and non-terminal symbols. | ||||
1857 | ** "unsigned char" is used if there are fewer than | ||||
1858 | ** 256 symbols. Larger types otherwise. | ||||
1859 | ** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for | ||||
1860 | ** any terminal or nonterminal symbol. | ||||
1861 | ** fts5YYFALLBACK If defined, this indicates that one or more tokens | ||||
1862 | ** (also known as: "terminal symbols") have fall-back | ||||
1863 | ** values which should be used if the original symbol | ||||
1864 | ** would not parse. This permits keywords to sometimes | ||||
1865 | ** be used as identifiers, for example. | ||||
1866 | ** fts5YYACTIONTYPE is the data type used for "action codes" - numbers | ||||
1867 | ** that indicate what to do in response to the next | ||||
1868 | ** token. | ||||
1869 | ** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal | ||||
1870 | ** symbols. Background: A "minor type" is a semantic | ||||
1871 | ** value associated with a terminal or non-terminal | ||||
1872 | ** symbols. For example, for an "ID" terminal symbol, | ||||
1873 | ** the minor type might be the name of the identifier. | ||||
1874 | ** Each non-terminal can have a different minor type. | ||||
1875 | ** Terminal symbols all have the same minor type, though. | ||||
1876 | ** This macro defines the minor type for terminal | ||||
1877 | ** symbols. | ||||
1878 | ** fts5YYMINORTYPE is the data type used for all minor types. | ||||
1879 | ** This is typically a union of many types, one of | ||||
1880 | ** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union | ||||
1881 | ** for terminal symbols is called "fts5yy0". | ||||
1882 | ** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If | ||||
1883 | ** zero the stack is dynamically sized using realloc() | ||||
1884 | ** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument | ||||
1885 | ** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument | ||||
1886 | ** sqlite3Fts5ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter | ||||
1887 | ** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser | ||||
1888 | ** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser | ||||
1889 | ** sqlite3Fts5ParserCTX_* As sqlite3Fts5ParserARG_ except for %extra_context | ||||
1890 | ** fts5YYREALLOC Name of the realloc() function to use | ||||
1891 | ** fts5YYFREE Name of the free() function to use | ||||
1892 | ** fts5YYDYNSTACK True if stack space should be extended on heap | ||||
1893 | ** fts5YYERRORSYMBOL is the code number of the error symbol. If not | ||||
1894 | ** defined, then do no error processing. | ||||
1895 | ** fts5YYNSTATE the combined number of states. | ||||
1896 | ** fts5YYNRULE the number of rules in the grammar | ||||
1897 | ** fts5YYNFTS5TOKEN Number of terminal symbols | ||||
1898 | ** fts5YY_MAX_SHIFT Maximum value for shift actions | ||||
1899 | ** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions | ||||
1900 | ** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions | ||||
1901 | ** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error | ||||
1902 | ** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept | ||||
1903 | ** fts5YY_NO_ACTION The fts5yy_action[] code for no-op | ||||
1904 | ** fts5YY_MIN_REDUCE Minimum value for reduce actions | ||||
1905 | ** fts5YY_MAX_REDUCE Maximum value for reduce actions | ||||
1906 | ** fts5YY_MIN_DSTRCTR Minimum symbol value that has a destructor | ||||
1907 | ** fts5YY_MAX_DSTRCTR Maximum symbol value that has a destructor | ||||
1908 | */ | ||||
1909 | #ifndef INTERFACE | ||||
1910 | # define INTERFACE 1 | ||||
1911 | #endif | ||||
1912 | /************* Begin control #defines *****************************************/ | ||||
1913 | #define fts5YYCODETYPE unsigned char | ||||
1914 | #define fts5YYNOCODE 27 | ||||
1915 | #define fts5YYACTIONTYPE unsigned char | ||||
1916 | #define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token | ||||
1917 | typedef union { | ||||
1918 | int fts5yyinit; | ||||
1919 | sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0; | ||||
1920 | int fts5yy4; | ||||
1921 | Fts5Colset* fts5yy11; | ||||
1922 | Fts5ExprNode* fts5yy24; | ||||
1923 | Fts5ExprNearset* fts5yy46; | ||||
1924 | Fts5ExprPhrase* fts5yy53; | ||||
1925 | } fts5YYMINORTYPE; | ||||
1926 | #ifndef fts5YYSTACKDEPTH | ||||
1927 | #define fts5YYSTACKDEPTH 100 | ||||
1928 | #endif | ||||
1929 | #define sqlite3Fts5ParserARG_SDECLFts5Parse *pParse; Fts5Parse *pParse; | ||||
1930 | #define sqlite3Fts5ParserARG_PDECL,Fts5Parse *pParse ,Fts5Parse *pParse | ||||
1931 | #define sqlite3Fts5ParserARG_PARAM,pParse ,pParse | ||||
1932 | #define sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; Fts5Parse *pParse=fts5yypParser->pParse; | ||||
1933 | #define sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; fts5yypParser->pParse=pParse; | ||||
1934 | #define fts5YYREALLOCrealloc realloc | ||||
1935 | #define fts5YYFREEfree free | ||||
1936 | #define fts5YYDYNSTACK0 0 | ||||
1937 | #define sqlite3Fts5ParserCTX_SDECL | ||||
1938 | #define sqlite3Fts5ParserCTX_PDECL | ||||
1939 | #define sqlite3Fts5ParserCTX_PARAM | ||||
1940 | #define sqlite3Fts5ParserCTX_FETCH | ||||
1941 | #define sqlite3Fts5ParserCTX_STORE | ||||
1942 | #define fts5YYNSTATE35 35 | ||||
1943 | #define fts5YYNRULE28 28 | ||||
1944 | #define fts5YYNRULE_WITH_ACTION28 28 | ||||
1945 | #define fts5YYNFTS5TOKEN16 16 | ||||
1946 | #define fts5YY_MAX_SHIFT34 34 | ||||
1947 | #define fts5YY_MIN_SHIFTREDUCE52 52 | ||||
1948 | #define fts5YY_MAX_SHIFTREDUCE79 79 | ||||
1949 | #define fts5YY_ERROR_ACTION80 80 | ||||
1950 | #define fts5YY_ACCEPT_ACTION81 81 | ||||
1951 | #define fts5YY_NO_ACTION82 82 | ||||
1952 | #define fts5YY_MIN_REDUCE83 83 | ||||
1953 | #define fts5YY_MAX_REDUCE110 110 | ||||
1954 | #define fts5YY_MIN_DSTRCTR16 16 | ||||
1955 | #define fts5YY_MAX_DSTRCTR24 24 | ||||
1956 | /************* End control #defines *******************************************/ | ||||
1957 | #define fts5YY_NLOOKAHEAD((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0]))) ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0]))) | ||||
1958 | |||||
1959 | /* Define the fts5yytestcase() macro to be a no-op if it is not already | ||||
1960 | ** defined. | ||||
1961 | ** | ||||
1962 | ** Applications can choose to define fts5yytestcase() in the %include section | ||||
1963 | ** as a macro that assists in verifying code coverage. For production | ||||
1964 | ** code the fts5yytestcase() macro should be turned off, but it is useful | ||||
1965 | ** for testing. | ||||
1966 | */ | ||||
1967 | #ifndef fts5yytestcase | ||||
1968 | # define fts5yytestcase(X) | ||||
1969 | #endif | ||||
1970 | |||||
1971 | /* Macro that records whether the parser stack is able to grow using | ||||
1972 | ** heap memory: true when fts5YYSTACKDEPTH<=0 or fts5YYDYNSTACK is set. | ||||
1973 | */ | ||||
1974 | #if fts5YYSTACKDEPTH100<=0 || fts5YYDYNSTACK0 | ||||
1975 | # define fts5YYGROWABLESTACK0 1 | ||||
1976 | #else | ||||
1977 | # define fts5YYGROWABLESTACK0 0 | ||||
1978 | #endif | ||||
1979 | |||||
1980 | /* Guarantee a minimum number of initial stack slots: a non-positive | ||||
1981 | ** fts5YYSTACKDEPTH is replaced by 2. */ | ||||
1982 | #if fts5YYSTACKDEPTH100<=0 | ||||
1983 | # undef fts5YYSTACKDEPTH100 | ||||
1984 | # define fts5YYSTACKDEPTH100 2 /* Need a minimum stack size */ | ||||
1985 | #endif | ||||
1986 | |||||
1987 | |||||
1988 | /* Next are the tables used to determine what action to take based on the | ||||
1989 | ** current state and lookahead token. These tables are used to implement | ||||
1990 | ** functions that take a state number and lookahead value and return an | ||||
1991 | ** action integer. | ||||
1992 | ** | ||||
1993 | ** Suppose the action integer is N. Then the action is determined as | ||||
1994 | ** follows | ||||
1995 | ** | ||||
1996 | ** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead | ||||
1997 | ** token onto the stack and goto state N. | ||||
1998 | ** | ||||
1999 | ** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then | ||||
2000 | ** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE. | ||||
2001 | ** | ||||
2002 | ** N == fts5YY_ERROR_ACTION A syntax error has occurred. | ||||
2003 | ** | ||||
2004 | ** N == fts5YY_ACCEPT_ACTION The parser accepts its input. | ||||
2005 | ** | ||||
2006 | ** N == fts5YY_NO_ACTION No such action. Denotes unused | ||||
2007 | ** slots in the fts5yy_action[] table. | ||||
2008 | ** | ||||
2009 | ** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE | ||||
2010 | ** and fts5YY_MAX_REDUCE | ||||
2011 | ** | ||||
2012 | ** The action table is constructed as a single large table named fts5yy_action[]. | ||||
2013 | ** Given state S and lookahead X, the action is computed as either: | ||||
2014 | ** | ||||
2015 | ** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ] | ||||
2016 | ** (B) N = fts5yy_default[S] | ||||
2017 | ** | ||||
2018 | ** The (A) formula is preferred. The B formula is used instead if | ||||
2019 | ** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X. | ||||
2020 | ** | ||||
2021 | ** The formulas above are for computing the action when the lookahead is | ||||
2022 | ** a terminal symbol. If the lookahead is a non-terminal (as occurs after | ||||
2023 | ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of | ||||
2024 | ** the fts5yy_shift_ofst[] array. | ||||
2025 | ** | ||||
2026 | ** The following are the tables generated in this section: | ||||
2027 | ** | ||||
2028 | ** fts5yy_action[] A single table containing all actions. | ||||
2029 | ** fts5yy_lookahead[] A table containing the lookahead for each entry in | ||||
2030 | ** fts5yy_action. Used to detect hash collisions. | ||||
2031 | ** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for | ||||
2032 | ** shifting terminals. | ||||
2033 | ** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for | ||||
2034 | ** shifting non-terminals after a reduce. | ||||
2035 | ** fts5yy_default[] Default action for each state. | ||||
2036 | ** | ||||
2037 | *********** Begin parsing tables **********************************************/ | ||||
2038 | #define fts5YY_ACTTAB_COUNT(105) (105) | ||||
2039 | static const fts5YYACTIONTYPEunsigned char fts5yy_action[] = { /* All actions; looked up at offset-for-state + lookahead */ | ||||
2040 | /* 0 */ 81, 20, 96, 6, 28, 99, 98, 26, 26, 18, | ||||
2041 | /* 10 */ 96, 6, 28, 17, 98, 56, 26, 19, 96, 6, | ||||
2042 | /* 20 */ 28, 14, 98, 14, 26, 31, 92, 96, 6, 28, | ||||
2043 | /* 30 */ 108, 98, 25, 26, 21, 96, 6, 28, 78, 98, | ||||
2044 | /* 40 */ 58, 26, 29, 96, 6, 28, 107, 98, 22, 26, | ||||
2045 | /* 50 */ 24, 16, 12, 11, 1, 13, 13, 24, 16, 23, | ||||
2046 | /* 60 */ 11, 33, 34, 13, 97, 8, 27, 32, 98, 7, | ||||
2047 | /* 70 */ 26, 3, 4, 5, 3, 4, 5, 3, 83, 4, | ||||
2048 | /* 80 */ 5, 3, 63, 5, 3, 62, 12, 2, 86, 13, | ||||
2049 | /* 90 */ 9, 30, 10, 10, 54, 57, 75, 78, 78, 53, | ||||
2050 | /* 100 */ 57, 15, 82, 82, 71, | ||||
2051 | }; | ||||
2052 | static const fts5YYCODETYPEunsigned char fts5yy_lookahead[] = { /* Symbol expected in each fts5yy_action[] slot; a mismatch means "use the default" */ | ||||
2053 | /* 0 */ 16, 17, 18, 19, 20, 22, 22, 24, 24, 17, | ||||
2054 | /* 10 */ 18, 19, 20, 7, 22, 9, 24, 17, 18, 19, | ||||
2055 | /* 20 */ 20, 9, 22, 9, 24, 13, 17, 18, 19, 20, | ||||
2056 | /* 30 */ 26, 22, 24, 24, 17, 18, 19, 20, 15, 22, | ||||
2057 | /* 40 */ 9, 24, 17, 18, 19, 20, 26, 22, 21, 24, | ||||
2058 | /* 50 */ 6, 7, 9, 9, 10, 12, 12, 6, 7, 21, | ||||
2059 | /* 60 */ 9, 24, 25, 12, 18, 5, 20, 14, 22, 5, | ||||
2060 | /* 70 */ 24, 3, 1, 2, 3, 1, 2, 3, 0, 1, | ||||
2061 | /* 80 */ 2, 3, 11, 2, 3, 11, 9, 10, 5, 12, | ||||
2062 | /* 90 */ 23, 24, 10, 10, 8, 9, 9, 15, 15, 8, | ||||
2063 | /* 100 */ 9, 9, 27, 27, 11, 27, 27, 27, 27, 27, | ||||
2064 | /* 110 */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, | ||||
2065 | /* 120 */ 27, | ||||
2066 | }; | ||||
2067 | #define fts5YY_SHIFT_COUNT(34) (34) | ||||
2068 | #define fts5YY_SHIFT_MIN(0) (0) | ||||
2069 | #define fts5YY_SHIFT_MAX(93) (93) | ||||
2070 | static const unsigned char fts5yy_shift_ofst[] = { /* Per state: base offset into fts5yy_action[] for terminal lookaheads */ | ||||
2071 | /* 0 */ 44, 44, 44, 44, 44, 44, 51, 77, 43, 12, | ||||
2072 | /* 10 */ 14, 83, 82, 14, 23, 23, 31, 31, 71, 74, | ||||
2073 | /* 20 */ 78, 81, 86, 91, 6, 53, 53, 60, 64, 68, | ||||
2074 | /* 30 */ 53, 87, 92, 53, 93, | ||||
2075 | }; | ||||
2076 | #define fts5YY_REDUCE_COUNT(17) (17) | ||||
2077 | #define fts5YY_REDUCE_MIN(-17) (-17) | ||||
2078 | #define fts5YY_REDUCE_MAX(67) (67) | ||||
2079 | static const signed char fts5yy_reduce_ofst[] = { /* Per state: base offset into fts5yy_action[] for non-terminals after a reduce */ | ||||
2080 | /* 0 */ -16, -8, 0, 9, 17, 25, 46, -17, -17, 37, | ||||
2081 | /* 10 */ 67, 4, 4, 8, 4, 20, 27, 38, | ||||
2082 | }; | ||||
2083 | static const fts5YYACTIONTYPEunsigned char fts5yy_default[] = { /* Per state: action used when the lookahead check fails (80 is fts5YY_ERROR_ACTION) */ | ||||
2084 | /* 0 */ 80, 80, 80, 80, 80, 80, 95, 80, 80, 105, | ||||
2085 | /* 10 */ 80, 110, 110, 80, 110, 110, 80, 80, 80, 80, | ||||
2086 | /* 20 */ 80, 91, 80, 80, 80, 101, 100, 80, 80, 90, | ||||
2087 | /* 30 */ 103, 80, 80, 104, 80, | ||||
2088 | }; | ||||
2089 | /********** End of lemon-generated parsing tables *****************************/ | ||||
2090 | |||||
2091 | /* The next table maps tokens (terminal symbols) into fallback tokens. | ||||
2092 | ** If a construct like the following: | ||||
2093 | ** | ||||
2094 | ** %fallback ID X Y Z. | ||||
2095 | ** | ||||
2096 | ** appears in the grammar, then ID becomes a fallback token for X, Y, | ||||
2097 | ** and Z. Whenever one of the tokens X, Y, or Z is input to the parser | ||||
2098 | ** but it does not parse, the type of the token is changed to ID and | ||||
2099 | ** the parse is retried before an error is thrown. | ||||
2100 | ** | ||||
2101 | ** This feature can be used, for example, to cause some keywords in a language | ||||
2102 | ** to revert to identifiers if the keyword does not apply in the context where | ||||
2103 | ** it appears. | ||||
2104 | */ | ||||
2105 | #ifdef fts5YYFALLBACK | ||||
2106 | static const fts5YYCODETYPEunsigned char fts5yyFallback[] = { /* No entries are listed for this grammar */ | ||||
2107 | }; | ||||
2108 | #endif /* fts5YYFALLBACK */ | ||||
2109 | |||||
2110 | /* The following structure represents a single element of the | ||||
2111 | ** parser's stack. Information stored includes: | ||||
2112 | ** | ||||
2113 | ** + The state number for the parser at this level of the stack. | ||||
2114 | ** | ||||
2115 | ** + The value of the token stored at this level of the stack. | ||||
2116 | ** (In other words, the "major" token.) | ||||
2117 | ** | ||||
2118 | ** + The semantic value stored at this level of the stack. This is | ||||
2119 | ** the information used by the action routines in the grammar. | ||||
2120 | ** It is sometimes called the "minor" token. | ||||
2121 | ** | ||||
2122 | ** After the "shift" half of a SHIFTREDUCE action, the stateno field | ||||
2123 | ** actually contains the reduce action for the second half of the | ||||
2124 | ** SHIFTREDUCE. | ||||
2125 | */ | ||||
2126 | struct fts5yyStackEntry { | ||||
2127 | fts5YYACTIONTYPEunsigned char stateno; /* The state-number, or reduce action in SHIFTREDUCE */ | ||||
2128 | fts5YYCODETYPEunsigned char major; /* The major token value. This is the code | ||||
2129 | ** number for the token at this stack level */ | ||||
2130 | fts5YYMINORTYPE minor; /* The user-supplied minor token value. This | ||||
2131 | ** is the value of the token */ | ||||
2132 | }; | ||||
2133 | typedef struct fts5yyStackEntry fts5yyStackEntry; | ||||
2134 | |||||
2135 | /* The state of the parser is completely contained in an instance of | ||||
2136 | ** the following structure */ | ||||
2137 | struct fts5yyParser { | ||||
2138 | fts5yyStackEntry *fts5yytos; /* Pointer to top element of the stack */ | ||||
2139 | #ifdef fts5YYTRACKMAXSTACKDEPTH | ||||
2140 | int fts5yyhwm; /* High-water mark of the stack */ | ||||
2141 | #endif | ||||
2142 | #ifndef fts5YYNOERRORRECOVERY1 | ||||
2143 | int fts5yyerrcnt; /* Shifts left before out of the error */ | ||||
2144 | #endif | ||||
2145 | sqlite3Fts5ParserARG_SDECLFts5Parse *pParse; /* A place to hold %extra_argument */ | ||||
2146 | sqlite3Fts5ParserCTX_SDECL /* A place to hold %extra_context */ | ||||
2147 | fts5yyStackEntry *fts5yystackEnd; /* Last entry in the stack */ | ||||
2148 | fts5yyStackEntry *fts5yystack; /* The parser stack */ | ||||
2149 | fts5yyStackEntry fts5yystk0[fts5YYSTACKDEPTH100]; /* Initial stack space */ | ||||
2150 | }; | ||||
2151 | typedef struct fts5yyParser fts5yyParser; | ||||
2152 | |||||
#include <assert.h>
#ifndef NDEBUG
#include <stdio.h>
/* Trace destination and per-line prefix.  Both are zero while tracing
** is switched off. */
static FILE *fts5yyTraceFILE = 0;
static char *fts5yyTracePrompt = 0;
#endif /* NDEBUG */

#ifndef NDEBUG
/*
** Switch parser tracing on or off.
**
** Tracing needs both a stream and a prompt.  If either argument is
** NULL, both trace settings are cleared and tracing is off.
**
** Inputs:
**   TraceFILE     Stream that receives the trace output, or NULL.
**   zTracePrompt  Prefix written at the start of every trace line,
**                 or NULL.
**
** Outputs:
**   None.
*/
static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
  if( TraceFILE==0 || zTracePrompt==0 ){
    fts5yyTraceFILE = 0;
    fts5yyTracePrompt = 0;
    return;
  }
  fts5yyTraceFILE = TraceFILE;
  fts5yyTracePrompt = zTracePrompt;
}
#endif /* NDEBUG */
2185 | |||||
#if defined(fts5YYCOVERAGE) || !defined(NDEBUG)
/* Printable names of all grammar symbols, indexed by symbol code.
** Needed by the trace and coverage output. */
static const char *const fts5yyTokenName[] = {
  /* Terminals: codes 0 through 15 */
  "$",           "OR",          "AND",         "NOT",
  "TERM",        "COLON",       "MINUS",       "LCP",
  "RCP",         "STRING",      "LP",          "RP",
  "CARET",       "COMMA",       "PLUS",        "STAR",
  /* Non-terminals: codes 16 through 26 */
  "input",       "expr",        "cnearset",    "exprlist",
  "colset",      "colsetlist",  "nearset",     "nearphrases",
  "phrase",      "neardist_opt","star_opt",
};
#endif /* defined(fts5YYCOVERAGE) || !defined(NDEBUG) */
2219 | |||||
#ifndef NDEBUG
/* Text of every grammar rule, indexed by rule number.  Used when
** tracing reduce actions. */
static const char *const fts5yyRuleName[] = {
  "input ::= expr",                                     /*  0 */
  "colset ::= MINUS LCP colsetlist RCP",                /*  1 */
  "colset ::= LCP colsetlist RCP",                      /*  2 */
  "colset ::= STRING",                                  /*  3 */
  "colset ::= MINUS STRING",                            /*  4 */
  "colsetlist ::= colsetlist STRING",                   /*  5 */
  "colsetlist ::= STRING",                              /*  6 */
  "expr ::= expr AND expr",                             /*  7 */
  "expr ::= expr OR expr",                              /*  8 */
  "expr ::= expr NOT expr",                             /*  9 */
  "expr ::= colset COLON LP expr RP",                   /* 10 */
  "expr ::= LP expr RP",                                /* 11 */
  "expr ::= exprlist",                                  /* 12 */
  "exprlist ::= cnearset",                              /* 13 */
  "exprlist ::= exprlist cnearset",                     /* 14 */
  "cnearset ::= nearset",                               /* 15 */
  "cnearset ::= colset COLON nearset",                  /* 16 */
  "nearset ::= phrase",                                 /* 17 */
  "nearset ::= CARET phrase",                           /* 18 */
  "nearset ::= STRING LP nearphrases neardist_opt RP",  /* 19 */
  "nearphrases ::= phrase",                             /* 20 */
  "nearphrases ::= nearphrases phrase",                 /* 21 */
  "neardist_opt ::=",                                   /* 22 */
  "neardist_opt ::= COMMA STRING",                      /* 23 */
  "phrase ::= phrase PLUS STRING star_opt",             /* 24 */
  "phrase ::= STRING star_opt",                         /* 25 */
  "star_opt ::= STAR",                                  /* 26 */
  "star_opt ::=",                                       /* 27 */
};
#endif /* NDEBUG */
2254 | |||||
2255 | |||||
#if fts5YYGROWABLESTACK
/*
** Enlarge the parser stack to twice its current size plus 100 entries.
**
** While the stack still lives in the embedded fts5yystk0[] array, a new
** buffer is obtained and the existing entries are copied into it.
** Otherwise the current buffer is resized with fts5YYREALLOC.
**
** Returns 0 on success.  Returns 1 if the allocation fails, in which
** case the parser is left unchanged.
*/
static int fts5yyGrowStack(fts5yyParser *p){
  fts5yyStackEntry *pOld = p->fts5yystack;
  int nOld = 1 + (int)(p->fts5yystackEnd - pOld);
  int nNew = nOld*2 + 100;
  int iTop = (int)(p->fts5yytos - pOld);   /* Keep the top-of-stack index */
  fts5yyStackEntry *pGrown;

  if( pOld!=p->fts5yystk0 ){
    pGrown = fts5YYREALLOC(pOld, nNew*sizeof(pGrown[0]));
    if( pGrown==0 ) return 1;
  }else{
    pGrown = fts5YYREALLOC(0, nNew*sizeof(pGrown[0]));
    if( pGrown==0 ) return 1;
    memcpy(pGrown, pOld, nOld*sizeof(pGrown[0]));
  }
  p->fts5yystack = pGrown;
  p->fts5yytos = &pGrown[iTop];
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n",
            fts5yyTracePrompt, nOld, nNew);
  }
#endif
  p->fts5yystackEnd = &pGrown[nNew-1];
  return 0;
}
#else
/* For builds that do not have a growable stack, fts5yyGrowStack always
** returns an error.
*/
# define fts5yyGrowStack(X) 1
#endif /* fts5YYGROWABLESTACK */
2296 | |||||
/* Type of the size argument taken by the allocator callback that is
** passed to sqlite3Fts5ParserAlloc() below.  This can be changed by
** putting an appropriate #define in the %include section of the input
** grammar; when none is given, size_t is used.
*/
#if !defined(fts5YYMALLOCARGTYPE)
# define fts5YYMALLOCARGTYPE size_t
#endif
2305 | |||||
2306 | /* Initialize a new parser that has already been allocated. | ||||
2307 | */ | ||||
2308 | static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){ | ||||
2309 | fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser; | ||||
2310 | sqlite3Fts5ParserCTX_STORE | ||||
2311 | #ifdef fts5YYTRACKMAXSTACKDEPTH | ||||
2312 | fts5yypParser->fts5yyhwm = 0; | ||||
2313 | #endif | ||||
2314 | fts5yypParser->fts5yystack = fts5yypParser->fts5yystk0; | ||||
2315 | fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH100-1]; | ||||
2316 | #ifndef fts5YYNOERRORRECOVERY1 | ||||
2317 | fts5yypParser->fts5yyerrcnt = -1; | ||||
2318 | #endif | ||||
2319 | fts5yypParser->fts5yytos = fts5yypParser->fts5yystack; | ||||
2320 | fts5yypParser->fts5yystack[0].stateno = 0; | ||||
2321 | fts5yypParser->fts5yystack[0].major = 0; | ||||
2322 | } | ||||
2323 | |||||
2324 | #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK | ||||
2325 | /* | ||||
2326 | ** This function allocates a new parser. | ||||
2327 | ** The only argument is a pointer to a function which works like | ||||
2328 | ** malloc. | ||||
2329 | ** | ||||
2330 | ** Inputs: | ||||
2331 | ** A pointer to the function used to allocate memory. | ||||
2332 | ** | ||||
2333 | ** Outputs: | ||||
2334 | ** A pointer to a parser. This pointer is used in subsequent calls | ||||
2335 | ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree. | ||||
2336 | */ | ||||
2337 | static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPEu64) sqlite3Fts5ParserCTX_PDECL){ | ||||
2338 | fts5yyParser *fts5yypParser; | ||||
2339 | fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPEu64)sizeof(fts5yyParser) ); | ||||
2340 | if( fts5yypParser ){ | ||||
2341 | sqlite3Fts5ParserCTX_STORE | ||||
2342 | sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM); | ||||
2343 | } | ||||
2344 | return (void*)fts5yypParser; | ||||
2345 | } | ||||
2346 | #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */ | ||||
2347 | |||||
2348 | |||||
2349 | /* The following function deletes the "minor type" or semantic value | ||||
2350 | ** associated with a symbol. The symbol can be either a terminal | ||||
2351 | ** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is | ||||
2352 | ** a pointer to the value to be deleted. The code used to do the | ||||
2353 | ** deletions is derived from the %destructor and/or %token_destructor | ||||
2354 | ** directives of the input grammar. | ||||
2355 | */ | ||||
2356 | static void fts5yy_destructor( | ||||
2357 | fts5yyParser *fts5yypParser, /* The parser */ | ||||
2358 | fts5YYCODETYPEunsigned char fts5yymajor, /* Type code for object to destroy */ | ||||
2359 | fts5YYMINORTYPE *fts5yypminor /* The object to be destroyed */ | ||||
2360 | ){ | ||||
2361 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | ||||
2362 | sqlite3Fts5ParserCTX_FETCH | ||||
2363 | switch( fts5yymajor ){ | ||||
2364 | /* Here is inserted the actions which take place when a | ||||
2365 | ** terminal or non-terminal is destroyed. This can happen | ||||
2366 | ** when the symbol is popped from the stack during a | ||||
2367 | ** reduce or during error processing or when a parser is | ||||
2368 | ** being destroyed before it is finished parsing. | ||||
2369 | ** | ||||
2370 | ** Note: during a reduce, the only symbols destroyed are those | ||||
2371 | ** which appear on the RHS of the rule, but which are *not* used | ||||
2372 | ** inside the C code. | ||||
2373 | */ | ||||
2374 | /********* Begin destructor definitions ***************************************/ | ||||
2375 | case 16: /* input */ | ||||
2376 | { | ||||
2377 | #line 83 "fts5parse.y" | ||||
2378 | (void)pParse; | ||||
2379 | #line 606 "fts5parse.sql" | ||||
2380 | } | ||||
2381 | break; | ||||
2382 | case 17: /* expr */ | ||||
2383 | case 18: /* cnearset */ | ||||
2384 | case 19: /* exprlist */ | ||||
2385 | { | ||||
2386 | #line 89 "fts5parse.y" | ||||
2387 | sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24)); | ||||
2388 | #line 615 "fts5parse.sql" | ||||
2389 | } | ||||
2390 | break; | ||||
2391 | case 20: /* colset */ | ||||
2392 | case 21: /* colsetlist */ | ||||
2393 | { | ||||
2394 | #line 93 "fts5parse.y" | ||||
2395 | sqlite3_freesqlite3_api->free((fts5yypminor->fts5yy11)); | ||||
2396 | #line 623 "fts5parse.sql" | ||||
2397 | } | ||||
2398 | break; | ||||
2399 | case 22: /* nearset */ | ||||
2400 | case 23: /* nearphrases */ | ||||
2401 | { | ||||
2402 | #line 148 "fts5parse.y" | ||||
2403 | sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46)); | ||||
2404 | #line 631 "fts5parse.sql" | ||||
2405 | } | ||||
2406 | break; | ||||
2407 | case 24: /* phrase */ | ||||
2408 | { | ||||
2409 | #line 183 "fts5parse.y" | ||||
2410 | sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53)); | ||||
2411 | #line 638 "fts5parse.sql" | ||||
2412 | } | ||||
2413 | break; | ||||
2414 | /********* End destructor definitions *****************************************/ | ||||
2415 | default: break; /* If no destructor action specified: do nothing */ | ||||
2416 | } | ||||
2417 | } | ||||
2418 | |||||
2419 | /* | ||||
2420 | ** Pop the parser's stack once. | ||||
2421 | ** | ||||
2422 | ** If there is a destructor routine associated with the token which | ||||
2423 | ** is popped from the stack, then call it. | ||||
2424 | */ | ||||
2425 | static void fts5yy_pop_parser_stack(fts5yyParser *pParser){ | ||||
2426 | fts5yyStackEntry *fts5yytos; | ||||
2427 | assert( pParser->fts5yytos!=0 )((void) (0)); | ||||
2428 | assert( pParser->fts5yytos > pParser->fts5yystack )((void) (0)); | ||||
2429 | fts5yytos = pParser->fts5yytos--; | ||||
2430 | #ifndef NDEBUG1 | ||||
2431 | if( fts5yyTraceFILE ){ | ||||
2432 | fprintf(fts5yyTraceFILE,"%sPopping %s\n", | ||||
2433 | fts5yyTracePrompt, | ||||
2434 | fts5yyTokenName[fts5yytos->major]); | ||||
2435 | } | ||||
2436 | #endif | ||||
2437 | fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); | ||||
2438 | } | ||||
2439 | |||||
2440 | /* | ||||
2441 | ** Clear all secondary memory allocations from the parser | ||||
2442 | */ | ||||
2443 | static void sqlite3Fts5ParserFinalize(void *p){ | ||||
2444 | fts5yyParser *pParser = (fts5yyParser*)p; | ||||
2445 | |||||
2446 | /* In-lined version of calling fts5yy_pop_parser_stack() for each | ||||
2447 | ** element left in the stack */ | ||||
2448 | fts5yyStackEntry *fts5yytos = pParser->fts5yytos; | ||||
2449 | while( fts5yytos>pParser->fts5yystack ){ | ||||
2450 | #ifndef NDEBUG1 | ||||
2451 | if( fts5yyTraceFILE ){ | ||||
2452 | fprintf(fts5yyTraceFILE,"%sPopping %s\n", | ||||
2453 | fts5yyTracePrompt, | ||||
2454 | fts5yyTokenName[fts5yytos->major]); | ||||
2455 | } | ||||
2456 | #endif | ||||
2457 | if( fts5yytos->major>=fts5YY_MIN_DSTRCTR16 ){ | ||||
2458 | fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); | ||||
2459 | } | ||||
2460 | fts5yytos--; | ||||
2461 | } | ||||
2462 | |||||
2463 | #if fts5YYGROWABLESTACK0 | ||||
2464 | if( pParser->fts5yystack!=pParser->fts5yystk0 ) fts5YYFREEfree(pParser->fts5yystack); | ||||
2465 | #endif | ||||
2466 | } | ||||
2467 | |||||
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
/*
** Destroy a parser and release its memory.  Destructors are run for
** all stack elements before the parser object itself is handed to
** freeProc.
**
** A NULL parser pointer is ignored, unless the
** fts5YYPARSEFREENEVERNULL macro exists (for example because it is
** defined in a %include section of the input grammar), in which case
** the pointer is assumed never to be NULL.
*/
static void sqlite3Fts5ParserFree(
  void *p,                    /* The parser to be deleted */
  void (*freeProc)(void*)     /* Function used to reclaim memory */
){
#ifndef fts5YYPARSEFREENEVERNULL
  if( p!=0 )
#endif
  {
    sqlite3Fts5ParserFinalize(p);
    freeProc(p);
  }
}
#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2488 | |||||
/*
** Report the high-water mark recorded for a parser's stack.  Only
** available when fts5YYTRACKMAXSTACKDEPTH is defined.
*/
#ifdef fts5YYTRACKMAXSTACKDEPTH
static int sqlite3Fts5ParserStackPeak(void *p){
  return ((fts5yyParser*)p)->fts5yyhwm;
}
#endif
2498 | |||||
/* This array of booleans keeps track of the parser statement
** coverage.  The element fts5yycoverage[X][Y] is set when the parser
** is in state X and has a lookahead token Y.  In a well-tested
** system, every element of this matrix should end up being set.
*/
#if defined(fts5YYCOVERAGE)
static unsigned char fts5yycoverage[fts5YYNSTATE][fts5YYNFTS5TOKEN];
#endif

/*
** Describe every state/lookahead combination that has an entry in the
** lookahead table, and count those that
**
**    (1)  have not been used by the parser, and
**    (2)  are not a syntax error.
**
** When "out" is not NULL, one line per combination is written to it,
** marked "ok" or "missed".  Returns the number of missed combinations.
*/
#if defined(fts5YYCOVERAGE)
static int sqlite3Fts5ParserCoverage(FILE *out){
  int nMissed = 0;
  int stateno;

  for(stateno=0; stateno<fts5YYNSTATE; stateno++){
    const int iBase = fts5yy_shift_ofst[stateno];
    int iLookAhead;
    for(iLookAhead=0; iLookAhead<fts5YYNFTS5TOKEN; iLookAhead++){
      int bSeen;
      /* Slots owned by another symbol are not valid for this pair */
      if( fts5yy_lookahead[iBase+iLookAhead]!=iLookAhead ) continue;
      bSeen = fts5yycoverage[stateno][iLookAhead]!=0;
      if( !bSeen ) nMissed++;
      if( out ){
        fprintf(out,"State %d lookahead %s %s\n", stateno,
                fts5yyTokenName[iLookAhead],
                bSeen ? "ok" : "missed");
      }
    }
  }
  return nMissed;
}
#endif
2535 | |||||
2536 | /* | ||||
2537 | ** Find the appropriate action for a parser given the terminal | ||||
2538 | ** look-ahead token iLookAhead. | ||||
2539 | */ | ||||
2540 | static fts5YYACTIONTYPEunsigned char fts5yy_find_shift_action( | ||||
2541 | fts5YYCODETYPEunsigned char iLookAhead, /* The look-ahead token */ | ||||
2542 | fts5YYACTIONTYPEunsigned char stateno /* Current state number */ | ||||
2543 | ){ | ||||
2544 | int i; | ||||
2545 | |||||
2546 | if( stateno>fts5YY_MAX_SHIFT34 ) return stateno; | ||||
2547 | assert( stateno <= fts5YY_SHIFT_COUNT )((void) (0)); | ||||
2548 | #if defined(fts5YYCOVERAGE) | ||||
2549 | fts5yycoverage[stateno][iLookAhead] = 1; | ||||
2550 | #endif | ||||
2551 | do{ | ||||
2552 | i = fts5yy_shift_ofst[stateno]; | ||||
2553 | assert( i>=0 )((void) (0)); | ||||
2554 | assert( i<=fts5YY_ACTTAB_COUNT )((void) (0)); | ||||
2555 | assert( i+fts5YYNFTS5TOKEN<=(int)fts5YY_NLOOKAHEAD )((void) (0)); | ||||
2556 | assert( iLookAhead!=fts5YYNOCODE )((void) (0)); | ||||
2557 | assert( iLookAhead < fts5YYNFTS5TOKEN )((void) (0)); | ||||
2558 | i += iLookAhead; | ||||
2559 | assert( i<(int)fts5YY_NLOOKAHEAD )((void) (0)); | ||||
2560 | if( fts5yy_lookahead[i]!=iLookAhead ){ | ||||
2561 | #ifdef fts5YYFALLBACK | ||||
2562 | fts5YYCODETYPEunsigned char iFallback; /* Fallback token */ | ||||
2563 | assert( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) )((void) (0)); | ||||
2564 | iFallback = fts5yyFallback[iLookAhead]; | ||||
2565 | if( iFallback!=0 ){ | ||||
2566 | #ifndef NDEBUG1 | ||||
2567 | if( fts5yyTraceFILE ){ | ||||
2568 | fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n", | ||||
2569 | fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]); | ||||
2570 | } | ||||
2571 | #endif | ||||
2572 | assert( fts5yyFallback[iFallback]==0 )((void) (0)); /* Fallback loop must terminate */ | ||||
2573 | iLookAhead = iFallback; | ||||
2574 | continue; | ||||
2575 | } | ||||
2576 | #endif | ||||
2577 | #ifdef fts5YYWILDCARD | ||||
2578 | { | ||||
2579 | int j = i - iLookAhead + fts5YYWILDCARD; | ||||
2580 | assert( j<(int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])) )((void) (0)); | ||||
2581 | if( fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 ){ | ||||
2582 | #ifndef NDEBUG1 | ||||
2583 | if( fts5yyTraceFILE ){ | ||||
2584 | fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n", | ||||
2585 | fts5yyTracePrompt, fts5yyTokenName[iLookAhead], | ||||
2586 | fts5yyTokenName[fts5YYWILDCARD]); | ||||
2587 | } | ||||
2588 | #endif /* NDEBUG */ | ||||
2589 | return fts5yy_action[j]; | ||||
2590 | } | ||||
2591 | } | ||||
2592 | #endif /* fts5YYWILDCARD */ | ||||
2593 | return fts5yy_default[stateno]; | ||||
2594 | }else{ | ||||
2595 | assert( i>=0 && i<(int)(sizeof(fts5yy_action)/sizeof(fts5yy_action[0])) )((void) (0)); | ||||
2596 | return fts5yy_action[i]; | ||||
2597 | } | ||||
2598 | }while(1); | ||||
2599 | } | ||||
2600 | |||||
2601 | /* | ||||
2602 | ** Find the appropriate action for a parser given the non-terminal | ||||
2603 | ** look-ahead token iLookAhead. | ||||
2604 | */ | ||||
2605 | static fts5YYACTIONTYPEunsigned char fts5yy_find_reduce_action( | ||||
2606 | fts5YYACTIONTYPEunsigned char stateno, /* Current state number */ | ||||
2607 | fts5YYCODETYPEunsigned char iLookAhead /* The look-ahead token */ | ||||
2608 | ){ | ||||
2609 | int i; | ||||
2610 | #ifdef fts5YYERRORSYMBOL | ||||
2611 | if( stateno>fts5YY_REDUCE_COUNT(17) ){ | ||||
2612 | return fts5yy_default[stateno]; | ||||
2613 | } | ||||
2614 | #else | ||||
2615 | assert( stateno<=fts5YY_REDUCE_COUNT )((void) (0)); | ||||
2616 | #endif | ||||
2617 | i = fts5yy_reduce_ofst[stateno]; | ||||
2618 | assert( iLookAhead!=fts5YYNOCODE )((void) (0)); | ||||
2619 | i += iLookAhead; | ||||
2620 | #ifdef fts5YYERRORSYMBOL | ||||
2621 | if( i<0 || i>=fts5YY_ACTTAB_COUNT(105) || fts5yy_lookahead[i]!=iLookAhead ){ | ||||
2622 | return fts5yy_default[stateno]; | ||||
2623 | } | ||||
2624 | #else | ||||
2625 | assert( i>=0 && i<fts5YY_ACTTAB_COUNT )((void) (0)); | ||||
2626 | assert( fts5yy_lookahead[i]==iLookAhead )((void) (0)); | ||||
2627 | #endif | ||||
2628 | return fts5yy_action[i]; | ||||
2629 | } | ||||
2630 | |||||
2631 | /* | ||||
2632 | ** The following routine is called if the stack overflows. | ||||
2633 | */ | ||||
2634 | static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){ | ||||
2635 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | ||||
2636 | sqlite3Fts5ParserCTX_FETCH | ||||
2637 | #ifndef NDEBUG1 | ||||
2638 | if( fts5yyTraceFILE ){ | ||||
2639 | fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt); | ||||
2640 | } | ||||
2641 | #endif | ||||
2642 | while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser); | ||||
2643 | /* Here code is inserted which will execute if the parser | ||||
2644 | ** stack every overflows */ | ||||
2645 | /******** Begin %stack_overflow code ******************************************/ | ||||
2646 | #line 36 "fts5parse.y" | ||||
2647 | |||||
2648 | sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow"); | ||||
2649 | #line 876 "fts5parse.sql" | ||||
2650 | /******** End %stack_overflow code ********************************************/ | ||||
2651 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument var */ | ||||
2652 | sqlite3Fts5ParserCTX_STORE | ||||
2653 | } | ||||
2654 | |||||
2655 | /* | ||||
2656 | ** Print tracing information for a SHIFT action | ||||
2657 | */ | ||||
2658 | #ifndef NDEBUG1 | ||||
2659 | static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, const char *zTag){ | ||||
2660 | if( fts5yyTraceFILE ){ | ||||
2661 | if( fts5yyNewState<fts5YYNSTATE35 ){ | ||||
2662 | fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n", | ||||
2663 | fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], | ||||
2664 | fts5yyNewState); | ||||
2665 | }else{ | ||||
2666 | fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n", | ||||
2667 | fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], | ||||
2668 | fts5yyNewState - fts5YY_MIN_REDUCE83); | ||||
2669 | } | ||||
2670 | } | ||||
2671 | } | ||||
2672 | #else | ||||
2673 | # define fts5yyTraceShift(X,Y,Z) | ||||
2674 | #endif | ||||
2675 | |||||
2676 | /* | ||||
2677 | ** Perform a shift action. | ||||
2678 | */ | ||||
2679 | static void fts5yy_shift( | ||||
2680 | fts5yyParser *fts5yypParser, /* The parser to be shifted */ | ||||
2681 | fts5YYACTIONTYPEunsigned char fts5yyNewState, /* The new state to shift in */ | ||||
2682 | fts5YYCODETYPEunsigned char fts5yyMajor, /* The major token to shift in */ | ||||
2683 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyMinor /* The minor token to shift in */ | ||||
2684 | ){ | ||||
2685 | fts5yyStackEntry *fts5yytos; | ||||
2686 | fts5yypParser->fts5yytos++; | ||||
2687 | #ifdef fts5YYTRACKMAXSTACKDEPTH | ||||
2688 | if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){ | ||||
2689 | fts5yypParser->fts5yyhwm++; | ||||
2690 | assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) )((void) (0)); | ||||
2691 | } | ||||
2692 | #endif | ||||
2693 | fts5yytos = fts5yypParser->fts5yytos; | ||||
2694 | if( fts5yytos>fts5yypParser->fts5yystackEnd ){ | ||||
2695 | if( fts5yyGrowStack(fts5yypParser)1 ){ | ||||
2696 | fts5yypParser->fts5yytos--; | ||||
2697 | fts5yyStackOverflow(fts5yypParser); | ||||
2698 | return; | ||||
2699 | } | ||||
2700 | fts5yytos = fts5yypParser->fts5yytos; | ||||
2701 | assert( fts5yytos <= fts5yypParser->fts5yystackEnd )((void) (0)); | ||||
2702 | } | ||||
2703 | if( fts5yyNewState > fts5YY_MAX_SHIFT34 ){ | ||||
2704 | fts5yyNewState += fts5YY_MIN_REDUCE83 - fts5YY_MIN_SHIFTREDUCE52; | ||||
2705 | } | ||||
2706 | fts5yytos->stateno = fts5yyNewState; | ||||
2707 | fts5yytos->major = fts5yyMajor; | ||||
2708 | fts5yytos->minor.fts5yy0 = fts5yyMinor; | ||||
2709 | fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift"); | ||||
2710 | } | ||||
2711 | |||||
2712 | /* For rule J, fts5yyRuleInfoLhs[J] contains the symbol on the left-hand side | ||||
2713 | ** of that rule */ | ||||
2714 | static const fts5YYCODETYPEunsigned char fts5yyRuleInfoLhs[] = { | ||||
2715 | 16, /* (0) input ::= expr */ | ||||
2716 | 20, /* (1) colset ::= MINUS LCP colsetlist RCP */ | ||||
2717 | 20, /* (2) colset ::= LCP colsetlist RCP */ | ||||
2718 | 20, /* (3) colset ::= STRING */ | ||||
2719 | 20, /* (4) colset ::= MINUS STRING */ | ||||
2720 | 21, /* (5) colsetlist ::= colsetlist STRING */ | ||||
2721 | 21, /* (6) colsetlist ::= STRING */ | ||||
2722 | 17, /* (7) expr ::= expr AND expr */ | ||||
2723 | 17, /* (8) expr ::= expr OR expr */ | ||||
2724 | 17, /* (9) expr ::= expr NOT expr */ | ||||
2725 | 17, /* (10) expr ::= colset COLON LP expr RP */ | ||||
2726 | 17, /* (11) expr ::= LP expr RP */ | ||||
2727 | 17, /* (12) expr ::= exprlist */ | ||||
2728 | 19, /* (13) exprlist ::= cnearset */ | ||||
2729 | 19, /* (14) exprlist ::= exprlist cnearset */ | ||||
2730 | 18, /* (15) cnearset ::= nearset */ | ||||
2731 | 18, /* (16) cnearset ::= colset COLON nearset */ | ||||
2732 | 22, /* (17) nearset ::= phrase */ | ||||
2733 | 22, /* (18) nearset ::= CARET phrase */ | ||||
2734 | 22, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */ | ||||
2735 | 23, /* (20) nearphrases ::= phrase */ | ||||
2736 | 23, /* (21) nearphrases ::= nearphrases phrase */ | ||||
2737 | 25, /* (22) neardist_opt ::= */ | ||||
2738 | 25, /* (23) neardist_opt ::= COMMA STRING */ | ||||
2739 | 24, /* (24) phrase ::= phrase PLUS STRING star_opt */ | ||||
2740 | 24, /* (25) phrase ::= STRING star_opt */ | ||||
2741 | 26, /* (26) star_opt ::= STAR */ | ||||
2742 | 26, /* (27) star_opt ::= */ | ||||
2743 | }; | ||||
2744 | |||||
/* For rule J, fts5yyRuleInfoNRhs[J] contains the negative of the number
** of symbols on the right-hand side of that rule.  A value of 0 marks a
** rule with an empty right-hand side. */
static const signed char fts5yyRuleInfoNRhs[] = {
   -1,  /* (0) input ::= expr */
   -4,  /* (1) colset ::= MINUS LCP colsetlist RCP */
   -3,  /* (2) colset ::= LCP colsetlist RCP */
   -1,  /* (3) colset ::= STRING */
   -2,  /* (4) colset ::= MINUS STRING */
   -2,  /* (5) colsetlist ::= colsetlist STRING */
   -1,  /* (6) colsetlist ::= STRING */
   -3,  /* (7) expr ::= expr AND expr */
   -3,  /* (8) expr ::= expr OR expr */
   -3,  /* (9) expr ::= expr NOT expr */
   -5,  /* (10) expr ::= colset COLON LP expr RP */
   -3,  /* (11) expr ::= LP expr RP */
   -1,  /* (12) expr ::= exprlist */
   -1,  /* (13) exprlist ::= cnearset */
   -2,  /* (14) exprlist ::= exprlist cnearset */
   -1,  /* (15) cnearset ::= nearset */
   -3,  /* (16) cnearset ::= colset COLON nearset */
   -1,  /* (17) nearset ::= phrase */
   -2,  /* (18) nearset ::= CARET phrase */
   -5,  /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */
   -1,  /* (20) nearphrases ::= phrase */
   -2,  /* (21) nearphrases ::= nearphrases phrase */
    0,  /* (22) neardist_opt ::= */
   -2,  /* (23) neardist_opt ::= COMMA STRING */
   -4,  /* (24) phrase ::= phrase PLUS STRING star_opt */
   -2,  /* (25) phrase ::= STRING star_opt */
   -1,  /* (26) star_opt ::= STAR */
    0,  /* (27) star_opt ::= */
};
2777 | |||||
2778 | static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */ | ||||
2779 | |||||
2780 | /* | ||||
2781 | ** Perform a reduce action and the shift that must immediately | ||||
2782 | ** follow the reduce. | ||||
2783 | ** | ||||
2784 | ** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions | ||||
2785 | ** access to the lookahead token (if any). The fts5yyLookahead will be fts5YYNOCODE | ||||
2786 | ** if the lookahead token has already been consumed. As this procedure is | ||||
2787 | ** only called from one place, optimizing compilers will in-line it, which | ||||
2788 | ** means that the extra parameters have no performance impact. | ||||
2789 | */ | ||||
2790 | static fts5YYACTIONTYPEunsigned char fts5yy_reduce( | ||||
2791 | fts5yyParser *fts5yypParser, /* The parser */ | ||||
2792 | unsigned int fts5yyruleno, /* Number of the rule by which to reduce */ | ||||
2793 | int fts5yyLookahead, /* Lookahead token, or fts5YYNOCODE if none */ | ||||
2794 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyLookaheadToken /* Value of the lookahead token */ | ||||
2795 | sqlite3Fts5ParserCTX_PDECL /* %extra_context */ | ||||
2796 | ){ | ||||
2797 | int fts5yygoto; /* The next state */ | ||||
2798 | fts5YYACTIONTYPEunsigned char fts5yyact; /* The next action */ | ||||
2799 | fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */ | ||||
2800 | int fts5yysize; /* Amount to pop the stack */ | ||||
2801 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | ||||
2802 | (void)fts5yyLookahead; | ||||
2803 | (void)fts5yyLookaheadToken; | ||||
2804 | fts5yymsp = fts5yypParser->fts5yytos; | ||||
2805 | |||||
2806 | switch( fts5yyruleno ){ | ||||
2807 | /* Beginning here are the reduction cases. A typical example | ||||
2808 | ** follows: | ||||
2809 | ** case 0: | ||||
2810 | ** #line <lineno> <grammarfile> | ||||
2811 | ** { ... } // User supplied code | ||||
2812 | ** #line <lineno> <thisfile> | ||||
2813 | ** break; | ||||
2814 | */ | ||||
2815 | /********** Begin reduce actions **********************************************/ | ||||
2816 | fts5YYMINORTYPE fts5yylhsminor; | ||||
2817 | case 0: /* input ::= expr */ | ||||
2818 | #line 82 "fts5parse.y" | ||||
2819 | { sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); } | ||||
2820 | #line 1047 "fts5parse.sql" | ||||
2821 | break; | ||||
2822 | case 1: /* colset ::= MINUS LCP colsetlist RCP */ | ||||
2823 | #line 97 "fts5parse.y" | ||||
2824 | { | ||||
2825 | fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11); | ||||
2826 | } | ||||
2827 | #line 1054 "fts5parse.sql" | ||||
2828 | break; | ||||
2829 | case 2: /* colset ::= LCP colsetlist RCP */ | ||||
2830 | #line 100 "fts5parse.y" | ||||
2831 | { fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; } | ||||
2832 | #line 1059 "fts5parse.sql" | ||||
2833 | break; | ||||
2834 | case 3: /* colset ::= STRING */ | ||||
2835 | #line 101 "fts5parse.y" | ||||
2836 | { | ||||
2837 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | ||||
2838 | } | ||||
2839 | #line 1066 "fts5parse.sql" | ||||
2840 | fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | ||||
2841 | break; | ||||
2842 | case 4: /* colset ::= MINUS STRING */ | ||||
2843 | #line 104 "fts5parse.y" | ||||
2844 | { | ||||
2845 | fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | ||||
2846 | fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11); | ||||
2847 | } | ||||
2848 | #line 1075 "fts5parse.sql" | ||||
2849 | break; | ||||
2850 | case 5: /* colsetlist ::= colsetlist STRING */ | ||||
2851 | #line 109 "fts5parse.y" | ||||
2852 | { | ||||
2853 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy11, &fts5yymsp[0].minor.fts5yy0); } | ||||
2854 | #line 1081 "fts5parse.sql" | ||||
2855 | fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | ||||
2856 | break; | ||||
2857 | case 6: /* colsetlist ::= STRING */ | ||||
2858 | #line 111 "fts5parse.y" | ||||
2859 | { | ||||
2860 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | ||||
2861 | } | ||||
2862 | #line 1089 "fts5parse.sql" | ||||
2863 | fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | ||||
2864 | break; | ||||
2865 | case 7: /* expr ::= expr AND expr */ | ||||
2866 | #line 115 "fts5parse.y" | ||||
2867 | { | ||||
2868 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND2, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | ||||
2869 | } | ||||
2870 | #line 1097 "fts5parse.sql" | ||||
2871 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | ||||
2872 | break; | ||||
2873 | case 8: /* expr ::= expr OR expr */ | ||||
2874 | #line 118 "fts5parse.y" | ||||
2875 | { | ||||
2876 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR1, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | ||||
2877 | } | ||||
2878 | #line 1105 "fts5parse.sql" | ||||
2879 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | ||||
2880 | break; | ||||
2881 | case 9: /* expr ::= expr NOT expr */ | ||||
2882 | #line 121 "fts5parse.y" | ||||
2883 | { | ||||
2884 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT3, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | ||||
2885 | } | ||||
2886 | #line 1113 "fts5parse.sql" | ||||
2887 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | ||||
2888 | break; | ||||
2889 | case 10: /* expr ::= colset COLON LP expr RP */ | ||||
2890 | #line 125 "fts5parse.y" | ||||
2891 | { | ||||
2892 | sqlite3Fts5ParseSetColset(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[-4].minor.fts5yy11); | ||||
2893 | fts5yylhsminor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24; | ||||
2894 | } | ||||
2895 | #line 1122 "fts5parse.sql" | ||||
2896 | fts5yymsp[-4].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | ||||
2897 | break; | ||||
2898 | case 11: /* expr ::= LP expr RP */ | ||||
2899 | #line 129 "fts5parse.y" | ||||
2900 | {fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;} | ||||
2901 | #line 1128 "fts5parse.sql" | ||||
2902 | break; | ||||
2903 | case 12: /* expr ::= exprlist */ | ||||
2904 | case 13: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==13); | ||||
2905 | #line 130 "fts5parse.y" | ||||
2906 | {fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;} | ||||
2907 | #line 1134 "fts5parse.sql" | ||||
2908 | fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | ||||
2909 | break; | ||||
2910 | case 14: /* exprlist ::= exprlist cnearset */ | ||||
2911 | #line 133 "fts5parse.y" | ||||
2912 | { | ||||
2913 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24); | ||||
2914 | } | ||||
2915 | #line 1142 "fts5parse.sql" | ||||
2916 | fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | ||||
2917 | break; | ||||
2918 | case 15: /* cnearset ::= nearset */ | ||||
2919 | #line 137 "fts5parse.y" | ||||
2920 | { | ||||
2921 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, 0, 0, fts5yymsp[0].minor.fts5yy46); | ||||
2922 | } | ||||
2923 | #line 1150 "fts5parse.sql" | ||||
2924 | fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | ||||
2925 | break; | ||||
2926 | case 16: /* cnearset ::= colset COLON nearset */ | ||||
2927 | #line 140 "fts5parse.y" | ||||
2928 | { | ||||
2929 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, 0, 0, fts5yymsp[0].minor.fts5yy46); | ||||
2930 | sqlite3Fts5ParseSetColset(pParse, fts5yylhsminor.fts5yy24, fts5yymsp[-2].minor.fts5yy11); | ||||
2931 | } | ||||
2932 | #line 1159 "fts5parse.sql" | ||||
2933 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | ||||
2934 | break; | ||||
2935 | case 17: /* nearset ::= phrase */ | ||||
2936 | #line 151 "fts5parse.y" | ||||
2937 | { fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); } | ||||
2938 | #line 1165 "fts5parse.sql" | ||||
2939 | fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | ||||
2940 | break; | ||||
2941 | case 18: /* nearset ::= CARET phrase */ | ||||
2942 | #line 152 "fts5parse.y" | ||||
2943 | { | ||||
2944 | sqlite3Fts5ParseSetCaret(fts5yymsp[0].minor.fts5yy53); | ||||
2945 | fts5yymsp[-1].minor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); | ||||
2946 | } | ||||
2947 | #line 1174 "fts5parse.sql" | ||||
2948 | break; | ||||
2949 | case 19: /* nearset ::= STRING LP nearphrases neardist_opt RP */ | ||||
2950 | #line 156 "fts5parse.y" | ||||
2951 | { | ||||
2952 | sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0); | ||||
2953 | sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-1].minor.fts5yy0); | ||||
2954 | fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46; | ||||
2955 | } | ||||
2956 | #line 1183 "fts5parse.sql" | ||||
2957 | fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | ||||
2958 | break; | ||||
2959 | case 20: /* nearphrases ::= phrase */ | ||||
2960 | #line 162 "fts5parse.y" | ||||
2961 | { | ||||
2962 | fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); | ||||
2963 | } | ||||
2964 | #line 1191 "fts5parse.sql" | ||||
2965 | fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | ||||
2966 | break; | ||||
2967 | case 21: /* nearphrases ::= nearphrases phrase */ | ||||
2968 | #line 165 "fts5parse.y" | ||||
2969 | { | ||||
2970 | fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy46, fts5yymsp[0].minor.fts5yy53); | ||||
2971 | } | ||||
2972 | #line 1199 "fts5parse.sql" | ||||
2973 | fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | ||||
2974 | break; | ||||
2975 | case 22: /* neardist_opt ::= */ | ||||
2976 | #line 172 "fts5parse.y" | ||||
2977 | { fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; } | ||||
2978 | #line 1205 "fts5parse.sql" | ||||
2979 | break; | ||||
2980 | case 23: /* neardist_opt ::= COMMA STRING */ | ||||
2981 | #line 173 "fts5parse.y" | ||||
2982 | { fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; } | ||||
2983 | #line 1210 "fts5parse.sql" | ||||
2984 | break; | ||||
2985 | case 24: /* phrase ::= phrase PLUS STRING star_opt */ | ||||
2986 | #line 185 "fts5parse.y" | ||||
2987 | { | ||||
2988 | fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4); | ||||
2989 | } | ||||
2990 | #line 1217 "fts5parse.sql" | ||||
2991 | fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53; | ||||
2992 | break; | ||||
2993 | case 25: /* phrase ::= STRING star_opt */ | ||||
2994 | #line 188 "fts5parse.y" | ||||
2995 | { | ||||
2996 | fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4); | ||||
2997 | } | ||||
2998 | #line 1225 "fts5parse.sql" | ||||
2999 | fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53; | ||||
3000 | break; | ||||
3001 | case 26: /* star_opt ::= STAR */ | ||||
3002 | #line 196 "fts5parse.y" | ||||
3003 | { fts5yymsp[0].minor.fts5yy4 = 1; } | ||||
3004 | #line 1231 "fts5parse.sql" | ||||
3005 | break; | ||||
3006 | case 27: /* star_opt ::= */ | ||||
3007 | #line 197 "fts5parse.y" | ||||
3008 | { fts5yymsp[1].minor.fts5yy4 = 0; } | ||||
3009 | #line 1236 "fts5parse.sql" | ||||
3010 | break; | ||||
3011 | default: | ||||
3012 | break; | ||||
3013 | /********** End reduce actions ************************************************/ | ||||
3014 | }; | ||||
3015 | assert( fts5yyruleno<sizeof(fts5yyRuleInfoLhs)/sizeof(fts5yyRuleInfoLhs[0]) )((void) (0)); | ||||
3016 | fts5yygoto = fts5yyRuleInfoLhs[fts5yyruleno]; | ||||
3017 | fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno]; | ||||
3018 | fts5yyact = fts5yy_find_reduce_action(fts5yymsp[fts5yysize].stateno,(fts5YYCODETYPEunsigned char)fts5yygoto); | ||||
3019 | |||||
3020 | /* There are no SHIFTREDUCE actions on nonterminals because the table | ||||
3021 | ** generator has simplified them to pure REDUCE actions. */ | ||||
3022 | assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) )((void) (0)); | ||||
3023 | |||||
3024 | /* It is not possible for a REDUCE to be followed by an error */ | ||||
3025 | assert( fts5yyact!=fts5YY_ERROR_ACTION )((void) (0)); | ||||
3026 | |||||
3027 | fts5yymsp += fts5yysize+1; | ||||
3028 | fts5yypParser->fts5yytos = fts5yymsp; | ||||
3029 | fts5yymsp->stateno = (fts5YYACTIONTYPEunsigned char)fts5yyact; | ||||
3030 | fts5yymsp->major = (fts5YYCODETYPEunsigned char)fts5yygoto; | ||||
3031 | fts5yyTraceShift(fts5yypParser, fts5yyact, "... then shift"); | ||||
3032 | return fts5yyact; | ||||
3033 | } | ||||
3034 | |||||
3035 | /* | ||||
3036 | ** The following code executes when the parse fails | ||||
3037 | */ | ||||
3038 | #ifndef fts5YYNOERRORRECOVERY1 | ||||
3039 | static void fts5yy_parse_failed( | ||||
3040 | fts5yyParser *fts5yypParser /* The parser */ | ||||
3041 | ){ | ||||
3042 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | ||||
3043 | sqlite3Fts5ParserCTX_FETCH | ||||
3044 | #ifndef NDEBUG1 | ||||
3045 | if( fts5yyTraceFILE ){ | ||||
3046 | fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt); | ||||
3047 | } | ||||
3048 | #endif | ||||
3049 | while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser); | ||||
3050 | /* Here code is inserted which will be executed whenever the | ||||
3051 | ** parser fails */ | ||||
3052 | /************ Begin %parse_failure code ***************************************/ | ||||
3053 | /************ End %parse_failure code *****************************************/ | ||||
3054 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | ||||
3055 | sqlite3Fts5ParserCTX_STORE | ||||
3056 | } | ||||
3057 | #endif /* fts5YYNOERRORRECOVERY */ | ||||
3058 | |||||
3059 | /* | ||||
3060 | ** The following code executes when a syntax error first occurs. | ||||
3061 | */ | ||||
3062 | static void fts5yy_syntax_error( | ||||
3063 | fts5yyParser *fts5yypParser, /* The parser */ | ||||
3064 | int fts5yymajor, /* The major type of the error token */ | ||||
3065 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyminor /* The minor type of the error token */ | ||||
3066 | ){ | ||||
3067 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | ||||
3068 | sqlite3Fts5ParserCTX_FETCH | ||||
3069 | #define FTS5TOKENfts5yyminor fts5yyminor | ||||
3070 | /************ Begin %syntax_error code ****************************************/ | ||||
3071 | #line 30 "fts5parse.y" | ||||
3072 | |||||
3073 | UNUSED_PARAM(fts5yymajor)(void)(fts5yymajor); /* Silence a compiler warning */ | ||||
3074 | sqlite3Fts5ParseError( | ||||
3075 | pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKENfts5yyminor.n,FTS5TOKENfts5yyminor.p | ||||
3076 | ); | ||||
3077 | #line 1304 "fts5parse.sql" | ||||
3078 | /************ End %syntax_error code ******************************************/ | ||||
3079 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | ||||
3080 | sqlite3Fts5ParserCTX_STORE | ||||
3081 | } | ||||
3082 | |||||
3083 | /* | ||||
3084 | ** The following is executed when the parser accepts | ||||
3085 | */ | ||||
3086 | static void fts5yy_accept( | ||||
3087 | fts5yyParser *fts5yypParser /* The parser */ | ||||
3088 | ){ | ||||
3089 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | ||||
3090 | sqlite3Fts5ParserCTX_FETCH | ||||
3091 | #ifndef NDEBUG1 | ||||
3092 | if( fts5yyTraceFILE ){ | ||||
3093 | fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt); | ||||
3094 | } | ||||
3095 | #endif | ||||
3096 | #ifndef fts5YYNOERRORRECOVERY1 | ||||
3097 | fts5yypParser->fts5yyerrcnt = -1; | ||||
3098 | #endif | ||||
3099 | assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack )((void) (0)); | ||||
3100 | /* Here code is inserted which will be executed whenever the | ||||
3101 | ** parser accepts */ | ||||
3102 | /*********** Begin %parse_accept code *****************************************/ | ||||
3103 | /*********** End %parse_accept code *******************************************/ | ||||
3104 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | ||||
3105 | sqlite3Fts5ParserCTX_STORE | ||||
3106 | } | ||||
3107 | |||||
3108 | /* The main parser program. | ||||
3109 | ** The first argument is a pointer to a structure obtained from | ||||
3110 | ** "sqlite3Fts5ParserAlloc" which describes the current state of the parser. | ||||
3111 | ** The second argument is the major token number. The third is | ||||
3112 | ** the minor token. The fourth optional argument is whatever the | ||||
3113 | ** user wants (and specified in the grammar) and is available for | ||||
3114 | ** use by the action routines. | ||||
3115 | ** | ||||
3116 | ** Inputs: | ||||
3117 | ** <ul> | ||||
3118 | ** <li> A pointer to the parser (an opaque structure.) | ||||
3119 | ** <li> The major token number. | ||||
3120 | ** <li> The minor token number. | ||||
3121 | ** <li> An optional argument of a grammar-specified type. | ||||
3122 | ** </ul> | ||||
3123 | ** | ||||
3124 | ** Outputs: | ||||
3125 | ** None. | ||||
3126 | */ | ||||
3127 | static void sqlite3Fts5Parser( | ||||
3128 | void *fts5yyp, /* The parser */ | ||||
3129 | int fts5yymajor, /* The major token code number */ | ||||
3130 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyminor /* The value for the token */ | ||||
3131 | sqlite3Fts5ParserARG_PDECL,Fts5Parse *pParse /* Optional %extra_argument parameter */ | ||||
3132 | ){ | ||||
3133 | fts5YYMINORTYPE fts5yyminorunion; | ||||
3134 | fts5YYACTIONTYPEunsigned char fts5yyact; /* The parser action. */ | ||||
3135 | #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY1) | ||||
3136 | int fts5yyendofinput; /* True if we are at the end of input */ | ||||
3137 | #endif | ||||
3138 | #ifdef fts5YYERRORSYMBOL | ||||
3139 | int fts5yyerrorhit = 0; /* True if fts5yymajor has invoked an error */ | ||||
3140 | #endif | ||||
3141 | fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp; /* The parser */ | ||||
3142 | sqlite3Fts5ParserCTX_FETCH | ||||
3143 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; | ||||
3144 | |||||
3145 | assert( fts5yypParser->fts5yytos!=0 )((void) (0)); | ||||
3146 | #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY1) | ||||
3147 | fts5yyendofinput = (fts5yymajor==0); | ||||
3148 | #endif | ||||
3149 | |||||
3150 | fts5yyact = fts5yypParser->fts5yytos->stateno; | ||||
3151 | #ifndef NDEBUG1 | ||||
3152 | if( fts5yyTraceFILE ){ | ||||
3153 | if( fts5yyact < fts5YY_MIN_REDUCE83 ){ | ||||
3154 | fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n", | ||||
3155 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact); | ||||
3156 | }else{ | ||||
3157 | fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n", | ||||
3158 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE83); | ||||
3159 | } | ||||
3160 | } | ||||
3161 | #endif | ||||
3162 | |||||
3163 | while(1){ /* Exit by "break" */ | ||||
3164 | assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack )((void) (0)); | ||||
3165 | assert( fts5yyact==fts5yypParser->fts5yytos->stateno )((void) (0)); | ||||
3166 | fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPEunsigned char)fts5yymajor,fts5yyact); | ||||
3167 | if( fts5yyact >= fts5YY_MIN_REDUCE83 ){ | ||||
3168 | unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE83; /* Reduce by this rule */ | ||||
3169 | #ifndef NDEBUG1 | ||||
3170 | assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) )((void) (0)); | ||||
3171 | if( fts5yyTraceFILE ){ | ||||
3172 | int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno]; | ||||
3173 | if( fts5yysize ){ | ||||
3174 | fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n", | ||||
3175 | fts5yyTracePrompt, | ||||
3176 | fts5yyruleno, fts5yyRuleName[fts5yyruleno], | ||||
3177 | fts5yyruleno<fts5YYNRULE_WITH_ACTION28 ? "" : " without external action", | ||||
3178 | fts5yypParser->fts5yytos[fts5yysize].stateno); | ||||
3179 | }else{ | ||||
3180 | fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n", | ||||
3181 | fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno], | ||||
3182 | fts5yyruleno<fts5YYNRULE_WITH_ACTION28 ? "" : " without external action"); | ||||
3183 | } | ||||
3184 | } | ||||
3185 | #endif /* NDEBUG */ | ||||
3186 | |||||
3187 | /* Check that the stack is large enough to grow by a single entry | ||||
3188 | ** if the RHS of the rule is empty. This ensures that there is room | ||||
3189 | ** enough on the stack to push the LHS value */ | ||||
3190 | if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){ | ||||
3191 | #ifdef fts5YYTRACKMAXSTACKDEPTH | ||||
3192 | if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){ | ||||
3193 | fts5yypParser->fts5yyhwm++; | ||||
3194 | assert( fts5yypParser->fts5yyhwm ==((void) (0)) | ||||
3195 | (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack))((void) (0)); | ||||
3196 | } | ||||
3197 | #endif | ||||
3198 | if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){ | ||||
3199 | if( fts5yyGrowStack(fts5yypParser)1 ){ | ||||
3200 | fts5yyStackOverflow(fts5yypParser); | ||||
3201 | break; | ||||
3202 | } | ||||
3203 | } | ||||
3204 | } | ||||
3205 | fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM); | ||||
3206 | }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE79 ){ | ||||
3207 | fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPEunsigned char)fts5yymajor,fts5yyminor); | ||||
3208 | #ifndef fts5YYNOERRORRECOVERY1 | ||||
3209 | fts5yypParser->fts5yyerrcnt--; | ||||
3210 | #endif | ||||
3211 | break; | ||||
3212 | }else if( fts5yyact==fts5YY_ACCEPT_ACTION81 ){ | ||||
3213 | fts5yypParser->fts5yytos--; | ||||
3214 | fts5yy_accept(fts5yypParser); | ||||
3215 | return; | ||||
3216 | }else{ | ||||
3217 | assert( fts5yyact == fts5YY_ERROR_ACTION )((void) (0)); | ||||
3218 | fts5yyminorunion.fts5yy0 = fts5yyminor; | ||||
3219 | #ifdef fts5YYERRORSYMBOL | ||||
3220 | int fts5yymx; | ||||
3221 | #endif | ||||
3222 | #ifndef NDEBUG1 | ||||
3223 | if( fts5yyTraceFILE ){ | ||||
3224 | fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt); | ||||
3225 | } | ||||
3226 | #endif | ||||
3227 | #ifdef fts5YYERRORSYMBOL | ||||
3228 | /* A syntax error has occurred. | ||||
3229 | ** The response to an error depends upon whether or not the | ||||
3230 | ** grammar defines an error token "ERROR". | ||||
3231 | ** | ||||
3232 | ** This is what we do if the grammar does define ERROR: | ||||
3233 | ** | ||||
3234 | ** * Call the %syntax_error function. | ||||
3235 | ** | ||||
3236 | ** * Begin popping the stack until we enter a state where | ||||
3237 | ** it is legal to shift the error symbol, then shift | ||||
3238 | ** the error symbol. | ||||
3239 | ** | ||||
3240 | ** * Set the error count to three. | ||||
3241 | ** | ||||
3242 | ** * Begin accepting and shifting new tokens. No new error | ||||
3243 | ** processing will occur until three tokens have been | ||||
3244 | ** shifted successfully. | ||||
3245 | ** | ||||
3246 | */ | ||||
3247 | if( fts5yypParser->fts5yyerrcnt<0 ){ | ||||
3248 | fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor); | ||||
3249 | } | ||||
3250 | fts5yymx = fts5yypParser->fts5yytos->major; | ||||
3251 | if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){ | ||||
3252 | #ifndef NDEBUG1 | ||||
3253 | if( fts5yyTraceFILE ){ | ||||
3254 | fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n", | ||||
3255 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]); | ||||
3256 | } | ||||
3257 | #endif | ||||
3258 | fts5yy_destructor(fts5yypParser, (fts5YYCODETYPEunsigned char)fts5yymajor, &fts5yyminorunion); | ||||
3259 | fts5yymajor = fts5YYNOCODE27; | ||||
3260 | }else{ | ||||
3261 | while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){ | ||||
3262 | fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno, | ||||
3263 | fts5YYERRORSYMBOL); | ||||
3264 | if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE79 ) break; | ||||
3265 | fts5yy_pop_parser_stack(fts5yypParser); | ||||
3266 | } | ||||
3267 | if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){ | ||||
3268 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | ||||
3269 | fts5yy_parse_failed(fts5yypParser); | ||||
3270 | #ifndef fts5YYNOERRORRECOVERY1 | ||||
3271 | fts5yypParser->fts5yyerrcnt = -1; | ||||
3272 | #endif | ||||
3273 | fts5yymajor = fts5YYNOCODE27; | ||||
3274 | }else if( fts5yymx!=fts5YYERRORSYMBOL ){ | ||||
3275 | fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor); | ||||
3276 | } | ||||
3277 | } | ||||
3278 | fts5yypParser->fts5yyerrcnt = 3; | ||||
3279 | fts5yyerrorhit = 1; | ||||
3280 | if( fts5yymajor==fts5YYNOCODE27 ) break; | ||||
3281 | fts5yyact = fts5yypParser->fts5yytos->stateno; | ||||
3282 | #elif defined(fts5YYNOERRORRECOVERY1) | ||||
3283 | /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to | ||||
3284 | ** do any kind of error recovery. Instead, simply invoke the syntax | ||||
3285 | ** error routine and continue going as if nothing had happened. | ||||
3286 | ** | ||||
3287 | ** Applications can set this macro (for example inside %include) if | ||||
3288 | ** they intend to abandon the parse upon the first syntax error seen. | ||||
3289 | */ | ||||
3290 | fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor); | ||||
3291 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | ||||
3292 | break; | ||||
3293 | #else /* fts5YYERRORSYMBOL is not defined */ | ||||
3294 | /* This is what we do if the grammar does not define ERROR: | ||||
3295 | ** | ||||
3296 | ** * Report an error message, and throw away the input token. | ||||
3297 | ** | ||||
3298 | ** * If the input token is $, then fail the parse. | ||||
3299 | ** | ||||
3300 | ** As before, subsequent error messages are suppressed until | ||||
3301 | ** three input tokens have been successfully shifted. | ||||
3302 | */ | ||||
3303 | if( fts5yypParser->fts5yyerrcnt<=0 ){ | ||||
3304 | fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor); | ||||
3305 | } | ||||
3306 | fts5yypParser->fts5yyerrcnt = 3; | ||||
3307 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | ||||
3308 | if( fts5yyendofinput ){ | ||||
3309 | fts5yy_parse_failed(fts5yypParser); | ||||
3310 | #ifndef fts5YYNOERRORRECOVERY1 | ||||
3311 | fts5yypParser->fts5yyerrcnt = -1; | ||||
3312 | #endif | ||||
3313 | } | ||||
3314 | break; | ||||
3315 | #endif | ||||
3316 | } | ||||
3317 | } | ||||
3318 | #ifndef NDEBUG1 | ||||
3319 | if( fts5yyTraceFILE ){ | ||||
3320 | fts5yyStackEntry *i; | ||||
3321 | char cDiv = '['; | ||||
3322 | fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt); | ||||
3323 | for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){ | ||||
3324 | fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]); | ||||
3325 | cDiv = ' '; | ||||
3326 | } | ||||
3327 | fprintf(fts5yyTraceFILE,"]\n"); | ||||
3328 | } | ||||
3329 | #endif | ||||
3330 | return; | ||||
3331 | } | ||||
3332 | |||||
/*
** Map canonical token code iToken onto its fallback token code. The
** result is 0 if iToken has no fallback.
**
** If the parser was generated without a fallback table (fts5YYFALLBACK
** is not defined), the argument is ignored and 0 is always returned.
*/
static int sqlite3Fts5ParserFallback(int iToken){
#ifndef fts5YYFALLBACK
  (void)iToken;
  return 0;
#else
  /* iToken must index an existing slot of the fallback table */
  assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) );
  return fts5yyFallback[iToken];
#endif
}
3346 | |||||
3347 | #line 1 "fts5_aux.c" | ||||
3348 | /* | ||||
3349 | ** 2014 May 31 | ||||
3350 | ** | ||||
3351 | ** The author disclaims copyright to this source code. In place of | ||||
3352 | ** a legal notice, here is a blessing: | ||||
3353 | ** | ||||
3354 | ** May you do good and not evil. | ||||
3355 | ** May you find forgiveness for yourself and forgive others. | ||||
3356 | ** May you share freely, never taking more than you give. | ||||
3357 | ** | ||||
3358 | ****************************************************************************** | ||||
3359 | */ | ||||
3360 | |||||
3361 | |||||
3362 | /* #include "fts5Int.h" */ | ||||
3363 | #include <math.h> /* amalgamator: keep */ | ||||
3364 | |||||
3365 | /* | ||||
3366 | ** Object used to iterate through all "coalesced phrase instances" in | ||||
3367 | ** a single column of the current row. If the phrase instances in the | ||||
3368 | ** column being considered do not overlap, this object simply iterates | ||||
3369 | ** through them. Or, if they do overlap (share one or more tokens in | ||||
3370 | ** common), each set of overlapping instances is treated as a single | ||||
3371 | ** match. See documentation for the highlight() auxiliary function for | ||||
3372 | ** details. | ||||
3373 | ** | ||||
3374 | ** Usage is: | ||||
3375 | ** | ||||
3376 | ** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter); | ||||
3377 | ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); | ||||
3378 | ** rc = fts5CInstIterNext(&iter) | ||||
3379 | ** ){ | ||||
3380 | ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); | ||||
3381 | ** } | ||||
3382 | ** | ||||
3383 | */ | ||||
3384 | typedef struct CInstIter CInstIter; | ||||
3385 | struct CInstIter { | ||||
3386 | const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ | ||||
3387 | Fts5Context *pFts; /* First arg to pass to pApi functions */ | ||||
3388 | int iCol; /* Column to search */ | ||||
3389 | int iInst; /* Next phrase instance index */ | ||||
3390 | int nInst; /* Total number of phrase instances */ | ||||
3391 | |||||
3392 | /* Output variables */ | ||||
3393 | int iStart; /* First token in coalesced phrase instance */ | ||||
3394 | int iEnd; /* Last token in coalesced phrase instance */ | ||||
3395 | }; | ||||
3396 | |||||
3397 | /* | ||||
3398 | ** Advance the iterator to the next coalesced phrase instance. Return | ||||
3399 | ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. | ||||
3400 | */ | ||||
3401 | static int fts5CInstIterNext(CInstIter *pIter){ | ||||
3402 | int rc = SQLITE_OK0; | ||||
3403 | pIter->iStart = -1; | ||||
3404 | pIter->iEnd = -1; | ||||
3405 | |||||
3406 | while( rc==SQLITE_OK0 && pIter->iInst<pIter->nInst ){ | ||||
3407 | int ip; int ic; int io; | ||||
3408 | rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); | ||||
3409 | if( rc==SQLITE_OK0 ){ | ||||
3410 | if( ic==pIter->iCol ){ | ||||
3411 | int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); | ||||
3412 | if( pIter->iStart<0 ){ | ||||
3413 | pIter->iStart = io; | ||||
3414 | pIter->iEnd = iEnd; | ||||
3415 | }else if( io<=pIter->iEnd ){ | ||||
3416 | if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; | ||||
3417 | }else{ | ||||
3418 | break; | ||||
3419 | } | ||||
3420 | } | ||||
3421 | pIter->iInst++; | ||||
3422 | } | ||||
3423 | } | ||||
3424 | |||||
3425 | return rc; | ||||
3426 | } | ||||
3427 | |||||
3428 | /* | ||||
3429 | ** Initialize the iterator object indicated by the final parameter to | ||||
3430 | ** iterate through coalesced phrase instances in column iCol. | ||||
3431 | */ | ||||
3432 | static int fts5CInstIterInit( | ||||
3433 | const Fts5ExtensionApi *pApi, | ||||
3434 | Fts5Context *pFts, | ||||
3435 | int iCol, | ||||
3436 | CInstIter *pIter | ||||
3437 | ){ | ||||
3438 | int rc; | ||||
3439 | |||||
3440 | memset(pIter, 0, sizeof(CInstIter)); | ||||
3441 | pIter->pApi = pApi; | ||||
3442 | pIter->pFts = pFts; | ||||
3443 | pIter->iCol = iCol; | ||||
3444 | rc = pApi->xInstCount(pFts, &pIter->nInst); | ||||
3445 | |||||
3446 | if( rc==SQLITE_OK0 ){ | ||||
3447 | rc = fts5CInstIterNext(pIter); | ||||
3448 | } | ||||
3449 | |||||
3450 | return rc; | ||||
3451 | } | ||||
3452 | |||||
3453 | |||||
3454 | |||||
3455 | /************************************************************************* | ||||
3456 | ** Start of highlight() implementation. | ||||
3457 | */ | ||||
3458 | typedef struct HighlightContext HighlightContext; | ||||
3459 | struct HighlightContext { | ||||
3460 | /* Constant parameters to fts5HighlightCb() */ | ||||
3461 | int iRangeStart; /* First token to include */ | ||||
3462 | int iRangeEnd; /* If non-zero, last token to include */ | ||||
3463 | const char *zOpen; /* Opening highlight */ | ||||
3464 | const char *zClose; /* Closing highlight */ | ||||
3465 | const char *zIn; /* Input text */ | ||||
3466 | int nIn; /* Size of input text in bytes */ | ||||
3467 | |||||
3468 | /* Variables modified by fts5HighlightCb() */ | ||||
3469 | CInstIter iter; /* Coalesced Instance Iterator */ | ||||
3470 | int iPos; /* Current token offset in zIn[] */ | ||||
3471 | int iOff; /* Have copied up to this offset in zIn[] */ | ||||
3472 | int bOpen; /* True if highlight is open */ | ||||
3473 | char *zOut; /* Output value */ | ||||
3474 | }; | ||||
3475 | |||||
3476 | /* | ||||
3477 | ** Append text to the HighlightContext output string - p->zOut. Argument | ||||
3478 | ** z points to a buffer containing n bytes of text to append. If n is | ||||
3479 | ** negative, everything up until the first '\0' is appended to the output. | ||||
3480 | ** | ||||
3481 | ** If *pRc is set to any value other than SQLITE_OK when this function is | ||||
3482 | ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, | ||||
3483 | ** *pRc is set to an error code before returning. | ||||
3484 | */ | ||||
3485 | static void fts5HighlightAppend( | ||||
3486 | int *pRc, | ||||
3487 | HighlightContext *p, | ||||
3488 | const char *z, int n | ||||
3489 | ){ | ||||
3490 | if( *pRc==SQLITE_OK0 && z ){ | ||||
3491 | if( n<0 ) n = (int)strlen(z); | ||||
3492 | p->zOut = sqlite3_mprintfsqlite3_api->mprintf("%z%.*s", p->zOut, n, z); | ||||
3493 | if( p->zOut==0 ) *pRc = SQLITE_NOMEM7; | ||||
3494 | } | ||||
3495 | } | ||||
3496 | |||||
3497 | /* | ||||
3498 | ** Tokenizer callback used by implementation of highlight() function. | ||||
3499 | */ | ||||
3500 | static int fts5HighlightCb( | ||||
3501 | void *pContext, /* Pointer to HighlightContext object */ | ||||
3502 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | ||||
3503 | const char *pToken, /* Buffer containing token */ | ||||
3504 | int nToken, /* Size of token in bytes */ | ||||
3505 | int iStartOff, /* Start byte offset of token */ | ||||
3506 | int iEndOff /* End byte offset of token */ | ||||
3507 | ){ | ||||
3508 | HighlightContext *p = (HighlightContext*)pContext; | ||||
3509 | int rc = SQLITE_OK0; | ||||
3510 | int iPos; | ||||
3511 | |||||
3512 | UNUSED_PARAM2(pToken, nToken)(void)(pToken), (void)(nToken); | ||||
3513 | |||||
3514 | if( tflags & FTS5_TOKEN_COLOCATED0x0001 ) return SQLITE_OK0; | ||||
3515 | iPos = p->iPos++; | ||||
3516 | |||||
3517 | if( p->iRangeEnd>=0 ){ | ||||
3518 | if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK0; | ||||
3519 | if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; | ||||
3520 | } | ||||
3521 | |||||
3522 | /* If the parenthesis is open, and this token is not part of the current | ||||
3523 | ** phrase, and the starting byte offset of this token is past the point | ||||
3524 | ** that has currently been copied into the output buffer, close the | ||||
3525 | ** parenthesis. */ | ||||
3526 | if( p->bOpen | ||||
3527 | && (iPos<=p->iter.iStart || p->iter.iStart<0) | ||||
3528 | && iStartOff>p->iOff | ||||
3529 | ){ | ||||
3530 | fts5HighlightAppend(&rc, p, p->zClose, -1); | ||||
3531 | p->bOpen = 0; | ||||
3532 | } | ||||
3533 | |||||
3534 | /* If this is the start of a new phrase, and the highlight is not open: | ||||
3535 | ** | ||||
3536 | ** * copy text from the input up to the start of the phrase, and | ||||
3537 | ** * open the highlight. | ||||
3538 | */ | ||||
3539 | if( iPos==p->iter.iStart && p->bOpen==0 ){ | ||||
3540 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); | ||||
3541 | fts5HighlightAppend(&rc, p, p->zOpen, -1); | ||||
3542 | p->iOff = iStartOff; | ||||
3543 | p->bOpen = 1; | ||||
3544 | } | ||||
3545 | |||||
3546 | if( iPos==p->iter.iEnd ){ | ||||
3547 | if( p->bOpen==0 ){ | ||||
3548 | assert( p->iRangeEnd>=0 )((void) (0)); | ||||
3549 | fts5HighlightAppend(&rc, p, p->zOpen, -1); | ||||
3550 | p->bOpen = 1; | ||||
3551 | } | ||||
3552 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | ||||
3553 | p->iOff = iEndOff; | ||||
3554 | |||||
3555 | if( rc==SQLITE_OK0 ){ | ||||
3556 | rc = fts5CInstIterNext(&p->iter); | ||||
3557 | } | ||||
3558 | } | ||||
3559 | |||||
3560 | if( iPos==p->iRangeEnd ){ | ||||
3561 | if( p->bOpen ){ | ||||
3562 | if( p->iter.iStart>=0 && iPos>=p->iter.iStart ){ | ||||
3563 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | ||||
3564 | p->iOff = iEndOff; | ||||
3565 | } | ||||
3566 | fts5HighlightAppend(&rc, p, p->zClose, -1); | ||||
3567 | p->bOpen = 0; | ||||
3568 | } | ||||
3569 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | ||||
3570 | p->iOff = iEndOff; | ||||
3571 | } | ||||
3572 | |||||
3573 | return rc; | ||||
3574 | } | ||||
3575 | |||||
3576 | |||||
3577 | /* | ||||
3578 | ** Implementation of highlight() function. | ||||
3579 | */ | ||||
3580 | static void fts5HighlightFunction( | ||||
3581 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | ||||
3582 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | ||||
3583 | sqlite3_context *pCtx, /* Context for returning result/error */ | ||||
3584 | int nVal, /* Number of values in apVal[] array */ | ||||
3585 | sqlite3_value **apVal /* Array of trailing arguments */ | ||||
3586 | ){ | ||||
3587 | HighlightContext ctx; | ||||
3588 | int rc; | ||||
3589 | int iCol; | ||||
3590 | |||||
3591 | if( nVal!=3 ){ | ||||
3592 | const char *zErr = "wrong number of arguments to function highlight()"; | ||||
3593 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | ||||
3594 | return; | ||||
3595 | } | ||||
3596 | |||||
3597 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | ||||
3598 | memset(&ctx, 0, sizeof(HighlightContext)); | ||||
3599 | ctx.zOpen = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[1]); | ||||
3600 | ctx.zClose = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[2]); | ||||
3601 | ctx.iRangeEnd = -1; | ||||
3602 | rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); | ||||
3603 | if( rc==SQLITE_RANGE25 ){ | ||||
3604 | sqlite3_result_textsqlite3_api->result_text(pCtx, "", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
3605 | rc = SQLITE_OK0; | ||||
3606 | }else if( ctx.zIn ){ | ||||
3607 | const char *pLoc = 0; /* Locale of column iCol */ | ||||
3608 | int nLoc = 0; /* Size of pLoc in bytes */ | ||||
3609 | if( rc==SQLITE_OK0 ){ | ||||
3610 | rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); | ||||
3611 | } | ||||
3612 | |||||
3613 | if( rc==SQLITE_OK0 ){ | ||||
3614 | rc = pApi->xColumnLocale(pFts, iCol, &pLoc, &nLoc); | ||||
3615 | } | ||||
3616 | if( rc==SQLITE_OK0 ){ | ||||
3617 | rc = pApi->xTokenize_v2( | ||||
3618 | pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx, fts5HighlightCb | ||||
3619 | ); | ||||
3620 | } | ||||
3621 | if( ctx.bOpen ){ | ||||
3622 | fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); | ||||
3623 | } | ||||
3624 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); | ||||
3625 | |||||
3626 | if( rc==SQLITE_OK0 ){ | ||||
3627 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
3628 | } | ||||
3629 | sqlite3_freesqlite3_api->free(ctx.zOut); | ||||
3630 | } | ||||
3631 | if( rc!=SQLITE_OK0 ){ | ||||
3632 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | ||||
3633 | } | ||||
3634 | } | ||||
3635 | /* | ||||
3636 | ** End of highlight() implementation. | ||||
3637 | **************************************************************************/ | ||||
3638 | |||||
/*
** Context object passed to the fts5SentenceFinderCb() function. It
** accumulates, in aFirst[], the token position of each token judged to
** be the first of a sentence in the document zDoc.
*/
typedef struct Fts5SFinder Fts5SFinder;
struct Fts5SFinder {
  int iPos;                       /* Current token position */
  int nFirstAlloc;                /* Allocated size of aFirst[] */
  int nFirst;                     /* Number of entries in aFirst[] */
  int *aFirst;                    /* Array of first token in each sentence */
  const char *zDoc;               /* Document being tokenized */
};
3650 | |||||
3651 | /* | ||||
3652 | ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if | ||||
3653 | ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an | ||||
3654 | ** error occurs. | ||||
3655 | */ | ||||
3656 | static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ | ||||
3657 | if( p->nFirstAlloc==p->nFirst ){ | ||||
3658 | int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; | ||||
3659 | int *aNew; | ||||
3660 | |||||
3661 | aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(p->aFirst, nNew*sizeof(int)); | ||||
3662 | if( aNew==0 ) return SQLITE_NOMEM7; | ||||
3663 | p->aFirst = aNew; | ||||
3664 | p->nFirstAlloc = nNew; | ||||
3665 | } | ||||
3666 | p->aFirst[p->nFirst++] = iAdd; | ||||
3667 | return SQLITE_OK0; | ||||
3668 | } | ||||
3669 | |||||
3670 | /* | ||||
3671 | ** This function is an xTokenize() callback used by the auxiliary snippet() | ||||
3672 | ** function. Its job is to identify tokens that are the first in a sentence. | ||||
3673 | ** For each such token, an entry is added to the SFinder.aFirst[] array. | ||||
3674 | */ | ||||
3675 | static int fts5SentenceFinderCb( | ||||
3676 | void *pContext, /* Pointer to HighlightContext object */ | ||||
3677 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | ||||
3678 | const char *pToken, /* Buffer containing token */ | ||||
3679 | int nToken, /* Size of token in bytes */ | ||||
3680 | int iStartOff, /* Start offset of token */ | ||||
3681 | int iEndOff /* End offset of token */ | ||||
3682 | ){ | ||||
3683 | int rc = SQLITE_OK0; | ||||
3684 | |||||
3685 | UNUSED_PARAM2(pToken, nToken)(void)(pToken), (void)(nToken); | ||||
3686 | UNUSED_PARAM(iEndOff)(void)(iEndOff); | ||||
3687 | |||||
3688 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ){ | ||||
3689 | Fts5SFinder *p = (Fts5SFinder*)pContext; | ||||
3690 | if( p->iPos>0 ){ | ||||
3691 | int i; | ||||
3692 | char c = 0; | ||||
3693 | for(i=iStartOff-1; i>=0; i--){ | ||||
3694 | c = p->zDoc[i]; | ||||
3695 | if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break; | ||||
3696 | } | ||||
3697 | if( i!=iStartOff-1 && (c=='.' || c==':') ){ | ||||
3698 | rc = fts5SentenceFinderAdd(p, p->iPos); | ||||
3699 | } | ||||
3700 | }else{ | ||||
3701 | rc = fts5SentenceFinderAdd(p, 0); | ||||
3702 | } | ||||
3703 | p->iPos++; | ||||
3704 | } | ||||
3705 | return rc; | ||||
3706 | } | ||||
3707 | |||||
3708 | static int fts5SnippetScore( | ||||
3709 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | ||||
3710 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | ||||
3711 | int nDocsize, /* Size of column in tokens */ | ||||
3712 | unsigned char *aSeen, /* Array with one element per query phrase */ | ||||
3713 | int iCol, /* Column to score */ | ||||
3714 | int iPos, /* Starting offset to score */ | ||||
3715 | int nToken, /* Max tokens per snippet */ | ||||
3716 | int *pnScore, /* OUT: Score */ | ||||
3717 | int *piPos /* OUT: Adjusted offset */ | ||||
3718 | ){ | ||||
3719 | int rc; | ||||
3720 | int i; | ||||
3721 | int ip = 0; | ||||
3722 | int ic = 0; | ||||
3723 | int iOff = 0; | ||||
3724 | int iFirst = -1; | ||||
3725 | int nInst; | ||||
3726 | int nScore = 0; | ||||
3727 | int iLast = 0; | ||||
3728 | sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken; | ||||
3729 | |||||
3730 | rc = pApi->xInstCount(pFts, &nInst); | ||||
3731 | for(i=0; i<nInst && rc==SQLITE_OK0; i++){ | ||||
3732 | rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); | ||||
3733 | if( rc==SQLITE_OK0 && ic==iCol && iOff>=iPos && iOff<iEnd ){ | ||||
3734 | nScore += (aSeen[ip] ? 1 : 1000); | ||||
3735 | aSeen[ip] = 1; | ||||
3736 | if( iFirst<0 ) iFirst = iOff; | ||||
3737 | iLast = iOff + pApi->xPhraseSize(pFts, ip); | ||||
3738 | } | ||||
3739 | } | ||||
3740 | |||||
3741 | *pnScore = nScore; | ||||
3742 | if( piPos ){ | ||||
3743 | sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2; | ||||
3744 | if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken; | ||||
3745 | if( iAdj<0 ) iAdj = 0; | ||||
3746 | *piPos = (int)iAdj; | ||||
3747 | } | ||||
3748 | |||||
3749 | return rc; | ||||
3750 | } | ||||
3751 | |||||
3752 | /* | ||||
3753 | ** Return the value in pVal interpreted as utf-8 text. Except, if pVal | ||||
3754 | ** contains a NULL value, return a pointer to a static string zero | ||||
3755 | ** bytes in length instead of a NULL pointer. | ||||
3756 | */ | ||||
3757 | static const char *fts5ValueToText(sqlite3_value *pVal){ | ||||
3758 | const char *zRet = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | ||||
3759 | return zRet ? zRet : ""; | ||||
3760 | } | ||||
3761 | |||||
3762 | /* | ||||
3763 | ** Implementation of snippet() function. | ||||
3764 | */ | ||||
3765 | static void fts5SnippetFunction( | ||||
3766 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | ||||
3767 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | ||||
3768 | sqlite3_context *pCtx, /* Context for returning result/error */ | ||||
3769 | int nVal, /* Number of values in apVal[] array */ | ||||
3770 | sqlite3_value **apVal /* Array of trailing arguments */ | ||||
3771 | ){ | ||||
3772 | HighlightContext ctx; | ||||
3773 | int rc = SQLITE_OK0; /* Return code */ | ||||
3774 | int iCol; /* 1st argument to snippet() */ | ||||
3775 | const char *zEllips; /* 4th argument to snippet() */ | ||||
3776 | int nToken; /* 5th argument to snippet() */ | ||||
3777 | int nInst = 0; /* Number of instance matches this row */ | ||||
3778 | int i; /* Used to iterate through instances */ | ||||
3779 | int nPhrase; /* Number of phrases in query */ | ||||
3780 | unsigned char *aSeen; /* Array of "seen instance" flags */ | ||||
3781 | int iBestCol; /* Column containing best snippet */ | ||||
3782 | int iBestStart = 0; /* First token of best snippet */ | ||||
3783 | int nBestScore = 0; /* Score of best snippet */ | ||||
3784 | int nColSize = 0; /* Total size of iBestCol in tokens */ | ||||
3785 | Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ | ||||
3786 | int nCol; | ||||
3787 | |||||
3788 | if( nVal!=5 ){ | ||||
3789 | const char *zErr = "wrong number of arguments to function snippet()"; | ||||
3790 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | ||||
3791 | return; | ||||
3792 | } | ||||
3793 | |||||
3794 | nCol = pApi->xColumnCount(pFts); | ||||
3795 | memset(&ctx, 0, sizeof(HighlightContext)); | ||||
3796 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | ||||
3797 | ctx.zOpen = fts5ValueToText(apVal[1]); | ||||
3798 | ctx.zClose = fts5ValueToText(apVal[2]); | ||||
3799 | ctx.iRangeEnd = -1; | ||||
3800 | zEllips = fts5ValueToText(apVal[3]); | ||||
3801 | nToken = sqlite3_value_intsqlite3_api->value_int(apVal[4]); | ||||
3802 | |||||
3803 | iBestCol = (iCol>=0 ? iCol : 0); | ||||
3804 | nPhrase = pApi->xPhraseCount(pFts); | ||||
3805 | aSeen = sqlite3_mallocsqlite3_api->malloc(nPhrase); | ||||
3806 | if( aSeen==0 ){ | ||||
3807 | rc = SQLITE_NOMEM7; | ||||
3808 | } | ||||
3809 | if( rc==SQLITE_OK0 ){ | ||||
3810 | rc = pApi->xInstCount(pFts, &nInst); | ||||
3811 | } | ||||
3812 | |||||
3813 | memset(&sFinder, 0, sizeof(Fts5SFinder)); | ||||
3814 | for(i=0; i<nCol; i++){ | ||||
3815 | if( iCol<0 || iCol==i ){ | ||||
3816 | const char *pLoc = 0; /* Locale of column iCol */ | ||||
3817 | int nLoc = 0; /* Size of pLoc in bytes */ | ||||
3818 | int nDoc; | ||||
3819 | int nDocsize; | ||||
3820 | int ii; | ||||
3821 | sFinder.iPos = 0; | ||||
3822 | sFinder.nFirst = 0; | ||||
3823 | rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); | ||||
3824 | if( rc!=SQLITE_OK0 ) break; | ||||
3825 | rc = pApi->xColumnLocale(pFts, i, &pLoc, &nLoc); | ||||
3826 | if( rc!=SQLITE_OK0 ) break; | ||||
3827 | rc = pApi->xTokenize_v2(pFts, | ||||
3828 | sFinder.zDoc, nDoc, pLoc, nLoc, (void*)&sFinder, fts5SentenceFinderCb | ||||
3829 | ); | ||||
3830 | if( rc!=SQLITE_OK0 ) break; | ||||
3831 | rc = pApi->xColumnSize(pFts, i, &nDocsize); | ||||
3832 | if( rc!=SQLITE_OK0 ) break; | ||||
3833 | |||||
3834 | for(ii=0; rc==SQLITE_OK0 && ii<nInst; ii++){ | ||||
3835 | int ip, ic, io; | ||||
3836 | int iAdj; | ||||
3837 | int nScore; | ||||
3838 | int jj; | ||||
3839 | |||||
3840 | rc = pApi->xInst(pFts, ii, &ip, &ic, &io); | ||||
3841 | if( ic!=i ) continue; | ||||
3842 | if( io>nDocsize ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
3843 | if( rc!=SQLITE_OK0 ) continue; | ||||
3844 | memset(aSeen, 0, nPhrase); | ||||
3845 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, | ||||
3846 | io, nToken, &nScore, &iAdj | ||||
3847 | ); | ||||
3848 | if( rc==SQLITE_OK0 && nScore>nBestScore ){ | ||||
3849 | nBestScore = nScore; | ||||
3850 | iBestCol = i; | ||||
3851 | iBestStart = iAdj; | ||||
3852 | nColSize = nDocsize; | ||||
3853 | } | ||||
3854 | |||||
3855 | if( rc==SQLITE_OK0 && sFinder.nFirst && nDocsize>nToken ){ | ||||
3856 | for(jj=0; jj<(sFinder.nFirst-1); jj++){ | ||||
3857 | if( sFinder.aFirst[jj+1]>io ) break; | ||||
3858 | } | ||||
3859 | |||||
3860 | if( sFinder.aFirst[jj]<io ){ | ||||
3861 | memset(aSeen, 0, nPhrase); | ||||
3862 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, | ||||
3863 | sFinder.aFirst[jj], nToken, &nScore, 0 | ||||
3864 | ); | ||||
3865 | |||||
3866 | nScore += (sFinder.aFirst[jj]==0 ? 120 : 100); | ||||
3867 | if( rc==SQLITE_OK0 && nScore>nBestScore ){ | ||||
3868 | nBestScore = nScore; | ||||
3869 | iBestCol = i; | ||||
3870 | iBestStart = sFinder.aFirst[jj]; | ||||
3871 | nColSize = nDocsize; | ||||
3872 | } | ||||
3873 | } | ||||
3874 | } | ||||
3875 | } | ||||
3876 | } | ||||
3877 | } | ||||
3878 | |||||
3879 | if( rc==SQLITE_OK0 ){ | ||||
3880 | rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); | ||||
3881 | } | ||||
3882 | if( rc==SQLITE_OK0 && nColSize==0 ){ | ||||
3883 | rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); | ||||
3884 | } | ||||
3885 | if( ctx.zIn ){ | ||||
3886 | const char *pLoc = 0; /* Locale of column iBestCol */ | ||||
3887 | int nLoc = 0; /* Bytes in pLoc */ | ||||
3888 | |||||
3889 | if( rc==SQLITE_OK0 ){ | ||||
3890 | rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); | ||||
3891 | } | ||||
3892 | |||||
3893 | ctx.iRangeStart = iBestStart; | ||||
3894 | ctx.iRangeEnd = iBestStart + nToken - 1; | ||||
3895 | |||||
3896 | if( iBestStart>0 ){ | ||||
3897 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); | ||||
3898 | } | ||||
3899 | |||||
3900 | /* Advance iterator ctx.iter so that it points to the first coalesced | ||||
3901 | ** phrase instance at or following position iBestStart. */ | ||||
3902 | while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK0 ){ | ||||
3903 | rc = fts5CInstIterNext(&ctx.iter); | ||||
3904 | } | ||||
3905 | |||||
3906 | if( rc==SQLITE_OK0 ){ | ||||
3907 | rc = pApi->xColumnLocale(pFts, iBestCol, &pLoc, &nLoc); | ||||
3908 | } | ||||
3909 | if( rc==SQLITE_OK0 ){ | ||||
3910 | rc = pApi->xTokenize_v2( | ||||
3911 | pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx,fts5HighlightCb | ||||
3912 | ); | ||||
3913 | } | ||||
3914 | if( ctx.bOpen ){ | ||||
3915 | fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); | ||||
3916 | } | ||||
3917 | if( ctx.iRangeEnd>=(nColSize-1) ){ | ||||
3918 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); | ||||
3919 | }else{ | ||||
3920 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); | ||||
3921 | } | ||||
3922 | } | ||||
3923 | if( rc==SQLITE_OK0 ){ | ||||
3924 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
3925 | }else{ | ||||
3926 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | ||||
3927 | } | ||||
3928 | sqlite3_freesqlite3_api->free(ctx.zOut); | ||||
3929 | sqlite3_freesqlite3_api->free(aSeen); | ||||
3930 | sqlite3_freesqlite3_api->free(sFinder.aFirst); | ||||
3931 | } | ||||
3932 | |||||
3933 | /************************************************************************/ | ||||
3934 | |||||
/*
** The first time the bm25() function is called for a query, an instance
** of the following structure is allocated and populated. The aIDF[] and
** aFreq[] arrays each hold nPhrase entries and are stored in the same
** allocation, immediately after the structure itself.
*/
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Array used to calculate phrase freq. */
};
3946 | |||||
3947 | /* | ||||
3948 | ** Callback used by fts5Bm25GetData() to count the number of rows in the | ||||
3949 | ** table matched by each individual phrase within the query. | ||||
3950 | */ | ||||
3951 | static int fts5CountCb( | ||||
3952 | const Fts5ExtensionApi *pApi, | ||||
3953 | Fts5Context *pFts, | ||||
3954 | void *pUserData /* Pointer to sqlite3_int64 variable */ | ||||
3955 | ){ | ||||
3956 | sqlite3_int64 *pn = (sqlite3_int64*)pUserData; | ||||
3957 | UNUSED_PARAM2(pApi, pFts)(void)(pApi), (void)(pFts); | ||||
3958 | (*pn)++; | ||||
3959 | return SQLITE_OK0; | ||||
3960 | } | ||||
3961 | |||||
3962 | /* | ||||
3963 | ** Set *ppData to point to the Fts5Bm25Data object for the current query. | ||||
3964 | ** If the object has not already been allocated, allocate and populate it | ||||
3965 | ** now. | ||||
3966 | */ | ||||
3967 | static int fts5Bm25GetData( | ||||
3968 | const Fts5ExtensionApi *pApi, | ||||
3969 | Fts5Context *pFts, | ||||
3970 | Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ | ||||
3971 | ){ | ||||
3972 | int rc = SQLITE_OK0; /* Return code */ | ||||
3973 | Fts5Bm25Data *p; /* Object to return */ | ||||
3974 | |||||
3975 | p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0); | ||||
3976 | if( p==0 ){ | ||||
3977 | int nPhrase; /* Number of phrases in query */ | ||||
3978 | sqlite3_int64 nRow = 0; /* Number of rows in table */ | ||||
3979 | sqlite3_int64 nToken = 0; /* Number of tokens in table */ | ||||
3980 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | ||||
3981 | int i; | ||||
3982 | |||||
3983 | /* Allocate the Fts5Bm25Data object */ | ||||
3984 | nPhrase = pApi->xPhraseCount(pFts); | ||||
3985 | nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); | ||||
3986 | p = (Fts5Bm25Data*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
3987 | if( p==0 ){ | ||||
3988 | rc = SQLITE_NOMEM7; | ||||
3989 | }else{ | ||||
3990 | memset(p, 0, (size_t)nByte); | ||||
3991 | p->nPhrase = nPhrase; | ||||
3992 | p->aIDF = (double*)&p[1]; | ||||
3993 | p->aFreq = &p->aIDF[nPhrase]; | ||||
3994 | } | ||||
3995 | |||||
3996 | /* Calculate the average document length for this FTS5 table */ | ||||
3997 | if( rc==SQLITE_OK0 ) rc = pApi->xRowCount(pFts, &nRow); | ||||
3998 | assert( rc!=SQLITE_OK || nRow>0 )((void) (0)); | ||||
3999 | if( rc==SQLITE_OK0 ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); | ||||
4000 | if( rc==SQLITE_OK0 ) p->avgdl = (double)nToken / (double)nRow; | ||||
4001 | |||||
4002 | /* Calculate an IDF for each phrase in the query */ | ||||
4003 | for(i=0; rc==SQLITE_OK0 && i<nPhrase; i++){ | ||||
4004 | sqlite3_int64 nHit = 0; | ||||
4005 | rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); | ||||
4006 | if( rc==SQLITE_OK0 ){ | ||||
4007 | /* Calculate the IDF (Inverse Document Frequency) for phrase i. | ||||
4008 | ** This is done using the standard BM25 formula as found on wikipedia: | ||||
4009 | ** | ||||
4010 | ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) | ||||
4011 | ** | ||||
4012 | ** where "N" is the total number of documents in the set and nHit | ||||
4013 | ** is the number that contain at least one instance of the phrase | ||||
4014 | ** under consideration. | ||||
4015 | ** | ||||
4016 | ** The problem with this is that if (N < 2*nHit), the IDF is | ||||
4017 | ** negative. Which is undesirable. So the minimum allowable IDF is | ||||
4018 | ** (1e-6) - roughly the same as a term that appears in just over | ||||
4019 | ** half of set of 5,000,000 documents. */ | ||||
4020 | double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); | ||||
4021 | if( idf<=0.0 ) idf = 1e-6; | ||||
4022 | p->aIDF[i] = idf; | ||||
4023 | } | ||||
4024 | } | ||||
4025 | |||||
4026 | if( rc!=SQLITE_OK0 ){ | ||||
4027 | sqlite3_freesqlite3_api->free(p); | ||||
4028 | }else{ | ||||
4029 | rc = pApi->xSetAuxdata(pFts, p, sqlite3_freesqlite3_api->free); | ||||
4030 | } | ||||
4031 | if( rc!=SQLITE_OK0 ) p = 0; | ||||
4032 | } | ||||
4033 | *ppData = p; | ||||
4034 | return rc; | ||||
4035 | } | ||||
4036 | |||||
4037 | /* | ||||
4038 | ** Implementation of bm25() function. | ||||
4039 | */ | ||||
4040 | static void fts5Bm25Function( | ||||
4041 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | ||||
4042 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | ||||
4043 | sqlite3_context *pCtx, /* Context for returning result/error */ | ||||
4044 | int nVal, /* Number of values in apVal[] array */ | ||||
4045 | sqlite3_value **apVal /* Array of trailing arguments */ | ||||
4046 | ){ | ||||
4047 | const double k1 = 1.2; /* Constant "k1" from BM25 formula */ | ||||
4048 | const double b = 0.75; /* Constant "b" from BM25 formula */ | ||||
4049 | int rc; /* Error code */ | ||||
4050 | double score = 0.0; /* SQL function return value */ | ||||
4051 | Fts5Bm25Data *pData; /* Values allocated/calculated once only */ | ||||
4052 | int i; /* Iterator variable */ | ||||
4053 | int nInst = 0; /* Value returned by xInstCount() */ | ||||
4054 | double D = 0.0; /* Total number of tokens in row */ | ||||
4055 | double *aFreq = 0; /* Array of phrase freq. for current row */ | ||||
4056 | |||||
4057 | /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) | ||||
4058 | ** for each phrase in the query for the current row. */ | ||||
4059 | rc = fts5Bm25GetData(pApi, pFts, &pData); | ||||
4060 | if( rc==SQLITE_OK0 ){ | ||||
4061 | aFreq = pData->aFreq; | ||||
4062 | memset(aFreq, 0, sizeof(double) * pData->nPhrase); | ||||
4063 | rc = pApi->xInstCount(pFts, &nInst); | ||||
4064 | } | ||||
4065 | for(i=0; rc==SQLITE_OK0 && i<nInst; i++){ | ||||
4066 | int ip; int ic; int io; | ||||
4067 | rc = pApi->xInst(pFts, i, &ip, &ic, &io); | ||||
4068 | if( rc==SQLITE_OK0 ){ | ||||
4069 | double w = (nVal > ic) ? sqlite3_value_doublesqlite3_api->value_double(apVal[ic]) : 1.0; | ||||
4070 | aFreq[ip] += w; | ||||
4071 | } | ||||
4072 | } | ||||
4073 | |||||
4074 | /* Figure out the total size of the current row in tokens. */ | ||||
4075 | if( rc==SQLITE_OK0 ){ | ||||
4076 | int nTok; | ||||
4077 | rc = pApi->xColumnSize(pFts, -1, &nTok); | ||||
4078 | D = (double)nTok; | ||||
4079 | } | ||||
4080 | |||||
4081 | /* Determine and return the BM25 score for the current row. Or, if an | ||||
4082 | ** error has occurred, throw an exception. */ | ||||
4083 | if( rc==SQLITE_OK0 ){ | ||||
4084 | for(i=0; i<pData->nPhrase; i++){ | ||||
4085 | score += pData->aIDF[i] * ( | ||||
4086 | ( aFreq[i] * (k1 + 1.0) ) / | ||||
4087 | ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) | ||||
4088 | ); | ||||
4089 | } | ||||
4090 | sqlite3_result_doublesqlite3_api->result_double(pCtx, -1.0 * score); | ||||
4091 | }else{ | ||||
4092 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | ||||
4093 | } | ||||
4094 | } | ||||
4095 | |||||
4096 | /* | ||||
4097 | ** Implementation of fts5_get_locale() function. | ||||
4098 | */ | ||||
4099 | static void fts5GetLocaleFunction( | ||||
4100 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | ||||
4101 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | ||||
4102 | sqlite3_context *pCtx, /* Context for returning result/error */ | ||||
4103 | int nVal, /* Number of values in apVal[] array */ | ||||
4104 | sqlite3_value **apVal /* Array of trailing arguments */ | ||||
4105 | ){ | ||||
4106 | int iCol = 0; | ||||
4107 | int eType = 0; | ||||
4108 | int rc = SQLITE_OK0; | ||||
4109 | const char *zLocale = 0; | ||||
4110 | int nLocale = 0; | ||||
4111 | |||||
4112 | /* xColumnLocale() must be available */ | ||||
4113 | assert( pApi->iVersion>=4 )((void) (0)); | ||||
4114 | |||||
4115 | if( nVal!=1 ){ | ||||
4116 | const char *z = "wrong number of arguments to function fts5_get_locale()"; | ||||
4117 | sqlite3_result_errorsqlite3_api->result_error(pCtx, z, -1); | ||||
4118 | return; | ||||
4119 | } | ||||
4120 | |||||
4121 | eType = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(apVal[0]); | ||||
4122 | if( eType!=SQLITE_INTEGER1 ){ | ||||
4123 | const char *z = "non-integer argument passed to function fts5_get_locale()"; | ||||
4124 | sqlite3_result_errorsqlite3_api->result_error(pCtx, z, -1); | ||||
4125 | return; | ||||
4126 | } | ||||
4127 | |||||
4128 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | ||||
4129 | if( iCol<0 || iCol>=pApi->xColumnCount(pFts) ){ | ||||
4130 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, SQLITE_RANGE25); | ||||
4131 | return; | ||||
4132 | } | ||||
4133 | |||||
4134 | rc = pApi->xColumnLocale(pFts, iCol, &zLocale, &nLocale); | ||||
4135 | if( rc!=SQLITE_OK0 ){ | ||||
4136 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | ||||
4137 | return; | ||||
4138 | } | ||||
4139 | |||||
4140 | sqlite3_result_textsqlite3_api->result_text(pCtx, zLocale, nLocale, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
4141 | } | ||||
4142 | |||||
4143 | static int sqlite3Fts5AuxInit(fts5_api *pApi){ | ||||
4144 | struct Builtin { | ||||
4145 | const char *zFunc; /* Function name (nul-terminated) */ | ||||
4146 | void *pUserData; /* User-data pointer */ | ||||
4147 | fts5_extension_function xFunc;/* Callback function */ | ||||
4148 | void (*xDestroy)(void*); /* Destructor function */ | ||||
4149 | } aBuiltin [] = { | ||||
4150 | { "snippet", 0, fts5SnippetFunction, 0 }, | ||||
4151 | { "highlight", 0, fts5HighlightFunction, 0 }, | ||||
4152 | { "bm25", 0, fts5Bm25Function, 0 }, | ||||
4153 | { "fts5_get_locale", 0, fts5GetLocaleFunction, 0 }, | ||||
4154 | }; | ||||
4155 | int rc = SQLITE_OK0; /* Return code */ | ||||
4156 | int i; /* To iterate through builtin functions */ | ||||
4157 | |||||
4158 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aBuiltin)((int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]))); i++){ | ||||
4159 | rc = pApi->xCreateFunction(pApi, | ||||
4160 | aBuiltin[i].zFunc, | ||||
4161 | aBuiltin[i].pUserData, | ||||
4162 | aBuiltin[i].xFunc, | ||||
4163 | aBuiltin[i].xDestroy | ||||
4164 | ); | ||||
4165 | } | ||||
4166 | |||||
4167 | return rc; | ||||
4168 | } | ||||
4169 | |||||
4170 | #line 1 "fts5_buffer.c" | ||||
4171 | /* | ||||
4172 | ** 2014 May 31 | ||||
4173 | ** | ||||
4174 | ** The author disclaims copyright to this source code. In place of | ||||
4175 | ** a legal notice, here is a blessing: | ||||
4176 | ** | ||||
4177 | ** May you do good and not evil. | ||||
4178 | ** May you find forgiveness for yourself and forgive others. | ||||
4179 | ** May you share freely, never taking more than you give. | ||||
4180 | ** | ||||
4181 | ****************************************************************************** | ||||
4182 | */ | ||||
4183 | |||||
4184 | |||||
4185 | |||||
4186 | /* #include "fts5Int.h" */ | ||||
4187 | |||||
4188 | static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){ | ||||
4189 | if( (u32)pBuf->nSpace<nByte ){ | ||||
4190 | u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64; | ||||
4191 | u8 *pNew; | ||||
4192 | while( nNew<nByte ){ | ||||
4193 | nNew = nNew * 2; | ||||
4194 | } | ||||
4195 | pNew = sqlite3_realloc64sqlite3_api->realloc64(pBuf->p, nNew); | ||||
4196 | if( pNew==0 ){ | ||||
4197 | *pRc = SQLITE_NOMEM7; | ||||
4198 | return 1; | ||||
4199 | }else{ | ||||
4200 | pBuf->nSpace = (int)nNew; | ||||
4201 | pBuf->p = pNew; | ||||
4202 | } | ||||
4203 | } | ||||
4204 | return 0; | ||||
4205 | } | ||||
4206 | |||||
4207 | |||||
4208 | /* | ||||
4209 | ** Encode value iVal as an SQLite varint and append it to the buffer object | ||||
** pBuf. If an OOM error occurs, set *pRc to SQLITE_NOMEM.
4211 | */ | ||||
4212 | static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ | ||||
4213 | if( fts5BufferGrow(pRc, pBuf, 9)( (u32)((pBuf)->n) + (u32)(9) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(9)+(pBuf)->n) ) ) return; | ||||
4214 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal); | ||||
4215 | } | ||||
4216 | |||||
4217 | static void sqlite3Fts5Put32(u8 *aBuf, int iVal){ | ||||
4218 | aBuf[0] = (iVal>>24) & 0x00FF; | ||||
4219 | aBuf[1] = (iVal>>16) & 0x00FF; | ||||
4220 | aBuf[2] = (iVal>> 8) & 0x00FF; | ||||
4221 | aBuf[3] = (iVal>> 0) & 0x00FF; | ||||
4222 | } | ||||
4223 | |||||
4224 | static int sqlite3Fts5Get32(const u8 *aBuf){ | ||||
4225 | return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]); | ||||
4226 | } | ||||
4227 | |||||
4228 | /* | ||||
** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
** *pRc to SQLITE_NOMEM and leave pBuf unmodified. If nData is zero, this
** function is a no-op. The value of *pRc on entry is not examined.
4232 | */ | ||||
4233 | static void sqlite3Fts5BufferAppendBlob( | ||||
4234 | int *pRc, | ||||
4235 | Fts5Buffer *pBuf, | ||||
4236 | u32 nData, | ||||
4237 | const u8 *pData | ||||
4238 | ){ | ||||
4239 | if( nData ){ | ||||
4240 | if( fts5BufferGrow(pRc, pBuf, nData)( (u32)((pBuf)->n) + (u32)(nData) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(nData)+(pBuf)-> n) ) ) return; | ||||
4241 | assert( pBuf->p!=0 )((void) (0)); | ||||
4242 | memcpy(&pBuf->p[pBuf->n], pData, nData); | ||||
4243 | pBuf->n += nData; | ||||
4244 | } | ||||
4245 | } | ||||
4246 | |||||
4247 | /* | ||||
4248 | ** Append the nul-terminated string zStr to the buffer pBuf. This function | ||||
4249 | ** ensures that the byte following the buffer data is set to 0x00, even | ||||
4250 | ** though this byte is not included in the pBuf->n count. | ||||
4251 | */ | ||||
4252 | static void sqlite3Fts5BufferAppendString( | ||||
4253 | int *pRc, | ||||
4254 | Fts5Buffer *pBuf, | ||||
4255 | const char *zStr | ||||
4256 | ){ | ||||
4257 | int nStr = (int)strlen(zStr); | ||||
4258 | sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr); | ||||
4259 | pBuf->n--; | ||||
4260 | } | ||||
4261 | |||||
4262 | /* | ||||
4263 | ** Argument zFmt is a printf() style format string. This function performs | ||||
4264 | ** the printf() style processing, then appends the results to buffer pBuf. | ||||
4265 | ** | ||||
4266 | ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte | ||||
4267 | ** following the buffer data is set to 0x00, even though this byte is not | ||||
4268 | ** included in the pBuf->n count. | ||||
4269 | */ | ||||
4270 | static void sqlite3Fts5BufferAppendPrintf( | ||||
4271 | int *pRc, | ||||
4272 | Fts5Buffer *pBuf, | ||||
4273 | char *zFmt, ... | ||||
4274 | ){ | ||||
4275 | if( *pRc==SQLITE_OK0 ){ | ||||
4276 | char *zTmp; | ||||
4277 | va_list ap; | ||||
4278 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | ||||
4279 | zTmp = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | ||||
4280 | va_end(ap)__builtin_va_end(ap); | ||||
4281 | |||||
4282 | if( zTmp==0 ){ | ||||
4283 | *pRc = SQLITE_NOMEM7; | ||||
4284 | }else{ | ||||
4285 | sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); | ||||
4286 | sqlite3_freesqlite3_api->free(zTmp); | ||||
4287 | } | ||||
4288 | } | ||||
4289 | } | ||||
4290 | |||||
4291 | static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){ | ||||
4292 | char *zRet = 0; | ||||
4293 | if( *pRc==SQLITE_OK0 ){ | ||||
4294 | va_list ap; | ||||
4295 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | ||||
4296 | zRet = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | ||||
4297 | va_end(ap)__builtin_va_end(ap); | ||||
4298 | if( zRet==0 ){ | ||||
4299 | *pRc = SQLITE_NOMEM7; | ||||
4300 | } | ||||
4301 | } | ||||
4302 | return zRet; | ||||
4303 | } | ||||
4304 | |||||
4305 | |||||
4306 | /* | ||||
4307 | ** Free any buffer allocated by pBuf. Zero the structure before returning. | ||||
4308 | */ | ||||
4309 | static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ | ||||
4310 | sqlite3_freesqlite3_api->free(pBuf->p); | ||||
4311 | memset(pBuf, 0, sizeof(Fts5Buffer)); | ||||
4312 | } | ||||
4313 | |||||
4314 | /* | ||||
4315 | ** Zero the contents of the buffer object. But do not free the associated | ||||
4316 | ** memory allocation. | ||||
4317 | */ | ||||
4318 | static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){ | ||||
4319 | pBuf->n = 0; | ||||
4320 | } | ||||
4321 | |||||
4322 | /* | ||||
** Set the buffer to contain nData/pData. If an OOM error occurs, leave
** the error code in *pRc. The previous contents of the buffer are
** discarded in either case.
4326 | */ | ||||
4327 | static void sqlite3Fts5BufferSet( | ||||
4328 | int *pRc, | ||||
4329 | Fts5Buffer *pBuf, | ||||
4330 | int nData, | ||||
4331 | const u8 *pData | ||||
4332 | ){ | ||||
4333 | pBuf->n = 0; | ||||
4334 | sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); | ||||
4335 | } | ||||
4336 | |||||
4337 | static int sqlite3Fts5PoslistNext64( | ||||
4338 | const u8 *a, int n, /* Buffer containing poslist */ | ||||
4339 | int *pi, /* IN/OUT: Offset within a[] */ | ||||
4340 | i64 *piOff /* IN/OUT: Current offset */ | ||||
4341 | ){ | ||||
4342 | int i = *pi; | ||||
4343 | assert( a!=0 || i==0 )((void) (0)); | ||||
4344 | if( i>=n ){ | ||||
4345 | /* EOF */ | ||||
4346 | *piOff = -1; | ||||
4347 | return 1; | ||||
4348 | }else{ | ||||
4349 | i64 iOff = *piOff; | ||||
4350 | u32 iVal; | ||||
4351 | assert( a!=0 )((void) (0)); | ||||
4352 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | ||||
4353 | if( iVal<=1 ){ | ||||
4354 | if( iVal==0 ){ | ||||
4355 | *pi = i; | ||||
4356 | return 0; | ||||
4357 | } | ||||
4358 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | ||||
4359 | iOff = ((i64)iVal) << 32; | ||||
4360 | assert( iOff>=0 )((void) (0)); | ||||
4361 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | ||||
4362 | if( iVal<2 ){ | ||||
4363 | /* This is a corrupt record. So stop parsing it here. */ | ||||
4364 | *piOff = -1; | ||||
4365 | return 1; | ||||
4366 | } | ||||
4367 | *piOff = iOff + ((iVal-2) & 0x7FFFFFFF); | ||||
4368 | }else{ | ||||
4369 | *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF); | ||||
4370 | } | ||||
4371 | *pi = i; | ||||
4372 | assert_nc( *piOff>=iOff )((void) (0)); | ||||
4373 | return 0; | ||||
4374 | } | ||||
4375 | } | ||||
4376 | |||||
4377 | |||||
4378 | /* | ||||
4379 | ** Advance the iterator object passed as the only argument. Return true | ||||
4380 | ** if the iterator reaches EOF, or false otherwise. | ||||
4381 | */ | ||||
4382 | static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ | ||||
4383 | if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){ | ||||
4384 | pIter->bEof = 1; | ||||
4385 | } | ||||
4386 | return pIter->bEof; | ||||
4387 | } | ||||
4388 | |||||
4389 | static int sqlite3Fts5PoslistReaderInit( | ||||
4390 | const u8 *a, int n, /* Poslist buffer to iterate through */ | ||||
4391 | Fts5PoslistReader *pIter /* Iterator object to initialize */ | ||||
4392 | ){ | ||||
4393 | memset(pIter, 0, sizeof(*pIter)); | ||||
4394 | pIter->a = a; | ||||
4395 | pIter->n = n; | ||||
4396 | sqlite3Fts5PoslistReaderNext(pIter); | ||||
4397 | return pIter->bEof; | ||||
4398 | } | ||||
4399 | |||||
4400 | /* | ||||
4401 | ** Append position iPos to the position list being accumulated in buffer | ||||
** pBuf, which must already be large enough to hold the new data.
4403 | ** The previous position written to this list is *piPrev. *piPrev is set | ||||
4404 | ** to iPos before returning. | ||||
4405 | */ | ||||
4406 | static void sqlite3Fts5PoslistSafeAppend( | ||||
4407 | Fts5Buffer *pBuf, | ||||
4408 | i64 *piPrev, | ||||
4409 | i64 iPos | ||||
4410 | ){ | ||||
4411 | if( iPos>=*piPrev ){ | ||||
4412 | static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32; | ||||
4413 | if( (iPos & colmask) != (*piPrev & colmask) ){ | ||||
4414 | pBuf->p[pBuf->n++] = 1; | ||||
4415 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32)); | ||||
4416 | *piPrev = (iPos & colmask); | ||||
4417 | } | ||||
4418 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2); | ||||
4419 | *piPrev = iPos; | ||||
4420 | } | ||||
4421 | } | ||||
4422 | |||||
4423 | static int sqlite3Fts5PoslistWriterAppend( | ||||
4424 | Fts5Buffer *pBuf, | ||||
4425 | Fts5PoslistWriter *pWriter, | ||||
4426 | i64 iPos | ||||
4427 | ){ | ||||
4428 | int rc = 0; /* Initialized only to suppress erroneous warning from Clang */ | ||||
4429 | if( fts5BufferGrow(&rc, pBuf, 5+5+5)( (u32)((pBuf)->n) + (u32)(5+5+5) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((&rc),(pBuf),(5+5+5)+(pBuf) ->n) ) ) return rc; | ||||
4430 | sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos); | ||||
4431 | return SQLITE_OK0; | ||||
4432 | } | ||||
4433 | |||||
4434 | static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){ | ||||
4435 | void *pRet = 0; | ||||
4436 | if( *pRc==SQLITE_OK0 ){ | ||||
4437 | pRet = sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
4438 | if( pRet==0 ){ | ||||
4439 | if( nByte>0 ) *pRc = SQLITE_NOMEM7; | ||||
4440 | }else{ | ||||
4441 | memset(pRet, 0, (size_t)nByte); | ||||
4442 | } | ||||
4443 | } | ||||
4444 | return pRet; | ||||
4445 | } | ||||
4446 | |||||
4447 | /* | ||||
4448 | ** Return a nul-terminated copy of the string indicated by pIn. If nIn | ||||
4449 | ** is non-negative, then it is the length of the string in bytes. Otherwise, | ||||
4450 | ** the length of the string is determined using strlen(). | ||||
4451 | ** | ||||
4452 | ** It is the responsibility of the caller to eventually free the returned | ||||
4453 | ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. | ||||
4454 | */ | ||||
4455 | static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ | ||||
4456 | char *zRet = 0; | ||||
4457 | if( *pRc==SQLITE_OK0 ){ | ||||
4458 | if( nIn<0 ){ | ||||
4459 | nIn = (int)strlen(pIn); | ||||
4460 | } | ||||
4461 | zRet = (char*)sqlite3_mallocsqlite3_api->malloc(nIn+1); | ||||
4462 | if( zRet ){ | ||||
4463 | memcpy(zRet, pIn, nIn); | ||||
4464 | zRet[nIn] = '\0'; | ||||
4465 | }else{ | ||||
4466 | *pRc = SQLITE_NOMEM7; | ||||
4467 | } | ||||
4468 | } | ||||
4469 | return zRet; | ||||
4470 | } | ||||
4471 | |||||
4472 | |||||
4473 | /* | ||||
4474 | ** Return true if character 't' may be part of an FTS5 bareword, or false | ||||
4475 | ** otherwise. Characters that may be part of barewords: | ||||
4476 | ** | ||||
4477 | ** * All non-ASCII characters, | ||||
4478 | ** * The 52 upper and lower case ASCII characters, and | ||||
4479 | ** * The 10 integer ASCII characters. | ||||
4480 | ** * The underscore character "_" (0x5F). | ||||
4481 | ** * The unicode "substitute" character (0x1A). | ||||
4482 | */ | ||||
static int sqlite3Fts5IsBareword(char t){
  /* Lookup table indexed by ASCII code point. It is declared static const
  ** so that it is built once at compile time instead of being copied onto
  ** the stack on every call. */
  static const unsigned char aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };

  /* The high-bit test must come first: it accepts every non-ASCII byte
  ** and guarantees that the table is only indexed with values 0..127. */
  return (t & 0x80) || aBareword[(int)t];
}
4497 | |||||
4498 | |||||
4499 | /************************************************************************* | ||||
4500 | */ | ||||
/*
** A single entry in an Fts5Termset hash table. Entries that hash to the
** same bucket are linked together using the pNext pointers.
*/
typedef struct Fts5TermsetEntry Fts5TermsetEntry;
struct Fts5TermsetEntry {
  char *pTerm;                    /* Term text (nTerm bytes) */
  int nTerm;                      /* Size of pTerm in bytes */
  int iIdx;                       /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext;        /* Next entry in the same hash bucket */
};

/*
** A set of (index, term) pairs, stored as a hash table with a fixed
** number of buckets.
*/
struct Fts5Termset {
  Fts5TermsetEntry *apHash[512];  /* Hash buckets */
};
4512 | |||||
4513 | static int sqlite3Fts5TermsetNew(Fts5Termset **pp){ | ||||
4514 | int rc = SQLITE_OK0; | ||||
4515 | *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); | ||||
4516 | return rc; | ||||
4517 | } | ||||
4518 | |||||
4519 | static int sqlite3Fts5TermsetAdd( | ||||
4520 | Fts5Termset *p, | ||||
4521 | int iIdx, | ||||
4522 | const char *pTerm, int nTerm, | ||||
4523 | int *pbPresent | ||||
4524 | ){ | ||||
4525 | int rc = SQLITE_OK0; | ||||
4526 | *pbPresent = 0; | ||||
4527 | if( p ){ | ||||
4528 | int i; | ||||
4529 | u32 hash = 13; | ||||
4530 | Fts5TermsetEntry *pEntry; | ||||
4531 | |||||
4532 | /* Calculate a hash value for this term. This is the same hash checksum | ||||
4533 | ** used by the fts5_hash.c module. This is not important for correct | ||||
4534 | ** operation of the module, but is necessary to ensure that some tests | ||||
4535 | ** designed to produce hash table collisions really do work. */ | ||||
4536 | for(i=nTerm-1; i>=0; i--){ | ||||
4537 | hash = (hash << 3) ^ hash ^ pTerm[i]; | ||||
4538 | } | ||||
4539 | hash = (hash << 3) ^ hash ^ iIdx; | ||||
4540 | hash = hash % ArraySize(p->apHash)((int)(sizeof(p->apHash) / sizeof(p->apHash[0]))); | ||||
4541 | |||||
4542 | for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ | ||||
4543 | if( pEntry->iIdx==iIdx | ||||
4544 | && pEntry->nTerm==nTerm | ||||
4545 | && memcmp(pEntry->pTerm, pTerm, nTerm)==0 | ||||
4546 | ){ | ||||
4547 | *pbPresent = 1; | ||||
4548 | break; | ||||
4549 | } | ||||
4550 | } | ||||
4551 | |||||
4552 | if( pEntry==0 ){ | ||||
4553 | pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); | ||||
4554 | if( pEntry ){ | ||||
4555 | pEntry->pTerm = (char*)&pEntry[1]; | ||||
4556 | pEntry->nTerm = nTerm; | ||||
4557 | pEntry->iIdx = iIdx; | ||||
4558 | memcpy(pEntry->pTerm, pTerm, nTerm); | ||||
4559 | pEntry->pNext = p->apHash[hash]; | ||||
4560 | p->apHash[hash] = pEntry; | ||||
4561 | } | ||||
4562 | } | ||||
4563 | } | ||||
4564 | |||||
4565 | return rc; | ||||
4566 | } | ||||
4567 | |||||
4568 | static void sqlite3Fts5TermsetFree(Fts5Termset *p){ | ||||
4569 | if( p ){ | ||||
4570 | u32 i; | ||||
4571 | for(i=0; i<ArraySize(p->apHash)((int)(sizeof(p->apHash) / sizeof(p->apHash[0]))); i++){ | ||||
4572 | Fts5TermsetEntry *pEntry = p->apHash[i]; | ||||
4573 | while( pEntry ){ | ||||
4574 | Fts5TermsetEntry *pDel = pEntry; | ||||
4575 | pEntry = pEntry->pNext; | ||||
4576 | sqlite3_freesqlite3_api->free(pDel); | ||||
4577 | } | ||||
4578 | } | ||||
4579 | sqlite3_freesqlite3_api->free(p); | ||||
4580 | } | ||||
4581 | } | ||||
4582 | |||||
4583 | #line 1 "fts5_config.c" | ||||
4584 | /* | ||||
4585 | ** 2014 Jun 09 | ||||
4586 | ** | ||||
4587 | ** The author disclaims copyright to this source code. In place of | ||||
4588 | ** a legal notice, here is a blessing: | ||||
4589 | ** | ||||
4590 | ** May you do good and not evil. | ||||
4591 | ** May you find forgiveness for yourself and forgive others. | ||||
4592 | ** May you share freely, never taking more than you give. | ||||
4593 | ** | ||||
4594 | ****************************************************************************** | ||||
4595 | ** | ||||
4596 | ** This is an SQLite module implementing full-text search. | ||||
4597 | */ | ||||
4598 | |||||
4599 | |||||
4600 | /* #include "fts5Int.h" */ | ||||
4601 | |||||
/* Default values for the configuration options named by each macro */
#define FTS5_DEFAULT_PAGE_SIZE        4050
#define FTS5_DEFAULT_AUTOMERGE        4
#define FTS5_DEFAULT_USERMERGE        4
#define FTS5_DEFAULT_CRISISMERGE      16
#define FTS5_DEFAULT_HASHSIZE         (1024*1024)

#define FTS5_DEFAULT_DELETE_AUTOMERGE 10      /* default 10% */

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE            (64*1024)
4612 | |||||
/* Return 1 if x is a space character (0x20), or 0 for anything else. */
static int fts5_iswhitespace(char x){
  return (x!=' ') ? 0 : 1;
}
4616 | |||||
/*
** Return 1 if x is one of the four characters that may open a quoted
** string - ", ', [ or ` - or 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
4620 | |||||
4621 | /* | ||||
4622 | ** Argument pIn points to a character that is part of a nul-terminated | ||||
4623 | ** string. Return a pointer to the first character following *pIn in | ||||
4624 | ** the string that is not a white-space character. | ||||
4625 | */ | ||||
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *zCsr;

  /* A NULL input is passed straight through. */
  if( pIn==0 ) return 0;

  for(zCsr=pIn; fts5_iswhitespace(*zCsr); zCsr++){
    /* no-op */
  }
  return zCsr;
}
4633 | |||||
4634 | /* | ||||
4635 | ** Argument pIn points to a character that is part of a nul-terminated | ||||
4636 | ** string. Return a pointer to the first character following *pIn in | ||||
4637 | ** the string that is not a "bareword" character. | ||||
4638 | */ | ||||
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *zEnd;
  for(zEnd=pIn; sqlite3Fts5IsBareword(*zEnd); zEnd++){
    /* no-op */
  }
  /* NULL is returned if not even one bareword character was found. */
  return (zEnd==pIn) ? 0 : zEnd;
}
4645 | |||||
/* Return 1 if a is an ASCII decimal digit ('0'..'9'), or 0 otherwise. */
static int fts5_isdigit(char a){
  return !(a<'0' || a>'9');
}
4649 | |||||
4650 | |||||
4651 | |||||
/*
** Argument pIn points to the first character of what may be an SQL
** literal within a nul-terminated string. The following forms are
** recognized:
**
**   * NULL (case-insensitive),
**   * a blob literal - X'...' with an even number of hex digits,
**   * a string literal - '...', where '' is an escaped quote, and
**   * a number - optional sign, digits and an optional fractional part.
**
** If a literal is found, return a pointer to the character immediately
** following it. Otherwise, return NULL.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) counts the "X'" prefix plus the hex digits, so it is
        ** even if and only if the number of hex digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      for(;;){
        /* The nul-terminator is tested for before the cursor is advanced.
        ** Otherwise a lone quote character at the very end of the string
        ** would cause a read past the terminator. */
        if( *p=='\0' ){
          p = 0;                  /* Unterminated string literal */
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;   /* That was the closing quote */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by at least one digit. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
4714 | |||||
4715 | /* | ||||
4716 | ** The first character of the string pointed to by argument z is guaranteed | ||||
4717 | ** to be an open-quote character (see function fts5_isopenquote()). | ||||
4718 | ** | ||||
4719 | ** This function searches for the corresponding close-quote character within | ||||
4720 | ** the string and, if found, dequotes the string in place and adds a new | ||||
4721 | ** nul-terminator byte. | ||||
4722 | ** | ||||
** If the close-quote is found, the value returned is the byte offset of
** the character immediately following it. Or, if the close-quote is not
** found, the entire remainder of the string is treated as quoted text and
** the byte offset of the original nul-terminator is returned.
4727 | */ | ||||
static int fts5Dequote(char *z){
  int iRead = 1;                  /* Offset of next byte to read */
  int iWrite = 0;                 /* Offset of next byte to write */
  char cClose = z[0];             /* Close-quote character */

  /* The close-quote matches the open-quote, except that '[' pairs with
  ** ']'. */
  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  while( z[iRead]!='\0' ){
    char c = z[iRead];
    if( c!=cClose ){
      /* Ordinary character. Copy it to the output. */
      z[iWrite++] = c;
      iRead++;
      continue;
    }
    if( z[iRead+1]!=cClose ){
      /* Character iRead was the close quote. */
      iRead++;
      break;
    }
    /* Characters iRead and iRead+1 form an escaped quote character. Skip
    ** the input cursor past both and copy a single quote character to the
    ** output buffer. */
    z[iWrite++] = cClose;
    iRead += 2;
  }

  z[iWrite] = '\0';
  return iRead;
}
4759 | |||||
4760 | /* | ||||
4761 | ** Convert an SQL-style quoted string into a normal string by removing | ||||
4762 | ** the quote characters. The conversion is done in-place. If the | ||||
4763 | ** input does not begin with a quote character, then this routine | ||||
4764 | ** is a no-op. | ||||
4765 | ** | ||||
4766 | ** Examples: | ||||
4767 | ** | ||||
4768 | ** "abc" becomes abc | ||||
4769 | ** 'xyz' becomes xyz | ||||
4770 | ** [pqr] becomes pqr | ||||
4771 | ** `mno` becomes mno | ||||
4772 | */ | ||||
static void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );

  /* Only strings that begin with a quote character are modified. */
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      break;
  }
}
4782 | |||||
4783 | |||||
/*
** One entry in a table that maps names to integer values. See
** fts5ConfigSetEnum(), which expects such a table to be terminated by an
** entry with zName==NULL.
*/
typedef struct Fts5Enum Fts5Enum;
struct Fts5Enum {
  const char *zName;              /* Name of the enumeration value */
  int eVal;                       /* Corresponding integer value */
};
4789 | |||||
4790 | static int fts5ConfigSetEnum( | ||||
4791 | const Fts5Enum *aEnum, | ||||
4792 | const char *zEnum, | ||||
4793 | int *peVal | ||||
4794 | ){ | ||||
4795 | int nEnum = (int)strlen(zEnum); | ||||
4796 | int i; | ||||
4797 | int iVal = -1; | ||||
4798 | |||||
4799 | for(i=0; aEnum[i].zName; i++){ | ||||
4800 | if( sqlite3_strnicmpsqlite3_api->strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){ | ||||
4801 | if( iVal>=0 ) return SQLITE_ERROR1; | ||||
4802 | iVal = aEnum[i].eVal; | ||||
4803 | } | ||||
4804 | } | ||||
4805 | |||||
4806 | *peVal = iVal; | ||||
4807 | return iVal<0 ? SQLITE_ERROR1 : SQLITE_OK0; | ||||
4808 | } | ||||
4809 | |||||
4810 | /* | ||||
4811 | ** Parse a "special" CREATE VIRTUAL TABLE directive and update | ||||
4812 | ** configuration object pConfig as appropriate. | ||||
4813 | ** | ||||
4814 | ** If successful, object pConfig is updated and SQLITE_OK returned. If | ||||
4815 | ** an error occurs, an SQLite error code is returned and an error message | ||||
4816 | ** may be left in *pzErr. It is the responsibility of the caller to | ||||
4817 | ** eventually free any such error message using sqlite3_free(). | ||||
4818 | */ | ||||
4819 | static int fts5ConfigParseSpecial( | ||||
4820 | Fts5Config *pConfig, /* Configuration object to update */ | ||||
4821 | const char *zCmd, /* Special command to parse */ | ||||
4822 | const char *zArg, /* Argument to parse */ | ||||
4823 | char **pzErr /* OUT: Error message */ | ||||
4824 | ){ | ||||
4825 | int rc = SQLITE_OK0; | ||||
4826 | int nCmd = (int)strlen(zCmd); | ||||
4827 | |||||
4828 | if( sqlite3_strnicmpsqlite3_api->strnicmp("prefix", zCmd, nCmd)==0 ){ | ||||
4829 | const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES31; | ||||
4830 | const char *p; | ||||
4831 | int bFirst = 1; | ||||
4832 | if( pConfig->aPrefix==0 ){ | ||||
4833 | pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte); | ||||
4834 | if( rc ) return rc; | ||||
4835 | } | ||||
4836 | |||||
4837 | p = zArg; | ||||
4838 | while( 1 ){ | ||||
4839 | int nPre = 0; | ||||
4840 | |||||
4841 | while( p[0]==' ' ) p++; | ||||
4842 | if( bFirst==0 && p[0]==',' ){ | ||||
4843 | p++; | ||||
4844 | while( p[0]==' ' ) p++; | ||||
4845 | }else if( p[0]=='\0' ){ | ||||
4846 | break; | ||||
4847 | } | ||||
4848 | if( p[0]<'0' || p[0]>'9' ){ | ||||
4849 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed prefix=... directive"); | ||||
4850 | rc = SQLITE_ERROR1; | ||||
4851 | break; | ||||
4852 | } | ||||
4853 | |||||
4854 | if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES31 ){ | ||||
4855 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | ||||
4856 | "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES31 | ||||
4857 | ); | ||||
4858 | rc = SQLITE_ERROR1; | ||||
4859 | break; | ||||
4860 | } | ||||
4861 | |||||
4862 | while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ | ||||
4863 | nPre = nPre*10 + (p[0] - '0'); | ||||
4864 | p++; | ||||
4865 | } | ||||
4866 | |||||
4867 | if( nPre<=0 || nPre>=1000 ){ | ||||
4868 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("prefix length out of range (max 999)"); | ||||
4869 | rc = SQLITE_ERROR1; | ||||
4870 | break; | ||||
4871 | } | ||||
4872 | |||||
4873 | pConfig->aPrefix[pConfig->nPrefix] = nPre; | ||||
4874 | pConfig->nPrefix++; | ||||
4875 | bFirst = 0; | ||||
4876 | } | ||||
4877 | assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES )((void) (0)); | ||||
4878 | return rc; | ||||
4879 | } | ||||
4880 | |||||
4881 | if( sqlite3_strnicmpsqlite3_api->strnicmp("tokenize", zCmd, nCmd)==0 ){ | ||||
4882 | const char *p = (const char*)zArg; | ||||
4883 | sqlite3_int64 nArg = strlen(zArg) + 1; | ||||
4884 | char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg); | ||||
4885 | |||||
4886 | if( azArg ){ | ||||
4887 | char *pSpace = (char*)&azArg[nArg]; | ||||
4888 | if( pConfig->t.azArg ){ | ||||
4889 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple tokenize=... directives"); | ||||
4890 | rc = SQLITE_ERROR1; | ||||
4891 | }else{ | ||||
4892 | for(nArg=0; p && *p; nArg++){ | ||||
4893 | const char *p2 = fts5ConfigSkipWhitespace(p); | ||||
4894 | if( *p2=='\'' ){ | ||||
4895 | p = fts5ConfigSkipLiteral(p2); | ||||
4896 | }else{ | ||||
4897 | p = fts5ConfigSkipBareword(p2); | ||||
4898 | } | ||||
4899 | if( p ){ | ||||
4900 | memcpy(pSpace, p2, p-p2); | ||||
4901 | azArg[nArg] = pSpace; | ||||
4902 | sqlite3Fts5Dequote(pSpace); | ||||
4903 | pSpace += (p - p2) + 1; | ||||
4904 | p = fts5ConfigSkipWhitespace(p); | ||||
4905 | } | ||||
4906 | } | ||||
4907 | if( p==0 ){ | ||||
4908 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("parse error in tokenize directive"); | ||||
4909 | rc = SQLITE_ERROR1; | ||||
4910 | }else{ | ||||
4911 | pConfig->t.azArg = (const char**)azArg; | ||||
4912 | pConfig->t.nArg = nArg; | ||||
4913 | azArg = 0; | ||||
4914 | } | ||||
4915 | } | ||||
4916 | } | ||||
4917 | sqlite3_freesqlite3_api->free(azArg); | ||||
4918 | |||||
4919 | return rc; | ||||
4920 | } | ||||
4921 | |||||
4922 | if( sqlite3_strnicmpsqlite3_api->strnicmp("content", zCmd, nCmd)==0 ){ | ||||
4923 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 ){ | ||||
4924 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple content=... directives"); | ||||
4925 | rc = SQLITE_ERROR1; | ||||
4926 | }else{ | ||||
4927 | if( zArg[0] ){ | ||||
4928 | pConfig->eContent = FTS5_CONTENT_EXTERNAL2; | ||||
4929 | pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg); | ||||
4930 | }else{ | ||||
4931 | pConfig->eContent = FTS5_CONTENT_NONE1; | ||||
4932 | } | ||||
4933 | } | ||||
4934 | return rc; | ||||
4935 | } | ||||
4936 | |||||
4937 | if( sqlite3_strnicmpsqlite3_api->strnicmp("contentless_delete", zCmd, nCmd)==0 ){ | ||||
4938 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | ||||
4939 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed contentless_delete=... directive"); | ||||
4940 | rc = SQLITE_ERROR1; | ||||
4941 | }else{ | ||||
4942 | pConfig->bContentlessDelete = (zArg[0]=='1'); | ||||
4943 | } | ||||
4944 | return rc; | ||||
4945 | } | ||||
4946 | |||||
4947 | if( sqlite3_strnicmpsqlite3_api->strnicmp("contentless_unindexed", zCmd, nCmd)==0 ){ | ||||
4948 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | ||||
4949 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed contentless_delete=... directive"); | ||||
4950 | rc = SQLITE_ERROR1; | ||||
4951 | }else{ | ||||
4952 | pConfig->bContentlessUnindexed = (zArg[0]=='1'); | ||||
4953 | } | ||||
4954 | return rc; | ||||
4955 | } | ||||
4956 | |||||
4957 | if( sqlite3_strnicmpsqlite3_api->strnicmp("content_rowid", zCmd, nCmd)==0 ){ | ||||
4958 | if( pConfig->zContentRowid ){ | ||||
4959 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple content_rowid=... directives"); | ||||
4960 | rc = SQLITE_ERROR1; | ||||
4961 | }else{ | ||||
4962 | pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1); | ||||
4963 | } | ||||
4964 | return rc; | ||||
4965 | } | ||||
4966 | |||||
4967 | if( sqlite3_strnicmpsqlite3_api->strnicmp("columnsize", zCmd, nCmd)==0 ){ | ||||
4968 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | ||||
4969 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed columnsize=... directive"); | ||||
4970 | rc = SQLITE_ERROR1; | ||||
4971 | }else{ | ||||
4972 | pConfig->bColumnsize = (zArg[0]=='1'); | ||||
4973 | } | ||||
4974 | return rc; | ||||
4975 | } | ||||
4976 | |||||
4977 | if( sqlite3_strnicmpsqlite3_api->strnicmp("locale", zCmd, nCmd)==0 ){ | ||||
4978 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | ||||
4979 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed locale=... directive"); | ||||
4980 | rc = SQLITE_ERROR1; | ||||
4981 | }else{ | ||||
4982 | pConfig->bLocale = (zArg[0]=='1'); | ||||
4983 | } | ||||
4984 | return rc; | ||||
4985 | } | ||||
4986 | |||||
4987 | if( sqlite3_strnicmpsqlite3_api->strnicmp("detail", zCmd, nCmd)==0 ){ | ||||
4988 | const Fts5Enum aDetail[] = { | ||||
4989 | { "none", FTS5_DETAIL_NONE1 }, | ||||
4990 | { "full", FTS5_DETAIL_FULL0 }, | ||||
4991 | { "columns", FTS5_DETAIL_COLUMNS2 }, | ||||
4992 | { 0, 0 } | ||||
4993 | }; | ||||
4994 | |||||
4995 | if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){ | ||||
4996 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed detail=... directive"); | ||||
4997 | } | ||||
4998 | return rc; | ||||
4999 | } | ||||
5000 | |||||
5001 | if( sqlite3_strnicmpsqlite3_api->strnicmp("tokendata", zCmd, nCmd)==0 ){ | ||||
5002 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | ||||
5003 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed tokendata=... directive"); | ||||
5004 | rc = SQLITE_ERROR1; | ||||
5005 | }else{ | ||||
5006 | pConfig->bTokendata = (zArg[0]=='1'); | ||||
5007 | } | ||||
5008 | return rc; | ||||
5009 | } | ||||
5010 | |||||
5011 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); | ||||
5012 | return SQLITE_ERROR1; | ||||
5013 | } | ||||
5014 | |||||
5015 | /* | ||||
5016 | ** Gobble up the first bareword or quoted word from the input buffer zIn. | ||||
5017 | ** Return a pointer to the character immediately following the last in | ||||
5018 | ** the gobbled word if successful, or a NULL pointer otherwise (failed | ||||
5019 | ** to find close-quote character). | ||||
5020 | ** | ||||
5021 | ** Before returning, set pzOut to point to a new buffer containing a | ||||
5022 | ** nul-terminated, dequoted copy of the gobbled word. If the word was | ||||
5023 | ** quoted, *pbQuoted is also set to 1 before returning. | ||||
5024 | ** | ||||
5025 | ** If *pRc is other than SQLITE_OK when this function is called, it is | ||||
5026 | ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this | ||||
5027 | ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* | ||||
5028 | ** set if a parse error (failed to find close quote) occurs. | ||||
5029 | */ | ||||
5030 | static const char *fts5ConfigGobbleWord( | ||||
5031 | int *pRc, /* IN/OUT: Error code */ | ||||
5032 | const char *zIn, /* Buffer to gobble string/bareword from */ | ||||
5033 | char **pzOut, /* OUT: malloc'd buffer containing str/bw */ | ||||
5034 | int *pbQuoted /* OUT: Set to true if dequoting required */ | ||||
5035 | ){ | ||||
5036 | const char *zRet = 0; | ||||
5037 | |||||
5038 | sqlite3_int64 nIn = strlen(zIn); | ||||
5039 | char *zOut = sqlite3_malloc64sqlite3_api->malloc64(nIn+1); | ||||
5040 | |||||
5041 | assert( *pRc==SQLITE_OK )((void) (0)); | ||||
5042 | *pbQuoted = 0; | ||||
5043 | *pzOut = 0; | ||||
5044 | |||||
5045 | if( zOut==0 ){ | ||||
5046 | *pRc = SQLITE_NOMEM7; | ||||
5047 | }else{ | ||||
5048 | memcpy(zOut, zIn, (size_t)(nIn+1)); | ||||
5049 | if( fts5_isopenquote(zOut[0]) ){ | ||||
5050 | int ii = fts5Dequote(zOut); | ||||
5051 | zRet = &zIn[ii]; | ||||
5052 | *pbQuoted = 1; | ||||
5053 | }else{ | ||||
5054 | zRet = fts5ConfigSkipBareword(zIn); | ||||
5055 | if( zRet ){ | ||||
5056 | zOut[zRet-zIn] = '\0'; | ||||
5057 | } | ||||
5058 | } | ||||
5059 | } | ||||
5060 | |||||
5061 | if( zRet==0 ){ | ||||
5062 | sqlite3_freesqlite3_api->free(zOut); | ||||
5063 | }else{ | ||||
5064 | *pzOut = zOut; | ||||
5065 | } | ||||
5066 | |||||
5067 | return zRet; | ||||
5068 | } | ||||
5069 | |||||
5070 | static int fts5ConfigParseColumn( | ||||
5071 | Fts5Config *p, | ||||
5072 | char *zCol, | ||||
5073 | char *zArg, | ||||
5074 | char **pzErr, | ||||
5075 | int *pbUnindexed | ||||
5076 | ){ | ||||
5077 | int rc = SQLITE_OK0; | ||||
5078 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zCol, FTS5_RANK_NAME"rank") | ||||
5079 | || 0==sqlite3_stricmpsqlite3_api->stricmp(zCol, FTS5_ROWID_NAME"rowid") | ||||
5080 | ){ | ||||
5081 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("reserved fts5 column name: %s", zCol); | ||||
5082 | rc = SQLITE_ERROR1; | ||||
5083 | }else if( zArg ){ | ||||
5084 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zArg, "unindexed") ){ | ||||
5085 | p->abUnindexed[p->nCol] = 1; | ||||
5086 | *pbUnindexed = 1; | ||||
5087 | }else{ | ||||
5088 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unrecognized column option: %s", zArg); | ||||
5089 | rc = SQLITE_ERROR1; | ||||
5090 | } | ||||
5091 | } | ||||
5092 | |||||
5093 | p->azCol[p->nCol++] = zCol; | ||||
5094 | return rc; | ||||
5095 | } | ||||
5096 | |||||
5097 | /* | ||||
5098 | ** Populate the Fts5Config.zContentExprlist string. | ||||
5099 | */ | ||||
5100 | static int fts5ConfigMakeExprlist(Fts5Config *p){ | ||||
5101 | int i; | ||||
5102 | int rc = SQLITE_OK0; | ||||
5103 | Fts5Buffer buf = {0, 0, 0}; | ||||
5104 | |||||
5105 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); | ||||
5106 | if( p->eContent!=FTS5_CONTENT_NONE1 ){ | ||||
5107 | assert( p->eContent==FTS5_CONTENT_EXTERNAL((void) (0)) | ||||
5108 | || p->eContent==FTS5_CONTENT_NORMAL((void) (0)) | ||||
5109 | || p->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | ||||
5110 | )((void) (0)); | ||||
5111 | for(i=0; i<p->nCol; i++){ | ||||
5112 | if( p->eContent==FTS5_CONTENT_EXTERNAL2 ){ | ||||
5113 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); | ||||
5114 | }else if( p->eContent==FTS5_CONTENT_NORMAL0 || p->abUnindexed[i] ){ | ||||
5115 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); | ||||
5116 | }else{ | ||||
5117 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); | ||||
5118 | } | ||||
5119 | } | ||||
5120 | } | ||||
5121 | if( p->eContent==FTS5_CONTENT_NORMAL0 && p->bLocale ){ | ||||
5122 | for(i=0; i<p->nCol; i++){ | ||||
5123 | if( p->abUnindexed[i]==0 ){ | ||||
5124 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.l%d", i); | ||||
5125 | }else{ | ||||
5126 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); | ||||
5127 | } | ||||
5128 | } | ||||
5129 | } | ||||
5130 | |||||
5131 | assert( p->zContentExprlist==0 )((void) (0)); | ||||
5132 | p->zContentExprlist = (char*)buf.p; | ||||
5133 | return rc; | ||||
5134 | } | ||||
5135 | |||||
5136 | /* | ||||
5137 | ** Arguments nArg/azArg contain the string arguments passed to the xCreate | ||||
5138 | ** or xConnect method of the virtual table. This function attempts to | ||||
5139 | ** allocate an instance of Fts5Config containing the results of parsing | ||||
5140 | ** those arguments. | ||||
5141 | ** | ||||
5142 | ** If successful, SQLITE_OK is returned and *ppOut is set to point to the | ||||
5143 | ** new Fts5Config object. If an error occurs, an SQLite error code is | ||||
5144 | ** returned, *ppOut is set to NULL and an error message may be left in | ||||
5145 | ** *pzErr. It is the responsibility of the caller to eventually free any | ||||
5146 | ** such error message using sqlite3_free(). | ||||
5147 | */ | ||||
5148 | static int sqlite3Fts5ConfigParse( | ||||
5149 | Fts5Global *pGlobal, | ||||
5150 | sqlite3 *db, | ||||
5151 | int nArg, /* Number of arguments */ | ||||
5152 | const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ | ||||
5153 | Fts5Config **ppOut, /* OUT: Results of parse */ | ||||
5154 | char **pzErr /* OUT: Error message */ | ||||
5155 | ){ | ||||
5156 | int rc = SQLITE_OK0; /* Return code */ | ||||
5157 | Fts5Config *pRet; /* New object to return */ | ||||
5158 | int i; | ||||
5159 | sqlite3_int64 nByte; | ||||
5160 | int bUnindexed = 0; /* True if there are one or more UNINDEXED */ | ||||
5161 | |||||
5162 | *ppOut = pRet = (Fts5Config*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Config)); | ||||
5163 | if( pRet==0 ) return SQLITE_NOMEM7; | ||||
5164 | memset(pRet, 0, sizeof(Fts5Config)); | ||||
5165 | pRet->pGlobal = pGlobal; | ||||
5166 | pRet->db = db; | ||||
5167 | pRet->iCookie = -1; | ||||
5168 | |||||
5169 | nByte = nArg * (sizeof(char*) + sizeof(u8)); | ||||
5170 | pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); | ||||
5171 | pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0; | ||||
5172 | pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); | ||||
5173 | pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); | ||||
5174 | pRet->bColumnsize = 1; | ||||
5175 | pRet->eDetail = FTS5_DETAIL_FULL0; | ||||
5176 | #ifdef SQLITE_DEBUG | ||||
5177 | pRet->bPrefixIndex = 1; | ||||
5178 | #endif | ||||
5179 | if( rc==SQLITE_OK0 && sqlite3_stricmpsqlite3_api->stricmp(pRet->zName, FTS5_RANK_NAME"rank")==0 ){ | ||||
5180 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("reserved fts5 table name: %s", pRet->zName); | ||||
5181 | rc = SQLITE_ERROR1; | ||||
5182 | } | ||||
5183 | |||||
5184 | assert( (pRet->abUnindexed && pRet->azCol) || rc!=SQLITE_OK )((void) (0)); | ||||
5185 | for(i=3; rc==SQLITE_OK0 && i<nArg; i++){ | ||||
5186 | const char *zOrig = azArg[i]; | ||||
5187 | const char *z; | ||||
5188 | char *zOne = 0; | ||||
5189 | char *zTwo = 0; | ||||
5190 | int bOption = 0; | ||||
5191 | int bMustBeCol = 0; | ||||
5192 | |||||
5193 | z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); | ||||
5194 | z = fts5ConfigSkipWhitespace(z); | ||||
5195 | if( z && *z=='=' ){ | ||||
5196 | bOption = 1; | ||||
5197 | assert( zOne!=0 )((void) (0)); | ||||
5198 | z++; | ||||
5199 | if( bMustBeCol ) z = 0; | ||||
5200 | } | ||||
5201 | z = fts5ConfigSkipWhitespace(z); | ||||
5202 | if( z && z[0] ){ | ||||
5203 | int bDummy; | ||||
5204 | z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); | ||||
5205 | if( z && z[0] ) z = 0; | ||||
5206 | } | ||||
5207 | |||||
5208 | if( rc==SQLITE_OK0 ){ | ||||
5209 | if( z==0 ){ | ||||
5210 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("parse error in \"%s\"", zOrig); | ||||
5211 | rc = SQLITE_ERROR1; | ||||
5212 | }else{ | ||||
5213 | if( bOption ){ | ||||
5214 | rc = fts5ConfigParseSpecial(pRet, | ||||
5215 | ALWAYS(zOne)(zOne)?zOne:"", | ||||
5216 | zTwo?zTwo:"", | ||||
5217 | pzErr | ||||
5218 | ); | ||||
5219 | }else{ | ||||
5220 | rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr, &bUnindexed); | ||||
5221 | zOne = 0; | ||||
5222 | } | ||||
5223 | } | ||||
5224 | } | ||||
5225 | |||||
5226 | sqlite3_freesqlite3_api->free(zOne); | ||||
5227 | sqlite3_freesqlite3_api->free(zTwo); | ||||
5228 | } | ||||
5229 | |||||
5230 | /* We only allow contentless_delete=1 if the table is indeed contentless. */ | ||||
5231 | if( rc==SQLITE_OK0 | ||||
5232 | && pRet->bContentlessDelete | ||||
5233 | && pRet->eContent!=FTS5_CONTENT_NONE1 | ||||
5234 | ){ | ||||
5235 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | ||||
5236 | "contentless_delete=1 requires a contentless table" | ||||
5237 | ); | ||||
5238 | rc = SQLITE_ERROR1; | ||||
5239 | } | ||||
5240 | |||||
5241 | /* We only allow contentless_delete=1 if columnsize=0 is not present. | ||||
5242 | ** | ||||
5243 | ** This restriction may be removed at some point. | ||||
5244 | */ | ||||
5245 | if( rc==SQLITE_OK0 && pRet->bContentlessDelete && pRet->bColumnsize==0 ){ | ||||
5246 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | ||||
5247 | "contentless_delete=1 is incompatible with columnsize=0" | ||||
5248 | ); | ||||
5249 | rc = SQLITE_ERROR1; | ||||
5250 | } | ||||
5251 | |||||
5252 | /* We only allow contentless_unindexed=1 if the table is actually a | ||||
5253 | ** contentless one. | ||||
5254 | */ | ||||
5255 | if( rc==SQLITE_OK0 | ||||
5256 | && pRet->bContentlessUnindexed | ||||
5257 | && pRet->eContent!=FTS5_CONTENT_NONE1 | ||||
5258 | ){ | ||||
5259 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | ||||
5260 | "contentless_unindexed=1 requires a contentless table" | ||||
5261 | ); | ||||
5262 | rc = SQLITE_ERROR1; | ||||
5263 | } | ||||
5264 | |||||
5265 | /* If no zContent option was specified, fill in the default values. */ | ||||
5266 | if( rc==SQLITE_OK0 && pRet->zContent==0 ){ | ||||
5267 | const char *zTail = 0; | ||||
5268 | assert( pRet->eContent==FTS5_CONTENT_NORMAL((void) (0)) | ||||
5269 | || pRet->eContent==FTS5_CONTENT_NONE((void) (0)) | ||||
5270 | )((void) (0)); | ||||
5271 | if( pRet->eContent==FTS5_CONTENT_NORMAL0 ){ | ||||
5272 | zTail = "content"; | ||||
5273 | }else if( bUnindexed && pRet->bContentlessUnindexed ){ | ||||
5274 | pRet->eContent = FTS5_CONTENT_UNINDEXED3; | ||||
5275 | zTail = "content"; | ||||
5276 | }else if( pRet->bColumnsize ){ | ||||
5277 | zTail = "docsize"; | ||||
5278 | } | ||||
5279 | |||||
5280 | if( zTail ){ | ||||
5281 | pRet->zContent = sqlite3Fts5Mprintf( | ||||
5282 | &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail | ||||
5283 | ); | ||||
5284 | } | ||||
5285 | } | ||||
5286 | |||||
5287 | if( rc==SQLITE_OK0 && pRet->zContentRowid==0 ){ | ||||
5288 | pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); | ||||
5289 | } | ||||
5290 | |||||
5291 | /* Formulate the zContentExprlist text */ | ||||
5292 | if( rc==SQLITE_OK0 ){ | ||||
5293 | rc = fts5ConfigMakeExprlist(pRet); | ||||
5294 | } | ||||
5295 | |||||
5296 | if( rc!=SQLITE_OK0 ){ | ||||
5297 | sqlite3Fts5ConfigFree(pRet); | ||||
5298 | *ppOut = 0; | ||||
5299 | } | ||||
5300 | return rc; | ||||
5301 | } | ||||
5302 | |||||
5303 | /* | ||||
5304 | ** Free the configuration object passed as the only argument. | ||||
5305 | */ | ||||
5306 | static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ | ||||
5307 | if( pConfig ){ | ||||
5308 | int i; | ||||
5309 | if( pConfig->t.pTok ){ | ||||
5310 | if( pConfig->t.pApi1 ){ | ||||
5311 | pConfig->t.pApi1->xDelete(pConfig->t.pTok); | ||||
5312 | }else{ | ||||
5313 | pConfig->t.pApi2->xDelete(pConfig->t.pTok); | ||||
5314 | } | ||||
5315 | } | ||||
5316 | sqlite3_freesqlite3_api->free((char*)pConfig->t.azArg); | ||||
5317 | sqlite3_freesqlite3_api->free(pConfig->zDb); | ||||
5318 | sqlite3_freesqlite3_api->free(pConfig->zName); | ||||
5319 | for(i=0; i<pConfig->nCol; i++){ | ||||
5320 | sqlite3_freesqlite3_api->free(pConfig->azCol[i]); | ||||
5321 | } | ||||
5322 | sqlite3_freesqlite3_api->free(pConfig->azCol); | ||||
5323 | sqlite3_freesqlite3_api->free(pConfig->aPrefix); | ||||
5324 | sqlite3_freesqlite3_api->free(pConfig->zRank); | ||||
5325 | sqlite3_freesqlite3_api->free(pConfig->zRankArgs); | ||||
5326 | sqlite3_freesqlite3_api->free(pConfig->zContent); | ||||
5327 | sqlite3_freesqlite3_api->free(pConfig->zContentRowid); | ||||
5328 | sqlite3_freesqlite3_api->free(pConfig->zContentExprlist); | ||||
5329 | sqlite3_freesqlite3_api->free(pConfig); | ||||
5330 | } | ||||
5331 | } | ||||
5332 | |||||
5333 | /* | ||||
5334 | ** Call sqlite3_declare_vtab() based on the contents of the configuration | ||||
5335 | ** object passed as the only argument. Return SQLITE_OK if successful, or | ||||
5336 | ** an SQLite error code if an error occurs. | ||||
5337 | */ | ||||
5338 | static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ | ||||
5339 | int i; | ||||
5340 | int rc = SQLITE_OK0; | ||||
5341 | char *zSql; | ||||
5342 | |||||
5343 | zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); | ||||
5344 | for(i=0; zSql && i<pConfig->nCol; i++){ | ||||
5345 | const char *zSep = (i==0?"":", "); | ||||
5346 | zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]); | ||||
5347 | } | ||||
5348 | zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", | ||||
5349 | zSql, pConfig->zName, FTS5_RANK_NAME"rank" | ||||
5350 | ); | ||||
5351 | |||||
5352 | assert( zSql || rc==SQLITE_NOMEM )((void) (0)); | ||||
5353 | if( zSql ){ | ||||
5354 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(pConfig->db, zSql); | ||||
5355 | sqlite3_freesqlite3_api->free(zSql); | ||||
5356 | } | ||||
5357 | |||||
5358 | return rc; | ||||
5359 | } | ||||
5360 | |||||
5361 | /* | ||||
5362 | ** Tokenize the text passed via the second and third arguments. | ||||
5363 | ** | ||||
5364 | ** The callback is invoked once for each token in the input text. The | ||||
5365 | ** arguments passed to it are, in order: | ||||
5366 | ** | ||||
5367 | ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() | ||||
5368 | ** const char *pToken // Pointer to buffer containing token | ||||
5369 | ** int nToken // Size of token in bytes | ||||
5370 | ** int iStart // Byte offset of start of token within input text | ||||
5371 | ** int iEnd // Byte offset of end of token within input text | ||||
5372 | ** int iPos // Position of token in input (first token is 0) | ||||
5373 | ** | ||||
5374 | ** If the callback returns a non-zero value the tokenization is abandoned | ||||
5375 | ** and no further callbacks are issued. | ||||
5376 | ** | ||||
5377 | ** This function returns SQLITE_OK if successful or an SQLite error code | ||||
5378 | ** if an error occurs. If the tokenization was abandoned early because | ||||
5379 | ** the callback returned SQLITE_DONE, this is not an error and this function | ||||
5380 | ** still returns SQLITE_OK. Or, if the tokenization was abandoned early | ||||
5381 | ** because the callback returned another non-zero value, it is assumed | ||||
5382 | ** to be an SQLite error code and returned to the caller. | ||||
5383 | */ | ||||
5384 | static int sqlite3Fts5Tokenize( | ||||
5385 | Fts5Config *pConfig, /* FTS5 Configuration object */ | ||||
5386 | int flags, /* FTS5_TOKENIZE_* flags */ | ||||
5387 | const char *pText, int nText, /* Text to tokenize */ | ||||
5388 | void *pCtx, /* Context passed to xToken() */ | ||||
5389 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | ||||
5390 | ){ | ||||
5391 | int rc = SQLITE_OK0; | ||||
5392 | if( pText ){ | ||||
5393 | if( pConfig->t.pTok==0 ){ | ||||
5394 | rc = sqlite3Fts5LoadTokenizer(pConfig); | ||||
5395 | } | ||||
5396 | if( rc==SQLITE_OK0 ){ | ||||
5397 | if( pConfig->t.pApi1 ){ | ||||
5398 | rc = pConfig->t.pApi1->xTokenize( | ||||
5399 | pConfig->t.pTok, pCtx, flags, pText, nText, xToken | ||||
5400 | ); | ||||
5401 | }else{ | ||||
5402 | rc = pConfig->t.pApi2->xTokenize(pConfig->t.pTok, pCtx, flags, | ||||
5403 | pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken | ||||
5404 | ); | ||||
5405 | } | ||||
5406 | } | ||||
5407 | } | ||||
5408 | return rc; | ||||
5409 | } | ||||
5410 | |||||
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *p = pIn;

  while( 1 ){
    p = fts5ConfigSkipWhitespace(p);
    p = fts5ConfigSkipLiteral(p);
    p = fts5ConfigSkipWhitespace(p);
    if( p==0 || *p==')' ) break;
    if( *p!=',' ){
      /* Something other than ',' or ')' follows the literal. */
      p = 0;
      break;
    }
    p++;
  }

  return p;
}
5434 | |||||
5435 | /* | ||||
5436 | ** Parameter zIn contains a rank() function specification. The format of | ||||
5437 | ** this is: | ||||
5438 | ** | ||||
5439 | ** + Bareword (function name) | ||||
5440 | ** + Open parenthesis - "(" | ||||
5441 | ** + Zero or more SQL literals in a comma separated list | ||||
5442 | ** + Close parenthesis - ")" | ||||
5443 | */ | ||||
5444 | static int sqlite3Fts5ConfigParseRank( | ||||
5445 | const char *zIn, /* Input string */ | ||||
5446 | char **pzRank, /* OUT: Rank function name */ | ||||
5447 | char **pzRankArgs /* OUT: Rank function arguments */ | ||||
5448 | ){ | ||||
5449 | const char *p = zIn; | ||||
5450 | const char *pRank; | ||||
5451 | char *zRank = 0; | ||||
5452 | char *zRankArgs = 0; | ||||
5453 | int rc = SQLITE_OK0; | ||||
5454 | |||||
5455 | *pzRank = 0; | ||||
5456 | *pzRankArgs = 0; | ||||
5457 | |||||
5458 | if( p==0 ){ | ||||
5459 | rc = SQLITE_ERROR1; | ||||
5460 | }else{ | ||||
5461 | p = fts5ConfigSkipWhitespace(p); | ||||
5462 | pRank = p; | ||||
5463 | p = fts5ConfigSkipBareword(p); | ||||
5464 | |||||
5465 | if( p ){ | ||||
5466 | zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); | ||||
5467 | if( zRank ) memcpy(zRank, pRank, p-pRank); | ||||
5468 | }else{ | ||||
5469 | rc = SQLITE_ERROR1; | ||||
5470 | } | ||||
5471 | |||||
5472 | if( rc==SQLITE_OK0 ){ | ||||
5473 | p = fts5ConfigSkipWhitespace(p); | ||||
5474 | if( *p!='(' ) rc = SQLITE_ERROR1; | ||||
5475 | p++; | ||||
5476 | } | ||||
5477 | if( rc==SQLITE_OK0 ){ | ||||
5478 | const char *pArgs; | ||||
5479 | p = fts5ConfigSkipWhitespace(p); | ||||
5480 | pArgs = p; | ||||
5481 | if( *p!=')' ){ | ||||
5482 | p = fts5ConfigSkipArgs(p); | ||||
5483 | if( p==0 ){ | ||||
5484 | rc = SQLITE_ERROR1; | ||||
5485 | }else{ | ||||
5486 | zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); | ||||
5487 | if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); | ||||
5488 | } | ||||
5489 | } | ||||
5490 | } | ||||
5491 | } | ||||
5492 | |||||
5493 | if( rc!=SQLITE_OK0 ){ | ||||
5494 | sqlite3_freesqlite3_api->free(zRank); | ||||
5495 | assert( zRankArgs==0 )((void) (0)); | ||||
5496 | }else{ | ||||
5497 | *pzRank = zRank; | ||||
5498 | *pzRankArgs = zRankArgs; | ||||
5499 | } | ||||
5500 | return rc; | ||||
5501 | } | ||||
5502 | |||||
5503 | static int sqlite3Fts5ConfigSetValue( | ||||
5504 | Fts5Config *pConfig, | ||||
5505 | const char *zKey, | ||||
5506 | sqlite3_value *pVal, | ||||
5507 | int *pbBadkey | ||||
5508 | ){ | ||||
5509 | int rc = SQLITE_OK0; | ||||
5510 | |||||
5511 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "pgsz") ){ | ||||
5512 | int pgsz = 0; | ||||
5513 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | ||||
5514 | pgsz = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5515 | } | ||||
5516 | if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE(64*1024) ){ | ||||
5517 | *pbBadkey = 1; | ||||
5518 | }else{ | ||||
5519 | pConfig->pgsz = pgsz; | ||||
5520 | } | ||||
5521 | } | ||||
5522 | |||||
5523 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "hashsize") ){ | ||||
5524 | int nHashSize = -1; | ||||
5525 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | ||||
5526 | nHashSize = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5527 | } | ||||
5528 | if( nHashSize<=0 ){ | ||||
5529 | *pbBadkey = 1; | ||||
5530 | }else{ | ||||
5531 | pConfig->nHashSize = nHashSize; | ||||
5532 | } | ||||
5533 | } | ||||
5534 | |||||
5535 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "automerge") ){ | ||||
5536 | int nAutomerge = -1; | ||||
5537 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | ||||
5538 | nAutomerge = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5539 | } | ||||
5540 | if( nAutomerge<0 || nAutomerge>64 ){ | ||||
5541 | *pbBadkey = 1; | ||||
5542 | }else{ | ||||
5543 | if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE4; | ||||
5544 | pConfig->nAutomerge = nAutomerge; | ||||
5545 | } | ||||
5546 | } | ||||
5547 | |||||
5548 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "usermerge") ){ | ||||
5549 | int nUsermerge = -1; | ||||
5550 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | ||||
5551 | nUsermerge = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5552 | } | ||||
5553 | if( nUsermerge<2 || nUsermerge>16 ){ | ||||
5554 | *pbBadkey = 1; | ||||
5555 | }else{ | ||||
5556 | pConfig->nUsermerge = nUsermerge; | ||||
5557 | } | ||||
5558 | } | ||||
5559 | |||||
5560 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "crisismerge") ){ | ||||
5561 | int nCrisisMerge = -1; | ||||
5562 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | ||||
5563 | nCrisisMerge = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5564 | } | ||||
5565 | if( nCrisisMerge<0 ){ | ||||
5566 | *pbBadkey = 1; | ||||
5567 | }else{ | ||||
5568 | if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE16; | ||||
5569 | if( nCrisisMerge>=FTS5_MAX_SEGMENT2000 ) nCrisisMerge = FTS5_MAX_SEGMENT2000-1; | ||||
5570 | pConfig->nCrisisMerge = nCrisisMerge; | ||||
5571 | } | ||||
5572 | } | ||||
5573 | |||||
5574 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "deletemerge") ){ | ||||
5575 | int nVal = -1; | ||||
5576 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | ||||
5577 | nVal = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5578 | }else{ | ||||
5579 | *pbBadkey = 1; | ||||
5580 | } | ||||
5581 | if( nVal<0 ) nVal = FTS5_DEFAULT_DELETE_AUTOMERGE10; | ||||
5582 | if( nVal>100 ) nVal = 0; | ||||
5583 | pConfig->nDeleteMerge = nVal; | ||||
5584 | } | ||||
5585 | |||||
5586 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "rank") ){ | ||||
5587 | const char *zIn = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | ||||
5588 | char *zRank; | ||||
5589 | char *zRankArgs; | ||||
5590 | rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); | ||||
5591 | if( rc==SQLITE_OK0 ){ | ||||
5592 | sqlite3_freesqlite3_api->free(pConfig->zRank); | ||||
5593 | sqlite3_freesqlite3_api->free(pConfig->zRankArgs); | ||||
5594 | pConfig->zRank = zRank; | ||||
5595 | pConfig->zRankArgs = zRankArgs; | ||||
5596 | }else if( rc==SQLITE_ERROR1 ){ | ||||
5597 | rc = SQLITE_OK0; | ||||
5598 | *pbBadkey = 1; | ||||
5599 | } | ||||
5600 | } | ||||
5601 | |||||
5602 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "secure-delete") ){ | ||||
5603 | int bVal = -1; | ||||
5604 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | ||||
5605 | bVal = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5606 | } | ||||
5607 | if( bVal<0 ){ | ||||
5608 | *pbBadkey = 1; | ||||
5609 | }else{ | ||||
5610 | pConfig->bSecureDelete = (bVal ? 1 : 0); | ||||
5611 | } | ||||
5612 | } | ||||
5613 | |||||
5614 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "insttoken") ){ | ||||
5615 | int bVal = -1; | ||||
5616 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | ||||
5617 | bVal = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5618 | } | ||||
5619 | if( bVal<0 ){ | ||||
5620 | *pbBadkey = 1; | ||||
5621 | }else{ | ||||
5622 | pConfig->bPrefixInsttoken = (bVal ? 1 : 0); | ||||
5623 | } | ||||
5624 | |||||
5625 | }else{ | ||||
5626 | *pbBadkey = 1; | ||||
5627 | } | ||||
5628 | return rc; | ||||
5629 | } | ||||
5630 | |||||
5631 | /* | ||||
5632 | ** Load the contents of the %_config table into memory. | ||||
5633 | */ | ||||
5634 | static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ | ||||
5635 | const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; | ||||
5636 | char *zSql; | ||||
5637 | sqlite3_stmt *p = 0; | ||||
5638 | int rc = SQLITE_OK0; | ||||
5639 | int iVersion = 0; | ||||
5640 | |||||
5641 | /* Set default values */ | ||||
5642 | pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE4050; | ||||
5643 | pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE4; | ||||
5644 | pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE4; | ||||
5645 | pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE16; | ||||
5646 | pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE(1024*1024); | ||||
5647 | pConfig->nDeleteMerge = FTS5_DEFAULT_DELETE_AUTOMERGE10; | ||||
5648 | |||||
5649 | zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); | ||||
5650 | if( zSql ){ | ||||
5651 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pConfig->db, zSql, -1, &p, 0); | ||||
5652 | sqlite3_freesqlite3_api->free(zSql); | ||||
5653 | } | ||||
5654 | |||||
5655 | assert( rc==SQLITE_OK || p==0 )((void) (0)); | ||||
5656 | if( rc==SQLITE_OK0 ){ | ||||
5657 | while( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(p) ){ | ||||
5658 | const char *zK = (const char*)sqlite3_column_textsqlite3_api->column_text(p, 0); | ||||
5659 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(p, 1); | ||||
5660 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zK, "version") ){ | ||||
5661 | iVersion = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
5662 | }else{ | ||||
5663 | int bDummy = 0; | ||||
5664 | sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); | ||||
5665 | } | ||||
5666 | } | ||||
5667 | rc = sqlite3_finalizesqlite3_api->finalize(p); | ||||
5668 | } | ||||
5669 | |||||
5670 | if( rc==SQLITE_OK0 | ||||
5671 | && iVersion!=FTS5_CURRENT_VERSION4 | ||||
5672 | && iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE5 | ||||
5673 | ){ | ||||
5674 | rc = SQLITE_ERROR1; | ||||
5675 | sqlite3Fts5ConfigErrmsg(pConfig, "invalid fts5 file format " | ||||
5676 | "(found %d, expected %d or %d) - run 'rebuild'", | ||||
5677 | iVersion, FTS5_CURRENT_VERSION4, FTS5_CURRENT_VERSION_SECUREDELETE5 | ||||
5678 | ); | ||||
5679 | }else{ | ||||
5680 | pConfig->iVersion = iVersion; | ||||
5681 | } | ||||
5682 | |||||
5683 | if( rc==SQLITE_OK0 ){ | ||||
5684 | pConfig->iCookie = iCookie; | ||||
5685 | } | ||||
5686 | return rc; | ||||
5687 | } | ||||
5688 | |||||
5689 | /* | ||||
5690 | ** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer | ||||
5691 | ** containing the error message created using printf() style formatting | ||||
5692 | ** string zFmt and its trailing arguments. | ||||
5693 | */ | ||||
5694 | static void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){ | ||||
5695 | va_list ap; /* ... printf arguments */ | ||||
5696 | char *zMsg = 0; | ||||
5697 | |||||
5698 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | ||||
5699 | zMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | ||||
5700 | if( pConfig->pzErrmsg ){ | ||||
5701 | assert( *pConfig->pzErrmsg==0 )((void) (0)); | ||||
5702 | *pConfig->pzErrmsg = zMsg; | ||||
5703 | }else{ | ||||
5704 | sqlite3_freesqlite3_api->free(zMsg); | ||||
5705 | } | ||||
5706 | |||||
5707 | va_end(ap)__builtin_va_end(ap); | ||||
5708 | } | ||||
5709 | |||||
5710 | |||||
5711 | |||||
5712 | #line 1 "fts5_expr.c" | ||||
5713 | /* | ||||
5714 | ** 2014 May 31 | ||||
5715 | ** | ||||
5716 | ** The author disclaims copyright to this source code. In place of | ||||
5717 | ** a legal notice, here is a blessing: | ||||
5718 | ** | ||||
5719 | ** May you do good and not evil. | ||||
5720 | ** May you find forgiveness for yourself and forgive others. | ||||
5721 | ** May you share freely, never taking more than you give. | ||||
5722 | ** | ||||
5723 | ****************************************************************************** | ||||
5724 | ** | ||||
5725 | */ | ||||
5726 | |||||
5727 | |||||
5728 | |||||
5729 | /* #include "fts5Int.h" */ | ||||
5730 | /* #include "fts5parse.h" */ | ||||
5731 | |||||
5732 | #ifndef SQLITE_FTS5_MAX_EXPR_DEPTH256 | ||||
5733 | # define SQLITE_FTS5_MAX_EXPR_DEPTH256 256 | ||||
5734 | #endif | ||||
5735 | |||||
5736 | /* | ||||
5737 | ** All token types in the generated fts5parse.h file are greater than 0. | ||||
5738 | */ | ||||
5739 | #define FTS5_EOF0 0 | ||||
5740 | |||||
5741 | #define FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) (0xffffffff|(((i64)0x7fffffff)<<32)) | ||||
5742 | |||||
5743 | typedef struct Fts5ExprTerm Fts5ExprTerm; | ||||
5744 | |||||
5745 | /* | ||||
5746 | ** Functions generated by lemon from fts5parse.y. | ||||
5747 | */ | ||||
5748 | static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); | ||||
5749 | static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); | ||||
5750 | static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); | ||||
5751 | #ifndef NDEBUG1 | ||||
5752 | #include <stdio.h> | ||||
5753 | static void sqlite3Fts5ParserTrace(FILE*, char*); | ||||
5754 | #endif | ||||
5755 | static int sqlite3Fts5ParserFallback(int); | ||||
5756 | |||||
5757 | |||||
5758 | struct Fts5Expr { | ||||
5759 | Fts5Index *pIndex; | ||||
5760 | Fts5Config *pConfig; | ||||
5761 | Fts5ExprNode *pRoot; | ||||
5762 | int bDesc; /* Iterate in descending rowid order */ | ||||
5763 | int nPhrase; /* Number of phrases in expression */ | ||||
5764 | Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ | ||||
5765 | }; | ||||
5766 | |||||
5767 | /* | ||||
5768 | ** eType: | ||||
5769 | ** Expression node type. Usually one of: | ||||
5770 | ** | ||||
5771 | ** FTS5_AND (nChild, apChild valid) | ||||
5772 | ** FTS5_OR (nChild, apChild valid) | ||||
5773 | ** FTS5_NOT (nChild, apChild valid) | ||||
5774 | ** FTS5_STRING (pNear valid) | ||||
5775 | ** FTS5_TERM (pNear valid) | ||||
5776 | ** | ||||
5777 | ** An expression node with eType==0 may also exist. It always matches zero | ||||
5778 | ** rows. This is created when a phrase containing no tokens is parsed. | ||||
5779 | ** e.g. "". | ||||
5780 | ** | ||||
5781 | ** iHeight: | ||||
5782 | ** Distance from this node to furthest leaf. This is always 0 for nodes | ||||
5783 | ** of type FTS5_STRING and FTS5_TERM. For all other nodes it is one | ||||
5784 | ** greater than the largest child value. | ||||
5785 | */ | ||||
5786 | struct Fts5ExprNode { | ||||
5787 | int eType; /* Node type */ | ||||
5788 | int bEof; /* True at EOF */ | ||||
5789 | int bNomatch; /* True if entry is not a match */ | ||||
5790 | int iHeight; /* Distance to tree leaf nodes */ | ||||
5791 | |||||
5792 | /* Next method for this node. */ | ||||
5793 | int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64); | ||||
5794 | |||||
5795 | i64 iRowid; /* Current rowid */ | ||||
5796 | Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ | ||||
5797 | |||||
5798 | /* Child nodes. For a NOT node, this array always contains 2 entries. For | ||||
5799 | ** AND or OR nodes, it contains 2 or more entries. */ | ||||
5800 | int nChild; /* Number of child nodes */ | ||||
5801 | Fts5ExprNode *apChild[FLEXARRAY]; /* Array of child nodes */ | ||||
5802 | }; | ||||
5803 | |||||
5804 | /* Size (in bytes) of an Fts5ExprNode object that holds up to N children */ | ||||
5805 | #define SZ_FTS5EXPRNODE(N)(__builtin_offsetof(Fts5ExprNode, apChild) + (N)*sizeof(Fts5ExprNode *)) \ | ||||
5806 | (offsetof(Fts5ExprNode,apChild)__builtin_offsetof(Fts5ExprNode, apChild) + (N)*sizeof(Fts5ExprNode*)) | ||||
5807 | |||||
5808 | #define Fts5NodeIsString(p)((p)->eType==4 || (p)->eType==9) ((p)->eType==FTS5_TERM4 || (p)->eType==FTS5_STRING9) | ||||
5809 | |||||
5810 | /* | ||||
5811 | ** Invoke the xNext method of an Fts5ExprNode object. This macro should be | ||||
5812 | ** used as if it has the same signature as the xNext() methods themselves. | ||||
5813 | */ | ||||
5814 | #define fts5ExprNodeNext(a,b,c,d)(b)->xNext((a), (b), (c), (d)) (b)->xNext((a), (b), (c), (d)) | ||||
5815 | |||||
5816 | /* | ||||
5817 | ** An instance of the following structure represents a single search term | ||||
5818 | ** or term prefix. | ||||
5819 | */ | ||||
5820 | struct Fts5ExprTerm { | ||||
5821 | u8 bPrefix; /* True for a prefix term */ | ||||
5822 | u8 bFirst; /* True if token must be first in column */ | ||||
5823 | char *pTerm; /* Term data */ | ||||
5824 | int nQueryTerm; /* Effective size of term in bytes */ | ||||
5825 | int nFullTerm; /* Size of term in bytes incl. tokendata */ | ||||
5826 | Fts5IndexIter *pIter; /* Iterator for this term */ | ||||
5827 | Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ | ||||
5828 | }; | ||||
5829 | |||||
5830 | /* | ||||
5831 | ** A phrase. One or more terms that must appear in a contiguous sequence | ||||
5832 | ** within a document for it to match. | ||||
5833 | */ | ||||
5834 | struct Fts5ExprPhrase { | ||||
5835 | Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */ | ||||
5836 | Fts5Buffer poslist; /* Current position list */ | ||||
5837 | int nTerm; /* Number of entries in aTerm[] */ | ||||
5838 | Fts5ExprTerm aTerm[FLEXARRAY]; /* Terms that make up this phrase */ | ||||
5839 | }; | ||||
5840 | |||||
5841 | /* Size (in bytes) of an Fts5ExprPhrase object that holds up to N terms */ | ||||
5842 | #define SZ_FTS5EXPRPHRASE(N)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (N)*sizeof(Fts5ExprTerm )) \ | ||||
5843 | (offsetof(Fts5ExprPhrase,aTerm)__builtin_offsetof(Fts5ExprPhrase, aTerm) + (N)*sizeof(Fts5ExprTerm)) | ||||
5844 | |||||
5845 | /* | ||||
5846 | ** One or more phrases that must appear within a certain token distance of | ||||
5847 | ** each other within each matching document. | ||||
5848 | */ | ||||
5849 | struct Fts5ExprNearset { | ||||
5850 | int nNear; /* NEAR parameter */ | ||||
5851 | Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */ | ||||
5852 | int nPhrase; /* Number of entries in aPhrase[] array */ | ||||
5853 | Fts5ExprPhrase *apPhrase[FLEXARRAY]; /* Array of phrase pointers */ | ||||
5854 | }; | ||||
5855 | |||||
5856 | /* Size (in bytes) of an Fts5ExprNearset object covering up to N phrases */ | ||||
5857 | #define SZ_FTS5EXPRNEARSET(N)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(N)*sizeof(Fts5ExprPhrase *)) \ | ||||
5858 | (offsetof(Fts5ExprNearset,apPhrase)__builtin_offsetof(Fts5ExprNearset, apPhrase)+(N)*sizeof(Fts5ExprPhrase*)) | ||||
5859 | |||||
5860 | /* | ||||
5861 | ** Parse context. | ||||
5862 | */ | ||||
5863 | struct Fts5Parse { | ||||
5864 | Fts5Config *pConfig; | ||||
5865 | char *zErr; | ||||
5866 | int rc; | ||||
5867 | int nPhrase; /* Size of apPhrase array */ | ||||
5868 | Fts5ExprPhrase **apPhrase; /* Array of all phrases */ | ||||
5869 | Fts5ExprNode *pExpr; /* Result of a successful parse */ | ||||
5870 | int bPhraseToAnd; /* Convert "a+b" to "a AND b" */ | ||||
5871 | }; | ||||
5872 | |||||
5873 | /* | ||||
5874 | ** Check that the Fts5ExprNode.iHeight variables are set correctly in | ||||
5875 | ** the expression tree passed as the only argument. | ||||
5876 | */ | ||||
5877 | #ifndef NDEBUG1 | ||||
5878 | static void assert_expr_depth_ok(int rc, Fts5ExprNode *p){ | ||||
5879 | if( rc==SQLITE_OK0 ){ | ||||
5880 | if( p->eType==FTS5_TERM4 || p->eType==FTS5_STRING9 || p->eType==0 ){ | ||||
5881 | assert( p->iHeight==0 )((void) (0)); | ||||
5882 | }else{ | ||||
5883 | int ii; | ||||
5884 | int iMaxChild = 0; | ||||
5885 | for(ii=0; ii<p->nChild; ii++){ | ||||
5886 | Fts5ExprNode *pChild = p->apChild[ii]; | ||||
5887 | iMaxChild = MAX(iMaxChild, pChild->iHeight)(((iMaxChild) > (pChild->iHeight)) ? (iMaxChild) : (pChild ->iHeight)); | ||||
5888 | assert_expr_depth_ok(SQLITE_OK, pChild); | ||||
5889 | } | ||||
5890 | assert( p->iHeight==iMaxChild+1 )((void) (0)); | ||||
5891 | } | ||||
5892 | } | ||||
5893 | } | ||||
5894 | #else | ||||
5895 | # define assert_expr_depth_ok(rc, p) | ||||
5896 | #endif | ||||
5897 | |||||
5898 | static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ | ||||
5899 | va_list ap; | ||||
5900 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | ||||
5901 | if( pParse->rc==SQLITE_OK0 ){ | ||||
5902 | assert( pParse->zErr==0 )((void) (0)); | ||||
5903 | pParse->zErr = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | ||||
5904 | pParse->rc = SQLITE_ERROR1; | ||||
5905 | } | ||||
5906 | va_end(ap)__builtin_va_end(ap); | ||||
5907 | } | ||||
5908 | |||||
/*
** Return 1 if character t is a space, tab, newline or carriage return,
** or 0 otherwise.
*/
static int fts5ExprIsspace(char t){
  switch( t ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
5912 | |||||
5913 | /* | ||||
5914 | ** Read the first token from the nul-terminated string at *pz. | ||||
5915 | */ | ||||
5916 | static int fts5ExprGetToken( | ||||
5917 | Fts5Parse *pParse, | ||||
5918 | const char **pz, /* IN/OUT: Pointer into buffer */ | ||||
5919 | Fts5Token *pToken | ||||
5920 | ){ | ||||
5921 | const char *z = *pz; | ||||
5922 | int tok; | ||||
5923 | |||||
5924 | /* Skip past any whitespace */ | ||||
5925 | while( fts5ExprIsspace(*z) ) z++; | ||||
5926 | |||||
5927 | pToken->p = z; | ||||
5928 | pToken->n = 1; | ||||
5929 | switch( *z ){ | ||||
5930 | case '(': tok = FTS5_LP10; break; | ||||
5931 | case ')': tok = FTS5_RP11; break; | ||||
5932 | case '{': tok = FTS5_LCP7; break; | ||||
5933 | case '}': tok = FTS5_RCP8; break; | ||||
5934 | case ':': tok = FTS5_COLON5; break; | ||||
5935 | case ',': tok = FTS5_COMMA13; break; | ||||
5936 | case '+': tok = FTS5_PLUS14; break; | ||||
5937 | case '*': tok = FTS5_STAR15; break; | ||||
5938 | case '-': tok = FTS5_MINUS6; break; | ||||
5939 | case '^': tok = FTS5_CARET12; break; | ||||
5940 | case '\0': tok = FTS5_EOF0; break; | ||||
5941 | |||||
5942 | case '"': { | ||||
5943 | const char *z2; | ||||
5944 | tok = FTS5_STRING9; | ||||
5945 | |||||
5946 | for(z2=&z[1]; 1; z2++){ | ||||
5947 | if( z2[0]=='"' ){ | ||||
5948 | z2++; | ||||
5949 | if( z2[0]!='"' ) break; | ||||
5950 | } | ||||
5951 | if( z2[0]=='\0' ){ | ||||
5952 | sqlite3Fts5ParseError(pParse, "unterminated string"); | ||||
5953 | return FTS5_EOF0; | ||||
5954 | } | ||||
5955 | } | ||||
5956 | pToken->n = (z2 - z); | ||||
5957 | break; | ||||
5958 | } | ||||
5959 | |||||
5960 | default: { | ||||
5961 | const char *z2; | ||||
5962 | if( sqlite3Fts5IsBareword(z[0])==0 ){ | ||||
5963 | sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); | ||||
5964 | return FTS5_EOF0; | ||||
5965 | } | ||||
5966 | tok = FTS5_STRING9; | ||||
5967 | for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); | ||||
5968 | pToken->n = (z2 - z); | ||||
5969 | if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR1; | ||||
5970 | if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT3; | ||||
5971 | if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND2; | ||||
5972 | break; | ||||
5973 | } | ||||
5974 | } | ||||
5975 | |||||
5976 | *pz = &pToken->p[pToken->n]; | ||||
5977 | return tok; | ||||
5978 | } | ||||
5979 | |||||
5980 | static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)t);} | ||||
/* Deallocation callback handed to the Lemon parser: wraps sqlite3_free(). */
static void fts5ParseFree(void *p){
  sqlite3_free(p);
}
5982 | |||||
5983 | static int sqlite3Fts5ExprNew( | ||||
5984 | Fts5Config *pConfig, /* FTS5 Configuration */ | ||||
5985 | int bPhraseToAnd, | ||||
5986 | int iCol, | ||||
5987 | const char *zExpr, /* Expression text */ | ||||
5988 | Fts5Expr **ppNew, | ||||
5989 | char **pzErr | ||||
5990 | ){ | ||||
5991 | Fts5Parse sParse; | ||||
5992 | Fts5Token token; | ||||
5993 | const char *z = zExpr; | ||||
5994 | int t; /* Next token type */ | ||||
5995 | void *pEngine; | ||||
5996 | Fts5Expr *pNew; | ||||
5997 | |||||
5998 | *ppNew = 0; | ||||
5999 | *pzErr = 0; | ||||
6000 | memset(&sParse, 0, sizeof(sParse)); | ||||
6001 | sParse.bPhraseToAnd = bPhraseToAnd; | ||||
6002 | pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); | ||||
6003 | if( pEngine==0 ){ return SQLITE_NOMEM7; } | ||||
6004 | sParse.pConfig = pConfig; | ||||
6005 | |||||
6006 | do { | ||||
6007 | t = fts5ExprGetToken(&sParse, &z, &token); | ||||
6008 | sqlite3Fts5Parser(pEngine, t, token, &sParse); | ||||
6009 | }while( sParse.rc==SQLITE_OK0 && t!=FTS5_EOF0 ); | ||||
6010 | sqlite3Fts5ParserFree(pEngine, fts5ParseFree); | ||||
6011 | |||||
6012 | assert( sParse.pExpr || sParse.rc!=SQLITE_OK )((void) (0)); | ||||
6013 | assert_expr_depth_ok(sParse.rc, sParse.pExpr); | ||||
6014 | |||||
6015 | /* If the LHS of the MATCH expression was a user column, apply the | ||||
6016 | ** implicit column-filter. */ | ||||
6017 | if( sParse.rc==SQLITE_OK0 && iCol<pConfig->nCol ){ | ||||
6018 | int n = SZ_FTS5COLSET(1)(sizeof(i64)*((1 +2)/2)); | ||||
6019 | Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n); | ||||
6020 | if( pColset ){ | ||||
6021 | pColset->nCol = 1; | ||||
6022 | pColset->aiCol[0] = iCol; | ||||
6023 | sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset); | ||||
6024 | } | ||||
6025 | } | ||||
6026 | |||||
6027 | assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 )((void) (0)); | ||||
6028 | if( sParse.rc==SQLITE_OK0 ){ | ||||
6029 | *ppNew = pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Expr)); | ||||
6030 | if( pNew==0 ){ | ||||
6031 | sParse.rc = SQLITE_NOMEM7; | ||||
6032 | sqlite3Fts5ParseNodeFree(sParse.pExpr); | ||||
6033 | }else{ | ||||
6034 | pNew->pRoot = sParse.pExpr; | ||||
6035 | pNew->pIndex = 0; | ||||
6036 | pNew->pConfig = pConfig; | ||||
6037 | pNew->apExprPhrase = sParse.apPhrase; | ||||
6038 | pNew->nPhrase = sParse.nPhrase; | ||||
6039 | pNew->bDesc = 0; | ||||
6040 | sParse.apPhrase = 0; | ||||
6041 | } | ||||
6042 | }else{ | ||||
6043 | sqlite3Fts5ParseNodeFree(sParse.pExpr); | ||||
6044 | } | ||||
6045 | |||||
6046 | sqlite3_freesqlite3_api->free(sParse.apPhrase); | ||||
6047 | if( 0==*pzErr ){ | ||||
6048 | *pzErr = sParse.zErr; | ||||
6049 | }else{ | ||||
6050 | sqlite3_freesqlite3_api->free(sParse.zErr); | ||||
6051 | } | ||||
6052 | return sParse.rc; | ||||
6053 | } | ||||
6054 | |||||
/*
** Return the number of characters in the first nByte bytes of buffer z,
** which is assumed to hold valid utf-8. Every byte that is not a utf-8
** continuation byte (of the form 10xxxxxx) counts as one character.
*/
static int fts5ExprCountChar(const char *z, int nByte){
  int nChar = 0;
  int iByte = 0;
  while( iByte<nByte ){
    int bContinuation = ((z[iByte] & 0xC0)==0x80);
    if( !bContinuation ) nChar++;
    iByte++;
  }
  return nChar;
}
6067 | |||||
6068 | /* | ||||
6069 | ** This function is only called when using the special 'trigram' tokenizer. | ||||
6070 | ** Argument zText contains the text of a LIKE or GLOB pattern matched | ||||
6071 | ** against column iCol. This function creates and compiles an FTS5 MATCH | ||||
6072 | ** expression that will match a superset of the rows matched by the LIKE or | ||||
6073 | ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error | ||||
6074 | ** code. | ||||
6075 | */ | ||||
6076 | static int sqlite3Fts5ExprPattern( | ||||
6077 | Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp | ||||
6078 | ){ | ||||
6079 | i64 nText = strlen(zText); | ||||
6080 | char *zExpr = (char*)sqlite3_malloc64sqlite3_api->malloc64(nText*4 + 1); | ||||
6081 | int rc = SQLITE_OK0; | ||||
6082 | |||||
6083 | if( zExpr==0 ){ | ||||
6084 | rc = SQLITE_NOMEM7; | ||||
6085 | }else{ | ||||
6086 | char aSpec[3]; | ||||
6087 | int iOut = 0; | ||||
6088 | int i = 0; | ||||
6089 | int iFirst = 0; | ||||
6090 | |||||
6091 | if( bGlob==0 ){ | ||||
6092 | aSpec[0] = '_'; | ||||
6093 | aSpec[1] = '%'; | ||||
6094 | aSpec[2] = 0; | ||||
6095 | }else{ | ||||
6096 | aSpec[0] = '*'; | ||||
6097 | aSpec[1] = '?'; | ||||
6098 | aSpec[2] = '['; | ||||
6099 | } | ||||
6100 | |||||
6101 | while( i<=nText ){ | ||||
6102 | if( i==nText | ||||
6103 | || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] | ||||
6104 | ){ | ||||
6105 | |||||
6106 | if( fts5ExprCountChar(&zText[iFirst], i-iFirst)>=3 ){ | ||||
6107 | int jj; | ||||
6108 | zExpr[iOut++] = '"'; | ||||
6109 | for(jj=iFirst; jj<i; jj++){ | ||||
6110 | zExpr[iOut++] = zText[jj]; | ||||
6111 | if( zText[jj]=='"' ) zExpr[iOut++] = '"'; | ||||
6112 | } | ||||
6113 | zExpr[iOut++] = '"'; | ||||
6114 | zExpr[iOut++] = ' '; | ||||
6115 | } | ||||
6116 | if( zText[i]==aSpec[2] ){ | ||||
6117 | i += 2; | ||||
6118 | if( zText[i-1]=='^' ) i++; | ||||
6119 | while( i<nText && zText[i]!=']' ) i++; | ||||
6120 | } | ||||
6121 | iFirst = i+1; | ||||
6122 | } | ||||
6123 | i++; | ||||
6124 | } | ||||
6125 | if( iOut>0 ){ | ||||
6126 | int bAnd = 0; | ||||
6127 | if( pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | ||||
6128 | bAnd = 1; | ||||
6129 | if( pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
6130 | iCol = pConfig->nCol; | ||||
6131 | } | ||||
6132 | } | ||||
6133 | zExpr[iOut] = '\0'; | ||||
6134 | rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg); | ||||
6135 | }else{ | ||||
6136 | *pp = 0; | ||||
6137 | } | ||||
6138 | sqlite3_freesqlite3_api->free(zExpr); | ||||
6139 | } | ||||
6140 | |||||
6141 | return rc; | ||||
6142 | } | ||||
6143 | |||||
6144 | /* | ||||
6145 | ** Free the expression node object passed as the only argument. | ||||
6146 | */ | ||||
6147 | static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ | ||||
6148 | if( p ){ | ||||
6149 | int i; | ||||
6150 | for(i=0; i<p->nChild; i++){ | ||||
6151 | sqlite3Fts5ParseNodeFree(p->apChild[i]); | ||||
6152 | } | ||||
6153 | sqlite3Fts5ParseNearsetFree(p->pNear); | ||||
6154 | sqlite3_freesqlite3_api->free(p); | ||||
6155 | } | ||||
6156 | } | ||||
6157 | |||||
6158 | /* | ||||
6159 | ** Free the expression object passed as the only argument. | ||||
6160 | */ | ||||
6161 | static void sqlite3Fts5ExprFree(Fts5Expr *p){ | ||||
6162 | if( p ){ | ||||
6163 | sqlite3Fts5ParseNodeFree(p->pRoot); | ||||
6164 | sqlite3_freesqlite3_api->free(p->apExprPhrase); | ||||
6165 | sqlite3_freesqlite3_api->free(p); | ||||
6166 | } | ||||
6167 | } | ||||
6168 | |||||
6169 | static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){ | ||||
6170 | Fts5Parse sParse; | ||||
6171 | memset(&sParse, 0, sizeof(sParse)); | ||||
6172 | |||||
6173 | if( *pp1 && p2 ){ | ||||
6174 | Fts5Expr *p1 = *pp1; | ||||
6175 | int nPhrase = p1->nPhrase + p2->nPhrase; | ||||
6176 | |||||
6177 | p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND2, p1->pRoot, p2->pRoot,0); | ||||
6178 | p2->pRoot = 0; | ||||
6179 | |||||
6180 | if( sParse.rc==SQLITE_OK0 ){ | ||||
6181 | Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_reallocsqlite3_api->realloc( | ||||
6182 | p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*) | ||||
6183 | ); | ||||
6184 | if( ap==0 ){ | ||||
6185 | sParse.rc = SQLITE_NOMEM7; | ||||
6186 | }else{ | ||||
6187 | int i; | ||||
6188 | memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*)); | ||||
6189 | for(i=0; i<p2->nPhrase; i++){ | ||||
6190 | ap[i] = p2->apExprPhrase[i]; | ||||
6191 | } | ||||
6192 | p1->nPhrase = nPhrase; | ||||
6193 | p1->apExprPhrase = ap; | ||||
6194 | } | ||||
6195 | } | ||||
6196 | sqlite3_freesqlite3_api->free(p2->apExprPhrase); | ||||
6197 | sqlite3_freesqlite3_api->free(p2); | ||||
6198 | }else if( p2 ){ | ||||
6199 | *pp1 = p2; | ||||
6200 | } | ||||
6201 | |||||
6202 | return sParse.rc; | ||||
6203 | } | ||||
6204 | |||||
6205 | /* | ||||
6206 | ** Argument pTerm must be a synonym iterator. Return the current rowid | ||||
6207 | ** that it points to. | ||||
6208 | */ | ||||
6209 | static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ | ||||
6210 | i64 iRet = 0; | ||||
6211 | int bRetValid = 0; | ||||
6212 | Fts5ExprTerm *p; | ||||
6213 | |||||
6214 | assert( pTerm )((void) (0)); | ||||
6215 | assert( pTerm->pSynonym )((void) (0)); | ||||
6216 | assert( bDesc==0 || bDesc==1 )((void) (0)); | ||||
6217 | for(p=pTerm; p; p=p->pSynonym){ | ||||
6218 | if( 0==sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof) ){ | ||||
6219 | i64 iRowid = p->pIter->iRowid; | ||||
6220 | if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ | ||||
6221 | iRet = iRowid; | ||||
6222 | bRetValid = 1; | ||||
6223 | } | ||||
6224 | } | ||||
6225 | } | ||||
6226 | |||||
6227 | if( pbEof && bRetValid==0 ) *pbEof = 1; | ||||
6228 | return iRet; | ||||
6229 | } | ||||
6230 | |||||
6231 | /* | ||||
6232 | ** Argument pTerm must be a synonym iterator. | ||||
6233 | */ | ||||
6234 | static int fts5ExprSynonymList( | ||||
6235 | Fts5ExprTerm *pTerm, | ||||
6236 | i64 iRowid, | ||||
6237 | Fts5Buffer *pBuf, /* Use this buffer for space if required */ | ||||
6238 | u8 **pa, int *pn | ||||
6239 | ){ | ||||
6240 | Fts5PoslistReader aStatic[4]; | ||||
6241 | Fts5PoslistReader *aIter = aStatic; | ||||
6242 | int nIter = 0; | ||||
6243 | int nAlloc = 4; | ||||
6244 | int rc = SQLITE_OK0; | ||||
6245 | Fts5ExprTerm *p; | ||||
6246 | |||||
6247 | assert( pTerm->pSynonym )((void) (0)); | ||||
6248 | for(p=pTerm; p; p=p->pSynonym){ | ||||
6249 | Fts5IndexIter *pIter = p->pIter; | ||||
6250 | if( sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 && pIter->iRowid==iRowid ){ | ||||
6251 | if( pIter->nData==0 ) continue; | ||||
6252 | if( nIter==nAlloc ){ | ||||
6253 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; | ||||
6254 | Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
6255 | if( aNew==0 ){ | ||||
6256 | rc = SQLITE_NOMEM7; | ||||
6257 | goto synonym_poslist_out; | ||||
6258 | } | ||||
6259 | memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); | ||||
6260 | nAlloc = nAlloc*2; | ||||
6261 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | ||||
6262 | aIter = aNew; | ||||
6263 | } | ||||
6264 | sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); | ||||
6265 | assert( aIter[nIter].bEof==0 )((void) (0)); | ||||
6266 | nIter++; | ||||
6267 | } | ||||
6268 | } | ||||
6269 | |||||
6270 | if( nIter==1 ){ | ||||
6271 | *pa = (u8*)aIter[0].a; | ||||
6272 | *pn = aIter[0].n; | ||||
6273 | }else{ | ||||
6274 | Fts5PoslistWriter writer = {0}; | ||||
6275 | i64 iPrev = -1; | ||||
6276 | fts5BufferZero(pBuf)sqlite3Fts5BufferZero(pBuf); | ||||
6277 | while( 1 ){ | ||||
6278 | int i; | ||||
6279 | i64 iMin = FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | ||||
6280 | for(i=0; i<nIter; i++){ | ||||
6281 | if( aIter[i].bEof==0 ){ | ||||
6282 | if( aIter[i].iPos==iPrev ){ | ||||
6283 | if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; | ||||
6284 | } | ||||
6285 | if( aIter[i].iPos<iMin ){ | ||||
6286 | iMin = aIter[i].iPos; | ||||
6287 | } | ||||
6288 | } | ||||
6289 | } | ||||
6290 | if( iMin==FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) || rc!=SQLITE_OK0 ) break; | ||||
6291 | rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin); | ||||
6292 | iPrev = iMin; | ||||
6293 | } | ||||
6294 | if( rc==SQLITE_OK0 ){ | ||||
6295 | *pa = pBuf->p; | ||||
6296 | *pn = pBuf->n; | ||||
6297 | } | ||||
6298 | } | ||||
6299 | |||||
6300 | synonym_poslist_out: | ||||
6301 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | ||||
6302 | return rc; | ||||
6303 | } | ||||
6304 | |||||
6305 | |||||
6306 | /* | ||||
6307 | ** All individual term iterators in pPhrase are guaranteed to be valid and | ||||
6308 | ** pointing to the same rowid when this function is called. This function | ||||
6309 | ** checks if the current rowid really is a match, and if so populates | ||||
6310 | ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch | ||||
6311 | ** is set to true if this is really a match, or false otherwise. | ||||
6312 | ** | ||||
6313 | ** SQLITE_OK is returned if an error occurs, or an SQLite error code | ||||
6314 | ** otherwise. It is not considered an error code if the current rowid is | ||||
6315 | ** not a match. | ||||
6316 | */ | ||||
6317 | static int fts5ExprPhraseIsMatch( | ||||
6318 | Fts5ExprNode *pNode, /* Node pPhrase belongs to */ | ||||
6319 | Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ | ||||
6320 | int *pbMatch /* OUT: Set to true if really a match */ | ||||
6321 | ){ | ||||
6322 | Fts5PoslistWriter writer = {0}; | ||||
6323 | Fts5PoslistReader aStatic[4]; | ||||
6324 | Fts5PoslistReader *aIter = aStatic; | ||||
6325 | int i; | ||||
6326 | int rc = SQLITE_OK0; | ||||
6327 | int bFirst = pPhrase->aTerm[0].bFirst; | ||||
6328 | |||||
6329 | fts5BufferZero(&pPhrase->poslist)sqlite3Fts5BufferZero(&pPhrase->poslist); | ||||
6330 | |||||
6331 | /* If the aStatic[] array is not large enough, allocate a large array | ||||
6332 | ** using sqlite3_malloc(). This approach could be improved upon. */ | ||||
6333 | if( pPhrase->nTerm>ArraySize(aStatic)((int)(sizeof(aStatic) / sizeof(aStatic[0]))) ){ | ||||
6334 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; | ||||
6335 | aIter = (Fts5PoslistReader*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
6336 | if( !aIter ) return SQLITE_NOMEM7; | ||||
6337 | } | ||||
6338 | memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); | ||||
6339 | |||||
6340 | /* Initialize a term iterator for each term in the phrase */ | ||||
6341 | for(i=0; i<pPhrase->nTerm; i++){ | ||||
6342 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; | ||||
6343 | int n = 0; | ||||
6344 | int bFlag = 0; | ||||
6345 | u8 *a = 0; | ||||
6346 | if( pTerm->pSynonym ){ | ||||
6347 | Fts5Buffer buf = {0, 0, 0}; | ||||
6348 | rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n); | ||||
6349 | if( rc ){ | ||||
6350 | sqlite3_freesqlite3_api->free(a); | ||||
6351 | goto ismatch_out; | ||||
6352 | } | ||||
6353 | if( a==buf.p ) bFlag = 1; | ||||
6354 | }else{ | ||||
6355 | a = (u8*)pTerm->pIter->pData; | ||||
6356 | n = pTerm->pIter->nData; | ||||
6357 | } | ||||
6358 | sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); | ||||
6359 | aIter[i].bFlag = (u8)bFlag; | ||||
6360 | if( aIter[i].bEof ) goto ismatch_out; | ||||
6361 | } | ||||
6362 | |||||
6363 | while( 1 ){ | ||||
6364 | int bMatch; | ||||
6365 | i64 iPos = aIter[0].iPos; | ||||
6366 | do { | ||||
6367 | bMatch = 1; | ||||
6368 | for(i=0; i<pPhrase->nTerm; i++){ | ||||
6369 | Fts5PoslistReader *pPos = &aIter[i]; | ||||
6370 | i64 iAdj = iPos + i; | ||||
6371 | if( pPos->iPos!=iAdj ){ | ||||
6372 | bMatch = 0; | ||||
6373 | while( pPos->iPos<iAdj ){ | ||||
6374 | if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out; | ||||
6375 | } | ||||
6376 | if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; | ||||
6377 | } | ||||
6378 | } | ||||
6379 | }while( bMatch==0 ); | ||||
6380 | |||||
6381 | /* Append position iPos to the output */ | ||||
6382 | if( bFirst==0 || FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF)==0 ){ | ||||
6383 | rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); | ||||
6384 | if( rc!=SQLITE_OK0 ) goto ismatch_out; | ||||
6385 | } | ||||
6386 | |||||
6387 | for(i=0; i<pPhrase->nTerm; i++){ | ||||
6388 | if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; | ||||
6389 | } | ||||
6390 | } | ||||
6391 | |||||
6392 | ismatch_out: | ||||
6393 | *pbMatch = (pPhrase->poslist.n>0); | ||||
6394 | for(i=0; i<pPhrase->nTerm; i++){ | ||||
6395 | if( aIter[i].bFlag ) sqlite3_freesqlite3_api->free((u8*)aIter[i].a); | ||||
6396 | } | ||||
6397 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | ||||
6398 | return rc; | ||||
6399 | } | ||||
6400 | |||||
6401 | typedef struct Fts5LookaheadReader Fts5LookaheadReader; | ||||
6402 | struct Fts5LookaheadReader { | ||||
6403 | const u8 *a; /* Buffer containing position list */ | ||||
6404 | int n; /* Size of buffer a[] in bytes */ | ||||
6405 | int i; /* Current offset in position list */ | ||||
6406 | i64 iPos; /* Current position */ | ||||
6407 | i64 iLookahead; /* Next position */ | ||||
6408 | }; | ||||
6409 | |||||
6410 | #define FTS5_LOOKAHEAD_EOF(((i64)1) << 62) (((i64)1) << 62) | ||||
6411 | |||||
6412 | static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ | ||||
6413 | p->iPos = p->iLookahead; | ||||
6414 | if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ | ||||
6415 | p->iLookahead = FTS5_LOOKAHEAD_EOF(((i64)1) << 62); | ||||
6416 | } | ||||
6417 | return (p->iPos==FTS5_LOOKAHEAD_EOF(((i64)1) << 62)); | ||||
6418 | } | ||||
6419 | |||||
6420 | static int fts5LookaheadReaderInit( | ||||
6421 | const u8 *a, int n, /* Buffer to read position list from */ | ||||
6422 | Fts5LookaheadReader *p /* Iterator object to initialize */ | ||||
6423 | ){ | ||||
6424 | memset(p, 0, sizeof(Fts5LookaheadReader)); | ||||
6425 | p->a = a; | ||||
6426 | p->n = n; | ||||
6427 | fts5LookaheadReaderNext(p); | ||||
6428 | return fts5LookaheadReaderNext(p); | ||||
6429 | } | ||||
6430 | |||||
6431 | typedef struct Fts5NearTrimmer Fts5NearTrimmer; | ||||
6432 | struct Fts5NearTrimmer { | ||||
6433 | Fts5LookaheadReader reader; /* Input iterator */ | ||||
6434 | Fts5PoslistWriter writer; /* Writer context */ | ||||
6435 | Fts5Buffer *pOut; /* Output poslist */ | ||||
6436 | }; | ||||
6437 | |||||
6438 | /* | ||||
6439 | ** The near-set object passed as the first argument contains more than | ||||
6440 | ** one phrase. All phrases currently point to the same row. The | ||||
6441 | ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function | ||||
6442 | ** tests if the current row contains instances of each phrase sufficiently | ||||
6443 | ** close together to meet the NEAR constraint. Non-zero is returned if it | ||||
6444 | ** does, or zero otherwise. | ||||
6445 | ** | ||||
6446 | ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this | ||||
6447 | ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) | ||||
6448 | ** occurs within this function (*pRc) is set accordingly before returning. | ||||
6449 | ** The return value is undefined in both these cases. | ||||
6450 | ** | ||||
6451 | ** If no error occurs and non-zero (a match) is returned, the position-list | ||||
6452 | ** of each phrase object is edited to contain only those entries that | ||||
6453 | ** meet the constraint before returning. | ||||
6454 | */ | ||||
6455 | static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ | ||||
6456 | Fts5NearTrimmer aStatic[4]; | ||||
6457 | Fts5NearTrimmer *a = aStatic; | ||||
6458 | Fts5ExprPhrase **apPhrase = pNear->apPhrase; | ||||
6459 | |||||
6460 | int i; | ||||
6461 | int rc = *pRc; | ||||
6462 | int bMatch; | ||||
6463 | |||||
6464 | assert( pNear->nPhrase>1 )((void) (0)); | ||||
6465 | |||||
6466 | /* If the aStatic[] array is not large enough, allocate a large array | ||||
6467 | ** using sqlite3_malloc(). This approach could be improved upon. */ | ||||
6468 | if( pNear->nPhrase>ArraySize(aStatic)((int)(sizeof(aStatic) / sizeof(aStatic[0]))) ){ | ||||
6469 | sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; | ||||
6470 | a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); | ||||
6471 | }else{ | ||||
6472 | memset(aStatic, 0, sizeof(aStatic)); | ||||
6473 | } | ||||
6474 | if( rc!=SQLITE_OK0 ){ | ||||
6475 | *pRc = rc; | ||||
6476 | return 0; | ||||
6477 | } | ||||
6478 | |||||
6479 | /* Initialize a lookahead iterator for each phrase. After passing the | ||||
6480 | ** buffer and buffer size to the lookaside-reader init function, zero | ||||
6481 | ** the phrase poslist buffer. The new poslist for the phrase (containing | ||||
6482 | ** the same entries as the original with some entries removed on account | ||||
6483 | ** of the NEAR constraint) is written over the original even as it is | ||||
6484 | ** being read. This is safe as the entries for the new poslist are a | ||||
6485 | ** subset of the old, so it is not possible for data yet to be read to | ||||
6486 | ** be overwritten. */ | ||||
6487 | for(i=0; i<pNear->nPhrase; i++){ | ||||
6488 | Fts5Buffer *pPoslist = &apPhrase[i]->poslist; | ||||
6489 | fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader); | ||||
6490 | pPoslist->n = 0; | ||||
6491 | a[i].pOut = pPoslist; | ||||
6492 | } | ||||
6493 | |||||
6494 | while( 1 ){ | ||||
6495 | int iAdv; | ||||
6496 | i64 iMin; | ||||
6497 | i64 iMax; | ||||
6498 | |||||
6499 | /* This block advances the phrase iterators until they point to a set of | ||||
6500 | ** entries that together comprise a match. */ | ||||
6501 | iMax = a[0].reader.iPos; | ||||
6502 | do { | ||||
6503 | bMatch = 1; | ||||
6504 | for(i=0; i<pNear->nPhrase; i++){ | ||||
6505 | Fts5LookaheadReader *pPos = &a[i].reader; | ||||
6506 | iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; | ||||
6507 | if( pPos->iPos<iMin || pPos->iPos>iMax ){ | ||||
6508 | bMatch = 0; | ||||
6509 | while( pPos->iPos<iMin ){ | ||||
6510 | if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out; | ||||
6511 | } | ||||
6512 | if( pPos->iPos>iMax ) iMax = pPos->iPos; | ||||
6513 | } | ||||
6514 | } | ||||
6515 | }while( bMatch==0 ); | ||||
6516 | |||||
6517 | /* Add an entry to each output position list */ | ||||
6518 | for(i=0; i<pNear->nPhrase; i++){ | ||||
6519 | i64 iPos = a[i].reader.iPos; | ||||
6520 | Fts5PoslistWriter *pWriter = &a[i].writer; | ||||
6521 | if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ | ||||
6522 | sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos); | ||||
6523 | } | ||||
6524 | } | ||||
6525 | |||||
6526 | iAdv = 0; | ||||
6527 | iMin = a[0].reader.iLookahead; | ||||
6528 | for(i=0; i<pNear->nPhrase; i++){ | ||||
6529 | if( a[i].reader.iLookahead < iMin ){ | ||||
6530 | iMin = a[i].reader.iLookahead; | ||||
6531 | iAdv = i; | ||||
6532 | } | ||||
6533 | } | ||||
6534 | if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; | ||||
6535 | } | ||||
6536 | |||||
6537 | ismatch_out: { | ||||
6538 | int bRet = a[0].pOut->n>0; | ||||
6539 | *pRc = rc; | ||||
6540 | if( a!=aStatic ) sqlite3_freesqlite3_api->free(a); | ||||
6541 | return bRet; | ||||
6542 | } | ||||
6543 | } | ||||
6544 | |||||
6545 | /* | ||||
6546 | ** Advance iterator pIter until it points to a value equal to or laster | ||||
6547 | ** than the initial value of *piLast. If this means the iterator points | ||||
6548 | ** to a value laster than *piLast, update *piLast to the new lastest value. | ||||
6549 | ** | ||||
6550 | ** If the iterator reaches EOF, set *pbEof to true before returning. If | ||||
6551 | ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc | ||||
6552 | ** are set, return a non-zero value. Otherwise, return zero. | ||||
6553 | */ | ||||
6554 | static int fts5ExprAdvanceto( | ||||
6555 | Fts5IndexIter *pIter, /* Iterator to advance */ | ||||
6556 | int bDesc, /* True if iterator is "rowid DESC" */ | ||||
6557 | i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ | ||||
6558 | int *pRc, /* OUT: Error code */ | ||||
6559 | int *pbEof /* OUT: Set to true if EOF */ | ||||
6560 | ){ | ||||
6561 | i64 iLast = *piLast; | ||||
6562 | i64 iRowid; | ||||
6563 | |||||
6564 | iRowid = pIter->iRowid; | ||||
6565 | if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ | ||||
6566 | int rc = sqlite3Fts5IterNextFrom(pIter, iLast); | ||||
6567 | if( rc || sqlite3Fts5IterEof(pIter)((pIter)->bEof) ){ | ||||
6568 | *pRc = rc; | ||||
6569 | *pbEof = 1; | ||||
6570 | return 1; | ||||
6571 | } | ||||
6572 | iRowid = pIter->iRowid; | ||||
6573 | assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) )((void) (0)); | ||||
6574 | } | ||||
6575 | *piLast = iRowid; | ||||
6576 | |||||
6577 | return 0; | ||||
6578 | } | ||||
6579 | |||||
6580 | static int fts5ExprSynonymAdvanceto( | ||||
6581 | Fts5ExprTerm *pTerm, /* Term iterator to advance */ | ||||
6582 | int bDesc, /* True if iterator is "rowid DESC" */ | ||||
6583 | i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ | ||||
6584 | int *pRc /* OUT: Error code */ | ||||
6585 | ){ | ||||
6586 | int rc = SQLITE_OK0; | ||||
6587 | i64 iLast = *piLast; | ||||
6588 | Fts5ExprTerm *p; | ||||
6589 | int bEof = 0; | ||||
6590 | |||||
6591 | for(p=pTerm; rc==SQLITE_OK0 && p; p=p->pSynonym){ | ||||
6592 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | ||||
6593 | i64 iRowid = p->pIter->iRowid; | ||||
6594 | if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ | ||||
6595 | rc = sqlite3Fts5IterNextFrom(p->pIter, iLast); | ||||
6596 | } | ||||
6597 | } | ||||
6598 | } | ||||
6599 | |||||
6600 | if( rc!=SQLITE_OK0 ){ | ||||
6601 | *pRc = rc; | ||||
6602 | bEof = 1; | ||||
6603 | }else{ | ||||
6604 | *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); | ||||
6605 | } | ||||
6606 | return bEof; | ||||
6607 | } | ||||
6608 | |||||
6609 | |||||
6610 | static int fts5ExprNearTest( | ||||
6611 | int *pRc, | ||||
6612 | Fts5Expr *pExpr, /* Expression that pNear is a part of */ | ||||
6613 | Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ | ||||
6614 | ){ | ||||
6615 | Fts5ExprNearset *pNear = pNode->pNear; | ||||
6616 | int rc = *pRc; | ||||
6617 | |||||
6618 | if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | ||||
6619 | Fts5ExprTerm *pTerm; | ||||
6620 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; | ||||
6621 | pPhrase->poslist.n = 0; | ||||
6622 | for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ | ||||
6623 | Fts5IndexIter *pIter = pTerm->pIter; | ||||
6624 | if( sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 ){ | ||||
6625 | if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ | ||||
6626 | pPhrase->poslist.n = 1; | ||||
6627 | } | ||||
6628 | } | ||||
6629 | } | ||||
6630 | return pPhrase->poslist.n; | ||||
6631 | }else{ | ||||
6632 | int i; | ||||
6633 | |||||
6634 | /* Check that each phrase in the nearset matches the current row. | ||||
6635 | ** Populate the pPhrase->poslist buffers at the same time. If any | ||||
6636 | ** phrase is not a match, break out of the loop early. */ | ||||
6637 | for(i=0; rc==SQLITE_OK0 && i<pNear->nPhrase; i++){ | ||||
6638 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | ||||
6639 | if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym | ||||
6640 | || pNear->pColset || pPhrase->aTerm[0].bFirst | ||||
6641 | ){ | ||||
6642 | int bMatch = 0; | ||||
6643 | rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); | ||||
6644 | if( bMatch==0 ) break; | ||||
6645 | }else{ | ||||
6646 | Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; | ||||
6647 | fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData)sqlite3Fts5BufferSet(&rc,&pPhrase->poslist,pIter-> nData,pIter->pData); | ||||
6648 | } | ||||
6649 | } | ||||
6650 | |||||
6651 | *pRc = rc; | ||||
6652 | if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ | ||||
6653 | return 1; | ||||
6654 | } | ||||
6655 | return 0; | ||||
6656 | } | ||||
6657 | } | ||||
6658 | |||||
6659 | |||||
6660 | /* | ||||
6661 | ** Initialize all term iterators in the pNear object. If any term is found | ||||
6662 | ** to match no documents at all, return immediately without initializing any | ||||
6663 | ** further iterators. | ||||
6664 | ** | ||||
6665 | ** If an error occurs, return an SQLite error code. Otherwise, return | ||||
6666 | ** SQLITE_OK. It is not considered an error if some term matches zero | ||||
6667 | ** documents. | ||||
6668 | */ | ||||
6669 | static int fts5ExprNearInitAll( | ||||
6670 | Fts5Expr *pExpr, | ||||
6671 | Fts5ExprNode *pNode | ||||
6672 | ){ | ||||
6673 | Fts5ExprNearset *pNear = pNode->pNear; | ||||
6674 | int i; | ||||
6675 | |||||
6676 | assert( pNode->bNomatch==0 )((void) (0)); | ||||
6677 | for(i=0; i<pNear->nPhrase; i++){ | ||||
6678 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | ||||
6679 | if( pPhrase->nTerm==0 ){ | ||||
6680 | pNode->bEof = 1; | ||||
6681 | return SQLITE_OK0; | ||||
6682 | }else{ | ||||
6683 | int j; | ||||
6684 | for(j=0; j<pPhrase->nTerm; j++){ | ||||
6685 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; | ||||
6686 | Fts5ExprTerm *p; | ||||
6687 | int bHit = 0; | ||||
6688 | |||||
6689 | for(p=pTerm; p; p=p->pSynonym){ | ||||
6690 | int rc; | ||||
6691 | if( p->pIter ){ | ||||
6692 | sqlite3Fts5IterClose(p->pIter); | ||||
6693 | p->pIter = 0; | ||||
6694 | } | ||||
6695 | rc = sqlite3Fts5IndexQuery( | ||||
6696 | pExpr->pIndex, p->pTerm, p->nQueryTerm, | ||||
6697 | (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX0x0001 : 0) | | ||||
6698 | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC0x0002 : 0), | ||||
6699 | pNear->pColset, | ||||
6700 | &p->pIter | ||||
6701 | ); | ||||
6702 | assert( (rc==SQLITE_OK)==(p->pIter!=0) )((void) (0)); | ||||
6703 | if( rc!=SQLITE_OK0 ) return rc; | ||||
6704 | if( 0==sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof) ){ | ||||
6705 | bHit = 1; | ||||
6706 | } | ||||
6707 | } | ||||
6708 | |||||
6709 | if( bHit==0 ){ | ||||
6710 | pNode->bEof = 1; | ||||
6711 | return SQLITE_OK0; | ||||
6712 | } | ||||
6713 | } | ||||
6714 | } | ||||
6715 | } | ||||
6716 | |||||
6717 | pNode->bEof = 0; | ||||
6718 | return SQLITE_OK0; | ||||
6719 | } | ||||
6720 | |||||
6721 | /* | ||||
6722 | ** If pExpr is an ASC iterator, this function returns a value with the | ||||
6723 | ** same sign as: | ||||
6724 | ** | ||||
6725 | ** (iLhs - iRhs) | ||||
6726 | ** | ||||
6727 | ** Otherwise, if this is a DESC iterator, the opposite is returned: | ||||
6728 | ** | ||||
6729 | ** (iRhs - iLhs) | ||||
6730 | */ | ||||
6731 | static int fts5RowidCmp( | ||||
6732 | Fts5Expr *pExpr, | ||||
6733 | i64 iLhs, | ||||
6734 | i64 iRhs | ||||
6735 | ){ | ||||
6736 | assert( pExpr->bDesc==0 || pExpr->bDesc==1 )((void) (0)); | ||||
6737 | if( pExpr->bDesc==0 ){ | ||||
6738 | if( iLhs<iRhs ) return -1; | ||||
6739 | return (iLhs > iRhs); | ||||
6740 | }else{ | ||||
6741 | if( iLhs>iRhs ) return -1; | ||||
6742 | return (iLhs < iRhs); | ||||
6743 | } | ||||
6744 | } | ||||
6745 | |||||
6746 | static void fts5ExprSetEof(Fts5ExprNode *pNode){ | ||||
6747 | int i; | ||||
6748 | pNode->bEof = 1; | ||||
6749 | pNode->bNomatch = 0; | ||||
6750 | for(i=0; i<pNode->nChild; i++){ | ||||
6751 | fts5ExprSetEof(pNode->apChild[i]); | ||||
6752 | } | ||||
6753 | } | ||||
6754 | |||||
6755 | static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ | ||||
6756 | if( pNode->eType==FTS5_STRING9 || pNode->eType==FTS5_TERM4 ){ | ||||
6757 | Fts5ExprNearset *pNear = pNode->pNear; | ||||
6758 | int i; | ||||
6759 | for(i=0; i<pNear->nPhrase; i++){ | ||||
6760 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | ||||
6761 | pPhrase->poslist.n = 0; | ||||
6762 | } | ||||
6763 | }else{ | ||||
6764 | int i; | ||||
6765 | for(i=0; i<pNode->nChild; i++){ | ||||
6766 | fts5ExprNodeZeroPoslist(pNode->apChild[i]); | ||||
6767 | } | ||||
6768 | } | ||||
6769 | } | ||||
6770 | |||||
6771 | |||||
6772 | |||||
6773 | /* | ||||
6774 | ** Compare the values currently indicated by the two nodes as follows: | ||||
6775 | ** | ||||
6776 | ** res = (*p1) - (*p2) | ||||
6777 | ** | ||||
6778 | ** Nodes that point to values that come later in the iteration order are | ||||
6779 | ** considered to be larger. Nodes at EOF are the largest of all. | ||||
6780 | ** | ||||
6781 | ** This means that if the iteration order is ASC, then numerically larger | ||||
6782 | ** rowids are considered larger. Or if it is the default DESC, numerically | ||||
6783 | ** smaller rowids are larger. | ||||
6784 | */ | ||||
6785 | static int fts5NodeCompare( | ||||
6786 | Fts5Expr *pExpr, | ||||
6787 | Fts5ExprNode *p1, | ||||
6788 | Fts5ExprNode *p2 | ||||
6789 | ){ | ||||
6790 | if( p2->bEof ) return -1; | ||||
6791 | if( p1->bEof ) return +1; | ||||
6792 | return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); | ||||
6793 | } | ||||
6794 | |||||
6795 | /* | ||||
6796 | ** All individual term iterators in pNear are guaranteed to be valid when | ||||
6797 | ** this function is called. This function checks if all term iterators | ||||
6798 | ** point to the same rowid, and if not, advances them until they do. | ||||
6799 | ** If an EOF is reached before this happens, *pbEof is set to true before | ||||
6800 | ** returning. | ||||
6801 | ** | ||||
6802 | ** SQLITE_OK is returned if an error occurs, or an SQLite error code | ||||
6803 | ** otherwise. It is not considered an error code if an iterator reaches | ||||
6804 | ** EOF. | ||||
6805 | */ | ||||
6806 | static int fts5ExprNodeTest_STRING( | ||||
6807 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | ||||
6808 | Fts5ExprNode *pNode | ||||
6809 | ){ | ||||
6810 | Fts5ExprNearset *pNear = pNode->pNear; | ||||
6811 | Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; | ||||
6812 | int rc = SQLITE_OK0; | ||||
6813 | i64 iLast; /* Lastest rowid any iterator points to */ | ||||
6814 | int i, j; /* Phrase and token index, respectively */ | ||||
6815 | int bMatch; /* True if all terms are at the same rowid */ | ||||
6816 | const int bDesc = pExpr->bDesc; | ||||
6817 | |||||
6818 | /* Check that this node should not be FTS5_TERM */ | ||||
6819 | assert( pNear->nPhrase>1((void) (0)) | ||||
6820 | || pNear->apPhrase[0]->nTerm>1((void) (0)) | ||||
6821 | || pNear->apPhrase[0]->aTerm[0].pSynonym((void) (0)) | ||||
6822 | || pNear->apPhrase[0]->aTerm[0].bFirst((void) (0)) | ||||
6823 | )((void) (0)); | ||||
6824 | |||||
6825 | /* Initialize iLast, the "lastest" rowid any iterator points to. If the | ||||
6826 | ** iterator skips through rowids in the default ascending order, this means | ||||
6827 | ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it | ||||
6828 | ** means the minimum rowid. */ | ||||
6829 | if( pLeft->aTerm[0].pSynonym ){ | ||||
6830 | iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); | ||||
6831 | }else{ | ||||
6832 | iLast = pLeft->aTerm[0].pIter->iRowid; | ||||
6833 | } | ||||
6834 | |||||
6835 | do { | ||||
6836 | bMatch = 1; | ||||
6837 | for(i=0; i<pNear->nPhrase; i++){ | ||||
6838 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | ||||
6839 | for(j=0; j<pPhrase->nTerm; j++){ | ||||
6840 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; | ||||
6841 | if( pTerm->pSynonym ){ | ||||
6842 | i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); | ||||
6843 | if( iRowid==iLast ) continue; | ||||
6844 | bMatch = 0; | ||||
6845 | if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ | ||||
6846 | pNode->bNomatch = 0; | ||||
6847 | pNode->bEof = 1; | ||||
6848 | return rc; | ||||
6849 | } | ||||
6850 | }else{ | ||||
6851 | Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; | ||||
6852 | if( pIter->iRowid==iLast ) continue; | ||||
6853 | bMatch = 0; | ||||
6854 | if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ | ||||
6855 | return rc; | ||||
6856 | } | ||||
6857 | } | ||||
6858 | } | ||||
6859 | } | ||||
6860 | }while( bMatch==0 ); | ||||
6861 | |||||
6862 | pNode->iRowid = iLast; | ||||
6863 | pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK0); | ||||
6864 | assert( pNode->bEof==0 || pNode->bNomatch==0 )((void) (0)); | ||||
6865 | |||||
6866 | return rc; | ||||
6867 | } | ||||
6868 | |||||
6869 | /* | ||||
6870 | ** Advance the first term iterator in the first phrase of pNear. Set output | ||||
6871 | ** variable *pbEof to true if it reaches EOF or if an error occurs. | ||||
6872 | ** | ||||
6873 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | ||||
6874 | ** occurs. | ||||
6875 | */ | ||||
6876 | static int fts5ExprNodeNext_STRING( | ||||
6877 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | ||||
6878 | Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ | ||||
6879 | int bFromValid, | ||||
6880 | i64 iFrom | ||||
6881 | ){ | ||||
6882 | Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; | ||||
6883 | int rc = SQLITE_OK0; | ||||
6884 | |||||
6885 | pNode->bNomatch = 0; | ||||
6886 | if( pTerm->pSynonym ){ | ||||
6887 | int bEof = 1; | ||||
6888 | Fts5ExprTerm *p; | ||||
6889 | |||||
6890 | /* Find the firstest rowid any synonym points to. */ | ||||
6891 | i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); | ||||
6892 | |||||
6893 | /* Advance each iterator that currently points to iRowid. Or, if iFrom | ||||
6894 | ** is valid - each iterator that points to a rowid before iFrom. */ | ||||
6895 | for(p=pTerm; p; p=p->pSynonym){ | ||||
6896 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | ||||
6897 | i64 ii = p->pIter->iRowid; | ||||
6898 | if( ii==iRowid | ||||
6899 | || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) | ||||
6900 | ){ | ||||
6901 | if( bFromValid ){ | ||||
6902 | rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); | ||||
6903 | }else{ | ||||
6904 | rc = sqlite3Fts5IterNext(p->pIter); | ||||
6905 | } | ||||
6906 | if( rc!=SQLITE_OK0 ) break; | ||||
6907 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | ||||
6908 | bEof = 0; | ||||
6909 | } | ||||
6910 | }else{ | ||||
6911 | bEof = 0; | ||||
6912 | } | ||||
6913 | } | ||||
6914 | } | ||||
6915 | |||||
6916 | /* Set the EOF flag if either all synonym iterators are at EOF or an | ||||
6917 | ** error has occurred. */ | ||||
6918 | pNode->bEof = (rc || bEof); | ||||
6919 | }else{ | ||||
6920 | Fts5IndexIter *pIter = pTerm->pIter; | ||||
6921 | |||||
6922 | assert( Fts5NodeIsString(pNode) )((void) (0)); | ||||
6923 | if( bFromValid ){ | ||||
6924 | rc = sqlite3Fts5IterNextFrom(pIter, iFrom); | ||||
6925 | }else{ | ||||
6926 | rc = sqlite3Fts5IterNext(pIter); | ||||
6927 | } | ||||
6928 | |||||
6929 | pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)((pIter)->bEof)); | ||||
6930 | } | ||||
6931 | |||||
6932 | if( pNode->bEof==0 ){ | ||||
6933 | assert( rc==SQLITE_OK )((void) (0)); | ||||
6934 | rc = fts5ExprNodeTest_STRING(pExpr, pNode); | ||||
6935 | } | ||||
6936 | |||||
6937 | return rc; | ||||
6938 | } | ||||
6939 | |||||
6940 | |||||
6941 | static int fts5ExprNodeTest_TERM( | ||||
6942 | Fts5Expr *pExpr, /* Expression that pNear is a part of */ | ||||
6943 | Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ | ||||
6944 | ){ | ||||
6945 | /* As this "NEAR" object is actually a single phrase that consists | ||||
6946 | ** of a single term only, grab pointers into the poslist managed by the | ||||
6947 | ** fts5_index.c iterator object. This is much faster than synthesizing | ||||
6948 | ** a new poslist the way we have to for more complicated phrase or NEAR | ||||
6949 | ** expressions. */ | ||||
6950 | Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; | ||||
6951 | Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; | ||||
6952 | |||||
6953 | assert( pNode->eType==FTS5_TERM )((void) (0)); | ||||
6954 | assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 )((void) (0)); | ||||
6955 | assert( pPhrase->aTerm[0].pSynonym==0 )((void) (0)); | ||||
6956 | |||||
6957 | pPhrase->poslist.n = pIter->nData; | ||||
6958 | if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | ||||
6959 | pPhrase->poslist.p = (u8*)pIter->pData; | ||||
6960 | } | ||||
6961 | pNode->iRowid = pIter->iRowid; | ||||
6962 | pNode->bNomatch = (pPhrase->poslist.n==0); | ||||
6963 | return SQLITE_OK0; | ||||
6964 | } | ||||
6965 | |||||
6966 | /* | ||||
6967 | ** xNext() method for a node of type FTS5_TERM. | ||||
6968 | */ | ||||
6969 | static int fts5ExprNodeNext_TERM( | ||||
6970 | Fts5Expr *pExpr, | ||||
6971 | Fts5ExprNode *pNode, | ||||
6972 | int bFromValid, | ||||
6973 | i64 iFrom | ||||
6974 | ){ | ||||
6975 | int rc; | ||||
6976 | Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; | ||||
6977 | |||||
6978 | assert( pNode->bEof==0 )((void) (0)); | ||||
6979 | if( bFromValid ){ | ||||
6980 | rc = sqlite3Fts5IterNextFrom(pIter, iFrom); | ||||
6981 | }else{ | ||||
6982 | rc = sqlite3Fts5IterNext(pIter); | ||||
6983 | } | ||||
6984 | if( rc==SQLITE_OK0 && sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 ){ | ||||
6985 | rc = fts5ExprNodeTest_TERM(pExpr, pNode); | ||||
6986 | }else{ | ||||
6987 | pNode->bEof = 1; | ||||
6988 | pNode->bNomatch = 0; | ||||
6989 | } | ||||
6990 | return rc; | ||||
6991 | } | ||||
6992 | |||||
6993 | static void fts5ExprNodeTest_OR( | ||||
6994 | Fts5Expr *pExpr, /* Expression of which pNode is a part */ | ||||
6995 | Fts5ExprNode *pNode /* Expression node to test */ | ||||
6996 | ){ | ||||
6997 | Fts5ExprNode *pNext = pNode->apChild[0]; | ||||
6998 | int i; | ||||
6999 | |||||
7000 | for(i=1; i<pNode->nChild; i++){ | ||||
7001 | Fts5ExprNode *pChild = pNode->apChild[i]; | ||||
7002 | int cmp = fts5NodeCompare(pExpr, pNext, pChild); | ||||
7003 | if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ | ||||
7004 | pNext = pChild; | ||||
7005 | } | ||||
7006 | } | ||||
7007 | pNode->iRowid = pNext->iRowid; | ||||
7008 | pNode->bEof = pNext->bEof; | ||||
7009 | pNode->bNomatch = pNext->bNomatch; | ||||
7010 | } | ||||
7011 | |||||
7012 | static int fts5ExprNodeNext_OR( | ||||
7013 | Fts5Expr *pExpr, | ||||
7014 | Fts5ExprNode *pNode, | ||||
7015 | int bFromValid, | ||||
7016 | i64 iFrom | ||||
7017 | ){ | ||||
7018 | int i; | ||||
7019 | i64 iLast = pNode->iRowid; | ||||
7020 | |||||
7021 | for(i=0; i<pNode->nChild; i++){ | ||||
7022 | Fts5ExprNode *p1 = pNode->apChild[i]; | ||||
7023 | assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 )((void) (0)); | ||||
7024 | if( p1->bEof==0 ){ | ||||
7025 | if( (p1->iRowid==iLast) | ||||
7026 | || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) | ||||
7027 | ){ | ||||
7028 | int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom)(p1)->xNext((pExpr), (p1), (bFromValid), (iFrom)); | ||||
7029 | if( rc!=SQLITE_OK0 ){ | ||||
7030 | pNode->bNomatch = 0; | ||||
7031 | return rc; | ||||
7032 | } | ||||
7033 | } | ||||
7034 | } | ||||
7035 | } | ||||
7036 | |||||
7037 | fts5ExprNodeTest_OR(pExpr, pNode); | ||||
7038 | return SQLITE_OK0; | ||||
7039 | } | ||||
7040 | |||||
7041 | /* | ||||
7042 | ** Argument pNode is an FTS5_AND node. | ||||
7043 | */ | ||||
7044 | static int fts5ExprNodeTest_AND( | ||||
7045 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | ||||
7046 | Fts5ExprNode *pAnd /* FTS5_AND node to advance */ | ||||
7047 | ){ | ||||
7048 | int iChild; | ||||
7049 | i64 iLast = pAnd->iRowid; | ||||
7050 | int rc = SQLITE_OK0; | ||||
7051 | int bMatch; | ||||
7052 | |||||
7053 | assert( pAnd->bEof==0 )((void) (0)); | ||||
7054 | do { | ||||
7055 | pAnd->bNomatch = 0; | ||||
7056 | bMatch = 1; | ||||
7057 | for(iChild=0; iChild<pAnd->nChild; iChild++){ | ||||
7058 | Fts5ExprNode *pChild = pAnd->apChild[iChild]; | ||||
7059 | int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); | ||||
7060 | if( cmp>0 ){ | ||||
7061 | /* Advance pChild until it points to iLast or laster */ | ||||
7062 | rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast)(pChild)->xNext((pExpr), (pChild), (1), (iLast)); | ||||
7063 | if( rc!=SQLITE_OK0 ){ | ||||
7064 | pAnd->bNomatch = 0; | ||||
7065 | return rc; | ||||
7066 | } | ||||
7067 | } | ||||
7068 | |||||
7069 | /* If the child node is now at EOF, so is the parent AND node. Otherwise, | ||||
7070 | ** the child node is guaranteed to have advanced at least as far as | ||||
7071 | ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the | ||||
7072 | ** new lastest rowid seen so far. */ | ||||
7073 | assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 )((void) (0)); | ||||
7074 | if( pChild->bEof ){ | ||||
7075 | fts5ExprSetEof(pAnd); | ||||
7076 | bMatch = 1; | ||||
7077 | break; | ||||
7078 | }else if( iLast!=pChild->iRowid ){ | ||||
7079 | bMatch = 0; | ||||
7080 | iLast = pChild->iRowid; | ||||
7081 | } | ||||
7082 | |||||
7083 | if( pChild->bNomatch ){ | ||||
7084 | pAnd->bNomatch = 1; | ||||
7085 | } | ||||
7086 | } | ||||
7087 | }while( bMatch==0 ); | ||||
7088 | |||||
7089 | if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ | ||||
7090 | fts5ExprNodeZeroPoslist(pAnd); | ||||
7091 | } | ||||
7092 | pAnd->iRowid = iLast; | ||||
7093 | return SQLITE_OK0; | ||||
7094 | } | ||||
7095 | |||||
7096 | static int fts5ExprNodeNext_AND( | ||||
7097 | Fts5Expr *pExpr, | ||||
7098 | Fts5ExprNode *pNode, | ||||
7099 | int bFromValid, | ||||
7100 | i64 iFrom | ||||
7101 | ){ | ||||
7102 | int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom)(pNode->apChild[0])->xNext((pExpr), (pNode->apChild[ 0]), (bFromValid), (iFrom)); | ||||
7103 | if( rc==SQLITE_OK0 ){ | ||||
7104 | rc = fts5ExprNodeTest_AND(pExpr, pNode); | ||||
7105 | }else{ | ||||
7106 | pNode->bNomatch = 0; | ||||
7107 | } | ||||
7108 | return rc; | ||||
7109 | } | ||||
7110 | |||||
7111 | static int fts5ExprNodeTest_NOT( | ||||
7112 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | ||||
7113 | Fts5ExprNode *pNode /* FTS5_NOT node to advance */ | ||||
7114 | ){ | ||||
7115 | int rc = SQLITE_OK0; | ||||
7116 | Fts5ExprNode *p1 = pNode->apChild[0]; | ||||
7117 | Fts5ExprNode *p2 = pNode->apChild[1]; | ||||
7118 | assert( pNode->nChild==2 )((void) (0)); | ||||
7119 | |||||
7120 | while( rc==SQLITE_OK0 && p1->bEof==0 ){ | ||||
7121 | int cmp = fts5NodeCompare(pExpr, p1, p2); | ||||
7122 | if( cmp>0 ){ | ||||
7123 | rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid)(p2)->xNext((pExpr), (p2), (1), (p1->iRowid)); | ||||
7124 | cmp = fts5NodeCompare(pExpr, p1, p2); | ||||
7125 | } | ||||
7126 | assert( rc!=SQLITE_OK || cmp<=0 )((void) (0)); | ||||
7127 | if( cmp || p2->bNomatch ) break; | ||||
7128 | rc = fts5ExprNodeNext(pExpr, p1, 0, 0)(p1)->xNext((pExpr), (p1), (0), (0)); | ||||
7129 | } | ||||
7130 | pNode->bEof = p1->bEof; | ||||
7131 | pNode->bNomatch = p1->bNomatch; | ||||
7132 | pNode->iRowid = p1->iRowid; | ||||
7133 | if( p1->bEof ){ | ||||
7134 | fts5ExprNodeZeroPoslist(p2); | ||||
7135 | } | ||||
7136 | return rc; | ||||
7137 | } | ||||
7138 | |||||
7139 | static int fts5ExprNodeNext_NOT( | ||||
7140 | Fts5Expr *pExpr, | ||||
7141 | Fts5ExprNode *pNode, | ||||
7142 | int bFromValid, | ||||
7143 | i64 iFrom | ||||
7144 | ){ | ||||
7145 | int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom)(pNode->apChild[0])->xNext((pExpr), (pNode->apChild[ 0]), (bFromValid), (iFrom)); | ||||
7146 | if( rc==SQLITE_OK0 ){ | ||||
7147 | rc = fts5ExprNodeTest_NOT(pExpr, pNode); | ||||
7148 | } | ||||
7149 | if( rc!=SQLITE_OK0 ){ | ||||
7150 | pNode->bNomatch = 0; | ||||
7151 | } | ||||
7152 | return rc; | ||||
7153 | } | ||||
7154 | |||||
7155 | /* | ||||
7156 | ** If pNode currently points to a match, this function returns SQLITE_OK | ||||
7157 | ** without modifying it. Otherwise, pNode is advanced until it does point | ||||
7158 | ** to a match or EOF is reached. | ||||
7159 | */ | ||||
7160 | static int fts5ExprNodeTest( | ||||
7161 | Fts5Expr *pExpr, /* Expression of which pNode is a part */ | ||||
7162 | Fts5ExprNode *pNode /* Expression node to test */ | ||||
7163 | ){ | ||||
7164 | int rc = SQLITE_OK0; | ||||
7165 | if( pNode->bEof==0 ){ | ||||
7166 | switch( pNode->eType ){ | ||||
7167 | |||||
7168 | case FTS5_STRING9: { | ||||
7169 | rc = fts5ExprNodeTest_STRING(pExpr, pNode); | ||||
7170 | break; | ||||
7171 | } | ||||
7172 | |||||
7173 | case FTS5_TERM4: { | ||||
7174 | rc = fts5ExprNodeTest_TERM(pExpr, pNode); | ||||
7175 | break; | ||||
7176 | } | ||||
7177 | |||||
7178 | case FTS5_AND2: { | ||||
7179 | rc = fts5ExprNodeTest_AND(pExpr, pNode); | ||||
7180 | break; | ||||
7181 | } | ||||
7182 | |||||
7183 | case FTS5_OR1: { | ||||
7184 | fts5ExprNodeTest_OR(pExpr, pNode); | ||||
7185 | break; | ||||
7186 | } | ||||
7187 | |||||
7188 | default: assert( pNode->eType==FTS5_NOT )((void) (0)); { | ||||
7189 | rc = fts5ExprNodeTest_NOT(pExpr, pNode); | ||||
7190 | break; | ||||
7191 | } | ||||
7192 | } | ||||
7193 | } | ||||
7194 | return rc; | ||||
7195 | } | ||||
7196 | |||||
7197 | |||||
7198 | /* | ||||
7199 | ** Set node pNode, which is part of expression pExpr, to point to the first | ||||
7200 | ** match. If there are no matches, set the Node.bEof flag to indicate EOF. | ||||
7201 | ** | ||||
7202 | ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. | ||||
7203 | ** It is not an error if there are no matches. | ||||
7204 | */ | ||||
7205 | static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ | ||||
7206 | int rc = SQLITE_OK0; | ||||
7207 | pNode->bEof = 0; | ||||
7208 | pNode->bNomatch = 0; | ||||
7209 | |||||
7210 | if( Fts5NodeIsString(pNode)((pNode)->eType==4 || (pNode)->eType==9) ){ | ||||
7211 | /* Initialize all term iterators in the NEAR object. */ | ||||
7212 | rc = fts5ExprNearInitAll(pExpr, pNode); | ||||
7213 | }else if( pNode->xNext==0 ){ | ||||
7214 | pNode->bEof = 1; | ||||
7215 | }else{ | ||||
7216 | int i; | ||||
7217 | int nEof = 0; | ||||
7218 | for(i=0; i<pNode->nChild && rc==SQLITE_OK0; i++){ | ||||
7219 | Fts5ExprNode *pChild = pNode->apChild[i]; | ||||
7220 | rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); | ||||
7221 | assert( pChild->bEof==0 || pChild->bEof==1 )((void) (0)); | ||||
7222 | nEof += pChild->bEof; | ||||
7223 | } | ||||
7224 | pNode->iRowid = pNode->apChild[0]->iRowid; | ||||
7225 | |||||
7226 | switch( pNode->eType ){ | ||||
7227 | case FTS5_AND2: | ||||
7228 | if( nEof>0 ) fts5ExprSetEof(pNode); | ||||
7229 | break; | ||||
7230 | |||||
7231 | case FTS5_OR1: | ||||
7232 | if( pNode->nChild==nEof ) fts5ExprSetEof(pNode); | ||||
7233 | break; | ||||
7234 | |||||
7235 | default: | ||||
7236 | assert( pNode->eType==FTS5_NOT )((void) (0)); | ||||
7237 | pNode->bEof = pNode->apChild[0]->bEof; | ||||
7238 | break; | ||||
7239 | } | ||||
7240 | } | ||||
7241 | |||||
7242 | if( rc==SQLITE_OK0 ){ | ||||
7243 | rc = fts5ExprNodeTest(pExpr, pNode); | ||||
7244 | } | ||||
7245 | return rc; | ||||
7246 | } | ||||
7247 | |||||
7248 | |||||
7249 | /* | ||||
7250 | ** Begin iterating through the set of documents in index pIdx matched by | ||||
7251 | ** the MATCH expression passed as the first argument. If the "bDesc" | ||||
7252 | ** parameter is passed a non-zero value, iteration is in descending rowid | ||||
7253 | ** order. Or, if it is zero, in ascending order. | ||||
7254 | ** | ||||
7255 | ** If iterating in ascending rowid order (bDesc==0), the first document | ||||
7256 | ** visited is that with the smallest rowid that is larger than or equal | ||||
7257 | ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1), | ||||
7258 | ** then the first document visited must have a rowid smaller than or | ||||
7259 | ** equal to iFirst. | ||||
7260 | ** | ||||
7261 | ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It | ||||
7262 | ** is not considered an error if the query does not match any documents. | ||||
7263 | */ | ||||
7264 | static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ | ||||
7265 | Fts5ExprNode *pRoot = p->pRoot; | ||||
7266 | int rc; /* Return code */ | ||||
7267 | |||||
7268 | p->pIndex = pIdx; | ||||
7269 | p->bDesc = bDesc; | ||||
7270 | rc = fts5ExprNodeFirst(p, pRoot); | ||||
7271 | |||||
7272 | /* If not at EOF but the current rowid occurs earlier than iFirst in | ||||
7273 | ** the iteration order, move to document iFirst or later. */ | ||||
7274 | if( rc==SQLITE_OK0 | ||||
7275 | && 0==pRoot->bEof | ||||
7276 | && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 | ||||
7277 | ){ | ||||
7278 | rc = fts5ExprNodeNext(p, pRoot, 1, iFirst)(pRoot)->xNext((p), (pRoot), (1), (iFirst)); | ||||
7279 | } | ||||
7280 | |||||
7281 | /* If the iterator is not at a real match, skip forward until it is. */ | ||||
7282 | while( pRoot->bNomatch && rc==SQLITE_OK0 ){ | ||||
7283 | assert( pRoot->bEof==0 )((void) (0)); | ||||
7284 | rc = fts5ExprNodeNext(p, pRoot, 0, 0)(pRoot)->xNext((p), (pRoot), (0), (0)); | ||||
7285 | } | ||||
7286 | return rc; | ||||
7287 | } | ||||
7288 | |||||
7289 | /* | ||||
7290 | ** Move to the next document | ||||
7291 | ** | ||||
7292 | ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It | ||||
7293 | ** is not considered an error if the query does not match any documents. | ||||
7294 | */ | ||||
7295 | static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ | ||||
7296 | int rc; | ||||
7297 | Fts5ExprNode *pRoot = p->pRoot; | ||||
7298 | assert( pRoot->bEof==0 && pRoot->bNomatch==0 )((void) (0)); | ||||
7299 | do { | ||||
7300 | rc = fts5ExprNodeNext(p, pRoot, 0, 0)(pRoot)->xNext((p), (pRoot), (0), (0)); | ||||
7301 | assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) )((void) (0)); | ||||
7302 | }while( pRoot->bNomatch ); | ||||
7303 | if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ | ||||
7304 | pRoot->bEof = 1; | ||||
7305 | } | ||||
7306 | return rc; | ||||
7307 | } | ||||
7308 | |||||
7309 | static int sqlite3Fts5ExprEof(Fts5Expr *p){ | ||||
7310 | return p->pRoot->bEof; | ||||
7311 | } | ||||
7312 | |||||
7313 | static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ | ||||
7314 | return p->pRoot->iRowid; | ||||
7315 | } | ||||
7316 | |||||
7317 | static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ | ||||
7318 | int rc = SQLITE_OK0; | ||||
7319 | *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); | ||||
7320 | return rc; | ||||
7321 | } | ||||
7322 | |||||
7323 | /* | ||||
7324 | ** Free the phrase object passed as the only argument. | ||||
7325 | */ | ||||
7326 | static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ | ||||
7327 | if( pPhrase ){ | ||||
7328 | int i; | ||||
7329 | for(i=0; i<pPhrase->nTerm; i++){ | ||||
7330 | Fts5ExprTerm *pSyn; | ||||
7331 | Fts5ExprTerm *pNext; | ||||
7332 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; | ||||
7333 | sqlite3_freesqlite3_api->free(pTerm->pTerm); | ||||
7334 | sqlite3Fts5IterClose(pTerm->pIter); | ||||
7335 | for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ | ||||
7336 | pNext = pSyn->pSynonym; | ||||
7337 | sqlite3Fts5IterClose(pSyn->pIter); | ||||
7338 | fts5BufferFree((Fts5Buffer*)&pSyn[1])sqlite3Fts5BufferFree((Fts5Buffer*)&pSyn[1]); | ||||
7339 | sqlite3_freesqlite3_api->free(pSyn); | ||||
7340 | } | ||||
7341 | } | ||||
7342 | if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist)sqlite3Fts5BufferFree(&pPhrase->poslist); | ||||
7343 | sqlite3_freesqlite3_api->free(pPhrase); | ||||
7344 | } | ||||
7345 | } | ||||
7346 | |||||
7347 | /* | ||||
7348 | ** Set the "bFirst" flag on the first token of the phrase passed as the | ||||
7349 | ** only argument. | ||||
7350 | */ | ||||
7351 | static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){ | ||||
7352 | if( pPhrase && pPhrase->nTerm ){ | ||||
7353 | pPhrase->aTerm[0].bFirst = 1; | ||||
7354 | } | ||||
7355 | } | ||||
7356 | |||||
7357 | /* | ||||
7358 | ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated | ||||
7359 | ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is | ||||
7360 | ** appended to it and the results returned. | ||||
7361 | ** | ||||
7362 | ** If an OOM error occurs, both the pNear and pPhrase objects are freed and | ||||
7363 | ** NULL returned. | ||||
7364 | */ | ||||
7365 | static Fts5ExprNearset *sqlite3Fts5ParseNearset( | ||||
7366 | Fts5Parse *pParse, /* Parse context */ | ||||
7367 | Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ | ||||
7368 | Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ | ||||
7369 | ){ | ||||
7370 | const int SZALLOC = 8; | ||||
7371 | Fts5ExprNearset *pRet = 0; | ||||
7372 | |||||
7373 | if( pParse->rc==SQLITE_OK0 ){ | ||||
7374 | if( pNear==0 ){ | ||||
7375 | sqlite3_int64 nByte; | ||||
7376 | nByte = SZ_FTS5EXPRNEARSET(SZALLOC+1)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(SZALLOC+1)*sizeof (Fts5ExprPhrase*)); | ||||
7377 | pRet = sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
7378 | if( pRet==0 ){ | ||||
7379 | pParse->rc = SQLITE_NOMEM7; | ||||
7380 | }else{ | ||||
7381 | memset(pRet, 0, (size_t)nByte); | ||||
7382 | } | ||||
7383 | }else if( (pNear->nPhrase % SZALLOC)==0 ){ | ||||
7384 | int nNew = pNear->nPhrase + SZALLOC; | ||||
7385 | sqlite3_int64 nByte; | ||||
7386 | |||||
7387 | nByte = SZ_FTS5EXPRNEARSET(nNew+1)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(nNew+1)*sizeof (Fts5ExprPhrase*)); | ||||
7388 | pRet = (Fts5ExprNearset*)sqlite3_realloc64sqlite3_api->realloc64(pNear, nByte); | ||||
7389 | if( pRet==0 ){ | ||||
7390 | pParse->rc = SQLITE_NOMEM7; | ||||
7391 | } | ||||
7392 | }else{ | ||||
7393 | pRet = pNear; | ||||
7394 | } | ||||
7395 | } | ||||
7396 | |||||
7397 | if( pRet==0 ){ | ||||
7398 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | ||||
7399 | sqlite3Fts5ParseNearsetFree(pNear); | ||||
7400 | sqlite3Fts5ParsePhraseFree(pPhrase); | ||||
7401 | }else{ | ||||
7402 | if( pRet->nPhrase>0 ){ | ||||
7403 | Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1]; | ||||
7404 | assert( pParse!=0 )((void) (0)); | ||||
7405 | assert( pParse->apPhrase!=0 )((void) (0)); | ||||
7406 | assert( pParse->nPhrase>=2 )((void) (0)); | ||||
7407 | assert( pLast==pParse->apPhrase[pParse->nPhrase-2] )((void) (0)); | ||||
7408 | if( pPhrase->nTerm==0 ){ | ||||
7409 | fts5ExprPhraseFree(pPhrase); | ||||
7410 | pRet->nPhrase--; | ||||
7411 | pParse->nPhrase--; | ||||
7412 | pPhrase = pLast; | ||||
7413 | }else if( pLast->nTerm==0 ){ | ||||
7414 | fts5ExprPhraseFree(pLast); | ||||
7415 | pParse->apPhrase[pParse->nPhrase-2] = pPhrase; | ||||
7416 | pParse->nPhrase--; | ||||
7417 | pRet->nPhrase--; | ||||
7418 | } | ||||
7419 | } | ||||
7420 | pRet->apPhrase[pRet->nPhrase++] = pPhrase; | ||||
7421 | } | ||||
7422 | return pRet; | ||||
7423 | } | ||||
7424 | |||||
7425 | typedef struct TokenCtx TokenCtx; | ||||
7426 | struct TokenCtx { | ||||
7427 | Fts5ExprPhrase *pPhrase; | ||||
7428 | Fts5Config *pConfig; | ||||
7429 | int rc; | ||||
7430 | }; | ||||
7431 | |||||
7432 | /* | ||||
7433 | ** Callback for tokenizing terms used by ParseTerm(). | ||||
7434 | */ | ||||
7435 | static int fts5ParseTokenize( | ||||
7436 | void *pContext, /* Pointer to Fts5InsertCtx object */ | ||||
7437 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | ||||
7438 | const char *pToken, /* Buffer containing token */ | ||||
7439 | int nToken, /* Size of token in bytes */ | ||||
7440 | int iUnused1, /* Start offset of token */ | ||||
7441 | int iUnused2 /* End offset of token */ | ||||
7442 | ){ | ||||
7443 | int rc = SQLITE_OK0; | ||||
7444 | const int SZALLOC = 8; | ||||
7445 | TokenCtx *pCtx = (TokenCtx*)pContext; | ||||
7446 | Fts5ExprPhrase *pPhrase = pCtx->pPhrase; | ||||
7447 | |||||
7448 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | ||||
7449 | |||||
7450 | /* If an error has already occurred, this is a no-op */ | ||||
7451 | if( pCtx->rc!=SQLITE_OK0 ) return pCtx->rc; | ||||
7452 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | ||||
7453 | |||||
7454 | if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED0x0001) ){ | ||||
7455 | Fts5ExprTerm *pSyn; | ||||
7456 | sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; | ||||
7457 | pSyn = (Fts5ExprTerm*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
7458 | if( pSyn==0 ){ | ||||
7459 | rc = SQLITE_NOMEM7; | ||||
7460 | }else{ | ||||
7461 | memset(pSyn, 0, (size_t)nByte); | ||||
7462 | pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); | ||||
7463 | pSyn->nFullTerm = pSyn->nQueryTerm = nToken; | ||||
7464 | if( pCtx->pConfig->bTokendata ){ | ||||
7465 | pSyn->nQueryTerm = (int)strlen(pSyn->pTerm); | ||||
7466 | } | ||||
7467 | memcpy(pSyn->pTerm, pToken, nToken); | ||||
7468 | pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; | ||||
7469 | pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; | ||||
7470 | } | ||||
7471 | }else{ | ||||
7472 | Fts5ExprTerm *pTerm; | ||||
7473 | if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ | ||||
7474 | Fts5ExprPhrase *pNew; | ||||
7475 | int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); | ||||
7476 | |||||
7477 | pNew = (Fts5ExprPhrase*)sqlite3_realloc64sqlite3_api->realloc64(pPhrase, | ||||
7478 | SZ_FTS5EXPRPHRASE(nNew+1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (nNew+1)*sizeof( Fts5ExprTerm)) | ||||
7479 | ); | ||||
7480 | if( pNew==0 ){ | ||||
7481 | rc = SQLITE_NOMEM7; | ||||
7482 | }else{ | ||||
7483 | if( pPhrase==0 ) memset(pNew, 0, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | ||||
7484 | pCtx->pPhrase = pPhrase = pNew; | ||||
7485 | pNew->nTerm = nNew - SZALLOC; | ||||
7486 | } | ||||
7487 | } | ||||
7488 | |||||
7489 | if( rc==SQLITE_OK0 ){ | ||||
7490 | pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; | ||||
7491 | memset(pTerm, 0, sizeof(Fts5ExprTerm)); | ||||
7492 | pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); | ||||
7493 | pTerm->nFullTerm = pTerm->nQueryTerm = nToken; | ||||
7494 | if( pCtx->pConfig->bTokendata && rc==SQLITE_OK0 ){ | ||||
7495 | pTerm->nQueryTerm = (int)strlen(pTerm->pTerm); | ||||
7496 | } | ||||
7497 | } | ||||
7498 | } | ||||
7499 | |||||
7500 | pCtx->rc = rc; | ||||
7501 | return rc; | ||||
7502 | } | ||||
7503 | |||||
7504 | |||||
7505 | /* | ||||
7506 | ** Free the phrase object passed as the only argument. | ||||
7507 | */ | ||||
7508 | static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ | ||||
7509 | fts5ExprPhraseFree(pPhrase); | ||||
7510 | } | ||||
7511 | |||||
7512 | /* | ||||
7513 | ** Free the phrase object passed as the second argument. | ||||
7514 | */ | ||||
7515 | static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ | ||||
7516 | if( pNear ){ | ||||
7517 | int i; | ||||
7518 | for(i=0; i<pNear->nPhrase; i++){ | ||||
7519 | fts5ExprPhraseFree(pNear->apPhrase[i]); | ||||
7520 | } | ||||
7521 | sqlite3_freesqlite3_api->free(pNear->pColset); | ||||
7522 | sqlite3_freesqlite3_api->free(pNear); | ||||
7523 | } | ||||
7524 | } | ||||
7525 | |||||
7526 | static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ | ||||
7527 | assert( pParse->pExpr==0 )((void) (0)); | ||||
7528 | pParse->pExpr = p; | ||||
7529 | } | ||||
7530 | |||||
7531 | static int parseGrowPhraseArray(Fts5Parse *pParse){ | ||||
7532 | if( (pParse->nPhrase % 8)==0 ){ | ||||
7533 | sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); | ||||
7534 | Fts5ExprPhrase **apNew; | ||||
7535 | apNew = (Fts5ExprPhrase**)sqlite3_realloc64sqlite3_api->realloc64(pParse->apPhrase, nByte); | ||||
7536 | if( apNew==0 ){ | ||||
7537 | pParse->rc = SQLITE_NOMEM7; | ||||
7538 | return SQLITE_NOMEM7; | ||||
7539 | } | ||||
7540 | pParse->apPhrase = apNew; | ||||
7541 | } | ||||
7542 | return SQLITE_OK0; | ||||
7543 | } | ||||
7544 | |||||
7545 | /* | ||||
7546 | ** This function is called by the parser to process a string token. The | ||||
7547 | ** string may or may not be quoted. In any case it is tokenized and a | ||||
7548 | ** phrase object consisting of all tokens returned. | ||||
7549 | */ | ||||
7550 | static Fts5ExprPhrase *sqlite3Fts5ParseTerm( | ||||
7551 | Fts5Parse *pParse, /* Parse context */ | ||||
7552 | Fts5ExprPhrase *pAppend, /* Phrase to append to */ | ||||
7553 | Fts5Token *pToken, /* String to tokenize */ | ||||
7554 | int bPrefix /* True if there is a trailing "*" */ | ||||
7555 | ){ | ||||
7556 | Fts5Config *pConfig = pParse->pConfig; | ||||
7557 | TokenCtx sCtx; /* Context object passed to callback */ | ||||
7558 | int rc; /* Tokenize return code */ | ||||
7559 | char *z = 0; | ||||
7560 | |||||
7561 | memset(&sCtx, 0, sizeof(TokenCtx)); | ||||
7562 | sCtx.pPhrase = pAppend; | ||||
7563 | sCtx.pConfig = pConfig; | ||||
7564 | |||||
7565 | rc = fts5ParseStringFromToken(pToken, &z); | ||||
7566 | if( rc==SQLITE_OK0 ){ | ||||
7567 | int flags = FTS5_TOKENIZE_QUERY0x0001 | (bPrefix ? FTS5_TOKENIZE_PREFIX0x0002 : 0); | ||||
7568 | int n; | ||||
7569 | sqlite3Fts5Dequote(z); | ||||
7570 | n = (int)strlen(z); | ||||
7571 | rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); | ||||
7572 | } | ||||
7573 | sqlite3_freesqlite3_api->free(z); | ||||
7574 | if( rc || (rc = sCtx.rc) ){ | ||||
7575 | pParse->rc = rc; | ||||
7576 | fts5ExprPhraseFree(sCtx.pPhrase); | ||||
7577 | sCtx.pPhrase = 0; | ||||
7578 | }else{ | ||||
7579 | |||||
7580 | if( pAppend==0 ){ | ||||
7581 | if( parseGrowPhraseArray(pParse) ){ | ||||
7582 | fts5ExprPhraseFree(sCtx.pPhrase); | ||||
7583 | return 0; | ||||
7584 | } | ||||
7585 | pParse->nPhrase++; | ||||
7586 | } | ||||
7587 | |||||
7588 | if( sCtx.pPhrase==0 ){ | ||||
7589 | /* This happens when parsing a token or quoted phrase that contains | ||||
7590 | ** no token characters at all. (e.g ... MATCH '""'). */ | ||||
7591 | sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | ||||
7592 | }else if( sCtx.pPhrase->nTerm ){ | ||||
7593 | sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix; | ||||
7594 | } | ||||
7595 | assert( pParse->apPhrase!=0 )((void) (0)); | ||||
7596 | pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; | ||||
7597 | } | ||||
7598 | |||||
7599 | return sCtx.pPhrase; | ||||
7600 | } | ||||
7601 | |||||
7602 | /* | ||||
7603 | ** Create a new FTS5 expression by cloning phrase iPhrase of the | ||||
7604 | ** expression passed as the second argument. | ||||
7605 | */ | ||||
7606 | static int sqlite3Fts5ExprClonePhrase( | ||||
7607 | Fts5Expr *pExpr, | ||||
7608 | int iPhrase, | ||||
7609 | Fts5Expr **ppNew | ||||
7610 | ){ | ||||
7611 | int rc = SQLITE_OK0; /* Return code */ | ||||
7612 | Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */ | ||||
7613 | Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ | ||||
7614 | TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */ | ||||
7615 | if( !pExpr || iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | ||||
7616 | rc = SQLITE_RANGE25; | ||||
7617 | }else{ | ||||
7618 | pOrig = pExpr->apExprPhrase[iPhrase]; | ||||
7619 | pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); | ||||
7620 | } | ||||
7621 | if( rc==SQLITE_OK0 ){ | ||||
7622 | pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, | ||||
7623 | sizeof(Fts5ExprPhrase*)); | ||||
7624 | } | ||||
7625 | if( rc==SQLITE_OK0 ){ | ||||
7626 | pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, SZ_FTS5EXPRNODE(1)(__builtin_offsetof(Fts5ExprNode, apChild) + (1)*sizeof(Fts5ExprNode *))); | ||||
7627 | } | ||||
7628 | if( rc==SQLITE_OK0 ){ | ||||
7629 | pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, | ||||
7630 | SZ_FTS5EXPRNEARSET(2)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(2)*sizeof(Fts5ExprPhrase *))); | ||||
7631 | } | ||||
7632 | if( rc==SQLITE_OK0 && ALWAYS(pOrig!=0)(pOrig!=0) ){ | ||||
7633 | Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset; | ||||
7634 | if( pColsetOrig ){ | ||||
7635 | sqlite3_int64 nByte; | ||||
7636 | Fts5Colset *pColset; | ||||
7637 | nByte = SZ_FTS5COLSET(pColsetOrig->nCol)(sizeof(i64)*((pColsetOrig->nCol+2)/2)); | ||||
7638 | pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte); | ||||
7639 | if( pColset ){ | ||||
7640 | memcpy(pColset, pColsetOrig, (size_t)nByte); | ||||
7641 | } | ||||
7642 | pNew->pRoot->pNear->pColset = pColset; | ||||
7643 | } | ||||
7644 | } | ||||
7645 | |||||
7646 | if( rc==SQLITE_OK0 ){ | ||||
7647 | if( pOrig->nTerm ){ | ||||
7648 | int i; /* Used to iterate through phrase terms */ | ||||
7649 | sCtx.pConfig = pExpr->pConfig; | ||||
7650 | for(i=0; rc==SQLITE_OK0 && i<pOrig->nTerm; i++){ | ||||
7651 | int tflags = 0; | ||||
7652 | Fts5ExprTerm *p; | ||||
7653 | for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK0; p=p->pSynonym){ | ||||
7654 | rc = fts5ParseTokenize((void*)&sCtx,tflags,p->pTerm,p->nFullTerm,0,0); | ||||
7655 | tflags = FTS5_TOKEN_COLOCATED0x0001; | ||||
7656 | } | ||||
7657 | if( rc==SQLITE_OK0 ){ | ||||
7658 | sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; | ||||
7659 | sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst; | ||||
7660 | } | ||||
7661 | } | ||||
7662 | }else{ | ||||
7663 | /* This happens when parsing a token or quoted phrase that contains | ||||
7664 | ** no token characters at all. (e.g ... MATCH '""'). */ | ||||
7665 | sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | ||||
7666 | } | ||||
7667 | } | ||||
7668 | |||||
7669 | if( rc==SQLITE_OK0 && ALWAYS(sCtx.pPhrase)(sCtx.pPhrase) ){ | ||||
7670 | /* All the allocations succeeded. Put the expression object together. */ | ||||
7671 | pNew->pIndex = pExpr->pIndex; | ||||
7672 | pNew->pConfig = pExpr->pConfig; | ||||
7673 | pNew->nPhrase = 1; | ||||
7674 | pNew->apExprPhrase[0] = sCtx.pPhrase; | ||||
7675 | pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; | ||||
7676 | pNew->pRoot->pNear->nPhrase = 1; | ||||
7677 | sCtx.pPhrase->pNode = pNew->pRoot; | ||||
7678 | |||||
7679 | if( pOrig->nTerm==1 | ||||
7680 | && pOrig->aTerm[0].pSynonym==0 | ||||
7681 | && pOrig->aTerm[0].bFirst==0 | ||||
7682 | ){ | ||||
7683 | pNew->pRoot->eType = FTS5_TERM4; | ||||
7684 | pNew->pRoot->xNext = fts5ExprNodeNext_TERM; | ||||
7685 | }else{ | ||||
7686 | pNew->pRoot->eType = FTS5_STRING9; | ||||
7687 | pNew->pRoot->xNext = fts5ExprNodeNext_STRING; | ||||
7688 | } | ||||
7689 | }else{ | ||||
7690 | sqlite3Fts5ExprFree(pNew); | ||||
7691 | fts5ExprPhraseFree(sCtx.pPhrase); | ||||
7692 | pNew = 0; | ||||
7693 | } | ||||
7694 | |||||
7695 | *ppNew = pNew; | ||||
7696 | return rc; | ||||
7697 | } | ||||
7698 | |||||
7699 | |||||
7700 | /* | ||||
7701 | ** Token pTok has appeared in a MATCH expression where the NEAR operator | ||||
7702 | ** is expected. If token pTok does not contain "NEAR", store an error | ||||
7703 | ** in the pParse object. | ||||
7704 | */ | ||||
7705 | static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ | ||||
7706 | if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ | ||||
7707 | sqlite3Fts5ParseError( | ||||
7708 | pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p | ||||
7709 | ); | ||||
7710 | } | ||||
7711 | } | ||||
7712 | |||||
7713 | static void sqlite3Fts5ParseSetDistance( | ||||
7714 | Fts5Parse *pParse, | ||||
7715 | Fts5ExprNearset *pNear, | ||||
7716 | Fts5Token *p | ||||
7717 | ){ | ||||
7718 | if( pNear ){ | ||||
7719 | int nNear = 0; | ||||
7720 | int i; | ||||
7721 | if( p->n ){ | ||||
7722 | for(i=0; i<p->n; i++){ | ||||
7723 | char c = (char)p->p[i]; | ||||
7724 | if( c<'0' || c>'9' ){ | ||||
7725 | sqlite3Fts5ParseError( | ||||
7726 | pParse, "expected integer, got \"%.*s\"", p->n, p->p | ||||
7727 | ); | ||||
7728 | return; | ||||
7729 | } | ||||
7730 | if( nNear<214748363 ) nNear = nNear * 10 + (p->p[i] - '0'); | ||||
7731 | /* ^^^^^^^^^^^^^^^--- Prevent integer overflow */ | ||||
7732 | } | ||||
7733 | }else{ | ||||
7734 | nNear = FTS5_DEFAULT_NEARDIST10; | ||||
7735 | } | ||||
7736 | pNear->nNear = nNear; | ||||
7737 | } | ||||
7738 | } | ||||
7739 | |||||
7740 | /* | ||||
7741 | ** The second argument passed to this function may be NULL, or it may be | ||||
7742 | ** an existing Fts5Colset object. This function returns a pointer to | ||||
7743 | ** a new colset object containing the contents of (p) with new value column | ||||
7744 | ** number iCol appended. | ||||
7745 | ** | ||||
7746 | ** If an OOM error occurs, store an error code in pParse and return NULL. | ||||
7747 | ** The old colset object (if any) is not freed in this case. | ||||
7748 | */ | ||||
7749 | static Fts5Colset *fts5ParseColset( | ||||
7750 | Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ | ||||
7751 | Fts5Colset *p, /* Existing colset object */ | ||||
7752 | int iCol /* New column to add to colset object */ | ||||
7753 | ){ | ||||
7754 | int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ | ||||
7755 | Fts5Colset *pNew; /* New colset object to return */ | ||||
7756 | |||||
7757 | assert( pParse->rc==SQLITE_OK )((void) (0)); | ||||
7758 | assert( iCol>=0 && iCol<pParse->pConfig->nCol )((void) (0)); | ||||
7759 | |||||
7760 | pNew = sqlite3_realloc64sqlite3_api->realloc64(p, SZ_FTS5COLSET(nCol+1)(sizeof(i64)*((nCol+1 +2)/2))); | ||||
7761 | if( pNew==0 ){ | ||||
7762 | pParse->rc = SQLITE_NOMEM7; | ||||
7763 | }else{ | ||||
7764 | int *aiCol = pNew->aiCol; | ||||
7765 | int i, j; | ||||
7766 | for(i=0; i<nCol; i++){ | ||||
7767 | if( aiCol[i]==iCol ) return pNew; | ||||
7768 | if( aiCol[i]>iCol ) break; | ||||
7769 | } | ||||
7770 | for(j=nCol; j>i; j--){ | ||||
7771 | aiCol[j] = aiCol[j-1]; | ||||
7772 | } | ||||
7773 | aiCol[i] = iCol; | ||||
7774 | pNew->nCol = nCol+1; | ||||
7775 | |||||
7776 | #ifndef NDEBUG1 | ||||
7777 | /* Check that the array is in order and contains no duplicate entries. */ | ||||
7778 | for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] )((void) (0)); | ||||
7779 | #endif | ||||
7780 | } | ||||
7781 | |||||
7782 | return pNew; | ||||
7783 | } | ||||
7784 | |||||
7785 | /* | ||||
7786 | ** Allocate and return an Fts5Colset object specifying the inverse of | ||||
7787 | ** the colset passed as the second argument. Free the colset passed | ||||
7788 | ** as the second argument before returning. | ||||
7789 | */ | ||||
7790 | static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){ | ||||
7791 | Fts5Colset *pRet; | ||||
7792 | int nCol = pParse->pConfig->nCol; | ||||
7793 | |||||
7794 | pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc, | ||||
7795 | SZ_FTS5COLSET(nCol+1)(sizeof(i64)*((nCol+1 +2)/2)) | ||||
7796 | ); | ||||
7797 | if( pRet ){ | ||||
7798 | int i; | ||||
7799 | int iOld = 0; | ||||
7800 | for(i=0; i<nCol; i++){ | ||||
7801 | if( iOld>=p->nCol || p->aiCol[iOld]!=i ){ | ||||
7802 | pRet->aiCol[pRet->nCol++] = i; | ||||
7803 | }else{ | ||||
7804 | iOld++; | ||||
7805 | } | ||||
7806 | } | ||||
7807 | } | ||||
7808 | |||||
7809 | sqlite3_freesqlite3_api->free(p); | ||||
7810 | return pRet; | ||||
7811 | } | ||||
7812 | |||||
7813 | static Fts5Colset *sqlite3Fts5ParseColset( | ||||
7814 | Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ | ||||
7815 | Fts5Colset *pColset, /* Existing colset object */ | ||||
7816 | Fts5Token *p | ||||
7817 | ){ | ||||
7818 | Fts5Colset *pRet = 0; | ||||
7819 | int iCol; | ||||
7820 | char *z; /* Dequoted copy of token p */ | ||||
7821 | |||||
7822 | z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); | ||||
7823 | if( pParse->rc==SQLITE_OK0 ){ | ||||
7824 | Fts5Config *pConfig = pParse->pConfig; | ||||
7825 | sqlite3Fts5Dequote(z); | ||||
7826 | for(iCol=0; iCol<pConfig->nCol; iCol++){ | ||||
7827 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(pConfig->azCol[iCol], z) ) break; | ||||
7828 | } | ||||
7829 | if( iCol==pConfig->nCol ){ | ||||
7830 | sqlite3Fts5ParseError(pParse, "no such column: %s", z); | ||||
7831 | }else{ | ||||
7832 | pRet = fts5ParseColset(pParse, pColset, iCol); | ||||
7833 | } | ||||
7834 | sqlite3_freesqlite3_api->free(z); | ||||
7835 | } | ||||
7836 | |||||
7837 | if( pRet==0 ){ | ||||
7838 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | ||||
7839 | sqlite3_freesqlite3_api->free(pColset); | ||||
7840 | } | ||||
7841 | |||||
7842 | return pRet; | ||||
7843 | } | ||||
7844 | |||||
7845 | /* | ||||
7846 | ** If argument pOrig is NULL, or if (*pRc) is set to anything other than | ||||
7847 | ** SQLITE_OK when this function is called, NULL is returned. | ||||
7848 | ** | ||||
7849 | ** Otherwise, a copy of (*pOrig) is made into memory obtained from | ||||
7850 | ** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation | ||||
7851 | ** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned. | ||||
7852 | */ | ||||
7853 | static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){ | ||||
7854 | Fts5Colset *pRet; | ||||
7855 | if( pOrig ){ | ||||
7856 | sqlite3_int64 nByte = SZ_FTS5COLSET(pOrig->nCol)(sizeof(i64)*((pOrig->nCol+2)/2)); | ||||
7857 | pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte); | ||||
7858 | if( pRet ){ | ||||
7859 | memcpy(pRet, pOrig, (size_t)nByte); | ||||
7860 | } | ||||
7861 | }else{ | ||||
7862 | pRet = 0; | ||||
7863 | } | ||||
7864 | return pRet; | ||||
7865 | } | ||||
7866 | |||||
7867 | /* | ||||
7868 | ** Remove from colset pColset any columns that are not also in colset pMerge. | ||||
7869 | */ | ||||
7870 | static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){ | ||||
7871 | int iIn = 0; /* Next input in pColset */ | ||||
7872 | int iMerge = 0; /* Next input in pMerge */ | ||||
7873 | int iOut = 0; /* Next output slot in pColset */ | ||||
7874 | |||||
7875 | while( iIn<pColset->nCol && iMerge<pMerge->nCol ){ | ||||
7876 | int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge]; | ||||
7877 | if( iDiff==0 ){ | ||||
7878 | pColset->aiCol[iOut++] = pMerge->aiCol[iMerge]; | ||||
7879 | iMerge++; | ||||
7880 | iIn++; | ||||
7881 | }else if( iDiff>0 ){ | ||||
7882 | iMerge++; | ||||
7883 | }else{ | ||||
7884 | iIn++; | ||||
7885 | } | ||||
7886 | } | ||||
7887 | pColset->nCol = iOut; | ||||
7888 | } | ||||
7889 | |||||
7890 | /* | ||||
7891 | ** Recursively apply colset pColset to expression node pNode and all of | ||||
7892 | ** its decendents. If (*ppFree) is not NULL, it contains a spare copy | ||||
7893 | ** of pColset. This function may use the spare copy and set (*ppFree) to | ||||
7894 | ** zero, or it may create copies of pColset using fts5CloneColset(). | ||||
7895 | */ | ||||
7896 | static void fts5ParseSetColset( | ||||
7897 | Fts5Parse *pParse, | ||||
7898 | Fts5ExprNode *pNode, | ||||
7899 | Fts5Colset *pColset, | ||||
7900 | Fts5Colset **ppFree | ||||
7901 | ){ | ||||
7902 | if( pParse->rc==SQLITE_OK0 ){ | ||||
7903 | assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING((void) (0)) | ||||
7904 | || pNode->eType==FTS5_AND || pNode->eType==FTS5_OR((void) (0)) | ||||
7905 | || pNode->eType==FTS5_NOT || pNode->eType==FTS5_EOF((void) (0)) | ||||
7906 | )((void) (0)); | ||||
7907 | if( pNode->eType==FTS5_STRING9 || pNode->eType==FTS5_TERM4 ){ | ||||
7908 | Fts5ExprNearset *pNear = pNode->pNear; | ||||
7909 | if( pNear->pColset ){ | ||||
7910 | fts5MergeColset(pNear->pColset, pColset); | ||||
7911 | if( pNear->pColset->nCol==0 ){ | ||||
7912 | pNode->eType = FTS5_EOF0; | ||||
7913 | pNode->xNext = 0; | ||||
7914 | } | ||||
7915 | }else if( *ppFree ){ | ||||
7916 | pNear->pColset = pColset; | ||||
7917 | *ppFree = 0; | ||||
7918 | }else{ | ||||
7919 | pNear->pColset = fts5CloneColset(&pParse->rc, pColset); | ||||
7920 | } | ||||
7921 | }else{ | ||||
7922 | int i; | ||||
7923 | assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 )((void) (0)); | ||||
7924 | for(i=0; i<pNode->nChild; i++){ | ||||
7925 | fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree); | ||||
7926 | } | ||||
7927 | } | ||||
7928 | } | ||||
7929 | } | ||||
7930 | |||||
7931 | /* | ||||
7932 | ** Apply colset pColset to expression node pExpr and all of its descendents. | ||||
7933 | */ | ||||
7934 | static void sqlite3Fts5ParseSetColset( | ||||
7935 | Fts5Parse *pParse, | ||||
7936 | Fts5ExprNode *pExpr, | ||||
7937 | Fts5Colset *pColset | ||||
7938 | ){ | ||||
7939 | Fts5Colset *pFree = pColset; | ||||
7940 | if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
7941 | sqlite3Fts5ParseError(pParse, | ||||
7942 | "fts5: column queries are not supported (detail=none)" | ||||
7943 | ); | ||||
7944 | }else{ | ||||
7945 | fts5ParseSetColset(pParse, pExpr, pColset, &pFree); | ||||
7946 | } | ||||
7947 | sqlite3_freesqlite3_api->free(pFree); | ||||
7948 | } | ||||
7949 | |||||
7950 | static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ | ||||
7951 | switch( pNode->eType ){ | ||||
7952 | case FTS5_STRING9: { | ||||
7953 | Fts5ExprNearset *pNear = pNode->pNear; | ||||
7954 | if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 | ||||
7955 | && pNear->apPhrase[0]->aTerm[0].pSynonym==0 | ||||
7956 | && pNear->apPhrase[0]->aTerm[0].bFirst==0 | ||||
7957 | ){ | ||||
7958 | pNode->eType = FTS5_TERM4; | ||||
7959 | pNode->xNext = fts5ExprNodeNext_TERM; | ||||
7960 | }else{ | ||||
7961 | pNode->xNext = fts5ExprNodeNext_STRING; | ||||
7962 | } | ||||
7963 | break; | ||||
7964 | }; | ||||
7965 | |||||
7966 | case FTS5_OR1: { | ||||
7967 | pNode->xNext = fts5ExprNodeNext_OR; | ||||
7968 | break; | ||||
7969 | }; | ||||
7970 | |||||
7971 | case FTS5_AND2: { | ||||
7972 | pNode->xNext = fts5ExprNodeNext_AND; | ||||
7973 | break; | ||||
7974 | }; | ||||
7975 | |||||
7976 | default: assert( pNode->eType==FTS5_NOT )((void) (0)); { | ||||
7977 | pNode->xNext = fts5ExprNodeNext_NOT; | ||||
7978 | break; | ||||
7979 | }; | ||||
7980 | } | ||||
7981 | } | ||||
7982 | |||||
7983 | /* | ||||
7984 | ** Add pSub as a child of p. | ||||
7985 | */ | ||||
7986 | static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ | ||||
7987 | int ii = p->nChild; | ||||
7988 | if( p->eType!=FTS5_NOT3 && pSub->eType==p->eType ){ | ||||
7989 | int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; | ||||
7990 | memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); | ||||
7991 | p->nChild += pSub->nChild; | ||||
7992 | sqlite3_freesqlite3_api->free(pSub); | ||||
7993 | }else{ | ||||
7994 | p->apChild[p->nChild++] = pSub; | ||||
7995 | } | ||||
7996 | for( ; ii<p->nChild; ii++){ | ||||
7997 | p->iHeight = MAX(p->iHeight, p->apChild[ii]->iHeight + 1)(((p->iHeight) > (p->apChild[ii]->iHeight + 1)) ? (p->iHeight) : (p->apChild[ii]->iHeight + 1)); | ||||
7998 | } | ||||
7999 | } | ||||
8000 | |||||
8001 | /* | ||||
8002 | ** This function is used when parsing LIKE or GLOB patterns against | ||||
8003 | ** trigram indexes that specify either detail=column or detail=none. | ||||
8004 | ** It converts a phrase: | ||||
8005 | ** | ||||
8006 | ** abc + def + ghi | ||||
8007 | ** | ||||
8008 | ** into an AND tree: | ||||
8009 | ** | ||||
8010 | ** abc AND def AND ghi | ||||
8011 | */ | ||||
8012 | static Fts5ExprNode *fts5ParsePhraseToAnd( | ||||
8013 | Fts5Parse *pParse, | ||||
8014 | Fts5ExprNearset *pNear | ||||
8015 | ){ | ||||
8016 | int nTerm = pNear->apPhrase[0]->nTerm; | ||||
8017 | int ii; | ||||
8018 | int nByte; | ||||
8019 | Fts5ExprNode *pRet; | ||||
8020 | |||||
8021 | assert( pNear->nPhrase==1 )((void) (0)); | ||||
8022 | assert( pParse->bPhraseToAnd )((void) (0)); | ||||
8023 | |||||
8024 | nByte = SZ_FTS5EXPRNODE(nTerm+1)(__builtin_offsetof(Fts5ExprNode, apChild) + (nTerm+1)*sizeof (Fts5ExprNode*)); | ||||
8025 | pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); | ||||
8026 | if( pRet ){ | ||||
8027 | pRet->eType = FTS5_AND2; | ||||
8028 | pRet->nChild = nTerm; | ||||
8029 | pRet->iHeight = 1; | ||||
8030 | fts5ExprAssignXNext(pRet); | ||||
8031 | pParse->nPhrase--; | ||||
8032 | for(ii=0; ii<nTerm; ii++){ | ||||
8033 | Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero( | ||||
8034 | &pParse->rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm )) | ||||
8035 | ); | ||||
8036 | if( pPhrase ){ | ||||
8037 | if( parseGrowPhraseArray(pParse) ){ | ||||
8038 | fts5ExprPhraseFree(pPhrase); | ||||
8039 | }else{ | ||||
8040 | Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii]; | ||||
8041 | Fts5ExprTerm *pTo = &pPhrase->aTerm[0]; | ||||
8042 | pParse->apPhrase[pParse->nPhrase++] = pPhrase; | ||||
8043 | pPhrase->nTerm = 1; | ||||
8044 | pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm); | ||||
8045 | pTo->nQueryTerm = p->nQueryTerm; | ||||
8046 | pTo->nFullTerm = p->nFullTerm; | ||||
8047 | pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, | ||||
8048 | 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) | ||||
8049 | ); | ||||
8050 | } | ||||
8051 | } | ||||
8052 | } | ||||
8053 | |||||
8054 | if( pParse->rc ){ | ||||
8055 | sqlite3Fts5ParseNodeFree(pRet); | ||||
8056 | pRet = 0; | ||||
8057 | }else{ | ||||
8058 | sqlite3Fts5ParseNearsetFree(pNear); | ||||
8059 | } | ||||
8060 | } | ||||
8061 | |||||
8062 | return pRet; | ||||
8063 | } | ||||
8064 | |||||
8065 | /* | ||||
8066 | ** Allocate and return a new expression object. If anything goes wrong (i.e. | ||||
8067 | ** OOM error), leave an error code in pParse and return NULL. | ||||
8068 | */ | ||||
8069 | static Fts5ExprNode *sqlite3Fts5ParseNode( | ||||
8070 | Fts5Parse *pParse, /* Parse context */ | ||||
8071 | int eType, /* FTS5_STRING, AND, OR or NOT */ | ||||
8072 | Fts5ExprNode *pLeft, /* Left hand child expression */ | ||||
8073 | Fts5ExprNode *pRight, /* Right hand child expression */ | ||||
8074 | Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ | ||||
8075 | ){ | ||||
8076 | Fts5ExprNode *pRet = 0; | ||||
8077 | |||||
8078 | if( pParse->rc==SQLITE_OK0 ){ | ||||
8079 | int nChild = 0; /* Number of children of returned node */ | ||||
8080 | sqlite3_int64 nByte; /* Bytes of space to allocate for this node */ | ||||
8081 | |||||
8082 | assert( (eType!=FTS5_STRING && !pNear)((void) (0)) | ||||
8083 | || (eType==FTS5_STRING && !pLeft && !pRight)((void) (0)) | ||||
8084 | )((void) (0)); | ||||
8085 | if( eType==FTS5_STRING9 && pNear==0 ) return 0; | ||||
8086 | if( eType!=FTS5_STRING9 && pLeft==0 ) return pRight; | ||||
8087 | if( eType!=FTS5_STRING9 && pRight==0 ) return pLeft; | ||||
8088 | |||||
8089 | if( eType==FTS5_STRING9 | ||||
8090 | && pParse->bPhraseToAnd | ||||
8091 | && pNear->apPhrase[0]->nTerm>1 | ||||
8092 | ){ | ||||
8093 | pRet = fts5ParsePhraseToAnd(pParse, pNear); | ||||
8094 | }else{ | ||||
8095 | if( eType==FTS5_NOT3 ){ | ||||
8096 | nChild = 2; | ||||
8097 | }else if( eType==FTS5_AND2 || eType==FTS5_OR1 ){ | ||||
8098 | nChild = 2; | ||||
8099 | if( pLeft->eType==eType ) nChild += pLeft->nChild-1; | ||||
8100 | if( pRight->eType==eType ) nChild += pRight->nChild-1; | ||||
8101 | } | ||||
8102 | |||||
8103 | nByte = SZ_FTS5EXPRNODE(nChild)(__builtin_offsetof(Fts5ExprNode, apChild) + (nChild)*sizeof( Fts5ExprNode*)); | ||||
8104 | pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); | ||||
8105 | |||||
8106 | if( pRet ){ | ||||
8107 | pRet->eType = eType; | ||||
8108 | pRet->pNear = pNear; | ||||
8109 | fts5ExprAssignXNext(pRet); | ||||
8110 | if( eType==FTS5_STRING9 ){ | ||||
8111 | int iPhrase; | ||||
8112 | for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ | ||||
8113 | pNear->apPhrase[iPhrase]->pNode = pRet; | ||||
8114 | if( pNear->apPhrase[iPhrase]->nTerm==0 ){ | ||||
8115 | pRet->xNext = 0; | ||||
8116 | pRet->eType = FTS5_EOF0; | ||||
8117 | } | ||||
8118 | } | ||||
8119 | |||||
8120 | if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | ||||
8121 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; | ||||
8122 | if( pNear->nPhrase!=1 | ||||
8123 | || pPhrase->nTerm>1 | ||||
8124 | || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst) | ||||
8125 | ){ | ||||
8126 | sqlite3Fts5ParseError(pParse, | ||||
8127 | "fts5: %s queries are not supported (detail!=full)", | ||||
8128 | pNear->nPhrase==1 ? "phrase": "NEAR" | ||||
8129 | ); | ||||
8130 | sqlite3Fts5ParseNodeFree(pRet); | ||||
8131 | pRet = 0; | ||||
8132 | pNear = 0; | ||||
8133 | assert( pLeft==0 && pRight==0 )((void) (0)); | ||||
8134 | } | ||||
8135 | } | ||||
8136 | }else{ | ||||
8137 | assert( pNear==0 )((void) (0)); | ||||
8138 | fts5ExprAddChildren(pRet, pLeft); | ||||
8139 | fts5ExprAddChildren(pRet, pRight); | ||||
8140 | pLeft = pRight = 0; | ||||
8141 | if( pRet->iHeight>SQLITE_FTS5_MAX_EXPR_DEPTH256 ){ | ||||
8142 | sqlite3Fts5ParseError(pParse, | ||||
8143 | "fts5 expression tree is too large (maximum depth %d)", | ||||
8144 | SQLITE_FTS5_MAX_EXPR_DEPTH256 | ||||
8145 | ); | ||||
8146 | sqlite3Fts5ParseNodeFree(pRet); | ||||
8147 | pRet = 0; | ||||
8148 | } | ||||
8149 | } | ||||
8150 | } | ||||
8151 | } | ||||
8152 | } | ||||
8153 | |||||
8154 | if( pRet==0 ){ | ||||
8155 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | ||||
8156 | sqlite3Fts5ParseNodeFree(pLeft); | ||||
8157 | sqlite3Fts5ParseNodeFree(pRight); | ||||
8158 | sqlite3Fts5ParseNearsetFree(pNear); | ||||
8159 | } | ||||
8160 | return pRet; | ||||
8161 | } | ||||
8162 | |||||
8163 | static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( | ||||
8164 | Fts5Parse *pParse, /* Parse context */ | ||||
8165 | Fts5ExprNode *pLeft, /* Left hand child expression */ | ||||
8166 | Fts5ExprNode *pRight /* Right hand child expression */ | ||||
8167 | ){ | ||||
8168 | Fts5ExprNode *pRet = 0; | ||||
8169 | Fts5ExprNode *pPrev; | ||||
8170 | |||||
8171 | if( pParse->rc ){ | ||||
8172 | sqlite3Fts5ParseNodeFree(pLeft); | ||||
8173 | sqlite3Fts5ParseNodeFree(pRight); | ||||
8174 | }else{ | ||||
8175 | |||||
8176 | assert( pLeft->eType==FTS5_STRING((void) (0)) | ||||
8177 | || pLeft->eType==FTS5_TERM((void) (0)) | ||||
8178 | || pLeft->eType==FTS5_EOF((void) (0)) | ||||
8179 | || pLeft->eType==FTS5_AND((void) (0)) | ||||
8180 | )((void) (0)); | ||||
8181 | assert( pRight->eType==FTS5_STRING((void) (0)) | ||||
8182 | || pRight->eType==FTS5_TERM((void) (0)) | ||||
8183 | || pRight->eType==FTS5_EOF((void) (0)) | ||||
8184 | || (pRight->eType==FTS5_AND && pParse->bPhraseToAnd)((void) (0)) | ||||
8185 | )((void) (0)); | ||||
8186 | |||||
8187 | if( pLeft->eType==FTS5_AND2 ){ | ||||
8188 | pPrev = pLeft->apChild[pLeft->nChild-1]; | ||||
8189 | }else{ | ||||
8190 | pPrev = pLeft; | ||||
8191 | } | ||||
8192 | assert( pPrev->eType==FTS5_STRING((void) (0)) | ||||
8193 | || pPrev->eType==FTS5_TERM((void) (0)) | ||||
8194 | || pPrev->eType==FTS5_EOF((void) (0)) | ||||
8195 | )((void) (0)); | ||||
8196 | |||||
8197 | if( pRight->eType==FTS5_EOF0 ){ | ||||
8198 | assert( pParse->apPhrase!=0 )((void) (0)); | ||||
8199 | assert( pParse->nPhrase>0 )((void) (0)); | ||||
8200 | assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] )((void) (0)); | ||||
8201 | sqlite3Fts5ParseNodeFree(pRight); | ||||
8202 | pRet = pLeft; | ||||
8203 | pParse->nPhrase--; | ||||
8204 | } | ||||
8205 | else if( pPrev->eType==FTS5_EOF0 ){ | ||||
8206 | Fts5ExprPhrase **ap; | ||||
8207 | |||||
8208 | if( pPrev==pLeft ){ | ||||
8209 | pRet = pRight; | ||||
8210 | }else{ | ||||
8211 | pLeft->apChild[pLeft->nChild-1] = pRight; | ||||
8212 | pRet = pLeft; | ||||
8213 | } | ||||
8214 | |||||
8215 | ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase]; | ||||
8216 | assert( ap[0]==pPrev->pNear->apPhrase[0] )((void) (0)); | ||||
8217 | memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase); | ||||
8218 | pParse->nPhrase--; | ||||
8219 | |||||
8220 | sqlite3Fts5ParseNodeFree(pPrev); | ||||
8221 | } | ||||
8222 | else{ | ||||
8223 | pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND2, pLeft, pRight, 0); | ||||
8224 | } | ||||
8225 | } | ||||
8226 | |||||
8227 | return pRet; | ||||
8228 | } | ||||
8229 | |||||
8230 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
8231 | static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ | ||||
8232 | sqlite3_int64 nByte = 0; | ||||
8233 | Fts5ExprTerm *p; | ||||
8234 | char *zQuoted; | ||||
8235 | |||||
8236 | /* Determine the maximum amount of space required. */ | ||||
8237 | for(p=pTerm; p; p=p->pSynonym){ | ||||
8238 | nByte += pTerm->nQueryTerm * 2 + 3 + 2; | ||||
8239 | } | ||||
8240 | zQuoted = sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
8241 | |||||
8242 | if( zQuoted ){ | ||||
8243 | int i = 0; | ||||
8244 | for(p=pTerm; p; p=p->pSynonym){ | ||||
8245 | char *zIn = p->pTerm; | ||||
8246 | char *zEnd = &zIn[p->nQueryTerm]; | ||||
8247 | zQuoted[i++] = '"'; | ||||
8248 | while( zIn<zEnd ){ | ||||
8249 | if( *zIn=='"' ) zQuoted[i++] = '"'; | ||||
8250 | zQuoted[i++] = *zIn++; | ||||
8251 | } | ||||
8252 | zQuoted[i++] = '"'; | ||||
8253 | if( p->pSynonym ) zQuoted[i++] = '|'; | ||||
8254 | } | ||||
8255 | if( pTerm->bPrefix ){ | ||||
8256 | zQuoted[i++] = ' '; | ||||
8257 | zQuoted[i++] = '*'; | ||||
8258 | } | ||||
8259 | zQuoted[i++] = '\0'; | ||||
8260 | } | ||||
8261 | return zQuoted; | ||||
8262 | } | ||||
8263 | |||||
/*
** Format zFmt and its arguments with sqlite3_vmprintf() and append the
** result to string zApp. zApp may be NULL, in which case the formatted
** text alone is returned. zApp is always freed. The returned buffer must
** eventually be freed using sqlite3_free(). NULL is returned if either
** the formatting or the concatenation fails.
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  char *zSuffix;                  /* Newly formatted text */
  char *zResult;                  /* Value to return */
  va_list ap;

  va_start(ap, zFmt);
  zSuffix = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  zResult = zSuffix;
  if( zApp!=0 && zSuffix!=0 ){
    zResult = sqlite3_mprintf("%s%s", zApp, zSuffix);
    sqlite3_free(zSuffix);
  }
  sqlite3_free(zApp);
  return zResult;
}
8278 | |||||
8279 | /* | ||||
8280 | ** Compose a tcl-readable representation of expression pExpr. Return a | ||||
8281 | ** pointer to a buffer containing that representation. It is the | ||||
8282 | ** responsibility of the caller to at some point free the buffer using | ||||
8283 | ** sqlite3_free(). | ||||
8284 | */ | ||||
8285 | static char *fts5ExprPrintTcl( | ||||
8286 | Fts5Config *pConfig, | ||||
8287 | const char *zNearsetCmd, | ||||
8288 | Fts5ExprNode *pExpr | ||||
8289 | ){ | ||||
8290 | char *zRet = 0; | ||||
8291 | if( pExpr->eType==FTS5_STRING9 || pExpr->eType==FTS5_TERM4 ){ | ||||
8292 | Fts5ExprNearset *pNear = pExpr->pNear; | ||||
8293 | int i; | ||||
8294 | int iTerm; | ||||
8295 | |||||
8296 | zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd); | ||||
8297 | if( zRet==0 ) return 0; | ||||
8298 | if( pNear->pColset ){ | ||||
8299 | int *aiCol = pNear->pColset->aiCol; | ||||
8300 | int nCol = pNear->pColset->nCol; | ||||
8301 | if( nCol==1 ){ | ||||
8302 | zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); | ||||
8303 | }else{ | ||||
8304 | zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); | ||||
8305 | for(i=1; i<pNear->pColset->nCol; i++){ | ||||
8306 | zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); | ||||
8307 | } | ||||
8308 | zRet = fts5PrintfAppend(zRet, "} "); | ||||
8309 | } | ||||
8310 | if( zRet==0 ) return 0; | ||||
8311 | } | ||||
8312 | |||||
8313 | if( pNear->nPhrase>1 ){ | ||||
8314 | zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); | ||||
8315 | if( zRet==0 ) return 0; | ||||
8316 | } | ||||
8317 | |||||
8318 | zRet = fts5PrintfAppend(zRet, "--"); | ||||
8319 | if( zRet==0 ) return 0; | ||||
8320 | |||||
8321 | for(i=0; i<pNear->nPhrase; i++){ | ||||
8322 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | ||||
8323 | |||||
8324 | zRet = fts5PrintfAppend(zRet, " {"); | ||||
8325 | for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ | ||||
8326 | Fts5ExprTerm *p = &pPhrase->aTerm[iTerm]; | ||||
8327 | zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ", | ||||
8328 | p->nQueryTerm, p->pTerm | ||||
8329 | ); | ||||
8330 | if( pPhrase->aTerm[iTerm].bPrefix ){ | ||||
8331 | zRet = fts5PrintfAppend(zRet, "*"); | ||||
8332 | } | ||||
8333 | } | ||||
8334 | |||||
8335 | if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); | ||||
8336 | if( zRet==0 ) return 0; | ||||
8337 | } | ||||
8338 | |||||
8339 | }else if( pExpr->eType==0 ){ | ||||
8340 | zRet = sqlite3_mprintfsqlite3_api->mprintf("{}"); | ||||
8341 | }else{ | ||||
8342 | char const *zOp = 0; | ||||
8343 | int i; | ||||
8344 | switch( pExpr->eType ){ | ||||
8345 | case FTS5_AND2: zOp = "AND"; break; | ||||
8346 | case FTS5_NOT3: zOp = "NOT"; break; | ||||
8347 | default: | ||||
8348 | assert( pExpr->eType==FTS5_OR )((void) (0)); | ||||
8349 | zOp = "OR"; | ||||
8350 | break; | ||||
8351 | } | ||||
8352 | |||||
8353 | zRet = sqlite3_mprintfsqlite3_api->mprintf("%s", zOp); | ||||
8354 | for(i=0; zRet && i<pExpr->nChild; i++){ | ||||
8355 | char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]); | ||||
8356 | if( !z ){ | ||||
8357 | sqlite3_freesqlite3_api->free(zRet); | ||||
8358 | zRet = 0; | ||||
8359 | }else{ | ||||
8360 | zRet = fts5PrintfAppend(zRet, " [%z]", z); | ||||
8361 | } | ||||
8362 | } | ||||
8363 | } | ||||
8364 | |||||
8365 | return zRet; | ||||
8366 | } | ||||
8367 | |||||
8368 | static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ | ||||
8369 | char *zRet = 0; | ||||
8370 | if( pExpr->eType==0 ){ | ||||
8371 | return sqlite3_mprintfsqlite3_api->mprintf("\"\""); | ||||
8372 | }else | ||||
8373 | if( pExpr->eType==FTS5_STRING9 || pExpr->eType==FTS5_TERM4 ){ | ||||
8374 | Fts5ExprNearset *pNear = pExpr->pNear; | ||||
8375 | int i; | ||||
8376 | int iTerm; | ||||
8377 | |||||
8378 | if( pNear->pColset ){ | ||||
8379 | int ii; | ||||
8380 | Fts5Colset *pColset = pNear->pColset; | ||||
8381 | if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{"); | ||||
8382 | for(ii=0; ii<pColset->nCol; ii++){ | ||||
8383 | zRet = fts5PrintfAppend(zRet, "%s%s", | ||||
8384 | pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " " | ||||
8385 | ); | ||||
8386 | } | ||||
8387 | if( zRet ){ | ||||
8388 | zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : ""); | ||||
8389 | } | ||||
8390 | if( zRet==0 ) return 0; | ||||
8391 | } | ||||
8392 | |||||
8393 | if( pNear->nPhrase>1 ){ | ||||
8394 | zRet = fts5PrintfAppend(zRet, "NEAR("); | ||||
8395 | if( zRet==0 ) return 0; | ||||
8396 | } | ||||
8397 | |||||
8398 | for(i=0; i<pNear->nPhrase; i++){ | ||||
8399 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | ||||
8400 | if( i!=0 ){ | ||||
8401 | zRet = fts5PrintfAppend(zRet, " "); | ||||
8402 | if( zRet==0 ) return 0; | ||||
8403 | } | ||||
8404 | for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ | ||||
8405 | char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); | ||||
8406 | if( zTerm ){ | ||||
8407 | zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); | ||||
8408 | sqlite3_freesqlite3_api->free(zTerm); | ||||
8409 | } | ||||
8410 | if( zTerm==0 || zRet==0 ){ | ||||
8411 | sqlite3_freesqlite3_api->free(zRet); | ||||
8412 | return 0; | ||||
8413 | } | ||||
8414 | } | ||||
8415 | } | ||||
8416 | |||||
8417 | if( pNear->nPhrase>1 ){ | ||||
8418 | zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); | ||||
8419 | if( zRet==0 ) return 0; | ||||
8420 | } | ||||
8421 | |||||
8422 | }else{ | ||||
8423 | char const *zOp = 0; | ||||
8424 | int i; | ||||
8425 | |||||
8426 | switch( pExpr->eType ){ | ||||
8427 | case FTS5_AND2: zOp = " AND "; break; | ||||
8428 | case FTS5_NOT3: zOp = " NOT "; break; | ||||
8429 | default: | ||||
8430 | assert( pExpr->eType==FTS5_OR )((void) (0)); | ||||
8431 | zOp = " OR "; | ||||
8432 | break; | ||||
8433 | } | ||||
8434 | |||||
8435 | for(i=0; i<pExpr->nChild; i++){ | ||||
8436 | char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]); | ||||
8437 | if( z==0 ){ | ||||
8438 | sqlite3_freesqlite3_api->free(zRet); | ||||
8439 | zRet = 0; | ||||
8440 | }else{ | ||||
8441 | int e = pExpr->apChild[i]->eType; | ||||
8442 | int b = (e!=FTS5_STRING9 && e!=FTS5_TERM4 && e!=FTS5_EOF0); | ||||
8443 | zRet = fts5PrintfAppend(zRet, "%s%s%z%s", | ||||
8444 | (i==0 ? "" : zOp), | ||||
8445 | (b?"(":""), z, (b?")":"") | ||||
8446 | ); | ||||
8447 | } | ||||
8448 | if( zRet==0 ) break; | ||||
8449 | } | ||||
8450 | } | ||||
8451 | |||||
8452 | return zRet; | ||||
8453 | } | ||||
8454 | |||||
8455 | /* | ||||
8456 | ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) | ||||
8457 | ** and fts5_expr_tcl() (bTcl!=0). | ||||
8458 | */ | ||||
8459 | static void fts5ExprFunction( | ||||
8460 | sqlite3_context *pCtx, /* Function call context */ | ||||
8461 | int nArg, /* Number of args */ | ||||
8462 | sqlite3_value **apVal, /* Function arguments */ | ||||
8463 | int bTcl | ||||
8464 | ){ | ||||
8465 | Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | ||||
8466 | sqlite3 *db = sqlite3_context_db_handlesqlite3_api->context_db_handle(pCtx); | ||||
8467 | const char *zExpr = 0; | ||||
8468 | char *zErr = 0; | ||||
8469 | Fts5Expr *pExpr = 0; | ||||
8470 | int rc; | ||||
8471 | int i; | ||||
8472 | |||||
8473 | const char **azConfig; /* Array of arguments for Fts5Config */ | ||||
8474 | const char *zNearsetCmd = "nearset"; | ||||
8475 | int nConfig; /* Size of azConfig[] */ | ||||
8476 | Fts5Config *pConfig = 0; | ||||
8477 | int iArg = 1; | ||||
8478 | |||||
8479 | if( nArg<1 ){ | ||||
8480 | zErr = sqlite3_mprintfsqlite3_api->mprintf("wrong number of arguments to function %s", | ||||
8481 | bTcl ? "fts5_expr_tcl" : "fts5_expr" | ||||
8482 | ); | ||||
8483 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | ||||
8484 | sqlite3_freesqlite3_api->free(zErr); | ||||
8485 | return; | ||||
8486 | } | ||||
8487 | |||||
8488 | if( bTcl && nArg>1 ){ | ||||
8489 | zNearsetCmd = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[1]); | ||||
8490 | iArg = 2; | ||||
8491 | } | ||||
8492 | |||||
8493 | nConfig = 3 + (nArg-iArg); | ||||
8494 | azConfig = (const char**)sqlite3_malloc64sqlite3_api->malloc64(sizeof(char*) * nConfig); | ||||
8495 | if( azConfig==0 ){ | ||||
8496 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(pCtx); | ||||
8497 | return; | ||||
8498 | } | ||||
8499 | azConfig[0] = 0; | ||||
8500 | azConfig[1] = "main"; | ||||
8501 | azConfig[2] = "tbl"; | ||||
8502 | for(i=3; iArg<nArg; iArg++){ | ||||
8503 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[iArg]); | ||||
8504 | azConfig[i++] = (z ? z : ""); | ||||
8505 | } | ||||
8506 | |||||
8507 | zExpr = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[0]); | ||||
8508 | if( zExpr==0 ) zExpr = ""; | ||||
8509 | |||||
8510 | rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); | ||||
8511 | if( rc==SQLITE_OK0 ){ | ||||
8512 | rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr); | ||||
8513 | } | ||||
8514 | if( rc==SQLITE_OK0 ){ | ||||
8515 | char *zText; | ||||
8516 | if( pExpr->pRoot->xNext==0 ){ | ||||
8517 | zText = sqlite3_mprintfsqlite3_api->mprintf(""); | ||||
8518 | }else if( bTcl ){ | ||||
8519 | zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); | ||||
8520 | }else{ | ||||
8521 | zText = fts5ExprPrint(pConfig, pExpr->pRoot); | ||||
8522 | } | ||||
8523 | if( zText==0 ){ | ||||
8524 | rc = SQLITE_NOMEM7; | ||||
8525 | }else{ | ||||
8526 | sqlite3_result_textsqlite3_api->result_text(pCtx, zText, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
8527 | sqlite3_freesqlite3_api->free(zText); | ||||
8528 | } | ||||
8529 | } | ||||
8530 | |||||
8531 | if( rc!=SQLITE_OK0 ){ | ||||
8532 | if( zErr ){ | ||||
8533 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | ||||
8534 | sqlite3_freesqlite3_api->free(zErr); | ||||
8535 | }else{ | ||||
8536 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | ||||
8537 | } | ||||
8538 | } | ||||
8539 | sqlite3_freesqlite3_api->free((void *)azConfig); | ||||
8540 | sqlite3Fts5ConfigFree(pConfig); | ||||
8541 | sqlite3Fts5ExprFree(pExpr); | ||||
8542 | } | ||||
8543 | |||||
8544 | static void fts5ExprFunctionHr( | ||||
8545 | sqlite3_context *pCtx, /* Function call context */ | ||||
8546 | int nArg, /* Number of args */ | ||||
8547 | sqlite3_value **apVal /* Function arguments */ | ||||
8548 | ){ | ||||
8549 | fts5ExprFunction(pCtx, nArg, apVal, 0); | ||||
8550 | } | ||||
8551 | static void fts5ExprFunctionTcl( | ||||
8552 | sqlite3_context *pCtx, /* Function call context */ | ||||
8553 | int nArg, /* Number of args */ | ||||
8554 | sqlite3_value **apVal /* Function arguments */ | ||||
8555 | ){ | ||||
8556 | fts5ExprFunction(pCtx, nArg, apVal, 1); | ||||
8557 | } | ||||
8558 | |||||
8559 | /* | ||||
8560 | ** The implementation of an SQLite user-defined-function that accepts a | ||||
8561 | ** single integer as an argument. If the integer is an alpha-numeric | ||||
8562 | ** unicode code point, 1 is returned. Otherwise 0. | ||||
8563 | */ | ||||
8564 | static void fts5ExprIsAlnum( | ||||
8565 | sqlite3_context *pCtx, /* Function call context */ | ||||
8566 | int nArg, /* Number of args */ | ||||
8567 | sqlite3_value **apVal /* Function arguments */ | ||||
8568 | ){ | ||||
8569 | int iCode; | ||||
8570 | u8 aArr[32]; | ||||
8571 | if( nArg!=1 ){ | ||||
8572 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | ||||
8573 | "wrong number of arguments to function fts5_isalnum", -1 | ||||
8574 | ); | ||||
8575 | return; | ||||
8576 | } | ||||
8577 | memset(aArr, 0, sizeof(aArr)); | ||||
8578 | sqlite3Fts5UnicodeCatParse("L*", aArr); | ||||
8579 | sqlite3Fts5UnicodeCatParse("N*", aArr); | ||||
8580 | sqlite3Fts5UnicodeCatParse("Co", aArr); | ||||
8581 | iCode = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | ||||
8582 | sqlite3_result_intsqlite3_api->result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]); | ||||
8583 | } | ||||
8584 | |||||
8585 | static void fts5ExprFold( | ||||
8586 | sqlite3_context *pCtx, /* Function call context */ | ||||
8587 | int nArg, /* Number of args */ | ||||
8588 | sqlite3_value **apVal /* Function arguments */ | ||||
8589 | ){ | ||||
8590 | if( nArg!=1 && nArg!=2 ){ | ||||
8591 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | ||||
8592 | "wrong number of arguments to function fts5_fold", -1 | ||||
8593 | ); | ||||
8594 | }else{ | ||||
8595 | int iCode; | ||||
8596 | int bRemoveDiacritics = 0; | ||||
8597 | iCode = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | ||||
8598 | if( nArg==2 ) bRemoveDiacritics = sqlite3_value_intsqlite3_api->value_int(apVal[1]); | ||||
8599 | sqlite3_result_intsqlite3_api->result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); | ||||
8600 | } | ||||
8601 | } | ||||
8602 | #endif /* if SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
8603 | |||||
8604 | /* | ||||
8605 | ** This is called during initialization to register the fts5_expr() scalar | ||||
8606 | ** UDF with the SQLite handle passed as the only argument. | ||||
8607 | */ | ||||
8608 | static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ | ||||
8609 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
8610 | struct Fts5ExprFunc { | ||||
8611 | const char *z; | ||||
8612 | void (*x)(sqlite3_context*,int,sqlite3_value**); | ||||
8613 | } aFunc[] = { | ||||
8614 | { "fts5_expr", fts5ExprFunctionHr }, | ||||
8615 | { "fts5_expr_tcl", fts5ExprFunctionTcl }, | ||||
8616 | { "fts5_isalnum", fts5ExprIsAlnum }, | ||||
8617 | { "fts5_fold", fts5ExprFold }, | ||||
8618 | }; | ||||
8619 | int i; | ||||
8620 | int rc = SQLITE_OK0; | ||||
8621 | void *pCtx = (void*)pGlobal; | ||||
8622 | |||||
8623 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aFunc)((int)(sizeof(aFunc) / sizeof(aFunc[0]))); i++){ | ||||
8624 | struct Fts5ExprFunc *p = &aFunc[i]; | ||||
8625 | rc = sqlite3_create_functionsqlite3_api->create_function(db, p->z, -1, SQLITE_UTF81, pCtx, p->x, 0, 0); | ||||
8626 | } | ||||
8627 | #else | ||||
8628 | int rc = SQLITE_OK0; | ||||
8629 | UNUSED_PARAM2(pGlobal,db)(void)(pGlobal), (void)(db); | ||||
8630 | #endif | ||||
8631 | |||||
8632 | /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and | ||||
8633 | ** sqlite3Fts5ParserFallback() are unused */ | ||||
8634 | #ifndef NDEBUG1 | ||||
8635 | (void)sqlite3Fts5ParserTrace; | ||||
8636 | #endif | ||||
8637 | (void)sqlite3Fts5ParserFallback; | ||||
8638 | |||||
8639 | return rc; | ||||
8640 | } | ||||
8641 | |||||
8642 | /* | ||||
8643 | ** Return the number of phrases in expression pExpr. | ||||
8644 | */ | ||||
8645 | static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ | ||||
8646 | return (pExpr ? pExpr->nPhrase : 0); | ||||
8647 | } | ||||
8648 | |||||
8649 | /* | ||||
8650 | ** Return the number of terms in the iPhrase'th phrase in pExpr. | ||||
8651 | */ | ||||
8652 | static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ | ||||
8653 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; | ||||
8654 | return pExpr->apExprPhrase[iPhrase]->nTerm; | ||||
8655 | } | ||||
8656 | |||||
8657 | /* | ||||
8658 | ** This function is used to access the current position list for phrase | ||||
8659 | ** iPhrase. | ||||
8660 | */ | ||||
8661 | static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ | ||||
8662 | int nRet; | ||||
8663 | Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; | ||||
8664 | Fts5ExprNode *pNode = pPhrase->pNode; | ||||
8665 | if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ | ||||
8666 | *pa = pPhrase->poslist.p; | ||||
8667 | nRet = pPhrase->poslist.n; | ||||
8668 | }else{ | ||||
8669 | *pa = 0; | ||||
8670 | nRet = 0; | ||||
8671 | } | ||||
8672 | return nRet; | ||||
8673 | } | ||||
8674 | |||||
8675 | struct Fts5PoslistPopulator { | ||||
8676 | Fts5PoslistWriter writer; | ||||
8677 | int bOk; /* True if ok to populate */ | ||||
8678 | int bMiss; | ||||
8679 | }; | ||||
8680 | |||||
8681 | /* | ||||
8682 | ** Clear the position lists associated with all phrases in the expression | ||||
8683 | ** passed as the first argument. Argument bLive is true if the expression | ||||
8684 | ** might be pointing to a real entry, otherwise it has just been reset. | ||||
8685 | ** | ||||
8686 | ** At present this function is only used for detail=col and detail=none | ||||
8687 | ** fts5 tables. This implies that all phrases must be at most 1 token | ||||
8688 | ** in size, as phrase matches are not supported without detail=full. | ||||
8689 | */ | ||||
8690 | static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){ | ||||
8691 | Fts5PoslistPopulator *pRet; | ||||
8692 | pRet = sqlite3_malloc64sqlite3_api->malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); | ||||
8693 | if( pRet ){ | ||||
8694 | int i; | ||||
8695 | memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); | ||||
8696 | for(i=0; i<pExpr->nPhrase; i++){ | ||||
8697 | Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist; | ||||
8698 | Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; | ||||
8699 | assert( pExpr->apExprPhrase[i]->nTerm<=1 )((void) (0)); | ||||
8700 | if( bLive && | ||||
8701 | (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof) | ||||
8702 | ){ | ||||
8703 | pRet[i].bMiss = 1; | ||||
8704 | }else{ | ||||
8705 | pBuf->n = 0; | ||||
8706 | } | ||||
8707 | } | ||||
8708 | } | ||||
8709 | return pRet; | ||||
8710 | } | ||||
8711 | |||||
8712 | struct Fts5ExprCtx { | ||||
8713 | Fts5Expr *pExpr; | ||||
8714 | Fts5PoslistPopulator *aPopulator; | ||||
8715 | i64 iOff; | ||||
8716 | }; | ||||
8717 | typedef struct Fts5ExprCtx Fts5ExprCtx; | ||||
8718 | |||||
8719 | /* | ||||
8720 | ** TODO: Make this more efficient! | ||||
8721 | */ | ||||
8722 | static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ | ||||
8723 | int i; | ||||
8724 | for(i=0; i<pColset->nCol; i++){ | ||||
8725 | if( pColset->aiCol[i]==iCol ) return 1; | ||||
8726 | } | ||||
8727 | return 0; | ||||
8728 | } | ||||
8729 | |||||
/*
** pToken is a buffer nToken bytes in size that may or may not contain
** an embedded 0x00 byte. Return the number of bytes that precede the
** first 0x00 byte, or nToken if the buffer contains no such byte.
*/
static int fts5QueryTerm(const char *pToken, int nToken){
  int nByte = 0;
  while( nByte<nToken && pToken[nByte]!='\0' ){
    nByte++;
  }
  return nByte;
}
8740 | |||||
8741 | static int fts5ExprPopulatePoslistsCb( | ||||
8742 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | ||||
8743 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | ||||
8744 | const char *pToken, /* Pointer to buffer containing token */ | ||||
8745 | int nToken, /* Size of token in bytes */ | ||||
8746 | int iUnused1, /* Byte offset of token within input text */ | ||||
8747 | int iUnused2 /* Byte offset of end of token within input text */ | ||||
8748 | ){ | ||||
8749 | Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; | ||||
8750 | Fts5Expr *pExpr = p->pExpr; | ||||
8751 | int i; | ||||
8752 | int nQuery = nToken; | ||||
8753 | i64 iRowid = pExpr->pRoot->iRowid; | ||||
8754 | |||||
8755 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | ||||
8756 | |||||
8757 | if( nQuery>FTS5_MAX_TOKEN_SIZE32768 ) nQuery = FTS5_MAX_TOKEN_SIZE32768; | ||||
8758 | if( pExpr->pConfig->bTokendata ){ | ||||
8759 | nQuery = fts5QueryTerm(pToken, nQuery); | ||||
8760 | } | ||||
8761 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ) p->iOff++; | ||||
8762 | for(i=0; i<pExpr->nPhrase; i++){ | ||||
8763 | Fts5ExprTerm *pT; | ||||
8764 | if( p->aPopulator[i].bOk==0 ) continue; | ||||
8765 | for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ | ||||
8766 | if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix)) | ||||
8767 | && memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0 | ||||
8768 | ){ | ||||
8769 | int rc = sqlite3Fts5PoslistWriterAppend( | ||||
8770 | &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff | ||||
8771 | ); | ||||
8772 | if( rc==SQLITE_OK0 && (pExpr->pConfig->bTokendata || pT->bPrefix) ){ | ||||
8773 | int iCol = p->iOff>>32; | ||||
8774 | int iTokOff = p->iOff & 0x7FFFFFFF; | ||||
8775 | rc = sqlite3Fts5IndexIterWriteTokendata( | ||||
8776 | pT->pIter, pToken, nToken, iRowid, iCol, iTokOff | ||||
8777 | ); | ||||
8778 | } | ||||
8779 | if( rc ) return rc; | ||||
8780 | break; | ||||
8781 | } | ||||
8782 | } | ||||
8783 | } | ||||
8784 | return SQLITE_OK0; | ||||
8785 | } | ||||
8786 | |||||
8787 | static int sqlite3Fts5ExprPopulatePoslists( | ||||
8788 | Fts5Config *pConfig, | ||||
8789 | Fts5Expr *pExpr, | ||||
8790 | Fts5PoslistPopulator *aPopulator, | ||||
8791 | int iCol, | ||||
8792 | const char *z, int n | ||||
8793 | ){ | ||||
8794 | int i; | ||||
8795 | Fts5ExprCtx sCtx; | ||||
8796 | sCtx.pExpr = pExpr; | ||||
8797 | sCtx.aPopulator = aPopulator; | ||||
8798 | sCtx.iOff = (((i64)iCol) << 32) - 1; | ||||
8799 | |||||
8800 | for(i=0; i<pExpr->nPhrase; i++){ | ||||
8801 | Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; | ||||
8802 | Fts5Colset *pColset = pNode->pNear->pColset; | ||||
8803 | if( (pColset && 0==fts5ExprColsetTest(pColset, iCol)) | ||||
8804 | || aPopulator[i].bMiss | ||||
8805 | ){ | ||||
8806 | aPopulator[i].bOk = 0; | ||||
8807 | }else{ | ||||
8808 | aPopulator[i].bOk = 1; | ||||
8809 | } | ||||
8810 | } | ||||
8811 | |||||
8812 | return sqlite3Fts5Tokenize(pConfig, | ||||
8813 | FTS5_TOKENIZE_DOCUMENT0x0004, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb | ||||
8814 | ); | ||||
8815 | } | ||||
8816 | |||||
8817 | static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ | ||||
8818 | if( pNode->eType==FTS5_TERM4 || pNode->eType==FTS5_STRING9 ){ | ||||
8819 | pNode->pNear->apPhrase[0]->poslist.n = 0; | ||||
8820 | }else{ | ||||
8821 | int i; | ||||
8822 | for(i=0; i<pNode->nChild; i++){ | ||||
8823 | fts5ExprClearPoslists(pNode->apChild[i]); | ||||
8824 | } | ||||
8825 | } | ||||
8826 | } | ||||
8827 | |||||
8828 | static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ | ||||
8829 | pNode->iRowid = iRowid; | ||||
8830 | pNode->bEof = 0; | ||||
8831 | switch( pNode->eType ){ | ||||
8832 | case 0: | ||||
8833 | case FTS5_TERM4: | ||||
8834 | case FTS5_STRING9: | ||||
8835 | return (pNode->pNear->apPhrase[0]->poslist.n>0); | ||||
8836 | |||||
8837 | case FTS5_AND2: { | ||||
8838 | int i; | ||||
8839 | for(i=0; i<pNode->nChild; i++){ | ||||
8840 | if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){ | ||||
8841 | fts5ExprClearPoslists(pNode); | ||||
8842 | return 0; | ||||
8843 | } | ||||
8844 | } | ||||
8845 | break; | ||||
8846 | } | ||||
8847 | |||||
8848 | case FTS5_OR1: { | ||||
8849 | int i; | ||||
8850 | int bRet = 0; | ||||
8851 | for(i=0; i<pNode->nChild; i++){ | ||||
8852 | if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){ | ||||
8853 | bRet = 1; | ||||
8854 | } | ||||
8855 | } | ||||
8856 | return bRet; | ||||
8857 | } | ||||
8858 | |||||
8859 | default: { | ||||
8860 | assert( pNode->eType==FTS5_NOT )((void) (0)); | ||||
8861 | if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid) | ||||
8862 | || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid) | ||||
8863 | ){ | ||||
8864 | fts5ExprClearPoslists(pNode); | ||||
8865 | return 0; | ||||
8866 | } | ||||
8867 | break; | ||||
8868 | } | ||||
8869 | } | ||||
8870 | return 1; | ||||
8871 | } | ||||
8872 | |||||
8873 | static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ | ||||
8874 | fts5ExprCheckPoslists(pExpr->pRoot, iRowid); | ||||
8875 | } | ||||
8876 | |||||
8877 | /* | ||||
8878 | ** This function is only called for detail=columns tables. | ||||
8879 | */ | ||||
8880 | static int sqlite3Fts5ExprPhraseCollist( | ||||
8881 | Fts5Expr *pExpr, | ||||
8882 | int iPhrase, | ||||
8883 | const u8 **ppCollist, | ||||
8884 | int *pnCollist | ||||
8885 | ){ | ||||
8886 | Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; | ||||
8887 | Fts5ExprNode *pNode = pPhrase->pNode; | ||||
8888 | int rc = SQLITE_OK0; | ||||
8889 | |||||
8890 | assert( iPhrase>=0 && iPhrase<pExpr->nPhrase )((void) (0)); | ||||
8891 | assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | ||||
8892 | |||||
8893 | if( pNode->bEof==0 | ||||
8894 | && pNode->iRowid==pExpr->pRoot->iRowid | ||||
8895 | && pPhrase->poslist.n>0 | ||||
8896 | ){ | ||||
8897 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; | ||||
8898 | if( pTerm->pSynonym ){ | ||||
8899 | Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; | ||||
8900 | rc = fts5ExprSynonymList( | ||||
8901 | pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist | ||||
8902 | ); | ||||
8903 | }else{ | ||||
8904 | *ppCollist = pPhrase->aTerm[0].pIter->pData; | ||||
8905 | *pnCollist = pPhrase->aTerm[0].pIter->nData; | ||||
8906 | } | ||||
8907 | }else{ | ||||
8908 | *ppCollist = 0; | ||||
8909 | *pnCollist = 0; | ||||
8910 | } | ||||
8911 | |||||
8912 | return rc; | ||||
8913 | } | ||||
8914 | |||||
8915 | /* | ||||
8916 | ** Does the work of the fts5_api.xQueryToken() API method. | ||||
8917 | */ | ||||
8918 | static int sqlite3Fts5ExprQueryToken( | ||||
8919 | Fts5Expr *pExpr, | ||||
8920 | int iPhrase, | ||||
8921 | int iToken, | ||||
8922 | const char **ppOut, | ||||
8923 | int *pnOut | ||||
8924 | ){ | ||||
8925 | Fts5ExprPhrase *pPhrase = 0; | ||||
8926 | |||||
8927 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | ||||
8928 | return SQLITE_RANGE25; | ||||
8929 | } | ||||
8930 | pPhrase = pExpr->apExprPhrase[iPhrase]; | ||||
8931 | if( iToken<0 || iToken>=pPhrase->nTerm ){ | ||||
8932 | return SQLITE_RANGE25; | ||||
8933 | } | ||||
8934 | |||||
8935 | *ppOut = pPhrase->aTerm[iToken].pTerm; | ||||
8936 | *pnOut = pPhrase->aTerm[iToken].nFullTerm; | ||||
8937 | return SQLITE_OK0; | ||||
8938 | } | ||||
8939 | |||||
8940 | /* | ||||
8941 | ** Does the work of the fts5_api.xInstToken() API method. | ||||
8942 | */ | ||||
8943 | static int sqlite3Fts5ExprInstToken( | ||||
8944 | Fts5Expr *pExpr, | ||||
8945 | i64 iRowid, | ||||
8946 | int iPhrase, | ||||
8947 | int iCol, | ||||
8948 | int iOff, | ||||
8949 | int iToken, | ||||
8950 | const char **ppOut, | ||||
8951 | int *pnOut | ||||
8952 | ){ | ||||
8953 | Fts5ExprPhrase *pPhrase = 0; | ||||
8954 | Fts5ExprTerm *pTerm = 0; | ||||
8955 | int rc = SQLITE_OK0; | ||||
8956 | |||||
8957 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | ||||
8958 | return SQLITE_RANGE25; | ||||
8959 | } | ||||
8960 | pPhrase = pExpr->apExprPhrase[iPhrase]; | ||||
8961 | if( iToken<0 || iToken>=pPhrase->nTerm ){ | ||||
8962 | return SQLITE_RANGE25; | ||||
8963 | } | ||||
8964 | pTerm = &pPhrase->aTerm[iToken]; | ||||
8965 | if( pExpr->pConfig->bTokendata || pTerm->bPrefix ){ | ||||
8966 | rc = sqlite3Fts5IterToken( | ||||
8967 | pTerm->pIter, pTerm->pTerm, pTerm->nQueryTerm, | ||||
8968 | iRowid, iCol, iOff+iToken, ppOut, pnOut | ||||
8969 | ); | ||||
8970 | }else{ | ||||
8971 | *ppOut = pTerm->pTerm; | ||||
8972 | *pnOut = pTerm->nFullTerm; | ||||
8973 | } | ||||
8974 | return rc; | ||||
8975 | } | ||||
8976 | |||||
8977 | /* | ||||
8978 | ** Clear the token mappings for all Fts5IndexIter objects managed by | ||||
8979 | ** the expression passed as the only argument. | ||||
8980 | */ | ||||
8981 | static void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){ | ||||
8982 | int ii; | ||||
8983 | for(ii=0; ii<pExpr->nPhrase; ii++){ | ||||
8984 | Fts5ExprTerm *pT; | ||||
8985 | for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){ | ||||
8986 | sqlite3Fts5IndexIterClearTokendata(pT->pIter); | ||||
8987 | } | ||||
8988 | } | ||||
8989 | } | ||||
8990 | |||||
8991 | #line 1 "fts5_hash.c" | ||||
8992 | /* | ||||
8993 | ** 2014 August 11 | ||||
8994 | ** | ||||
8995 | ** The author disclaims copyright to this source code. In place of | ||||
8996 | ** a legal notice, here is a blessing: | ||||
8997 | ** | ||||
8998 | ** May you do good and not evil. | ||||
8999 | ** May you find forgiveness for yourself and forgive others. | ||||
9000 | ** May you share freely, never taking more than you give. | ||||
9001 | ** | ||||
9002 | ****************************************************************************** | ||||
9003 | ** | ||||
9004 | */ | ||||
9005 | |||||
9006 | |||||
9007 | |||||
9008 | /* #include "fts5Int.h" */ | ||||
9009 | |||||
typedef struct Fts5HashEntry Fts5HashEntry;

/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
**
** An Fts5Hash object is the hash table itself. Entries that share a slot
** are chained together using Fts5HashEntry.pHashNext.
*/
struct Fts5Hash {
  int eDetail;                    /* Copy of Fts5Config.eDetail */
  int *pnByte;                    /* Byte counter updated by each write */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Number of elements in aSlot[] */
  Fts5HashEntry *pScan;           /* Current item of sorted scan, or NULL */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};
9027 | |||||
9028 | /* | ||||
9029 | ** Each entry in the hash table is represented by an object of the | ||||
9030 | ** following type. Each object, its key, and its current data are stored | ||||
9031 | ** in a single memory allocation. The key immediately follows the object | ||||
9032 | ** in memory. The position list data immediately follows the key data | ||||
9033 | ** in memory. | ||||
9034 | ** | ||||
9035 | ** The key is Fts5HashEntry.nKey bytes in size. It consists of a single | ||||
9036 | ** byte identifying the index (either the main term index or a prefix-index), | ||||
9037 | ** followed by the term data. For example: "0token". There is no | ||||
9038 | ** nul-terminator - in this case nKey=6. | ||||
9039 | ** | ||||
9040 | ** The data that follows the key is in a similar, but not identical format | ||||
9041 | ** to the doclist data stored in the database. It is: | ||||
9042 | ** | ||||
9043 | ** * Rowid, as a varint | ||||
9044 | ** * Position list, without 0x00 terminator. | ||||
9045 | ** * Size of previous position list and rowid, as a 4 byte | ||||
9046 | ** big-endian integer. | ||||
9047 | ** | ||||
9048 | ** iRowidOff: | ||||
9049 | ** Offset of last rowid written to data area. Relative to first byte of | ||||
9050 | ** structure. | ||||
9051 | ** | ||||
9052 | ** nData: | ||||
9053 | ** Bytes of data written since iRowidOff. | ||||
9054 | */ | ||||
9055 | struct Fts5HashEntry { | ||||
9056 | Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ | ||||
9057 | Fts5HashEntry *pScanNext; /* Next entry in sorted order */ | ||||
9058 | |||||
9059 | int nAlloc; /* Total size of allocation */ | ||||
9060 | int iSzPoslist; /* Offset of space for 4-byte poslist size */ | ||||
9061 | int nData; /* Total bytes of data (incl. structure) */ | ||||
9062 | int nKey; /* Length of key in bytes */ | ||||
9063 | u8 bDel; /* Set delete-flag @ iSzPoslist */ | ||||
9064 | u8 bContent; /* Set content-flag (detail=none mode) */ | ||||
9065 | i16 iCol; /* Column of last value written */ | ||||
9066 | int iPos; /* Position of last value written */ | ||||
9067 | i64 iRowid; /* Rowid of last value written */ | ||||
9068 | }; | ||||
9069 | |||||
/*
** Equivalent to:
**
**   char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
**
** where zKey is the key stored immediately after the Fts5HashEntry
** structure, within the same allocation. The macro evaluates to the
** address one element past *p, cast to (char*).
*/
#define fts5EntryKey(p) ( ((char *)(&(p)[1])) )
9076 | |||||
9077 | |||||
9078 | /* | ||||
9079 | ** Allocate a new hash table. | ||||
9080 | */ | ||||
9081 | static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ | ||||
9082 | int rc = SQLITE_OK0; | ||||
9083 | Fts5Hash *pNew; | ||||
9084 | |||||
9085 | *ppNew = pNew = (Fts5Hash*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Hash)); | ||||
9086 | if( pNew==0 ){ | ||||
9087 | rc = SQLITE_NOMEM7; | ||||
9088 | }else{ | ||||
9089 | sqlite3_int64 nByte; | ||||
9090 | memset(pNew, 0, sizeof(Fts5Hash)); | ||||
9091 | pNew->pnByte = pnByte; | ||||
9092 | pNew->eDetail = pConfig->eDetail; | ||||
9093 | |||||
9094 | pNew->nSlot = 1024; | ||||
9095 | nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; | ||||
9096 | pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
9097 | if( pNew->aSlot==0 ){ | ||||
9098 | sqlite3_freesqlite3_api->free(pNew); | ||||
9099 | *ppNew = 0; | ||||
9100 | rc = SQLITE_NOMEM7; | ||||
9101 | }else{ | ||||
9102 | memset(pNew->aSlot, 0, (size_t)nByte); | ||||
9103 | } | ||||
9104 | } | ||||
9105 | return rc; | ||||
9106 | } | ||||
9107 | |||||
9108 | /* | ||||
9109 | ** Free a hash table object. | ||||
9110 | */ | ||||
9111 | static void sqlite3Fts5HashFree(Fts5Hash *pHash){ | ||||
9112 | if( pHash ){ | ||||
9113 | sqlite3Fts5HashClear(pHash); | ||||
9114 | sqlite3_freesqlite3_api->free(pHash->aSlot); | ||||
9115 | sqlite3_freesqlite3_api->free(pHash); | ||||
9116 | } | ||||
9117 | } | ||||
9118 | |||||
9119 | /* | ||||
9120 | ** Empty (but do not delete) a hash table. | ||||
9121 | */ | ||||
9122 | static void sqlite3Fts5HashClear(Fts5Hash *pHash){ | ||||
9123 | int i; | ||||
9124 | for(i=0; i<pHash->nSlot; i++){ | ||||
9125 | Fts5HashEntry *pNext; | ||||
9126 | Fts5HashEntry *pSlot; | ||||
9127 | for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ | ||||
9128 | pNext = pSlot->pHashNext; | ||||
9129 | sqlite3_freesqlite3_api->free(pSlot); | ||||
9130 | } | ||||
9131 | } | ||||
9132 | memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); | ||||
9133 | pHash->nEntry = 0; | ||||
9134 | } | ||||
9135 | |||||
9136 | static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){ | ||||
9137 | int i; | ||||
9138 | unsigned int h = 13; | ||||
9139 | for(i=n-1; i>=0; i--){ | ||||
9140 | h = (h << 3) ^ h ^ p[i]; | ||||
9141 | } | ||||
9142 | return (h % nSlot); | ||||
9143 | } | ||||
9144 | |||||
9145 | static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){ | ||||
9146 | int i; | ||||
9147 | unsigned int h = 13; | ||||
9148 | for(i=n-1; i>=0; i--){ | ||||
9149 | h = (h << 3) ^ h ^ p[i]; | ||||
9150 | } | ||||
9151 | h = (h << 3) ^ h ^ b; | ||||
9152 | return (h % nSlot); | ||||
9153 | } | ||||
9154 | |||||
9155 | /* | ||||
9156 | ** Resize the hash table by doubling the number of slots. | ||||
9157 | */ | ||||
9158 | static int fts5HashResize(Fts5Hash *pHash){ | ||||
9159 | int nNew = pHash->nSlot*2; | ||||
9160 | int i; | ||||
9161 | Fts5HashEntry **apNew; | ||||
9162 | Fts5HashEntry **apOld = pHash->aSlot; | ||||
9163 | |||||
9164 | apNew = (Fts5HashEntry**)sqlite3_malloc64sqlite3_api->malloc64(nNew*sizeof(Fts5HashEntry*)); | ||||
9165 | if( !apNew ) return SQLITE_NOMEM7; | ||||
9166 | memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); | ||||
9167 | |||||
9168 | for(i=0; i<pHash->nSlot; i++){ | ||||
9169 | while( apOld[i] ){ | ||||
9170 | unsigned int iHash; | ||||
9171 | Fts5HashEntry *p = apOld[i]; | ||||
9172 | apOld[i] = p->pHashNext; | ||||
9173 | iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p)( ((char *)(&(p)[1])) ), p->nKey); | ||||
9174 | p->pHashNext = apNew[iHash]; | ||||
9175 | apNew[iHash] = p; | ||||
9176 | } | ||||
9177 | } | ||||
9178 | |||||
9179 | sqlite3_freesqlite3_api->free(apOld); | ||||
9180 | pHash->nSlot = nNew; | ||||
9181 | pHash->aSlot = apNew; | ||||
9182 | return SQLITE_OK0; | ||||
9183 | } | ||||
9184 | |||||
9185 | static int fts5HashAddPoslistSize( | ||||
9186 | Fts5Hash *pHash, | ||||
9187 | Fts5HashEntry *p, | ||||
9188 | Fts5HashEntry *p2 | ||||
9189 | ){ | ||||
9190 | int nRet = 0; | ||||
9191 | if( p->iSzPoslist ){ | ||||
9192 | u8 *pPtr = p2 ? (u8*)p2 : (u8*)p; | ||||
9193 | int nData = p->nData; | ||||
9194 | if( pHash->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
9195 | assert( nData==p->iSzPoslist )((void) (0)); | ||||
9196 | if( p->bDel ){ | ||||
9197 | pPtr[nData++] = 0x00; | ||||
9198 | if( p->bContent ){ | ||||
9199 | pPtr[nData++] = 0x00; | ||||
9200 | } | ||||
9201 | } | ||||
9202 | }else{ | ||||
9203 | int nSz = (nData - p->iSzPoslist - 1); /* Size in bytes */ | ||||
9204 | int nPos = nSz*2 + p->bDel; /* Value of nPos field */ | ||||
9205 | |||||
9206 | assert( p->bDel==0 || p->bDel==1 )((void) (0)); | ||||
9207 | if( nPos<=127 ){ | ||||
9208 | pPtr[p->iSzPoslist] = (u8)nPos; | ||||
9209 | }else{ | ||||
9210 | int nByte = sqlite3Fts5GetVarintLen((u32)nPos); | ||||
9211 | memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); | ||||
9212 | sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); | ||||
9213 | nData += (nByte-1); | ||||
9214 | } | ||||
9215 | } | ||||
9216 | |||||
9217 | nRet = nData - p->nData; | ||||
9218 | if( p2==0 ){ | ||||
9219 | p->iSzPoslist = 0; | ||||
9220 | p->bDel = 0; | ||||
9221 | p->bContent = 0; | ||||
9222 | p->nData = nData; | ||||
9223 | } | ||||
9224 | } | ||||
9225 | return nRet; | ||||
9226 | } | ||||
9227 | |||||
9228 | /* | ||||
9229 | ** Add an entry to the in-memory hash table. The key is the concatenation | ||||
9230 | ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). | ||||
9231 | ** | ||||
9232 | ** (bByte || pToken) -> (iRowid,iCol,iPos) | ||||
9233 | ** | ||||
9234 | ** Or, if iCol is negative, then the value is a delete marker. | ||||
9235 | */ | ||||
9236 | static int sqlite3Fts5HashWrite( | ||||
9237 | Fts5Hash *pHash, | ||||
9238 | i64 iRowid, /* Rowid for this entry */ | ||||
9239 | int iCol, /* Column token appears in (-ve -> delete) */ | ||||
9240 | int iPos, /* Position of token within column */ | ||||
9241 | char bByte, /* First byte of token */ | ||||
9242 | const char *pToken, int nToken /* Token to add or remove to or from index */ | ||||
9243 | ){ | ||||
9244 | unsigned int iHash; | ||||
9245 | Fts5HashEntry *p; | ||||
9246 | u8 *pPtr; | ||||
9247 | int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ | ||||
9248 | int bNew; /* If non-delete entry should be written */ | ||||
9249 | |||||
9250 | bNew = (pHash->eDetail==FTS5_DETAIL_FULL0); | ||||
9251 | |||||
9252 | /* Attempt to locate an existing hash entry */ | ||||
9253 | iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); | ||||
9254 | for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ | ||||
9255 | char *zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | ||||
9256 | if( zKey[0]==bByte | ||||
9257 | && p->nKey==nToken+1 | ||||
9258 | && memcmp(&zKey[1], pToken, nToken)==0 | ||||
9259 | ){ | ||||
9260 | break; | ||||
9261 | } | ||||
9262 | } | ||||
9263 | |||||
9264 | /* If an existing hash entry cannot be found, create a new one. */ | ||||
9265 | if( p==0 ){ | ||||
9266 | /* Figure out how much space to allocate */ | ||||
9267 | char *zKey; | ||||
9268 | sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64; | ||||
9269 | if( nByte<128 ) nByte = 128; | ||||
9270 | |||||
9271 | /* Grow the Fts5Hash.aSlot[] array if necessary. */ | ||||
9272 | if( (pHash->nEntry*2)>=pHash->nSlot ){ | ||||
9273 | int rc = fts5HashResize(pHash); | ||||
9274 | if( rc!=SQLITE_OK0 ) return rc; | ||||
9275 | iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); | ||||
9276 | } | ||||
9277 | |||||
9278 | /* Allocate new Fts5HashEntry and add it to the hash table. */ | ||||
9279 | p = (Fts5HashEntry*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
9280 | if( !p ) return SQLITE_NOMEM7; | ||||
9281 | memset(p, 0, sizeof(Fts5HashEntry)); | ||||
9282 | p->nAlloc = (int)nByte; | ||||
9283 | zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | ||||
9284 | zKey[0] = bByte; | ||||
9285 | memcpy(&zKey[1], pToken, nToken); | ||||
9286 | assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) )((void) (0)); | ||||
9287 | p->nKey = nToken+1; | ||||
9288 | zKey[nToken+1] = '\0'; | ||||
9289 | p->nData = nToken+1 + sizeof(Fts5HashEntry); | ||||
9290 | p->pHashNext = pHash->aSlot[iHash]; | ||||
9291 | pHash->aSlot[iHash] = p; | ||||
9292 | pHash->nEntry++; | ||||
9293 | |||||
9294 | /* Add the first rowid field to the hash-entry */ | ||||
9295 | p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); | ||||
9296 | p->iRowid = iRowid; | ||||
9297 | |||||
9298 | p->iSzPoslist = p->nData; | ||||
9299 | if( pHash->eDetail!=FTS5_DETAIL_NONE1 ){ | ||||
9300 | p->nData += 1; | ||||
9301 | p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL0 ? 0 : -1); | ||||
9302 | } | ||||
9303 | |||||
9304 | }else{ | ||||
9305 | |||||
9306 | /* Appending to an existing hash-entry. Check that there is enough | ||||
9307 | ** space to append the largest possible new entry. Worst case scenario | ||||
9308 | ** is: | ||||
9309 | ** | ||||
9310 | ** + 9 bytes for a new rowid, | ||||
9311 | ** + 4 byte reserved for the "poslist size" varint. | ||||
9312 | ** + 1 byte for a "new column" byte, | ||||
9313 | ** + 3 bytes for a new column number (16-bit max) as a varint, | ||||
9314 | ** + 5 bytes for the new position offset (32-bit max). | ||||
9315 | */ | ||||
9316 | if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ | ||||
9317 | sqlite3_int64 nNew = p->nAlloc * 2; | ||||
9318 | Fts5HashEntry *pNew; | ||||
9319 | Fts5HashEntry **pp; | ||||
9320 | pNew = (Fts5HashEntry*)sqlite3_realloc64sqlite3_api->realloc64(p, nNew); | ||||
9321 | if( pNew==0 ) return SQLITE_NOMEM7; | ||||
9322 | pNew->nAlloc = (int)nNew; | ||||
9323 | for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); | ||||
9324 | *pp = pNew; | ||||
9325 | p = pNew; | ||||
9326 | } | ||||
9327 | nIncr -= p->nData; | ||||
9328 | } | ||||
9329 | assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) )((void) (0)); | ||||
9330 | |||||
9331 | pPtr = (u8*)p; | ||||
9332 | |||||
9333 | /* If this is a new rowid, append the 4-byte size field for the previous | ||||
9334 | ** entry, and the new rowid for this entry. */ | ||||
9335 | if( iRowid!=p->iRowid ){ | ||||
9336 | u64 iDiff = (u64)iRowid - (u64)p->iRowid; | ||||
9337 | fts5HashAddPoslistSize(pHash, p, 0); | ||||
9338 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff); | ||||
9339 | p->iRowid = iRowid; | ||||
9340 | bNew = 1; | ||||
9341 | p->iSzPoslist = p->nData; | ||||
9342 | if( pHash->eDetail!=FTS5_DETAIL_NONE1 ){ | ||||
9343 | p->nData += 1; | ||||
9344 | p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL0 ? 0 : -1); | ||||
9345 | p->iPos = 0; | ||||
9346 | } | ||||
9347 | } | ||||
9348 | |||||
9349 | if( iCol>=0 ){ | ||||
9350 | if( pHash->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
9351 | p->bContent = 1; | ||||
9352 | }else{ | ||||
9353 | /* Append a new column value, if necessary */ | ||||
9354 | assert_nc( iCol>=p->iCol )((void) (0)); | ||||
9355 | if( iCol!=p->iCol ){ | ||||
9356 | if( pHash->eDetail==FTS5_DETAIL_FULL0 ){ | ||||
9357 | pPtr[p->nData++] = 0x01; | ||||
9358 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); | ||||
9359 | p->iCol = (i16)iCol; | ||||
9360 | p->iPos = 0; | ||||
9361 | }else{ | ||||
9362 | bNew = 1; | ||||
9363 | p->iCol = (i16)(iPos = iCol); | ||||
9364 | } | ||||
9365 | } | ||||
9366 | |||||
9367 | /* Append the new position offset, if necessary */ | ||||
9368 | if( bNew ){ | ||||
9369 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); | ||||
9370 | p->iPos = iPos; | ||||
9371 | } | ||||
9372 | } | ||||
9373 | }else{ | ||||
9374 | /* This is a delete. Set the delete flag. */ | ||||
9375 | p->bDel = 1; | ||||
9376 | } | ||||
9377 | |||||
9378 | nIncr += p->nData; | ||||
9379 | *pHash->pnByte += nIncr; | ||||
9380 | return SQLITE_OK0; | ||||
9381 | } | ||||
9382 | |||||
9383 | |||||
9384 | /* | ||||
9385 | ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, | ||||
9386 | ** each sorted in key order. This function merges the two lists into a | ||||
9387 | ** single list and returns a pointer to its first element. | ||||
9388 | */ | ||||
9389 | static Fts5HashEntry *fts5HashEntryMerge( | ||||
9390 | Fts5HashEntry *pLeft, | ||||
9391 | Fts5HashEntry *pRight | ||||
9392 | ){ | ||||
9393 | Fts5HashEntry *p1 = pLeft; | ||||
9394 | Fts5HashEntry *p2 = pRight; | ||||
9395 | Fts5HashEntry *pRet = 0; | ||||
9396 | Fts5HashEntry **ppOut = &pRet; | ||||
9397 | |||||
9398 | while( p1 || p2 ){ | ||||
9399 | if( p1==0 ){ | ||||
9400 | *ppOut = p2; | ||||
9401 | p2 = 0; | ||||
9402 | }else if( p2==0 ){ | ||||
9403 | *ppOut = p1; | ||||
9404 | p1 = 0; | ||||
9405 | }else{ | ||||
9406 | char *zKey1 = fts5EntryKey(p1)( ((char *)(&(p1)[1])) ); | ||||
9407 | char *zKey2 = fts5EntryKey(p2)( ((char *)(&(p2)[1])) ); | ||||
9408 | int nMin = MIN(p1->nKey, p2->nKey)(((p1->nKey) < (p2->nKey)) ? (p1->nKey) : (p2-> nKey)); | ||||
9409 | |||||
9410 | int cmp = memcmp(zKey1, zKey2, nMin); | ||||
9411 | if( cmp==0 ){ | ||||
9412 | cmp = p1->nKey - p2->nKey; | ||||
9413 | } | ||||
9414 | assert( cmp!=0 )((void) (0)); | ||||
9415 | |||||
9416 | if( cmp>0 ){ | ||||
9417 | /* p2 is smaller */ | ||||
9418 | *ppOut = p2; | ||||
9419 | ppOut = &p2->pScanNext; | ||||
9420 | p2 = p2->pScanNext; | ||||
9421 | }else{ | ||||
9422 | /* p1 is smaller */ | ||||
9423 | *ppOut = p1; | ||||
9424 | ppOut = &p1->pScanNext; | ||||
9425 | p1 = p1->pScanNext; | ||||
9426 | } | ||||
9427 | *ppOut = 0; | ||||
9428 | } | ||||
9429 | } | ||||
9430 | |||||
9431 | return pRet; | ||||
9432 | } | ||||
9433 | |||||
9434 | /* | ||||
9435 | ** Link all tokens from hash table iHash into a list in sorted order. The | ||||
9436 | ** tokens are not removed from the hash table. | ||||
9437 | */ | ||||
9438 | static int fts5HashEntrySort( | ||||
9439 | Fts5Hash *pHash, | ||||
9440 | const char *pTerm, int nTerm, /* Query prefix, if any */ | ||||
9441 | Fts5HashEntry **ppSorted | ||||
9442 | ){ | ||||
9443 | const int nMergeSlot = 32; | ||||
9444 | Fts5HashEntry **ap; | ||||
9445 | Fts5HashEntry *pList; | ||||
9446 | int iSlot; | ||||
9447 | int i; | ||||
9448 | |||||
9449 | *ppSorted = 0; | ||||
9450 | ap = sqlite3_malloc64sqlite3_api->malloc64(sizeof(Fts5HashEntry*) * nMergeSlot); | ||||
9451 | if( !ap ) return SQLITE_NOMEM7; | ||||
9452 | memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); | ||||
9453 | |||||
9454 | for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ | ||||
9455 | Fts5HashEntry *pIter; | ||||
9456 | for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ | ||||
9457 | if( pTerm==0 | ||||
9458 | || (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter)( ((char *)(&(pIter)[1])) ), pTerm, nTerm)) | ||||
9459 | ){ | ||||
9460 | Fts5HashEntry *pEntry = pIter; | ||||
9461 | pEntry->pScanNext = 0; | ||||
9462 | for(i=0; ap[i]; i++){ | ||||
9463 | pEntry = fts5HashEntryMerge(pEntry, ap[i]); | ||||
9464 | ap[i] = 0; | ||||
9465 | } | ||||
9466 | ap[i] = pEntry; | ||||
9467 | } | ||||
9468 | } | ||||
9469 | } | ||||
9470 | |||||
9471 | pList = 0; | ||||
9472 | for(i=0; i<nMergeSlot; i++){ | ||||
9473 | pList = fts5HashEntryMerge(pList, ap[i]); | ||||
9474 | } | ||||
9475 | |||||
9476 | sqlite3_freesqlite3_api->free(ap); | ||||
9477 | *ppSorted = pList; | ||||
9478 | return SQLITE_OK0; | ||||
9479 | } | ||||
9480 | |||||
9481 | /* | ||||
9482 | ** Query the hash table for a doclist associated with term pTerm/nTerm. | ||||
9483 | */ | ||||
9484 | static int sqlite3Fts5HashQuery( | ||||
9485 | Fts5Hash *pHash, /* Hash table to query */ | ||||
9486 | int nPre, | ||||
9487 | const char *pTerm, int nTerm, /* Query term */ | ||||
9488 | void **ppOut, /* OUT: Pointer to new object */ | ||||
9489 | int *pnDoclist /* OUT: Size of doclist in bytes */ | ||||
9490 | ){ | ||||
9491 | unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); | ||||
9492 | char *zKey = 0; | ||||
9493 | Fts5HashEntry *p; | ||||
9494 | |||||
9495 | for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ | ||||
9496 | zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | ||||
9497 | if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break; | ||||
9498 | } | ||||
9499 | |||||
9500 | if( p ){ | ||||
9501 | int nHashPre = sizeof(Fts5HashEntry) + nTerm; | ||||
9502 | int nList = p->nData - nHashPre; | ||||
9503 | u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64sqlite3_api->malloc64(nPre + nList + 10)); | ||||
9504 | if( pRet ){ | ||||
9505 | Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre]; | ||||
9506 | memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList); | ||||
9507 | nList += fts5HashAddPoslistSize(pHash, p, pFaux); | ||||
9508 | *pnDoclist = nList; | ||||
9509 | }else{ | ||||
9510 | *pnDoclist = 0; | ||||
9511 | return SQLITE_NOMEM7; | ||||
9512 | } | ||||
9513 | }else{ | ||||
9514 | *ppOut = 0; | ||||
9515 | *pnDoclist = 0; | ||||
9516 | } | ||||
9517 | |||||
9518 | return SQLITE_OK0; | ||||
9519 | } | ||||
9520 | |||||
9521 | static int sqlite3Fts5HashScanInit( | ||||
9522 | Fts5Hash *p, /* Hash table to query */ | ||||
9523 | const char *pTerm, int nTerm /* Query prefix */ | ||||
9524 | ){ | ||||
9525 | return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); | ||||
9526 | } | ||||
9527 | |||||
#ifdef SQLITE_DEBUG
/*
** Return the number of entries in the hash table, counted by walking
** the chain of every slot. Only compiled in SQLITE_DEBUG builds.
*/
static int fts5HashCount(Fts5Hash *pHash){
  int nEntry = 0;
  int iSlot;
  for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
    Fts5HashEntry *pEntry = pHash->aSlot[iSlot];
    while( pEntry ){
      nEntry++;
      pEntry = pEntry->pHashNext;
    }
  }
  return nEntry;
}
#endif
9541 | |||||
9542 | /* | ||||
9543 | ** Return true if the hash table is empty, false otherwise. | ||||
9544 | */ | ||||
9545 | static int sqlite3Fts5HashIsEmpty(Fts5Hash *pHash){ | ||||
9546 | assert( pHash->nEntry==fts5HashCount(pHash) )((void) (0)); | ||||
9547 | return pHash->nEntry==0; | ||||
9548 | } | ||||
9549 | |||||
9550 | static void sqlite3Fts5HashScanNext(Fts5Hash *p){ | ||||
9551 | assert( !sqlite3Fts5HashScanEof(p) )((void) (0)); | ||||
9552 | p->pScan = p->pScan->pScanNext; | ||||
9553 | } | ||||
9554 | |||||
9555 | static int sqlite3Fts5HashScanEof(Fts5Hash *p){ | ||||
9556 | return (p->pScan==0); | ||||
9557 | } | ||||
9558 | |||||
9559 | static void sqlite3Fts5HashScanEntry( | ||||
9560 | Fts5Hash *pHash, | ||||
9561 | const char **pzTerm, /* OUT: term (nul-terminated) */ | ||||
9562 | int *pnTerm, /* OUT: Size of term in bytes */ | ||||
9563 | const u8 **ppDoclist, /* OUT: pointer to doclist */ | ||||
9564 | int *pnDoclist /* OUT: size of doclist in bytes */ | ||||
9565 | ){ | ||||
9566 | Fts5HashEntry *p; | ||||
9567 | if( (p = pHash->pScan) ){ | ||||
9568 | char *zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | ||||
9569 | int nTerm = p->nKey; | ||||
9570 | fts5HashAddPoslistSize(pHash, p, 0); | ||||
9571 | *pzTerm = zKey; | ||||
9572 | *pnTerm = nTerm; | ||||
9573 | *ppDoclist = (const u8*)&zKey[nTerm]; | ||||
9574 | *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm); | ||||
9575 | }else{ | ||||
9576 | *pzTerm = 0; | ||||
9577 | *pnTerm = 0; | ||||
9578 | *ppDoclist = 0; | ||||
9579 | *pnDoclist = 0; | ||||
9580 | } | ||||
9581 | } | ||||
9582 | |||||
9583 | #line 1 "fts5_index.c" | ||||
9584 | /* | ||||
9585 | ** 2014 May 31 | ||||
9586 | ** | ||||
9587 | ** The author disclaims copyright to this source code. In place of | ||||
9588 | ** a legal notice, here is a blessing: | ||||
9589 | ** | ||||
9590 | ** May you do good and not evil. | ||||
9591 | ** May you find forgiveness for yourself and forgive others. | ||||
9592 | ** May you share freely, never taking more than you give. | ||||
9593 | ** | ||||
9594 | ****************************************************************************** | ||||
9595 | ** | ||||
9596 | ** Low level access to the FTS index stored in the database file. The | ||||
9597 | ** routines in this file implement all read and write access to the | ||||
9598 | ** %_data table. Other parts of the system access this functionality via | ||||
9599 | ** the interface defined in fts5Int.h. | ||||
9600 | */ | ||||
9601 | |||||
9602 | |||||
9603 | /* #include "fts5Int.h" */ | ||||
9604 | |||||
9605 | /* | ||||
9606 | ** Overview: | ||||
9607 | ** | ||||
9608 | ** The %_data table contains all the FTS indexes for an FTS5 virtual table. | ||||
9609 | ** As well as the main term index, there may be up to 31 prefix indexes. | ||||
9610 | ** The format is similar to FTS3/4, except that: | ||||
9611 | ** | ||||
9612 | ** * all segment b-tree leaf data is stored in fixed size page records | ||||
9613 | ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is | ||||
9614 | ** taken to ensure it is possible to iterate in either direction through | ||||
9615 | ** the entries in a doclist, or to seek to a specific entry within a | ||||
9616 | ** doclist, without loading it into memory. | ||||
9617 | ** | ||||
9618 | ** * large doclists that span many pages have associated "doclist index" | ||||
9619 | ** records that contain a copy of the first rowid on each page spanned by | ||||
9620 | ** the doclist. This is used to speed up seek operations, and merges of | ||||
9621 | ** large doclists with very small doclists. | ||||
9622 | ** | ||||
9623 | ** * extra fields in the "structure record" record the state of ongoing | ||||
9624 | ** incremental merge operations. | ||||
9625 | ** | ||||
9626 | */ | ||||
9627 | |||||
9628 | |||||
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

#define FTS5_MAIN_PREFIX '0'

#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

#define FTS5_MAX_LEVEL 64
9641 | |||||
9642 | /* | ||||
9643 | ** There are two versions of the format used for the structure record: | ||||
9644 | ** | ||||
9645 | ** 1. the legacy format, that may be read by all fts5 versions, and | ||||
9646 | ** | ||||
9647 | ** 2. the V2 format, which is used by contentless_delete=1 databases. | ||||
9648 | ** | ||||
9649 | ** Both begin with a 4-byte "configuration cookie" value. Then, a legacy | ||||
9650 | ** format structure record contains a varint - the number of levels in | ||||
9651 | ** the structure. Whereas a V2 structure record contains the constant | ||||
9652 | ** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a | ||||
9653 | ** varint has to be at least 16256 to begin with "0xFF". And the default | ||||
9654 | ** maximum number of levels is 64. | ||||
9655 | ** | ||||
9656 | ** See below for more on structure record formats. | ||||
9657 | */ | ||||
#define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01"
9659 | |||||
9660 | /* | ||||
9661 | ** Details: | ||||
9662 | ** | ||||
9663 | ** The %_data table managed by this module, | ||||
9664 | ** | ||||
9665 | ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); | ||||
9666 | ** | ||||
9667 | ** , contains the following 6 types of records. See the comments surrounding | ||||
9668 | ** the FTS5_*_ROWID macros below for a description of how %_data rowids are | ||||
9669 | ** assigned to each of them. | ||||
9670 | ** | ||||
9671 | ** 1. Structure Records: | ||||
9672 | ** | ||||
9673 | ** The set of segments that make up an index - the index structure - are | ||||
9674 | ** recorded in a single record within the %_data table. The record consists | ||||
9675 | ** of a single 32-bit configuration cookie value followed by a list of | ||||
9676 | ** SQLite varints. | ||||
9677 | ** | ||||
9678 | ** If the structure record is a V2 record, the configuration cookie is | ||||
9679 | ** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01]. | ||||
9680 | ** | ||||
9681 | ** Next, the record continues with three varints: | ||||
9682 | ** | ||||
9683 | ** + number of levels, | ||||
9684 | ** + total number of segments on all levels, | ||||
9685 | ** + value of write counter. | ||||
9686 | ** | ||||
9687 | ** Then, for each level from 0 to nMax: | ||||
9688 | ** | ||||
9689 | ** + number of input segments in ongoing merge. | ||||
9690 | ** + total number of segments in level. | ||||
9691 | ** + for each segment from oldest to newest: | ||||
9692 | ** + segment id (always > 0) | ||||
9693 | ** + first leaf page number (often 1, always greater than 0) | ||||
9694 | ** + final leaf page number | ||||
9695 | ** | ||||
9696 | ** Then, for V2 structures only: | ||||
9697 | ** | ||||
9698 | ** + lower origin counter value, | ||||
9699 | ** + upper origin counter value, | ||||
9700 | ** + the number of tombstone hash pages. | ||||
9701 | ** | ||||
9702 | ** 2. The Averages Record: | ||||
9703 | ** | ||||
9704 | ** A single record within the %_data table. The data is a list of varints. | ||||
9705 | ** The first value is the number of rows in the index. Then, for each column | ||||
9706 | ** from left to right, the total number of tokens in the column for all | ||||
9707 | ** rows of the table. | ||||
9708 | ** | ||||
9709 | ** 3. Segment leaves: | ||||
9710 | ** | ||||
9711 | ** TERM/DOCLIST FORMAT: | ||||
9712 | ** | ||||
9713 | ** Most of each segment leaf is taken up by term/doclist data. The | ||||
9714 | ** general format of term/doclist, starting with the first term | ||||
9715 | ** on the leaf page, is: | ||||
9716 | ** | ||||
9717 | ** varint : size of first term | ||||
9718 | ** blob: first term data | ||||
9719 | ** doclist: first doclist | ||||
9720 | ** zero-or-more { | ||||
9721 | ** varint: number of bytes in common with previous term | ||||
9722 | ** varint: number of bytes of new term data (nNew) | ||||
9723 | ** blob: nNew bytes of new term data | ||||
9724 | ** doclist: next doclist | ||||
9725 | ** } | ||||
9726 | ** | ||||
9727 | ** doclist format: | ||||
9728 | ** | ||||
9729 | ** varint: first rowid | ||||
9730 | ** poslist: first poslist | ||||
9731 | ** zero-or-more { | ||||
9732 | ** varint: rowid delta (always > 0) | ||||
9733 | ** poslist: next poslist | ||||
9734 | ** } | ||||
9735 | ** | ||||
9736 | ** poslist format: | ||||
9737 | ** | ||||
9738 | ** varint: size of poslist in bytes multiplied by 2, not including | ||||
9739 | ** this field. Plus 1 if this entry carries the "delete" flag. | ||||
9740 | ** collist: collist for column 0 | ||||
9741 | ** zero-or-more { | ||||
9742 | ** 0x01 byte | ||||
9743 | ** varint: column number (I) | ||||
9744 | ** collist: collist for column I | ||||
9745 | ** } | ||||
9746 | ** | ||||
9747 | ** collist format: | ||||
9748 | ** | ||||
9749 | ** varint: first offset + 2 | ||||
9750 | ** zero-or-more { | ||||
9751 | ** varint: offset delta + 2 | ||||
9752 | ** } | ||||
9753 | ** | ||||
9754 | ** PAGE FORMAT | ||||
9755 | ** | ||||
9756 | ** Each leaf page begins with a 4-byte header containing 2 16-bit | ||||
9757 | ** unsigned integer fields in big-endian format. They are: | ||||
9758 | ** | ||||
9759 | ** * The byte offset of the first rowid on the page, if it exists | ||||
9760 | ** and occurs before the first term (otherwise 0). | ||||
9761 | ** | ||||
9762 | ** * The byte offset of the start of the page footer. If the page | ||||
9763 | ** footer is 0 bytes in size, then this field is the same as the | ||||
9764 | ** size of the leaf page in bytes. | ||||
9765 | ** | ||||
9766 | ** The page footer consists of a single varint for each term located | ||||
9767 | ** on the page. Each varint is the byte offset of the current term | ||||
9768 | ** within the page, delta-compressed against the previous value. In | ||||
9769 | ** other words, the first varint in the footer is the byte offset of | ||||
9770 | ** the first term, the second is the byte offset of the second less that | ||||
9771 | ** of the first, and so on. | ||||
9772 | ** | ||||
9773 | ** The term/doclist format described above is accurate if the entire | ||||
9774 | ** term/doclist data fits on a single leaf page. If this is not the case, | ||||
9775 | ** the format is changed in two ways: | ||||
9776 | ** | ||||
9777 | ** + if the first rowid on a page occurs before the first term, it | ||||
9778 | ** is stored as a literal value: | ||||
9779 | ** | ||||
9780 | ** varint: first rowid | ||||
9781 | ** | ||||
9782 | ** + the first term on each page is stored in the same way as the | ||||
9783 | ** very first term of the segment: | ||||
9784 | ** | ||||
9785 | ** varint : size of first term | ||||
9786 | ** blob: first term data | ||||
9787 | ** | ||||
9788 | ** 5. Segment doclist indexes: | ||||
9789 | ** | ||||
9790 | ** Doclist indexes are themselves b-trees, however they usually consist of | ||||
9791 | ** a single leaf record only. The format of each doclist index leaf page | ||||
9792 | ** is: | ||||
9793 | ** | ||||
9794 | ** * Flags byte. Bits are: | ||||
9795 | ** 0x01: Clear if leaf is also the root page, otherwise set. | ||||
9796 | ** | ||||
9797 | ** * Page number of fts index leaf page. As a varint. | ||||
9798 | ** | ||||
9799 | ** * First rowid on page indicated by previous field. As a varint. | ||||
9800 | ** | ||||
9801 | ** * A list of varints, one for each subsequent termless page. A | ||||
9802 | ** positive delta if the termless page contains at least one rowid, | ||||
9803 | ** or an 0x00 byte otherwise. | ||||
9804 | ** | ||||
9805 | ** Internal doclist index nodes are: | ||||
9806 | ** | ||||
9807 | ** * Flags byte. Bits are: | ||||
9808 | ** 0x01: Clear for root page, otherwise set. | ||||
9809 | ** | ||||
9810 | ** * Page number of first child page. As a varint. | ||||
9811 | ** | ||||
9812 | ** * Copy of first rowid on page indicated by previous field. As a varint. | ||||
9813 | ** | ||||
9814 | ** * A list of delta-encoded varints - the first rowid on each subsequent | ||||
9815 | ** child page. | ||||
9816 | ** | ||||
9817 | ** 6. Tombstone Hash Page | ||||
9818 | ** | ||||
9819 | ** These records are only ever present in contentless_delete=1 tables. | ||||
9820 | ** There are zero or more of these associated with each segment. They | ||||
9821 | ** are used to store the tombstone rowids for rows contained in the | ||||
9822 | ** associated segments. | ||||
9823 | ** | ||||
9824 | ** The set of nHashPg tombstone hash pages associated with a single | ||||
9825 | ** segment together form a single hash table containing tombstone rowids. | ||||
9826 | ** To find the page of the hash on which a key might be stored: | ||||
9827 | ** | ||||
9828 | ** iPg = (rowid % nHashPg) | ||||
9829 | ** | ||||
9830 | ** Then, within page iPg, which has nSlot slots: | ||||
9831 | ** | ||||
9832 | ** iSlot = (rowid / nHashPg) % nSlot | ||||
9833 | ** | ||||
9834 | ** Each tombstone hash page begins with an 8 byte header: | ||||
9835 | ** | ||||
9836 | ** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8. | ||||
9837 | ** 1-byte: rowid-0-tombstone flag. This flag is only valid on the | ||||
9838 | ** first tombstone hash page for each segment (iPg=0). If set, | ||||
9839 | ** the hash table contains rowid 0. If clear, it does not. | ||||
9840 | ** Rowid 0 is handled specially. | ||||
9841 | ** 2-bytes: unused. | ||||
9842 | ** 4-bytes: Big-endian integer containing number of entries on page. | ||||
9843 | ** | ||||
9844 | ** Following this are nSlot 4 or 8 byte slots (depending on the key-size | ||||
9845 | ** in the first byte of the page header). The number of slots may be | ||||
9846 | ** determined based on the size of the page record and the key-size: | ||||
9847 | ** | ||||
9848 | ** nSlot = (nByte - 8) / key-size | ||||
9849 | */ | ||||
9850 | |||||
9851 | /* | ||||
9852 | ** Rowids for the averages and structure records in the %_data table. | ||||
9853 | */ | ||||
#define FTS5_AVERAGES_ROWID  1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID 10   /* The structure record */
9856 | |||||
9857 | /* | ||||
9858 | ** Macros determining the rowids used by segment leaves and dlidx leaves | ||||
9859 | ** and nodes. All nodes and leaves are stored in the %_data table with large | ||||
9860 | ** positive rowids. | ||||
9861 | ** | ||||
9862 | ** Each segment has a unique non-zero 16-bit id. | ||||
9863 | ** | ||||
9864 | ** The rowid for each segment leaf is found by passing the segment id and | ||||
9865 | ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered | ||||
9866 | ** sequentially starting from 1. | ||||
9867 | */ | ||||
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */

/*
** Pack a segment id, doclist-index flag, dlidx tree height and page number
** into a single i64 %_data rowid. From the least significant end the fields
** are: pgno (FTS5_DATA_PAGE_B bits), height (FTS5_DATA_HEIGHT_B bits),
** dlidx (FTS5_DATA_DLI_B bit), then segid.
*/
#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

/*
** Rowids of segment leaves, doclist-index pages and tombstone hash pages.
** Tombstone pages use the segment id offset by (1<<16). The segid argument
** is parenthesized so that an expression argument (e.g. a ?: expression)
** is offset as a whole.
*/
#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
#define FTS5_TOMBSTONE_ROWID(segid,ipg)       fts5_dri((segid)+(1<<16), 0, 0, ipg)
9883 | |||||
#ifdef SQLITE_DEBUG
/*
** Debug builds only. Return SQLITE_CORRUPT_VTAB.
** NOTE(review): presumably a real function rather than a bare constant so
** that a breakpoint can be set where corruption is reported - confirm.
*/
static int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
#endif
9887 | |||||
9888 | |||||
9889 | /* | ||||
9890 | ** Each time a blob is read from the %_data table, it is padded with this | ||||
9891 | ** many zero bytes. This makes it easier to decode the various record formats | ||||
9892 | ** without overreading if the records are corrupt. | ||||
9893 | */ | ||||
#define FTS5_DATA_ZERO_PADDING  8
#define FTS5_DATA_PADDING      20
9896 | |||||
/* Forward typedefs for the structure types used by this file. */

/* Records and the in-memory index structure */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
typedef struct Fts5TombstoneArray Fts5TombstoneArray;

/* Iterators */
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5TokenDataIter Fts5TokenDataIter;
typedef struct Fts5TokenDataMap Fts5TokenDataMap;

/* Writers */
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
9912 | |||||
9913 | struct Fts5Data { | ||||
9914 | u8 *p; /* Pointer to buffer containing record */ | ||||
9915 | int nn; /* Size of record in bytes */ | ||||
9916 | int szLeaf; /* Size of leaf without page-index */ | ||||
9917 | }; | ||||
9918 | |||||
9919 | /* | ||||
9920 | ** One object per %_data table. | ||||
9921 | ** | ||||
9922 | ** nContentlessDelete: | ||||
9923 | ** The number of contentless delete operations since the most recent | ||||
9924 | ** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked | ||||
9925 | ** so that extra auto-merge work can be done by fts5IndexFlush() to | ||||
9926 | ** account for the delete operations. | ||||
9927 | */ | ||||
9928 | struct Fts5Index { | ||||
9929 | Fts5Config *pConfig; /* Virtual table configuration */ | ||||
9930 | char *zDataTbl; /* Name of %_data table */ | ||||
9931 | int nWorkUnit; /* Leaf pages in a "unit" of work */ | ||||
9932 | |||||
9933 | /* | ||||
9934 | ** Variables related to the accumulation of tokens and doclists within the | ||||
9935 | ** in-memory hash tables before they are flushed to disk. | ||||
9936 | */ | ||||
9937 | Fts5Hash *pHash; /* Hash table for in-memory data */ | ||||
9938 | int nPendingData; /* Current bytes of pending data */ | ||||
9939 | i64 iWriteRowid; /* Rowid for current doc being written */ | ||||
9940 | int bDelete; /* Current write is a delete */ | ||||
9941 | int nContentlessDelete; /* Number of contentless delete ops */ | ||||
9942 | int nPendingRow; /* Number of INSERT in hash table */ | ||||
9943 | |||||
9944 | /* Error state. */ | ||||
9945 | int rc; /* Current error code */ | ||||
9946 | int flushRc; | ||||
9947 | |||||
9948 | /* State used by the fts5DataXXX() functions. */ | ||||
9949 | sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ | ||||
9950 | sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ | ||||
9951 | sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ | ||||
9952 | sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ | ||||
9953 | sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */ | ||||
9954 | sqlite3_stmt *pIdxSelect; | ||||
9955 | sqlite3_stmt *pIdxNextSelect; | ||||
9956 | int nRead; /* Total number of blocks read */ | ||||
9957 | |||||
9958 | sqlite3_stmt *pDeleteFromIdx; | ||||
9959 | |||||
9960 | sqlite3_stmt *pDataVersion; | ||||
9961 | i64 iStructVersion; /* data_version when pStruct read */ | ||||
9962 | Fts5Structure *pStruct; /* Current db structure (or NULL) */ | ||||
9963 | }; | ||||
9964 | |||||
9965 | struct Fts5DoclistIter { | ||||
9966 | u8 *aEof; /* Pointer to 1 byte past end of doclist */ | ||||
9967 | |||||
9968 | /* Output variables. aPoslist==0 at EOF */ | ||||
9969 | i64 iRowid; | ||||
9970 | u8 *aPoslist; | ||||
9971 | int nPoslist; | ||||
9972 | int nSize; | ||||
9973 | }; | ||||
9974 | |||||
9975 | /* | ||||
9976 | ** The contents of the "structure" record for each index are represented | ||||
9977 | ** using an Fts5Structure record in memory. Which uses instances of the | ||||
9978 | ** other Fts5StructureXXX types as components. | ||||
9979 | ** | ||||
9980 | ** nOriginCntr: | ||||
9981 | ** This value is set to non-zero for structure records created for | ||||
9982 | ** contentlessdelete=1 tables only. In that case it represents the | ||||
9983 | ** origin value to apply to the next top-level segment created. | ||||
9984 | */ | ||||
9985 | struct Fts5StructureSegment { | ||||
9986 | int iSegid; /* Segment id */ | ||||
9987 | int pgnoFirst; /* First leaf page number in segment */ | ||||
9988 | int pgnoLast; /* Last leaf page number in segment */ | ||||
9989 | |||||
9990 | /* contentlessdelete=1 tables only: */ | ||||
9991 | u64 iOrigin1; | ||||
9992 | u64 iOrigin2; | ||||
9993 | int nPgTombstone; /* Number of tombstone hash table pages */ | ||||
9994 | u64 nEntryTombstone; /* Number of tombstone entries that "count" */ | ||||
9995 | u64 nEntry; /* Number of rows in this segment */ | ||||
9996 | }; | ||||
9997 | struct Fts5StructureLevel { | ||||
9998 | int nMerge; /* Number of segments in incr-merge */ | ||||
9999 | int nSeg; /* Total number of segments on level */ | ||||
10000 | Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ | ||||
10001 | }; | ||||
10002 | struct Fts5Structure { | ||||
10003 | int nRef; /* Object reference count */ | ||||
10004 | u64 nWriteCounter; /* Total leaves written to level 0 */ | ||||
10005 | u64 nOriginCntr; /* Origin value for next top-level segment */ | ||||
10006 | int nSegment; /* Total segments in this structure */ | ||||
10007 | int nLevel; /* Number of levels in this index */ | ||||
10008 | Fts5StructureLevel aLevel[FLEXARRAY]; /* Array of nLevel level objects */ | ||||
10009 | }; | ||||
10010 | |||||
10011 | /* Size (in bytes) of an Fts5Structure object holding up to N levels */ | ||||
10012 | #define SZ_FTS5STRUCTURE(N)(__builtin_offsetof(Fts5Structure, aLevel) + (N)*sizeof(Fts5StructureLevel )) \ | ||||
10013 | (offsetof(Fts5Structure,aLevel)__builtin_offsetof(Fts5Structure, aLevel) + (N)*sizeof(Fts5StructureLevel)) | ||||
10014 | |||||
10015 | /* | ||||
10016 | ** An object of type Fts5SegWriter is used to write to segments. | ||||
10017 | */ | ||||
10018 | struct Fts5PageWriter { | ||||
10019 | int pgno; /* Page number for this page */ | ||||
10020 | int iPrevPgidx; /* Previous value written into pgidx */ | ||||
10021 | Fts5Buffer buf; /* Buffer containing leaf data */ | ||||
10022 | Fts5Buffer pgidx; /* Buffer containing page-index */ | ||||
10023 | Fts5Buffer term; /* Buffer containing previous term on page */ | ||||
10024 | }; | ||||
10025 | struct Fts5DlidxWriter { | ||||
10026 | int pgno; /* Page number for this page */ | ||||
10027 | int bPrevValid; /* True if iPrev is valid */ | ||||
10028 | i64 iPrev; /* Previous rowid value written to page */ | ||||
10029 | Fts5Buffer buf; /* Buffer containing page data */ | ||||
10030 | }; | ||||
10031 | struct Fts5SegWriter { | ||||
10032 | int iSegid; /* Segid to write to */ | ||||
10033 | Fts5PageWriter writer; /* PageWriter object */ | ||||
10034 | i64 iPrevRowid; /* Previous rowid written to current leaf */ | ||||
10035 | u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ | ||||
10036 | u8 bFirstRowidInPage; /* True if next rowid is first in page */ | ||||
10037 | /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ | ||||
10038 | u8 bFirstTermInPage; /* True if next term will be first in leaf */ | ||||
10039 | int nLeafWritten; /* Number of leaf pages written */ | ||||
10040 | int nEmpty; /* Number of contiguous term-less nodes */ | ||||
10041 | |||||
10042 | int nDlidx; /* Allocated size of aDlidx[] array */ | ||||
10043 | Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ | ||||
10044 | |||||
10045 | /* Values to insert into the %_idx table */ | ||||
10046 | Fts5Buffer btterm; /* Next term to insert into %_idx table */ | ||||
10047 | int iBtPage; /* Page number corresponding to btterm */ | ||||
10048 | }; | ||||
10049 | |||||
10050 | typedef struct Fts5CResult Fts5CResult; | ||||
10051 | struct Fts5CResult { | ||||
10052 | u16 iFirst; /* aSeg[] index of firstest iterator */ | ||||
10053 | u8 bTermEq; /* True if the terms are equal */ | ||||
10054 | }; | ||||
10055 | |||||
10056 | /* | ||||
10057 | ** Object for iterating through a single segment, visiting each term/rowid | ||||
10058 | ** pair in the segment. | ||||
10059 | ** | ||||
10060 | ** pSeg: | ||||
10061 | ** The segment to iterate through. | ||||
10062 | ** | ||||
10063 | ** iLeafPgno: | ||||
10064 | ** Current leaf page number within segment. | ||||
10065 | ** | ||||
10066 | ** iLeafOffset: | ||||
10067 | ** Byte offset within the current leaf that is the first byte of the | ||||
10068 | ** position list data (one byte past the position-list size field). | ||||
10069 | ** | ||||
10070 | ** pLeaf: | ||||
10071 | ** Buffer containing current leaf page data. Set to NULL at EOF. | ||||
10072 | ** | ||||
10073 | ** iTermLeafPgno, iTermLeafOffset: | ||||
10074 | ** Leaf page number containing the last term read from the segment. And | ||||
10075 | ** the offset immediately following the term data. | ||||
10076 | ** | ||||
10077 | ** flags: | ||||
10078 | ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: | ||||
10079 | ** | ||||
10080 | ** FTS5_SEGITER_ONETERM: | ||||
10081 | ** If set, set the iterator to point to EOF after the current doclist | ||||
10082 | ** has been exhausted. Do not proceed to the next term in the segment. | ||||
10083 | ** | ||||
10084 | ** FTS5_SEGITER_REVERSE: | ||||
10085 | ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If | ||||
10086 | ** it is set, iterate through rowid in descending order instead of the | ||||
10087 | ** default ascending order. | ||||
10088 | ** | ||||
10089 | ** iRowidOffset/nRowidOffset/aRowidOffset: | ||||
10090 | ** These are used if the FTS5_SEGITER_REVERSE flag is set. | ||||
10091 | ** | ||||
10092 | ** For each rowid on the page corresponding to the current term, the | ||||
10093 | ** corresponding aRowidOffset[] entry is set to the byte offset of the | ||||
10094 | ** start of the "position-list-size" field within the page. | ||||
10095 | ** | ||||
10096 | ** iTermIdx: | ||||
10097 | ** Index of current term on iTermLeafPgno. | ||||
10098 | ** | ||||
10099 | ** apTombstone/nTombstone: | ||||
10100 | ** These are used for contentless_delete=1 tables only. When the cursor | ||||
10101 | ** is first allocated, the apTombstone[] array is allocated so that it | ||||
10102 | ** is large enough for all tombstones hash pages associated with the | ||||
10103 | ** segment. The pages themselves are loaded lazily from the database as | ||||
10104 | ** they are required. | ||||
10105 | */ | ||||
10106 | struct Fts5SegIter { | ||||
10107 | Fts5StructureSegment *pSeg; /* Segment to iterate through */ | ||||
10108 | int flags; /* Mask of configuration flags */ | ||||
10109 | int iLeafPgno; /* Current leaf page number */ | ||||
10110 | Fts5Data *pLeaf; /* Current leaf data */ | ||||
10111 | Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ | ||||
10112 | i64 iLeafOffset; /* Byte offset within current leaf */ | ||||
10113 | Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */ | ||||
10114 | |||||
10115 | /* Next method */ | ||||
10116 | void (*xNext)(Fts5Index*, Fts5SegIter*, int*); | ||||
10117 | |||||
10118 | /* The page and offset from which the current term was read. The offset | ||||
10119 | ** is the offset of the first rowid in the current doclist. */ | ||||
10120 | int iTermLeafPgno; | ||||
10121 | int iTermLeafOffset; | ||||
10122 | |||||
10123 | int iPgidxOff; /* Next offset in pgidx */ | ||||
10124 | int iEndofDoclist; | ||||
10125 | |||||
10126 | /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ | ||||
10127 | int iRowidOffset; /* Current entry in aRowidOffset[] */ | ||||
10128 | int nRowidOffset; /* Allocated size of aRowidOffset[] array */ | ||||
10129 | int *aRowidOffset; /* Array of offset to rowid fields */ | ||||
10130 | |||||
10131 | Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ | ||||
10132 | |||||
10133 | /* Variables populated based on current entry. */ | ||||
10134 | Fts5Buffer term; /* Current term */ | ||||
10135 | i64 iRowid; /* Current rowid */ | ||||
10136 | int nPos; /* Number of bytes in current position list */ | ||||
10137 | u8 bDel; /* True if the delete flag is set */ | ||||
10138 | }; | ||||
10139 | |||||
10140 | /* | ||||
10141 | ** Array of tombstone pages. Reference counted. | ||||
10142 | */ | ||||
10143 | struct Fts5TombstoneArray { | ||||
10144 | int nRef; /* Number of pointers to this object */ | ||||
10145 | int nTombstone; | ||||
10146 | Fts5Data *apTombstone[FLEXARRAY]; /* Array of tombstone pages */ | ||||
10147 | }; | ||||
10148 | |||||
10149 | /* Size (in bytes) of an Fts5TombstoneArray holding up to N tombstones */ | ||||
10150 | #define SZ_FTS5TOMBSTONEARRAY(N)(__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(N)*sizeof (Fts5Data*)) \ | ||||
10151 | (offsetof(Fts5TombstoneArray,apTombstone)__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(N)*sizeof(Fts5Data*)) | ||||
10152 | |||||
10153 | /* | ||||
10154 | ** Argument is a pointer to an Fts5Data structure that contains a | ||||
10155 | ** leaf page. | ||||
10156 | */ | ||||
/* Assert that x->szLeaf is either the full record size (x->nn) or the
** 16-bit value stored at byte offset 2 of the record. */
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/* Bits for Fts5SegIter.flags */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02
10163 | |||||
10164 | /* | ||||
10165 | ** Argument is a pointer to an Fts5Data structure that contains a leaf | ||||
10166 | ** page. This macro evaluates to true if the leaf contains no terms, or | ||||
10167 | ** false if it contains at least one term. | ||||
10168 | */ | ||||
/* True if leaf x has no page-index after its data (szLeaf reaches nn). */
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* The 16-bit value read from x->p at offset (szLeaf + i*2). */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* The 16-bit value read from the first two bytes of x->p. */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
10174 | |||||
10175 | /* | ||||
10176 | ** Object for iterating through the merged results of one or more segments, | ||||
10177 | ** visiting each term/rowid pair in the merged data. | ||||
10178 | ** | ||||
10179 | ** nSeg is always a power of two greater than or equal to the number of | ||||
10180 | ** segments that this object is merging data from. Both the aSeg[] and | ||||
10181 | ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded | ||||
10182 | ** with zeroed objects - these are handled as if they were iterators opened | ||||
10183 | ** on empty segments. | ||||
10184 | ** | ||||
10185 | ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an | ||||
10186 | ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the | ||||
10187 | ** comparison in this context is the index of the iterator that currently | ||||
10188 | ** points to the smaller term/rowid combination. Iterators at EOF are | ||||
10189 | ** considered to be greater than all other iterators. | ||||
10190 | ** | ||||
10191 | ** aFirst[1] contains the index in aSeg[] of the iterator that points to | ||||
10192 | ** the smallest key overall. aFirst[0] is unused. | ||||
10193 | ** | ||||
10194 | ** poslist: | ||||
10195 | ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. | ||||
10196 | ** There is no way to tell if this is populated or not. | ||||
10197 | ** | ||||
10198 | ** pColset: | ||||
10199 | ** If not NULL, points to an object containing a set of column indices. | ||||
10200 | ** Only matches that occur in one of these columns will be returned. | ||||
10201 | ** The Fts5Iter does not own the Fts5Colset object, and so it is not | ||||
10202 | ** freed when the iterator is closed - it is owned by the upper layer. | ||||
10203 | */ | ||||
10204 | struct Fts5Iter { | ||||
10205 | Fts5IndexIter base; /* Base class containing output vars */ | ||||
10206 | Fts5TokenDataIter *pTokenDataIter; /* NOTE(review): not documented here; presumably state used for token-data queries - confirm */ | ||||
10207 | |||||
10208 | Fts5Index *pIndex; /* Index that owns this iterator */ | ||||
10209 | Fts5Buffer poslist; /* Buffer containing current poslist */ | ||||
10210 | Fts5Colset *pColset; /* Restrict matches to these columns. May be NULL; not owned by the iterator */ | ||||
10211 | |||||
10212 | /* Invoked to set output variables. */ | ||||
10213 | void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); | ||||
10214 | |||||
10215 | int nSeg; /* Size of aSeg[] array (always a power of two) */ | ||||
10216 | int bRev; /* True to iterate in reverse order */ | ||||
10217 | u8 bSkipEmpty; /* True to skip deleted entries */ | ||||
10218 | |||||
10219 | i64 iSwitchRowid; /* "Firstest" rowid among iterators other than aFirst[1]. NOTE(review): presumably first in iteration order - confirm */ | ||||
10220 | Fts5CResult *aFirst; /* Current merge state (see above) */ | ||||
10221 | Fts5SegIter aSeg[FLEXARRAY]; /* Array of segment iterators (trailing variable-length member) */ | ||||
10222 | }; | ||||
10223 | |||||
10224 | /* Size (in bytes) of an Fts5Iter object holding up to N segment iterators: the offset of the trailing aSeg[] member plus N Fts5SegIter elements. */ | ||||
10225 | #define SZ_FTS5ITER(N)(__builtin_offsetof(Fts5Iter, aSeg)+(N)*sizeof(Fts5SegIter)) (offsetof(Fts5Iter,aSeg)__builtin_offsetof(Fts5Iter, aSeg)+(N)*sizeof(Fts5SegIter)) | ||||
10226 | |||||
10227 | /* | ||||
10228 | ** An instance of the following type is used to iterate through the contents | ||||
10229 | ** of a doclist-index record. | ||||
10230 | ** | ||||
10231 | ** pData: | ||||
10232 | ** Record containing the doclist-index data. | ||||
10233 | ** | ||||
10234 | ** bEof: | ||||
10235 | ** Set to true once iterator has reached EOF. | ||||
10236 | ** | ||||
10237 | ** iOff: | ||||
10238 | ** Set to the current offset within record pData. | ||||
10239 | */ | ||||
10240 | struct Fts5DlidxLvl { | ||||
10241 | Fts5Data *pData; /* Data for current page of this level */ | ||||
10242 | int iOff; /* Current offset into pData */ | ||||
10243 | int bEof; /* At EOF already */ | ||||
10244 | int iFirstOff; /* Used by reverse iterators */ | ||||
10245 | |||||
10246 | /* Output variables */ | ||||
10247 | int iLeafPgno; /* Page number of current leaf page */ | ||||
10248 | i64 iRowid; /* First rowid on leaf iLeafPgno */ | ||||
10249 | }; | ||||
10250 | struct Fts5DlidxIter { | ||||
10251 | int nLvl; /* NOTE(review): presumably the number of valid entries in aLvl[] - confirm */ | ||||
10252 | int iSegid; /* NOTE(review): presumably the id of the segment being iterated - confirm */ | ||||
10253 | Fts5DlidxLvl aLvl[FLEXARRAY]; /* One Fts5DlidxLvl per level (trailing variable-length member) */ | ||||
10254 | }; | ||||
10255 | |||||
10256 | /* Size (in bytes) of an Fts5DlidxIter object with up to N levels: the offset of the trailing aLvl[] member plus N Fts5DlidxLvl elements. */ | ||||
10257 | #define SZ_FTS5DLIDXITER(N)(__builtin_offsetof(Fts5DlidxIter, aLvl)+(N)*sizeof(Fts5DlidxLvl )) \ | ||||
10258 | (offsetof(Fts5DlidxIter,aLvl)__builtin_offsetof(Fts5DlidxIter, aLvl)+(N)*sizeof(Fts5DlidxLvl)) | ||||
10259 | |||||
/*
** Store iVal in the two bytes at aOut as a big-endian 16-bit integer:
** most significant byte first.
*/
static void fts5PutU16(u8 *aOut, u16 iVal){
  u8 hi = (u8)(iVal >> 8);
  u8 lo = (u8)(iVal & 0xFF);
  aOut[0] = hi;
  aOut[1] = lo;
}
10264 | |||||
/*
** Interpret the two bytes at aIn as a big-endian 16-bit unsigned integer
** and return the value.
*/
static u16 fts5GetU16(const u8 *aIn){
  u16 hi = (u16)aIn[0];
  return (u16)((hi << 8) + aIn[1]);
}
10268 | |||||
/*
** Argument a must point to a buffer of at least 8 bytes. The first 8
** bytes are decoded as a big-endian 64-bit unsigned integer, which is
** returned.
*/
static u64 fts5GetU64(u8 *a){
  u64 v = 0;
  int i;
  for(i=0; i<8; i++){
    v = (v << 8) + a[i];
  }
  return v;
}
10284 | |||||
/*
** Argument a must point to a buffer of at least 4 bytes. The first 4
** bytes are decoded as a big-endian 32-bit unsigned integer, which is
** returned.
*/
static u32 fts5GetU32(const u8 *a){
  u32 v = 0;
  int i;
  for(i=0; i<4; i++){
    v = (v << 8) + a[i];
  }
  return v;
}
10296 | |||||
/*
** Serialize iVal into the 8 bytes at a as a big-endian 64-bit unsigned
** integer (most significant byte first).
*/
static void fts5PutU64(u8 *a, u64 iVal){
  int i;
  for(i=0; i<8; i++){
    int nShift = 56 - 8*i;
    a[i] = (u8)((iVal >> nShift) & 0xFF);
  }
}
10311 | |||||
/*
** Serialize iVal into the 4 bytes at a as a big-endian 32-bit unsigned
** integer (most significant byte first).
*/
static void fts5PutU32(u8 *a, u32 iVal){
  int i;
  for(i=0; i<4; i++){
    int nShift = 24 - 8*i;
    a[i] = (u8)((iVal >> nShift) & 0xFF);
  }
}
10322 | |||||
10323 | /* | ||||
10324 | ** Allocate and return a buffer at least nByte bytes in size. | ||||
10325 | ** | ||||
10326 | ** If an OOM error is encountered, return NULL and set the error code in | ||||
10327 | ** the Fts5Index handle passed as the first argument. | ||||
      | ** | ||||
      | ** This is a thin wrapper: the work is done by sqlite3Fts5MallocZero(), | ||||
      | ** which is handed &p->rc to report failure through. | ||||
      | ** NOTE(review): the callee's name suggests the buffer is zero-filled and | ||||
      | ** that nothing is allocated when p->rc is already set - confirm. | ||||
10328 | */ | ||||
10329 | static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){ | ||||
10330 | return sqlite3Fts5MallocZero(&p->rc, nByte); | ||||
10331 | } | ||||
10332 | |||||
/*
** Debug-only helper. Order the contents of buffer pLeft against the
** nRight byte blob at pRight.
**
** The bytes common to both operands are compared with memcmp(). If they
** differ, the memcmp() result is returned. Otherwise the result is
** (pLeft->n - nRight), so that the shorter operand sorts first. The
** value returned is therefore negative, zero or positive as pLeft is
** less than, equal to or greater than pRight.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nShared = MIN(pLeft->n, nRight);
  int cmp = memcmp(pLeft->p, pRight, nShared);
  if( cmp!=0 ) return cmp;
  return pLeft->n - nRight;
}
#endif
10351 | |||||
10352 | /* | ||||
10353 | ** Compare the contents of the two buffers using memcmp(). If one buffer | ||||
10354 | ** is a prefix of the other, it is considered the lesser. | ||||
10355 | ** | ||||
10356 | ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or | ||||
10357 | ** +ve if pRight is smaller than pLeft. In other words: | ||||
10358 | ** | ||||
10359 | ** res = *pLeft - *pRight | ||||
10360 | */ | ||||
10361 | static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ | ||||
10362 | int nCmp, res; | ||||
10363 | nCmp = MIN(pLeft->n, pRight->n)(((pLeft->n) < (pRight->n)) ? (pLeft->n) : (pRight ->n)); | ||||
10364 | assert( nCmp<=0 || pLeft->p!=0 )((void) (0)); | ||||
10365 | assert( nCmp<=0 || pRight->p!=0 )((void) (0)); | ||||
10366 | res = fts5Memcmp(pLeft->p, pRight->p, nCmp)((nCmp)<=0 ? 0 : memcmp((pLeft->p), (pRight->p), (nCmp ))); | ||||
10367 | return (res==0 ? (pLeft->n - pRight->n) : res); | ||||
10368 | } | ||||
10369 | |||||
10370 | static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ | ||||
10371 | int ret; | ||||
10372 | fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret)sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(ret)); | ||||
10373 | return ret; | ||||
10374 | } | ||||
10375 | |||||
10376 | /* | ||||
10377 | ** Close the read-only blob handle, if it is open. | ||||
10378 | */ | ||||
10379 | static void fts5IndexCloseReader(Fts5Index *p){ | ||||
10380 | if( p->pReader ){ | ||||
10381 | int rc; | ||||
10382 | sqlite3_blob *pReader = p->pReader; | ||||
10383 | p->pReader = 0; | ||||
10384 | rc = sqlite3_blob_closesqlite3_api->blob_close(pReader); | ||||
10385 | if( p->rc==SQLITE_OK0 ) p->rc = rc; | ||||
10386 | } | ||||
10387 | } | ||||
10388 | |||||
10389 | /* | ||||
10390 | ** Retrieve a record from the %_data table. | ||||
10391 | ** | ||||
10392 | ** If an error occurs, NULL is returned and an error left in the | ||||
10393 | ** Fts5Index object. If p->rc is already set when this function is | ||||
      | ** called, nothing is read and NULL is returned. | ||||
      | ** | ||||
      | ** On success the returned Fts5Data header and the record payload share | ||||
      | ** one allocation. Fts5Data.nn is set to the size of the blob in bytes and | ||||
      | ** Fts5Data.szLeaf to the big-endian 16-bit value at payload bytes 2..3. | ||||
      | ** Release the result using fts5DataRelease(). | ||||
10394 | */ | ||||
10395 | static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ | ||||
10396 | Fts5Data *pRet = 0; | ||||
10397 | if( p->rc==SQLITE_OK0 ){ | ||||
10398 | int rc = SQLITE_OK0; | ||||
10399 | |||||
10400 | if( p->pReader ){ | ||||
10401 | /* This call may return SQLITE_ABORT if there has been a savepoint | ||||
10402 | ** rollback since it was last used. In this case a new blob handle | ||||
10403 | ** is required. p->pReader is set to NULL for the duration of the | ||||
      | ** reopen call and restored afterwards. | ||||
10403 | */ | ||||
10404 | sqlite3_blob *pBlob = p->pReader; | ||||
10405 | p->pReader = 0; | ||||
10406 | rc = sqlite3_blob_reopensqlite3_api->blob_reopen(pBlob, iRowid); | ||||
10407 | assert( p->pReader==0 )((void) (0)); | ||||
10408 | p->pReader = pBlob; | ||||
10409 | if( rc!=SQLITE_OK0 ){ | ||||
10410 | fts5IndexCloseReader(p); | ||||
10411 | } | ||||
10412 | if( rc==SQLITE_ABORT4 ) rc = SQLITE_OK0; /* Not an error: fall through and open a fresh handle below */ | ||||
10413 | } | ||||
10414 | |||||
10415 | /* If the blob handle is not open at this point, open it and seek | ||||
10416 | ** to the requested entry. */ | ||||
10417 | if( p->pReader==0 && rc==SQLITE_OK0 ){ | ||||
10418 | Fts5Config *pConfig = p->pConfig; | ||||
10419 | rc = sqlite3_blob_opensqlite3_api->blob_open(pConfig->db, | ||||
10420 | pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader | ||||
10421 | ); | ||||
10422 | } | ||||
10423 | |||||
10424 | /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls | ||||
10425 | ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. | ||||
10426 | ** All the reasons those functions might return SQLITE_ERROR - missing | ||||
10427 | ** table, missing row, non-blob/text in block column - indicate | ||||
10428 | ** backing store corruption. */ | ||||
10429 | if( rc==SQLITE_ERROR1 ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
10430 | |||||
10431 | if( rc==SQLITE_OK0 ){ | ||||
10432 | u8 *aOut = 0; /* Read blob data into this buffer */ | ||||
10433 | int nByte = sqlite3_blob_bytessqlite3_api->blob_bytes(p->pReader); | ||||
10434 | int szData = (sizeof(Fts5Data) + 7) & ~7; /* Header size rounded up to a multiple of 8 */ | ||||
10435 | sqlite3_int64 nAlloc = szData + nByte + FTS5_DATA_PADDING20; | ||||
10436 | pRet = (Fts5Data*)sqlite3_malloc64sqlite3_api->malloc64(nAlloc); | ||||
10437 | if( pRet ){ | ||||
10438 | pRet->nn = nByte; | ||||
10439 | aOut = pRet->p = (u8*)pRet + szData; /* Payload starts right after the rounded-up header */ | ||||
10440 | }else{ | ||||
10441 | rc = SQLITE_NOMEM7; | ||||
10442 | } | ||||
10443 | |||||
10444 | if( rc==SQLITE_OK0 ){ | ||||
10445 | rc = sqlite3_blob_readsqlite3_api->blob_read(p->pReader, aOut, nByte, 0); | ||||
10446 | } | ||||
10447 | if( rc!=SQLITE_OK0 ){ | ||||
10448 | sqlite3_freesqlite3_api->free(pRet); | ||||
10449 | pRet = 0; | ||||
10450 | }else{ | ||||
10451 | /* TODO: Fix this. The two bytes immediately following the payload are zeroed; they lie within the FTS5_DATA_PADDING bytes allocated above. */ | ||||
10452 | pRet->p[nByte] = 0x00; | ||||
10453 | pRet->p[nByte+1] = 0x00; | ||||
10454 | pRet->szLeaf = fts5GetU16(&pRet->p[2]); | ||||
10455 | } | ||||
10456 | } | ||||
10457 | p->rc = rc; | ||||
10458 | p->nRead++; /* Incremented for every attempted read, successful or not */ | ||||
10459 | } | ||||
10460 | |||||
10461 | assert( (pRet==0)==(p->rc!=SQLITE_OK) )((void) (0)); | ||||
10462 | assert( pRet==0 || EIGHT_BYTE_ALIGNMENT( pRet->p ) )((void) (0)); | ||||
10463 | return pRet; | ||||
10464 | } | ||||
10465 | |||||
10466 | |||||
10467 | /* | ||||
10468 | ** Release a reference to data record returned by an earlier call to | ||||
10469 | ** fts5DataRead(). The record is freed with a single sqlite3_free() call. | ||||
10470 | */ | ||||
10471 | static void fts5DataRelease(Fts5Data *pData){ | ||||
10472 | sqlite3_freesqlite3_api->free(pData); | ||||
10473 | } | ||||
10474 | |||||
10475 | static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){ | ||||
10476 | Fts5Data *pRet = fts5DataRead(p, iRowid); | ||||
10477 | if( pRet ){ | ||||
10478 | if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){ | ||||
10479 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
10480 | fts5DataRelease(pRet); | ||||
10481 | pRet = 0; | ||||
10482 | } | ||||
10483 | } | ||||
10484 | return pRet; | ||||
10485 | } | ||||
10486 | |||||
10487 | static int fts5IndexPrepareStmt( | ||||
10488 | Fts5Index *p, | ||||
10489 | sqlite3_stmt **ppStmt, | ||||
10490 | char *zSql | ||||
10491 | ){ | ||||
10492 | if( p->rc==SQLITE_OK0 ){ | ||||
10493 | if( zSql ){ | ||||
10494 | int rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(p->pConfig->db, zSql, -1, | ||||
10495 | SQLITE_PREPARE_PERSISTENT0x01|SQLITE_PREPARE_NO_VTAB0x04, | ||||
10496 | ppStmt, 0); | ||||
10497 | /* If this prepare() call fails with SQLITE_ERROR, then one of the | ||||
10498 | ** %_idx or %_data tables has been removed or modified. Call this | ||||
10499 | ** corruption. */ | ||||
10500 | p->rc = (rc==SQLITE_ERROR1 ? SQLITE_CORRUPT11 : rc); | ||||
10501 | }else{ | ||||
10502 | p->rc = SQLITE_NOMEM7; | ||||
10503 | } | ||||
10504 | } | ||||
10505 | sqlite3_freesqlite3_api->free(zSql); | ||||
10506 | return p->rc; | ||||
10507 | } | ||||
10508 | |||||
10509 | |||||
10510 | /* | ||||
10511 | ** INSERT OR REPLACE a record into the %_data table. | ||||
10512 | */ | ||||
10513 | static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ | ||||
10514 | if( p->rc!=SQLITE_OK0 ) return; | ||||
10515 | |||||
10516 | if( p->pWriter==0 ){ | ||||
10517 | Fts5Config *pConfig = p->pConfig; | ||||
10518 | fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintfsqlite3_api->mprintf( | ||||
10519 | "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", | ||||
10520 | pConfig->zDb, pConfig->zName | ||||
10521 | )); | ||||
10522 | if( p->rc ) return; | ||||
10523 | } | ||||
10524 | |||||
10525 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pWriter, 1, iRowid); | ||||
10526 | sqlite3_bind_blobsqlite3_api->bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
10527 | sqlite3_stepsqlite3_api->step(p->pWriter); | ||||
10528 | p->rc = sqlite3_resetsqlite3_api->reset(p->pWriter); | ||||
10529 | sqlite3_bind_nullsqlite3_api->bind_null(p->pWriter, 2); | ||||
10530 | } | ||||
10531 | |||||
10532 | /* | ||||
10533 | ** Execute the following SQL: | ||||
10534 | ** | ||||
10535 | ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast | ||||
10536 | */ | ||||
10537 | static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ | ||||
10538 | if( p->rc!=SQLITE_OK0 ) return; | ||||
10539 | |||||
10540 | if( p->pDeleter==0 ){ | ||||
10541 | Fts5Config *pConfig = p->pConfig; | ||||
10542 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf( | ||||
10543 | "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?", | ||||
10544 | pConfig->zDb, pConfig->zName | ||||
10545 | ); | ||||
10546 | if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return; | ||||
10547 | } | ||||
10548 | |||||
10549 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pDeleter, 1, iFirst); | ||||
10550 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pDeleter, 2, iLast); | ||||
10551 | sqlite3_stepsqlite3_api->step(p->pDeleter); | ||||
10552 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDeleter); | ||||
10553 | } | ||||
10554 | |||||
10555 | /* | ||||
10556 | ** Remove all records associated with segment iSegid. | ||||
10557 | */ | ||||
10558 | static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){ | ||||
10559 | int iSegid = pSeg->iSegid; | ||||
10560 | i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) ); | ||||
10561 | i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)( ((i64)(iSegid+1) << (31 +5 +1)) + ((i64)(0) << ( 31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) )-1; | ||||
10562 | fts5DataDelete(p, iFirst, iLast); | ||||
10563 | |||||
10564 | if( pSeg->nPgTombstone ){ | ||||
10565 | i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0)( ((i64)(iSegid+(1<<16)) << (31 +5 +1)) + ((i64)( 0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) ); | ||||
10566 | i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1)( ((i64)(iSegid+(1<<16)) << (31 +5 +1)) + ((i64)( 0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg ->nPgTombstone-1)) ); | ||||
10567 | fts5DataDelete(p, iTomb1, iTomb2); | ||||
10568 | } | ||||
10569 | if( p->pIdxDeleter==0 ){ | ||||
10570 | Fts5Config *pConfig = p->pConfig; | ||||
10571 | fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintfsqlite3_api->mprintf( | ||||
10572 | "DELETE FROM '%q'.'%q_idx' WHERE segid=?", | ||||
10573 | pConfig->zDb, pConfig->zName | ||||
10574 | )); | ||||
10575 | } | ||||
10576 | if( p->rc==SQLITE_OK0 ){ | ||||
10577 | sqlite3_bind_intsqlite3_api->bind_int(p->pIdxDeleter, 1, iSegid); | ||||
10578 | sqlite3_stepsqlite3_api->step(p->pIdxDeleter); | ||||
10579 | p->rc = sqlite3_resetsqlite3_api->reset(p->pIdxDeleter); | ||||
10580 | } | ||||
10581 | } | ||||
10582 | |||||
10583 | /* | ||||
10584 | ** Release a reference to an Fts5Structure object returned by an earlier | ||||
10585 | ** call to fts5StructureRead() or fts5StructureDecode(). | ||||
10586 | */ | ||||
10587 | static void fts5StructureRelease(Fts5Structure *pStruct){ | ||||
10588 | if( pStruct && 0>=(--pStruct->nRef) ){ | ||||
10589 | int i; | ||||
10590 | assert( pStruct->nRef==0 )((void) (0)); | ||||
10591 | for(i=0; i<pStruct->nLevel; i++){ | ||||
10592 | sqlite3_freesqlite3_api->free(pStruct->aLevel[i].aSeg); | ||||
10593 | } | ||||
10594 | sqlite3_freesqlite3_api->free(pStruct); | ||||
10595 | } | ||||
10596 | } | ||||
10597 | |||||
10598 | static void fts5StructureRef(Fts5Structure *pStruct){ /* Take an additional reference to pStruct, which must not be NULL. Undone by fts5StructureRelease(). */ | ||||
10599 | pStruct->nRef++; | ||||
10600 | } | ||||
10601 | |||||
10602 | static void *sqlite3Fts5StructureRef(Fts5Index *p){ | ||||
10603 | fts5StructureRef(p->pStruct); | ||||
10604 | return (void*)p->pStruct; | ||||
10605 | } | ||||
10606 | static void sqlite3Fts5StructureRelease(void *p){ | ||||
10607 | if( p ){ | ||||
10608 | fts5StructureRelease((Fts5Structure*)p); | ||||
10609 | } | ||||
10610 | } | ||||
10611 | static int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){ | ||||
10612 | if( p->pStruct!=(Fts5Structure*)pStruct ){ | ||||
10613 | return SQLITE_ABORT4; | ||||
10614 | } | ||||
10615 | return SQLITE_OK0; | ||||
10616 | } | ||||
10617 | |||||
10618 | /* | ||||
10619 | ** Ensure that structure object (*pp) is writable. | ||||
10620 | ** | ||||
10621 | ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If | ||||
10622 | ** an error occurs, (*pRc) is set to an SQLite error code before returning. | ||||
      | ** | ||||
      | ** It is also a no-op if the object has a single reference (nRef<=1). | ||||
      | ** Otherwise a private copy is made: the object and the aSeg[] array of | ||||
      | ** every level are duplicated, one reference is dropped from the original, | ||||
      | ** and (*pp) is set to the copy, which has nRef==1. | ||||
      | ** | ||||
      | ** Failure modes differ: if the top-level allocation fails, (*pp) is set to | ||||
      | ** NULL. If the allocation of a level's aSeg[] array fails, the partial | ||||
      | ** copy is freed and (*pp) is left pointing at the original object. | ||||
10623 | */ | ||||
10624 | static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){ | ||||
10625 | Fts5Structure *p = *pp; | ||||
10626 | if( *pRc==SQLITE_OK0 && p->nRef>1 ){ | ||||
10627 | i64 nByte = SZ_FTS5STRUCTURE(p->nLevel)(__builtin_offsetof(Fts5Structure, aLevel) + (p->nLevel)*sizeof (Fts5StructureLevel)); | ||||
10628 | Fts5Structure *pNew; | ||||
10629 | pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte); | ||||
10630 | if( pNew ){ | ||||
10631 | int i; | ||||
10632 | memcpy(pNew, p, nByte); | ||||
10633 | for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0; /* Drop the aSeg pointers copied from p so the cleanup loop below never frees p's arrays */ | ||||
10634 | for(i=0; i<p->nLevel; i++){ | ||||
10635 | Fts5StructureLevel *pLvl = &pNew->aLevel[i]; | ||||
10636 | nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg; | ||||
10637 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte); | ||||
10638 | if( pLvl->aSeg==0 ){ | ||||
10639 | for(i=0; i<p->nLevel; i++){ /* Loop index i is reused here; safe because the function returns right after */ | ||||
10640 | sqlite3_freesqlite3_api->free(pNew->aLevel[i].aSeg); | ||||
10641 | } | ||||
10642 | sqlite3_freesqlite3_api->free(pNew); | ||||
10643 | return; | ||||
10644 | } | ||||
10645 | memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte); | ||||
10646 | } | ||||
10647 | p->nRef--; | ||||
10648 | pNew->nRef = 1; | ||||
10649 | } | ||||
10650 | *pp = pNew; /* NULL if the top-level allocation failed */ | ||||
10651 | } | ||||
10652 | } | ||||
10653 | |||||
10654 | /* | ||||
10655 | ** Deserialize and return the structure record currently stored in serialized | ||||
10656 | ** form within buffer pData/nData. | ||||
      | ** | ||||
      | ** Layout as parsed below: a 4-byte cookie (copied to *piCookie if piCookie | ||||
      | ** is not NULL), an optional 4-byte FTS5_STRUCTURE_V2 marker, varints nLevel | ||||
      | ** and nSegment, a 64-bit varint write counter, then for each level the | ||||
      | ** varints nMerge and nTotal followed by nTotal segment entries. Each entry | ||||
      | ** holds iSegid, pgnoFirst and pgnoLast; V2 entries additionally hold | ||||
      | ** iOrigin1, iOrigin2, nPgTombstone, nEntryTombstone and nEntry. | ||||
10657 | ** | ||||
10658 | ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array | ||||
10659 | ** are over-allocated by one slot. This allows the structure contents | ||||
10660 | ** to be more easily edited. | ||||
      | ** NOTE(review): the allocations in this function request exactly nLevel | ||||
      | ** and nTotal entries, so the statement above may be stale - confirm. | ||||
      | ** NOTE(review): the header fields are read before anything is compared | ||||
      | ** against nData; this presumably relies on the caller padding the | ||||
      | ** buffer - confirm. | ||||
10661 | ** | ||||
10662 | ** If an error occurs, *ppOut is set to NULL and an SQLite error code | ||||
10663 | ** returned. Otherwise, *ppOut is set to point to the new object and | ||||
10664 | ** SQLITE_OK returned. One exception is visible below: when nLevel or | ||||
      | ** nSegment is out of range, FTS5_CORRUPT is returned without writing | ||||
      | ** to *ppOut at all. | ||||
10664 | */ | ||||
10666 | static int fts5StructureDecode( | ||||
10667 | const u8 *pData, /* Buffer containing serialized structure */ | ||||
10668 | int nData, /* Size of buffer pData in bytes */ | ||||
10669 | int *piCookie, /* Configuration cookie value */ | ||||
10670 | Fts5Structure **ppOut /* OUT: Deserialized object */ | ||||
10671 | ){ | ||||
10672 | int rc = SQLITE_OK0; | ||||
10673 | int i = 0; | ||||
10674 | int iLvl; | ||||
10675 | int nLevel = 0; | ||||
10676 | int nSegment = 0; | ||||
10677 | sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */ | ||||
10678 | Fts5Structure *pRet = 0; /* Structure object to return */ | ||||
10679 | int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */ | ||||
10680 | u64 nOriginCntr = 0; /* Largest origin value seen so far */ | ||||
10681 | |||||
10682 | /* Grab the cookie value */ | ||||
10683 | if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); | ||||
10684 | i = 4; | ||||
10685 | |||||
10686 | /* Check if this is a V2 structure record. Set bStructureV2 if it is. */ | ||||
10687 | if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2"\xFF\x00\x00\x01", 4) ){ | ||||
10688 | i += 4; | ||||
10689 | bStructureV2 = 1; | ||||
10690 | } | ||||
10691 | |||||
10692 | /* Read the total number of levels and segments from the start of the | ||||
10693 | ** structure record. */ | ||||
10694 | i += fts5GetVarint32(&pData[i], nLevel)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nLevel)); | ||||
10695 | i += fts5GetVarint32(&pData[i], nSegment)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nSegment)); | ||||
10696 | if( nLevel>FTS5_MAX_SEGMENT2000 || nLevel<0 | ||||
10697 | || nSegment>FTS5_MAX_SEGMENT2000 || nSegment<0 | ||||
10698 | ){ | ||||
10699 | return FTS5_CORRUPT(11 | (1<<8)); | ||||
10700 | } | ||||
10701 | nByte = SZ_FTS5STRUCTURE(nLevel)(__builtin_offsetof(Fts5Structure, aLevel) + (nLevel)*sizeof( Fts5StructureLevel)); | ||||
10702 | pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); | ||||
10703 | |||||
10704 | if( pRet ){ | ||||
10705 | pRet->nRef = 1; | ||||
10706 | pRet->nLevel = nLevel; | ||||
10707 | pRet->nSegment = nSegment; | ||||
10708 | i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); | ||||
10709 | |||||
10710 | for(iLvl=0; rc==SQLITE_OK0 && iLvl<nLevel; iLvl++){ | ||||
10711 | Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; | ||||
10712 | int nTotal = 0; | ||||
10713 | int iSeg; | ||||
10714 | |||||
10715 | if( i>=nData ){ | ||||
10716 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
10717 | }else{ | ||||
10718 | i += fts5GetVarint32(&pData[i], pLvl->nMerge)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pLvl->nMerge )); | ||||
10719 | i += fts5GetVarint32(&pData[i], nTotal)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nTotal)); | ||||
10720 | if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
10721 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, | ||||
10722 | nTotal * sizeof(Fts5StructureSegment) | ||||
10723 | ); | ||||
10724 | nSegment -= nTotal; /* Count down; must reach exactly zero once every level is parsed */ | ||||
10725 | } | ||||
10726 | |||||
10727 | if( rc==SQLITE_OK0 ){ | ||||
10728 | pLvl->nSeg = nTotal; | ||||
10729 | for(iSeg=0; iSeg<nTotal; iSeg++){ | ||||
10730 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | ||||
10731 | if( i>=nData ){ | ||||
10732 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
10733 | break; | ||||
10734 | } | ||||
10735 | assert( pSeg!=0 )((void) (0)); | ||||
10736 | i += fts5GetVarint32(&pData[i], pSeg->iSegid)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->iSegid )); | ||||
10737 | i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->pgnoFirst )); | ||||
10738 | i += fts5GetVarint32(&pData[i], pSeg->pgnoLast)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->pgnoLast )); | ||||
10739 | if( bStructureV2 ){ | ||||
10740 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->iOrigin1); | ||||
10741 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->iOrigin2); | ||||
10742 | i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->nPgTombstone )); | ||||
10743 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->nEntryTombstone); | ||||
10744 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->nEntry); | ||||
10745 | nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2)(((nOriginCntr) > (pSeg->iOrigin2)) ? (nOriginCntr) : ( pSeg->iOrigin2)); | ||||
10746 | } | ||||
10747 | if( pSeg->pgnoLast<pSeg->pgnoFirst ){ | ||||
10748 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
10749 | break; | ||||
10750 | } | ||||
10751 | } | ||||
10752 | if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT(11 | (1<<8)); /* Previous level has nMerge set but this level holds no segments */ | ||||
10753 | if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT(11 | (1<<8)); /* The last level may not have nMerge set */ | ||||
10754 | } | ||||
10755 | } | ||||
10756 | if( nSegment!=0 && rc==SQLITE_OK0 ) rc = FTS5_CORRUPT(11 | (1<<8)); /* Per-level totals disagree with the header's segment count */ | ||||
10757 | if( bStructureV2 ){ | ||||
10758 | pRet->nOriginCntr = nOriginCntr+1; | ||||
10759 | } | ||||
10760 | |||||
10761 | if( rc!=SQLITE_OK0 ){ | ||||
10762 | fts5StructureRelease(pRet); | ||||
10763 | pRet = 0; | ||||
10764 | } | ||||
10765 | } | ||||
10766 | |||||
10767 | *ppOut = pRet; | ||||
10768 | return rc; | ||||
10769 | } | ||||
10770 | |||||
10771 | /* | ||||
10772 | ** Add a level to the Fts5Structure.aLevel[] array of structure object | ||||
10773 | ** (*ppStruct). | ||||
10774 | */ | ||||
10775 | static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ | ||||
10776 | fts5StructureMakeWritable(pRc, ppStruct); | ||||
10777 | assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK )((void) (0)); | ||||
10778 | if( *pRc==SQLITE_OK0 ){ | ||||
10779 | Fts5Structure *pStruct = *ppStruct; | ||||
10780 | int nLevel = pStruct->nLevel; | ||||
10781 | sqlite3_int64 nByte = SZ_FTS5STRUCTURE(nLevel+2)(__builtin_offsetof(Fts5Structure, aLevel) + (nLevel+2)*sizeof (Fts5StructureLevel)); | ||||
10782 | |||||
10783 | pStruct = sqlite3_realloc64sqlite3_api->realloc64(pStruct, nByte); | ||||
10784 | if( pStruct ){ | ||||
10785 | memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); | ||||
10786 | pStruct->nLevel++; | ||||
10787 | *ppStruct = pStruct; | ||||
10788 | }else{ | ||||
10789 | *pRc = SQLITE_NOMEM7; | ||||
10790 | } | ||||
10791 | } | ||||
10792 | } | ||||
10793 | |||||
10794 | /* | ||||
10795 | ** Extend level iLvl so that there is room for at least nExtra more | ||||
10796 | ** segments. | ||||
10797 | */ | ||||
10798 | static void fts5StructureExtendLevel( | ||||
10799 | int *pRc, | ||||
10800 | Fts5Structure *pStruct, | ||||
10801 | int iLvl, | ||||
10802 | int nExtra, | ||||
10803 | int bInsert | ||||
10804 | ){ | ||||
10805 | if( *pRc==SQLITE_OK0 ){ | ||||
10806 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | ||||
10807 | Fts5StructureSegment *aNew; | ||||
10808 | sqlite3_int64 nByte; | ||||
10809 | |||||
10810 | nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); | ||||
10811 | aNew = sqlite3_realloc64sqlite3_api->realloc64(pLvl->aSeg, nByte); | ||||
10812 | if( aNew ){ | ||||
10813 | if( bInsert==0 ){ | ||||
10814 | memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); | ||||
10815 | }else{ | ||||
10816 | int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); | ||||
10817 | memmove(&aNew[nExtra], aNew, nMove); | ||||
10818 | memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); | ||||
10819 | } | ||||
10820 | pLvl->aSeg = aNew; | ||||
10821 | }else{ | ||||
10822 | *pRc = SQLITE_NOMEM7; | ||||
10823 | } | ||||
10824 | } | ||||
10825 | } | ||||
10826 | |||||
10827 | static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){ | ||||
10828 | Fts5Structure *pRet = 0; | ||||
10829 | Fts5Config *pConfig = p->pConfig; | ||||
10830 | int iCookie; /* Configuration cookie */ | ||||
10831 | Fts5Data *pData; | ||||
10832 | |||||
10833 | pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID10); | ||||
10834 | if( p->rc==SQLITE_OK0 ){ | ||||
10835 | /* TODO: Do we need this if the leaf-index is appended? Probably... */ | ||||
10836 | memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING20); | ||||
10837 | p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); | ||||
10838 | if( p->rc==SQLITE_OK0 && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){ | ||||
10839 | p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); | ||||
10840 | } | ||||
10841 | fts5DataRelease(pData); | ||||
10842 | if( p->rc!=SQLITE_OK0 ){ | ||||
10843 | fts5StructureRelease(pRet); | ||||
10844 | pRet = 0; | ||||
10845 | } | ||||
10846 | } | ||||
10847 | |||||
10848 | return pRet; | ||||
10849 | } | ||||
10850 | |||||
10851 | static i64 fts5IndexDataVersion(Fts5Index *p){ | ||||
10852 | i64 iVersion = 0; | ||||
10853 | |||||
10854 | if( p->rc==SQLITE_OK0 ){ | ||||
10855 | if( p->pDataVersion==0 ){ | ||||
10856 | p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, | ||||
10857 | sqlite3_mprintfsqlite3_api->mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) | ||||
10858 | ); | ||||
10859 | if( p->rc ) return 0; | ||||
10860 | } | ||||
10861 | |||||
10862 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(p->pDataVersion) ){ | ||||
10863 | iVersion = sqlite3_column_int64sqlite3_api->column_int64(p->pDataVersion, 0); | ||||
10864 | } | ||||
10865 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDataVersion); | ||||
10866 | } | ||||
10867 | |||||
10868 | return iVersion; | ||||
10869 | } | ||||
10870 | |||||
10871 | /* | ||||
10872 | ** Read, deserialize and return the structure record. | ||||
      | ** | ||||
      | ** The decoded object is cached in p->pStruct. It is only read from the | ||||
      | ** database when p->pStruct is NULL, in which case p->iStructVersion is | ||||
      | ** first set to the value returned by fts5IndexDataVersion(). The caller | ||||
      | ** receives its own reference to the cached object, which it must drop | ||||
      | ** with fts5StructureRelease(). | ||||
10873 | ** | ||||
10874 | ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array | ||||
10875 | ** are over-allocated as described for function fts5StructureDecode() | ||||
10876 | ** above. | ||||
10877 | ** | ||||
10878 | ** If an error occurs, NULL is returned and an error code left in the | ||||
10879 | ** Fts5Index handle. If an error has already occurred when this function | ||||
10880 | ** is called, it is a no-op. | ||||
10881 | */ | ||||
10882 | static Fts5Structure *fts5StructureRead(Fts5Index *p){ | ||||
10883 | |||||
10884 | if( p->pStruct==0 ){ | ||||
10885 | p->iStructVersion = fts5IndexDataVersion(p); | ||||
10886 | if( p->rc==SQLITE_OK0 ){ | ||||
10887 | p->pStruct = fts5StructureReadUncached(p); | ||||
10888 | } | ||||
10889 | } | ||||
10890 | |||||
10891 | #if 0 | ||||
10892 | else{ /* Disabled debugging aid: re-reads the record and compares it field by field against the cached object */ | ||||
10893 | Fts5Structure *pTest = fts5StructureReadUncached(p); | ||||
10894 | if( pTest ){ | ||||
10895 | int i, j; | ||||
10896 | assert_nc( p->pStruct->nSegment==pTest->nSegment )((void) (0)); | ||||
10897 | assert_nc( p->pStruct->nLevel==pTest->nLevel )((void) (0)); | ||||
10898 | for(i=0; i<pTest->nLevel; i++){ | ||||
10899 | assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge )((void) (0)); | ||||
10900 | assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg )((void) (0)); | ||||
10901 | for(j=0; j<pTest->aLevel[i].nSeg; j++){ | ||||
10902 | Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j]; | ||||
10903 | Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j]; | ||||
10904 | assert_nc( p1->iSegid==p2->iSegid )((void) (0)); | ||||
10905 | assert_nc( p1->pgnoFirst==p2->pgnoFirst )((void) (0)); | ||||
10906 | assert_nc( p1->pgnoLast==p2->pgnoLast )((void) (0)); | ||||
10907 | } | ||||
10908 | } | ||||
10909 | fts5StructureRelease(pTest); | ||||
10910 | } | ||||
10911 | } | ||||
10912 | #endif | ||||
10913 | |||||
10914 | if( p->rc!=SQLITE_OK0 ) return 0; | ||||
10915 | assert( p->iStructVersion!=0 )((void) (0)); | ||||
10916 | assert( p->pStruct!=0 )((void) (0)); | ||||
10917 | fts5StructureRef(p->pStruct); /* The reference handed to the caller; the cache keeps its own */ | ||||
10918 | return p->pStruct; | ||||
10919 | } | ||||
10920 | |||||
10921 | static void fts5StructureInvalidate(Fts5Index *p){ | ||||
10922 | if( p->pStruct ){ | ||||
10923 | fts5StructureRelease(p->pStruct); | ||||
10924 | p->pStruct = 0; | ||||
10925 | } | ||||
10926 | } | ||||
10927 | |||||
/*
** Sum the nSeg fields of every level of index structure pStruct and return
** the total. Zero is returned if pStruct is NULL. This function is only
** ever used as part of assert() conditions, so it is compiled only when
** SQLITE_DEBUG is defined.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running total of segments */
  int ii;                         /* Level index */

  if( pStruct==0 ) return 0;
  for(ii=0; ii<pStruct->nLevel; ii++){
    nTotal += pStruct->aLevel[ii].nSeg;
  }
  return nTotal;
}
#endif
10945 | |||||
/*
** Append nBlob bytes from pBlob to buffer pBuf. The buffer is NOT grown:
** the caller must already have reserved enough space. This is verified by
** an assert() only.
**
** Every macro argument is parenthesized at each use so that expression
** arguments (for example "bFlag ? 4 : 8") are evaluated as a unit, both
** in the assert() bound check and in the memcpy()/length update.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {            \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );                  \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));                \
  (pBuf)->n += (nBlob);                                           \
}

/*
** Append integer iVal to buffer pBuf, encoded by sqlite3Fts5PutVarint().
** As with fts5BufferSafeAppendBlob(), the buffer is not grown. An assert()
** checks after the write that the reserved space was not exceeded.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                      \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));   \
  assert( (pBuf)->nSpace>=(pBuf)->n );                                \
}
10956 | |||||
10957 | |||||
10958 | /* | ||||
10959 | ** Serialize and store the "structure" record. | ||||
10960 | ** | ||||
10961 | ** If an error occurs, leave an error code in the Fts5Index object. If an | ||||
10962 | ** error has already occurred, this function is a no-op. | ||||
10963 | */ | ||||
10964 | static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ | ||||
10965 | if( p->rc==SQLITE_OK0 ){ | ||||
10966 | Fts5Buffer buf; /* Buffer to serialize record into */ | ||||
10967 | int iLvl; /* Used to iterate through levels */ | ||||
10968 | int iCookie; /* Cookie value to store */ | ||||
10969 | int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9)); | ||||
10970 | |||||
10971 | assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) )((void) (0)); | ||||
10972 | memset(&buf, 0, sizeof(Fts5Buffer)); | ||||
10973 | |||||
10974 | /* Append the current configuration cookie */ | ||||
10975 | iCookie = p->pConfig->iCookie; | ||||
10976 | if( iCookie<0 ) iCookie = 0; | ||||
10977 | |||||
10978 | if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){ | ||||
10979 | sqlite3Fts5Put32(buf.p, iCookie); | ||||
10980 | buf.n = 4; | ||||
10981 | if( pStruct->nOriginCntr>0 ){ | ||||
10982 | fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4){ ((void) (0)); memcpy(&(&buf)->p[(&buf)->n ], "\xFF\x00\x00\x01", 4); (&buf)->n += 4; }; | ||||
10983 | } | ||||
10984 | fts5BufferSafeAppendVarint(&buf, pStruct->nLevel){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], (pStruct->nLevel)); ((void) (0)); }; | ||||
10985 | fts5BufferSafeAppendVarint(&buf, pStruct->nSegment){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], (pStruct->nSegment)); ((void) (0)); }; | ||||
10986 | fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], ((i64)pStruct->nWriteCounter)); ((void ) (0)); }; | ||||
10987 | } | ||||
10988 | |||||
10989 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | ||||
10990 | int iSeg; /* Used to iterate through segments */ | ||||
10991 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | ||||
10992 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pLvl ->nMerge); | ||||
10993 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pLvl ->nSeg); | ||||
10994 | assert( pLvl->nMerge<=pLvl->nSeg )((void) (0)); | ||||
10995 | |||||
10996 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | ||||
10997 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | ||||
10998 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iSegid); | ||||
10999 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->pgnoFirst); | ||||
11000 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->pgnoLast); | ||||
11001 | if( pStruct->nOriginCntr>0 ){ | ||||
11002 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iOrigin1); | ||||
11003 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iOrigin2); | ||||
11004 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nPgTombstone); | ||||
11005 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nEntryTombstone); | ||||
11006 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nEntry); | ||||
11007 | } | ||||
11008 | } | ||||
11009 | } | ||||
11010 | |||||
11011 | fts5DataWrite(p, FTS5_STRUCTURE_ROWID10, buf.p, buf.n); | ||||
11012 | fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf); | ||||
11013 | } | ||||
11014 | } | ||||
11015 | |||||
/*
** Debugging aid, normally compiled out. When the "#if 0" below is changed
** to "#if 1", fts5PrintStructure() writes zCaption followed by the text
** produced by fts5DebugStructure() for pStruct to stdout. In the default
** configuration fts5PrintStructure() is a macro that expands to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer text;
  memset(&text, 0, sizeof(text));
  fts5DebugStructure(&rc, &text, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, text.p);
  fflush(stdout);
  fts5BufferFree(&text);
}
#else
# define fts5PrintStructure(x,y)
#endif
11030 | |||||
11031 | static int fts5SegmentSize(Fts5StructureSegment *pSeg){ | ||||
11032 | return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; | ||||
11033 | } | ||||
11034 | |||||
11035 | /* | ||||
11036 | ** Return a copy of index structure pStruct. Except, promote as many | ||||
11037 | ** segments as possible to level iPromote. If an OOM occurs, NULL is | ||||
11038 | ** returned. | ||||
11039 | */ | ||||
11040 | static void fts5StructurePromoteTo( | ||||
11041 | Fts5Index *p, | ||||
11042 | int iPromote, | ||||
11043 | int szPromote, | ||||
11044 | Fts5Structure *pStruct | ||||
11045 | ){ | ||||
11046 | int il, is; | ||||
11047 | Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; | ||||
11048 | |||||
11049 | if( pOut->nMerge==0 ){ | ||||
11050 | for(il=iPromote+1; il<pStruct->nLevel; il++){ | ||||
11051 | Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; | ||||
11052 | if( pLvl->nMerge ) return; | ||||
11053 | for(is=pLvl->nSeg-1; is>=0; is--){ | ||||
11054 | int sz = fts5SegmentSize(&pLvl->aSeg[is]); | ||||
11055 | if( sz>szPromote ) return; | ||||
11056 | fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); | ||||
11057 | if( p->rc ) return; | ||||
11058 | memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); | ||||
11059 | pOut->nSeg++; | ||||
11060 | pLvl->nSeg--; | ||||
11061 | } | ||||
11062 | } | ||||
11063 | } | ||||
11064 | } | ||||
11065 | |||||
11066 | /* | ||||
11067 | ** A new segment has just been written to level iLvl of index structure | ||||
11068 | ** pStruct. This function determines if any segments should be promoted | ||||
11069 | ** as a result. Segments are promoted in two scenarios: | ||||
11070 | ** | ||||
11071 | ** a) If the segment just written is smaller than one or more segments | ||||
11072 | ** within the previous populated level, it is promoted to the previous | ||||
11073 | ** populated level. | ||||
11074 | ** | ||||
11075 | ** b) If the segment just written is larger than the newest segment on | ||||
11076 | ** the next populated level, then that segment, and any other adjacent | ||||
11077 | ** segments that are also smaller than the one just written, are | ||||
11078 | ** promoted. | ||||
11079 | ** | ||||
11080 | ** If one or more segments are promoted, the structure object is updated | ||||
11081 | ** to reflect this. | ||||
11082 | */ | ||||
11083 | static void fts5StructurePromote( | ||||
11084 | Fts5Index *p, /* FTS5 backend object */ | ||||
11085 | int iLvl, /* Index level just updated */ | ||||
11086 | Fts5Structure *pStruct /* Index structure */ | ||||
11087 | ){ | ||||
11088 | if( p->rc==SQLITE_OK0 ){ | ||||
11089 | int iTst; | ||||
11090 | int iPromote = -1; | ||||
11091 | int szPromote = 0; /* Promote anything this size or smaller */ | ||||
11092 | Fts5StructureSegment *pSeg; /* Segment just written */ | ||||
11093 | int szSeg; /* Size of segment just written */ | ||||
11094 | int nSeg = pStruct->aLevel[iLvl].nSeg; | ||||
11095 | |||||
11096 | if( nSeg==0 ) return; | ||||
11097 | pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; | ||||
11098 | szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); | ||||
11099 | |||||
11100 | /* Check for condition (a) */ | ||||
11101 | for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); | ||||
11102 | if( iTst>=0 ){ | ||||
11103 | int i; | ||||
11104 | int szMax = 0; | ||||
11105 | Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; | ||||
11106 | assert( pTst->nMerge==0 )((void) (0)); | ||||
11107 | for(i=0; i<pTst->nSeg; i++){ | ||||
11108 | int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; | ||||
11109 | if( sz>szMax ) szMax = sz; | ||||
11110 | } | ||||
11111 | if( szMax>=szSeg ){ | ||||
11112 | /* Condition (a) is true. Promote the newest segment on level | ||||
11113 | ** iLvl to level iTst. */ | ||||
11114 | iPromote = iTst; | ||||
11115 | szPromote = szMax; | ||||
11116 | } | ||||
11117 | } | ||||
11118 | |||||
11119 | /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() | ||||
11120 | ** is a no-op if it is not. */ | ||||
11121 | if( iPromote<0 ){ | ||||
11122 | iPromote = iLvl; | ||||
11123 | szPromote = szSeg; | ||||
11124 | } | ||||
11125 | fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); | ||||
11126 | } | ||||
11127 | } | ||||
11128 | |||||
11129 | |||||
11130 | /* | ||||
11131 | ** Advance the iterator passed as the only argument. If the end of the | ||||
11132 | ** doclist-index page is reached, return non-zero. | ||||
11133 | */ | ||||
11134 | static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ | ||||
11135 | Fts5Data *pData = pLvl->pData; | ||||
11136 | |||||
11137 | if( pLvl->iOff==0 ){ | ||||
11138 | assert( pLvl->bEof==0 )((void) (0)); | ||||
11139 | pLvl->iOff = 1; | ||||
11140 | pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno)sqlite3Fts5GetVarint32(&pData->p[1],(u32*)&(pLvl-> iLeafPgno)); | ||||
11141 | pLvl->iOff += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); | ||||
11142 | pLvl->iFirstOff = pLvl->iOff; | ||||
11143 | }else{ | ||||
11144 | int iOff; | ||||
11145 | for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ | ||||
11146 | if( pData->p[iOff] ) break; | ||||
11147 | } | ||||
11148 | |||||
11149 | if( iOff<pData->nn ){ | ||||
11150 | u64 iVal; | ||||
11151 | pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; | ||||
11152 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[iOff], &iVal); | ||||
11153 | pLvl->iRowid += iVal; | ||||
11154 | pLvl->iOff = iOff; | ||||
11155 | }else{ | ||||
11156 | pLvl->bEof = 1; | ||||
11157 | } | ||||
11158 | } | ||||
11159 | |||||
11160 | return pLvl->bEof; | ||||
11161 | } | ||||
11162 | |||||
11163 | /* | ||||
11164 | ** Advance the iterator passed as the only argument. | ||||
11165 | */ | ||||
11166 | static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ | ||||
11167 | Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; | ||||
11168 | |||||
11169 | assert( iLvl<pIter->nLvl )((void) (0)); | ||||
11170 | if( fts5DlidxLvlNext(pLvl) ){ | ||||
11171 | if( (iLvl+1) < pIter->nLvl ){ | ||||
11172 | fts5DlidxIterNextR(p, pIter, iLvl+1); | ||||
11173 | if( pLvl[1].bEof==0 ){ | ||||
11174 | fts5DataRelease(pLvl->pData); | ||||
11175 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); | ||||
11176 | pLvl->pData = fts5DataRead(p, | ||||
11177 | FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(iLvl) << (31)) + ((i64)(pLvl[1].iLeafPgno )) ) | ||||
11178 | ); | ||||
11179 | if( pLvl->pData ) fts5DlidxLvlNext(pLvl); | ||||
11180 | } | ||||
11181 | } | ||||
11182 | } | ||||
11183 | |||||
11184 | return pIter->aLvl[0].bEof; | ||||
11185 | } | ||||
11186 | static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ | ||||
11187 | return fts5DlidxIterNextR(p, pIter, 0); | ||||
11188 | } | ||||
11189 | |||||
11190 | /* | ||||
11191 | ** The iterator passed as the first argument has the following fields set | ||||
11192 | ** as follows. This function sets up the rest of the iterator so that it | ||||
11193 | ** points to the first rowid in the doclist-index. | ||||
11194 | ** | ||||
11195 | ** pData: | ||||
11196 | ** pointer to doclist-index record, | ||||
11197 | ** | ||||
11198 | ** When this function is called pIter->iLeafPgno is the page number the | ||||
11199 | ** doclist is associated with (the one featuring the term). | ||||
11200 | */ | ||||
11201 | static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ | ||||
11202 | int i; | ||||
11203 | for(i=0; i<pIter->nLvl; i++){ | ||||
11204 | fts5DlidxLvlNext(&pIter->aLvl[i]); | ||||
11205 | } | ||||
11206 | return pIter->aLvl[0].bEof; | ||||
11207 | } | ||||
11208 | |||||
11209 | |||||
11210 | static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ | ||||
11211 | return p->rc!=SQLITE_OK0 || pIter->aLvl[0].bEof; | ||||
11212 | } | ||||
11213 | |||||
11214 | static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ | ||||
11215 | int i; | ||||
11216 | |||||
11217 | /* Advance each level to the last entry on the last page */ | ||||
11218 | for(i=pIter->nLvl-1; p->rc==SQLITE_OK0 && i>=0; i--){ | ||||
11219 | Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; | ||||
11220 | while( fts5DlidxLvlNext(pLvl)==0 ); | ||||
11221 | pLvl->bEof = 0; | ||||
11222 | |||||
11223 | if( i>0 ){ | ||||
11224 | Fts5DlidxLvl *pChild = &pLvl[-1]; | ||||
11225 | fts5DataRelease(pChild->pData); | ||||
11226 | memset(pChild, 0, sizeof(Fts5DlidxLvl)); | ||||
11227 | pChild->pData = fts5DataRead(p, | ||||
11228 | FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i-1) << (31)) + ((i64)(pLvl->iLeafPgno )) ) | ||||
11229 | ); | ||||
11230 | } | ||||
11231 | } | ||||
11232 | } | ||||
11233 | |||||
11234 | /* | ||||
11235 | ** Move the iterator passed as the only argument to the previous entry. | ||||
11236 | */ | ||||
11237 | static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ | ||||
11238 | int iOff = pLvl->iOff; | ||||
11239 | |||||
11240 | assert( pLvl->bEof==0 )((void) (0)); | ||||
11241 | if( iOff<=pLvl->iFirstOff ){ | ||||
11242 | pLvl->bEof = 1; | ||||
11243 | }else{ | ||||
11244 | u8 *a = pLvl->pData->p; | ||||
11245 | |||||
11246 | pLvl->iOff = 0; | ||||
11247 | fts5DlidxLvlNext(pLvl); | ||||
11248 | while( 1 ){ | ||||
11249 | int nZero = 0; | ||||
11250 | int ii = pLvl->iOff; | ||||
11251 | u64 delta = 0; | ||||
11252 | |||||
11253 | while( a[ii]==0 ){ | ||||
11254 | nZero++; | ||||
11255 | ii++; | ||||
11256 | } | ||||
11257 | ii += sqlite3Fts5GetVarint(&a[ii], &delta); | ||||
11258 | |||||
11259 | if( ii>=iOff ) break; | ||||
11260 | pLvl->iLeafPgno += nZero+1; | ||||
11261 | pLvl->iRowid += delta; | ||||
11262 | pLvl->iOff = ii; | ||||
11263 | } | ||||
11264 | } | ||||
11265 | |||||
11266 | return pLvl->bEof; | ||||
11267 | } | ||||
11268 | |||||
11269 | static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ | ||||
11270 | Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; | ||||
11271 | |||||
11272 | assert( iLvl<pIter->nLvl )((void) (0)); | ||||
11273 | if( fts5DlidxLvlPrev(pLvl) ){ | ||||
11274 | if( (iLvl+1) < pIter->nLvl ){ | ||||
11275 | fts5DlidxIterPrevR(p, pIter, iLvl+1); | ||||
11276 | if( pLvl[1].bEof==0 ){ | ||||
11277 | fts5DataRelease(pLvl->pData); | ||||
11278 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); | ||||
11279 | pLvl->pData = fts5DataRead(p, | ||||
11280 | FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(iLvl) << (31)) + ((i64)(pLvl[1].iLeafPgno )) ) | ||||
11281 | ); | ||||
11282 | if( pLvl->pData ){ | ||||
11283 | while( fts5DlidxLvlNext(pLvl)==0 ); | ||||
11284 | pLvl->bEof = 0; | ||||
11285 | } | ||||
11286 | } | ||||
11287 | } | ||||
11288 | } | ||||
11289 | |||||
11290 | return pIter->aLvl[0].bEof; | ||||
11291 | } | ||||
11292 | static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ | ||||
11293 | return fts5DlidxIterPrevR(p, pIter, 0); | ||||
11294 | } | ||||
11295 | |||||
11296 | /* | ||||
11297 | ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). | ||||
11298 | */ | ||||
11299 | static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ | ||||
11300 | if( pIter ){ | ||||
11301 | int i; | ||||
11302 | for(i=0; i<pIter->nLvl; i++){ | ||||
11303 | fts5DataRelease(pIter->aLvl[i].pData); | ||||
11304 | } | ||||
11305 | sqlite3_freesqlite3_api->free(pIter); | ||||
11306 | } | ||||
11307 | } | ||||
11308 | |||||
11309 | static Fts5DlidxIter *fts5DlidxIterInit( | ||||
11310 | Fts5Index *p, /* Fts5 Backend to iterate within */ | ||||
11311 | int bRev, /* True for ORDER BY ASC */ | ||||
11312 | int iSegid, /* Segment id */ | ||||
11313 | int iLeafPg /* Leaf page number to load dlidx for */ | ||||
11314 | ){ | ||||
11315 | Fts5DlidxIter *pIter = 0; | ||||
11316 | int i; | ||||
11317 | int bDone = 0; | ||||
11318 | |||||
11319 | for(i=0; p->rc==SQLITE_OK0 && bDone==0; i++){ | ||||
11320 | sqlite3_int64 nByte = SZ_FTS5DLIDXITER(i+1)(__builtin_offsetof(Fts5DlidxIter, aLvl)+(i+1)*sizeof(Fts5DlidxLvl )); | ||||
11321 | Fts5DlidxIter *pNew; | ||||
11322 | |||||
11323 | pNew = (Fts5DlidxIter*)sqlite3_realloc64sqlite3_api->realloc64(pIter, nByte); | ||||
11324 | if( pNew==0 ){ | ||||
11325 | p->rc = SQLITE_NOMEM7; | ||||
11326 | }else{ | ||||
11327 | i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(iLeafPg)) ); | ||||
11328 | Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; | ||||
11329 | pIter = pNew; | ||||
11330 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); | ||||
11331 | pLvl->pData = fts5DataRead(p, iRowid); | ||||
11332 | if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ | ||||
11333 | bDone = 1; | ||||
11334 | } | ||||
11335 | pIter->nLvl = i+1; | ||||
11336 | } | ||||
11337 | } | ||||
11338 | |||||
11339 | if( p->rc==SQLITE_OK0 ){ | ||||
11340 | pIter->iSegid = iSegid; | ||||
11341 | if( bRev==0 ){ | ||||
11342 | fts5DlidxIterFirst(pIter); | ||||
11343 | }else{ | ||||
11344 | fts5DlidxIterLast(p, pIter); | ||||
11345 | } | ||||
11346 | } | ||||
11347 | |||||
11348 | if( p->rc!=SQLITE_OK0 ){ | ||||
11349 | fts5DlidxIterFree(pIter); | ||||
11350 | pIter = 0; | ||||
11351 | } | ||||
11352 | |||||
11353 | return pIter; | ||||
11354 | } | ||||
11355 | |||||
11356 | static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ | ||||
11357 | return pIter->aLvl[0].iRowid; | ||||
11358 | } | ||||
11359 | static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ | ||||
11360 | return pIter->aLvl[0].iLeafPgno; | ||||
11361 | } | ||||
11362 | |||||
11363 | /* | ||||
11364 | ** Load the next leaf page into the segment iterator. | ||||
11365 | */ | ||||
11366 | static void fts5SegIterNextPage( | ||||
11367 | Fts5Index *p, /* FTS5 backend object */ | ||||
11368 | Fts5SegIter *pIter /* Iterator to advance to next page */ | ||||
11369 | ){ | ||||
11370 | Fts5Data *pLeaf; | ||||
11371 | Fts5StructureSegment *pSeg = pIter->pSeg; | ||||
11372 | fts5DataRelease(pIter->pLeaf); | ||||
11373 | pIter->iLeafPgno++; | ||||
11374 | if( pIter->pNextLeaf ){ | ||||
11375 | pIter->pLeaf = pIter->pNextLeaf; | ||||
11376 | pIter->pNextLeaf = 0; | ||||
11377 | }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ | ||||
11378 | pIter->pLeaf = fts5LeafRead(p, | ||||
11379 | FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pIter->iLeafPgno )) ) | ||||
11380 | ); | ||||
11381 | }else{ | ||||
11382 | pIter->pLeaf = 0; | ||||
11383 | } | ||||
11384 | pLeaf = pIter->pLeaf; | ||||
11385 | |||||
11386 | if( pLeaf ){ | ||||
11387 | pIter->iPgidxOff = pLeaf->szLeaf; | ||||
11388 | if( fts5LeafIsTermless(pLeaf)((pLeaf)->szLeaf >= (pLeaf)->nn) ){ | ||||
11389 | pIter->iEndofDoclist = pLeaf->nn+1; | ||||
11390 | }else{ | ||||
11391 | pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],sqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)) | ||||
11392 | pIter->iEndofDoclistsqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)) | ||||
11393 | )sqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)); | ||||
11394 | } | ||||
11395 | } | ||||
11396 | } | ||||
11397 | |||||
11398 | /* | ||||
11399 | ** Argument p points to a buffer containing a varint to be interpreted as a | ||||
11400 | ** position list size field. Read the varint and return the number of bytes | ||||
11401 | ** read. Before returning, set *pnSz to the number of bytes in the position | ||||
11402 | ** list, and *pbDel to true if the delete flag is set, or false otherwise. | ||||
11403 | */ | ||||
11404 | static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ | ||||
11405 | int nSz; | ||||
11406 | int n = 0; | ||||
11407 | fts5FastGetVarint32(p, n, nSz){ nSz = (p)[n++]; if( nSz & 0x80 ){ n--; n += sqlite3Fts5GetVarint32 (&(p)[n],(u32*)&(nSz)); } }; | ||||
11408 | assert_nc( nSz>=0 )((void) (0)); | ||||
11409 | *pnSz = nSz/2; | ||||
11410 | *pbDel = nSz & 0x0001; | ||||
11411 | return n; | ||||
11412 | } | ||||
11413 | |||||
11414 | /* | ||||
11415 | ** Fts5SegIter.iLeafOffset currently points to the first byte of a | ||||
11416 | ** position-list size field. Read the value of the field and store it | ||||
11417 | ** in the following variables: | ||||
11418 | ** | ||||
11419 | ** Fts5SegIter.nPos | ||||
11420 | ** Fts5SegIter.bDel | ||||
11421 | ** | ||||
11422 | ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the | ||||
11423 | ** position list content (if any). | ||||
11424 | */ | ||||
11425 | static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ | ||||
11426 | if( p->rc==SQLITE_OK0 ){ | ||||
11427 | int iOff = pIter->iLeafOffset; /* Offset to read at */ | ||||
11428 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | ||||
11429 | if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
11430 | int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf)(((pIter->iEndofDoclist) < (pIter->pLeaf->szLeaf) ) ? (pIter->iEndofDoclist) : (pIter->pLeaf->szLeaf)); | ||||
11431 | pIter->bDel = 0; | ||||
11432 | pIter->nPos = 1; | ||||
11433 | if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ | ||||
11434 | pIter->bDel = 1; | ||||
11435 | iOff++; | ||||
11436 | if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ | ||||
11437 | pIter->nPos = 1; | ||||
11438 | iOff++; | ||||
11439 | }else{ | ||||
11440 | pIter->nPos = 0; | ||||
11441 | } | ||||
11442 | } | ||||
11443 | }else{ | ||||
11444 | int nSz; | ||||
11445 | fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz){ nSz = (pIter->pLeaf->p)[iOff++]; if( nSz & 0x80 ) { iOff--; iOff += sqlite3Fts5GetVarint32(&(pIter->pLeaf ->p)[iOff],(u32*)&(nSz)); } }; | ||||
11446 | pIter->bDel = (nSz & 0x0001); | ||||
11447 | pIter->nPos = nSz>>1; | ||||
11448 | assert_nc( pIter->nPos>=0 )((void) (0)); | ||||
11449 | } | ||||
11450 | pIter->iLeafOffset = iOff; | ||||
11451 | } | ||||
11452 | } | ||||
11453 | |||||
11454 | static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ | ||||
11455 | u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ | ||||
11456 | i64 iOff = pIter->iLeafOffset; | ||||
11457 | |||||
11458 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | ||||
11459 | while( iOff>=pIter->pLeaf->szLeaf ){ | ||||
11460 | fts5SegIterNextPage(p, pIter); | ||||
11461 | if( pIter->pLeaf==0 ){ | ||||
11462 | if( p->rc==SQLITE_OK0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
11463 | return; | ||||
11464 | } | ||||
11465 | iOff = 4; | ||||
11466 | a = pIter->pLeaf->p; | ||||
11467 | } | ||||
11468 | iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); | ||||
11469 | pIter->iLeafOffset = iOff; | ||||
11470 | } | ||||
11471 | |||||
11472 | /* | ||||
11473 | ** Fts5SegIter.iLeafOffset currently points to the first byte of the | ||||
11474 | ** "nSuffix" field of a term. Function parameter nKeep contains the value | ||||
11475 | ** of the "nPrefix" field (if there was one - it is passed 0 if this is | ||||
11476 | ** the first term in the segment). | ||||
11477 | ** | ||||
11478 | ** This function populates: | ||||
11479 | ** | ||||
11480 | ** Fts5SegIter.term | ||||
11481 | ** Fts5SegIter.rowid | ||||
11482 | ** | ||||
11483 | ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of | ||||
11484 | ** the first position list. The position list belonging to document | ||||
11485 | ** (Fts5SegIter.iRowid). | ||||
11486 | */ | ||||
11487 | static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ | ||||
11488 | u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ | ||||
11489 | i64 iOff = pIter->iLeafOffset; /* Offset to read at */ | ||||
11490 | int nNew; /* Bytes of new data */ | ||||
11491 | |||||
11492 | iOff += fts5GetVarint32(&a[iOff], nNew)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nNew)); | ||||
11493 | if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){ | ||||
11494 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
11495 | return; | ||||
11496 | } | ||||
11497 | pIter->term.n = nKeep; | ||||
11498 | fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&pIter->term ,nNew,&a[iOff]); | ||||
11499 | assert( pIter->term.n<=pIter->term.nSpace )((void) (0)); | ||||
11500 | iOff += nNew; | ||||
11501 | pIter->iTermLeafOffset = iOff; | ||||
11502 | pIter->iTermLeafPgno = pIter->iLeafPgno; | ||||
11503 | pIter->iLeafOffset = iOff; | ||||
11504 | |||||
11505 | if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ | ||||
11506 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | ||||
11507 | }else{ | ||||
11508 | int nExtra; | ||||
11509 | pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra)sqlite3Fts5GetVarint32(&a[pIter->iPgidxOff],(u32*)& (nExtra)); | ||||
11510 | pIter->iEndofDoclist += nExtra; | ||||
11511 | } | ||||
11512 | |||||
11513 | fts5SegIterLoadRowid(p, pIter); | ||||
11514 | } | ||||
11515 | |||||
11516 | static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); | ||||
11517 | static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); | ||||
11518 | static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); | ||||
11519 | |||||
11520 | static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ | ||||
11521 | if( pIter->flags & FTS5_SEGITER_REVERSE0x02 ){ | ||||
11522 | pIter->xNext = fts5SegIterNext_Reverse; | ||||
11523 | }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
11524 | pIter->xNext = fts5SegIterNext_None; | ||||
11525 | }else{ | ||||
11526 | pIter->xNext = fts5SegIterNext; | ||||
11527 | } | ||||
11528 | } | ||||
11529 | |||||
11530 | /* | ||||
11531 | ** Allocate a tombstone hash page array object (pIter->pTombArray) for | ||||
11532 | ** the iterator passed as the second argument. If an OOM error occurs, | ||||
11533 | ** leave an error in the Fts5Index object. | ||||
11534 | */ | ||||
11535 | static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){ | ||||
11536 | const int nTomb = pIter->pSeg->nPgTombstone; | ||||
11537 | if( nTomb>0 ){ | ||||
11538 | int nByte = SZ_FTS5TOMBSTONEARRAY(nTomb+1)(__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(nTomb+1 )*sizeof(Fts5Data*)); | ||||
11539 | Fts5TombstoneArray *pNew; | ||||
11540 | pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte); | ||||
11541 | if( pNew ){ | ||||
11542 | pNew->nTombstone = nTomb; | ||||
11543 | pNew->nRef = 1; | ||||
11544 | pIter->pTombArray = pNew; | ||||
11545 | } | ||||
11546 | } | ||||
11547 | } | ||||
11548 | |||||
11549 | /* | ||||
11550 | ** Initialize the iterator object pIter to iterate through the entries in | ||||
11551 | ** segment pSeg. The iterator is left pointing to the first entry when | ||||
11552 | ** this function returns. | ||||
11553 | ** | ||||
11554 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | ||||
11555 | ** an error has already occurred when this function is called, it is a no-op. | ||||
11556 | */ | ||||
11557 | static void fts5SegIterInit( | ||||
11558 | Fts5Index *p, /* FTS index object */ | ||||
11559 | Fts5StructureSegment *pSeg, /* Description of segment */ | ||||
11560 | Fts5SegIter *pIter /* Object to populate */ | ||||
11561 | ){ | ||||
11562 | if( pSeg->pgnoFirst==0 ){ | ||||
11563 | /* This happens if the segment is being used as an input to an incremental | ||||
11564 | ** merge and all data has already been "trimmed". See function | ||||
11565 | ** fts5TrimSegments() for details. In this case leave the iterator empty. | ||||
11566 | ** The caller will see the (pIter->pLeaf==0) and assume the iterator is | ||||
11567 | ** at EOF already. */ | ||||
11568 | assert( pIter->pLeaf==0 )((void) (0)); | ||||
11569 | return; | ||||
11570 | } | ||||
11571 | |||||
11572 | if( p->rc==SQLITE_OK0 ){ | ||||
11573 | memset(pIter, 0, sizeof(*pIter)); | ||||
11574 | fts5SegIterSetNext(p, pIter); | ||||
11575 | pIter->pSeg = pSeg; | ||||
11576 | pIter->iLeafPgno = pSeg->pgnoFirst-1; | ||||
11577 | do { | ||||
11578 | fts5SegIterNextPage(p, pIter); | ||||
11579 | }while( p->rc==SQLITE_OK0 && pIter->pLeaf && pIter->pLeaf->nn==4 ); | ||||
11580 | } | ||||
11581 | |||||
11582 | if( p->rc==SQLITE_OK0 && pIter->pLeaf ){ | ||||
11583 | pIter->iLeafOffset = 4; | ||||
11584 | assert( pIter->pLeaf!=0 )((void) (0)); | ||||
11585 | assert_nc( pIter->pLeaf->nn>4 )((void) (0)); | ||||
11586 | assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 )((void) (0)); | ||||
11587 | pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; | ||||
11588 | fts5SegIterLoadTerm(p, pIter, 0); | ||||
11589 | fts5SegIterLoadNPos(p, pIter); | ||||
11590 | fts5SegIterAllocTombstone(p, pIter); | ||||
11591 | } | ||||
11592 | } | ||||
11593 | |||||
11594 | /* | ||||
11595 | ** This function is only ever called on iterators created by calls to | ||||
11596 | ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. | ||||
11597 | ** | ||||
11598 | ** The iterator is in an unusual state when this function is called: the | ||||
11599 | ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of | ||||
11600 | ** the position-list size field for the first relevant rowid on the page. | ||||
11601 | ** Fts5SegIter.rowid is set, but nPos and bDel are not. | ||||
11602 | ** | ||||
11603 | ** This function advances the iterator so that it points to the last | ||||
11604 | ** relevant rowid on the page and, if necessary, initializes the | ||||
11605 | ** aRowidOffset[] and iRowidOffset variables. At this point the iterator | ||||
11606 | ** is in its regular state - Fts5SegIter.iLeafOffset points to the first | ||||
11607 | ** byte of the position list content associated with said rowid. | ||||
11608 | */ | ||||
11609 | static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ | ||||
11610 | int eDetail = p->pConfig->eDetail; | ||||
11611 | int n = pIter->pLeaf->szLeaf; | ||||
11612 | int i = pIter->iLeafOffset; | ||||
11613 | u8 *a = pIter->pLeaf->p; | ||||
11614 | int iRowidOffset = 0; | ||||
11615 | |||||
11616 | if( n>pIter->iEndofDoclist ){ | ||||
11617 | n = pIter->iEndofDoclist; | ||||
11618 | } | ||||
11619 | |||||
11620 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | ||||
11621 | while( 1 ){ | ||||
11622 | u64 iDelta = 0; | ||||
11623 | |||||
11624 | if( eDetail==FTS5_DETAIL_NONE1 ){ | ||||
11625 | /* todo */ | ||||
11626 | if( i<n && a[i]==0 ){ | ||||
11627 | i++; | ||||
11628 | if( i<n && a[i]==0 ) i++; | ||||
11629 | } | ||||
11630 | }else{ | ||||
11631 | int nPos; | ||||
11632 | int bDummy; | ||||
11633 | i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); | ||||
11634 | i += nPos; | ||||
11635 | } | ||||
11636 | if( i>=n ) break; | ||||
11637 | i += fts5GetVarintsqlite3Fts5GetVarint(&a[i], &iDelta); | ||||
11638 | pIter->iRowid += iDelta; | ||||
11639 | |||||
11640 | /* If necessary, grow the pIter->aRowidOffset[] array. */ | ||||
11641 | if( iRowidOffset>=pIter->nRowidOffset ){ | ||||
11642 | int nNew = pIter->nRowidOffset + 8; | ||||
11643 | int *aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(pIter->aRowidOffset,nNew*sizeof(int)); | ||||
11644 | if( aNew==0 ){ | ||||
11645 | p->rc = SQLITE_NOMEM7; | ||||
11646 | break; | ||||
11647 | } | ||||
11648 | pIter->aRowidOffset = aNew; | ||||
11649 | pIter->nRowidOffset = nNew; | ||||
11650 | } | ||||
11651 | |||||
11652 | pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; | ||||
11653 | pIter->iLeafOffset = i; | ||||
11654 | } | ||||
11655 | pIter->iRowidOffset = iRowidOffset; | ||||
11656 | fts5SegIterLoadNPos(p, pIter); | ||||
11657 | } | ||||
11658 | |||||
11659 | /* | ||||
11660 | ** | ||||
11661 | */ | ||||
11662 | static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ | ||||
11663 | assert( pIter->flags & FTS5_SEGITER_REVERSE )((void) (0)); | ||||
11664 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | ||||
11665 | |||||
11666 | fts5DataRelease(pIter->pLeaf); | ||||
11667 | pIter->pLeaf = 0; | ||||
11668 | while( p->rc==SQLITE_OK0 && pIter->iLeafPgno>pIter->iTermLeafPgno ){ | ||||
11669 | Fts5Data *pNew; | ||||
11670 | pIter->iLeafPgno--; | ||||
11671 | pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) ) | ||||
11672 | pIter->pSeg->iSegid, pIter->iLeafPgno( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) ) | ||||
11673 | )( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) )); | ||||
11674 | if( pNew ){ | ||||
11675 | /* iTermLeafOffset may be equal to szLeaf if the term is the last | ||||
11676 | ** thing on the page - i.e. the first rowid is on the following page. | ||||
11677 | ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */ | ||||
11678 | if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ | ||||
11679 | assert( pIter->pLeaf==0 )((void) (0)); | ||||
11680 | if( pIter->iTermLeafOffset<pNew->szLeaf ){ | ||||
11681 | pIter->pLeaf = pNew; | ||||
11682 | pIter->iLeafOffset = pIter->iTermLeafOffset; | ||||
11683 | } | ||||
11684 | }else{ | ||||
11685 | int iRowidOff; | ||||
11686 | iRowidOff = fts5LeafFirstRowidOff(pNew)(fts5GetU16((pNew)->p)); | ||||
11687 | if( iRowidOff ){ | ||||
11688 | if( iRowidOff>=pNew->szLeaf ){ | ||||
11689 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
11690 | }else{ | ||||
11691 | pIter->pLeaf = pNew; | ||||
11692 | pIter->iLeafOffset = iRowidOff; | ||||
11693 | } | ||||
11694 | } | ||||
11695 | } | ||||
11696 | |||||
11697 | if( pIter->pLeaf ){ | ||||
11698 | u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; | ||||
11699 | pIter->iLeafOffset += fts5GetVarintsqlite3Fts5GetVarint(a, (u64*)&pIter->iRowid); | ||||
11700 | break; | ||||
11701 | }else{ | ||||
11702 | fts5DataRelease(pNew); | ||||
11703 | } | ||||
11704 | } | ||||
11705 | } | ||||
11706 | |||||
11707 | if( pIter->pLeaf ){ | ||||
11708 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | ||||
11709 | fts5SegIterReverseInitPage(p, pIter); | ||||
11710 | } | ||||
11711 | } | ||||
11712 | |||||
11713 | /* | ||||
11714 | ** Return true if the iterator passed as the second argument currently | ||||
11715 | ** points to a delete marker. A delete marker is an entry with a 0 byte | ||||
11716 | ** position-list. | ||||
11717 | */ | ||||
11718 | static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ | ||||
11719 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | ||||
11720 | return (p->rc==SQLITE_OK0 && pSeg->pLeaf && pSeg->nPos==0); | ||||
11721 | } | ||||
11722 | |||||
11723 | /* | ||||
11724 | ** Advance iterator pIter to the next entry. | ||||
11725 | ** | ||||
11726 | ** This version of fts5SegIterNext() is only used by reverse iterators. | ||||
11727 | */ | ||||
11728 | static void fts5SegIterNext_Reverse( | ||||
11729 | Fts5Index *p, /* FTS5 backend object */ | ||||
11730 | Fts5SegIter *pIter, /* Iterator to advance */ | ||||
11731 | int *pbUnused /* Unused */ | ||||
11732 | ){ | ||||
11733 | assert( pIter->flags & FTS5_SEGITER_REVERSE )((void) (0)); | ||||
11734 | assert( pIter->pNextLeaf==0 )((void) (0)); | ||||
11735 | UNUSED_PARAM(pbUnused)(void)(pbUnused); | ||||
11736 | |||||
11737 | if( pIter->iRowidOffset>0 ){ | ||||
11738 | u8 *a = pIter->pLeaf->p; | ||||
11739 | int iOff; | ||||
11740 | u64 iDelta; | ||||
11741 | |||||
11742 | pIter->iRowidOffset--; | ||||
11743 | pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; | ||||
11744 | fts5SegIterLoadNPos(p, pIter); | ||||
11745 | iOff = pIter->iLeafOffset; | ||||
11746 | if( p->pConfig->eDetail!=FTS5_DETAIL_NONE1 ){ | ||||
11747 | iOff += pIter->nPos; | ||||
11748 | } | ||||
11749 | fts5GetVarintsqlite3Fts5GetVarint(&a[iOff], &iDelta); | ||||
11750 | pIter->iRowid -= iDelta; | ||||
11751 | }else{ | ||||
11752 | fts5SegIterReverseNewPage(p, pIter); | ||||
11753 | } | ||||
11754 | } | ||||
11755 | |||||
11756 | /* | ||||
11757 | ** Advance iterator pIter to the next entry. | ||||
11758 | ** | ||||
11759 | ** This version of fts5SegIterNext() is only used if detail=none and the | ||||
11760 | ** iterator is not a reverse direction iterator. | ||||
11761 | */ | ||||
11762 | static void fts5SegIterNext_None( | ||||
11763 | Fts5Index *p, /* FTS5 backend object */ | ||||
11764 | Fts5SegIter *pIter, /* Iterator to advance */ | ||||
11765 | int *pbNewTerm /* OUT: Set for new term */ | ||||
11766 | ){ | ||||
11767 | int iOff; | ||||
11768 | |||||
11769 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
11770 | assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 )((void) (0)); | ||||
11771 | assert( p->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | ||||
11772 | |||||
11773 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | ||||
11774 | iOff = pIter->iLeafOffset; | ||||
11775 | |||||
11776 | /* Next entry is on the next page */ | ||||
11777 | while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ | ||||
11778 | fts5SegIterNextPage(p, pIter); | ||||
11779 | if( p->rc || pIter->pLeaf==0 ) return; | ||||
11780 | pIter->iRowid = 0; | ||||
11781 | iOff = 4; | ||||
11782 | } | ||||
11783 | |||||
11784 | if( iOff<pIter->iEndofDoclist ){ | ||||
11785 | /* Next entry is on the current page */ | ||||
11786 | u64 iDelta; | ||||
11787 | iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta); | ||||
11788 | pIter->iLeafOffset = iOff; | ||||
11789 | pIter->iRowid += iDelta; | ||||
11790 | }else if( (pIter->flags & FTS5_SEGITER_ONETERM0x01)==0 ){ | ||||
11791 | if( pIter->pSeg ){ | ||||
11792 | int nKeep = 0; | ||||
11793 | if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ | ||||
11794 | iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep)sqlite3Fts5GetVarint32(&pIter->pLeaf->p[iOff],(u32* )&(nKeep)); | ||||
11795 | } | ||||
11796 | pIter->iLeafOffset = iOff; | ||||
11797 | fts5SegIterLoadTerm(p, pIter, nKeep); | ||||
11798 | }else{ | ||||
11799 | const u8 *pList = 0; | ||||
11800 | const char *zTerm = 0; | ||||
11801 | int nTerm = 0; | ||||
11802 | int nList; | ||||
11803 | sqlite3Fts5HashScanNext(p->pHash); | ||||
11804 | sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); | ||||
11805 | if( pList==0 ) goto next_none_eof; | ||||
11806 | pIter->pLeaf->p = (u8*)pList; | ||||
11807 | pIter->pLeaf->nn = nList; | ||||
11808 | pIter->pLeaf->szLeaf = nList; | ||||
11809 | pIter->iEndofDoclist = nList; | ||||
11810 | sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm); | ||||
11811 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pList, (u64*)&pIter->iRowid); | ||||
11812 | } | ||||
11813 | |||||
11814 | if( pbNewTerm ) *pbNewTerm = 1; | ||||
11815 | }else{ | ||||
11816 | goto next_none_eof; | ||||
11817 | } | ||||
11818 | |||||
11819 | fts5SegIterLoadNPos(p, pIter); | ||||
11820 | |||||
11821 | return; | ||||
11822 | next_none_eof: | ||||
11823 | fts5DataRelease(pIter->pLeaf); | ||||
11824 | pIter->pLeaf = 0; | ||||
11825 | } | ||||
11826 | |||||
11827 | |||||
11828 | /* | ||||
11829 | ** Advance iterator pIter to the next entry. | ||||
11830 | ** | ||||
11831 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It | ||||
11832 | ** is not considered an error if the iterator reaches EOF. If an error has | ||||
11833 | ** already occurred when this function is called, it is a no-op. | ||||
11834 | */ | ||||
11835 | static void fts5SegIterNext( | ||||
11836 | Fts5Index *p, /* FTS5 backend object */ | ||||
11837 | Fts5SegIter *pIter, /* Iterator to advance */ | ||||
11838 | int *pbNewTerm /* OUT: Set for new term */ | ||||
11839 | ){ | ||||
11840 | Fts5Data *pLeaf = pIter->pLeaf; | ||||
11841 | int iOff; | ||||
11842 | int bNewTerm = 0; | ||||
11843 | int nKeep = 0; | ||||
11844 | u8 *a; | ||||
11845 | int n; | ||||
11846 | |||||
11847 | assert( pbNewTerm==0 || *pbNewTerm==0 )((void) (0)); | ||||
11848 | assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | ||||
11849 | |||||
11850 | /* Search for the end of the position list within the current page. */ | ||||
11851 | a = pLeaf->p; | ||||
11852 | n = pLeaf->szLeaf; | ||||
11853 | |||||
11854 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | ||||
11855 | iOff = pIter->iLeafOffset + pIter->nPos; | ||||
11856 | |||||
11857 | if( iOff<n ){ | ||||
11858 | /* The next entry is on the current page. */ | ||||
11859 | assert_nc( iOff<=pIter->iEndofDoclist )((void) (0)); | ||||
11860 | if( iOff>=pIter->iEndofDoclist ){ | ||||
11861 | bNewTerm = 1; | ||||
11862 | if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ | ||||
11863 | iOff += fts5GetVarint32(&a[iOff], nKeep)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nKeep)); | ||||
11864 | } | ||||
11865 | }else{ | ||||
11866 | u64 iDelta; | ||||
11867 | iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); | ||||
11868 | pIter->iRowid += iDelta; | ||||
11869 | assert_nc( iDelta>0 )((void) (0)); | ||||
11870 | } | ||||
11871 | pIter->iLeafOffset = iOff; | ||||
11872 | |||||
11873 | }else if( pIter->pSeg==0 ){ | ||||
11874 | const u8 *pList = 0; | ||||
11875 | const char *zTerm = 0; | ||||
11876 | int nTerm = 0; | ||||
11877 | int nList = 0; | ||||
11878 | assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm )((void) (0)); | ||||
11879 | if( 0==(pIter->flags & FTS5_SEGITER_ONETERM0x01) ){ | ||||
11880 | sqlite3Fts5HashScanNext(p->pHash); | ||||
11881 | sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); | ||||
11882 | } | ||||
11883 | if( pList==0 ){ | ||||
11884 | fts5DataRelease(pIter->pLeaf); | ||||
11885 | pIter->pLeaf = 0; | ||||
11886 | }else{ | ||||
11887 | pIter->pLeaf->p = (u8*)pList; | ||||
11888 | pIter->pLeaf->nn = nList; | ||||
11889 | pIter->pLeaf->szLeaf = nList; | ||||
11890 | pIter->iEndofDoclist = nList+1; | ||||
11891 | sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm); | ||||
11892 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pList, (u64*)&pIter->iRowid); | ||||
11893 | *pbNewTerm = 1; | ||||
11894 | } | ||||
11895 | }else{ | ||||
11896 | iOff = 0; | ||||
11897 | /* Next entry is not on the current page */ | ||||
11898 | while( iOff==0 ){ | ||||
11899 | fts5SegIterNextPage(p, pIter); | ||||
11900 | pLeaf = pIter->pLeaf; | ||||
11901 | if( pLeaf==0 ) break; | ||||
11902 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | ||||
11903 | if( (iOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p))) && iOff<pLeaf->szLeaf ){ | ||||
11904 | iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); | ||||
11905 | pIter->iLeafOffset = iOff; | ||||
11906 | |||||
11907 | if( pLeaf->nn>pLeaf->szLeaf ){ | ||||
11908 | pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)) | ||||
11909 | &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclistsqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)) | ||||
11910 | )sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)); | ||||
11911 | } | ||||
11912 | } | ||||
11913 | else if( pLeaf->nn>pLeaf->szLeaf ){ | ||||
11914 | pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)) | ||||
11915 | &pLeaf->p[pLeaf->szLeaf], iOffsqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)) | ||||
11916 | )sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)); | ||||
11917 | pIter->iLeafOffset = iOff; | ||||
11918 | pIter->iEndofDoclist = iOff; | ||||
11919 | bNewTerm = 1; | ||||
11920 | } | ||||
11921 | assert_nc( iOff<pLeaf->szLeaf )((void) (0)); | ||||
11922 | if( iOff>pLeaf->szLeaf ){ | ||||
11923 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
11924 | return; | ||||
11925 | } | ||||
11926 | } | ||||
11927 | } | ||||
11928 | |||||
11929 | /* Check if the iterator is now at EOF. If so, return early. */ | ||||
11930 | if( pIter->pLeaf ){ | ||||
11931 | if( bNewTerm ){ | ||||
11932 | if( pIter->flags & FTS5_SEGITER_ONETERM0x01 ){ | ||||
11933 | fts5DataRelease(pIter->pLeaf); | ||||
11934 | pIter->pLeaf = 0; | ||||
11935 | }else{ | ||||
11936 | fts5SegIterLoadTerm(p, pIter, nKeep); | ||||
11937 | fts5SegIterLoadNPos(p, pIter); | ||||
11938 | if( pbNewTerm ) *pbNewTerm = 1; | ||||
11939 | } | ||||
11940 | }else{ | ||||
11941 | /* The following could be done by calling fts5SegIterLoadNPos(). But | ||||
11942 | ** this block is particularly performance critical, so equivalent | ||||
11943 | ** code is inlined. */ | ||||
11944 | int nSz; | ||||
11945 | assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn )((void) (0)); | ||||
11946 | fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz){ nSz = (pIter->pLeaf->p)[pIter->iLeafOffset++]; if( nSz & 0x80 ){ pIter->iLeafOffset--; pIter->iLeafOffset += sqlite3Fts5GetVarint32(&(pIter->pLeaf->p)[pIter ->iLeafOffset],(u32*)&(nSz)); } }; | ||||
11947 | pIter->bDel = (nSz & 0x0001); | ||||
11948 | pIter->nPos = nSz>>1; | ||||
11949 | assert_nc( pIter->nPos>=0 )((void) (0)); | ||||
11950 | } | ||||
11951 | } | ||||
11952 | } | ||||
11953 | |||||
/*
** Exchange the values of lvalues a and b, both of which must be of type T.
**
** The body is wrapped in do{...}while(0) so that "SWAPVAL(T,a,b);" is a
** single statement and may be used as the unbraced body of an if/else.
** The arguments are parenthesized so that expressions such as *pA expand
** safely. Arguments a and b are each evaluated twice and must not be
** named "tmp".
*/
#define SWAPVAL(T, a, b) do { T tmp; tmp=(a); (a)=(b); (b)=tmp; } while( 0 )
11955 | |||||
/*
** Advance integer lvalue iOff past the varint stored at a[iOff]. Bytes are
** consumed until one without the 0x80 bit set has been passed, or until 9
** bytes have been consumed, whichever comes first.
**
** The body is wrapped in do{...}while(0) so that the macro followed by a
** semicolon forms a single statement, and the arguments are parenthesized.
** Argument iOff is evaluated more than once and must not be named "iEnd".
*/
#define fts5IndexSkipVarint(a, iOff) do {            \
  int iEnd = (iOff)+9;                               \
  while( ((a)[(iOff)++] & 0x80) && (iOff)<iEnd );    \
} while( 0 )
11960 | |||||
11961 | /* | ||||
11962 | ** Iterator pIter currently points to the first rowid in a doclist. This | ||||
11963 | ** function sets the iterator up so that iterates in reverse order through | ||||
11964 | ** the doclist. | ||||
11965 | */ | ||||
11966 | static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ | ||||
11967 | Fts5DlidxIter *pDlidx = pIter->pDlidx; | ||||
11968 | Fts5Data *pLast = 0; | ||||
11969 | int pgnoLast = 0; | ||||
11970 | |||||
11971 | if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION4 ){ | ||||
11972 | int iSegid = pIter->pSeg->iSegid; | ||||
11973 | pgnoLast = fts5DlidxIterPgno(pDlidx); | ||||
11974 | pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgnoLast)) )); | ||||
11975 | }else{ | ||||
11976 | Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ | ||||
11977 | |||||
11978 | /* Currently, Fts5SegIter.iLeafOffset points to the first byte of | ||||
11979 | ** position-list content for the current rowid. Back it up so that it | ||||
11980 | ** points to the start of the position-list size field. */ | ||||
11981 | int iPoslist; | ||||
11982 | if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ | ||||
11983 | iPoslist = pIter->iTermLeafOffset; | ||||
11984 | }else{ | ||||
11985 | iPoslist = 4; | ||||
11986 | } | ||||
11987 | fts5IndexSkipVarint(pLeaf->p, iPoslist){ int iEnd = iPoslist+9; while( (pLeaf->p[iPoslist++] & 0x80) && iPoslist<iEnd ); }; | ||||
11988 | pIter->iLeafOffset = iPoslist; | ||||
11989 | |||||
11990 | /* If this condition is true then the largest rowid for the current | ||||
11991 | ** term may not be stored on the current page. So search forward to | ||||
11992 | ** see where said rowid really is. */ | ||||
11993 | if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ | ||||
11994 | int pgno; | ||||
11995 | Fts5StructureSegment *pSeg = pIter->pSeg; | ||||
11996 | |||||
11997 | /* The last rowid in the doclist may not be on the current page. Search | ||||
11998 | ** forward to find the page containing the last rowid. */ | ||||
11999 | for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ | ||||
12000 | i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | ||||
12001 | Fts5Data *pNew = fts5LeafRead(p, iAbs); | ||||
12002 | if( pNew ){ | ||||
12003 | int iRowid, bTermless; | ||||
12004 | iRowid = fts5LeafFirstRowidOff(pNew)(fts5GetU16((pNew)->p)); | ||||
12005 | bTermless = fts5LeafIsTermless(pNew)((pNew)->szLeaf >= (pNew)->nn); | ||||
12006 | if( iRowid ){ | ||||
12007 | SWAPVAL(Fts5Data*, pNew, pLast){ Fts5Data* tmp; tmp=pNew; pNew=pLast; pLast=tmp; }; | ||||
12008 | pgnoLast = pgno; | ||||
12009 | } | ||||
12010 | fts5DataRelease(pNew); | ||||
12011 | if( bTermless==0 ) break; | ||||
12012 | } | ||||
12013 | } | ||||
12014 | } | ||||
12015 | } | ||||
12016 | |||||
12017 | /* If pLast is NULL at this point, then the last rowid for this doclist | ||||
12018 | ** lies on the page currently indicated by the iterator. In this case | ||||
12019 | ** pIter->iLeafOffset is already set to point to the position-list size | ||||
12020 | ** field associated with the first relevant rowid on the page. | ||||
12021 | ** | ||||
12022 | ** Or, if pLast is non-NULL, then it is the page that contains the last | ||||
12023 | ** rowid. In this case configure the iterator so that it points to the | ||||
12024 | ** first rowid on this page. | ||||
12025 | */ | ||||
12026 | if( pLast ){ | ||||
12027 | int iOff; | ||||
12028 | fts5DataRelease(pIter->pLeaf); | ||||
12029 | pIter->pLeaf = pLast; | ||||
12030 | pIter->iLeafPgno = pgnoLast; | ||||
12031 | iOff = fts5LeafFirstRowidOff(pLast)(fts5GetU16((pLast)->p)); | ||||
12032 | if( iOff>pLast->szLeaf ){ | ||||
12033 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
12034 | return; | ||||
12035 | } | ||||
12036 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); | ||||
12037 | pIter->iLeafOffset = iOff; | ||||
12038 | |||||
12039 | if( fts5LeafIsTermless(pLast)((pLast)->szLeaf >= (pLast)->nn) ){ | ||||
12040 | pIter->iEndofDoclist = pLast->nn+1; | ||||
12041 | }else{ | ||||
12042 | pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); | ||||
12043 | } | ||||
12044 | } | ||||
12045 | |||||
12046 | fts5SegIterReverseInitPage(p, pIter); | ||||
12047 | } | ||||
12048 | |||||
12049 | /* | ||||
12050 | ** Iterator pIter currently points to the first rowid of a doclist. | ||||
12051 | ** There is a doclist-index associated with the final term on the current | ||||
12052 | ** page. If the current term is the last term on the page, load the | ||||
12053 | ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). | ||||
12054 | */ | ||||
12055 | static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ | ||||
12056 | int iSeg = pIter->pSeg->iSegid; | ||||
12057 | int bRev = (pIter->flags & FTS5_SEGITER_REVERSE0x02); | ||||
12058 | Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ | ||||
12059 | |||||
12060 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | ||||
12061 | assert( pIter->pDlidx==0 )((void) (0)); | ||||
12062 | |||||
12063 | /* Check if the current doclist ends on this page. If it does, return | ||||
12064 | ** early without loading the doclist-index (as it belongs to a different | ||||
12065 | ** term. */ | ||||
12066 | if( pIter->iTermLeafPgno==pIter->iLeafPgno | ||||
12067 | && pIter->iEndofDoclist<pLeaf->szLeaf | ||||
12068 | ){ | ||||
12069 | return; | ||||
12070 | } | ||||
12071 | |||||
12072 | pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); | ||||
12073 | } | ||||
12074 | |||||
12075 | /* | ||||
12076 | ** The iterator object passed as the second argument currently contains | ||||
12077 | ** no valid values except for the Fts5SegIter.pLeaf member variable. This | ||||
12078 | ** function searches the leaf page for a term matching (pTerm/nTerm). | ||||
12079 | ** | ||||
12080 | ** If the specified term is found on the page, then the iterator is left | ||||
12081 | ** pointing to it. If argument bGe is zero and the term is not found, | ||||
12082 | ** the iterator is left pointing at EOF. | ||||
12083 | ** | ||||
12084 | ** If bGe is non-zero and the specified term is not found, then the | ||||
12085 | ** iterator is left pointing to the smallest term in the segment that | ||||
12086 | ** is larger than the specified term, even if this term is not on the | ||||
12087 | ** current page. | ||||
12088 | */ | ||||
12089 | static void fts5LeafSeek( | ||||
12090 | Fts5Index *p, /* Leave any error code here */ | ||||
12091 | int bGe, /* True for a >= search */ | ||||
12092 | Fts5SegIter *pIter, /* Iterator to seek */ | ||||
12093 | const u8 *pTerm, int nTerm /* Term to search for */ | ||||
12094 | ){ | ||||
12095 | u32 iOff; | ||||
12096 | const u8 *a = pIter->pLeaf->p; | ||||
12097 | u32 n = (u32)pIter->pLeaf->nn; | ||||
12098 | |||||
12099 | u32 nMatch = 0; | ||||
12100 | u32 nKeep = 0; | ||||
12101 | u32 nNew = 0; | ||||
12102 | u32 iTermOff; | ||||
12103 | u32 iPgidx; /* Current offset in pgidx */ | ||||
12104 | int bEndOfPage = 0; | ||||
12105 | |||||
12106 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
12107 | |||||
12108 | iPgidx = (u32)pIter->pLeaf->szLeaf; | ||||
12109 | iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(iTermOff)); | ||||
12110 | iOff = iTermOff; | ||||
12111 | if( iOff>n ){ | ||||
12112 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
12113 | return; | ||||
12114 | } | ||||
12115 | |||||
12116 | while( 1 ){ | ||||
12117 | |||||
12118 | /* Figure out how many new bytes are in this term */ | ||||
12119 | fts5FastGetVarint32(a, iOff, nNew){ nNew = (a)[iOff++]; if( nNew & 0x80 ){ iOff--; iOff += sqlite3Fts5GetVarint32 (&(a)[iOff],(u32*)&(nNew)); } }; | ||||
12120 | if( nKeep<nMatch ){ | ||||
12121 | goto search_failed; | ||||
12122 | } | ||||
12123 | |||||
12124 | assert( nKeep>=nMatch )((void) (0)); | ||||
12125 | if( nKeep==nMatch ){ | ||||
12126 | u32 nCmp; | ||||
12127 | u32 i; | ||||
12128 | nCmp = (u32)MIN(nNew, nTerm-nMatch)(((nNew) < (nTerm-nMatch)) ? (nNew) : (nTerm-nMatch)); | ||||
12129 | for(i=0; i<nCmp; i++){ | ||||
12130 | if( a[iOff+i]!=pTerm[nMatch+i] ) break; | ||||
12131 | } | ||||
12132 | nMatch += i; | ||||
12133 | |||||
12134 | if( (u32)nTerm==nMatch ){ | ||||
12135 | if( i==nNew ){ | ||||
12136 | goto search_success; | ||||
12137 | }else{ | ||||
12138 | goto search_failed; | ||||
12139 | } | ||||
12140 | }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ | ||||
12141 | goto search_failed; | ||||
12142 | } | ||||
12143 | } | ||||
12144 | |||||
12145 | if( iPgidx>=n ){ | ||||
12146 | bEndOfPage = 1; | ||||
12147 | break; | ||||
12148 | } | ||||
12149 | |||||
12150 | iPgidx += fts5GetVarint32(&a[iPgidx], nKeep)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(nKeep)); | ||||
12151 | iTermOff += nKeep; | ||||
12152 | iOff = iTermOff; | ||||
12153 | |||||
12154 | if( iOff>=n ){ | ||||
12155 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
12156 | return; | ||||
12157 | } | ||||
12158 | |||||
12159 | /* Read the nKeep field of the next term. */ | ||||
12160 | fts5FastGetVarint32(a, iOff, nKeep){ nKeep = (a)[iOff++]; if( nKeep & 0x80 ){ iOff--; iOff += sqlite3Fts5GetVarint32(&(a)[iOff],(u32*)&(nKeep)); } }; | ||||
12161 | } | ||||
12162 | |||||
12163 | search_failed: | ||||
12164 | if( bGe==0 ){ | ||||
12165 | fts5DataRelease(pIter->pLeaf); | ||||
12166 | pIter->pLeaf = 0; | ||||
12167 | return; | ||||
12168 | }else if( bEndOfPage ){ | ||||
12169 | do { | ||||
12170 | fts5SegIterNextPage(p, pIter); | ||||
12171 | if( pIter->pLeaf==0 ) return; | ||||
12172 | a = pIter->pLeaf->p; | ||||
12173 | if( fts5LeafIsTermless(pIter->pLeaf)((pIter->pLeaf)->szLeaf >= (pIter->pLeaf)->nn)==0 ){ | ||||
12174 | iPgidx = (u32)pIter->pLeaf->szLeaf; | ||||
12175 | iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff)sqlite3Fts5GetVarint32(&pIter->pLeaf->p[iPgidx],(u32 *)&(iOff)); | ||||
12176 | if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){ | ||||
12177 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
12178 | return; | ||||
12179 | }else{ | ||||
12180 | nKeep = 0; | ||||
12181 | iTermOff = iOff; | ||||
12182 | n = (u32)pIter->pLeaf->nn; | ||||
12183 | iOff += fts5GetVarint32(&a[iOff], nNew)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nNew)); | ||||
12184 | break; | ||||
12185 | } | ||||
12186 | } | ||||
12187 | }while( 1 ); | ||||
12188 | } | ||||
12189 | |||||
12190 | search_success: | ||||
12191 | if( (i64)iOff+nNew>n || nNew<1 ){ | ||||
12192 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
12193 | return; | ||||
12194 | } | ||||
12195 | pIter->iLeafOffset = iOff + nNew; | ||||
12196 | pIter->iTermLeafOffset = pIter->iLeafOffset; | ||||
12197 | pIter->iTermLeafPgno = pIter->iLeafPgno; | ||||
12198 | |||||
12199 | fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm)sqlite3Fts5BufferSet(&p->rc,&pIter->term,nKeep, pTerm); | ||||
12200 | fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&pIter->term ,nNew,&a[iOff]); | ||||
12201 | |||||
12202 | if( iPgidx>=n ){ | ||||
12203 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | ||||
12204 | }else{ | ||||
12205 | int nExtra; | ||||
12206 | iPgidx += fts5GetVarint32(&a[iPgidx], nExtra)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(nExtra)); | ||||
12207 | pIter->iEndofDoclist = iTermOff + nExtra; | ||||
12208 | } | ||||
12209 | pIter->iPgidxOff = iPgidx; | ||||
12210 | |||||
12211 | fts5SegIterLoadRowid(p, pIter); | ||||
12212 | fts5SegIterLoadNPos(p, pIter); | ||||
12213 | } | ||||
12214 | |||||
12215 | static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ | ||||
12216 | if( p->pIdxSelect==0 ){ | ||||
12217 | Fts5Config *pConfig = p->pConfig; | ||||
12218 | fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintfsqlite3_api->mprintf( | ||||
12219 | "SELECT pgno FROM '%q'.'%q_idx' WHERE " | ||||
12220 | "segid=? AND term<=? ORDER BY term DESC LIMIT 1", | ||||
12221 | pConfig->zDb, pConfig->zName | ||||
12222 | )); | ||||
12223 | } | ||||
12224 | return p->pIdxSelect; | ||||
12225 | } | ||||
12226 | |||||
12227 | /* | ||||
12228 | ** Initialize the object pIter to point to term pTerm/nTerm within segment | ||||
12229 | ** pSeg. If there is no such term in the index, the iterator is set to EOF. | ||||
12230 | ** | ||||
12231 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | ||||
12232 | ** an error has already occurred when this function is called, it is a no-op. | ||||
12233 | */ | ||||
12234 | static void fts5SegIterSeekInit( | ||||
12235 | Fts5Index *p, /* FTS5 backend */ | ||||
12236 | const u8 *pTerm, int nTerm, /* Term to seek to */ | ||||
12237 | int flags, /* Mask of FTS5INDEX_XXX flags */ | ||||
12238 | Fts5StructureSegment *pSeg, /* Description of segment */ | ||||
12239 | Fts5SegIter *pIter /* Object to populate */ | ||||
12240 | ){ | ||||
12241 | int iPg = 1; | ||||
12242 | int bGe = (flags & FTS5INDEX_QUERY_SCAN0x0008); | ||||
12243 | int bDlidx = 0; /* True if there is a doclist-index */ | ||||
12244 | sqlite3_stmt *pIdxSelect = 0; | ||||
12245 | |||||
12246 | assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 )((void) (0)); | ||||
12247 | assert( pTerm && nTerm )((void) (0)); | ||||
12248 | memset(pIter, 0, sizeof(*pIter)); | ||||
12249 | pIter->pSeg = pSeg; | ||||
12250 | |||||
12251 | /* This block sets stack variable iPg to the leaf page number that may | ||||
12252 | ** contain term (pTerm/nTerm), if it is present in the segment. */ | ||||
12253 | pIdxSelect = fts5IdxSelectStmt(p); | ||||
12254 | if( p->rc ) return; | ||||
12255 | sqlite3_bind_intsqlite3_api->bind_int(pIdxSelect, 1, pSeg->iSegid); | ||||
12256 | sqlite3_bind_blobsqlite3_api->bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
12257 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pIdxSelect) ){ | ||||
12258 | i64 val = sqlite3_column_intsqlite3_api->column_int(pIdxSelect, 0); | ||||
12259 | iPg = (int)(val>>1); | ||||
12260 | bDlidx = (val & 0x0001); | ||||
12261 | } | ||||
12262 | p->rc = sqlite3_resetsqlite3_api->reset(pIdxSelect); | ||||
12263 | sqlite3_bind_nullsqlite3_api->bind_null(pIdxSelect, 2); | ||||
12264 | |||||
12265 | if( iPg<pSeg->pgnoFirst ){ | ||||
12266 | iPg = pSeg->pgnoFirst; | ||||
12267 | bDlidx = 0; | ||||
12268 | } | ||||
12269 | |||||
12270 | pIter->iLeafPgno = iPg - 1; | ||||
12271 | fts5SegIterNextPage(p, pIter); | ||||
12272 | |||||
12273 | if( pIter->pLeaf ){ | ||||
12274 | fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); | ||||
12275 | } | ||||
12276 | |||||
12277 | if( p->rc==SQLITE_OK0 && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM0x0100)) ){ | ||||
12278 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | ||||
12279 | if( pIter->pLeaf ){ | ||||
12280 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | ||||
12281 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | ||||
12282 | } | ||||
12283 | if( bDlidx ){ | ||||
12284 | fts5SegIterLoadDlidx(p, pIter); | ||||
12285 | } | ||||
12286 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | ||||
12287 | fts5SegIterReverse(p, pIter); | ||||
12288 | } | ||||
12289 | } | ||||
12290 | } | ||||
12291 | |||||
12292 | fts5SegIterSetNext(p, pIter); | ||||
12293 | if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM0x0100) ){ | ||||
12294 | fts5SegIterAllocTombstone(p, pIter); | ||||
12295 | } | ||||
12296 | |||||
12297 | /* Either: | ||||
12298 | ** | ||||
12299 | ** 1) an error has occurred, or | ||||
12300 | ** 2) the iterator points to EOF, or | ||||
12301 | ** 3) the iterator points to an entry with term (pTerm/nTerm), or | ||||
12302 | ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points | ||||
12303 | ** to an entry with a term greater than or equal to (pTerm/nTerm). | ||||
12304 | */ | ||||
12305 | assert_nc( p->rc!=SQLITE_OK /* 1 */((void) (0)) | ||||
12306 | || pIter->pLeaf==0 /* 2 */((void) (0)) | ||||
12307 | || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */((void) (0)) | ||||
12308 | || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */((void) (0)) | ||||
12309 | )((void) (0)); | ||||
12310 | } | ||||
12311 | |||||
12312 | |||||
12313 | /* | ||||
12314 | ** SQL used by fts5SegIterNextInit() to find the page to open. | ||||
12315 | */ | ||||
12316 | static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){ | ||||
12317 | if( p->pIdxNextSelect==0 ){ | ||||
12318 | Fts5Config *pConfig = p->pConfig; | ||||
12319 | fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintfsqlite3_api->mprintf( | ||||
12320 | "SELECT pgno FROM '%q'.'%q_idx' WHERE " | ||||
12321 | "segid=? AND term>? ORDER BY term ASC LIMIT 1", | ||||
12322 | pConfig->zDb, pConfig->zName | ||||
12323 | )); | ||||
12324 | |||||
12325 | } | ||||
12326 | return p->pIdxNextSelect; | ||||
12327 | } | ||||
12328 | |||||
12329 | /* | ||||
12330 | ** This is similar to fts5SegIterSeekInit(), except that it initializes | ||||
12331 | ** the segment iterator to point to the first term following the page | ||||
12332 | ** with pToken/nToken on it. | ||||
12333 | */ | ||||
12334 | static void fts5SegIterNextInit( | ||||
12335 | Fts5Index *p, | ||||
12336 | const char *pTerm, int nTerm, | ||||
12337 | Fts5StructureSegment *pSeg, /* Description of segment */ | ||||
12338 | Fts5SegIter *pIter /* Object to populate */ | ||||
12339 | ){ | ||||
12340 | int iPg = -1; /* Page of segment to open */ | ||||
12341 | int bDlidx = 0; | ||||
12342 | sqlite3_stmt *pSel = 0; /* SELECT to find iPg */ | ||||
12343 | |||||
12344 | pSel = fts5IdxNextStmt(p); | ||||
12345 | if( pSel ){ | ||||
12346 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
12347 | sqlite3_bind_intsqlite3_api->bind_int(pSel, 1, pSeg->iSegid); | ||||
12348 | sqlite3_bind_blobsqlite3_api->bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
12349 | |||||
12350 | if( sqlite3_stepsqlite3_api->step(pSel)==SQLITE_ROW100 ){ | ||||
12351 | i64 val = sqlite3_column_int64sqlite3_api->column_int64(pSel, 0); | ||||
12352 | iPg = (int)(val>>1); | ||||
12353 | bDlidx = (val & 0x0001); | ||||
12354 | } | ||||
12355 | p->rc = sqlite3_resetsqlite3_api->reset(pSel); | ||||
12356 | sqlite3_bind_nullsqlite3_api->bind_null(pSel, 2); | ||||
12357 | if( p->rc ) return; | ||||
12358 | } | ||||
12359 | |||||
12360 | memset(pIter, 0, sizeof(*pIter)); | ||||
12361 | pIter->pSeg = pSeg; | ||||
12362 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | ||||
12363 | if( iPg>=0 ){ | ||||
12364 | pIter->iLeafPgno = iPg - 1; | ||||
12365 | fts5SegIterNextPage(p, pIter); | ||||
12366 | fts5SegIterSetNext(p, pIter); | ||||
12367 | } | ||||
12368 | if( pIter->pLeaf ){ | ||||
12369 | const u8 *a = pIter->pLeaf->p; | ||||
12370 | int iTermOff = 0; | ||||
12371 | |||||
12372 | pIter->iPgidxOff = pIter->pLeaf->szLeaf; | ||||
12373 | pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[pIter->iPgidxOff],(u32*)& (iTermOff)); | ||||
12374 | pIter->iLeafOffset = iTermOff; | ||||
12375 | fts5SegIterLoadTerm(p, pIter, 0); | ||||
12376 | fts5SegIterLoadNPos(p, pIter); | ||||
12377 | if( bDlidx ) fts5SegIterLoadDlidx(p, pIter); | ||||
12378 | |||||
12379 | assert( p->rc!=SQLITE_OK ||((void) (0)) | ||||
12380 | fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0((void) (0)) | ||||
12381 | )((void) (0)); | ||||
12382 | } | ||||
12383 | } | ||||
12384 | |||||
12385 | /* | ||||
12386 | ** Initialize the object pIter to point to term pTerm/nTerm within the | ||||
12387 | ** in-memory hash table. If there is no such term in the hash-table, the | ||||
12388 | ** iterator is set to EOF. | ||||
12389 | ** | ||||
12390 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | ||||
12391 | ** an error has already occurred when this function is called, it is a no-op. | ||||
12392 | */ | ||||
12393 | static void fts5SegIterHashInit( | ||||
12394 | Fts5Index *p, /* FTS5 backend */ | ||||
12395 | const u8 *pTerm, int nTerm, /* Term to seek to */ | ||||
12396 | int flags, /* Mask of FTS5INDEX_XXX flags */ | ||||
12397 | Fts5SegIter *pIter /* Object to populate */ | ||||
12398 | ){ | ||||
12399 | int nList = 0; | ||||
12400 | const u8 *z = 0; | ||||
12401 | int n = 0; | ||||
12402 | Fts5Data *pLeaf = 0; | ||||
12403 | |||||
12404 | assert( p->pHash )((void) (0)); | ||||
12405 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
12406 | |||||
12407 | if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN0x0008) ){ | ||||
12408 | const u8 *pList = 0; | ||||
12409 | |||||
12410 | p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); | ||||
12411 | sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList); | ||||
12412 | if( pList ){ | ||||
12413 | pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); | ||||
12414 | if( pLeaf ){ | ||||
12415 | pLeaf->p = (u8*)pList; | ||||
12416 | } | ||||
12417 | } | ||||
12418 | |||||
12419 | /* The call to sqlite3Fts5HashScanInit() causes the hash table to | ||||
12420 | ** fill the size field of all existing position lists. This means they | ||||
12421 | ** can no longer be appended to. Since the only scenario in which they | ||||
12422 | ** can be appended to is if the previous operation on this table was | ||||
12423 | ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this | ||||
12424 | ** possibility altogether. */ | ||||
12425 | p->bDelete = 0; | ||||
12426 | }else{ | ||||
12427 | p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data), | ||||
12428 | (const char*)pTerm, nTerm, (void**)&pLeaf, &nList | ||||
12429 | ); | ||||
12430 | if( pLeaf ){ | ||||
12431 | pLeaf->p = (u8*)&pLeaf[1]; | ||||
12432 | } | ||||
12433 | z = pTerm; | ||||
12434 | n = nTerm; | ||||
12435 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | ||||
12436 | } | ||||
12437 | |||||
12438 | if( pLeaf ){ | ||||
12439 | sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); | ||||
12440 | pLeaf->nn = pLeaf->szLeaf = nList; | ||||
12441 | pIter->pLeaf = pLeaf; | ||||
12442 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); | ||||
12443 | pIter->iEndofDoclist = pLeaf->nn; | ||||
12444 | |||||
12445 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | ||||
12446 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | ||||
12447 | fts5SegIterReverseInitPage(p, pIter); | ||||
12448 | }else{ | ||||
12449 | fts5SegIterLoadNPos(p, pIter); | ||||
12450 | } | ||||
12451 | } | ||||
12452 | |||||
12453 | fts5SegIterSetNext(p, pIter); | ||||
12454 | } | ||||
12455 | |||||
12456 | /* | ||||
12457 | ** Array ap[] contains n elements. Release each of these elements using | ||||
12458 | ** fts5DataRelease(). Then free the array itself using sqlite3_free(). | ||||
12459 | */ | ||||
12460 | static void fts5IndexFreeArray(Fts5Data **ap, int n){ | ||||
12461 | if( ap ){ | ||||
12462 | int ii; | ||||
12463 | for(ii=0; ii<n; ii++){ | ||||
12464 | fts5DataRelease(ap[ii]); | ||||
12465 | } | ||||
12466 | sqlite3_freesqlite3_api->free(ap); | ||||
12467 | } | ||||
12468 | } | ||||
12469 | |||||
12470 | /* | ||||
12471 | ** Decrement the ref-count of the object passed as the only argument. If it | ||||
12472 | ** reaches 0, free it and its contents. | ||||
12473 | */ | ||||
12474 | static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){ | ||||
12475 | if( p ){ | ||||
12476 | p->nRef--; | ||||
12477 | if( p->nRef<=0 ){ | ||||
12478 | int ii; | ||||
12479 | for(ii=0; ii<p->nTombstone; ii++){ | ||||
12480 | fts5DataRelease(p->apTombstone[ii]); | ||||
12481 | } | ||||
12482 | sqlite3_freesqlite3_api->free(p); | ||||
12483 | } | ||||
12484 | } | ||||
12485 | } | ||||
12486 | |||||
12487 | /* | ||||
12488 | ** Zero the iterator passed as the only argument. | ||||
12489 | */ | ||||
12490 | static void fts5SegIterClear(Fts5SegIter *pIter){ | ||||
12491 | fts5BufferFree(&pIter->term)sqlite3Fts5BufferFree(&pIter->term); | ||||
12492 | fts5DataRelease(pIter->pLeaf); | ||||
12493 | fts5DataRelease(pIter->pNextLeaf); | ||||
12494 | fts5TombstoneArrayDelete(pIter->pTombArray); | ||||
12495 | fts5DlidxIterFree(pIter->pDlidx); | ||||
12496 | sqlite3_freesqlite3_api->free(pIter->aRowidOffset); | ||||
12497 | memset(pIter, 0, sizeof(Fts5SegIter)); | ||||
12498 | } | ||||
12499 | |||||
#ifdef SQLITE_DEBUG

/*
** Helper for fts5AssertMultiIterSetup(). Assert that *pRes holds the
** correct outcome of comparing the current positions of segment iterators
** p1 and p2, both of which are elements of pIter->aSeg[].
**
** An iterator with pLeaf==0 is treated as being at EOF; if both are at
** EOF nothing is checked.
*/
static void fts5AssertComparisonResult(
  Fts5Iter *pIter,
  Fts5SegIter *p1,
  Fts5SegIter *p2,
  Fts5CResult *pRes
){
  int iLeft = p1 - pIter->aSeg;   /* Index of p1 within aSeg[] */
  int iRight = p2 - pIter->aSeg;  /* Index of p2 within aSeg[] */
  int nCmp;                       /* Bytes of term prefix to compare */
  int cmp;                        /* Comparison result */

  if( p1->pLeaf==0 && p2->pLeaf==0 ) return;

  if( p1->pLeaf==0 ){
    assert( pRes->iFirst==iRight );
    return;
  }
  if( p2->pLeaf==0 ){
    assert( pRes->iFirst==iLeft );
    return;
  }

  /* Neither iterator is at EOF. Compare terms, then rowids. */
  nCmp = MIN(p1->term.n, p2->term.n);
  cmp = fts5Memcmp(p1->term.p, p2->term.p, nCmp);
  if( cmp==0 ) cmp = p1->term.n - p2->term.n;

  if( cmp==0 ){
    assert( pRes->bTermEq==1 );
    assert( p1->iRowid!=p2->iRowid );
    cmp = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
  }else{
    assert( pRes->bTermEq==0 );
  }

  if( cmp<0 ){
    assert( pRes->iFirst==iLeft );
  }else{
    assert( pRes->iFirst==iRight );
  }
}

/*
** This function is compiled only when SQLITE_DEBUG is defined; otherwise
** it is replaced by an empty macro. It is effectively one large assert()
** that checks the pIter->aFirst[] array, pIter->base.bEof and
** pIter->iSwitchRowid against the state of the sub-iterators. Nothing is
** checked if Fts5Index.rc is already set.
*/
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
  Fts5SegIter *pFirst;
  int i;

  if( p->rc!=SQLITE_OK ) return;

  pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  assert( (pFirst->pLeaf==0)==pIter->base.bEof );

  /* Check that pIter->iSwitchRowid is set correctly. */
  for(i=0; i<pIter->nSeg; i++){
    Fts5SegIter *pSub = &pIter->aSeg[i];
    assert( pSub==pFirst
         || pSub->pLeaf==0
         || fts5BufferCompare(&pFirst->term, &pSub->term)
         || pSub->iRowid==pIter->iSwitchRowid
         || (pSub->iRowid<pIter->iSwitchRowid)==pIter->bRev
    );
  }

  /* Check the aFirst[] entries that compare adjacent pairs of aSeg[]. */
  for(i=0; i<pIter->nSeg; i+=2){
    Fts5SegIter *pA = &pIter->aSeg[i];
    Fts5SegIter *pB = &pIter->aSeg[i+1];
    Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
    fts5AssertComparisonResult(pIter, pA, pB, pRes);
  }

  /* Check aFirst[] entries that compare the winners of two other
  ** aFirst[] entries. */
  for(i=1; i<(pIter->nSeg / 2); i+=2){
    Fts5SegIter *pA = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
    Fts5SegIter *pB = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
    Fts5CResult *pRes = &pIter->aFirst[i];
    fts5AssertComparisonResult(pIter, pA, pB, pRes);
  }
}
#else
# define fts5AssertMultiIterSetup(x,y)
#endif
12586 | |||||
12587 | /* | ||||
12588 | ** Do the comparison necessary to populate pIter->aFirst[iOut]. | ||||
12589 | ** | ||||
12590 | ** If the returned value is non-zero, then it is the index of an entry | ||||
12591 | ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing | ||||
12592 | ** to a key that is a duplicate of another, higher priority, | ||||
12593 | ** segment-iterator in the pSeg->aSeg[] array. | ||||
12594 | */ | ||||
12595 | static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ | ||||
12596 | int i1; /* Index of left-hand Fts5SegIter */ | ||||
12597 | int i2; /* Index of right-hand Fts5SegIter */ | ||||
12598 | int iRes; | ||||
12599 | Fts5SegIter *p1; /* Left-hand Fts5SegIter */ | ||||
12600 | Fts5SegIter *p2; /* Right-hand Fts5SegIter */ | ||||
12601 | Fts5CResult *pRes = &pIter->aFirst[iOut]; | ||||
12602 | |||||
12603 | assert( iOut<pIter->nSeg && iOut>0 )((void) (0)); | ||||
12604 | assert( pIter->bRev==0 || pIter->bRev==1 )((void) (0)); | ||||
12605 | |||||
12606 | if( iOut>=(pIter->nSeg/2) ){ | ||||
12607 | i1 = (iOut - pIter->nSeg/2) * 2; | ||||
12608 | i2 = i1 + 1; | ||||
12609 | }else{ | ||||
12610 | i1 = pIter->aFirst[iOut*2].iFirst; | ||||
12611 | i2 = pIter->aFirst[iOut*2+1].iFirst; | ||||
12612 | } | ||||
12613 | p1 = &pIter->aSeg[i1]; | ||||
12614 | p2 = &pIter->aSeg[i2]; | ||||
12615 | |||||
12616 | pRes->bTermEq = 0; | ||||
12617 | if( p1->pLeaf==0 ){ /* If p1 is at EOF */ | ||||
12618 | iRes = i2; | ||||
12619 | }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ | ||||
12620 | iRes = i1; | ||||
12621 | }else{ | ||||
12622 | int res = fts5BufferCompare(&p1->term, &p2->term); | ||||
12623 | if( res==0 ){ | ||||
12624 | assert_nc( i2>i1 )((void) (0)); | ||||
12625 | assert_nc( i2!=0 )((void) (0)); | ||||
12626 | pRes->bTermEq = 1; | ||||
12627 | if( p1->iRowid==p2->iRowid ){ | ||||
12628 | return i2; | ||||
12629 | } | ||||
12630 | res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; | ||||
12631 | } | ||||
12632 | assert( res!=0 )((void) (0)); | ||||
12633 | if( res<0 ){ | ||||
12634 | iRes = i1; | ||||
12635 | }else{ | ||||
12636 | iRes = i2; | ||||
12637 | } | ||||
12638 | } | ||||
12639 | |||||
12640 | pRes->iFirst = (u16)iRes; | ||||
12641 | return 0; | ||||
12642 | } | ||||
12643 | |||||
12644 | /* | ||||
12645 | ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. | ||||
12646 | ** It is an error if leaf iLeafPgno does not exist. Unless the db is | ||||
12647 | ** a 'secure-delete' db, if it contains no rowids then this is also an error. | ||||
12648 | */ | ||||
12649 | static void fts5SegIterGotoPage( | ||||
12650 | Fts5Index *p, /* FTS5 backend object */ | ||||
12651 | Fts5SegIter *pIter, /* Iterator to advance */ | ||||
12652 | int iLeafPgno | ||||
12653 | ){ | ||||
12654 | assert( iLeafPgno>pIter->iLeafPgno )((void) (0)); | ||||
12655 | |||||
12656 | if( iLeafPgno>pIter->pSeg->pgnoLast ){ | ||||
12657 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
12658 | }else{ | ||||
12659 | fts5DataRelease(pIter->pNextLeaf); | ||||
12660 | pIter->pNextLeaf = 0; | ||||
12661 | pIter->iLeafPgno = iLeafPgno-1; | ||||
12662 | |||||
12663 | while( p->rc==SQLITE_OK0 ){ | ||||
12664 | int iOff; | ||||
12665 | fts5SegIterNextPage(p, pIter); | ||||
12666 | if( pIter->pLeaf==0 ) break; | ||||
12667 | iOff = fts5LeafFirstRowidOff(pIter->pLeaf)(fts5GetU16((pIter->pLeaf)->p)); | ||||
12668 | if( iOff>0 ){ | ||||
12669 | u8 *a = pIter->pLeaf->p; | ||||
12670 | int n = pIter->pLeaf->szLeaf; | ||||
12671 | if( iOff<4 || iOff>=n ){ | ||||
12672 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
12673 | }else{ | ||||
12674 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); | ||||
12675 | pIter->iLeafOffset = iOff; | ||||
12676 | fts5SegIterLoadNPos(p, pIter); | ||||
12677 | } | ||||
12678 | break; | ||||
12679 | } | ||||
12680 | } | ||||
12681 | } | ||||
12682 | } | ||||
12683 | |||||
12684 | /* | ||||
12685 | ** Advance the iterator passed as the second argument until it is at or | ||||
12686 | ** past rowid iFrom. Regardless of the value of iFrom, the iterator is | ||||
12687 | ** always advanced at least once. | ||||
12688 | */ | ||||
12689 | static void fts5SegIterNextFrom( | ||||
12690 | Fts5Index *p, /* FTS5 backend object */ | ||||
12691 | Fts5SegIter *pIter, /* Iterator to advance */ | ||||
12692 | i64 iMatch /* Advance iterator at least this far */ | ||||
12693 | ){ | ||||
12694 | int bRev = (pIter->flags & FTS5_SEGITER_REVERSE0x02); | ||||
12695 | Fts5DlidxIter *pDlidx = pIter->pDlidx; | ||||
12696 | int iLeafPgno = pIter->iLeafPgno; | ||||
12697 | int bMove = 1; | ||||
12698 | |||||
12699 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | ||||
12700 | assert( pIter->pDlidx )((void) (0)); | ||||
12701 | assert( pIter->pLeaf )((void) (0)); | ||||
12702 | |||||
12703 | if( bRev==0 ){ | ||||
12704 | while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ | ||||
12705 | iLeafPgno = fts5DlidxIterPgno(pDlidx); | ||||
12706 | fts5DlidxIterNext(p, pDlidx); | ||||
12707 | } | ||||
12708 | assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc )((void) (0)); | ||||
12709 | if( iLeafPgno>pIter->iLeafPgno ){ | ||||
12710 | fts5SegIterGotoPage(p, pIter, iLeafPgno); | ||||
12711 | bMove = 0; | ||||
12712 | } | ||||
12713 | }else{ | ||||
12714 | assert( pIter->pNextLeaf==0 )((void) (0)); | ||||
12715 | assert( iMatch<pIter->iRowid )((void) (0)); | ||||
12716 | while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ | ||||
12717 | fts5DlidxIterPrev(p, pDlidx); | ||||
12718 | } | ||||
12719 | iLeafPgno = fts5DlidxIterPgno(pDlidx); | ||||
12720 | |||||
12721 | assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno )((void) (0)); | ||||
12722 | |||||
12723 | if( iLeafPgno<pIter->iLeafPgno ){ | ||||
12724 | pIter->iLeafPgno = iLeafPgno+1; | ||||
12725 | fts5SegIterReverseNewPage(p, pIter); | ||||
12726 | bMove = 0; | ||||
12727 | } | ||||
12728 | } | ||||
12729 | |||||
12730 | do{ | ||||
12731 | if( bMove && p->rc==SQLITE_OK0 ) pIter->xNext(p, pIter, 0); | ||||
12732 | if( pIter->pLeaf==0 ) break; | ||||
12733 | if( bRev==0 && pIter->iRowid>=iMatch ) break; | ||||
12734 | if( bRev!=0 && pIter->iRowid<=iMatch ) break; | ||||
12735 | bMove = 1; | ||||
12736 | }while( p->rc==SQLITE_OK0 ); | ||||
12737 | } | ||||
12738 | |||||
12739 | /* | ||||
12740 | ** Free the iterator object passed as the second argument. | ||||
12741 | */ | ||||
12742 | static void fts5MultiIterFree(Fts5Iter *pIter){ | ||||
12743 | if( pIter ){ | ||||
12744 | int i; | ||||
12745 | for(i=0; i<pIter->nSeg; i++){ | ||||
12746 | fts5SegIterClear(&pIter->aSeg[i]); | ||||
12747 | } | ||||
12748 | fts5BufferFree(&pIter->poslist)sqlite3Fts5BufferFree(&pIter->poslist); | ||||
12749 | sqlite3_freesqlite3_api->free(pIter); | ||||
12750 | } | ||||
12751 | } | ||||
12752 | |||||
12753 | static void fts5MultiIterAdvanced( | ||||
12754 | Fts5Index *p, /* FTS5 backend to iterate within */ | ||||
12755 | Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ | ||||
12756 | int iChanged, /* Index of sub-iterator just advanced */ | ||||
12757 | int iMinset /* Minimum entry in aFirst[] to set */ | ||||
12758 | ){ | ||||
12759 | int i; | ||||
12760 | for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK0; i=i/2){ | ||||
12761 | int iEq; | ||||
12762 | if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ | ||||
12763 | Fts5SegIter *pSeg = &pIter->aSeg[iEq]; | ||||
12764 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
12765 | pSeg->xNext(p, pSeg, 0); | ||||
12766 | i = pIter->nSeg + iEq; | ||||
12767 | } | ||||
12768 | } | ||||
12769 | } | ||||
12770 | |||||
12771 | /* | ||||
12772 | ** Sub-iterator iChanged of iterator pIter has just been advanced. It still | ||||
12773 | ** points to the same term though - just a different rowid. This function | ||||
12774 | ** attempts to update the contents of the pIter->aFirst[] accordingly. | ||||
12775 | ** If it does so successfully, 0 is returned. Otherwise 1. | ||||
12776 | ** | ||||
12777 | ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() | ||||
12778 | ** on the iterator instead. That function does the same as this one, except | ||||
12779 | ** that it deals with more complicated cases as well. | ||||
12780 | */ | ||||
12781 | static int fts5MultiIterAdvanceRowid( | ||||
12782 | Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ | ||||
12783 | int iChanged, /* Index of sub-iterator just advanced */ | ||||
12784 | Fts5SegIter **ppFirst | ||||
12785 | ){ | ||||
12786 | Fts5SegIter *pNew = &pIter->aSeg[iChanged]; | ||||
12787 | |||||
12788 | if( pNew->iRowid==pIter->iSwitchRowid | ||||
12789 | || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev | ||||
12790 | ){ | ||||
12791 | int i; | ||||
12792 | Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; | ||||
12793 | pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))) : LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | ||||
12794 | for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ | ||||
12795 | Fts5CResult *pRes = &pIter->aFirst[i]; | ||||
12796 | |||||
12797 | assert( pNew->pLeaf )((void) (0)); | ||||
12798 | assert( pRes->bTermEq==0 || pOther->pLeaf )((void) (0)); | ||||
12799 | |||||
12800 | if( pRes->bTermEq ){ | ||||
12801 | if( pNew->iRowid==pOther->iRowid ){ | ||||
12802 | return 1; | ||||
12803 | }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ | ||||
12804 | pIter->iSwitchRowid = pOther->iRowid; | ||||
12805 | pNew = pOther; | ||||
12806 | }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){ | ||||
12807 | pIter->iSwitchRowid = pOther->iRowid; | ||||
12808 | } | ||||
12809 | } | ||||
12810 | pRes->iFirst = (u16)(pNew - pIter->aSeg); | ||||
12811 | if( i==1 ) break; | ||||
12812 | |||||
12813 | pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; | ||||
12814 | } | ||||
12815 | } | ||||
12816 | |||||
12817 | *ppFirst = pNew; | ||||
12818 | return 0; | ||||
12819 | } | ||||
12820 | |||||
12821 | /* | ||||
12822 | ** Set the pIter->bEof variable based on the state of the sub-iterators. | ||||
12823 | */ | ||||
12824 | static void fts5MultiIterSetEof(Fts5Iter *pIter){ | ||||
12825 | Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | ||||
12826 | pIter->base.bEof = pSeg->pLeaf==0; | ||||
12827 | pIter->iSwitchRowid = pSeg->iRowid; | ||||
12828 | } | ||||
12829 | |||||
/*
** The argument to these macros must be a pointer to an Fts5Data structure
** containing a tombstone hash page.
**
** TOMBSTONE_KEYSIZE() returns the key-size of the hash page: 4 if the
** first byte of the page is 4, or 8 otherwise.
**
** TOMBSTONE_NSLOT() returns the number of key slots on the page: the page
** size less 8 bytes, divided by the key-size, or 1 if the page is 16
** bytes or smaller.
**
** The argument is parenthesized in the expansions so that an expression
** such as "&pg" may be passed. Both macros evaluate their argument more
** than once, so it must not have side effects.
*/
#define TOMBSTONE_KEYSIZE(pPg) ((pPg)->p[0]==4 ? 4 : 8)

#define TOMBSTONE_NSLOT(pPg)   \
  (((pPg)->nn > 16) ? (((pPg)->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1)
12838 | |||||
12839 | /* | ||||
12840 | ** Query a single tombstone hash table for rowid iRowid. Return true if | ||||
12841 | ** it is found or false otherwise. The tombstone hash table is one of | ||||
12842 | ** nHashTable tables. | ||||
12843 | */ | ||||
12844 | static int fts5IndexTombstoneQuery( | ||||
12845 | Fts5Data *pHash, /* Hash table page to query */ | ||||
12846 | int nHashTable, /* Number of pages attached to segment */ | ||||
12847 | u64 iRowid /* Rowid to query hash for */ | ||||
12848 | ){ | ||||
12849 | const int szKey = TOMBSTONE_KEYSIZE(pHash)(pHash->p[0]==4 ? 4 : 8); | ||||
12850 | const int nSlot = TOMBSTONE_NSLOT(pHash)((pHash->nn > 16) ? ((pHash->nn-8) / (pHash->p[0] ==4 ? 4 : 8)) : 1); | ||||
12851 | int iSlot = (iRowid / nHashTable) % nSlot; | ||||
12852 | int nCollide = nSlot; | ||||
12853 | |||||
12854 | if( iRowid==0 ){ | ||||
12855 | return pHash->p[1]; | ||||
12856 | }else if( szKey==4 ){ | ||||
12857 | u32 *aSlot = (u32*)&pHash->p[8]; | ||||
12858 | while( aSlot[iSlot] ){ | ||||
12859 | if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1; | ||||
12860 | if( nCollide--==0 ) break; | ||||
12861 | iSlot = (iSlot+1)%nSlot; | ||||
12862 | } | ||||
12863 | }else{ | ||||
12864 | u64 *aSlot = (u64*)&pHash->p[8]; | ||||
12865 | while( aSlot[iSlot] ){ | ||||
12866 | if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1; | ||||
12867 | if( nCollide--==0 ) break; | ||||
12868 | iSlot = (iSlot+1)%nSlot; | ||||
12869 | } | ||||
12870 | } | ||||
12871 | |||||
12872 | return 0; | ||||
12873 | } | ||||
12874 | |||||
12875 | /* | ||||
12876 | ** Return true if the iterator passed as the only argument points | ||||
12877 | ** to an segment entry for which there is a tombstone. Return false | ||||
12878 | ** if there is no tombstone or if the iterator is already at EOF. | ||||
12879 | */ | ||||
12880 | static int fts5MultiIterIsDeleted(Fts5Iter *pIter){ | ||||
12881 | int iFirst = pIter->aFirst[1].iFirst; | ||||
12882 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | ||||
12883 | Fts5TombstoneArray *pArray = pSeg->pTombArray; | ||||
12884 | |||||
12885 | if( pSeg->pLeaf && pArray ){ | ||||
12886 | /* Figure out which page the rowid might be present on. */ | ||||
12887 | int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone; | ||||
12888 | assert( iPg>=0 )((void) (0)); | ||||
12889 | |||||
12890 | /* If tombstone hash page iPg has not yet been loaded from the | ||||
12891 | ** database, load it now. */ | ||||
12892 | if( pArray->apTombstone[iPg]==0 ){ | ||||
12893 | pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex, | ||||
12894 | FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)( ((i64)(pSeg->pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << ( 31)) + ((i64)(iPg)) ) | ||||
12895 | ); | ||||
12896 | if( pArray->apTombstone[iPg]==0 ) return 0; | ||||
12897 | } | ||||
12898 | |||||
12899 | return fts5IndexTombstoneQuery( | ||||
12900 | pArray->apTombstone[iPg], | ||||
12901 | pArray->nTombstone, | ||||
12902 | pSeg->iRowid | ||||
12903 | ); | ||||
12904 | } | ||||
12905 | |||||
12906 | return 0; | ||||
12907 | } | ||||
12908 | |||||
12909 | /* | ||||
12910 | ** Move the iterator to the next entry. | ||||
12911 | ** | ||||
12912 | ** If an error occurs, an error code is left in Fts5Index.rc. It is not | ||||
12913 | ** considered an error if the iterator reaches EOF, or if it is already at | ||||
12914 | ** EOF when this function is called. | ||||
12915 | */ | ||||
12916 | static void fts5MultiIterNext( | ||||
12917 | Fts5Index *p, | ||||
12918 | Fts5Iter *pIter, | ||||
12919 | int bFrom, /* True if argument iFrom is valid */ | ||||
12920 | i64 iFrom /* Advance at least as far as this */ | ||||
12921 | ){ | ||||
12922 | int bUseFrom = bFrom; | ||||
12923 | assert( pIter->base.bEof==0 )((void) (0)); | ||||
12924 | while( p->rc==SQLITE_OK0 ){ | ||||
12925 | int iFirst = pIter->aFirst[1].iFirst; | ||||
12926 | int bNewTerm = 0; | ||||
12927 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | ||||
12928 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
12929 | if( bUseFrom && pSeg->pDlidx ){ | ||||
12930 | fts5SegIterNextFrom(p, pSeg, iFrom); | ||||
12931 | }else{ | ||||
12932 | pSeg->xNext(p, pSeg, &bNewTerm); | ||||
12933 | } | ||||
12934 | |||||
12935 | if( pSeg->pLeaf==0 || bNewTerm | ||||
12936 | || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) | ||||
12937 | ){ | ||||
12938 | fts5MultiIterAdvanced(p, pIter, iFirst, 1); | ||||
12939 | fts5MultiIterSetEof(pIter); | ||||
12940 | pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | ||||
12941 | if( pSeg->pLeaf==0 ) return; | ||||
12942 | } | ||||
12943 | |||||
12944 | fts5AssertMultiIterSetup(p, pIter); | ||||
12945 | assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf )((void) (0)); | ||||
12946 | if( (pIter->bSkipEmpty==0 || pSeg->nPos) | ||||
12947 | && 0==fts5MultiIterIsDeleted(pIter) | ||||
12948 | ){ | ||||
12949 | pIter->xSetOutputs(pIter, pSeg); | ||||
12950 | return; | ||||
12951 | } | ||||
12952 | bUseFrom = 0; | ||||
12953 | } | ||||
12954 | } | ||||
12955 | |||||
12956 | static void fts5MultiIterNext2( | ||||
12957 | Fts5Index *p, | ||||
12958 | Fts5Iter *pIter, | ||||
12959 | int *pbNewTerm /* OUT: True if *might* be new term */ | ||||
12960 | ){ | ||||
12961 | assert( pIter->bSkipEmpty )((void) (0)); | ||||
12962 | if( p->rc==SQLITE_OK0 ){ | ||||
12963 | *pbNewTerm = 0; | ||||
12964 | do{ | ||||
12965 | int iFirst = pIter->aFirst[1].iFirst; | ||||
12966 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | ||||
12967 | int bNewTerm = 0; | ||||
12968 | |||||
12969 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
12970 | pSeg->xNext(p, pSeg, &bNewTerm); | ||||
12971 | if( pSeg->pLeaf==0 || bNewTerm | ||||
12972 | || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) | ||||
12973 | ){ | ||||
12974 | fts5MultiIterAdvanced(p, pIter, iFirst, 1); | ||||
12975 | fts5MultiIterSetEof(pIter); | ||||
12976 | *pbNewTerm = 1; | ||||
12977 | } | ||||
12978 | fts5AssertMultiIterSetup(p, pIter); | ||||
12979 | |||||
12980 | }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter)) | ||||
12981 | && (p->rc==SQLITE_OK0) | ||||
12982 | ); | ||||
12983 | } | ||||
12984 | } | ||||
12985 | |||||
12986 | static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){ | ||||
12987 | UNUSED_PARAM2(pUnused1, pUnused2)(void)(pUnused1), (void)(pUnused2); | ||||
12988 | } | ||||
12989 | |||||
12990 | static Fts5Iter *fts5MultiIterAlloc( | ||||
12991 | Fts5Index *p, /* FTS5 backend to iterate within */ | ||||
12992 | int nSeg | ||||
12993 | ){ | ||||
12994 | Fts5Iter *pNew; | ||||
12995 | i64 nSlot; /* Power of two >= nSeg */ | ||||
12996 | |||||
12997 | for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); | ||||
12998 | pNew = fts5IdxMalloc(p, | ||||
12999 | SZ_FTS5ITER(nSlot)(__builtin_offsetof(Fts5Iter, aSeg)+(nSlot)*sizeof(Fts5SegIter )) + /* pNew + pNew->aSeg[] */ | ||||
13000 | sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ | ||||
13001 | ); | ||||
13002 | if( pNew ){ | ||||
13003 | pNew->nSeg = nSlot; | ||||
13004 | pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; | ||||
13005 | pNew->pIndex = p; | ||||
13006 | pNew->xSetOutputs = fts5IterSetOutputs_Noop; | ||||
13007 | } | ||||
13008 | return pNew; | ||||
13009 | } | ||||
13010 | |||||
13011 | static void fts5PoslistCallback( | ||||
13012 | Fts5Index *pUnused, | ||||
13013 | void *pContext, | ||||
13014 | const u8 *pChunk, int nChunk | ||||
13015 | ){ | ||||
13016 | UNUSED_PARAM(pUnused)(void)(pUnused); | ||||
13017 | assert_nc( nChunk>=0 )((void) (0)); | ||||
13018 | if( nChunk>0 ){ | ||||
13019 | fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk){ ((void) (0)); memcpy(&((Fts5Buffer*)pContext)->p[((Fts5Buffer *)pContext)->n], pChunk, nChunk); ((Fts5Buffer*)pContext)-> n += nChunk; }; | ||||
13020 | } | ||||
13021 | } | ||||
13022 | |||||
13023 | typedef struct PoslistCallbackCtx PoslistCallbackCtx; | ||||
13024 | struct PoslistCallbackCtx { | ||||
13025 | Fts5Buffer *pBuf; /* Append to this buffer */ | ||||
13026 | Fts5Colset *pColset; /* Restrict matches to this column */ | ||||
13027 | int eState; /* See above */ | ||||
13028 | }; | ||||
13029 | |||||
13030 | typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; | ||||
13031 | struct PoslistOffsetsCtx { | ||||
13032 | Fts5Buffer *pBuf; /* Append to this buffer */ | ||||
13033 | Fts5Colset *pColset; /* Restrict matches to this column */ | ||||
13034 | int iRead; | ||||
13035 | int iWrite; | ||||
13036 | }; | ||||
13037 | |||||
13038 | /* | ||||
13039 | ** TODO: Make this more efficient! | ||||
13040 | */ | ||||
13041 | static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ | ||||
13042 | int i; | ||||
13043 | for(i=0; i<pColset->nCol; i++){ | ||||
13044 | if( pColset->aiCol[i]==iCol ) return 1; | ||||
13045 | } | ||||
13046 | return 0; | ||||
13047 | } | ||||
13048 | |||||
13049 | static void fts5PoslistOffsetsCallback( | ||||
13050 | Fts5Index *pUnused, | ||||
13051 | void *pContext, | ||||
13052 | const u8 *pChunk, int nChunk | ||||
13053 | ){ | ||||
13054 | PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; | ||||
13055 | UNUSED_PARAM(pUnused)(void)(pUnused); | ||||
13056 | assert_nc( nChunk>=0 )((void) (0)); | ||||
13057 | if( nChunk>0 ){ | ||||
13058 | int i = 0; | ||||
13059 | while( i<nChunk ){ | ||||
13060 | int iVal; | ||||
13061 | i += fts5GetVarint32(&pChunk[i], iVal)sqlite3Fts5GetVarint32(&pChunk[i],(u32*)&(iVal)); | ||||
13062 | iVal += pCtx->iRead - 2; | ||||
13063 | pCtx->iRead = iVal; | ||||
13064 | if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ | ||||
13065 | fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite){ (pCtx->pBuf)->n += sqlite3Fts5PutVarint(&(pCtx-> pBuf)->p[(pCtx->pBuf)->n], (iVal + 2 - pCtx->iWrite )); ((void) (0)); }; | ||||
13066 | pCtx->iWrite = iVal; | ||||
13067 | } | ||||
13068 | } | ||||
13069 | } | ||||
13070 | } | ||||
13071 | |||||
13072 | static void fts5PoslistFilterCallback( | ||||
13073 | Fts5Index *pUnused, | ||||
13074 | void *pContext, | ||||
13075 | const u8 *pChunk, int nChunk | ||||
13076 | ){ | ||||
13077 | PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; | ||||
13078 | UNUSED_PARAM(pUnused)(void)(pUnused); | ||||
13079 | assert_nc( nChunk>=0 )((void) (0)); | ||||
13080 | if( nChunk>0 ){ | ||||
13081 | /* Search through to find the first varint with value 1. This is the | ||||
13082 | ** start of the next columns hits. */ | ||||
13083 | int i = 0; | ||||
13084 | int iStart = 0; | ||||
13085 | |||||
13086 | if( pCtx->eState==2 ){ | ||||
13087 | int iCol; | ||||
13088 | fts5FastGetVarint32(pChunk, i, iCol){ iCol = (pChunk)[i++]; if( iCol & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(pChunk)[i],(u32*)&(iCol)); } }; | ||||
13089 | if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ | ||||
13090 | pCtx->eState = 1; | ||||
13091 | fts5BufferSafeAppendVarint(pCtx->pBuf, 1){ (pCtx->pBuf)->n += sqlite3Fts5PutVarint(&(pCtx-> pBuf)->p[(pCtx->pBuf)->n], (1)); ((void) (0)); }; | ||||
13092 | }else{ | ||||
13093 | pCtx->eState = 0; | ||||
13094 | } | ||||
13095 | } | ||||
13096 | |||||
13097 | do { | ||||
13098 | while( i<nChunk && pChunk[i]!=0x01 ){ | ||||
13099 | while( pChunk[i] & 0x80 ) i++; | ||||
13100 | i++; | ||||
13101 | } | ||||
13102 | if( pCtx->eState ){ | ||||
13103 | fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart){ ((void) (0)); memcpy(&(pCtx->pBuf)->p[(pCtx->pBuf )->n], &pChunk[iStart], i-iStart); (pCtx->pBuf)-> n += i-iStart; }; | ||||
13104 | } | ||||
13105 | if( i<nChunk ){ | ||||
13106 | int iCol; | ||||
13107 | iStart = i; | ||||
13108 | i++; | ||||
13109 | if( i>=nChunk ){ | ||||
13110 | pCtx->eState = 2; | ||||
13111 | }else{ | ||||
13112 | fts5FastGetVarint32(pChunk, i, iCol){ iCol = (pChunk)[i++]; if( iCol & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(pChunk)[i],(u32*)&(iCol)); } }; | ||||
13113 | pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); | ||||
13114 | if( pCtx->eState ){ | ||||
13115 | fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart){ ((void) (0)); memcpy(&(pCtx->pBuf)->p[(pCtx->pBuf )->n], &pChunk[iStart], i-iStart); (pCtx->pBuf)-> n += i-iStart; }; | ||||
13116 | iStart = i; | ||||
13117 | } | ||||
13118 | } | ||||
13119 | } | ||||
13120 | }while( i<nChunk ); | ||||
13121 | } | ||||
13122 | } | ||||
13123 | |||||
13124 | static void fts5ChunkIterate( | ||||
13125 | Fts5Index *p, /* Index object */ | ||||
13126 | Fts5SegIter *pSeg, /* Poslist of this iterator */ | ||||
13127 | void *pCtx, /* Context pointer for xChunk callback */ | ||||
13128 | void (*xChunk)(Fts5Index*, void*, const u8*, int) | ||||
13129 | ){ | ||||
13130 | int nRem = pSeg->nPos; /* Number of bytes still to come */ | ||||
13131 | Fts5Data *pData = 0; | ||||
13132 | u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | ||||
13133 | int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset)(((nRem) < (pSeg->pLeaf->szLeaf - pSeg->iLeafOffset )) ? (nRem) : (pSeg->pLeaf->szLeaf - pSeg->iLeafOffset )); | ||||
13134 | int pgno = pSeg->iLeafPgno; | ||||
13135 | int pgnoSave = 0; | ||||
13136 | |||||
13137 | /* This function does not work with detail=none databases. */ | ||||
13138 | assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | ||||
13139 | |||||
13140 | if( (pSeg->flags & FTS5_SEGITER_REVERSE0x02)==0 ){ | ||||
13141 | pgnoSave = pgno+1; | ||||
13142 | } | ||||
13143 | |||||
13144 | while( 1 ){ | ||||
13145 | xChunk(p, pCtx, pChunk, nChunk); | ||||
13146 | nRem -= nChunk; | ||||
13147 | fts5DataRelease(pData); | ||||
13148 | if( nRem<=0 ){ | ||||
13149 | break; | ||||
13150 | }else if( pSeg->pSeg==0 ){ | ||||
13151 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
13152 | return; | ||||
13153 | }else{ | ||||
13154 | pgno++; | ||||
13155 | pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)( ((i64)(pSeg->pSeg->iSegid) << (31 +5 +1)) + ((i64 )(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno )) )); | ||||
13156 | if( pData==0 ) break; | ||||
13157 | pChunk = &pData->p[4]; | ||||
13158 | nChunk = MIN(nRem, pData->szLeaf - 4)(((nRem) < (pData->szLeaf - 4)) ? (nRem) : (pData->szLeaf - 4)); | ||||
13159 | if( pgno==pgnoSave ){ | ||||
13160 | assert( pSeg->pNextLeaf==0 )((void) (0)); | ||||
13161 | pSeg->pNextLeaf = pData; | ||||
13162 | pData = 0; | ||||
13163 | } | ||||
13164 | } | ||||
13165 | } | ||||
13166 | } | ||||
13167 | |||||
13168 | /* | ||||
13169 | ** Iterator pIter currently points to a valid entry (not EOF). This | ||||
13170 | ** function appends the position list data for the current entry to | ||||
13171 | ** buffer pBuf. It does not make a copy of the position-list size | ||||
13172 | ** field. | ||||
13173 | */ | ||||
13174 | static void fts5SegiterPoslist( | ||||
13175 | Fts5Index *p, | ||||
13176 | Fts5SegIter *pSeg, | ||||
13177 | Fts5Colset *pColset, | ||||
13178 | Fts5Buffer *pBuf | ||||
13179 | ){ | ||||
13180 | assert( pBuf!=0 )((void) (0)); | ||||
13181 | assert( pSeg!=0 )((void) (0)); | ||||
13182 | if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING)( (u32)((pBuf)->n) + (u32)(pSeg->nPos+8) <= (u32)((pBuf )->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->rc),(pBuf ),(pSeg->nPos+8)+(pBuf)->n) ) ){ | ||||
13183 | assert( pBuf->p!=0 )((void) (0)); | ||||
13184 | assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING )((void) (0)); | ||||
13185 | memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING8); | ||||
13186 | if( pColset==0 ){ | ||||
13187 | fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); | ||||
13188 | }else{ | ||||
13189 | if( p->pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | ||||
13190 | PoslistCallbackCtx sCtx; | ||||
13191 | sCtx.pBuf = pBuf; | ||||
13192 | sCtx.pColset = pColset; | ||||
13193 | sCtx.eState = fts5IndexColsetTest(pColset, 0); | ||||
13194 | assert( sCtx.eState==0 || sCtx.eState==1 )((void) (0)); | ||||
13195 | fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); | ||||
13196 | }else{ | ||||
13197 | PoslistOffsetsCtx sCtx; | ||||
13198 | memset(&sCtx, 0, sizeof(sCtx)); | ||||
13199 | sCtx.pBuf = pBuf; | ||||
13200 | sCtx.pColset = pColset; | ||||
13201 | fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); | ||||
13202 | } | ||||
13203 | } | ||||
13204 | } | ||||
13205 | } | ||||
13206 | |||||
13207 | /* | ||||
13208 | ** Parameter pPos points to a buffer containing a position list, size nPos. | ||||
13209 | ** This function filters it according to pColset (which must be non-NULL) | ||||
13210 | ** and sets pIter->base.pData/nData to point to the new position list. | ||||
13211 | ** If memory is required for the new position list, use buffer pIter->poslist. | ||||
13212 | ** Or, if the new position list is a contiguous subset of the input, set | ||||
13213 | ** pIter->base.pData/nData to point directly to it. | ||||
13214 | ** | ||||
13215 | ** This function is a no-op if *pRc is other than SQLITE_OK when it is | ||||
13216 | ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM | ||||
13217 | ** before returning. | ||||
13218 | */ | ||||
13219 | static void fts5IndexExtractColset( | ||||
13220 | int *pRc, | ||||
13221 | Fts5Colset *pColset, /* Colset to filter on */ | ||||
13222 | const u8 *pPos, int nPos, /* Position list */ | ||||
13223 | Fts5Iter *pIter | ||||
13224 | ){ | ||||
13225 | if( *pRc==SQLITE_OK0 ){ | ||||
13226 | const u8 *p = pPos; | ||||
13227 | const u8 *aCopy = p; | ||||
13228 | const u8 *pEnd = &p[nPos]; /* One byte past end of position list */ | ||||
13229 | int i = 0; | ||||
13230 | int iCurrent = 0; | ||||
13231 | |||||
13232 | if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){ | ||||
13233 | return; | ||||
13234 | } | ||||
13235 | |||||
13236 | while( 1 ){ | ||||
13237 | while( pColset->aiCol[i]<iCurrent ){ | ||||
13238 | i++; | ||||
13239 | if( i==pColset->nCol ){ | ||||
13240 | pIter->base.pData = pIter->poslist.p; | ||||
13241 | pIter->base.nData = pIter->poslist.n; | ||||
13242 | return; | ||||
13243 | } | ||||
13244 | } | ||||
13245 | |||||
13246 | /* Advance pointer p until it points to pEnd or an 0x01 byte that is | ||||
13247 | ** not part of a varint */ | ||||
13248 | while( p<pEnd && *p!=0x01 ){ | ||||
13249 | while( *p++ & 0x80 ); | ||||
13250 | } | ||||
13251 | |||||
13252 | if( pColset->aiCol[i]==iCurrent ){ | ||||
13253 | if( pColset->nCol==1 ){ | ||||
13254 | pIter->base.pData = aCopy; | ||||
13255 | pIter->base.nData = p-aCopy; | ||||
13256 | return; | ||||
13257 | } | ||||
13258 | fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy){ ((void) (0)); memcpy(&(&pIter->poslist)->p[(& pIter->poslist)->n], aCopy, p-aCopy); (&pIter->poslist )->n += p-aCopy; }; | ||||
13259 | } | ||||
13260 | if( p>=pEnd ){ | ||||
13261 | pIter->base.pData = pIter->poslist.p; | ||||
13262 | pIter->base.nData = pIter->poslist.n; | ||||
13263 | return; | ||||
13264 | } | ||||
13265 | aCopy = p++; | ||||
13266 | iCurrent = *p++; | ||||
13267 | if( iCurrent & 0x80 ){ | ||||
13268 | p--; | ||||
13269 | p += fts5GetVarint32(p, iCurrent)sqlite3Fts5GetVarint32(p,(u32*)&(iCurrent)); | ||||
13270 | } | ||||
13271 | } | ||||
13272 | } | ||||
13273 | |||||
13274 | } | ||||
13275 | |||||
13276 | /* | ||||
13277 | ** xSetOutputs callback used by detail=none tables. | ||||
13278 | */ | ||||
13279 | static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ | ||||
13280 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | ||||
13281 | pIter->base.iRowid = pSeg->iRowid; | ||||
13282 | pIter->base.nData = pSeg->nPos; | ||||
13283 | } | ||||
13284 | |||||
13285 | /* | ||||
13286 | ** xSetOutputs callback used by detail=full and detail=col tables when no | ||||
13287 | ** column filters are specified. | ||||
13288 | */ | ||||
13289 | static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ | ||||
13290 | pIter->base.iRowid = pSeg->iRowid; | ||||
13291 | pIter->base.nData = pSeg->nPos; | ||||
13292 | |||||
13293 | assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | ||||
13294 | assert( pIter->pColset==0 )((void) (0)); | ||||
13295 | |||||
13296 | if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ | ||||
13297 | /* All data is stored on the current page. Populate the output | ||||
13298 | ** variables to point into the body of the page object. */ | ||||
13299 | pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | ||||
13300 | }else{ | ||||
13301 | /* The data is distributed over two or more pages. Copy it into the | ||||
13302 | ** Fts5Iter.poslist buffer and then set the output pointer to point | ||||
13303 | ** to this buffer. */ | ||||
13304 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | ||||
13305 | fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); | ||||
13306 | pIter->base.pData = pIter->poslist.p; | ||||
13307 | } | ||||
13308 | } | ||||
13309 | |||||
13310 | /* | ||||
13311 | ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match | ||||
13312 | ** against no columns at all). | ||||
13313 | */ | ||||
13314 | static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){ | ||||
13315 | UNUSED_PARAM(pSeg)(void)(pSeg); | ||||
13316 | pIter->base.nData = 0; | ||||
13317 | } | ||||
13318 | |||||
13319 | /* | ||||
13320 | ** xSetOutputs callback used by detail=col when there is a column filter | ||||
13321 | ** and there are 100 or more columns. Also called as a fallback from | ||||
13322 | ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. | ||||
13323 | */ | ||||
13324 | static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ | ||||
13325 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | ||||
13326 | fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); | ||||
13327 | pIter->base.iRowid = pSeg->iRowid; | ||||
13328 | pIter->base.pData = pIter->poslist.p; | ||||
13329 | pIter->base.nData = pIter->poslist.n; | ||||
13330 | } | ||||
13331 | |||||
13332 | /* | ||||
13333 | ** xSetOutputs callback used when: | ||||
13334 | ** | ||||
13335 | ** * detail=col, | ||||
13336 | ** * there is a column filter, and | ||||
13337 | ** * the table contains 100 or fewer columns. | ||||
13338 | ** | ||||
13339 | ** The last point is to ensure all column numbers are stored as | ||||
13340 | ** single-byte varints. | ||||
13341 | */ | ||||
13342 | static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ | ||||
13343 | |||||
13344 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | ||||
13345 | assert( pIter->pColset )((void) (0)); | ||||
13346 | |||||
13347 | if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ | ||||
13348 | fts5IterSetOutputs_Col(pIter, pSeg); | ||||
13349 | }else{ | ||||
13350 | u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; | ||||
13351 | u8 *pEnd = (u8*)&a[pSeg->nPos]; | ||||
13352 | int iPrev = 0; | ||||
13353 | int *aiCol = pIter->pColset->aiCol; | ||||
13354 | int *aiColEnd = &aiCol[pIter->pColset->nCol]; | ||||
13355 | |||||
13356 | u8 *aOut = pIter->poslist.p; | ||||
13357 | int iPrevOut = 0; | ||||
13358 | |||||
13359 | pIter->base.iRowid = pSeg->iRowid; | ||||
13360 | |||||
13361 | while( a<pEnd ){ | ||||
13362 | iPrev += (int)a++[0] - 2; | ||||
13363 | while( *aiCol<iPrev ){ | ||||
13364 | aiCol++; | ||||
13365 | if( aiCol==aiColEnd ) goto setoutputs_col_out; | ||||
13366 | } | ||||
13367 | if( *aiCol==iPrev ){ | ||||
13368 | *aOut++ = (u8)((iPrev - iPrevOut) + 2); | ||||
13369 | iPrevOut = iPrev; | ||||
13370 | } | ||||
13371 | } | ||||
13372 | |||||
13373 | setoutputs_col_out: | ||||
13374 | pIter->base.pData = pIter->poslist.p; | ||||
13375 | pIter->base.nData = aOut - pIter->poslist.p; | ||||
13376 | } | ||||
13377 | } | ||||
13378 | |||||
13379 | /* | ||||
13380 | ** xSetOutputs callback used by detail=full when there is a column filter. | ||||
13381 | */ | ||||
13382 | static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ | ||||
13383 | Fts5Colset *pColset = pIter->pColset; | ||||
13384 | pIter->base.iRowid = pSeg->iRowid; | ||||
13385 | |||||
13386 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL )((void) (0)); | ||||
13387 | assert( pColset )((void) (0)); | ||||
13388 | |||||
13389 | if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ | ||||
13390 | /* All data is stored on the current page. Populate the output | ||||
13391 | ** variables to point into the body of the page object. */ | ||||
13392 | const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | ||||
13393 | int *pRc = &pIter->pIndex->rc; | ||||
13394 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | ||||
13395 | fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter); | ||||
13396 | }else{ | ||||
13397 | /* The data is distributed over two or more pages. Copy it into the | ||||
13398 | ** Fts5Iter.poslist buffer and then set the output pointer to point | ||||
13399 | ** to this buffer. */ | ||||
13400 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | ||||
13401 | fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); | ||||
13402 | pIter->base.pData = pIter->poslist.p; | ||||
13403 | pIter->base.nData = pIter->poslist.n; | ||||
13404 | } | ||||
13405 | } | ||||
13406 | |||||
13407 | static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ | ||||
13408 | assert( pIter!=0 || (*pRc)!=SQLITE_OK )((void) (0)); | ||||
13409 | if( *pRc==SQLITE_OK0 ){ | ||||
13410 | Fts5Config *pConfig = pIter->pIndex->pConfig; | ||||
13411 | if( pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
13412 | pIter->xSetOutputs = fts5IterSetOutputs_None; | ||||
13413 | } | ||||
13414 | |||||
13415 | else if( pIter->pColset==0 ){ | ||||
13416 | pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; | ||||
13417 | } | ||||
13418 | |||||
13419 | else if( pIter->pColset->nCol==0 ){ | ||||
13420 | pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset; | ||||
13421 | } | ||||
13422 | |||||
13423 | else if( pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | ||||
13424 | pIter->xSetOutputs = fts5IterSetOutputs_Full; | ||||
13425 | } | ||||
13426 | |||||
13427 | else{ | ||||
13428 | assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | ||||
13429 | if( pConfig->nCol<=100 ){ | ||||
13430 | pIter->xSetOutputs = fts5IterSetOutputs_Col100; | ||||
13431 | sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); | ||||
13432 | }else{ | ||||
13433 | pIter->xSetOutputs = fts5IterSetOutputs_Col; | ||||
13434 | } | ||||
13435 | } | ||||
13436 | } | ||||
13437 | } | ||||
13438 | |||||
13439 | /* | ||||
13440 | ** All the component segment-iterators of pIter have been set up. This | ||||
13441 | ** functions finishes setup for iterator pIter itself. | ||||
13442 | */ | ||||
13443 | static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){ | ||||
13444 | int iIter; | ||||
13445 | for(iIter=pIter->nSeg-1; iIter>0; iIter--){ | ||||
13446 | int iEq; | ||||
13447 | if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){ | ||||
13448 | Fts5SegIter *pSeg = &pIter->aSeg[iEq]; | ||||
13449 | if( p->rc==SQLITE_OK0 ) pSeg->xNext(p, pSeg, 0); | ||||
13450 | fts5MultiIterAdvanced(p, pIter, iEq, iIter); | ||||
13451 | } | ||||
13452 | } | ||||
13453 | fts5MultiIterSetEof(pIter); | ||||
13454 | fts5AssertMultiIterSetup(p, pIter); | ||||
13455 | |||||
13456 | if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter)) | ||||
13457 | || fts5MultiIterIsDeleted(pIter) | ||||
13458 | ){ | ||||
13459 | fts5MultiIterNext(p, pIter, 0, 0); | ||||
13460 | }else if( pIter->base.bEof==0 ){ | ||||
13461 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | ||||
13462 | pIter->xSetOutputs(pIter, pSeg); | ||||
13463 | } | ||||
13464 | } | ||||
13465 | |||||
13466 | /* | ||||
13467 | ** Allocate a new Fts5Iter object. | ||||
13468 | ** | ||||
13469 | ** The new object will be used to iterate through data in structure pStruct. | ||||
13470 | ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel | ||||
13471 | ** is zero or greater, data from the first nSegment segments on level iLevel | ||||
13472 | ** is merged. | ||||
13473 | ** | ||||
13474 | ** The iterator initially points to the first term/rowid entry in the | ||||
13475 | ** iterated data. | ||||
13476 | */ | ||||
13477 | static void fts5MultiIterNew( | ||||
13478 | Fts5Index *p, /* FTS5 backend to iterate within */ | ||||
13479 | Fts5Structure *pStruct, /* Structure of specific index */ | ||||
13480 | int flags, /* FTS5INDEX_QUERY_XXX flags */ | ||||
13481 | Fts5Colset *pColset, /* Colset to filter on (or NULL) */ | ||||
13482 | const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ | ||||
13483 | int iLevel, /* Level to iterate (-1 for all) */ | ||||
13484 | int nSegment, /* Number of segments to merge (iLevel>=0) */ | ||||
13485 | Fts5Iter **ppOut /* New object */ | ||||
13486 | ){ | ||||
13487 | int nSeg = 0; /* Number of segment-iters in use */ | ||||
13488 | int iIter = 0; /* */ | ||||
13489 | int iSeg; /* Used to iterate through segments */ | ||||
13490 | Fts5StructureLevel *pLvl; | ||||
13491 | Fts5Iter *pNew; | ||||
13492 | |||||
13493 | assert( (pTerm==0 && nTerm==0) || iLevel<0 )((void) (0)); | ||||
13494 | |||||
13495 | /* Allocate space for the new multi-seg-iterator. */ | ||||
13496 | if( p->rc==SQLITE_OK0 ){ | ||||
13497 | if( iLevel<0 ){ | ||||
13498 | assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) )((void) (0)); | ||||
13499 | nSeg = pStruct->nSegment; | ||||
13500 | nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH0x0040)); | ||||
13501 | }else{ | ||||
13502 | nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment)(((pStruct->aLevel[iLevel].nSeg) < (nSegment)) ? (pStruct ->aLevel[iLevel].nSeg) : (nSegment)); | ||||
13503 | } | ||||
13504 | } | ||||
13505 | *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); | ||||
13506 | if( pNew==0 ){ | ||||
13507 | assert( p->rc!=SQLITE_OK )((void) (0)); | ||||
13508 | goto fts5MultiIterNew_post_check; | ||||
13509 | } | ||||
13510 | pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC0x0002)); | ||||
13511 | pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY0x0010)); | ||||
13512 | pNew->pColset = pColset; | ||||
13513 | if( (flags & FTS5INDEX_QUERY_NOOUTPUT0x0020)==0 ){ | ||||
13514 | fts5IterSetOutputCb(&p->rc, pNew); | ||||
13515 | } | ||||
13516 | |||||
13517 | /* Initialize each of the component segment iterators. */ | ||||
13518 | if( p->rc==SQLITE_OK0 ){ | ||||
13519 | if( iLevel<0 ){ | ||||
13520 | Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; | ||||
13521 | if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH0x0040) ){ | ||||
13522 | /* Add a segment iterator for the current contents of the hash table. */ | ||||
13523 | Fts5SegIter *pIter = &pNew->aSeg[iIter++]; | ||||
13524 | fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); | ||||
13525 | } | ||||
13526 | for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){ | ||||
13527 | for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ | ||||
13528 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | ||||
13529 | Fts5SegIter *pIter = &pNew->aSeg[iIter++]; | ||||
13530 | if( pTerm==0 ){ | ||||
13531 | fts5SegIterInit(p, pSeg, pIter); | ||||
13532 | }else{ | ||||
13533 | fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter); | ||||
13534 | } | ||||
13535 | } | ||||
13536 | } | ||||
13537 | }else{ | ||||
13538 | pLvl = &pStruct->aLevel[iLevel]; | ||||
13539 | for(iSeg=nSeg-1; iSeg>=0; iSeg--){ | ||||
13540 | fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); | ||||
13541 | } | ||||
13542 | } | ||||
13543 | assert( iIter==nSeg )((void) (0)); | ||||
13544 | } | ||||
13545 | |||||
13546 | /* If the above was successful, each component iterator now points | ||||
13547 | ** to the first entry in its segment. In this case initialize the | ||||
13548 | ** aFirst[] array. Or, if an error has occurred, free the iterator | ||||
13549 | ** object and set the output variable to NULL. */ | ||||
13550 | if( p->rc==SQLITE_OK0 ){ | ||||
13551 | fts5MultiIterFinishSetup(p, pNew); | ||||
13552 | }else{ | ||||
13553 | fts5MultiIterFree(pNew); | ||||
13554 | *ppOut = 0; | ||||
13555 | } | ||||
13556 | |||||
13557 | fts5MultiIterNew_post_check: | ||||
13558 | assert( (*ppOut)!=0 || p->rc!=SQLITE_OK )((void) (0)); | ||||
13559 | return; | ||||
13560 | } | ||||
13561 | |||||
13562 | /* | ||||
13563 | ** Create an Fts5Iter that iterates through the doclist provided | ||||
13564 | ** as the second argument. | ||||
13565 | */ | ||||
13566 | static void fts5MultiIterNew2( | ||||
13567 | Fts5Index *p, /* FTS5 backend to iterate within */ | ||||
13568 | Fts5Data *pData, /* Doclist to iterate through */ | ||||
13569 | int bDesc, /* True for descending rowid order */ | ||||
13570 | Fts5Iter **ppOut /* New object */ | ||||
13571 | ){ | ||||
13572 | Fts5Iter *pNew; | ||||
13573 | pNew = fts5MultiIterAlloc(p, 2); | ||||
13574 | if( pNew ){ | ||||
13575 | Fts5SegIter *pIter = &pNew->aSeg[1]; | ||||
13576 | pIter->flags = FTS5_SEGITER_ONETERM0x01; | ||||
13577 | if( pData->szLeaf>0 ){ | ||||
13578 | pIter->pLeaf = pData; | ||||
13579 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pData->p, (u64*)&pIter->iRowid); | ||||
13580 | pIter->iEndofDoclist = pData->nn; | ||||
13581 | pNew->aFirst[1].iFirst = 1; | ||||
13582 | if( bDesc ){ | ||||
13583 | pNew->bRev = 1; | ||||
13584 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | ||||
13585 | fts5SegIterReverseInitPage(p, pIter); | ||||
13586 | }else{ | ||||
13587 | fts5SegIterLoadNPos(p, pIter); | ||||
13588 | } | ||||
13589 | pData = 0; | ||||
13590 | }else{ | ||||
13591 | pNew->base.bEof = 1; | ||||
13592 | } | ||||
13593 | fts5SegIterSetNext(p, pIter); | ||||
13594 | |||||
13595 | *ppOut = pNew; | ||||
13596 | } | ||||
13597 | |||||
13598 | fts5DataRelease(pData); | ||||
13599 | } | ||||
13600 | |||||
13601 | /* | ||||
13602 | ** Return true if the iterator is at EOF or if an error has occurred. | ||||
13603 | ** False otherwise. | ||||
13604 | */ | ||||
13605 | static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ | ||||
13606 | assert( pIter!=0 || p->rc!=SQLITE_OK )((void) (0)); | ||||
13607 | assert( p->rc!=SQLITE_OK((void) (0)) | ||||
13608 | || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof((void) (0)) | ||||
13609 | )((void) (0)); | ||||
13610 | return (p->rc || pIter->base.bEof); | ||||
13611 | } | ||||
13612 | |||||
13613 | /* | ||||
13614 | ** Return the rowid of the entry that the iterator currently points | ||||
13615 | ** to. If the iterator points to EOF when this function is called the | ||||
13616 | ** results are undefined. | ||||
13617 | */ | ||||
13618 | static i64 fts5MultiIterRowid(Fts5Iter *pIter){ | ||||
13619 | assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf )((void) (0)); | ||||
13620 | return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; | ||||
13621 | } | ||||
13622 | |||||
13623 | /* | ||||
13624 | ** Move the iterator to the next entry at or following iMatch. | ||||
13625 | */ | ||||
13626 | static void fts5MultiIterNextFrom( | ||||
13627 | Fts5Index *p, | ||||
13628 | Fts5Iter *pIter, | ||||
13629 | i64 iMatch | ||||
13630 | ){ | ||||
13631 | while( 1 ){ | ||||
13632 | i64 iRowid; | ||||
13633 | fts5MultiIterNext(p, pIter, 1, iMatch); | ||||
13634 | if( fts5MultiIterEof(p, pIter) ) break; | ||||
13635 | iRowid = fts5MultiIterRowid(pIter); | ||||
13636 | if( pIter->bRev==0 && iRowid>=iMatch ) break; | ||||
13637 | if( pIter->bRev!=0 && iRowid<=iMatch ) break; | ||||
13638 | } | ||||
13639 | } | ||||
13640 | |||||
13641 | /* | ||||
13642 | ** Return a pointer to a buffer containing the term associated with the | ||||
13643 | ** entry that the iterator currently points to. | ||||
13644 | */ | ||||
13645 | static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ | ||||
13646 | Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | ||||
13647 | *pn = p->term.n; | ||||
13648 | return p->term.p; | ||||
13649 | } | ||||
13650 | |||||
13651 | /* | ||||
13652 | ** Allocate a new segment-id for the structure pStruct. The new segment | ||||
13653 | ** id must be between 1 and 65335 inclusive, and must not be used by | ||||
13654 | ** any currently existing segment. If a free segment id cannot be found, | ||||
13655 | ** SQLITE_FULL is returned. | ||||
13656 | ** | ||||
13657 | ** If an error has already occurred, this function is a no-op. 0 is | ||||
13658 | ** returned in this case. | ||||
13659 | */ | ||||
13660 | static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ | ||||
13661 | int iSegid = 0; | ||||
13662 | |||||
13663 | if( p->rc==SQLITE_OK0 ){ | ||||
13664 | if( pStruct->nSegment>=FTS5_MAX_SEGMENT2000 ){ | ||||
13665 | p->rc = SQLITE_FULL13; | ||||
13666 | }else{ | ||||
13667 | /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following | ||||
13668 | ** array is 63 elements, or 252 bytes, in size. */ | ||||
13669 | u32 aUsed[(FTS5_MAX_SEGMENT2000+31) / 32]; | ||||
13670 | int iLvl, iSeg; | ||||
13671 | int i; | ||||
13672 | u32 mask; | ||||
13673 | memset(aUsed, 0, sizeof(aUsed)); | ||||
13674 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | ||||
13675 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | ||||
13676 | int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; | ||||
13677 | if( iId<=FTS5_MAX_SEGMENT2000 && iId>0 ){ | ||||
13678 | aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32); | ||||
13679 | } | ||||
13680 | } | ||||
13681 | } | ||||
13682 | |||||
13683 | for(i=0; aUsed[i]==0xFFFFFFFF; i++); | ||||
13684 | mask = aUsed[i]; | ||||
13685 | for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++); | ||||
13686 | iSegid += 1 + i*32; | ||||
13687 | |||||
13688 | #ifdef SQLITE_DEBUG | ||||
13689 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | ||||
13690 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | ||||
13691 | assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid )((void) (0)); | ||||
13692 | } | ||||
13693 | } | ||||
13694 | assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT )((void) (0)); | ||||
13695 | |||||
13696 | { | ||||
13697 | sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); | ||||
13698 | if( p->rc==SQLITE_OK0 ){ | ||||
13699 | u8 aBlob[2] = {0xff, 0xff}; | ||||
13700 | sqlite3_bind_intsqlite3_api->bind_int(pIdxSelect, 1, iSegid); | ||||
13701 | sqlite3_bind_blobsqlite3_api->bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
13702 | assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW )((void) (0)); | ||||
13703 | p->rc = sqlite3_resetsqlite3_api->reset(pIdxSelect); | ||||
13704 | sqlite3_bind_nullsqlite3_api->bind_null(pIdxSelect, 2); | ||||
13705 | } | ||||
13706 | } | ||||
13707 | #endif | ||||
13708 | } | ||||
13709 | } | ||||
13710 | |||||
13711 | return iSegid; | ||||
13712 | } | ||||
13713 | |||||
13714 | /* | ||||
13715 | ** Discard all data currently cached in the hash-tables. | ||||
13716 | */ | ||||
13717 | static void fts5IndexDiscardData(Fts5Index *p){ | ||||
13718 | assert( p->pHash || p->nPendingData==0 )((void) (0)); | ||||
13719 | if( p->pHash ){ | ||||
13720 | sqlite3Fts5HashClear(p->pHash); | ||||
13721 | p->nPendingData = 0; | ||||
13722 | p->nPendingRow = 0; | ||||
13723 | p->flushRc = SQLITE_OK0; | ||||
13724 | } | ||||
13725 | p->nContentlessDelete = 0; | ||||
13726 | } | ||||
13727 | |||||
13728 | /* | ||||
13729 | ** Return the size of the prefix, in bytes, that buffer | ||||
13730 | ** (pNew/<length-unknown>) shares with buffer (pOld/nOld). | ||||
13731 | ** | ||||
13732 | ** Buffer (pNew/<length-unknown>) is guaranteed to be greater | ||||
13733 | ** than buffer (pOld/nOld). | ||||
13734 | */ | ||||
13735 | static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){ | ||||
13736 | int i; | ||||
13737 | for(i=0; i<nOld; i++){ | ||||
13738 | if( pOld[i]!=pNew[i] ) break; | ||||
13739 | } | ||||
13740 | return i; | ||||
13741 | } | ||||
13742 | |||||
13743 | static void fts5WriteDlidxClear( | ||||
13744 | Fts5Index *p, | ||||
13745 | Fts5SegWriter *pWriter, | ||||
13746 | int bFlush /* If true, write dlidx to disk */ | ||||
13747 | ){ | ||||
13748 | int i; | ||||
13749 | assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) )((void) (0)); | ||||
13750 | for(i=0; i<pWriter->nDlidx; i++){ | ||||
13751 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; | ||||
13752 | if( pDlidx->buf.n==0 ) break; | ||||
13753 | if( bFlush ){ | ||||
13754 | assert( pDlidx->pgno!=0 )((void) (0)); | ||||
13755 | fts5DataWrite(p, | ||||
13756 | FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(pDlidx ->pgno)) ), | ||||
13757 | pDlidx->buf.p, pDlidx->buf.n | ||||
13758 | ); | ||||
13759 | } | ||||
13760 | sqlite3Fts5BufferZero(&pDlidx->buf); | ||||
13761 | pDlidx->bPrevValid = 0; | ||||
13762 | } | ||||
13763 | } | ||||
13764 | |||||
13765 | /* | ||||
13766 | ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. | ||||
13767 | ** Any new array elements are zeroed before returning. | ||||
13768 | */ | ||||
13769 | static int fts5WriteDlidxGrow( | ||||
13770 | Fts5Index *p, | ||||
13771 | Fts5SegWriter *pWriter, | ||||
13772 | int nLvl | ||||
13773 | ){ | ||||
13774 | if( p->rc==SQLITE_OK0 && nLvl>=pWriter->nDlidx ){ | ||||
13775 | Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64sqlite3_api->realloc64( | ||||
13776 | pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl | ||||
13777 | ); | ||||
13778 | if( aDlidx==0 ){ | ||||
13779 | p->rc = SQLITE_NOMEM7; | ||||
13780 | }else{ | ||||
13781 | size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); | ||||
13782 | memset(&aDlidx[pWriter->nDlidx], 0, nByte); | ||||
13783 | pWriter->aDlidx = aDlidx; | ||||
13784 | pWriter->nDlidx = nLvl; | ||||
13785 | } | ||||
13786 | } | ||||
13787 | return p->rc; | ||||
13788 | } | ||||
13789 | |||||
13790 | /* | ||||
13791 | ** If the current doclist-index accumulating in pWriter->aDlidx[] is large | ||||
13792 | ** enough, flush it to disk and return 1. Otherwise discard it and return | ||||
13793 | ** zero. | ||||
13794 | */ | ||||
13795 | static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ | ||||
13796 | int bFlag = 0; | ||||
13797 | |||||
13798 | /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written | ||||
13799 | ** to the database, also write the doclist-index to disk. */ | ||||
13800 | if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE4 ){ | ||||
13801 | bFlag = 1; | ||||
13802 | } | ||||
13803 | fts5WriteDlidxClear(p, pWriter, bFlag); | ||||
13804 | pWriter->nEmpty = 0; | ||||
13805 | return bFlag; | ||||
13806 | } | ||||
13807 | |||||
13808 | /* | ||||
13809 | ** This function is called whenever processing of the doclist for the | ||||
13810 | ** last term on leaf page (pWriter->iBtPage) is completed. | ||||
13811 | ** | ||||
13812 | ** The doclist-index for that term is currently stored in-memory within the | ||||
13813 | ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function | ||||
13814 | ** writes it out to disk. Or, if it is too small to bother with, discards | ||||
13815 | ** it. | ||||
13816 | ** | ||||
13817 | ** Fts5SegWriter.btterm currently contains the first term on page iBtPage. | ||||
13818 | */ | ||||
13819 | static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ | ||||
13820 | int bFlag; | ||||
13821 | |||||
13822 | assert( pWriter->iBtPage || pWriter->nEmpty==0 )((void) (0)); | ||||
13823 | if( pWriter->iBtPage==0 ) return; | ||||
13824 | bFlag = fts5WriteFlushDlidx(p, pWriter); | ||||
13825 | |||||
13826 | if( p->rc==SQLITE_OK0 ){ | ||||
13827 | const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:""); | ||||
13828 | /* The following was already done in fts5WriteInit(): */ | ||||
13829 | /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ | ||||
13830 | sqlite3_bind_blobsqlite3_api->bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
13831 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1)); | ||||
13832 | sqlite3_stepsqlite3_api->step(p->pIdxWriter); | ||||
13833 | p->rc = sqlite3_resetsqlite3_api->reset(p->pIdxWriter); | ||||
13834 | sqlite3_bind_nullsqlite3_api->bind_null(p->pIdxWriter, 2); | ||||
13835 | } | ||||
13836 | pWriter->iBtPage = 0; | ||||
13837 | } | ||||
13838 | |||||
13839 | /* | ||||
13840 | ** This is called once for each leaf page except the first that contains | ||||
13841 | ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that | ||||
13842 | ** is larger than all terms written to earlier leaves, and equal to or | ||||
13843 | ** smaller than the first term on the new leaf. | ||||
13844 | ** | ||||
13845 | ** If an error occurs, an error code is left in Fts5Index.rc. If an error | ||||
13846 | ** has already occurred when this function is called, it is a no-op. | ||||
13847 | */ | ||||
13848 | static void fts5WriteBtreeTerm( | ||||
13849 | Fts5Index *p, /* FTS5 backend object */ | ||||
13850 | Fts5SegWriter *pWriter, /* Writer object */ | ||||
13851 | int nTerm, const u8 *pTerm /* First term on new page */ | ||||
13852 | ){ | ||||
13853 | fts5WriteFlushBtree(p, pWriter); | ||||
13854 | if( p->rc==SQLITE_OK0 ){ | ||||
13855 | fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&pWriter->btterm,nTerm ,pTerm); | ||||
13856 | pWriter->iBtPage = pWriter->writer.pgno; | ||||
13857 | } | ||||
13858 | } | ||||
13859 | |||||
13860 | /* | ||||
13861 | ** This function is called when flushing a leaf page that contains no | ||||
13862 | ** terms at all to disk. | ||||
13863 | */ | ||||
13864 | static void fts5WriteBtreeNoTerm( | ||||
13865 | Fts5Index *p, /* FTS5 backend object */ | ||||
13866 | Fts5SegWriter *pWriter /* Writer object */ | ||||
13867 | ){ | ||||
13868 | /* If there were no rowids on the leaf page either and the doclist-index | ||||
13869 | ** has already been started, append an 0x00 byte to it. */ | ||||
13870 | if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ | ||||
13871 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; | ||||
13872 | assert( pDlidx->bPrevValid )((void) (0)); | ||||
13873 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); | ||||
13874 | } | ||||
13875 | |||||
13876 | /* Increment the "number of sequential leaves without a term" counter. */ | ||||
13877 | pWriter->nEmpty++; | ||||
13878 | } | ||||
13879 | |||||
13880 | static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ | ||||
13881 | i64 iRowid; | ||||
13882 | int iOff; | ||||
13883 | |||||
13884 | iOff = 1 + fts5GetVarintsqlite3Fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); | ||||
13885 | fts5GetVarintsqlite3Fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); | ||||
13886 | return iRowid; | ||||
13887 | } | ||||
13888 | |||||
13889 | /* | ||||
13890 | ** Rowid iRowid has just been appended to the current leaf page. It is the | ||||
13891 | ** first on the page. This function appends an appropriate entry to the current | ||||
13892 | ** doclist-index. | ||||
13893 | */ | ||||
13894 | static void fts5WriteDlidxAppend( | ||||
13895 | Fts5Index *p, | ||||
13896 | Fts5SegWriter *pWriter, | ||||
13897 | i64 iRowid | ||||
13898 | ){ | ||||
13899 | int i; | ||||
13900 | int bDone = 0; | ||||
13901 | |||||
13902 | for(i=0; p->rc==SQLITE_OK0 && bDone==0; i++){ | ||||
13903 | i64 iVal; | ||||
13904 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; | ||||
13905 | |||||
13906 | if( pDlidx->buf.n>=p->pConfig->pgsz ){ | ||||
13907 | /* The current doclist-index page is full. Write it to disk and push | ||||
13908 | ** a copy of iRowid (which will become the first rowid on the next | ||||
13909 | ** doclist-index leaf page) up into the next level of the b-tree | ||||
13910 | ** hierarchy. If the node being flushed is currently the root node, | ||||
13911 | ** also push its first rowid upwards. */ | ||||
13912 | pDlidx->buf.p[0] = 0x01; /* Not the root node */ | ||||
13913 | fts5DataWrite(p, | ||||
13914 | FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(pDlidx ->pgno)) ), | ||||
13915 | pDlidx->buf.p, pDlidx->buf.n | ||||
13916 | ); | ||||
13917 | fts5WriteDlidxGrow(p, pWriter, i+2); | ||||
13918 | pDlidx = &pWriter->aDlidx[i]; | ||||
13919 | if( p->rc==SQLITE_OK0 && pDlidx[1].buf.n==0 ){ | ||||
13920 | i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); | ||||
13921 | |||||
13922 | /* This was the root node. Push its first rowid up to the new root. */ | ||||
13923 | pDlidx[1].pgno = pDlidx->pgno; | ||||
13924 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); | ||||
13925 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); | ||||
13926 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); | ||||
13927 | pDlidx[1].bPrevValid = 1; | ||||
13928 | pDlidx[1].iPrev = iFirst; | ||||
13929 | } | ||||
13930 | |||||
13931 | sqlite3Fts5BufferZero(&pDlidx->buf); | ||||
13932 | pDlidx->bPrevValid = 0; | ||||
13933 | pDlidx->pgno++; | ||||
13934 | }else{ | ||||
13935 | bDone = 1; | ||||
13936 | } | ||||
13937 | |||||
13938 | if( pDlidx->bPrevValid ){ | ||||
13939 | iVal = (u64)iRowid - (u64)pDlidx->iPrev; | ||||
13940 | }else{ | ||||
13941 | i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno); | ||||
13942 | assert( pDlidx->buf.n==0 )((void) (0)); | ||||
13943 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); | ||||
13944 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); | ||||
13945 | iVal = iRowid; | ||||
13946 | } | ||||
13947 | |||||
13948 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); | ||||
13949 | pDlidx->bPrevValid = 1; | ||||
13950 | pDlidx->iPrev = iRowid; | ||||
13951 | } | ||||
13952 | } | ||||
13953 | |||||
13954 | static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ | ||||
13955 | static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; | ||||
13956 | Fts5PageWriter *pPage = &pWriter->writer; | ||||
13957 | i64 iRowid; | ||||
13958 | |||||
13959 | assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) )((void) (0)); | ||||
13960 | |||||
13961 | /* Set the szLeaf header field. */ | ||||
13962 | assert( 0==fts5GetU16(&pPage->buf.p[2]) )((void) (0)); | ||||
13963 | fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); | ||||
13964 | |||||
13965 | if( pWriter->bFirstTermInPage ){ | ||||
13966 | /* No term was written to this page. */ | ||||
13967 | assert( pPage->pgidx.n==0 )((void) (0)); | ||||
13968 | fts5WriteBtreeNoTerm(p, pWriter); | ||||
13969 | }else{ | ||||
13970 | /* Append the pgidx to the page buffer. Set the szLeaf header field. */ | ||||
13971 | fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, pPage->pgidx.n,pPage->pgidx.p); | ||||
13972 | } | ||||
13973 | |||||
13974 | /* Write the page out to disk */ | ||||
13975 | iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pPage ->pgno)) ); | ||||
13976 | fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); | ||||
13977 | |||||
13978 | /* Initialize the next page. */ | ||||
13979 | fts5BufferZero(&pPage->buf)sqlite3Fts5BufferZero(&pPage->buf); | ||||
13980 | fts5BufferZero(&pPage->pgidx)sqlite3Fts5BufferZero(&pPage->pgidx); | ||||
13981 | fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, 4,zero); | ||||
13982 | pPage->iPrevPgidx = 0; | ||||
13983 | pPage->pgno++; | ||||
13984 | |||||
13985 | /* Increase the leaves written counter */ | ||||
13986 | pWriter->nLeafWritten++; | ||||
13987 | |||||
13988 | /* The new leaf holds no terms or rowids */ | ||||
13989 | pWriter->bFirstTermInPage = 1; | ||||
13990 | pWriter->bFirstRowidInPage = 1; | ||||
13991 | } | ||||
13992 | |||||
13993 | /* | ||||
13994 | ** Append term pTerm/nTerm to the segment being written by the writer passed | ||||
13995 | ** as the second argument. | ||||
13996 | ** | ||||
13997 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | ||||
13998 | ** already occurred, this function is a no-op. | ||||
13999 | */ | ||||
14000 | static void fts5WriteAppendTerm( | ||||
14001 | Fts5Index *p, | ||||
14002 | Fts5SegWriter *pWriter, | ||||
14003 | int nTerm, const u8 *pTerm | ||||
14004 | ){ | ||||
14005 | int nPrefix; /* Bytes of prefix compression for term */ | ||||
14006 | Fts5PageWriter *pPage = &pWriter->writer; | ||||
14007 | Fts5Buffer *pPgidx = &pWriter->writer.pgidx; | ||||
14008 | int nMin = MIN(pPage->term.n, nTerm)(((pPage->term.n) < (nTerm)) ? (pPage->term.n) : (nTerm )); | ||||
14009 | |||||
14010 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
14011 | assert( pPage->buf.n>=4 )((void) (0)); | ||||
14012 | assert( pPage->buf.n>4 || pWriter->bFirstTermInPage )((void) (0)); | ||||
14013 | |||||
14014 | /* If the current leaf page is full, flush it to disk. */ | ||||
14015 | if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ | ||||
14016 | if( pPage->buf.n>4 ){ | ||||
14017 | fts5WriteFlushLeaf(p, pWriter); | ||||
14018 | if( p->rc!=SQLITE_OK0 ) return; | ||||
14019 | } | ||||
14020 | fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING)( (u32)((&pPage->buf)->n) + (u32)(nTerm+20) <= ( u32)((&pPage->buf)->nSpace) ? 0 : sqlite3Fts5BufferSize ((&p->rc),(&pPage->buf),(nTerm+20)+(&pPage-> buf)->n) ); | ||||
14021 | } | ||||
14022 | |||||
14023 | /* TODO1: Updating pgidx here. */ | ||||
14024 | pPgidx->n += sqlite3Fts5PutVarint( | ||||
14025 | &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx | ||||
14026 | ); | ||||
14027 | pPage->iPrevPgidx = pPage->buf.n; | ||||
14028 | #if 0 | ||||
14029 | fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); | ||||
14030 | pPgidx->n += 2; | ||||
14031 | #endif | ||||
14032 | |||||
14033 | if( pWriter->bFirstTermInPage ){ | ||||
14034 | nPrefix = 0; | ||||
14035 | if( pPage->pgno!=1 ){ | ||||
14036 | /* This is the first term on a leaf that is not the leftmost leaf in | ||||
14037 | ** the segment b-tree. In this case it is necessary to add a term to | ||||
14038 | ** the b-tree hierarchy that is (a) larger than the largest term | ||||
14039 | ** already written to the segment and (b) smaller than or equal to | ||||
14040 | ** this term. In other words, a prefix of (pTerm/nTerm) that is one | ||||
14041 | ** byte longer than the longest prefix (pTerm/nTerm) shares with the | ||||
14042 | ** previous term. | ||||
14043 | ** | ||||
14044 | ** Usually, the previous term is available in pPage->term. The exception | ||||
14045 | ** is if this is the first term written in an incremental-merge step. | ||||
14046 | ** In this case the previous term is not available, so just write a | ||||
14047 | ** copy of (pTerm/nTerm) into the parent node. This is slightly | ||||
14048 | ** inefficient, but still correct. */ | ||||
14049 | int n = nTerm; | ||||
14050 | if( pPage->term.n ){ | ||||
14051 | n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm); | ||||
14052 | } | ||||
14053 | fts5WriteBtreeTerm(p, pWriter, n, pTerm); | ||||
14054 | if( p->rc!=SQLITE_OK0 ) return; | ||||
14055 | pPage = &pWriter->writer; | ||||
14056 | } | ||||
14057 | }else{ | ||||
14058 | nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm); | ||||
14059 | fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)nPrefix); | ||||
14060 | } | ||||
14061 | |||||
14062 | /* Append the number of bytes of new data, then the term data itself | ||||
14063 | ** to the page. */ | ||||
14064 | fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)nTerm - nPrefix); | ||||
14065 | fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix])sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, nTerm - nPrefix,&pTerm[nPrefix]); | ||||
14066 | |||||
14067 | /* Update the Fts5PageWriter.term field. */ | ||||
14068 | fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&pPage->term,nTerm, pTerm); | ||||
14069 | pWriter->bFirstTermInPage = 0; | ||||
14070 | |||||
14071 | pWriter->bFirstRowidInPage = 0; | ||||
14072 | pWriter->bFirstRowidInDoclist = 1; | ||||
14073 | |||||
14074 | assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) )((void) (0)); | ||||
14075 | pWriter->aDlidx[0].pgno = pPage->pgno; | ||||
14076 | } | ||||
14077 | |||||
14078 | /* | ||||
14079 | ** Append a rowid and position-list size field to the writers output. | ||||
14080 | */ | ||||
14081 | static void fts5WriteAppendRowid( | ||||
14082 | Fts5Index *p, | ||||
14083 | Fts5SegWriter *pWriter, | ||||
14084 | i64 iRowid | ||||
14085 | ){ | ||||
14086 | if( p->rc==SQLITE_OK0 ){ | ||||
14087 | Fts5PageWriter *pPage = &pWriter->writer; | ||||
14088 | |||||
14089 | if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ | ||||
14090 | fts5WriteFlushLeaf(p, pWriter); | ||||
14091 | } | ||||
14092 | |||||
14093 | /* If this is to be the first rowid written to the page, set the | ||||
14094 | ** rowid-pointer in the page-header. Also append a value to the dlidx | ||||
14095 | ** buffer, in case a doclist-index is required. */ | ||||
14096 | if( pWriter->bFirstRowidInPage ){ | ||||
14097 | fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); | ||||
14098 | fts5WriteDlidxAppend(p, pWriter, iRowid); | ||||
14099 | } | ||||
14100 | |||||
14101 | /* Write the rowid. */ | ||||
14102 | if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ | ||||
14103 | fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)iRowid); | ||||
14104 | }else{ | ||||
14105 | assert_nc( p->rc || iRowid>pWriter->iPrevRowid )((void) (0)); | ||||
14106 | fts5BufferAppendVarint(&p->rc, &pPage->buf,sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid) | ||||
14107 | (u64)iRowid - (u64)pWriter->iPrevRowidsqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid) | ||||
14108 | )sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid); | ||||
14109 | } | ||||
14110 | pWriter->iPrevRowid = iRowid; | ||||
14111 | pWriter->bFirstRowidInDoclist = 0; | ||||
14112 | pWriter->bFirstRowidInPage = 0; | ||||
14113 | } | ||||
14114 | } | ||||
14115 | |||||
14116 | static void fts5WriteAppendPoslistData( | ||||
14117 | Fts5Index *p, | ||||
14118 | Fts5SegWriter *pWriter, | ||||
14119 | const u8 *aData, | ||||
14120 | int nData | ||||
14121 | ){ | ||||
14122 | Fts5PageWriter *pPage = &pWriter->writer; | ||||
14123 | const u8 *a = aData; | ||||
14124 | int n = nData; | ||||
14125 | |||||
14126 | assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK )((void) (0)); | ||||
14127 | while( p->rc==SQLITE_OK0 | ||||
14128 | && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz | ||||
14129 | ){ | ||||
14130 | int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; | ||||
14131 | int nCopy = 0; | ||||
14132 | while( nCopy<nReq ){ | ||||
14133 | i64 dummy; | ||||
14134 | nCopy += fts5GetVarintsqlite3Fts5GetVarint(&a[nCopy], (u64*)&dummy); | ||||
14135 | } | ||||
14136 | fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, nCopy,a); | ||||
14137 | a += nCopy; | ||||
14138 | n -= nCopy; | ||||
14139 | fts5WriteFlushLeaf(p, pWriter); | ||||
14140 | } | ||||
14141 | if( n>0 ){ | ||||
14142 | fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, n,a); | ||||
14143 | } | ||||
14144 | } | ||||
14145 | |||||
14146 | /* | ||||
14147 | ** Flush any data cached by the writer object to the database. Free any | ||||
14148 | ** allocations associated with the writer. | ||||
14149 | */ | ||||
14150 | static void fts5WriteFinish( | ||||
14151 | Fts5Index *p, | ||||
14152 | Fts5SegWriter *pWriter, /* Writer object */ | ||||
14153 | int *pnLeaf /* OUT: Number of leaf pages in b-tree */ | ||||
14154 | ){ | ||||
14155 | int i; | ||||
14156 | Fts5PageWriter *pLeaf = &pWriter->writer; | ||||
14157 | if( p->rc==SQLITE_OK0 ){ | ||||
14158 | assert( pLeaf->pgno>=1 )((void) (0)); | ||||
14159 | if( pLeaf->buf.n>4 ){ | ||||
14160 | fts5WriteFlushLeaf(p, pWriter); | ||||
14161 | } | ||||
14162 | *pnLeaf = pLeaf->pgno-1; | ||||
14163 | if( pLeaf->pgno>1 ){ | ||||
14164 | fts5WriteFlushBtree(p, pWriter); | ||||
14165 | } | ||||
14166 | } | ||||
14167 | fts5BufferFree(&pLeaf->term)sqlite3Fts5BufferFree(&pLeaf->term); | ||||
14168 | fts5BufferFree(&pLeaf->buf)sqlite3Fts5BufferFree(&pLeaf->buf); | ||||
14169 | fts5BufferFree(&pLeaf->pgidx)sqlite3Fts5BufferFree(&pLeaf->pgidx); | ||||
14170 | fts5BufferFree(&pWriter->btterm)sqlite3Fts5BufferFree(&pWriter->btterm); | ||||
14171 | |||||
14172 | for(i=0; i<pWriter->nDlidx; i++){ | ||||
14173 | sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); | ||||
14174 | } | ||||
14175 | sqlite3_freesqlite3_api->free(pWriter->aDlidx); | ||||
14176 | } | ||||
14177 | |||||
14178 | static void fts5WriteInit( | ||||
14179 | Fts5Index *p, | ||||
14180 | Fts5SegWriter *pWriter, | ||||
14181 | int iSegid | ||||
14182 | ){ | ||||
14183 | const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING20; | ||||
14184 | |||||
14185 | memset(pWriter, 0, sizeof(Fts5SegWriter)); | ||||
14186 | pWriter->iSegid = iSegid; | ||||
14187 | |||||
14188 | fts5WriteDlidxGrow(p, pWriter, 1); | ||||
14189 | pWriter->writer.pgno = 1; | ||||
14190 | pWriter->bFirstTermInPage = 1; | ||||
14191 | pWriter->iBtPage = 1; | ||||
14192 | |||||
14193 | assert( pWriter->writer.buf.n==0 )((void) (0)); | ||||
14194 | assert( pWriter->writer.pgidx.n==0 )((void) (0)); | ||||
14195 | |||||
14196 | /* Grow the two buffers to pgsz + padding bytes in size. */ | ||||
14197 | sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer); | ||||
14198 | sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer); | ||||
14199 | |||||
14200 | if( p->pIdxWriter==0 ){ | ||||
14201 | Fts5Config *pConfig = p->pConfig; | ||||
14202 | fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintfsqlite3_api->mprintf( | ||||
14203 | "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", | ||||
14204 | pConfig->zDb, pConfig->zName | ||||
14205 | )); | ||||
14206 | } | ||||
14207 | |||||
14208 | if( p->rc==SQLITE_OK0 ){ | ||||
14209 | /* Initialize the 4-byte leaf-page header to 0x00. */ | ||||
14210 | memset(pWriter->writer.buf.p, 0, 4); | ||||
14211 | pWriter->writer.buf.n = 4; | ||||
14212 | |||||
14213 | /* Bind the current output segment id to the index-writer. This is an | ||||
14214 | ** optimization over binding the same value over and over as rows are | ||||
14215 | ** inserted into %_idx by the current writer. */ | ||||
14216 | sqlite3_bind_intsqlite3_api->bind_int(p->pIdxWriter, 1, pWriter->iSegid); | ||||
14217 | } | ||||
14218 | } | ||||
14219 | |||||
14220 | /* | ||||
14221 | ** Iterator pIter was used to iterate through the input segments of on an | ||||
14222 | ** incremental merge operation. This function is called if the incremental | ||||
14223 | ** merge step has finished but the input has not been completely exhausted. | ||||
14224 | */ | ||||
14225 | static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ | ||||
14226 | int i; | ||||
14227 | Fts5Buffer buf; | ||||
14228 | memset(&buf, 0, sizeof(Fts5Buffer)); | ||||
14229 | for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK0; i++){ | ||||
14230 | Fts5SegIter *pSeg = &pIter->aSeg[i]; | ||||
14231 | if( pSeg->pSeg==0 ){ | ||||
14232 | /* no-op */ | ||||
14233 | }else if( pSeg->pLeaf==0 ){ | ||||
14234 | /* All keys from this input segment have been transfered to the output. | ||||
14235 | ** Set both the first and last page-numbers to 0 to indicate that the | ||||
14236 | ** segment is now empty. */ | ||||
14237 | pSeg->pSeg->pgnoLast = 0; | ||||
14238 | pSeg->pSeg->pgnoFirst = 0; | ||||
14239 | }else{ | ||||
14240 | int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ | ||||
14241 | i64 iLeafRowid; | ||||
14242 | Fts5Data *pData; | ||||
14243 | int iId = pSeg->pSeg->iSegid; | ||||
14244 | u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; | ||||
14245 | |||||
14246 | iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno)( ((i64)(iId) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iTermLeafPgno )) ); | ||||
14247 | pData = fts5LeafRead(p, iLeafRowid); | ||||
14248 | if( pData ){ | ||||
14249 | if( iOff>pData->szLeaf ){ | ||||
14250 | /* This can occur if the pages that the segments occupy overlap - if | ||||
14251 | ** a single page has been assigned to more than one segment. In | ||||
14252 | ** this case a prior iteration of this loop may have corrupted the | ||||
14253 | ** segment currently being trimmed. */ | ||||
14254 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
14255 | }else{ | ||||
14256 | fts5BufferZero(&buf)sqlite3Fts5BufferZero(&buf); | ||||
14257 | fts5BufferGrow(&p->rc, &buf, pData->nn)( (u32)((&buf)->n) + (u32)(pData->nn) <= (u32)(( &buf)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p-> rc),(&buf),(pData->nn)+(&buf)->n) ); | ||||
14258 | fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr)sqlite3Fts5BufferAppendBlob(&p->rc,&buf,sizeof(aHdr ),aHdr); | ||||
14259 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->term.n); | ||||
14260 | fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p)sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pSeg->term .n,pSeg->term.p); | ||||
14261 | fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> szLeaf-iOff,&pData->p[iOff]); | ||||
14262 | if( p->rc==SQLITE_OK0 ){ | ||||
14263 | /* Set the szLeaf field */ | ||||
14264 | fts5PutU16(&buf.p[2], (u16)buf.n); | ||||
14265 | } | ||||
14266 | |||||
14267 | /* Set up the new page-index array */ | ||||
14268 | fts5BufferAppendVarint(&p->rc, &buf, 4)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)4); | ||||
14269 | if( pSeg->iLeafPgno==pSeg->iTermLeafPgno | ||||
14270 | && pSeg->iEndofDoclist<pData->szLeaf | ||||
14271 | && pSeg->iPgidxOff<=pData->nn | ||||
14272 | ){ | ||||
14273 | int nDiff = pData->szLeaf - pSeg->iEndofDoclist; | ||||
14274 | fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)buf .n - 1 - nDiff - 4); | ||||
14275 | fts5BufferAppendBlob(&p->rc, &buf,sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]) | ||||
14276 | pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]) | ||||
14277 | )sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]); | ||||
14278 | } | ||||
14279 | |||||
14280 | pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; | ||||
14281 | fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1)( ((i64)(iId) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(1)) ), iLeafRowid); | ||||
14282 | fts5DataWrite(p, iLeafRowid, buf.p, buf.n); | ||||
14283 | } | ||||
14284 | fts5DataRelease(pData); | ||||
14285 | } | ||||
14286 | } | ||||
14287 | } | ||||
14288 | fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf); | ||||
14289 | } | ||||
14290 | |||||
14291 | static void fts5MergeChunkCallback( | ||||
14292 | Fts5Index *p, | ||||
14293 | void *pCtx, | ||||
14294 | const u8 *pChunk, int nChunk | ||||
14295 | ){ | ||||
14296 | Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; | ||||
14297 | fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); | ||||
14298 | } | ||||
14299 | |||||
14300 | /* | ||||
14301 | ** | ||||
14302 | */ | ||||
14303 | static void fts5IndexMergeLevel( | ||||
14304 | Fts5Index *p, /* FTS5 backend object */ | ||||
14305 | Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ | ||||
14306 | int iLvl, /* Level to read input from */ | ||||
14307 | int *pnRem /* Write up to this many output leaves */ | ||||
14308 | ){ | ||||
14309 | Fts5Structure *pStruct = *ppStruct; | ||||
14310 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | ||||
14311 | Fts5StructureLevel *pLvlOut; | ||||
14312 | Fts5Iter *pIter = 0; /* Iterator to read input data */ | ||||
14313 | int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ | ||||
14314 | int nInput; /* Number of input segments */ | ||||
14315 | Fts5SegWriter writer; /* Writer object */ | ||||
14316 | Fts5StructureSegment *pSeg; /* Output segment */ | ||||
14317 | Fts5Buffer term; | ||||
14318 | int bOldest; /* True if the output segment is the oldest */ | ||||
14319 | int eDetail = p->pConfig->eDetail; | ||||
14320 | const int flags = FTS5INDEX_QUERY_NOOUTPUT0x0020; | ||||
14321 | int bTermWritten = 0; /* True if current term already output */ | ||||
14322 | |||||
14323 | assert( iLvl<pStruct->nLevel )((void) (0)); | ||||
14324 | assert( pLvl->nMerge<=pLvl->nSeg )((void) (0)); | ||||
14325 | |||||
14326 | memset(&writer, 0, sizeof(Fts5SegWriter)); | ||||
14327 | memset(&term, 0, sizeof(Fts5Buffer)); | ||||
14328 | if( pLvl->nMerge ){ | ||||
14329 | pLvlOut = &pStruct->aLevel[iLvl+1]; | ||||
14330 | assert( pLvlOut->nSeg>0 )((void) (0)); | ||||
14331 | nInput = pLvl->nMerge; | ||||
14332 | pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; | ||||
14333 | |||||
14334 | fts5WriteInit(p, &writer, pSeg->iSegid); | ||||
14335 | writer.writer.pgno = pSeg->pgnoLast+1; | ||||
14336 | writer.iBtPage = 0; | ||||
14337 | }else{ | ||||
14338 | int iSegid = fts5AllocateSegid(p, pStruct); | ||||
14339 | |||||
14340 | /* Extend the Fts5Structure object as required to ensure the output | ||||
14341 | ** segment exists. */ | ||||
14342 | if( iLvl==pStruct->nLevel-1 ){ | ||||
14343 | fts5StructureAddLevel(&p->rc, ppStruct); | ||||
14344 | pStruct = *ppStruct; | ||||
14345 | } | ||||
14346 | fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); | ||||
14347 | if( p->rc ) return; | ||||
14348 | pLvl = &pStruct->aLevel[iLvl]; | ||||
14349 | pLvlOut = &pStruct->aLevel[iLvl+1]; | ||||
14350 | |||||
14351 | fts5WriteInit(p, &writer, iSegid); | ||||
14352 | |||||
14353 | /* Add the new segment to the output level */ | ||||
14354 | pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; | ||||
14355 | pLvlOut->nSeg++; | ||||
14356 | pSeg->pgnoFirst = 1; | ||||
14357 | pSeg->iSegid = iSegid; | ||||
14358 | pStruct->nSegment++; | ||||
14359 | |||||
14360 | /* Read input from all segments in the input level */ | ||||
14361 | nInput = pLvl->nSeg; | ||||
14362 | |||||
14363 | /* Set the range of origins that will go into the output segment. */ | ||||
14364 | if( pStruct->nOriginCntr>0 ){ | ||||
14365 | pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1; | ||||
14366 | pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2; | ||||
14367 | } | ||||
14368 | } | ||||
14369 | bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); | ||||
14370 | |||||
14371 | assert( iLvl>=0 )((void) (0)); | ||||
14372 | for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); | ||||
14373 | fts5MultiIterEof(p, pIter)==0; | ||||
14374 | fts5MultiIterNext(p, pIter, 0, 0) | ||||
14375 | ){ | ||||
14376 | Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | ||||
14377 | int nPos; /* position-list size field value */ | ||||
14378 | int nTerm; | ||||
14379 | const u8 *pTerm; | ||||
14380 | |||||
14381 | pTerm = fts5MultiIterTerm(pIter, &nTerm); | ||||
14382 | if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm)((nTerm)<=0 ? 0 : memcmp((pTerm), (term.p), (nTerm))) ){ | ||||
14383 | if( pnRem && writer.nLeafWritten>nRem ){ | ||||
14384 | break; | ||||
14385 | } | ||||
14386 | fts5BufferSet(&p->rc, &term, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&term,nTerm,pTerm); | ||||
14387 | bTermWritten =0; | ||||
14388 | } | ||||
14389 | |||||
14390 | /* Check for key annihilation. */ | ||||
14391 | if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue; | ||||
14392 | |||||
14393 | if( p->rc==SQLITE_OK0 && bTermWritten==0 ){ | ||||
14394 | /* This is a new term. Append a term to the output segment. */ | ||||
14395 | fts5WriteAppendTerm(p, &writer, nTerm, pTerm); | ||||
14396 | bTermWritten = 1; | ||||
14397 | } | ||||
14398 | |||||
14399 | /* Append the rowid to the output */ | ||||
14400 | /* WRITEPOSLISTSIZE */ | ||||
14401 | fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); | ||||
14402 | |||||
14403 | if( eDetail==FTS5_DETAIL_NONE1 ){ | ||||
14404 | if( pSegIter->bDel ){ | ||||
14405 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)0); | ||||
14406 | if( pSegIter->nPos>0 ){ | ||||
14407 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)0); | ||||
14408 | } | ||||
14409 | } | ||||
14410 | }else{ | ||||
14411 | /* Append the position-list data to the output */ | ||||
14412 | nPos = pSegIter->nPos*2 + pSegIter->bDel; | ||||
14413 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)nPos); | ||||
14414 | fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); | ||||
14415 | } | ||||
14416 | } | ||||
14417 | |||||
14418 | /* Flush the last leaf page to disk. Set the output segment b-tree height | ||||
14419 | ** and last leaf page number at the same time. */ | ||||
14420 | fts5WriteFinish(p, &writer, &pSeg->pgnoLast); | ||||
14421 | |||||
14422 | assert( pIter!=0 || p->rc!=SQLITE_OK )((void) (0)); | ||||
14423 | if( fts5MultiIterEof(p, pIter) ){ | ||||
14424 | int i; | ||||
14425 | |||||
14426 | /* Remove the redundant segments from the %_data table */ | ||||
14427 | assert( pSeg->nEntry==0 )((void) (0)); | ||||
14428 | for(i=0; i<nInput; i++){ | ||||
14429 | Fts5StructureSegment *pOld = &pLvl->aSeg[i]; | ||||
14430 | pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone); | ||||
14431 | fts5DataRemoveSegment(p, pOld); | ||||
14432 | } | ||||
14433 | |||||
14434 | /* Remove the redundant segments from the input level */ | ||||
14435 | if( pLvl->nSeg!=nInput ){ | ||||
14436 | int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); | ||||
14437 | memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); | ||||
14438 | } | ||||
14439 | pStruct->nSegment -= nInput; | ||||
14440 | pLvl->nSeg -= nInput; | ||||
14441 | pLvl->nMerge = 0; | ||||
14442 | if( pSeg->pgnoLast==0 ){ | ||||
14443 | pLvlOut->nSeg--; | ||||
14444 | pStruct->nSegment--; | ||||
14445 | } | ||||
14446 | }else{ | ||||
14447 | assert( pSeg->pgnoLast>0 )((void) (0)); | ||||
14448 | fts5TrimSegments(p, pIter); | ||||
14449 | pLvl->nMerge = nInput; | ||||
14450 | } | ||||
14451 | |||||
14452 | fts5MultiIterFree(pIter); | ||||
14453 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | ||||
14454 | if( pnRem ) *pnRem -= writer.nLeafWritten; | ||||
14455 | } | ||||
14456 | |||||
14457 | /* | ||||
14458 | ** If this is not a contentless_delete=1 table, or if the 'deletemerge' | ||||
14459 | ** configuration option is set to 0, then this function always returns -1. | ||||
14460 | ** Otherwise, it searches the structure object passed as the second argument | ||||
14461 | ** for a level suitable for merging due to having a large number of | ||||
14462 | ** tombstones in the tombstone hash. If one is found, its index is returned. | ||||
14463 | ** Otherwise, if there is no suitable level, -1. | ||||
14464 | */ | ||||
14465 | static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){ | ||||
14466 | Fts5Config *pConfig = p->pConfig; | ||||
14467 | int iRet = -1; | ||||
14468 | if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){ | ||||
14469 | int ii; | ||||
14470 | int nBest = 0; | ||||
14471 | |||||
14472 | for(ii=0; ii<pStruct->nLevel; ii++){ | ||||
14473 | Fts5StructureLevel *pLvl = &pStruct->aLevel[ii]; | ||||
14474 | i64 nEntry = 0; | ||||
14475 | i64 nTomb = 0; | ||||
14476 | int iSeg; | ||||
14477 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | ||||
14478 | nEntry += pLvl->aSeg[iSeg].nEntry; | ||||
14479 | nTomb += pLvl->aSeg[iSeg].nEntryTombstone; | ||||
14480 | } | ||||
14481 | assert_nc( nEntry>0 || pLvl->nSeg==0 )((void) (0)); | ||||
14482 | if( nEntry>0 ){ | ||||
14483 | int nPercent = (nTomb * 100) / nEntry; | ||||
14484 | if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){ | ||||
14485 | iRet = ii; | ||||
14486 | nBest = nPercent; | ||||
14487 | } | ||||
14488 | } | ||||
14489 | |||||
14490 | /* If pLvl is already the input level to an ongoing merge, look no | ||||
14491 | ** further for a merge candidate. The caller should be allowed to | ||||
14492 | ** continue merging from pLvl first. */ | ||||
14493 | if( pLvl->nMerge ) break; | ||||
14494 | } | ||||
14495 | } | ||||
14496 | return iRet; | ||||
14497 | } | ||||
14498 | |||||
14499 | /* | ||||
14500 | ** Do up to nPg pages of automerge work on the index. | ||||
14501 | ** | ||||
14502 | ** Return true if any changes were actually made, or false otherwise. | ||||
14503 | */ | ||||
14504 | static int fts5IndexMerge( | ||||
14505 | Fts5Index *p, /* FTS5 backend object */ | ||||
14506 | Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ | ||||
14507 | int nPg, /* Pages of work to do */ | ||||
14508 | int nMin /* Minimum number of segments to merge */ | ||||
14509 | ){ | ||||
14510 | int nRem = nPg; | ||||
14511 | int bRet = 0; | ||||
14512 | Fts5Structure *pStruct = *ppStruct; | ||||
14513 | while( nRem>0 && p->rc==SQLITE_OK0 ){ | ||||
14514 | int iLvl; /* To iterate through levels */ | ||||
14515 | int iBestLvl = 0; /* Level offering the most input segments */ | ||||
14516 | int nBest = 0; /* Number of input segments on best level */ | ||||
14517 | |||||
14518 | /* Set iBestLvl to the level to read input segments from. Or to -1 if | ||||
14519 | ** there is no level suitable to merge segments from. */ | ||||
14520 | assert( pStruct->nLevel>0 )((void) (0)); | ||||
14521 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | ||||
14522 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | ||||
14523 | if( pLvl->nMerge ){ | ||||
14524 | if( pLvl->nMerge>nBest ){ | ||||
14525 | iBestLvl = iLvl; | ||||
14526 | nBest = nMin; | ||||
14527 | } | ||||
14528 | break; | ||||
14529 | } | ||||
14530 | if( pLvl->nSeg>nBest ){ | ||||
14531 | nBest = pLvl->nSeg; | ||||
14532 | iBestLvl = iLvl; | ||||
14533 | } | ||||
14534 | } | ||||
14535 | if( nBest<nMin ){ | ||||
14536 | iBestLvl = fts5IndexFindDeleteMerge(p, pStruct); | ||||
14537 | } | ||||
14538 | |||||
14539 | if( iBestLvl<0 ) break; | ||||
14540 | bRet = 1; | ||||
14541 | fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); | ||||
14542 | if( p->rc==SQLITE_OK0 && pStruct->aLevel[iBestLvl].nMerge==0 ){ | ||||
14543 | fts5StructurePromote(p, iBestLvl+1, pStruct); | ||||
14544 | } | ||||
14545 | |||||
14546 | if( nMin==1 ) nMin = 2; | ||||
14547 | } | ||||
14548 | *ppStruct = pStruct; | ||||
14549 | return bRet; | ||||
14550 | } | ||||
14551 | |||||
14552 | /* | ||||
14553 | ** A total of nLeaf leaf pages of data has just been flushed to a level-0 | ||||
14554 | ** segment. This function updates the write-counter accordingly and, if | ||||
14555 | ** necessary, performs incremental merge work. | ||||
14556 | ** | ||||
14557 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | ||||
14558 | ** already occurred, this function is a no-op. | ||||
14559 | */ | ||||
14560 | static void fts5IndexAutomerge( | ||||
14561 | Fts5Index *p, /* FTS5 backend object */ | ||||
14562 | Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ | ||||
14563 | int nLeaf /* Number of output leaves just written */ | ||||
14564 | ){ | ||||
14565 | if( p->rc==SQLITE_OK0 && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0)((*ppStruct)!=0) ){ | ||||
14566 | Fts5Structure *pStruct = *ppStruct; | ||||
14567 | u64 nWrite; /* Initial value of write-counter */ | ||||
14568 | int nWork; /* Number of work-quanta to perform */ | ||||
14569 | int nRem; /* Number of leaf pages left to write */ | ||||
14570 | |||||
14571 | /* Update the write-counter. While doing so, set nWork. */ | ||||
14572 | nWrite = pStruct->nWriteCounter; | ||||
14573 | nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); | ||||
14574 | pStruct->nWriteCounter += nLeaf; | ||||
14575 | nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); | ||||
14576 | |||||
14577 | fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); | ||||
14578 | } | ||||
14579 | } | ||||
14580 | |||||
14581 | static void fts5IndexCrisismerge( | ||||
14582 | Fts5Index *p, /* FTS5 backend object */ | ||||
14583 | Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ | ||||
14584 | ){ | ||||
14585 | const int nCrisis = p->pConfig->nCrisisMerge; | ||||
14586 | Fts5Structure *pStruct = *ppStruct; | ||||
14587 | if( pStruct && pStruct->nLevel>0 ){ | ||||
14588 | int iLvl = 0; | ||||
14589 | while( p->rc==SQLITE_OK0 && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ | ||||
14590 | fts5IndexMergeLevel(p, &pStruct, iLvl, 0); | ||||
14591 | assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) )((void) (0)); | ||||
14592 | fts5StructurePromote(p, iLvl+1, pStruct); | ||||
14593 | iLvl++; | ||||
14594 | } | ||||
14595 | *ppStruct = pStruct; | ||||
14596 | } | ||||
14597 | } | ||||
14598 | |||||
14599 | static int fts5IndexReturn(Fts5Index *p){ | ||||
14600 | int rc = p->rc; | ||||
14601 | p->rc = SQLITE_OK0; | ||||
14602 | return rc; | ||||
14603 | } | ||||
14604 | |||||
14605 | /* | ||||
14606 | ** Close the read-only blob handle, if it is open. | ||||
14607 | */ | ||||
14608 | static void sqlite3Fts5IndexCloseReader(Fts5Index *p){ | ||||
14609 | fts5IndexCloseReader(p); | ||||
14610 | fts5IndexReturn(p); | ||||
14611 | } | ||||
14612 | |||||
14613 | typedef struct Fts5FlushCtx Fts5FlushCtx; | ||||
14614 | struct Fts5FlushCtx { | ||||
14615 | Fts5Index *pIdx; | ||||
14616 | Fts5SegWriter writer; | ||||
14617 | }; | ||||
14618 | |||||
14619 | /* | ||||
14620 | ** Buffer aBuf[] contains a list of varints, all small enough to fit | ||||
14621 | ** in a 32-bit integer. Return the size of the largest prefix of this | ||||
14622 | ** list nMax bytes or less in size. | ||||
14623 | */ | ||||
14624 | static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ | ||||
14625 | int ret; | ||||
14626 | u32 dummy; | ||||
14627 | ret = fts5GetVarint32(aBuf, dummy)sqlite3Fts5GetVarint32(aBuf,(u32*)&(dummy)); | ||||
14628 | if( ret<nMax ){ | ||||
14629 | while( 1 ){ | ||||
14630 | int i = fts5GetVarint32(&aBuf[ret], dummy)sqlite3Fts5GetVarint32(&aBuf[ret],(u32*)&(dummy)); | ||||
14631 | if( (ret + i) > nMax ) break; | ||||
14632 | ret += i; | ||||
14633 | } | ||||
14634 | } | ||||
14635 | return ret; | ||||
14636 | } | ||||
14637 | |||||
14638 | /* | ||||
14639 | ** Execute the SQL statement: | ||||
14640 | ** | ||||
14641 | ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno); | ||||
14642 | ** | ||||
14643 | ** This is used when a secure-delete operation removes the last term | ||||
14644 | ** from a segment leaf page. In that case the %_idx entry is removed | ||||
14645 | ** too. This is done to ensure that if all instances of a token are | ||||
14646 | ** removed from an fts5 database in secure-delete mode, no trace of | ||||
14647 | ** the token itself remains in the database. | ||||
14648 | */ | ||||
14649 | static void fts5SecureDeleteIdxEntry( | ||||
14650 | Fts5Index *p, /* FTS5 backend object */ | ||||
14651 | int iSegid, /* Id of segment to delete entry for */ | ||||
14652 | int iPgno /* Page number within segment */ | ||||
14653 | ){ | ||||
14654 | if( iPgno!=1 ){ | ||||
14655 | assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE )((void) (0)); | ||||
14656 | if( p->pDeleteFromIdx==0 ){ | ||||
14657 | fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintfsqlite3_api->mprintf( | ||||
14658 | "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)", | ||||
14659 | p->pConfig->zDb, p->pConfig->zName | ||||
14660 | )); | ||||
14661 | } | ||||
14662 | if( p->rc==SQLITE_OK0 ){ | ||||
14663 | sqlite3_bind_intsqlite3_api->bind_int(p->pDeleteFromIdx, 1, iSegid); | ||||
14664 | sqlite3_bind_intsqlite3_api->bind_int(p->pDeleteFromIdx, 2, iPgno); | ||||
14665 | sqlite3_stepsqlite3_api->step(p->pDeleteFromIdx); | ||||
14666 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDeleteFromIdx); | ||||
14667 | } | ||||
14668 | } | ||||
14669 | } | ||||
14670 | |||||
14671 | /* | ||||
14672 | ** This is called when a secure-delete operation removes a position-list | ||||
14673 | ** that overflows onto segment page iPgno of segment pSeg. This function | ||||
14674 | ** rewrites node iPgno, and possibly one or more of its right-hand peers, | ||||
14675 | ** to remove this portion of the position list. | ||||
14676 | ** | ||||
14677 | ** Output variable (*pbLastInDoclist) is set to true if the position-list | ||||
14678 | ** removed is followed by a new term or the end-of-segment, or false if | ||||
14679 | ** it is followed by another rowid/position list. | ||||
14680 | */ | ||||
14681 | static void fts5SecureDeleteOverflow( | ||||
14682 | Fts5Index *p, | ||||
14683 | Fts5StructureSegment *pSeg, | ||||
14684 | int iPgno, | ||||
14685 | int *pbLastInDoclist | ||||
14686 | ){ | ||||
14687 | const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE1); | ||||
14688 | int pgno; | ||||
14689 | Fts5Data *pLeaf = 0; | ||||
14690 | assert( iPgno!=1 )((void) (0)); | ||||
14691 | |||||
14692 | *pbLastInDoclist = 1; | ||||
14693 | for(pgno=iPgno; p->rc==SQLITE_OK0 && pgno<=pSeg->pgnoLast; pgno++){ | ||||
14694 | i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | ||||
14695 | int iNext = 0; | ||||
14696 | u8 *aPg = 0; | ||||
14697 | |||||
14698 | pLeaf = fts5DataRead(p, iRowid); | ||||
14699 | if( pLeaf==0 ) break; | ||||
14700 | aPg = pLeaf->p; | ||||
14701 | |||||
14702 | iNext = fts5GetU16(&aPg[0]); | ||||
14703 | if( iNext!=0 ){ | ||||
14704 | *pbLastInDoclist = 0; | ||||
14705 | } | ||||
14706 | if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){ | ||||
14707 | fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext)sqlite3Fts5GetVarint32(&aPg[pLeaf->szLeaf],(u32*)& (iNext)); | ||||
14708 | } | ||||
14709 | |||||
14710 | if( iNext==0 ){ | ||||
14711 | /* The page contains no terms or rowids. Replace it with an empty | ||||
14712 | ** page and move on to the right-hand peer. */ | ||||
14713 | const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04}; | ||||
14714 | assert_nc( bDetailNone==0 || pLeaf->nn==4 )((void) (0)); | ||||
14715 | if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty)); | ||||
14716 | fts5DataRelease(pLeaf); | ||||
14717 | pLeaf = 0; | ||||
14718 | }else if( bDetailNone ){ | ||||
14719 | break; | ||||
14720 | }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){ | ||||
14721 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
14722 | break; | ||||
14723 | }else{ | ||||
14724 | int nShift = iNext - 4; | ||||
14725 | int nPg; | ||||
14726 | |||||
14727 | int nIdx = 0; | ||||
14728 | u8 *aIdx = 0; | ||||
14729 | |||||
14730 | /* Unless the current page footer is 0 bytes in size (in which case | ||||
14731 | ** the new page footer will be as well), allocate and populate a | ||||
14732 | ** buffer containing the new page footer. Set stack variables aIdx | ||||
14733 | ** and nIdx accordingly. */ | ||||
14734 | if( pLeaf->nn>pLeaf->szLeaf ){ | ||||
14735 | int iFirst = 0; | ||||
14736 | int i1 = pLeaf->szLeaf; | ||||
14737 | int i2 = 0; | ||||
14738 | |||||
14739 | i1 += fts5GetVarint32(&aPg[i1], iFirst)sqlite3Fts5GetVarint32(&aPg[i1],(u32*)&(iFirst)); | ||||
14740 | if( iFirst<iNext ){ | ||||
14741 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
14742 | break; | ||||
14743 | } | ||||
14744 | aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2); | ||||
14745 | if( aIdx==0 ) break; | ||||
14746 | i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift); | ||||
14747 | if( i1<pLeaf->nn ){ | ||||
14748 | memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1); | ||||
14749 | i2 += (pLeaf->nn-i1); | ||||
14750 | } | ||||
14751 | nIdx = i2; | ||||
14752 | } | ||||
14753 | |||||
14754 | /* Modify the contents of buffer aPg[]. Set nPg to the new size | ||||
14755 | ** in bytes. The new page is always smaller than the old. */ | ||||
14756 | nPg = pLeaf->szLeaf - nShift; | ||||
14757 | memmove(&aPg[4], &aPg[4+nShift], nPg-4); | ||||
14758 | fts5PutU16(&aPg[2], nPg); | ||||
14759 | if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4); | ||||
14760 | if( nIdx>0 ){ | ||||
14761 | memcpy(&aPg[nPg], aIdx, nIdx); | ||||
14762 | nPg += nIdx; | ||||
14763 | } | ||||
14764 | sqlite3_freesqlite3_api->free(aIdx); | ||||
14765 | |||||
14766 | /* Write the new page to disk and exit the loop */ | ||||
14767 | assert( nPg>4 || fts5GetU16(aPg)==0 )((void) (0)); | ||||
14768 | fts5DataWrite(p, iRowid, aPg, nPg); | ||||
14769 | break; | ||||
14770 | } | ||||
14771 | } | ||||
14772 | fts5DataRelease(pLeaf); | ||||
14773 | } | ||||
14774 | |||||
14775 | /* | ||||
14776 | ** Completely remove the entry that pSeg currently points to from | ||||
14777 | ** the database. | ||||
14778 | */ | ||||
14779 | static void fts5DoSecureDelete( | ||||
14780 | Fts5Index *p, | ||||
14781 | Fts5SegIter *pSeg | ||||
14782 | ){ | ||||
14783 | const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE1); | ||||
14784 | int iSegid = pSeg->pSeg->iSegid; | ||||
14785 | u8 *aPg = pSeg->pLeaf->p; | ||||
14786 | int nPg = pSeg->pLeaf->nn; | ||||
14787 | int iPgIdx = pSeg->pLeaf->szLeaf; | ||||
14788 | |||||
14789 | u64 iDelta = 0; | ||||
14790 | int iNextOff = 0; | ||||
14791 | int iOff = 0; | ||||
14792 | int nIdx = 0; | ||||
14793 | u8 *aIdx = 0; | ||||
14794 | int bLastInDoclist = 0; | ||||
14795 | int iIdx = 0; | ||||
14796 | int iStart = 0; | ||||
14797 | int iDelKeyOff = 0; /* Offset of deleted key, if any */ | ||||
14798 | |||||
14799 | nIdx = nPg-iPgIdx; | ||||
14800 | aIdx = sqlite3Fts5MallocZero(&p->rc, ((i64)nIdx)+16); | ||||
14801 | if( p->rc ) return; | ||||
14802 | memcpy(aIdx, &aPg[iPgIdx], nIdx); | ||||
14803 | |||||
14804 | /* At this point segment iterator pSeg points to the entry | ||||
14805 | ** this function should remove from the b-tree segment. | ||||
14806 | ** | ||||
14807 | ** In detail=full or detail=column mode, pSeg->iLeafOffset is the | ||||
14808 | ** offset of the first byte in the position-list for the entry to | ||||
14809 | ** remove. Immediately before this comes two varints that will also | ||||
14810 | ** need to be removed: | ||||
14811 | ** | ||||
14812 | ** + the rowid or delta rowid value for the entry, and | ||||
14813 | ** + the size of the position list in bytes. | ||||
14814 | ** | ||||
14815 | ** Or, in detail=none mode, there is a single varint prior to | ||||
14816 | ** pSeg->iLeafOffset - the rowid or delta rowid value. | ||||
14817 | ** | ||||
14818 | ** This block sets the following variables: | ||||
14819 | ** | ||||
14820 | ** iStart: | ||||
14821 | ** The offset of the first byte of the rowid or delta-rowid | ||||
14822 | ** value for the doclist entry being removed. | ||||
14823 | ** | ||||
14824 | ** iDelta: | ||||
14825 | ** The value of the rowid or delta-rowid value for the doclist | ||||
14826 | ** entry being removed. | ||||
14827 | ** | ||||
14828 | ** iNextOff: | ||||
14829 | ** The offset of the next entry following the position list | ||||
14830 | ** for the one being removed. If the position list for this | ||||
14831 | ** entry overflows onto the next leaf page, this value will be | ||||
14832 | ** greater than pLeaf->szLeaf. | ||||
14833 | */ | ||||
14834 | { | ||||
14835 | int iSOP; /* Start-Of-Position-list */ | ||||
14836 | if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){ | ||||
14837 | iStart = pSeg->iTermLeafOffset; | ||||
14838 | }else{ | ||||
14839 | iStart = fts5GetU16(&aPg[0]); | ||||
14840 | } | ||||
14841 | |||||
14842 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | ||||
14843 | assert_nc( iSOP<=pSeg->iLeafOffset )((void) (0)); | ||||
14844 | |||||
14845 | if( bDetailNone ){ | ||||
14846 | while( iSOP<pSeg->iLeafOffset ){ | ||||
14847 | if( aPg[iSOP]==0x00 ) iSOP++; | ||||
14848 | if( aPg[iSOP]==0x00 ) iSOP++; | ||||
14849 | iStart = iSOP; | ||||
14850 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | ||||
14851 | } | ||||
14852 | |||||
14853 | iNextOff = iSOP; | ||||
14854 | if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++; | ||||
14855 | if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++; | ||||
14856 | |||||
14857 | }else{ | ||||
14858 | int nPos = 0; | ||||
14859 | iSOP += fts5GetVarint32(&aPg[iSOP], nPos)sqlite3Fts5GetVarint32(&aPg[iSOP],(u32*)&(nPos)); | ||||
14860 | while( iSOP<pSeg->iLeafOffset ){ | ||||
14861 | iStart = iSOP + (nPos/2); | ||||
14862 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | ||||
14863 | iSOP += fts5GetVarint32(&aPg[iSOP], nPos)sqlite3Fts5GetVarint32(&aPg[iSOP],(u32*)&(nPos)); | ||||
14864 | } | ||||
14865 | assert_nc( iSOP==pSeg->iLeafOffset )((void) (0)); | ||||
14866 | iNextOff = pSeg->iLeafOffset + pSeg->nPos; | ||||
14867 | } | ||||
14868 | } | ||||
14869 | |||||
14870 | iOff = iStart; | ||||
14871 | |||||
14872 | /* If the position-list for the entry being removed flows over past | ||||
14873 | ** the end of this page, delete the portion of the position-list on the | ||||
14874 | ** next page and beyond. | ||||
14875 | ** | ||||
14876 | ** Set variable bLastInDoclist to true if this entry happens | ||||
14877 | ** to be the last rowid in the doclist for its term. */ | ||||
14878 | if( iNextOff>=iPgIdx ){ | ||||
14879 | int pgno = pSeg->iLeafPgno+1; | ||||
14880 | fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist); | ||||
14881 | iNextOff = iPgIdx; | ||||
14882 | } | ||||
14883 | |||||
14884 | if( pSeg->bDel==0 ){ | ||||
14885 | if( iNextOff!=iPgIdx ){ | ||||
14886 | /* Loop through the page-footer. If iNextOff (offset of the | ||||
14887 | ** entry following the one we are removing) is equal to the | ||||
14888 | ** offset of a key on this page, then the entry is the last | ||||
14889 | ** in its doclist. */ | ||||
14890 | int iKeyOff = 0; | ||||
14891 | for(iIdx=0; iIdx<nIdx; /* no-op */){ | ||||
14892 | u32 iVal = 0; | ||||
14893 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | ||||
14894 | iKeyOff += iVal; | ||||
14895 | if( iKeyOff==iNextOff ){ | ||||
14896 | bLastInDoclist = 1; | ||||
14897 | } | ||||
14898 | } | ||||
14899 | } | ||||
14900 | |||||
14901 | /* If this is (a) the first rowid on a page and (b) is not followed by | ||||
14902 | ** another position list on the same page, set the "first-rowid" field | ||||
14903 | ** of the header to 0. */ | ||||
14904 | if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist || iNextOff==iPgIdx) ){ | ||||
14905 | fts5PutU16(&aPg[0], 0); | ||||
14906 | } | ||||
14907 | } | ||||
14908 | |||||
14909 | if( pSeg->bDel ){ | ||||
14910 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta); | ||||
14911 | aPg[iOff++] = 0x01; | ||||
14912 | }else if( bLastInDoclist==0 ){ | ||||
14913 | if( iNextOff!=iPgIdx ){ | ||||
14914 | u64 iNextDelta = 0; | ||||
14915 | iNextOff += fts5GetVarintsqlite3Fts5GetVarint(&aPg[iNextOff], &iNextDelta); | ||||
14916 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta); | ||||
14917 | } | ||||
14918 | }else if( | ||||
14919 | pSeg->iLeafPgno==pSeg->iTermLeafPgno | ||||
14920 | && iStart==pSeg->iTermLeafOffset | ||||
14921 | ){ | ||||
14922 | /* The entry being removed was the only position list in its | ||||
14923 | ** doclist. Therefore the term needs to be removed as well. */ | ||||
14924 | int iKey = 0; | ||||
14925 | int iKeyOff = 0; | ||||
14926 | |||||
14927 | /* Set iKeyOff to the offset of the term that will be removed - the | ||||
14928 | ** last offset in the footer that is not greater than iStart. */ | ||||
14929 | for(iIdx=0; iIdx<nIdx; iKey++){ | ||||
14930 | u32 iVal = 0; | ||||
14931 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | ||||
14932 | if( (iKeyOff+iVal)>(u32)iStart ) break; | ||||
14933 | iKeyOff += iVal; | ||||
14934 | } | ||||
14935 | assert_nc( iKey>=1 )((void) (0)); | ||||
14936 | |||||
14937 | /* Set iDelKeyOff to the value of the footer entry to remove from | ||||
14938 | ** the page. */ | ||||
14939 | iDelKeyOff = iOff = iKeyOff; | ||||
14940 | |||||
14941 | if( iNextOff!=iPgIdx ){ | ||||
14942 | /* This is the only position-list associated with the term, and there | ||||
14943 | ** is another term following it on this page. So the subsequent term | ||||
14944 | ** needs to be moved to replace the term associated with the entry | ||||
14945 | ** being removed. */ | ||||
14946 | int nPrefix = 0; | ||||
14947 | int nSuffix = 0; | ||||
14948 | int nPrefix2 = 0; | ||||
14949 | int nSuffix2 = 0; | ||||
14950 | |||||
14951 | iDelKeyOff = iNextOff; | ||||
14952 | iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2)sqlite3Fts5GetVarint32(&aPg[iNextOff],(u32*)&(nPrefix2 )); | ||||
14953 | iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2)sqlite3Fts5GetVarint32(&aPg[iNextOff],(u32*)&(nSuffix2 )); | ||||
14954 | |||||
14955 | if( iKey!=1 ){ | ||||
14956 | iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix)sqlite3Fts5GetVarint32(&aPg[iKeyOff],(u32*)&(nPrefix) ); | ||||
14957 | } | ||||
14958 | iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix)sqlite3Fts5GetVarint32(&aPg[iKeyOff],(u32*)&(nSuffix) ); | ||||
14959 | |||||
14960 | nPrefix = MIN(nPrefix, nPrefix2)(((nPrefix) < (nPrefix2)) ? (nPrefix) : (nPrefix2)); | ||||
14961 | nSuffix = (nPrefix2 + nSuffix2) - nPrefix; | ||||
14962 | |||||
14963 | if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){ | ||||
14964 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
14965 | }else{ | ||||
14966 | if( iKey!=1 ){ | ||||
14967 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix); | ||||
14968 | } | ||||
14969 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix); | ||||
14970 | if( nPrefix2>pSeg->term.n ){ | ||||
14971 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
14972 | }else if( nPrefix2>nPrefix ){ | ||||
14973 | memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix); | ||||
14974 | iOff += (nPrefix2-nPrefix); | ||||
14975 | } | ||||
14976 | memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2); | ||||
14977 | iOff += nSuffix2; | ||||
14978 | iNextOff += nSuffix2; | ||||
14979 | } | ||||
14980 | } | ||||
14981 | }else if( iStart==4 ){ | ||||
14982 | int iPgno; | ||||
14983 | |||||
14984 | assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno )((void) (0)); | ||||
14985 | /* The entry being removed may be the only position list in | ||||
14986 | ** its doclist. */ | ||||
14987 | for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){ | ||||
14988 | Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPgno)) )); | ||||
14989 | int bEmpty = (pPg && pPg->nn==4); | ||||
14990 | fts5DataRelease(pPg); | ||||
14991 | if( bEmpty==0 ) break; | ||||
14992 | } | ||||
14993 | |||||
14994 | if( iPgno==pSeg->iTermLeafPgno ){ | ||||
14995 | i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iTermLeafPgno )) ); | ||||
14996 | Fts5Data *pTerm = fts5DataRead(p, iId); | ||||
14997 | if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){ | ||||
14998 | u8 *aTermIdx = &pTerm->p[pTerm->szLeaf]; | ||||
14999 | int nTermIdx = pTerm->nn - pTerm->szLeaf; | ||||
15000 | int iTermIdx = 0; | ||||
15001 | int iTermOff = 0; | ||||
15002 | |||||
15003 | while( 1 ){ | ||||
15004 | u32 iVal = 0; | ||||
15005 | int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal)sqlite3Fts5GetVarint32(&aTermIdx[iTermIdx],(u32*)&(iVal )); | ||||
15006 | iTermOff += iVal; | ||||
15007 | if( (iTermIdx+nByte)>=nTermIdx ) break; | ||||
15008 | iTermIdx += nByte; | ||||
15009 | } | ||||
15010 | nTermIdx = iTermIdx; | ||||
15011 | |||||
15012 | memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx); | ||||
15013 | fts5PutU16(&pTerm->p[2], iTermOff); | ||||
15014 | |||||
15015 | fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx); | ||||
15016 | if( nTermIdx==0 ){ | ||||
15017 | fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno); | ||||
15018 | } | ||||
15019 | } | ||||
15020 | fts5DataRelease(pTerm); | ||||
15021 | } | ||||
15022 | } | ||||
15023 | |||||
15024 | /* Assuming no error has occurred, this block does final edits to the | ||||
15025 | ** leaf page before writing it back to disk. Input variables are: | ||||
15026 | ** | ||||
15027 | ** nPg: Total initial size of leaf page. | ||||
15028 | ** iPgIdx: Initial offset of page footer. | ||||
15029 | ** | ||||
15030 | ** iOff: Offset to move data to | ||||
15031 | ** iNextOff: Offset to move data from | ||||
15032 | */ | ||||
15033 | if( p->rc==SQLITE_OK0 ){ | ||||
15034 | const int nMove = nPg - iNextOff; /* Number of bytes to move */ | ||||
15035 | int nShift = iNextOff - iOff; /* Distance to move them */ | ||||
15036 | |||||
15037 | int iPrevKeyOut = 0; | ||||
15038 | int iKeyIn = 0; | ||||
15039 | |||||
15040 | memmove(&aPg[iOff], &aPg[iNextOff], nMove); | ||||
15041 | iPgIdx -= nShift; | ||||
15042 | nPg = iPgIdx; | ||||
15043 | fts5PutU16(&aPg[2], iPgIdx); | ||||
15044 | |||||
15045 | for(iIdx=0; iIdx<nIdx; /* no-op */){ | ||||
15046 | u32 iVal = 0; | ||||
15047 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | ||||
15048 | iKeyIn += iVal; | ||||
15049 | if( iKeyIn!=iDelKeyOff ){ | ||||
15050 | int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0)); | ||||
15051 | nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut); | ||||
15052 | iPrevKeyOut = iKeyOut; | ||||
15053 | } | ||||
15054 | } | ||||
15055 | |||||
15056 | if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){ | ||||
15057 | fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno); | ||||
15058 | } | ||||
15059 | |||||
15060 | assert_nc( nPg>4 || fts5GetU16(aPg)==0 )((void) (0)); | ||||
15061 | fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iLeafPgno )) ), aPg, nPg); | ||||
15062 | } | ||||
15063 | sqlite3_freesqlite3_api->free(aIdx); | ||||
15064 | } | ||||
15065 | |||||
15066 | /* | ||||
15067 | ** This is called as part of flushing a delete to disk in 'secure-delete' | ||||
15068 | ** mode. It edits the segments within the database described by argument | ||||
15069 | ** pStruct to remove the entries for term zTerm, rowid iRowid. | ||||
15070 | ** | ||||
15071 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | ||||
15072 | ** has occurred. Any error code is also stored in the Fts5Index handle. | ||||
15073 | */ | ||||
15074 | static int fts5FlushSecureDelete( | ||||
15075 | Fts5Index *p, | ||||
15076 | Fts5Structure *pStruct, | ||||
15077 | const char *zTerm, | ||||
15078 | int nTerm, | ||||
15079 | i64 iRowid | ||||
15080 | ){ | ||||
15081 | const int f = FTS5INDEX_QUERY_SKIPHASH0x0040; | ||||
15082 | Fts5Iter *pIter = 0; /* Used to find term instance */ | ||||
15083 | |||||
15084 | /* If the version number has not been set to SECUREDELETE, do so now. */ | ||||
15085 | if( p->pConfig->iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE5 ){ | ||||
15086 | Fts5Config *pConfig = p->pConfig; | ||||
15087 | sqlite3_stmt *pStmt = 0; | ||||
15088 | fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintfsqlite3_api->mprintf( | ||||
15089 | "REPLACE INTO %Q.'%q_config' VALUES ('version', %d)", | ||||
15090 | pConfig->zDb, pConfig->zName, FTS5_CURRENT_VERSION_SECUREDELETE5 | ||||
15091 | )); | ||||
15092 | if( p->rc==SQLITE_OK0 ){ | ||||
15093 | int rc; | ||||
15094 | sqlite3_stepsqlite3_api->step(pStmt); | ||||
15095 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | ||||
15096 | if( p->rc==SQLITE_OK0 ) p->rc = rc; | ||||
15097 | pConfig->iCookie++; | ||||
15098 | pConfig->iVersion = FTS5_CURRENT_VERSION_SECUREDELETE5; | ||||
15099 | } | ||||
15100 | } | ||||
15101 | |||||
15102 | fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter); | ||||
15103 | if( fts5MultiIterEof(p, pIter)==0 ){ | ||||
15104 | i64 iThis = fts5MultiIterRowid(pIter); | ||||
15105 | if( iThis<iRowid ){ | ||||
15106 | fts5MultiIterNextFrom(p, pIter, iRowid); | ||||
15107 | } | ||||
15108 | |||||
15109 | if( p->rc==SQLITE_OK0 | ||||
15110 | && fts5MultiIterEof(p, pIter)==0 | ||||
15111 | && iRowid==fts5MultiIterRowid(pIter) | ||||
15112 | ){ | ||||
15113 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | ||||
15114 | fts5DoSecureDelete(p, pSeg); | ||||
15115 | } | ||||
15116 | } | ||||
15117 | |||||
15118 | fts5MultiIterFree(pIter); | ||||
15119 | return p->rc; | ||||
15120 | } | ||||
15121 | |||||
15122 | |||||
15123 | /* | ||||
15124 | ** Flush the contents of in-memory hash table iHash to a new level-0 | ||||
15125 | ** segment on disk. Also update the corresponding structure record. | ||||
15126 | ** | ||||
15127 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | ||||
15128 | ** already occurred, this function is a no-op. | ||||
15129 | */ | ||||
15130 | static void fts5FlushOneHash(Fts5Index *p){ | ||||
15131 | Fts5Hash *pHash = p->pHash; | ||||
15132 | Fts5Structure *pStruct; | ||||
15133 | int iSegid; | ||||
15134 | int pgnoLast = 0; /* Last leaf page number in segment */ | ||||
15135 | |||||
15136 | /* Obtain a reference to the index structure and allocate a new segment-id | ||||
15137 | ** for the new level-0 segment. */ | ||||
15138 | pStruct = fts5StructureRead(p); | ||||
15139 | fts5StructureInvalidate(p); | ||||
15140 | |||||
15141 | if( sqlite3Fts5HashIsEmpty(pHash)==0 ){ | ||||
15142 | iSegid = fts5AllocateSegid(p, pStruct); | ||||
15143 | if( iSegid ){ | ||||
15144 | const int pgsz = p->pConfig->pgsz; | ||||
15145 | int eDetail = p->pConfig->eDetail; | ||||
15146 | int bSecureDelete = p->pConfig->bSecureDelete; | ||||
15147 | Fts5StructureSegment *pSeg; /* New segment within pStruct */ | ||||
15148 | Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ | ||||
15149 | Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ | ||||
15150 | |||||
15151 | Fts5SegWriter writer; | ||||
15152 | fts5WriteInit(p, &writer, iSegid); | ||||
15153 | |||||
15154 | pBuf = &writer.writer.buf; | ||||
15155 | pPgidx = &writer.writer.pgidx; | ||||
15156 | |||||
15157 | /* fts5WriteInit() should have initialized the buffers to (most likely) | ||||
15158 | ** the maximum space required. */ | ||||
15159 | assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) )((void) (0)); | ||||
15160 | assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) )((void) (0)); | ||||
15161 | |||||
15162 | /* Begin scanning through hash table entries. This loop runs once for each | ||||
15163 | ** term/doclist currently stored within the hash table. */ | ||||
15164 | if( p->rc==SQLITE_OK0 ){ | ||||
15165 | p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); | ||||
15166 | } | ||||
15167 | while( p->rc==SQLITE_OK0 && 0==sqlite3Fts5HashScanEof(pHash) ){ | ||||
15168 | const char *zTerm; /* Buffer containing term */ | ||||
15169 | int nTerm; /* Size of zTerm in bytes */ | ||||
15170 | const u8 *pDoclist; /* Pointer to doclist for this term */ | ||||
15171 | int nDoclist; /* Size of doclist in bytes */ | ||||
15172 | |||||
15173 | /* Get the term and doclist for this entry. */ | ||||
15174 | sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist); | ||||
15175 | if( bSecureDelete==0 ){ | ||||
15176 | fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); | ||||
15177 | if( p->rc!=SQLITE_OK0 ) break; | ||||
15178 | assert( writer.bFirstRowidInPage==0 )((void) (0)); | ||||
15179 | } | ||||
15180 | |||||
15181 | if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ | ||||
15182 | /* The entire doclist will fit on the current leaf. */ | ||||
15183 | fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pDoclist , nDoclist); (pBuf)->n += nDoclist; }; | ||||
15184 | }else{ | ||||
15185 | int bTermWritten = !bSecureDelete; | ||||
15186 | i64 iRowid = 0; | ||||
15187 | i64 iPrev = 0; | ||||
15188 | int iOff = 0; | ||||
15189 | |||||
15190 | /* The entire doclist will not fit on this leaf. The following | ||||
15191 | ** loop iterates through the poslists that make up the current | ||||
15192 | ** doclist. */ | ||||
15193 | while( p->rc==SQLITE_OK0 && iOff<nDoclist ){ | ||||
15194 | u64 iDelta = 0; | ||||
15195 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pDoclist[iOff], &iDelta); | ||||
15196 | iRowid += iDelta; | ||||
15197 | |||||
15198 | /* If in secure delete mode, and if this entry in the poslist is | ||||
15199 | ** in fact a delete, then edit the existing segments directly | ||||
15200 | ** using fts5FlushSecureDelete(). */ | ||||
15201 | if( bSecureDelete ){ | ||||
15202 | if( eDetail==FTS5_DETAIL_NONE1 ){ | ||||
15203 | if( iOff<nDoclist && pDoclist[iOff]==0x00 | ||||
15204 | && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid) | ||||
15205 | ){ | ||||
15206 | iOff++; | ||||
15207 | if( iOff<nDoclist && pDoclist[iOff]==0x00 ){ | ||||
15208 | iOff++; | ||||
15209 | nDoclist = 0; | ||||
15210 | }else{ | ||||
15211 | continue; | ||||
15212 | } | ||||
15213 | } | ||||
15214 | }else if( (pDoclist[iOff] & 0x01) | ||||
15215 | && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid) | ||||
15216 | ){ | ||||
15217 | if( p->rc!=SQLITE_OK0 || pDoclist[iOff]==0x01 ){ | ||||
15218 | iOff++; | ||||
15219 | continue; | ||||
15220 | } | ||||
15221 | } | ||||
15222 | } | ||||
15223 | |||||
15224 | if( p->rc==SQLITE_OK0 && bTermWritten==0 ){ | ||||
15225 | fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); | ||||
15226 | bTermWritten = 1; | ||||
15227 | assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 )((void) (0)); | ||||
15228 | } | ||||
15229 | |||||
15230 | if( writer.bFirstRowidInPage ){ | ||||
15231 | fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ | ||||
15232 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); | ||||
15233 | writer.bFirstRowidInPage = 0; | ||||
15234 | fts5WriteDlidxAppend(p, &writer, iRowid); | ||||
15235 | }else{ | ||||
15236 | u64 iRowidDelta = (u64)iRowid - (u64)iPrev; | ||||
15237 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta); | ||||
15238 | } | ||||
15239 | if( p->rc!=SQLITE_OK0 ) break; | ||||
15240 | assert( pBuf->n<=pBuf->nSpace )((void) (0)); | ||||
15241 | iPrev = iRowid; | ||||
15242 | |||||
15243 | if( eDetail==FTS5_DETAIL_NONE1 ){ | ||||
15244 | if( iOff<nDoclist && pDoclist[iOff]==0 ){ | ||||
15245 | pBuf->p[pBuf->n++] = 0; | ||||
15246 | iOff++; | ||||
15247 | if( iOff<nDoclist && pDoclist[iOff]==0 ){ | ||||
15248 | pBuf->p[pBuf->n++] = 0; | ||||
15249 | iOff++; | ||||
15250 | } | ||||
15251 | } | ||||
15252 | if( (pBuf->n + pPgidx->n)>=pgsz ){ | ||||
15253 | fts5WriteFlushLeaf(p, &writer); | ||||
15254 | } | ||||
15255 | }else{ | ||||
15256 | int bDel = 0; | ||||
15257 | int nPos = 0; | ||||
15258 | int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel); | ||||
15259 | if( bDel && bSecureDelete ){ | ||||
15260 | fts5BufferAppendVarint(&p->rc, pBuf, nPos*2)sqlite3Fts5BufferAppendVarint(&p->rc,pBuf,(i64)nPos*2); | ||||
15261 | iOff += nCopy; | ||||
15262 | nCopy = nPos; | ||||
15263 | }else{ | ||||
15264 | nCopy += nPos; | ||||
15265 | } | ||||
15266 | if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ | ||||
15267 | /* The entire poslist will fit on the current leaf. So copy | ||||
15268 | ** it in one go. */ | ||||
15269 | fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], & pDoclist[iOff], nCopy); (pBuf)->n += nCopy; }; | ||||
15270 | }else{ | ||||
15271 | /* The entire poslist will not fit on this leaf. So it needs | ||||
15272 | ** to be broken into sections. The only qualification being | ||||
15273 | ** that each varint must be stored contiguously. */ | ||||
15274 | const u8 *pPoslist = &pDoclist[iOff]; | ||||
15275 | int iPos = 0; | ||||
15276 | while( p->rc==SQLITE_OK0 ){ | ||||
15277 | int nSpace = pgsz - pBuf->n - pPgidx->n; | ||||
15278 | int n = 0; | ||||
15279 | if( (nCopy - iPos)<=nSpace ){ | ||||
15280 | n = nCopy - iPos; | ||||
15281 | }else{ | ||||
15282 | n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); | ||||
15283 | } | ||||
15284 | assert( n>0 )((void) (0)); | ||||
15285 | fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], & pPoslist[iPos], n); (pBuf)->n += n; }; | ||||
15286 | iPos += n; | ||||
15287 | if( (pBuf->n + pPgidx->n)>=pgsz ){ | ||||
15288 | fts5WriteFlushLeaf(p, &writer); | ||||
15289 | } | ||||
15290 | if( iPos>=nCopy ) break; | ||||
15291 | } | ||||
15292 | } | ||||
15293 | iOff += nCopy; | ||||
15294 | } | ||||
15295 | } | ||||
15296 | } | ||||
15297 | |||||
15298 | /* TODO2: Doclist terminator written here. */ | ||||
15299 | /* pBuf->p[pBuf->n++] = '\0'; */ | ||||
15300 | assert( pBuf->n<=pBuf->nSpace )((void) (0)); | ||||
15301 | if( p->rc==SQLITE_OK0 ) sqlite3Fts5HashScanNext(pHash); | ||||
15302 | } | ||||
15303 | fts5WriteFinish(p, &writer, &pgnoLast); | ||||
15304 | |||||
15305 | assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 )((void) (0)); | ||||
15306 | if( pgnoLast>0 ){ | ||||
15307 | /* Update the Fts5Structure. It is written back to the database by the | ||||
15308 | ** fts5StructureRelease() call below. */ | ||||
15309 | if( pStruct->nLevel==0 ){ | ||||
15310 | fts5StructureAddLevel(&p->rc, &pStruct); | ||||
15311 | } | ||||
15312 | fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); | ||||
15313 | if( p->rc==SQLITE_OK0 ){ | ||||
15314 | pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; | ||||
15315 | pSeg->iSegid = iSegid; | ||||
15316 | pSeg->pgnoFirst = 1; | ||||
15317 | pSeg->pgnoLast = pgnoLast; | ||||
15318 | if( pStruct->nOriginCntr>0 ){ | ||||
15319 | pSeg->iOrigin1 = pStruct->nOriginCntr; | ||||
15320 | pSeg->iOrigin2 = pStruct->nOriginCntr; | ||||
15321 | pSeg->nEntry = p->nPendingRow; | ||||
15322 | pStruct->nOriginCntr++; | ||||
15323 | } | ||||
15324 | pStruct->nSegment++; | ||||
15325 | } | ||||
15326 | fts5StructurePromote(p, 0, pStruct); | ||||
15327 | } | ||||
15328 | } | ||||
15329 | } | ||||
15330 | |||||
15331 | fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete); | ||||
15332 | fts5IndexCrisismerge(p, &pStruct); | ||||
15333 | fts5StructureWrite(p, pStruct); | ||||
15334 | fts5StructureRelease(pStruct); | ||||
15335 | } | ||||
15336 | |||||
15337 | /* | ||||
15338 | ** Flush any data stored in the in-memory hash tables to the database. | ||||
15339 | */ | ||||
15340 | static void fts5IndexFlush(Fts5Index *p){ | ||||
15341 | /* Unless it is empty, flush the hash table to disk */ | ||||
15342 | if( p->flushRc ){ | ||||
15343 | p->rc = p->flushRc; | ||||
15344 | return; | ||||
15345 | } | ||||
15346 | if( p->nPendingData || p->nContentlessDelete ){ | ||||
15347 | assert( p->pHash )((void) (0)); | ||||
15348 | fts5FlushOneHash(p); | ||||
15349 | if( p->rc==SQLITE_OK0 ){ | ||||
15350 | sqlite3Fts5HashClear(p->pHash); | ||||
15351 | p->nPendingData = 0; | ||||
15352 | p->nPendingRow = 0; | ||||
15353 | p->nContentlessDelete = 0; | ||||
15354 | }else if( p->nPendingData || p->nContentlessDelete ){ | ||||
15355 | p->flushRc = p->rc; | ||||
15356 | } | ||||
15357 | } | ||||
15358 | } | ||||
15359 | |||||
15360 | static Fts5Structure *fts5IndexOptimizeStruct( | ||||
15361 | Fts5Index *p, | ||||
15362 | Fts5Structure *pStruct | ||||
15363 | ){ | ||||
15364 | Fts5Structure *pNew = 0; | ||||
15365 | sqlite3_int64 nByte = SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel )); | ||||
15366 | int nSeg = pStruct->nSegment; | ||||
15367 | int i; | ||||
15368 | |||||
15369 | /* Figure out if this structure requires optimization. A structure does | ||||
15370 | ** not require optimization if either: | ||||
15371 | ** | ||||
15372 | ** 1. it consists of fewer than two segments, or | ||||
15373 | ** 2. all segments are on the same level, or | ||||
15374 | ** 3. all segments except one are currently inputs to a merge operation. | ||||
15375 | ** | ||||
15376 | ** In the first case, if there are no tombstone hash pages, return NULL. In | ||||
15377 | ** the second, increment the ref-count on *pStruct and return a copy of the | ||||
15378 | ** pointer to it. | ||||
15379 | */ | ||||
15380 | if( nSeg==0 ) return 0; | ||||
15381 | for(i=0; i<pStruct->nLevel; i++){ | ||||
15382 | int nThis = pStruct->aLevel[i].nSeg; | ||||
15383 | int nMerge = pStruct->aLevel[i].nMerge; | ||||
15384 | if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){ | ||||
15385 | if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){ | ||||
15386 | return 0; | ||||
15387 | } | ||||
15388 | fts5StructureRef(pStruct); | ||||
15389 | return pStruct; | ||||
15390 | } | ||||
15391 | assert( pStruct->aLevel[i].nMerge<=nThis )((void) (0)); | ||||
15392 | } | ||||
15393 | |||||
15394 | nByte += (((i64)pStruct->nLevel)+1) * sizeof(Fts5StructureLevel); | ||||
15395 | assert( nByte==SZ_FTS5STRUCTURE(pStruct->nLevel+2) )((void) (0)); | ||||
15396 | pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); | ||||
15397 | |||||
15398 | if( pNew ){ | ||||
15399 | Fts5StructureLevel *pLvl; | ||||
15400 | nByte = nSeg * sizeof(Fts5StructureSegment); | ||||
15401 | pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL)(((pStruct->nLevel+1) < (64)) ? (pStruct->nLevel+1) : (64)); | ||||
15402 | pNew->nRef = 1; | ||||
15403 | pNew->nWriteCounter = pStruct->nWriteCounter; | ||||
15404 | pNew->nOriginCntr = pStruct->nOriginCntr; | ||||
15405 | pLvl = &pNew->aLevel[pNew->nLevel-1]; | ||||
15406 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); | ||||
15407 | if( pLvl->aSeg ){ | ||||
15408 | int iLvl, iSeg; | ||||
15409 | int iSegOut = 0; | ||||
15410 | /* Iterate through all segments, from oldest to newest. Add them to | ||||
15411 | ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest | ||||
15412 | ** segment in the data structure. */ | ||||
15413 | for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ | ||||
15414 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | ||||
15415 | pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; | ||||
15416 | iSegOut++; | ||||
15417 | } | ||||
15418 | } | ||||
15419 | pNew->nSegment = pLvl->nSeg = nSeg; | ||||
15420 | }else{ | ||||
15421 | sqlite3_freesqlite3_api->free(pNew); | ||||
15422 | pNew = 0; | ||||
15423 | } | ||||
15424 | } | ||||
15425 | |||||
15426 | return pNew; | ||||
15427 | } | ||||
15428 | |||||
15429 | static int sqlite3Fts5IndexOptimize(Fts5Index *p){ | ||||
15430 | Fts5Structure *pStruct; | ||||
15431 | Fts5Structure *pNew = 0; | ||||
15432 | |||||
15433 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
15434 | fts5IndexFlush(p); | ||||
15435 | assert( p->rc!=SQLITE_OK || p->nContentlessDelete==0 )((void) (0)); | ||||
15436 | pStruct = fts5StructureRead(p); | ||||
15437 | assert( p->rc!=SQLITE_OK || pStruct!=0 )((void) (0)); | ||||
15438 | fts5StructureInvalidate(p); | ||||
15439 | |||||
15440 | if( pStruct ){ | ||||
15441 | pNew = fts5IndexOptimizeStruct(p, pStruct); | ||||
15442 | } | ||||
15443 | fts5StructureRelease(pStruct); | ||||
15444 | |||||
15445 | assert( pNew==0 || pNew->nSegment>0 )((void) (0)); | ||||
15446 | if( pNew ){ | ||||
15447 | int iLvl; | ||||
15448 | for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){} | ||||
15449 | while( p->rc==SQLITE_OK0 && pNew->aLevel[iLvl].nSeg>0 ){ | ||||
15450 | int nRem = FTS5_OPT_WORK_UNIT1000; | ||||
15451 | fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); | ||||
15452 | } | ||||
15453 | |||||
15454 | fts5StructureWrite(p, pNew); | ||||
15455 | fts5StructureRelease(pNew); | ||||
15456 | } | ||||
15457 | |||||
15458 | return fts5IndexReturn(p); | ||||
15459 | } | ||||
15460 | |||||
15461 | /* | ||||
15462 | ** This is called to implement the special "VALUES('merge', $nMerge)" | ||||
15463 | ** INSERT command. | ||||
15464 | */ | ||||
15465 | static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ | ||||
15466 | Fts5Structure *pStruct = 0; | ||||
15467 | |||||
15468 | fts5IndexFlush(p); | ||||
15469 | pStruct = fts5StructureRead(p); | ||||
15470 | if( pStruct ){ | ||||
15471 | int nMin = p->pConfig->nUsermerge; | ||||
15472 | fts5StructureInvalidate(p); | ||||
15473 | if( nMerge<0 ){ | ||||
15474 | Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); | ||||
15475 | fts5StructureRelease(pStruct); | ||||
15476 | pStruct = pNew; | ||||
15477 | nMin = 1; | ||||
15478 | nMerge = nMerge*-1; | ||||
15479 | } | ||||
15480 | if( pStruct && pStruct->nLevel ){ | ||||
15481 | if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){ | ||||
15482 | fts5StructureWrite(p, pStruct); | ||||
15483 | } | ||||
15484 | } | ||||
15485 | fts5StructureRelease(pStruct); | ||||
15486 | } | ||||
15487 | return fts5IndexReturn(p); | ||||
15488 | } | ||||
15489 | |||||
15490 | static void fts5AppendRowid( | ||||
15491 | Fts5Index *p, | ||||
15492 | u64 iDelta, | ||||
15493 | Fts5Iter *pUnused, | ||||
15494 | Fts5Buffer *pBuf | ||||
15495 | ){ | ||||
15496 | UNUSED_PARAM(pUnused)(void)(pUnused); | ||||
15497 | fts5BufferAppendVarint(&p->rc, pBuf, iDelta)sqlite3Fts5BufferAppendVarint(&p->rc,pBuf,(i64)iDelta); | ||||
15498 | } | ||||
15499 | |||||
15500 | static void fts5AppendPoslist( | ||||
15501 | Fts5Index *p, | ||||
15502 | u64 iDelta, | ||||
15503 | Fts5Iter *pMulti, | ||||
15504 | Fts5Buffer *pBuf | ||||
15505 | ){ | ||||
15506 | int nData = pMulti->base.nData; | ||||
15507 | int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING8; | ||||
15508 | assert( nData>0 )((void) (0)); | ||||
15509 | if( p->rc==SQLITE_OK0 && 0==fts5BufferGrow(&p->rc, pBuf, nByte)( (u32)((pBuf)->n) + (u32)(nByte) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((&p->rc),(pBuf),(nByte)+ (pBuf)->n) ) ){ | ||||
15510 | fts5BufferSafeAppendVarint(pBuf, iDelta){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf )->n], (iDelta)); ((void) (0)); }; | ||||
15511 | fts5BufferSafeAppendVarint(pBuf, nData*2){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf )->n], (nData*2)); ((void) (0)); }; | ||||
15512 | fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pMulti ->base.pData, nData); (pBuf)->n += nData; }; | ||||
15513 | memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING8); | ||||
15514 | } | ||||
15515 | } | ||||
15516 | |||||
15517 | |||||
15518 | static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ | ||||
15519 | u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; | ||||
15520 | |||||
15521 | assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) )((void) (0)); | ||||
15522 | if( p>=pIter->aEof ){ | ||||
15523 | pIter->aPoslist = 0; | ||||
15524 | }else{ | ||||
15525 | i64 iDelta; | ||||
15526 | |||||
15527 | p += fts5GetVarintsqlite3Fts5GetVarint(p, (u64*)&iDelta); | ||||
15528 | pIter->iRowid += iDelta; | ||||
15529 | |||||
15530 | /* Read position list size */ | ||||
15531 | if( p[0] & 0x80 ){ | ||||
15532 | int nPos; | ||||
15533 | pIter->nSize = fts5GetVarint32(p, nPos)sqlite3Fts5GetVarint32(p,(u32*)&(nPos)); | ||||
15534 | pIter->nPoslist = (nPos>>1); | ||||
15535 | }else{ | ||||
15536 | pIter->nPoslist = ((int)(p[0])) >> 1; | ||||
15537 | pIter->nSize = 1; | ||||
15538 | } | ||||
15539 | |||||
15540 | pIter->aPoslist = p; | ||||
15541 | if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){ | ||||
15542 | pIter->aPoslist = 0; | ||||
15543 | } | ||||
15544 | } | ||||
15545 | } | ||||
15546 | |||||
15547 | static void fts5DoclistIterInit( | ||||
15548 | Fts5Buffer *pBuf, | ||||
15549 | Fts5DoclistIter *pIter | ||||
15550 | ){ | ||||
15551 | memset(pIter, 0, sizeof(*pIter)); | ||||
15552 | if( pBuf->n>0 ){ | ||||
15553 | pIter->aPoslist = pBuf->p; | ||||
15554 | pIter->aEof = &pBuf->p[pBuf->n]; | ||||
15555 | fts5DoclistIterNext(pIter); | ||||
15556 | } | ||||
15557 | } | ||||
15558 | |||||
#if 0
/*
** Disabled function form of the fts5MergeAppendDocid() macro defined
** below. Append rowid iRowid to buffer pBuf, encoded as a delta from
** *piLastRowid, then set *piLastRowid to iRowid.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Append (iRowid - iLastRowid), computed using unsigned 64-bit arithmetic,
** to buffer pBuf as a varint, then set iLastRowid to iRowid. iLastRowid
** must be an lvalue. Space in the buffer must already have been allocated.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) {                 \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                             \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
  (iLastRowid) = (iRowid);                                               \
}
15582 | |||||
15583 | /* | ||||
15584 | ** Swap the contents of buffer *p1 with that of *p2. | ||||
15585 | */ | ||||
15586 | static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ | ||||
15587 | Fts5Buffer tmp = *p1; | ||||
15588 | *p1 = *p2; | ||||
15589 | *p2 = tmp; | ||||
15590 | } | ||||
15591 | |||||
15592 | static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ | ||||
15593 | int i = *piOff; | ||||
15594 | if( i>=pBuf->n ){ | ||||
15595 | *piOff = -1; | ||||
15596 | }else{ | ||||
15597 | u64 iVal; | ||||
15598 | *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); | ||||
15599 | *piRowid += iVal; | ||||
15600 | } | ||||
15601 | } | ||||
15602 | |||||
15603 | /* | ||||
15604 | ** This is the equivalent of fts5MergePrefixLists() for detail=none mode. | ||||
15605 | ** In this case the buffers consist of a delta-encoded list of rowids only. | ||||
15606 | */ | ||||
15607 | static void fts5MergeRowidLists( | ||||
15608 | Fts5Index *p, /* FTS5 backend object */ | ||||
15609 | Fts5Buffer *p1, /* First list to merge */ | ||||
15610 | int nBuf, /* Number of entries in apBuf[] */ | ||||
15611 | Fts5Buffer *aBuf /* Array of other lists to merge into p1 */ | ||||
15612 | ){ | ||||
15613 | int i1 = 0; | ||||
15614 | int i2 = 0; | ||||
15615 | i64 iRowid1 = 0; | ||||
15616 | i64 iRowid2 = 0; | ||||
15617 | i64 iOut = 0; | ||||
15618 | Fts5Buffer *p2 = &aBuf[0]; | ||||
15619 | Fts5Buffer out; | ||||
15620 | |||||
15621 | (void)nBuf; | ||||
15622 | memset(&out, 0, sizeof(out)); | ||||
15623 | assert( nBuf==1 )((void) (0)); | ||||
15624 | sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); | ||||
15625 | if( p->rc ) return; | ||||
15626 | |||||
15627 | fts5NextRowid(p1, &i1, &iRowid1); | ||||
15628 | fts5NextRowid(p2, &i2, &iRowid2); | ||||
15629 | while( i1>=0 || i2>=0 ){ | ||||
15630 | if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){ | ||||
15631 | assert( iOut==0 || iRowid1>iOut )((void) (0)); | ||||
15632 | fts5BufferSafeAppendVarint(&out, iRowid1 - iOut){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], (iRowid1 - iOut)); ((void) (0)); }; | ||||
15633 | iOut = iRowid1; | ||||
15634 | fts5NextRowid(p1, &i1, &iRowid1); | ||||
15635 | }else{ | ||||
15636 | assert( iOut==0 || iRowid2>iOut )((void) (0)); | ||||
15637 | fts5BufferSafeAppendVarint(&out, iRowid2 - iOut){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], (iRowid2 - iOut)); ((void) (0)); }; | ||||
15638 | iOut = iRowid2; | ||||
15639 | if( i1>=0 && iRowid1==iRowid2 ){ | ||||
15640 | fts5NextRowid(p1, &i1, &iRowid1); | ||||
15641 | } | ||||
15642 | fts5NextRowid(p2, &i2, &iRowid2); | ||||
15643 | } | ||||
15644 | } | ||||
15645 | |||||
15646 | fts5BufferSwap(&out, p1); | ||||
15647 | fts5BufferFree(&out)sqlite3Fts5BufferFree(&out); | ||||
15648 | } | ||||
15649 | |||||
15650 | typedef struct PrefixMerger PrefixMerger; | ||||
15651 | struct PrefixMerger { | ||||
15652 | Fts5DoclistIter iter; /* Doclist iterator */ | ||||
15653 | i64 iPos; /* For iterating through a position list */ | ||||
15654 | int iOff; | ||||
15655 | u8 *aPos; | ||||
15656 | PrefixMerger *pNext; /* Next in docid/poslist order */ | ||||
15657 | }; | ||||
15658 | |||||
15659 | static void fts5PrefixMergerInsertByRowid( | ||||
15660 | PrefixMerger **ppHead, | ||||
15661 | PrefixMerger *p | ||||
15662 | ){ | ||||
15663 | if( p->iter.aPoslist ){ | ||||
15664 | PrefixMerger **pp = ppHead; | ||||
15665 | while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){ | ||||
15666 | pp = &(*pp)->pNext; | ||||
15667 | } | ||||
15668 | p->pNext = *pp; | ||||
15669 | *pp = p; | ||||
15670 | } | ||||
15671 | } | ||||
15672 | |||||
15673 | static void fts5PrefixMergerInsertByPosition( | ||||
15674 | PrefixMerger **ppHead, | ||||
15675 | PrefixMerger *p | ||||
15676 | ){ | ||||
15677 | if( p->iPos>=0 ){ | ||||
15678 | PrefixMerger **pp = ppHead; | ||||
15679 | while( *pp && p->iPos>(*pp)->iPos ){ | ||||
15680 | pp = &(*pp)->pNext; | ||||
15681 | } | ||||
15682 | p->pNext = *pp; | ||||
15683 | *pp = p; | ||||
15684 | } | ||||
15685 | } | ||||
15686 | |||||
15687 | |||||
15688 | /* | ||||
15689 | ** Array aBuf[] contains nBuf doclists. These are all merged in with the | ||||
15690 | ** doclist in buffer p1. | ||||
15691 | */ | ||||
15692 | static void fts5MergePrefixLists( | ||||
15693 | Fts5Index *p, /* FTS5 backend object */ | ||||
15694 | Fts5Buffer *p1, /* First list to merge */ | ||||
15695 | int nBuf, /* Number of buffers in array aBuf[] */ | ||||
15696 | Fts5Buffer *aBuf /* Other lists to merge in */ | ||||
15697 | ){ | ||||
15698 | #define fts5PrefixMergerNextPosition(p)sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,& (p)->iOff,&(p)->iPos) \ | ||||
15699 | sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos) | ||||
15700 | #define FTS5_MERGE_NLIST16 16 | ||||
15701 | PrefixMerger aMerger[FTS5_MERGE_NLIST16]; | ||||
15702 | PrefixMerger *pHead = 0; | ||||
15703 | int i; | ||||
15704 | int nOut = 0; | ||||
15705 | Fts5Buffer out = {0, 0, 0}; | ||||
15706 | Fts5Buffer tmp = {0, 0, 0}; | ||||
15707 | i64 iLastRowid = 0; | ||||
15708 | |||||
15709 | /* Initialize a doclist-iterator for each input buffer. Arrange them in | ||||
15710 | ** a linked-list starting at pHead in ascending order of rowid. Avoid | ||||
15711 | ** linking any iterators already at EOF into the linked list at all. */ | ||||
15712 | assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) )((void) (0)); | ||||
15713 | memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1)); | ||||
15714 | pHead = &aMerger[nBuf]; | ||||
15715 | fts5DoclistIterInit(p1, &pHead->iter); | ||||
15716 | for(i=0; i<nBuf; i++){ | ||||
15717 | fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter); | ||||
15718 | fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]); | ||||
15719 | nOut += aBuf[i].n; | ||||
15720 | } | ||||
15721 | if( nOut==0 ) return; | ||||
15722 | nOut += p1->n + 9 + 10*nBuf; | ||||
15723 | |||||
15724 | /* The maximum size of the output is equal to the sum of the | ||||
15725 | ** input sizes + 1 varint (9 bytes). The extra varint is because if the | ||||
15726 | ** first rowid in one input is a large negative number, and the first in | ||||
15727 | ** the other a non-negative number, the delta for the non-negative | ||||
15728 | ** number will be larger on disk than the literal integer value | ||||
15729 | ** was. | ||||
15730 | ** | ||||
15731 | ** Or, if the input position-lists are corrupt, then the output might | ||||
15732 | ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1 | ||||
15733 | ** (the value PoslistNext64() uses for EOF) as a position and appending | ||||
15734 | ** it to the output. This can happen at most once for each input | ||||
15735 | ** position-list, hence (nBuf+1) 10 byte paddings. */ | ||||
15736 | if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return; | ||||
15737 | |||||
15738 | while( pHead ){ | ||||
15739 | fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid){ ((void) (0)); { ((&out))->n += sqlite3Fts5PutVarint( &((&out))->p[((&out))->n], ((u64)(pHead-> iter.iRowid) - (u64)(iLastRowid))); ((void) (0)); }; (iLastRowid ) = (pHead->iter.iRowid); }; | ||||
15740 | |||||
15741 | if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){ | ||||
15742 | /* Merge data from two or more poslists */ | ||||
15743 | i64 iPrev = 0; | ||||
15744 | int nTmp = FTS5_DATA_ZERO_PADDING8; | ||||
15745 | int nMerge = 0; | ||||
15746 | PrefixMerger *pSave = pHead; | ||||
15747 | PrefixMerger *pThis = 0; | ||||
15748 | int nTail = 0; | ||||
15749 | |||||
15750 | pHead = 0; | ||||
15751 | while( pSave && pSave->iter.iRowid==iLastRowid ){ | ||||
15752 | PrefixMerger *pNext = pSave->pNext; | ||||
15753 | pSave->iOff = 0; | ||||
15754 | pSave->iPos = 0; | ||||
15755 | pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize]; | ||||
15756 | fts5PrefixMergerNextPosition(pSave)sqlite3Fts5PoslistNext64((pSave)->aPos,(pSave)->iter.nPoslist ,&(pSave)->iOff,&(pSave)->iPos); | ||||
15757 | nTmp += pSave->iter.nPoslist + 10; | ||||
15758 | nMerge++; | ||||
15759 | fts5PrefixMergerInsertByPosition(&pHead, pSave); | ||||
15760 | pSave = pNext; | ||||
15761 | } | ||||
15762 | |||||
15763 | if( pHead==0 || pHead->pNext==0 ){ | ||||
15764 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
15765 | break; | ||||
15766 | } | ||||
15767 | |||||
15768 | /* See the earlier comment in this function for an explanation of why | ||||
15769 | ** corrupt input position lists might cause the output to consume | ||||
15770 | ** at most nMerge*10 bytes of unexpected space. */ | ||||
15771 | if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){ | ||||
15772 | break; | ||||
15773 | } | ||||
15774 | fts5BufferZero(&tmp)sqlite3Fts5BufferZero(&tmp); | ||||
15775 | |||||
15776 | pThis = pHead; | ||||
15777 | pHead = pThis->pNext; | ||||
15778 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos); | ||||
15779 | fts5PrefixMergerNextPosition(pThis)sqlite3Fts5PoslistNext64((pThis)->aPos,(pThis)->iter.nPoslist ,&(pThis)->iOff,&(pThis)->iPos); | ||||
15780 | fts5PrefixMergerInsertByPosition(&pHead, pThis); | ||||
15781 | |||||
15782 | while( pHead->pNext ){ | ||||
15783 | pThis = pHead; | ||||
15784 | if( pThis->iPos!=iPrev ){ | ||||
15785 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos); | ||||
15786 | } | ||||
15787 | fts5PrefixMergerNextPosition(pThis)sqlite3Fts5PoslistNext64((pThis)->aPos,(pThis)->iter.nPoslist ,&(pThis)->iOff,&(pThis)->iPos); | ||||
15788 | pHead = pThis->pNext; | ||||
15789 | fts5PrefixMergerInsertByPosition(&pHead, pThis); | ||||
15790 | } | ||||
15791 | |||||
15792 | if( pHead->iPos!=iPrev ){ | ||||
15793 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos); | ||||
15794 | } | ||||
15795 | nTail = pHead->iter.nPoslist - pHead->iOff; | ||||
15796 | |||||
15797 | /* WRITEPOSLISTSIZE */ | ||||
15798 | assert_nc( tmp.n+nTail<=nTmp )((void) (0)); | ||||
15799 | assert( tmp.n+nTail<=nTmp+nMerge*10 )((void) (0)); | ||||
15800 | if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING8 ){ | ||||
15801 | if( p->rc==SQLITE_OK0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
15802 | break; | ||||
15803 | } | ||||
15804 | fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], ((tmp.n+nTail) * 2)); ((void) (0)); }; | ||||
15805 | fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], tmp.p, tmp.n); (&out)->n += tmp.n; }; | ||||
15806 | if( nTail>0 ){ | ||||
15807 | fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], &pHead->aPos[pHead->iOff], nTail); (&out)-> n += nTail; }; | ||||
15808 | } | ||||
15809 | |||||
15810 | pHead = pSave; | ||||
15811 | for(i=0; i<nBuf+1; i++){ | ||||
15812 | PrefixMerger *pX = &aMerger[i]; | ||||
15813 | if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){ | ||||
15814 | fts5DoclistIterNext(&pX->iter); | ||||
15815 | fts5PrefixMergerInsertByRowid(&pHead, pX); | ||||
15816 | } | ||||
15817 | } | ||||
15818 | |||||
15819 | }else{ | ||||
15820 | /* Copy poslist from pHead to output */ | ||||
15821 | PrefixMerger *pThis = pHead; | ||||
15822 | Fts5DoclistIter *pI = &pThis->iter; | ||||
15823 | fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], pI->aPoslist, pI->nPoslist+pI->nSize); (&out) ->n += pI->nPoslist+pI->nSize; }; | ||||
15824 | fts5DoclistIterNext(pI); | ||||
15825 | pHead = pThis->pNext; | ||||
15826 | fts5PrefixMergerInsertByRowid(&pHead, pThis); | ||||
15827 | } | ||||
15828 | } | ||||
15829 | |||||
15830 | fts5BufferFree(p1)sqlite3Fts5BufferFree(p1); | ||||
15831 | fts5BufferFree(&tmp)sqlite3Fts5BufferFree(&tmp); | ||||
15832 | memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING8); | ||||
15833 | *p1 = out; | ||||
15834 | } | ||||
15835 | |||||
15836 | |||||
15837 | /* | ||||
15838 | ** Iterate through a range of entries in the FTS index, invoking the xVisit | ||||
15839 | ** callback for each of them. | ||||
15840 | ** | ||||
15841 | ** Parameter pToken points to an nToken buffer containing an FTS index term | ||||
15842 | ** (i.e. a document term with the preceding 1 byte index identifier - | ||||
15843 | ** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits | ||||
15844 | ** all entries for terms that have pToken/nToken as a prefix. If bPrefix | ||||
15845 | ** is false, then only entries with pToken/nToken as the entire key are | ||||
15846 | ** visited. | ||||
15847 | ** | ||||
15848 | ** If the current table is a tokendata=1 table, then if bPrefix is true then | ||||
15849 | ** each index term is treated separately. However, if bPrefix is false, then | ||||
15850 | ** all index terms corresponding to pToken/nToken are collapsed into a single | ||||
15851 | ** term before the callback is invoked. | ||||
15852 | ** | ||||
15853 | ** The callback invoked for each entry visited is specified by parameter xVisit. | ||||
15854 | ** Each time it is invoked, it is passed a pointer to the Fts5Index object, | ||||
15855 | ** a copy of the 7th parameter to this function (pCtx) and a pointer to the | ||||
15856 | ** iterator that indicates the current entry. If the current entry is the | ||||
15857 | ** first with a new term (i.e. different from that of the previous entry, | ||||
15858 | ** including the very first term), then the final two parameters are passed | ||||
15859 | ** a pointer to the term and its size in bytes, respectively. If the current | ||||
15860 | ** entry is not the first associated with its term, these two parameters | ||||
15861 | ** are passed 0. | ||||
15862 | ** | ||||
15863 | ** If parameter pColset is not NULL, then it is used to filter entries before | ||||
15864 | ** the callback is invoked. | ||||
15865 | */ | ||||
15866 | static int fts5VisitEntries( | ||||
15867 | Fts5Index *p, /* Fts5 index object */ | ||||
15868 | Fts5Colset *pColset, /* Columns filter to apply, or NULL */ | ||||
15869 | u8 *pToken, /* Buffer containing token */ | ||||
15870 | int nToken, /* Size of buffer pToken in bytes */ | ||||
15871 | int bPrefix, /* True for a prefix scan */ | ||||
15872 | void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int), | ||||
15873 | void *pCtx /* Passed as second argument to xVisit() */ | ||||
15874 | ){ | ||||
15875 | const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN0x0008 : 0) | ||||
15876 | | FTS5INDEX_QUERY_SKIPEMPTY0x0010 | ||||
15877 | | FTS5INDEX_QUERY_NOOUTPUT0x0020; | ||||
15878 | Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ | ||||
15879 | int bNewTerm = 1; | ||||
15880 | Fts5Structure *pStruct = fts5StructureRead(p); | ||||
15881 | |||||
15882 | fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); | ||||
15883 | fts5IterSetOutputCb(&p->rc, p1); | ||||
15884 | for( /* no-op */ ; | ||||
15885 | fts5MultiIterEof(p, p1)==0; | ||||
15886 | fts5MultiIterNext2(p, p1, &bNewTerm) | ||||
15887 | ){ | ||||
15888 | Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; | ||||
15889 | int nNew = 0; | ||||
15890 | const u8 *pNew = 0; | ||||
15891 | |||||
15892 | p1->xSetOutputs(p1, pSeg); | ||||
15893 | if( p->rc ) break; | ||||
15894 | |||||
15895 | if( bNewTerm ){ | ||||
15896 | nNew = pSeg->term.n; | ||||
15897 | pNew = pSeg->term.p; | ||||
15898 | if( nNew<nToken || memcmp(pToken, pNew, nToken) ) break; | ||||
15899 | } | ||||
15900 | |||||
15901 | xVisit(p, pCtx, p1, pNew, nNew); | ||||
15902 | } | ||||
15903 | fts5MultiIterFree(p1); | ||||
15904 | |||||
15905 | fts5StructureRelease(pStruct); | ||||
15906 | return p->rc; | ||||
15907 | } | ||||
15908 | |||||
15909 | |||||
15910 | /* | ||||
15911 | ** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an | ||||
15912 | ** array of these for each row it visits (so all iRowid fields are the same). | ||||
15913 | ** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an | ||||
15914 | ** array of these for the entire query (in which case iRowid fields may take | ||||
15915 | ** a variety of values). | ||||
15916 | ** | ||||
15917 | ** Each instance in the array indicates the iterator (and therefore term) | ||||
15918 | ** associated with position iPos of rowid iRowid. This is used by the | ||||
15919 | ** xInstToken() API. | ||||
15920 | ** | ||||
15921 | ** iRowid: | ||||
15922 | ** Rowid for the current entry. | ||||
15923 | ** | ||||
15924 | ** iPos: | ||||
15925 | ** Position of current entry within row. In the usual ((iCol<<32)+iOff) | ||||
15926 | ** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()). | ||||
15927 | ** | ||||
15928 | ** iIter: | ||||
15929 | ** If the Fts5TokenDataIter iterator that the entry is part of is | ||||
15930 | ** actually an iterator (i.e. with nIter>0, not just a container for | ||||
15931 | ** Fts5TokenDataMap structures), then this variable is an index into | ||||
15932 | ** the apIter[] array. The corresponding term is that which the iterator | ||||
15933 | ** at apIter[iIter] currently points to. | ||||
15934 | ** | ||||
15935 | ** Or, if the Fts5TokenDataIter iterator is just a container object | ||||
15936 | ** (nIter==0), then iIter is an index into the term.p[] buffer where | ||||
15937 | ** the term is stored. | ||||
15938 | ** | ||||
15939 | ** nByte: | ||||
15940 | ** In the case where iIter is an index into term.p[], this variable | ||||
15941 | ** is the size of the term in bytes. If iIter is an index into apIter[], | ||||
15942 | ** this variable is unused. | ||||
15943 | */ | ||||
15944 | struct Fts5TokenDataMap { | ||||
15945 | i64 iRowid; /* Row this token is located in */ | ||||
15946 | i64 iPos; /* Position of token */ | ||||
15947 | int iIter; /* Iterator token was read from */ | ||||
15948 | int nByte; /* Length of token in bytes (or 0) */ | ||||
15949 | }; | ||||
15950 | |||||
15951 | /* | ||||
15952 | ** An object used to supplement Fts5Iter for tokendata=1 iterators. | ||||
15953 | ** | ||||
15954 | ** This object serves two purposes. The first is as a container for an array | ||||
15955 | ** of Fts5TokenDataMap structures, which are used to find the token required | ||||
15956 | ** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and | ||||
15957 | ** aMap[] variables. | ||||
15958 | */ | ||||
15959 | struct Fts5TokenDataIter { | ||||
15960 | int nMapAlloc; /* Allocated size of aMap[] in entries */ | ||||
15961 | int nMap; /* Number of valid entries in aMap[] */ | ||||
15962 | Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */ | ||||
15963 | |||||
15964 | /* The following are used for prefix-queries only. */ | ||||
15965 | Fts5Buffer terms; | ||||
15966 | |||||
15967 | /* The following are used for other full-token tokendata queries only. */ | ||||
15968 | int nIter; | ||||
15969 | int nIterAlloc; | ||||
15970 | Fts5PoslistReader *aPoslistReader; | ||||
15971 | int *aPoslistToIter; | ||||
15972 | Fts5Iter *apIter[FLEXARRAY]; | ||||
15973 | }; | ||||
15974 | |||||
/*
** Size in bytes of an Fts5TokenDataIter object holding up to N iterators.
**
** The trailing apIter[] array stores pointers (Fts5Iter*), so each of the
** N slots needs sizeof(Fts5Iter*) bytes. Sizing a slot as a complete
** Fts5Iter structure only wastes memory on every allocation.
*/
#define SZ_FTS5TOKENDATAITER(N) \
    (offsetof(Fts5TokenDataIter,apIter) + (N)*sizeof(Fts5Iter*))
15978 | |||||
15979 | /* | ||||
15980 | ** The two input arrays - a1[] and a2[] - are in sorted order. This function | ||||
15981 | ** merges the two arrays together and writes the result to output array | ||||
15982 | ** aOut[]. aOut[] is guaranteed to be large enough to hold the result. | ||||
15983 | ** | ||||
15984 | ** Duplicate entries are copied into the output. So the size of the output | ||||
15985 | ** array is always (n1+n2) entries. | ||||
15986 | */ | ||||
15987 | static void fts5TokendataMerge( | ||||
15988 | Fts5TokenDataMap *a1, int n1, /* Input array 1 */ | ||||
15989 | Fts5TokenDataMap *a2, int n2, /* Input array 2 */ | ||||
15990 | Fts5TokenDataMap *aOut /* Output array */ | ||||
15991 | ){ | ||||
15992 | int i1 = 0; | ||||
15993 | int i2 = 0; | ||||
15994 | |||||
15995 | assert( n1>=0 && n2>=0 )((void) (0)); | ||||
15996 | while( i1<n1 || i2<n2 ){ | ||||
15997 | Fts5TokenDataMap *pOut = &aOut[i1+i2]; | ||||
15998 | if( i2>=n2 || (i1<n1 && ( | ||||
15999 | a1[i1].iRowid<a2[i2].iRowid | ||||
16000 | || (a1[i1].iRowid==a2[i2].iRowid && a1[i1].iPos<=a2[i2].iPos) | ||||
16001 | ))){ | ||||
16002 | memcpy(pOut, &a1[i1], sizeof(Fts5TokenDataMap)); | ||||
16003 | i1++; | ||||
16004 | }else{ | ||||
16005 | memcpy(pOut, &a2[i2], sizeof(Fts5TokenDataMap)); | ||||
16006 | i2++; | ||||
16007 | } | ||||
16008 | } | ||||
16009 | } | ||||
16010 | |||||
16011 | |||||
16012 | /* | ||||
16013 | ** Append a mapping to the token-map belonging to object pT. | ||||
16014 | */ | ||||
16015 | static void fts5TokendataIterAppendMap( | ||||
16016 | Fts5Index *p, | ||||
16017 | Fts5TokenDataIter *pT, | ||||
16018 | int iIter, | ||||
16019 | int nByte, | ||||
16020 | i64 iRowid, | ||||
16021 | i64 iPos | ||||
16022 | ){ | ||||
16023 | if( p->rc==SQLITE_OK0 ){ | ||||
16024 | if( pT->nMap==pT->nMapAlloc ){ | ||||
16025 | int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; | ||||
16026 | int nAlloc = nNew * sizeof(Fts5TokenDataMap); | ||||
16027 | Fts5TokenDataMap *aNew; | ||||
16028 | |||||
16029 | aNew = (Fts5TokenDataMap*)sqlite3_reallocsqlite3_api->realloc(pT->aMap, nAlloc); | ||||
16030 | if( aNew==0 ){ | ||||
16031 | p->rc = SQLITE_NOMEM7; | ||||
16032 | return; | ||||
16033 | } | ||||
16034 | |||||
16035 | pT->aMap = aNew; | ||||
16036 | pT->nMapAlloc = nNew; | ||||
16037 | } | ||||
16038 | |||||
16039 | pT->aMap[pT->nMap].iRowid = iRowid; | ||||
16040 | pT->aMap[pT->nMap].iPos = iPos; | ||||
16041 | pT->aMap[pT->nMap].iIter = iIter; | ||||
16042 | pT->aMap[pT->nMap].nByte = nByte; | ||||
16043 | pT->nMap++; | ||||
16044 | } | ||||
16045 | } | ||||
16046 | |||||
16047 | /* | ||||
16048 | ** Sort the contents of the pT->aMap[] array. | ||||
16049 | ** | ||||
16050 | ** The sorting algorithm requires a malloc(). If this fails, an error code | ||||
16051 | ** is left in Fts5Index.rc before returning. | ||||
16052 | */ | ||||
16053 | static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ | ||||
16054 | Fts5TokenDataMap *aTmp = 0; | ||||
16055 | int nByte = pT->nMap * sizeof(Fts5TokenDataMap); | ||||
16056 | |||||
16057 | aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte); | ||||
16058 | if( aTmp ){ | ||||
16059 | Fts5TokenDataMap *a1 = pT->aMap; | ||||
16060 | Fts5TokenDataMap *a2 = aTmp; | ||||
16061 | i64 nHalf; | ||||
16062 | |||||
16063 | for(nHalf=1; nHalf<pT->nMap; nHalf=nHalf*2){ | ||||
16064 | int i1; | ||||
16065 | for(i1=0; i1<pT->nMap; i1+=(nHalf*2)){ | ||||
16066 | int n1 = MIN(nHalf, pT->nMap-i1)(((nHalf) < (pT->nMap-i1)) ? (nHalf) : (pT->nMap-i1) ); | ||||
16067 | int n2 = MIN(nHalf, pT->nMap-i1-n1)(((nHalf) < (pT->nMap-i1-n1)) ? (nHalf) : (pT->nMap- i1-n1)); | ||||
16068 | fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]); | ||||
16069 | } | ||||
16070 | SWAPVAL(Fts5TokenDataMap*, a1, a2){ Fts5TokenDataMap* tmp; tmp=a1; a1=a2; a2=tmp; }; | ||||
16071 | } | ||||
16072 | |||||
16073 | if( a1!=pT->aMap ){ | ||||
16074 | memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap)); | ||||
16075 | } | ||||
16076 | sqlite3_freesqlite3_api->free(aTmp); | ||||
16077 | |||||
16078 | #ifdef SQLITE_DEBUG | ||||
16079 | { | ||||
16080 | int ii; | ||||
16081 | for(ii=1; ii<pT->nMap; ii++){ | ||||
16082 | Fts5TokenDataMap *p1 = &pT->aMap[ii-1]; | ||||
16083 | Fts5TokenDataMap *p2 = &pT->aMap[ii]; | ||||
16084 | assert( p1->iRowid<p2->iRowid((void) (0)) | ||||
16085 | || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos)((void) (0)) | ||||
16086 | )((void) (0)); | ||||
16087 | } | ||||
16088 | } | ||||
16089 | #endif | ||||
16090 | } | ||||
16091 | } | ||||
16092 | |||||
16093 | /* | ||||
16094 | ** Delete an Fts5TokenDataIter structure and its contents. | ||||
16095 | */ | ||||
16096 | static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ | ||||
16097 | if( pSet ){ | ||||
16098 | int ii; | ||||
16099 | for(ii=0; ii<pSet->nIter; ii++){ | ||||
16100 | fts5MultiIterFree(pSet->apIter[ii]); | ||||
16101 | } | ||||
16102 | fts5BufferFree(&pSet->terms)sqlite3Fts5BufferFree(&pSet->terms); | ||||
16103 | sqlite3_freesqlite3_api->free(pSet->aPoslistReader); | ||||
16104 | sqlite3_freesqlite3_api->free(pSet->aMap); | ||||
16105 | sqlite3_freesqlite3_api->free(pSet); | ||||
16106 | } | ||||
16107 | } | ||||
16108 | |||||
16109 | |||||
16110 | /* | ||||
16111 | ** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata() | ||||
16112 | ** to pass data to prefixIterSetupTokendataCb(). | ||||
16113 | */ | ||||
16114 | typedef struct TokendataSetupCtx TokendataSetupCtx; | ||||
16115 | struct TokendataSetupCtx { | ||||
16116 | Fts5TokenDataIter *pT; /* Object being populated with mappings */ | ||||
16117 | int iTermOff; /* Offset of current term in terms.p[] */ | ||||
16118 | int nTermByte; /* Size of current term in bytes */ | ||||
16119 | }; | ||||
16120 | |||||
16121 | /* | ||||
16122 | ** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This | ||||
16123 | ** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each | ||||
16124 | ** position in the current position-list. It doesn't matter that some of | ||||
16125 | ** these may be out of order - they will be sorted later. | ||||
16126 | */ | ||||
16127 | static void prefixIterSetupTokendataCb( | ||||
16128 | Fts5Index *p, | ||||
16129 | void *pCtx, | ||||
16130 | Fts5Iter *p1, | ||||
16131 | const u8 *pNew, | ||||
16132 | int nNew | ||||
16133 | ){ | ||||
16134 | TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx; | ||||
16135 | int iPosOff = 0; | ||||
16136 | i64 iPos = 0; | ||||
16137 | |||||
16138 | if( pNew ){ | ||||
16139 | pSetup->nTermByte = nNew-1; | ||||
16140 | pSetup->iTermOff = pSetup->pT->terms.n; | ||||
16141 | fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1)sqlite3Fts5BufferAppendBlob(&p->rc,&pSetup->pT-> terms,nNew-1,pNew+1); | ||||
16142 | } | ||||
16143 | |||||
16144 | while( 0==sqlite3Fts5PoslistNext64( | ||||
16145 | p1->base.pData, p1->base.nData, &iPosOff, &iPos | ||||
16146 | ) ){ | ||||
16147 | fts5TokendataIterAppendMap(p, | ||||
16148 | pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos | ||||
16149 | ); | ||||
16150 | } | ||||
16151 | } | ||||
16152 | |||||
16153 | |||||
16154 | /* | ||||
16155 | ** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries(). | ||||
16156 | */ | ||||
16157 | typedef struct PrefixSetupCtx PrefixSetupCtx; | ||||
16158 | struct PrefixSetupCtx { | ||||
16159 | void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); | ||||
16160 | void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); | ||||
16161 | i64 iLastRowid; | ||||
16162 | int nMerge; | ||||
16163 | Fts5Buffer *aBuf; | ||||
16164 | int nBuf; | ||||
16165 | Fts5Buffer doclist; | ||||
16166 | TokendataSetupCtx *pTokendata; | ||||
16167 | }; | ||||
16168 | |||||
16169 | /* | ||||
16170 | ** fts5VisitEntries() callback used by fts5SetupPrefixIter() | ||||
16171 | */ | ||||
16172 | static void prefixIterSetupCb( | ||||
16173 | Fts5Index *p, | ||||
16174 | void *pCtx, | ||||
16175 | Fts5Iter *p1, | ||||
16176 | const u8 *pNew, | ||||
16177 | int nNew | ||||
16178 | ){ | ||||
16179 | PrefixSetupCtx *pSetup = (PrefixSetupCtx*)pCtx; | ||||
16180 | const int nMerge = pSetup->nMerge; | ||||
16181 | |||||
16182 | if( p1->base.nData>0 ){ | ||||
16183 | if( p1->base.iRowid<=pSetup->iLastRowid && pSetup->doclist.n>0 ){ | ||||
16184 | int i; | ||||
16185 | for(i=0; p->rc==SQLITE_OK0 && pSetup->doclist.n; i++){ | ||||
16186 | int i1 = i*nMerge; | ||||
16187 | int iStore; | ||||
16188 | assert( i1+nMerge<=pSetup->nBuf )((void) (0)); | ||||
16189 | for(iStore=i1; iStore<i1+nMerge; iStore++){ | ||||
16190 | if( pSetup->aBuf[iStore].n==0 ){ | ||||
16191 | fts5BufferSwap(&pSetup->doclist, &pSetup->aBuf[iStore]); | ||||
16192 | fts5BufferZero(&pSetup->doclist)sqlite3Fts5BufferZero(&pSetup->doclist); | ||||
16193 | break; | ||||
16194 | } | ||||
16195 | } | ||||
16196 | if( iStore==i1+nMerge ){ | ||||
16197 | pSetup->xMerge(p, &pSetup->doclist, nMerge, &pSetup->aBuf[i1]); | ||||
16198 | for(iStore=i1; iStore<i1+nMerge; iStore++){ | ||||
16199 | fts5BufferZero(&pSetup->aBuf[iStore])sqlite3Fts5BufferZero(&pSetup->aBuf[iStore]); | ||||
16200 | } | ||||
16201 | } | ||||
16202 | } | ||||
16203 | pSetup->iLastRowid = 0; | ||||
16204 | } | ||||
16205 | |||||
16206 | pSetup->xAppend( | ||||
16207 | p, (u64)p1->base.iRowid-(u64)pSetup->iLastRowid, p1, &pSetup->doclist | ||||
16208 | ); | ||||
16209 | pSetup->iLastRowid = p1->base.iRowid; | ||||
16210 | } | ||||
16211 | |||||
16212 | if( pSetup->pTokendata ){ | ||||
16213 | prefixIterSetupTokendataCb(p, (void*)pSetup->pTokendata, p1, pNew, nNew); | ||||
16214 | } | ||||
16215 | } | ||||
16216 | |||||
16217 | static void fts5SetupPrefixIter( | ||||
16218 | Fts5Index *p, /* Index to read from */ | ||||
16219 | int bDesc, /* True for "ORDER BY rowid DESC" */ | ||||
16220 | int iIdx, /* Index to scan for data */ | ||||
16221 | u8 *pToken, /* Buffer containing prefix to match */ | ||||
16222 | int nToken, /* Size of buffer pToken in bytes */ | ||||
16223 | Fts5Colset *pColset, /* Restrict matches to these columns */ | ||||
16224 | Fts5Iter **ppIter /* OUT: New iterator */ | ||||
16225 | ){ | ||||
16226 | Fts5Structure *pStruct; | ||||
16227 | PrefixSetupCtx s; | ||||
16228 | TokendataSetupCtx s2; | ||||
16229 | |||||
16230 | memset(&s, 0, sizeof(s)); | ||||
16231 | memset(&s2, 0, sizeof(s2)); | ||||
16232 | |||||
16233 | s.nMerge = 1; | ||||
16234 | s.iLastRowid = 0; | ||||
16235 | s.nBuf = 32; | ||||
16236 | if( iIdx==0 | ||||
16237 | && p->pConfig->eDetail==FTS5_DETAIL_FULL0 | ||||
16238 | && p->pConfig->bPrefixInsttoken | ||||
16239 | ){ | ||||
16240 | s.pTokendata = &s2; | ||||
16241 | s2.pT = (Fts5TokenDataIter*)fts5IdxMalloc(p, SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | ||||
16242 | } | ||||
16243 | |||||
16244 | if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
16245 | s.xMerge = fts5MergeRowidLists; | ||||
16246 | s.xAppend = fts5AppendRowid; | ||||
16247 | }else{ | ||||
16248 | s.nMerge = FTS5_MERGE_NLIST16-1; | ||||
16249 | s.nBuf = s.nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */ | ||||
16250 | s.xMerge = fts5MergePrefixLists; | ||||
16251 | s.xAppend = fts5AppendPoslist; | ||||
16252 | } | ||||
16253 | |||||
16254 | s.aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*s.nBuf); | ||||
16255 | pStruct = fts5StructureRead(p); | ||||
16256 | assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) )((void) (0)); | ||||
16257 | |||||
16258 | if( p->rc==SQLITE_OK0 ){ | ||||
16259 | void *pCtx = (void*)&s; | ||||
16260 | int i; | ||||
16261 | Fts5Data *pData; | ||||
16262 | |||||
16263 | /* If iIdx is non-zero, then it is the number of a prefix-index for | ||||
16264 | ** prefixes 1 character longer than the prefix being queried for. That | ||||
16265 | ** index contains all the doclists required, except for the one | ||||
16266 | ** corresponding to the prefix itself. That one is extracted from the | ||||
16267 | ** main term index here. */ | ||||
16268 | if( iIdx!=0 ){ | ||||
16269 | pToken[0] = FTS5_MAIN_PREFIX'0'; | ||||
16270 | fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx); | ||||
16271 | } | ||||
16272 | |||||
16273 | pToken[0] = FTS5_MAIN_PREFIX'0' + iIdx; | ||||
16274 | fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx); | ||||
16275 | |||||
16276 | assert( (s.nBuf%s.nMerge)==0 )((void) (0)); | ||||
16277 | for(i=0; i<s.nBuf; i+=s.nMerge){ | ||||
16278 | int iFree; | ||||
16279 | if( p->rc==SQLITE_OK0 ){ | ||||
16280 | s.xMerge(p, &s.doclist, s.nMerge, &s.aBuf[i]); | ||||
16281 | } | ||||
16282 | for(iFree=i; iFree<i+s.nMerge; iFree++){ | ||||
16283 | fts5BufferFree(&s.aBuf[iFree])sqlite3Fts5BufferFree(&s.aBuf[iFree]); | ||||
16284 | } | ||||
16285 | } | ||||
16286 | |||||
16287 | pData = fts5IdxMalloc(p, sizeof(*pData) | ||||
16288 | + ((i64)s.doclist.n)+FTS5_DATA_ZERO_PADDING8); | ||||
16289 | assert( pData!=0 || p->rc!=SQLITE_OK )((void) (0)); | ||||
16290 | if( pData ){ | ||||
16291 | pData->p = (u8*)&pData[1]; | ||||
16292 | pData->nn = pData->szLeaf = s.doclist.n; | ||||
16293 | if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n); | ||||
16294 | fts5MultiIterNew2(p, pData, bDesc, ppIter); | ||||
16295 | } | ||||
16296 | |||||
16297 | assert( (*ppIter)!=0 || p->rc!=SQLITE_OK )((void) (0)); | ||||
16298 | if( p->rc==SQLITE_OK0 && s.pTokendata ){ | ||||
16299 | fts5TokendataIterSortMap(p, s2.pT); | ||||
16300 | (*ppIter)->pTokenDataIter = s2.pT; | ||||
16301 | s2.pT = 0; | ||||
16302 | } | ||||
16303 | } | ||||
16304 | |||||
16305 | fts5TokendataIterDelete(s2.pT); | ||||
16306 | fts5BufferFree(&s.doclist)sqlite3Fts5BufferFree(&s.doclist); | ||||
16307 | fts5StructureRelease(pStruct); | ||||
16308 | sqlite3_freesqlite3_api->free(s.aBuf); | ||||
16309 | } | ||||
16310 | |||||
16311 | |||||
16312 | /* | ||||
16313 | ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain | ||||
16314 | ** to the document with rowid iRowid. | ||||
16315 | */ | ||||
16316 | static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ | ||||
16317 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
16318 | |||||
16319 | /* Allocate the hash table if it has not already been allocated */ | ||||
16320 | if( p->pHash==0 ){ | ||||
16321 | p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); | ||||
16322 | } | ||||
16323 | |||||
16324 | /* Flush the hash table to disk if required */ | ||||
16325 | if( iRowid<p->iWriteRowid | ||||
16326 | || (iRowid==p->iWriteRowid && p->bDelete==0) | ||||
16327 | || (p->nPendingData > p->pConfig->nHashSize) | ||||
16328 | ){ | ||||
16329 | fts5IndexFlush(p); | ||||
16330 | } | ||||
16331 | |||||
16332 | p->iWriteRowid = iRowid; | ||||
16333 | p->bDelete = bDelete; | ||||
16334 | if( bDelete==0 ){ | ||||
16335 | p->nPendingRow++; | ||||
16336 | } | ||||
16337 | return fts5IndexReturn(p); | ||||
16338 | } | ||||
16339 | |||||
16340 | /* | ||||
16341 | ** Commit data to disk. | ||||
16342 | */ | ||||
16343 | static int sqlite3Fts5IndexSync(Fts5Index *p){ | ||||
16344 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
16345 | fts5IndexFlush(p); | ||||
16346 | fts5IndexCloseReader(p); | ||||
16347 | return fts5IndexReturn(p); | ||||
16348 | } | ||||
16349 | |||||
16350 | /* | ||||
16351 | ** Discard any data stored in the in-memory hash tables. Do not write it | ||||
16352 | ** to the database. Additionally, assume that the contents of the %_data | ||||
16353 | ** table may have changed on disk. So any in-memory caches of %_data | ||||
16354 | ** records must be invalidated. | ||||
16355 | */ | ||||
16356 | static int sqlite3Fts5IndexRollback(Fts5Index *p){ | ||||
16357 | fts5IndexCloseReader(p); | ||||
16358 | fts5IndexDiscardData(p); | ||||
16359 | fts5StructureInvalidate(p); | ||||
16360 | return fts5IndexReturn(p); | ||||
16361 | } | ||||
16362 | |||||
16363 | /* | ||||
16364 | ** The %_data table is completely empty when this function is called. This | ||||
16365 | ** function populates it with the initial structure objects for each index, | ||||
16366 | ** and the initial version of the "averages" record (a zero-byte blob). | ||||
16367 | */ | ||||
16368 | static int sqlite3Fts5IndexReinit(Fts5Index *p){ | ||||
16369 | Fts5Structure *pTmp; | ||||
16370 | u8 tmpSpace[SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel ))]; | ||||
16371 | fts5StructureInvalidate(p); | ||||
16372 | fts5IndexDiscardData(p); | ||||
16373 | pTmp = (Fts5Structure*)tmpSpace; | ||||
16374 | memset(pTmp, 0, SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel ))); | ||||
16375 | if( p->pConfig->bContentlessDelete ){ | ||||
16376 | pTmp->nOriginCntr = 1; | ||||
16377 | } | ||||
16378 | fts5DataWrite(p, FTS5_AVERAGES_ROWID1, (const u8*)"", 0); | ||||
16379 | fts5StructureWrite(p, pTmp); | ||||
16380 | return fts5IndexReturn(p); | ||||
16381 | } | ||||
16382 | |||||
16383 | /* | ||||
16384 | ** Open a new Fts5Index handle. If the bCreate argument is true, create | ||||
16385 | ** and initialize the underlying %_data table. | ||||
16386 | ** | ||||
16387 | ** If successful, set *pp to point to the new object and return SQLITE_OK. | ||||
16388 | ** Otherwise, set *pp to NULL and return an SQLite error code. | ||||
16389 | */ | ||||
16390 | static int sqlite3Fts5IndexOpen( | ||||
16391 | Fts5Config *pConfig, | ||||
16392 | int bCreate, | ||||
16393 | Fts5Index **pp, | ||||
16394 | char **pzErr | ||||
16395 | ){ | ||||
16396 | int rc = SQLITE_OK0; | ||||
16397 | Fts5Index *p; /* New object */ | ||||
16398 | |||||
16399 | *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); | ||||
16400 | if( rc==SQLITE_OK0 ){ | ||||
16401 | p->pConfig = pConfig; | ||||
16402 | p->nWorkUnit = FTS5_WORK_UNIT64; | ||||
16403 | p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); | ||||
16404 | if( p->zDataTbl && bCreate ){ | ||||
16405 | rc = sqlite3Fts5CreateTable( | ||||
16406 | pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr | ||||
16407 | ); | ||||
16408 | if( rc==SQLITE_OK0 ){ | ||||
16409 | rc = sqlite3Fts5CreateTable(pConfig, "idx", | ||||
16410 | "segid, term, pgno, PRIMARY KEY(segid, term)", | ||||
16411 | 1, pzErr | ||||
16412 | ); | ||||
16413 | } | ||||
16414 | if( rc==SQLITE_OK0 ){ | ||||
16415 | rc = sqlite3Fts5IndexReinit(p); | ||||
16416 | } | ||||
16417 | } | ||||
16418 | } | ||||
16419 | |||||
16420 | assert( rc!=SQLITE_OK || p->rc==SQLITE_OK )((void) (0)); | ||||
16421 | if( rc ){ | ||||
16422 | sqlite3Fts5IndexClose(p); | ||||
16423 | *pp = 0; | ||||
16424 | } | ||||
16425 | return rc; | ||||
16426 | } | ||||
16427 | |||||
16428 | /* | ||||
16429 | ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). | ||||
16430 | */ | ||||
16431 | static int sqlite3Fts5IndexClose(Fts5Index *p){ | ||||
16432 | int rc = SQLITE_OK0; | ||||
16433 | if( p ){ | ||||
16434 | assert( p->pReader==0 )((void) (0)); | ||||
16435 | fts5StructureInvalidate(p); | ||||
16436 | sqlite3_finalizesqlite3_api->finalize(p->pWriter); | ||||
16437 | sqlite3_finalizesqlite3_api->finalize(p->pDeleter); | ||||
16438 | sqlite3_finalizesqlite3_api->finalize(p->pIdxWriter); | ||||
16439 | sqlite3_finalizesqlite3_api->finalize(p->pIdxDeleter); | ||||
16440 | sqlite3_finalizesqlite3_api->finalize(p->pIdxSelect); | ||||
16441 | sqlite3_finalizesqlite3_api->finalize(p->pIdxNextSelect); | ||||
16442 | sqlite3_finalizesqlite3_api->finalize(p->pDataVersion); | ||||
16443 | sqlite3_finalizesqlite3_api->finalize(p->pDeleteFromIdx); | ||||
16444 | sqlite3Fts5HashFree(p->pHash); | ||||
16445 | sqlite3_freesqlite3_api->free(p->zDataTbl); | ||||
16446 | sqlite3_freesqlite3_api->free(p); | ||||
16447 | } | ||||
16448 | return rc; | ||||
16449 | } | ||||
16450 | |||||
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes occupied by the first nChar
** characters of the buffer, or 0 if the buffer holds fewer than nChar
** characters.
**
** A byte of 0xC0 or greater starts a multi-byte character, and every
** following byte of the form 10xxxxxx belongs to it. A lead byte that is
** the very last byte of the buffer makes the function return 0. If the
** trailing bytes of the final requested character run up to the end of
** the buffer, that character is still counted.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  const unsigned char *z = (const unsigned char*)p;
  int iByte = 0;                  /* Bytes consumed so far */
  int iChar = 0;                  /* Characters consumed so far */

  while( iChar<nChar ){
    unsigned char c;

    if( iByte>=nByte ) return 0;  /* Input contains fewer than nChar chars */
    c = z[iByte++];
    if( c>=0xc0 ){
      if( iByte>=nByte ) return 0;
      while( (z[iByte] & 0xc0)==0x80 ){
        iByte++;
        if( iByte<nByte ) continue;
        /* Reached the end of the buffer. This is only acceptable if the
        ** character just consumed is the last one asked for. */
        if( iChar+1!=nChar ) return 0;
        break;
      }
    }
    iChar++;
  }
  return iByte;
}
16478 | |||||
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
**
** A byte of 0xC0 or greater, together with all 10xxxxxx bytes that follow
** it, counts as one character. Any other byte counts as one character on
** its own.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  const unsigned char *z = (const unsigned char*)pIn;
  int iByte = 0;                  /* Current read offset */
  int nChar;                      /* Characters counted so far */

  for(nChar=0; iByte<nIn; nChar++){
    if( z[iByte++]<0xc0 ) continue;
    /* Skip over the continuation bytes of a multi-byte character. */
    while( iByte<nIn && (z[iByte] & 0xc0)==0x80 ){
      iByte++;
    }
  }
  return nChar;
}
16494 | |||||
16495 | /* | ||||
16496 | ** Insert or remove data to or from the index. Each time a document is | ||||
16497 | ** added to or removed from the index, this function is called one or more | ||||
16498 | ** times. | ||||
16499 | ** | ||||
16500 | ** For an insert, it must be called once for each token in the new document. | ||||
16501 | ** If the operation is a delete, it must be called (at least) once for each | ||||
16502 | ** unique token in the document with an iCol value less than zero. The iPos | ||||
16503 | ** argument is ignored for a delete. | ||||
16504 | */ | ||||
16505 | static int sqlite3Fts5IndexWrite( | ||||
16506 | Fts5Index *p, /* Index to write to */ | ||||
16507 | int iCol, /* Column token appears in (-ve -> delete) */ | ||||
16508 | int iPos, /* Position of token within column */ | ||||
16509 | const char *pToken, int nToken /* Token to add or remove to or from index */ | ||||
16510 | ){ | ||||
16511 | int i; /* Used to iterate through indexes */ | ||||
16512 | int rc = SQLITE_OK0; /* Return code */ | ||||
16513 | Fts5Config *pConfig = p->pConfig; | ||||
16514 | |||||
16515 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
16516 | assert( (iCol<0)==p->bDelete )((void) (0)); | ||||
16517 | |||||
16518 | /* Add the entry to the main terms index. */ | ||||
16519 | rc = sqlite3Fts5HashWrite( | ||||
16520 | p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX'0', pToken, nToken | ||||
16521 | ); | ||||
16522 | |||||
16523 | for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK0; i++){ | ||||
16524 | const int nChar = pConfig->aPrefix[i]; | ||||
16525 | int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); | ||||
16526 | if( nByte ){ | ||||
16527 | rc = sqlite3Fts5HashWrite(p->pHash, | ||||
16528 | p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX'0'+i+1), pToken, | ||||
16529 | nByte | ||||
16530 | ); | ||||
16531 | } | ||||
16532 | } | ||||
16533 | |||||
16534 | return rc; | ||||
16535 | } | ||||
16536 | |||||
16537 | /* | ||||
16538 | ** pToken points to a buffer of size nToken bytes containing a search | ||||
16539 | ** term, including the index number at the start, used on a tokendata=1 | ||||
16540 | ** table. This function returns true if the term in buffer pBuf matches | ||||
16541 | ** token pToken/nToken. | ||||
16542 | */ | ||||
16543 | static int fts5IsTokendataPrefix( | ||||
16544 | Fts5Buffer *pBuf, | ||||
16545 | const u8 *pToken, | ||||
16546 | int nToken | ||||
16547 | ){ | ||||
16548 | return ( | ||||
16549 | pBuf->n>=nToken | ||||
16550 | && 0==memcmp(pBuf->p, pToken, nToken) | ||||
16551 | && (pBuf->n==nToken || pBuf->p[nToken]==0x00) | ||||
16552 | ); | ||||
16553 | } | ||||
16554 | |||||
16555 | /* | ||||
16556 | ** Ensure the segment-iterator passed as the only argument points to EOF. | ||||
16557 | */ | ||||
16558 | static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ | ||||
16559 | fts5DataRelease(pSeg->pLeaf); | ||||
16560 | pSeg->pLeaf = 0; | ||||
16561 | } | ||||
16562 | |||||
16563 | static void fts5IterClose(Fts5IndexIter *pIndexIter){ | ||||
16564 | if( pIndexIter ){ | ||||
16565 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | ||||
16566 | Fts5Index *pIndex = pIter->pIndex; | ||||
16567 | fts5TokendataIterDelete(pIter->pTokenDataIter); | ||||
16568 | fts5MultiIterFree(pIter); | ||||
16569 | fts5IndexCloseReader(pIndex); | ||||
16570 | } | ||||
16571 | } | ||||
16572 | |||||
16573 | /* | ||||
16574 | ** This function appends iterator pAppend to Fts5TokenDataIter pIn and | ||||
16575 | ** returns the result. | ||||
16576 | */ | ||||
16577 | static Fts5TokenDataIter *fts5AppendTokendataIter( | ||||
16578 | Fts5Index *p, /* Index object (for error code) */ | ||||
16579 | Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */ | ||||
16580 | Fts5Iter *pAppend /* Append this iterator */ | ||||
16581 | ){ | ||||
16582 | Fts5TokenDataIter *pRet = pIn; | ||||
16583 | |||||
16584 | if( p->rc==SQLITE_OK0 ){ | ||||
16585 | if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){ | ||||
16586 | int nAlloc = pIn ? pIn->nIterAlloc*2 : 16; | ||||
16587 | int nByte = SZ_FTS5TOKENDATAITER(nAlloc+1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (nAlloc+1)*sizeof (Fts5Iter)); | ||||
16588 | Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_reallocsqlite3_api->realloc(pIn, nByte); | ||||
16589 | |||||
16590 | if( pNew==0 ){ | ||||
16591 | p->rc = SQLITE_NOMEM7; | ||||
16592 | }else{ | ||||
16593 | if( pIn==0 ) memset(pNew, 0, nByte); | ||||
16594 | pRet = pNew; | ||||
16595 | pNew->nIterAlloc = nAlloc; | ||||
16596 | } | ||||
16597 | } | ||||
16598 | } | ||||
16599 | if( p->rc ){ | ||||
16600 | fts5IterClose((Fts5IndexIter*)pAppend); | ||||
16601 | }else{ | ||||
16602 | pRet->apIter[pRet->nIter++] = pAppend; | ||||
16603 | } | ||||
16604 | assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc )((void) (0)); | ||||
16605 | |||||
16606 | return pRet; | ||||
16607 | } | ||||
16608 | |||||
16609 | /* | ||||
16610 | ** The iterator passed as the only argument must be a tokendata=1 iterator | ||||
16611 | ** (pIter->pTokenDataIter!=0). This function sets the iterator output variables (pIter->base.*) for the current row: the smallest rowid of any sub-iterator that is not at EOF. | ||||
16612 | ** If every sub-iterator is at EOF, base.bEof is set. If more than one sub-iterator is on that rowid and detail!=none, their position lists are merged, in position order, into pIter->poslist. | ||||
16613 | ** The function returns early, leaving the outputs only partially updated, if an allocation fails. | ||||
16614 | */ | ||||
16615 | static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ | ||||
16616 | int ii; | ||||
16617 | int nHit = 0; /* Number of sub-iterators positioned on iRowid */ | ||||
16618 | i64 iRowid = SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))); | ||||
16619 | int iMin = 0; /* Index of the sub-iterator (later: reader) holding the minimum */ | ||||
16620 | |||||
16621 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | ||||
16622 | |||||
16623 | pIter->base.nData = 0; | ||||
16624 | pIter->base.pData = 0; | ||||
16625 | |||||
16626 | for(ii=0; ii<pT->nIter; ii++){ /* Find the smallest rowid and count how many sub-iterators share it */ | ||||
16627 | Fts5Iter *p = pT->apIter[ii]; | ||||
16628 | if( p->base.bEof==0 ){ | ||||
16629 | if( nHit==0 || p->base.iRowid<iRowid ){ | ||||
16630 | iRowid = p->base.iRowid; | ||||
16631 | nHit = 1; | ||||
16632 | pIter->base.pData = p->base.pData; | ||||
16633 | pIter->base.nData = p->base.nData; | ||||
16634 | iMin = ii; | ||||
16635 | }else if( p->base.iRowid==iRowid ){ | ||||
16636 | nHit++; | ||||
16637 | } | ||||
16638 | } | ||||
16639 | } | ||||
16640 | |||||
16641 | if( nHit==0 ){ | ||||
16642 | pIter->base.bEof = 1; | ||||
16643 | }else{ | ||||
16644 | int eDetail = pIter->pIndex->pConfig->eDetail; | ||||
16645 | pIter->base.bEof = 0; | ||||
16646 | pIter->base.iRowid = iRowid; | ||||
16647 | |||||
16648 | if( nHit==1 && eDetail==FTS5_DETAIL_FULL0 ){ /* Single hit: outputs already point at that sub-iterator's data */ | ||||
16649 | fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, 0, iRowid, -1); | ||||
16650 | }else | ||||
16651 | if( nHit>1 && eDetail!=FTS5_DETAIL_NONE1 ){ | ||||
16652 | int nReader = 0; | ||||
16653 | int nByte = 0; | ||||
16654 | i64 iPrev = 0; | ||||
16655 | |||||
16656 | /* Allocate array of iterators if they are not already allocated. */ | ||||
16657 | if( pT->aPoslistReader==0 ){ | ||||
16658 | pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero( | ||||
16659 | &pIter->pIndex->rc, | ||||
16660 | pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int)) | ||||
16661 | ); | ||||
16662 | if( pT->aPoslistReader==0 ) return; /* Allocation failed; presumably the error code was stored via the rc pointer passed above */ | ||||
16663 | pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter]; /* The int array lives in the same allocation, after the nIter readers */ | ||||
16664 | } | ||||
16665 | |||||
16666 | /* Populate an iterator for each poslist that will be merged */ | ||||
16667 | for(ii=0; ii<pT->nIter; ii++){ | ||||
16668 | Fts5Iter *p = pT->apIter[ii]; | ||||
16669 | if( iRowid==p->base.iRowid ){ | ||||
16670 | pT->aPoslistToIter[nReader] = ii; | ||||
16671 | sqlite3Fts5PoslistReaderInit( | ||||
16672 | p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++] | ||||
16673 | ); | ||||
16674 | nByte += p->base.nData; | ||||
16675 | } | ||||
16676 | } | ||||
16677 | |||||
16678 | /* Ensure the output buffer is large enough */ | ||||
16679 | if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10)( (u32)((&pIter->poslist)->n) + (u32)(nByte+nHit*10 ) <= (u32)((&pIter->poslist)->nSpace) ? 0 : sqlite3Fts5BufferSize ((&pIter->pIndex->rc),(&pIter->poslist),(nByte +nHit*10)+(&pIter->poslist)->n) ) ){ | ||||
16680 | return; | ||||
16681 | } | ||||
16682 | |||||
16683 | /* Ensure the token-mapping is large enough */ | ||||
16684 | if( eDetail==FTS5_DETAIL_FULL0 && pT->nMapAlloc<(pT->nMap + nByte) ){ | ||||
16685 | int nNew = (pT->nMapAlloc + nByte) * 2; | ||||
16686 | Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_reallocsqlite3_api->realloc( | ||||
16687 | pT->aMap, nNew*sizeof(Fts5TokenDataMap) | ||||
16688 | ); | ||||
16689 | if( aNew==0 ){ | ||||
16690 | pIter->pIndex->rc = SQLITE_NOMEM7; | ||||
16691 | return; | ||||
16692 | } | ||||
16693 | pT->aMap = aNew; | ||||
16694 | pT->nMapAlloc = nNew; | ||||
16695 | } | ||||
16696 | |||||
16697 | pIter->poslist.n = 0; /* Start the merged position list from empty */ | ||||
16698 | |||||
16699 | while( 1 ){ /* Each pass emits one position: the smallest held by any reader */ | ||||
16700 | i64 iMinPos = LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | ||||
16701 | |||||
16702 | /* Find smallest position */ | ||||
16703 | iMin = 0; | ||||
16704 | for(ii=0; ii<nReader; ii++){ | ||||
16705 | Fts5PoslistReader *pReader = &pT->aPoslistReader[ii]; | ||||
16706 | if( pReader->bEof==0 ){ | ||||
16707 | if( pReader->iPos<iMinPos ){ | ||||
16708 | iMinPos = pReader->iPos; | ||||
16709 | iMin = ii; | ||||
16710 | } | ||||
16711 | } | ||||
16712 | } | ||||
16713 | |||||
16714 | /* If all readers were at EOF, break out of the loop. */ | ||||
16715 | if( iMinPos==LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) ) break; | ||||
16716 | |||||
16717 | sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos); | ||||
16718 | sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]); /* Only the reader that supplied iMinPos is advanced */ | ||||
16719 | |||||
16720 | if( eDetail==FTS5_DETAIL_FULL0 ){ /* Record which sub-iterator this position came from */ | ||||
16721 | pT->aMap[pT->nMap].iPos = iMinPos; | ||||
16722 | pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin]; | ||||
16723 | pT->aMap[pT->nMap].iRowid = iRowid; | ||||
16724 | pT->nMap++; | ||||
16725 | } | ||||
16726 | } | ||||
16727 | |||||
16728 | pIter->base.pData = pIter->poslist.p; | ||||
16729 | pIter->base.nData = pIter->poslist.n; | ||||
16730 | } | ||||
16731 | } | ||||
16732 | } | ||||
16733 | |||||
16734 | /* | ||||
16735 | ** The iterator passed as the only argument must be a tokendata=1 iterator | ||||
16736 | ** (pIter->pTokenDataIter!=0). This function advances the iterator. If | ||||
16737 | ** argument bFrom is false, then the iterator is advanced to the next | ||||
16738 | ** entry. Or, if bFrom is true, it is advanced to the first entry with | ||||
16739 | ** a rowid of iFrom or greater. | ||||
16740 | */ | ||||
16741 | static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ | ||||
16742 | int ii; | ||||
16743 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | ||||
16744 | Fts5Index *pIndex = pIter->pIndex; | ||||
16745 | |||||
16746 | for(ii=0; ii<pT->nIter; ii++){ | ||||
16747 | Fts5Iter *p = pT->apIter[ii]; | ||||
16748 | if( p->base.bEof==0 | ||||
16749 | && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom)) | ||||
16750 | ){ | ||||
16751 | fts5MultiIterNext(pIndex, p, bFrom, iFrom); | ||||
16752 | while( bFrom && p->base.bEof==0 | ||||
16753 | && p->base.iRowid<iFrom | ||||
16754 | && pIndex->rc==SQLITE_OK0 | ||||
16755 | ){ | ||||
16756 | fts5MultiIterNext(pIndex, p, 0, 0); | ||||
16757 | } | ||||
16758 | } | ||||
16759 | } | ||||
16760 | |||||
16761 | if( pIndex->rc==SQLITE_OK0 ){ | ||||
16762 | fts5IterSetOutputsTokendata(pIter); | ||||
16763 | } | ||||
16764 | } | ||||
16765 | |||||
16766 | /* | ||||
16767 | ** If the segment-iterator passed as the first argument is at EOF, then | ||||
16768 | ** set pIter->term to a copy of buffer pTerm. | ||||
16769 | */ | ||||
16770 | static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ | ||||
16771 | if( pIter && pIter->aSeg[0].pLeaf==0 ){ | ||||
16772 | fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p)sqlite3Fts5BufferSet(&pIter->pIndex->rc,&pIter-> aSeg[0].term,pTerm->n,pTerm->p); | ||||
16773 | } | ||||
16774 | } | ||||
16775 | |||||
16776 | /* | ||||
16777 | ** This function sets up an iterator to use for a non-prefix query on a | ||||
16778 | ** tokendata=1 table. Each pass of the main loop builds one sub-iterator for the smallest not-yet-visited term that matches pToken/nToken, and appends it to a Fts5TokenDataIter. The returned iterator has nSeg==0 and owns that set; it is at EOF if no term matched. Returns NULL if an error occurs (p->rc is set). | ||||
16779 | */ | ||||
16780 | static Fts5Iter *fts5SetupTokendataIter( | ||||
16781 | Fts5Index *p, /* FTS index to query */ | ||||
16782 | const u8 *pToken, /* Buffer containing query term */ | ||||
16783 | int nToken, /* Size of buffer pToken in bytes */ | ||||
16784 | Fts5Colset *pColset /* Colset to filter on */ | ||||
16785 | ){ | ||||
16786 | Fts5Iter *pRet = 0; | ||||
16787 | Fts5TokenDataIter *pSet = 0; /* Set of sub-iterators collected so far */ | ||||
16788 | Fts5Structure *pStruct = 0; | ||||
16789 | const int flags = FTS5INDEX_QUERY_SCANONETERM0x0100 | FTS5INDEX_QUERY_SCAN0x0008; | ||||
16790 | |||||
16791 | Fts5Buffer bSeek = {0, 0, 0}; /* Term to seek to on the current pass */ | ||||
16792 | Fts5Buffer *pSmall = 0; /* Smallest matching term found by the previous pass */ | ||||
16793 | |||||
16794 | fts5IndexFlush(p); | ||||
16795 | pStruct = fts5StructureRead(p); | ||||
16796 | |||||
16797 | while( p->rc==SQLITE_OK0 ){ | ||||
16798 | Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0; /* Most recently appended sub-iterator, if any */ | ||||
16799 | Fts5Iter *pNew = 0; | ||||
16800 | Fts5SegIter *pNewIter = 0; | ||||
16801 | Fts5SegIter *pPrevIter = 0; | ||||
16802 | |||||
16803 | int iLvl, iSeg, ii; | ||||
16804 | |||||
16805 | pNew = fts5MultiIterAlloc(p, pStruct->nSegment); | ||||
16806 | if( pSmall ){ /* Later passes: seek key is the previous smallest term with one 0x00 byte appended */ | ||||
16807 | fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p)sqlite3Fts5BufferSet(&p->rc,&bSeek,pSmall->n,pSmall ->p); | ||||
16808 | fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0")sqlite3Fts5BufferAppendBlob(&p->rc,&bSeek,1,(const u8*)"\0"); | ||||
16809 | }else{ /* First pass: seek to the query token itself */ | ||||
16810 | fts5BufferSet(&p->rc, &bSeek, nToken, pToken)sqlite3Fts5BufferSet(&p->rc,&bSeek,nToken,pToken); | ||||
16811 | } | ||||
16812 | if( p->rc ){ | ||||
16813 | fts5IterClose((Fts5IndexIter*)pNew); | ||||
16814 | break; | ||||
16815 | } | ||||
16816 | |||||
16817 | pNewIter = &pNew->aSeg[0]; | ||||
16818 | pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0); | ||||
16819 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ /* pNewIter and pPrevIter walk the segment arrays in step */ | ||||
16820 | for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ | ||||
16821 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | ||||
16822 | int bDone = 0; | ||||
16823 | |||||
16824 | if( pPrevIter ){ | ||||
16825 | if( fts5BufferCompare(pSmall, &pPrevIter->term) ){ /* Previous segment-iterator is not on pSmall: take over its state and zero the source */ | ||||
16826 | memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter)); | ||||
16827 | memset(pPrevIter, 0, sizeof(Fts5SegIter)); | ||||
16828 | bDone = 1; | ||||
16829 | }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){ | ||||
16830 | fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter); | ||||
16831 | bDone = 1; | ||||
16832 | } | ||||
16833 | } | ||||
16834 | |||||
16835 | if( bDone==0 ){ /* Neither shortcut applied: do a full seek */ | ||||
16836 | fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); | ||||
16837 | } | ||||
16838 | |||||
16839 | if( pPrevIter ){ | ||||
16840 | if( pPrevIter->pTombArray ){ /* Share the previous iterator's tombstone array by taking a reference */ | ||||
16841 | pNewIter->pTombArray = pPrevIter->pTombArray; | ||||
16842 | pNewIter->pTombArray->nRef++; | ||||
16843 | } | ||||
16844 | }else{ | ||||
16845 | fts5SegIterAllocTombstone(p, pNewIter); | ||||
16846 | } | ||||
16847 | |||||
16848 | pNewIter++; | ||||
16849 | if( pPrevIter ) pPrevIter++; | ||||
16850 | if( p->rc ) break; /* Leaves the inner (per-segment) loop only */ | ||||
16851 | } | ||||
16852 | } | ||||
16853 | fts5TokendataSetTermIfEof(pPrev, pSmall); | ||||
16854 | |||||
16855 | pNew->bSkipEmpty = 1; | ||||
16856 | pNew->pColset = pColset; | ||||
16857 | fts5IterSetOutputCb(&p->rc, pNew); | ||||
16858 | |||||
16859 | /* Loop through all segments in the new iterator. Find the smallest | ||||
16860 | ** term that any segment-iterator points to. Iterator pNew will be | ||||
16861 | ** used for this term. Also, set any iterator that points to a term that | ||||
16862 | ** does not match pToken/nToken to point to EOF */ | ||||
16863 | pSmall = 0; | ||||
16864 | for(ii=0; ii<pNew->nSeg; ii++){ | ||||
16865 | Fts5SegIter *pII = &pNew->aSeg[ii]; | ||||
16866 | if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){ | ||||
16867 | fts5SegIterSetEOF(pII); | ||||
16868 | } | ||||
16869 | if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){ | ||||
16870 | pSmall = &pII->term; | ||||
16871 | } | ||||
16872 | } | ||||
16873 | |||||
16874 | /* If pSmall is still NULL at this point, then the new iterator does | ||||
16875 | ** not point to any terms that match the query. So delete it and break | ||||
16876 | ** out of the loop - all required iterators have been collected. */ | ||||
16877 | if( pSmall==0 ){ | ||||
16878 | fts5IterClose((Fts5IndexIter*)pNew); | ||||
16879 | break; | ||||
16880 | } | ||||
16881 | |||||
16882 | /* Append this iterator to the set and continue. */ | ||||
16883 | pSet = fts5AppendTokendataIter(p, pSet, pNew); | ||||
16884 | } | ||||
16885 | |||||
16886 | if( p->rc==SQLITE_OK0 && pSet ){ /* Restrict every collected segment-iterator to one term, then finish setup */ | ||||
16887 | int ii; | ||||
16888 | for(ii=0; ii<pSet->nIter; ii++){ | ||||
16889 | Fts5Iter *pIter = pSet->apIter[ii]; | ||||
16890 | int iSeg; | ||||
16891 | for(iSeg=0; iSeg<pIter->nSeg; iSeg++){ | ||||
16892 | pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM0x01; | ||||
16893 | } | ||||
16894 | fts5MultiIterFinishSetup(p, pIter); | ||||
16895 | } | ||||
16896 | } | ||||
16897 | |||||
16898 | if( p->rc==SQLITE_OK0 ){ | ||||
16899 | pRet = fts5MultiIterAlloc(p, 0); | ||||
16900 | } | ||||
16901 | if( pRet ){ | ||||
16902 | pRet->nSeg = 0; /* nSeg==0 is how callers in this file recognise a tokendata iterator */ | ||||
16903 | pRet->pTokenDataIter = pSet; | ||||
16904 | if( pSet ){ | ||||
16905 | fts5IterSetOutputsTokendata(pRet); | ||||
16906 | }else{ | ||||
16907 | pRet->base.bEof = 1; | ||||
16908 | } | ||||
16909 | }else{ /* Error, or the wrapper could not be allocated: discard the collected set */ | ||||
16910 | fts5TokendataIterDelete(pSet); | ||||
16911 | } | ||||
16912 | |||||
16913 | fts5StructureRelease(pStruct); | ||||
16914 | fts5BufferFree(&bSeek)sqlite3Fts5BufferFree(&bSeek); | ||||
16915 | return pRet; | ||||
16916 | } | ||||
16917 | |||||
16918 | /* | ||||
16919 | ** Open a new iterator to iterate though all rowid that match the | ||||
16920 | ** specified token or token prefix. The new iterator is written to *ppIter, or NULL if an error occurs after the working buffer has been allocated; *ppIter is not written at all if that first allocation fails. Returns the value of fts5IndexReturn(p). | ||||
16921 | */ | ||||
16922 | static int sqlite3Fts5IndexQuery( | ||||
16923 | Fts5Index *p, /* FTS index to query */ | ||||
16924 | const char *pToken, int nToken, /* Token (or prefix) to query for */ | ||||
16925 | int flags, /* Mask of FTS5INDEX_QUERY_X flags */ | ||||
16926 | Fts5Colset *pColset, /* Match these columns only */ | ||||
16927 | Fts5IndexIter **ppIter /* OUT: New iterator object */ | ||||
16928 | ){ | ||||
16929 | Fts5Config *pConfig = p->pConfig; | ||||
16930 | Fts5Iter *pRet = 0; | ||||
16931 | Fts5Buffer buf = {0, 0, 0}; | ||||
16932 | |||||
16933 | /* If the QUERY_SCAN flag is set, all other flags must be clear. */ | ||||
16934 | assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN )((void) (0)); | ||||
16935 | |||||
16936 | if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ | ||||
16937 | int iIdx = 0; /* Index to search */ | ||||
16938 | int iPrefixIdx = 0; /* +1 prefix index */ | ||||
16939 | int bTokendata = pConfig->bTokendata; | ||||
16940 | assert( buf.p!=0 )((void) (0)); | ||||
16941 | if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); /* buf.p[0] is left for the index byte written below */ | ||||
16942 | |||||
16943 | /* The NOTOKENDATA flag is set when each token in a tokendata=1 table | ||||
16944 | ** should be treated individually, instead of merging all those with | ||||
16945 | ** a common prefix into a single entry. This is used, for example, by | ||||
16946 | ** queries performed as part of an integrity-check, or by the fts5vocab | ||||
16947 | ** module. */ | ||||
16948 | if( flags & (FTS5INDEX_QUERY_NOTOKENDATA0x0080|FTS5INDEX_QUERY_SCAN0x0008) ){ | ||||
16949 | bTokendata = 0; | ||||
16950 | } | ||||
16951 | |||||
16952 | /* Figure out which index to search and set iIdx accordingly. If this | ||||
16953 | ** is a prefix query for which there is no prefix index, set iIdx to | ||||
16954 | ** greater than pConfig->nPrefix to indicate that the query will be | ||||
16955 | ** satisfied by scanning multiple terms in the main index. | ||||
16956 | ** | ||||
16957 | ** If the QUERY_TEST_NOIDX flag was specified, then this must be a | ||||
16958 | ** prefix-query. Instead of using a prefix-index (if one exists), | ||||
16959 | ** evaluate the prefix query using the main FTS index. This is used | ||||
16960 | ** for internal sanity checking by the integrity-check in debug | ||||
16961 | ** mode only. */ | ||||
16962 | #ifdef SQLITE_DEBUG | ||||
16963 | if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX0x0004) ){ | ||||
16964 | assert( flags & FTS5INDEX_QUERY_PREFIX )((void) (0)); | ||||
16965 | iIdx = 1+pConfig->nPrefix; | ||||
16966 | }else | ||||
16967 | #endif | ||||
16968 | if( flags & FTS5INDEX_QUERY_PREFIX0x0001 ){ | ||||
16969 | int nChar = fts5IndexCharlen(pToken, nToken); | ||||
16970 | for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ /* Loop ends with iIdx==nPrefix+1 if no prefix index has exactly nChar characters */ | ||||
16971 | int nIdxChar = pConfig->aPrefix[iIdx-1]; | ||||
16972 | if( nIdxChar==nChar ) break; | ||||
16973 | if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx; /* Remember a prefix index that is one character longer than the query */ | ||||
16974 | } | ||||
16975 | } | ||||
16976 | |||||
16977 | if( bTokendata && iIdx==0 ){ /* Non-prefix query on a tokendata=1 table */ | ||||
16978 | buf.p[0] = FTS5_MAIN_PREFIX'0'; | ||||
16979 | pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset); | ||||
16980 | }else if( iIdx<=pConfig->nPrefix ){ | ||||
16981 | /* Straight index lookup */ | ||||
16982 | Fts5Structure *pStruct = fts5StructureRead(p); | ||||
16983 | buf.p[0] = (u8)(FTS5_MAIN_PREFIX'0' + iIdx); | ||||
16984 | if( pStruct ){ | ||||
16985 | fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY0x0010, | ||||
16986 | pColset, buf.p, nToken+1, -1, 0, &pRet | ||||
16987 | ); | ||||
16988 | fts5StructureRelease(pStruct); | ||||
16989 | } | ||||
16990 | }else{ | ||||
16991 | /* Scan multiple terms in the main index for a prefix query. */ | ||||
16992 | int bDesc = (flags & FTS5INDEX_QUERY_DESC0x0002)!=0; | ||||
16993 | fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); | ||||
16994 | if( pRet==0 ){ | ||||
16995 | assert( p->rc!=SQLITE_OK )((void) (0)); | ||||
16996 | }else{ | ||||
16997 | assert( pRet->pColset==0 )((void) (0)); | ||||
16998 | fts5IterSetOutputCb(&p->rc, pRet); | ||||
16999 | if( p->rc==SQLITE_OK0 ){ | ||||
17000 | Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; | ||||
17001 | if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); /* Populate the outputs only if the first segment-iterator is not at EOF */ | ||||
17002 | } | ||||
17003 | } | ||||
17004 | } | ||||
17005 | |||||
17006 | if( p->rc ){ /* On error, discard any iterator that was built and close the reader */ | ||||
17007 | fts5IterClose((Fts5IndexIter*)pRet); | ||||
17008 | pRet = 0; | ||||
17009 | fts5IndexCloseReader(p); | ||||
17010 | } | ||||
17011 | |||||
17012 | *ppIter = (Fts5IndexIter*)pRet; | ||||
17013 | sqlite3Fts5BufferFree(&buf); | ||||
17014 | } | ||||
17015 | return fts5IndexReturn(p); | ||||
17016 | } | ||||
17017 | |||||
17018 | /* | ||||
17019 | ** Return true if the iterator passed as the only argument is at EOF. | ||||
17020 | */ | ||||
17021 | /* | ||||
17022 | ** Move to the next matching rowid. | ||||
17023 | */ | ||||
17024 | static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ | ||||
17025 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | ||||
17026 | assert( pIter->pIndex->rc==SQLITE_OK )((void) (0)); | ||||
17027 | if( pIter->nSeg==0 ){ | ||||
17028 | assert( pIter->pTokenDataIter )((void) (0)); | ||||
17029 | fts5TokendataIterNext(pIter, 0, 0); | ||||
17030 | }else{ | ||||
17031 | fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); | ||||
17032 | } | ||||
17033 | return fts5IndexReturn(pIter->pIndex); | ||||
17034 | } | ||||
17035 | |||||
17036 | /* | ||||
17037 | ** Move to the next matching term/rowid. Used by the fts5vocab module. | ||||
17038 | */ | ||||
17039 | static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ | ||||
17040 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | ||||
17041 | Fts5Index *p = pIter->pIndex; | ||||
17042 | |||||
17043 | assert( pIter->pIndex->rc==SQLITE_OK )((void) (0)); | ||||
17044 | |||||
17045 | fts5MultiIterNext(p, pIter, 0, 0); | ||||
17046 | if( p->rc==SQLITE_OK0 ){ | ||||
17047 | Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | ||||
17048 | if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX'0' ){ | ||||
17049 | fts5DataRelease(pSeg->pLeaf); | ||||
17050 | pSeg->pLeaf = 0; | ||||
17051 | pIter->base.bEof = 1; | ||||
17052 | } | ||||
17053 | } | ||||
17054 | |||||
17055 | return fts5IndexReturn(pIter->pIndex); | ||||
17056 | } | ||||
17057 | |||||
17058 | /* | ||||
17059 | ** Move to the next matching rowid that occurs at or after iMatch. The | ||||
17060 | ** definition of "at or after" depends on whether this iterator iterates | ||||
17061 | ** in ascending or descending rowid order. | ||||
17062 | */ | ||||
17063 | static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ | ||||
17064 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | ||||
17065 | if( pIter->nSeg==0 ){ | ||||
17066 | assert( pIter->pTokenDataIter )((void) (0)); | ||||
17067 | fts5TokendataIterNext(pIter, 1, iMatch); | ||||
17068 | }else{ | ||||
17069 | fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); | ||||
17070 | } | ||||
17071 | return fts5IndexReturn(pIter->pIndex); | ||||
17072 | } | ||||
17073 | |||||
17074 | /* | ||||
17075 | ** Return the current term. | ||||
17076 | */ | ||||
17077 | static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ | ||||
17078 | int n; | ||||
17079 | const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); | ||||
17080 | assert_nc( z || n<=1 )((void) (0)); | ||||
17081 | *pn = n-1; | ||||
17082 | return (z ? &z[1] : 0); | ||||
17083 | } | ||||
17084 | |||||
17085 | /* | ||||
17086 | ** pIter is a prefix query. This function populates pIter->pTokenDataIter | ||||
17087 | ** with an Fts5TokenDataIter object containing mappings for all rows | ||||
17088 | ** matched by the query. | ||||
17089 | */ | ||||
17090 | static int fts5SetupPrefixIterTokendata( | ||||
17091 | Fts5Iter *pIter, | ||||
17092 | const char *pToken, /* Token prefix to search for */ | ||||
17093 | int nToken /* Size of pToken in bytes */ | ||||
17094 | ){ | ||||
17095 | Fts5Index *p = pIter->pIndex; | ||||
17096 | Fts5Buffer token = {0, 0, 0}; | ||||
17097 | TokendataSetupCtx ctx; | ||||
17098 | |||||
17099 | memset(&ctx, 0, sizeof(ctx)); | ||||
17100 | |||||
17101 | fts5BufferGrow(&p->rc, &token, nToken+1)( (u32)((&token)->n) + (u32)(nToken+1) <= (u32)((& token)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->rc ),(&token),(nToken+1)+(&token)->n) ); | ||||
17102 | assert( token.p!=0 || p->rc!=SQLITE_OK )((void) (0)); | ||||
17103 | ctx.pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, | ||||
17104 | SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | ||||
17105 | |||||
17106 | if( p->rc==SQLITE_OK0 ){ | ||||
17107 | |||||
17108 | /* Fill in the token prefix to search for */ | ||||
17109 | token.p[0] = FTS5_MAIN_PREFIX'0'; | ||||
17110 | memcpy(&token.p[1], pToken, nToken); | ||||
17111 | token.n = nToken+1; | ||||
17112 | |||||
17113 | fts5VisitEntries( | ||||
17114 | p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx | ||||
17115 | ); | ||||
17116 | |||||
17117 | fts5TokendataIterSortMap(p, ctx.pT); | ||||
17118 | } | ||||
17119 | |||||
17120 | if( p->rc==SQLITE_OK0 ){ | ||||
17121 | pIter->pTokenDataIter = ctx.pT; | ||||
17122 | }else{ | ||||
17123 | fts5TokendataIterDelete(ctx.pT); | ||||
17124 | } | ||||
17125 | fts5BufferFree(&token)sqlite3Fts5BufferFree(&token); | ||||
17126 | |||||
17127 | return fts5IndexReturn(p); | ||||
17128 | } | ||||
17129 | |||||
17130 | /* | ||||
17131 | ** This is used by xInstToken() to access the token at offset iOff, column | ||||
17132 | ** iCol of row iRowid. The token is returned via output variables *ppOut | ||||
17133 | ** and *pnOut. The iterator passed as the first argument must be a tokendata=1 | ||||
17134 | ** iterator (pIter->pTokenDataIter!=0). | ||||
17135 | ** | ||||
17136 | ** pToken/nToken: | ||||
17137 | */ | ||||
17138 | static int sqlite3Fts5IterToken( | ||||
17139 | Fts5IndexIter *pIndexIter, | ||||
17140 | const char *pToken, int nToken, | ||||
17141 | i64 iRowid, | ||||
17142 | int iCol, | ||||
17143 | int iOff, | ||||
17144 | const char **ppOut, int *pnOut | ||||
17145 | ){ | ||||
17146 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | ||||
17147 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | ||||
17148 | i64 iPos = (((i64)iCol)<<32) + iOff; | ||||
17149 | Fts5TokenDataMap *aMap = 0; | ||||
17150 | int i1 = 0; | ||||
17151 | int i2 = 0; | ||||
17152 | int iTest = 0; | ||||
17153 | |||||
17154 | assert( pT || (pToken && pIter->nSeg>0) )((void) (0)); | ||||
17155 | if( pT==0 ){ | ||||
17156 | int rc = fts5SetupPrefixIterTokendata(pIter, pToken, nToken); | ||||
17157 | if( rc!=SQLITE_OK0 ) return rc; | ||||
17158 | pT = pIter->pTokenDataIter; | ||||
17159 | } | ||||
17160 | |||||
17161 | i2 = pT->nMap; | ||||
17162 | aMap = pT->aMap; | ||||
17163 | |||||
17164 | while( i2>i1 ){ | ||||
17165 | iTest = (i1 + i2) / 2; | ||||
17166 | |||||
17167 | if( aMap[iTest].iRowid<iRowid ){ | ||||
17168 | i1 = iTest+1; | ||||
17169 | }else if( aMap[iTest].iRowid>iRowid ){ | ||||
17170 | i2 = iTest; | ||||
17171 | }else{ | ||||
17172 | if( aMap[iTest].iPos<iPos ){ | ||||
17173 | if( aMap[iTest].iPos<0 ){ | ||||
17174 | break; | ||||
17175 | } | ||||
17176 | i1 = iTest+1; | ||||
17177 | }else if( aMap[iTest].iPos>iPos ){ | ||||
17178 | i2 = iTest; | ||||
17179 | }else{ | ||||
17180 | break; | ||||
17181 | } | ||||
17182 | } | ||||
17183 | } | ||||
17184 | |||||
17185 | if( i2>i1 ){ | ||||
17186 | if( pIter->nSeg==0 ){ | ||||
17187 | Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; | ||||
17188 | *ppOut = (const char*)pMap->aSeg[0].term.p+1; | ||||
17189 | *pnOut = pMap->aSeg[0].term.n-1; | ||||
17190 | }else{ | ||||
17191 | Fts5TokenDataMap *p = &aMap[iTest]; | ||||
17192 | *ppOut = (const char*)&pT->terms.p[p->iIter]; | ||||
17193 | *pnOut = aMap[iTest].nByte; | ||||
17194 | } | ||||
17195 | } | ||||
17196 | |||||
17197 | return SQLITE_OK0; | ||||
17198 | } | ||||
17199 | |||||
17200 | /* | ||||
17201 | ** Clear any existing entries from the token-map associated with the | ||||
17202 | ** iterator passed as the only argument. | ||||
17203 | */ | ||||
17204 | static void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ | ||||
17205 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | ||||
17206 | if( pIter && pIter->pTokenDataIter | ||||
17207 | && (pIter->nSeg==0 || pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL0) | ||||
17208 | ){ | ||||
17209 | pIter->pTokenDataIter->nMap = 0; | ||||
17210 | } | ||||
17211 | } | ||||
17212 | |||||
17213 | /* | ||||
17214 | ** Set a token-mapping for the iterator passed as the first argument. This | ||||
17215 | ** is used in detail=column or detail=none mode when a token is requested | ||||
17216 | ** using the xInstToken() API. In this case the caller tokenizers the | ||||
17217 | ** current row and configures the token-mapping via multiple calls to this | ||||
17218 | ** function. | ||||
17219 | */ | ||||
17220 | static int sqlite3Fts5IndexIterWriteTokendata( | ||||
17221 | Fts5IndexIter *pIndexIter, | ||||
17222 | const char *pToken, int nToken, | ||||
17223 | i64 iRowid, int iCol, int iOff | ||||
17224 | ){ | ||||
17225 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | ||||
17226 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | ||||
17227 | Fts5Index *p = pIter->pIndex; | ||||
17228 | i64 iPos = (((i64)iCol)<<32) + iOff; | ||||
17229 | |||||
17230 | assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL )((void) (0)); | ||||
17231 | assert( pIter->pTokenDataIter || pIter->nSeg>0 )((void) (0)); | ||||
17232 | if( pIter->nSeg>0 ){ | ||||
17233 | /* This is a prefix term iterator. */ | ||||
17234 | if( pT==0 ){ | ||||
17235 | pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, | ||||
17236 | SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | ||||
17237 | pIter->pTokenDataIter = pT; | ||||
17238 | } | ||||
17239 | if( pT ){ | ||||
17240 | fts5TokendataIterAppendMap(p, pT, pT->terms.n, nToken, iRowid, iPos); | ||||
17241 | fts5BufferAppendBlob(&p->rc, &pT->terms, nToken, (const u8*)pToken)sqlite3Fts5BufferAppendBlob(&p->rc,&pT->terms,nToken ,(const u8*)pToken); | ||||
17242 | } | ||||
17243 | }else{ | ||||
17244 | int ii; | ||||
17245 | for(ii=0; ii<pT->nIter; ii++){ | ||||
17246 | Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; | ||||
17247 | if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; | ||||
17248 | } | ||||
17249 | if( ii<pT->nIter ){ | ||||
17250 | fts5TokendataIterAppendMap(p, pT, ii, 0, iRowid, iPos); | ||||
17251 | } | ||||
17252 | } | ||||
17253 | return fts5IndexReturn(p); | ||||
17254 | } | ||||
17255 | |||||
17256 | /* | ||||
17257 | ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). | ||||
17258 | */ | ||||
17259 | static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ | ||||
17260 | if( pIndexIter ){ | ||||
17261 | Fts5Index *pIndex = ((Fts5Iter*)pIndexIter)->pIndex; | ||||
17262 | fts5IterClose(pIndexIter); | ||||
17263 | fts5IndexReturn(pIndex); | ||||
17264 | } | ||||
17265 | } | ||||
17266 | |||||
17267 | /* | ||||
17268 | ** Read and decode the "averages" record from the database. | ||||
17269 | ** | ||||
17270 | ** Parameter anSize must point to an array of size nCol, where nCol is | ||||
17271 | ** the number of user defined columns in the FTS table. | ||||
17272 | */ | ||||
17273 | static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ | ||||
17274 | int nCol = p->pConfig->nCol; | ||||
17275 | Fts5Data *pData; | ||||
17276 | |||||
17277 | *pnRow = 0; | ||||
17278 | memset(anSize, 0, sizeof(i64) * nCol); | ||||
17279 | pData = fts5DataRead(p, FTS5_AVERAGES_ROWID1); | ||||
17280 | if( p->rc==SQLITE_OK0 && pData->nn ){ | ||||
17281 | int i = 0; | ||||
17282 | int iCol; | ||||
17283 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[i], (u64*)pnRow); | ||||
17284 | for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ | ||||
17285 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); | ||||
17286 | } | ||||
17287 | } | ||||
17288 | |||||
17289 | fts5DataRelease(pData); | ||||
17290 | return fts5IndexReturn(p); | ||||
17291 | } | ||||
17292 | |||||
17293 | /* | ||||
17294 | ** Replace the current "averages" record with the contents of the buffer | ||||
17295 | ** supplied as the second argument. | ||||
17296 | */ | ||||
17297 | static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ | ||||
17298 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
17299 | fts5DataWrite(p, FTS5_AVERAGES_ROWID1, pData, nData); | ||||
17300 | return fts5IndexReturn(p); | ||||
17301 | } | ||||
17302 | |||||
17303 | /* | ||||
17304 | ** Return the total number of blocks this module has read from the %_data | ||||
17305 | ** table since it was created. | ||||
17306 | */ | ||||
17307 | static int sqlite3Fts5IndexReads(Fts5Index *p){ | ||||
17308 | return p->nRead; | ||||
17309 | } | ||||
17310 | |||||
17311 | /* | ||||
17312 | ** Set the 32-bit cookie value stored at the start of all structure | ||||
17313 | ** records to the value passed as the second argument. | ||||
17314 | ** | ||||
17315 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | ||||
17316 | ** occurs. | ||||
17317 | */ | ||||
17318 | static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ | ||||
17319 | int rc; /* Return code */ | ||||
17320 | Fts5Config *pConfig = p->pConfig; /* Configuration object */ | ||||
17321 | u8 aCookie[4]; /* Binary representation of iNew */ | ||||
17322 | sqlite3_blob *pBlob = 0; | ||||
17323 | |||||
17324 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
17325 | sqlite3Fts5Put32(aCookie, iNew); | ||||
17326 | |||||
17327 | rc = sqlite3_blob_opensqlite3_api->blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, | ||||
17328 | "block", FTS5_STRUCTURE_ROWID10, 1, &pBlob | ||||
17329 | ); | ||||
17330 | if( rc==SQLITE_OK0 ){ | ||||
17331 | sqlite3_blob_writesqlite3_api->blob_write(pBlob, aCookie, 4, 0); | ||||
17332 | rc = sqlite3_blob_closesqlite3_api->blob_close(pBlob); | ||||
17333 | } | ||||
17334 | |||||
17335 | return rc; | ||||
17336 | } | ||||
17337 | |||||
17338 | static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ | ||||
17339 | Fts5Structure *pStruct; | ||||
17340 | pStruct = fts5StructureRead(p); | ||||
17341 | fts5StructureRelease(pStruct); | ||||
17342 | return fts5IndexReturn(p); | ||||
17343 | } | ||||
17344 | |||||
17345 | /* | ||||
17346 | ** Retrieve the origin value that will be used for the segment currently | ||||
17347 | ** being accumulated in the in-memory hash table when it is flushed to | ||||
17348 | ** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to | ||||
17349 | ** the queried value. Or, if an error occurs, an error code is returned | ||||
17350 | ** and the final value of (*piOrigin) is undefined. | ||||
17351 | */ | ||||
17352 | static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){ | ||||
17353 | Fts5Structure *pStruct; | ||||
17354 | pStruct = fts5StructureRead(p); | ||||
17355 | if( pStruct ){ | ||||
17356 | *piOrigin = pStruct->nOriginCntr; | ||||
17357 | fts5StructureRelease(pStruct); | ||||
17358 | } | ||||
17359 | return fts5IndexReturn(p); | ||||
17360 | } | ||||
17361 | |||||
17362 | /* | ||||
17363 | ** Buffer pPg contains a page of a tombstone hash table - one of nPg pages | ||||
17364 | ** associated with the same segment. This function adds rowid iRowid to | ||||
17365 | ** the hash table. The caller is required to guarantee that there is at | ||||
17366 | ** least one free slot on the page. | ||||
17367 | ** | ||||
17368 | ** If parameter bForce is false and the hash table is deemed to be full | ||||
17369 | ** (more than half of the slots are occupied), then non-zero is returned | ||||
17370 | ** and iRowid not inserted. Or, if bForce is true or if the hash table page | ||||
17371 | ** is not full, iRowid is inserted and zero returned. | ||||
17372 | */ | ||||
17373 | static int fts5IndexTombstoneAddToPage( | ||||
17374 | Fts5Data *pPg, | ||||
17375 | int bForce, | ||||
17376 | int nPg, | ||||
17377 | u64 iRowid | ||||
17378 | ){ | ||||
17379 | const int szKey = TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8); | ||||
17380 | const int nSlot = TOMBSTONE_NSLOT(pPg)((pPg->nn > 16) ? ((pPg->nn-8) / (pPg->p[0]==4 ? 4 : 8)) : 1); | ||||
17381 | const int nElem = fts5GetU32(&pPg->p[4]); | ||||
17382 | int iSlot = (iRowid / nPg) % nSlot; | ||||
17383 | int nCollide = nSlot; | ||||
17384 | |||||
17385 | if( szKey==4 && iRowid>0xFFFFFFFF ) return 2; | ||||
17386 | if( iRowid==0 ){ | ||||
17387 | pPg->p[1] = 0x01; | ||||
17388 | return 0; | ||||
17389 | } | ||||
17390 | |||||
17391 | if( bForce==0 && nElem>=(nSlot/2) ){ | ||||
17392 | return 1; | ||||
17393 | } | ||||
17394 | |||||
17395 | fts5PutU32(&pPg->p[4], nElem+1); | ||||
17396 | if( szKey==4 ){ | ||||
17397 | u32 *aSlot = (u32*)&pPg->p[8]; | ||||
17398 | while( aSlot[iSlot] ){ | ||||
17399 | iSlot = (iSlot + 1) % nSlot; | ||||
17400 | if( nCollide--==0 ) return 0; | ||||
17401 | } | ||||
17402 | fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid); | ||||
17403 | }else{ | ||||
17404 | u64 *aSlot = (u64*)&pPg->p[8]; | ||||
17405 | while( aSlot[iSlot] ){ | ||||
17406 | iSlot = (iSlot + 1) % nSlot; | ||||
17407 | if( nCollide--==0 ) return 0; | ||||
17408 | } | ||||
17409 | fts5PutU64((u8*)&aSlot[iSlot], iRowid); | ||||
17410 | } | ||||
17411 | |||||
17412 | return 0; | ||||
17413 | } | ||||
17414 | |||||
17415 | /* | ||||
17416 | ** This function attempts to build a new hash containing all the keys | ||||
17417 | ** currently in the tombstone hash table for segment pSeg. The new | ||||
17418 | ** hash will be stored in the nOut buffers passed in array apOut[]. | ||||
17419 | ** All pages of the new hash use key-size szKey (4 or 8). | ||||
17420 | ** | ||||
17421 | ** Return 0 if the hash is successfully rebuilt into the nOut pages. | ||||
17422 | ** Or non-zero if it is not (because one page became overfull). In this | ||||
17423 | ** case the caller should retry with a larger nOut parameter. | ||||
17424 | ** | ||||
17425 | ** Parameter pData1 is page iPg1 of the hash table being rebuilt. | ||||
17426 | */ | ||||
17427 | static int fts5IndexTombstoneRehash( | ||||
17428 | Fts5Index *p, | ||||
17429 | Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */ | ||||
17430 | Fts5Data *pData1, /* One page of current hash - or NULL */ | ||||
17431 | int iPg1, /* Which page of the current hash is pData1 */ | ||||
17432 | int szKey, /* 4 or 8, the keysize */ | ||||
17433 | int nOut, /* Number of output pages */ | ||||
17434 | Fts5Data **apOut /* Array of output hash pages */ | ||||
17435 | ){ | ||||
17436 | int ii; | ||||
17437 | int res = 0; | ||||
17438 | |||||
17439 | /* Initialize the headers of all the output pages */ | ||||
17440 | for(ii=0; ii<nOut; ii++){ | ||||
17441 | apOut[ii]->p[0] = szKey; | ||||
17442 | fts5PutU32(&apOut[ii]->p[4], 0); | ||||
17443 | } | ||||
17444 | |||||
17445 | /* Loop through the current pages of the hash table. */ | ||||
17446 | for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){ | ||||
17447 | Fts5Data *pData = 0; /* Page ii of the current hash table */ | ||||
17448 | Fts5Data *pFree = 0; /* Free this at the end of the loop */ | ||||
17449 | |||||
17450 | if( iPg1==ii ){ | ||||
17451 | pData = pData1; | ||||
17452 | }else{ | ||||
17453 | pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(ii)) )); | ||||
17454 | } | ||||
17455 | |||||
17456 | if( pData ){ | ||||
17457 | int szKeyIn = TOMBSTONE_KEYSIZE(pData)(pData->p[0]==4 ? 4 : 8); | ||||
17458 | int nSlotIn = (pData->nn - 8) / szKeyIn; | ||||
17459 | int iIn; | ||||
17460 | for(iIn=0; iIn<nSlotIn; iIn++){ | ||||
17461 | u64 iVal = 0; | ||||
17462 | |||||
17463 | /* Read the value from slot iIn of the input page into iVal. */ | ||||
17464 | if( szKeyIn==4 ){ | ||||
17465 | u32 *aSlot = (u32*)&pData->p[8]; | ||||
17466 | if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]); | ||||
17467 | }else{ | ||||
17468 | u64 *aSlot = (u64*)&pData->p[8]; | ||||
17469 | if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]); | ||||
17470 | } | ||||
17471 | |||||
17472 | /* If iVal is not 0 at this point, insert it into the new hash table */ | ||||
17473 | if( iVal ){ | ||||
17474 | Fts5Data *pPg = apOut[(iVal % nOut)]; | ||||
17475 | res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal); | ||||
17476 | if( res ) break; | ||||
17477 | } | ||||
17478 | } | ||||
17479 | |||||
17480 | /* If this is page 0 of the old hash, copy the rowid-0-flag from the | ||||
17481 | ** old hash to the new. */ | ||||
17482 | if( ii==0 ){ | ||||
17483 | apOut[0]->p[1] = pData->p[1]; | ||||
17484 | } | ||||
17485 | } | ||||
17486 | fts5DataRelease(pFree); | ||||
17487 | } | ||||
17488 | |||||
17489 | return res; | ||||
17490 | } | ||||
17491 | |||||
17492 | /* | ||||
17493 | ** This is called to rebuild the hash table belonging to segment pSeg. | ||||
17494 | ** If parameter pData1 is not NULL, then one page of the existing hash table | ||||
17495 | ** has already been loaded - pData1, which is page iPg1. The key-size for | ||||
17496 | ** the new hash table is szKey (4 or 8). | ||||
17497 | ** | ||||
17498 | ** If successful, the new hash table is not written to disk. Instead, | ||||
17499 | ** output parameter (*pnOut) is set to the number of pages in the new | ||||
17500 | ** hash table, and (*papOut) to point to an array of buffers containing | ||||
17501 | ** the new page data. | ||||
17502 | ** | ||||
17503 | ** If an error occurs, an error code is left in the Fts5Index object and | ||||
17504 | ** both output parameters set to 0 before returning. | ||||
17505 | */ | ||||
17506 | static void fts5IndexTombstoneRebuild( | ||||
17507 | Fts5Index *p, | ||||
17508 | Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */ | ||||
17509 | Fts5Data *pData1, /* One page of current hash - or NULL */ | ||||
17510 | int iPg1, /* Which page of the current hash is pData1 */ | ||||
17511 | int szKey, /* 4 or 8, the keysize */ | ||||
17512 | int *pnOut, /* OUT: Number of output pages */ | ||||
17513 | Fts5Data ***papOut /* OUT: Output hash pages */ | ||||
17514 | ){ | ||||
17515 | const int MINSLOT = 32; | ||||
17516 | int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey)(((MINSLOT) > ((p->pConfig->pgsz - 8) / szKey)) ? (MINSLOT ) : ((p->pConfig->pgsz - 8) / szKey)); | ||||
17517 | int nSlot = 0; /* Number of slots in each output page */ | ||||
17518 | int nOut = 0; | ||||
17519 | |||||
17520 | /* Figure out how many output pages (nOut) and how many slots per | ||||
17521 | ** page (nSlot). There are three possibilities: | ||||
17522 | ** | ||||
17523 | ** 1. The hash table does not yet exist. In this case the new hash | ||||
17524 | ** table will consist of a single page with MINSLOT slots. | ||||
17525 | ** | ||||
17526 | ** 2. The hash table exists but is currently a single page. In this | ||||
17527 | ** case an attempt is made to grow the page to accommodate the new | ||||
17528 | ** entry. The page is allowed to grow up to nSlotPerPage (see above) | ||||
17529 | ** slots. | ||||
17530 | ** | ||||
17531 | ** 3. The hash table already consists of more than one page, or of | ||||
17532 | ** a single page already so large that it cannot be grown. In this | ||||
17533 | ** case the new hash consists of (nPg*2+1) pages of nSlotPerPage | ||||
17534 | ** slots each, where nPg is the current number of pages in the | ||||
17535 | ** hash table. | ||||
17536 | */ | ||||
17537 | if( pSeg->nPgTombstone==0 ){ | ||||
17538 | /* Case 1. */ | ||||
17539 | nOut = 1; | ||||
17540 | nSlot = MINSLOT; | ||||
17541 | }else if( pSeg->nPgTombstone==1 ){ | ||||
17542 | /* Case 2. */ | ||||
17543 | int nElem = (int)fts5GetU32(&pData1->p[4]); | ||||
17544 | assert( pData1 && iPg1==0 )((void) (0)); | ||||
17545 | nOut = 1; | ||||
17546 | nSlot = MAX(nElem*4, MINSLOT)(((nElem*4) > (MINSLOT)) ? (nElem*4) : (MINSLOT)); | ||||
17547 | if( nSlot>nSlotPerPage ) nOut = 0; | ||||
17548 | } | ||||
17549 | if( nOut==0 ){ | ||||
17550 | /* Case 3. */ | ||||
17551 | nOut = (pSeg->nPgTombstone * 2 + 1); | ||||
17552 | nSlot = nSlotPerPage; | ||||
17553 | } | ||||
17554 | |||||
17555 | /* Allocate the required array and output pages */ | ||||
17556 | while( 1 ){ | ||||
17557 | int res = 0; | ||||
17558 | int ii = 0; | ||||
17559 | int szPage = 0; | ||||
17560 | Fts5Data **apOut = 0; | ||||
17561 | |||||
17562 | /* Allocate space for the new hash table */ | ||||
17563 | assert( nSlot>=MINSLOT )((void) (0)); | ||||
17564 | apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut); | ||||
17565 | szPage = 8 + nSlot*szKey; | ||||
17566 | for(ii=0; ii<nOut; ii++){ | ||||
17567 | Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc, | ||||
17568 | sizeof(Fts5Data)+szPage | ||||
17569 | ); | ||||
17570 | if( pNew ){ | ||||
17571 | pNew->nn = szPage; | ||||
17572 | pNew->p = (u8*)&pNew[1]; | ||||
17573 | apOut[ii] = pNew; | ||||
17574 | } | ||||
17575 | } | ||||
17576 | |||||
17577 | /* Rebuild the hash table. */ | ||||
17578 | if( p->rc==SQLITE_OK0 ){ | ||||
17579 | res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut); | ||||
17580 | } | ||||
17581 | if( res==0 ){ | ||||
17582 | if( p->rc ){ | ||||
17583 | fts5IndexFreeArray(apOut, nOut); | ||||
17584 | apOut = 0; | ||||
17585 | nOut = 0; | ||||
17586 | } | ||||
17587 | *pnOut = nOut; | ||||
17588 | *papOut = apOut; | ||||
17589 | break; | ||||
17590 | } | ||||
17591 | |||||
17592 | /* If control flows to here, it was not possible to rebuild the hash | ||||
17593 | ** table. Free all buffers and then try again with more pages. */ | ||||
17594 | assert( p->rc==SQLITE_OK )((void) (0)); | ||||
17595 | fts5IndexFreeArray(apOut, nOut); | ||||
17596 | nSlot = nSlotPerPage; | ||||
17597 | nOut = nOut*2 + 1; | ||||
17598 | } | ||||
17599 | } | ||||
17600 | |||||
17601 | |||||
17602 | /* | ||||
17603 | ** Add a tombstone for rowid iRowid to segment pSeg. | ||||
17604 | */ | ||||
17605 | static void fts5IndexTombstoneAdd( | ||||
17606 | Fts5Index *p, | ||||
17607 | Fts5StructureSegment *pSeg, | ||||
17608 | u64 iRowid | ||||
17609 | ){ | ||||
17610 | Fts5Data *pPg = 0; | ||||
17611 | int iPg = -1; | ||||
17612 | int szKey = 0; | ||||
17613 | int nHash = 0; | ||||
17614 | Fts5Data **apHash = 0; | ||||
17615 | |||||
17616 | p->nContentlessDelete++; | ||||
17617 | |||||
17618 | if( pSeg->nPgTombstone>0 ){ | ||||
17619 | iPg = iRowid % pSeg->nPgTombstone; | ||||
17620 | pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(iPg)) )); | ||||
17621 | if( pPg==0 ){ | ||||
17622 | assert( p->rc!=SQLITE_OK )((void) (0)); | ||||
17623 | return; | ||||
17624 | } | ||||
17625 | |||||
17626 | if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){ | ||||
17627 | fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(iPg)) ), pPg->p, pPg->nn); | ||||
17628 | fts5DataRelease(pPg); | ||||
17629 | return; | ||||
17630 | } | ||||
17631 | } | ||||
17632 | |||||
17633 | /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */ | ||||
17634 | szKey = pPg ? TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8) : 4; | ||||
17635 | if( iRowid>0xFFFFFFFF ) szKey = 8; | ||||
17636 | |||||
17637 | /* Rebuild the hash table */ | ||||
17638 | fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash); | ||||
17639 | assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) )((void) (0)); | ||||
17640 | |||||
17641 | /* If all has succeeded, write the new rowid into one of the new hash | ||||
17642 | ** table pages, then write them all out to disk. */ | ||||
17643 | if( nHash ){ | ||||
17644 | int ii = 0; | ||||
17645 | fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid); | ||||
17646 | for(ii=0; ii<nHash; ii++){ | ||||
17647 | i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(ii)) ); | ||||
17648 | fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn); | ||||
17649 | } | ||||
17650 | pSeg->nPgTombstone = nHash; | ||||
17651 | fts5StructureWrite(p, p->pStruct); | ||||
17652 | } | ||||
17653 | |||||
17654 | fts5DataRelease(pPg); | ||||
17655 | fts5IndexFreeArray(apHash, nHash); | ||||
17656 | } | ||||
17657 | |||||
17658 | /* | ||||
17659 | ** Add iRowid to the tombstone list of the segment or segments that contain | ||||
17660 | ** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite | ||||
17661 | ** error code otherwise. | ||||
17662 | */ | ||||
17663 | static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){ | ||||
17664 | Fts5Structure *pStruct; | ||||
17665 | pStruct = fts5StructureRead(p); | ||||
17666 | if( pStruct ){ | ||||
17667 | int bFound = 0; /* True after pSeg->nEntryTombstone incr. */ | ||||
17668 | int iLvl; | ||||
17669 | for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ | ||||
17670 | int iSeg; | ||||
17671 | for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ | ||||
17672 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | ||||
17673 | if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){ | ||||
17674 | if( bFound==0 ){ | ||||
17675 | pSeg->nEntryTombstone++; | ||||
17676 | bFound = 1; | ||||
17677 | } | ||||
17678 | fts5IndexTombstoneAdd(p, pSeg, iRowid); | ||||
17679 | } | ||||
17680 | } | ||||
17681 | } | ||||
17682 | fts5StructureRelease(pStruct); | ||||
17683 | } | ||||
17684 | return fts5IndexReturn(p); | ||||
17685 | } | ||||
17686 | |||||
17687 | /************************************************************************* | ||||
17688 | ************************************************************************** | ||||
17689 | ** Below this point is the implementation of the integrity-check | ||||
17690 | ** functionality. | ||||
17691 | */ | ||||
17692 | |||||
17693 | /* | ||||
17694 | ** Return a simple checksum value based on the arguments. | ||||
17695 | */ | ||||
17696 | static u64 sqlite3Fts5IndexEntryCksum( | ||||
17697 | i64 iRowid, | ||||
17698 | int iCol, | ||||
17699 | int iPos, | ||||
17700 | int iIdx, | ||||
17701 | const char *pTerm, | ||||
17702 | int nTerm | ||||
17703 | ){ | ||||
17704 | int i; | ||||
17705 | u64 ret = iRowid; | ||||
17706 | ret += (ret<<3) + iCol; | ||||
17707 | ret += (ret<<3) + iPos; | ||||
17708 | if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX'0' + iIdx); | ||||
17709 | for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; | ||||
17710 | return ret; | ||||
17711 | } | ||||
17712 | |||||
17713 | #ifdef SQLITE_DEBUG | ||||
17714 | /* | ||||
17715 | ** This function is purely an internal test. It does not contribute to | ||||
17716 | ** FTS functionality, or even the integrity-check, in any way. | ||||
17717 | ** | ||||
17718 | ** Instead, it tests that the same set of pgno/rowid combinations are | ||||
17719 | ** visited regardless of whether the doclist-index identified by parameters | ||||
17720 | ** iSegid/iLeaf is iterated in forwards or reverse order. | ||||
17721 | */ | ||||
17722 | static void fts5TestDlidxReverse( | ||||
17723 | Fts5Index *p, | ||||
17724 | int iSegid, /* Segment id to load from */ | ||||
17725 | int iLeaf /* Load doclist-index for this leaf */ | ||||
17726 | ){ | ||||
17727 | Fts5DlidxIter *pDlidx = 0; | ||||
17728 | u64 cksum1 = 13; | ||||
17729 | u64 cksum2 = 13; | ||||
17730 | |||||
17731 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); | ||||
17732 | fts5DlidxIterEof(p, pDlidx)==0; | ||||
17733 | fts5DlidxIterNext(p, pDlidx) | ||||
17734 | ){ | ||||
17735 | i64 iRowid = fts5DlidxIterRowid(pDlidx); | ||||
17736 | int pgno = fts5DlidxIterPgno(pDlidx); | ||||
17737 | assert( pgno>iLeaf )((void) (0)); | ||||
17738 | cksum1 += iRowid + ((i64)pgno<<32); | ||||
17739 | } | ||||
17740 | fts5DlidxIterFree(pDlidx); | ||||
17741 | pDlidx = 0; | ||||
17742 | |||||
17743 | for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); | ||||
17744 | fts5DlidxIterEof(p, pDlidx)==0; | ||||
17745 | fts5DlidxIterPrev(p, pDlidx) | ||||
17746 | ){ | ||||
17747 | i64 iRowid = fts5DlidxIterRowid(pDlidx); | ||||
17748 | int pgno = fts5DlidxIterPgno(pDlidx); | ||||
17749 | assert( fts5DlidxIterPgno(pDlidx)>iLeaf )((void) (0)); | ||||
17750 | cksum2 += iRowid + ((i64)pgno<<32); | ||||
17751 | } | ||||
17752 | fts5DlidxIterFree(pDlidx); | ||||
17753 | pDlidx = 0; | ||||
17754 | |||||
17755 | if( p->rc==SQLITE_OK0 && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17756 | } | ||||
17757 | |||||
17758 | static int fts5QueryCksum( | ||||
17759 | Fts5Index *p, /* Fts5 index object */ | ||||
17760 | int iIdx, | ||||
17761 | const char *z, /* Index key to query for */ | ||||
17762 | int n, /* Size of index key in bytes */ | ||||
17763 | int flags, /* Flags for Fts5IndexQuery */ | ||||
17764 | u64 *pCksum /* IN/OUT: Checksum value */ | ||||
17765 | ){ | ||||
17766 | int eDetail = p->pConfig->eDetail; | ||||
17767 | u64 cksum = *pCksum; | ||||
17768 | Fts5IndexIter *pIter = 0; | ||||
17769 | int rc = sqlite3Fts5IndexQuery( | ||||
17770 | p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA0x0080), 0, &pIter | ||||
17771 | ); | ||||
17772 | |||||
17773 | while( rc==SQLITE_OK0 && ALWAYS(pIter!=0)(pIter!=0) && 0==sqlite3Fts5IterEof(pIter)((pIter)->bEof) ){ | ||||
17774 | i64 rowid = pIter->iRowid; | ||||
17775 | |||||
17776 | if( eDetail==FTS5_DETAIL_NONE1 ){ | ||||
17777 | cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); | ||||
17778 | }else{ | ||||
17779 | Fts5PoslistReader sReader; | ||||
17780 | for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); | ||||
17781 | sReader.bEof==0; | ||||
17782 | sqlite3Fts5PoslistReaderNext(&sReader) | ||||
17783 | ){ | ||||
17784 | int iCol = FTS5_POS2COLUMN(sReader.iPos)(int)((sReader.iPos >> 32) & 0x7FFFFFFF); | ||||
17785 | int iOff = FTS5_POS2OFFSET(sReader.iPos)(int)(sReader.iPos & 0x7FFFFFFF); | ||||
17786 | cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); | ||||
17787 | } | ||||
17788 | } | ||||
17789 | if( rc==SQLITE_OK0 ){ | ||||
17790 | rc = sqlite3Fts5IterNext(pIter); | ||||
17791 | } | ||||
17792 | } | ||||
17793 | fts5IterClose(pIter); | ||||
17794 | |||||
17795 | *pCksum = cksum; | ||||
17796 | return rc; | ||||
17797 | } | ||||
17798 | |||||
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** The check is structural only: each lead byte must be followed, within
** the buffer, by the number of continuation bytes (10xxxxxx) that its
** high bits call for - 0 for 0xxxxxxx, 1 for 110xxxxx, 2 for 1110xxxx
** and 3 for 11110xxx. Any other lead byte is rejected. Overlong
** encodings and out-of-range codepoints are not detected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    if( (z[i] & 0x80)==0x00 ){
      i++;
    }else
    if( (z[i] & 0xE0)==0xC0 ){
      if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
      i += 2;
    }else
    if( (z[i] & 0xF0)==0xE0 ){
      if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      i += 3;
    }else
    if( (z[i] & 0xF8)==0xF0 ){
      /* A 4-byte sequence has three continuation bytes. All three must
      ** be checked, and all four bytes consumed. */
      if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      if( (z[i+3] & 0xC0)!=0x80 ) return 1;
      i += 4;
    }else{
      return 1;
    }
  }

  return 0;
}
17830 | |||||
17831 | /* | ||||
17832 | ** This function is also purely an internal test. It does not contribute to | ||||
17833 | ** FTS functionality, or even the integrity-check, in any way. | ||||
17834 | */ | ||||
17835 | static void fts5TestTerm( | ||||
17836 | Fts5Index *p, | ||||
17837 | Fts5Buffer *pPrev, /* Previous term */ | ||||
17838 | const char *z, int n, /* Possibly new term to test */ | ||||
17839 | u64 expected, | ||||
17840 | u64 *pCksum | ||||
17841 | ){ | ||||
17842 | int rc = p->rc; | ||||
17843 | if( pPrev->n==0 ){ | ||||
17844 | fts5BufferSet(&rc, pPrev, n, (const u8*)z)sqlite3Fts5BufferSet(&rc,pPrev,n,(const u8*)z); | ||||
17845 | }else | ||||
17846 | if( rc==SQLITE_OK0 && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ | ||||
17847 | u64 cksum3 = *pCksum; | ||||
17848 | const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ | ||||
17849 | int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ | ||||
17850 | int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX'0'); | ||||
17851 | int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX0x0001); | ||||
17852 | u64 ck1 = 0; | ||||
17853 | u64 ck2 = 0; | ||||
17854 | |||||
17855 | /* Check that the results returned for ASC and DESC queries are | ||||
17856 | ** the same. If not, call this corruption. */ | ||||
17857 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); | ||||
17858 | if( rc==SQLITE_OK0 ){ | ||||
17859 | int f = flags|FTS5INDEX_QUERY_DESC0x0002; | ||||
17860 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | ||||
17861 | } | ||||
17862 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17863 | |||||
17864 | /* If this is a prefix query, check that the results returned if the | ||||
17865 | ** the index is disabled are the same. In both ASC and DESC order. | ||||
17866 | ** | ||||
17867 | ** This check may only be performed if the hash table is empty. This | ||||
17868 | ** is because the hash table only supports a single scan query at | ||||
17869 | ** a time, and the multi-iter loop from which this function is called | ||||
17870 | ** is already performing such a scan. | ||||
17871 | ** | ||||
17872 | ** Also only do this if buffer zTerm contains nTerm bytes of valid | ||||
17873 | ** utf-8. Otherwise, the last part of the buffer contents might contain | ||||
17874 | ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8 | ||||
17875 | ** character stored in the main fts index, which will cause the | ||||
17876 | ** test to fail. */ | ||||
17877 | if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){ | ||||
17878 | if( iIdx>0 && rc==SQLITE_OK0 ){ | ||||
17879 | int f = flags|FTS5INDEX_QUERY_TEST_NOIDX0x0004; | ||||
17880 | ck2 = 0; | ||||
17881 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | ||||
17882 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17883 | } | ||||
17884 | if( iIdx>0 && rc==SQLITE_OK0 ){ | ||||
17885 | int f = flags|FTS5INDEX_QUERY_TEST_NOIDX0x0004|FTS5INDEX_QUERY_DESC0x0002; | ||||
17886 | ck2 = 0; | ||||
17887 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | ||||
17888 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17889 | } | ||||
17890 | } | ||||
17891 | |||||
17892 | cksum3 ^= ck1; | ||||
17893 | fts5BufferSet(&rc, pPrev, n, (const u8*)z)sqlite3Fts5BufferSet(&rc,pPrev,n,(const u8*)z); | ||||
17894 | |||||
17895 | if( rc==SQLITE_OK0 && cksum3!=expected ){ | ||||
17896 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17897 | } | ||||
17898 | *pCksum = cksum3; | ||||
17899 | } | ||||
17900 | p->rc = rc; | ||||
17901 | } | ||||
17902 | |||||
17903 | #else | ||||
17904 | # define fts5TestDlidxReverse(x,y,z) | ||||
17905 | # define fts5TestTerm(u,v,w,x,y,z) | ||||
17906 | #endif | ||||
17907 | |||||
17908 | /* | ||||
17909 | ** Check that: | ||||
17910 | ** | ||||
17911 | ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and | ||||
17912 | ** contain zero terms. | ||||
17913 | ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and | ||||
17914 | ** contain zero rowids. | ||||
17915 | */ | ||||
17916 | static void fts5IndexIntegrityCheckEmpty( | ||||
17917 | Fts5Index *p, | ||||
17918 | Fts5StructureSegment *pSeg, /* Segment to check internal consistency */ | ||||
17919 | int iFirst, | ||||
17920 | int iNoRowid, | ||||
17921 | int iLast | ||||
17922 | ){ | ||||
17923 | int i; | ||||
17924 | |||||
17925 | /* Now check that the iter.nEmpty leaves following the current leaf | ||||
17926 | ** (a) exist and (b) contain no terms. */ | ||||
17927 | for(i=iFirst; p->rc==SQLITE_OK0 && i<=iLast; i++){ | ||||
17928 | Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(i)) )); | ||||
17929 | if( pLeaf ){ | ||||
17930 | if( !fts5LeafIsTermless(pLeaf)((pLeaf)->szLeaf >= (pLeaf)->nn) ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17931 | if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)) ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17932 | } | ||||
17933 | fts5DataRelease(pLeaf); | ||||
17934 | } | ||||
17935 | } | ||||
17936 | |||||
17937 | static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ | ||||
17938 | i64 iTermOff = 0; | ||||
17939 | int ii; | ||||
17940 | |||||
17941 | Fts5Buffer buf1 = {0,0,0}; | ||||
17942 | Fts5Buffer buf2 = {0,0,0}; | ||||
17943 | |||||
17944 | ii = pLeaf->szLeaf; | ||||
17945 | while( ii<pLeaf->nn && p->rc==SQLITE_OK0 ){ | ||||
17946 | int res; | ||||
17947 | i64 iOff; | ||||
17948 | int nIncr; | ||||
17949 | |||||
17950 | ii += fts5GetVarint32(&pLeaf->p[ii], nIncr)sqlite3Fts5GetVarint32(&pLeaf->p[ii],(u32*)&(nIncr )); | ||||
17951 | iTermOff += nIncr; | ||||
17952 | iOff = iTermOff; | ||||
17953 | |||||
17954 | if( iOff>=pLeaf->szLeaf ){ | ||||
17955 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17956 | }else if( iTermOff==nIncr ){ | ||||
17957 | int nByte; | ||||
17958 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nByte )); | ||||
17959 | if( (iOff+nByte)>pLeaf->szLeaf ){ | ||||
17960 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17961 | }else{ | ||||
17962 | fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff])sqlite3Fts5BufferSet(&p->rc,&buf1,nByte,&pLeaf ->p[iOff]); | ||||
17963 | } | ||||
17964 | }else{ | ||||
17965 | int nKeep, nByte; | ||||
17966 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nKeep )); | ||||
17967 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nByte )); | ||||
17968 | if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ | ||||
17969 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17970 | }else{ | ||||
17971 | buf1.n = nKeep; | ||||
17972 | fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&buf1,nByte,& pLeaf->p[iOff]); | ||||
17973 | } | ||||
17974 | |||||
17975 | if( p->rc==SQLITE_OK0 ){ | ||||
17976 | res = fts5BufferCompare(&buf1, &buf2); | ||||
17977 | if( res<=0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
17978 | } | ||||
17979 | } | ||||
17980 | fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p)sqlite3Fts5BufferSet(&p->rc,&buf2,buf1.n,buf1.p); | ||||
17981 | } | ||||
17982 | |||||
17983 | fts5BufferFree(&buf1)sqlite3Fts5BufferFree(&buf1); | ||||
17984 | fts5BufferFree(&buf2)sqlite3Fts5BufferFree(&buf2); | ||||
17985 | } | ||||
17986 | |||||
17987 | static void fts5IndexIntegrityCheckSegment( | ||||
17988 | Fts5Index *p, /* FTS5 backend object */ | ||||
17989 | Fts5StructureSegment *pSeg /* Segment to check internal consistency */ | ||||
17990 | ){ | ||||
17991 | Fts5Config *pConfig = p->pConfig; | ||||
17992 | int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE5); | ||||
17993 | sqlite3_stmt *pStmt = 0; | ||||
17994 | int rc2; | ||||
17995 | int iIdxPrevLeaf = pSeg->pgnoFirst-1; | ||||
17996 | int iDlidxPrevLeaf = pSeg->pgnoLast; | ||||
17997 | |||||
17998 | if( pSeg->pgnoFirst==0 ) return; | ||||
17999 | |||||
18000 | fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintfsqlite3_api->mprintf( | ||||
18001 | "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d " | ||||
18002 | "ORDER BY 1, 2", | ||||
18003 | pConfig->zDb, pConfig->zName, pSeg->iSegid | ||||
18004 | )); | ||||
18005 | |||||
18006 | /* Iterate through the b-tree hierarchy. */ | ||||
18007 | while( p->rc==SQLITE_OK0 && SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pStmt) ){ | ||||
18008 | i64 iRow; /* Rowid for this leaf */ | ||||
18009 | Fts5Data *pLeaf; /* Data for this leaf */ | ||||
18010 | |||||
18011 | const char *zIdxTerm = (const char*)sqlite3_column_blobsqlite3_api->column_blob(pStmt, 1); | ||||
18012 | int nIdxTerm = sqlite3_column_bytessqlite3_api->column_bytes(pStmt, 1); | ||||
18013 | int iIdxLeaf = sqlite3_column_intsqlite3_api->column_int(pStmt, 2); | ||||
18014 | int bIdxDlidx = sqlite3_column_intsqlite3_api->column_int(pStmt, 3); | ||||
18015 | |||||
18016 | /* If the leaf in question has already been trimmed from the segment, | ||||
18017 | ** ignore this b-tree entry. Otherwise, load it into memory. */ | ||||
18018 | if( iIdxLeaf<pSeg->pgnoFirst ) continue; | ||||
18019 | iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iIdxLeaf)) ); | ||||
18020 | pLeaf = fts5LeafRead(p, iRow); | ||||
18021 | if( pLeaf==0 ) break; | ||||
18022 | |||||
18023 | /* Check that the leaf contains at least one term, and that it is equal | ||||
18024 | ** to or larger than the split-key in zIdxTerm. Also check that if there | ||||
18025 | ** is also a rowid pointer within the leaf page header, it points to a | ||||
18026 | ** location before the term. */ | ||||
18027 | if( pLeaf->nn<=pLeaf->szLeaf ){ | ||||
18028 | |||||
18029 | if( nIdxTerm==0 | ||||
18030 | && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE5 | ||||
18031 | && pLeaf->nn==pLeaf->szLeaf | ||||
18032 | && pLeaf->nn==4 | ||||
18033 | ){ | ||||
18034 | /* special case - the very first page in a segment keeps its %_idx | ||||
18035 | ** entry even if all the terms are removed from it by secure-delete | ||||
18036 | ** operations. */ | ||||
18037 | }else{ | ||||
18038 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18039 | } | ||||
18040 | |||||
18041 | }else{ | ||||
18042 | int iOff; /* Offset of first term on leaf */ | ||||
18043 | int iRowidOff; /* Offset of first rowid on leaf */ | ||||
18044 | int nTerm; /* Size of term on leaf in bytes */ | ||||
18045 | int res; /* Comparison of term and split-key */ | ||||
18046 | |||||
18047 | iOff = fts5LeafFirstTermOff(pLeaf); | ||||
18048 | iRowidOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)); | ||||
18049 | if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){ | ||||
18050 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18051 | }else{ | ||||
18052 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nTerm )); | ||||
18053 | res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm))(((((nTerm) < (nIdxTerm)) ? (nTerm) : (nIdxTerm)))<=0 ? 0 : memcmp((&pLeaf->p[iOff]), (zIdxTerm), ((((nTerm) < (nIdxTerm)) ? (nTerm) : (nIdxTerm))))); | ||||
18054 | if( res==0 ) res = nTerm - nIdxTerm; | ||||
18055 | if( res<0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18056 | } | ||||
18057 | |||||
18058 | fts5IntegrityCheckPgidx(p, pLeaf); | ||||
18059 | } | ||||
18060 | fts5DataRelease(pLeaf); | ||||
18061 | if( p->rc ) break; | ||||
18062 | |||||
18063 | /* Now check that the iter.nEmpty leaves following the current leaf | ||||
18064 | ** (a) exist and (b) contain no terms. */ | ||||
18065 | fts5IndexIntegrityCheckEmpty( | ||||
18066 | p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1 | ||||
18067 | ); | ||||
18068 | if( p->rc ) break; | ||||
18069 | |||||
18070 | /* If there is a doclist-index, check that it looks right. */ | ||||
18071 | if( bIdxDlidx ){ | ||||
18072 | Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ | ||||
18073 | int iPrevLeaf = iIdxLeaf; | ||||
18074 | int iSegid = pSeg->iSegid; | ||||
18075 | int iPg = 0; | ||||
18076 | i64 iKey; | ||||
18077 | |||||
18078 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); | ||||
18079 | fts5DlidxIterEof(p, pDlidx)==0; | ||||
18080 | fts5DlidxIterNext(p, pDlidx) | ||||
18081 | ){ | ||||
18082 | |||||
18083 | /* Check any rowid-less pages that occur before the current leaf. */ | ||||
18084 | for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ | ||||
18085 | iKey = FTS5_SEGMENT_ROWID(iSegid, iPg)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPg)) ); | ||||
18086 | pLeaf = fts5DataRead(p, iKey); | ||||
18087 | if( pLeaf ){ | ||||
18088 | if( fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p))!=0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18089 | fts5DataRelease(pLeaf); | ||||
18090 | } | ||||
18091 | } | ||||
18092 | iPrevLeaf = fts5DlidxIterPgno(pDlidx); | ||||
18093 | |||||
18094 | /* Check that the leaf page indicated by the iterator really does | ||||
18095 | ** contain the rowid suggested by the same. */ | ||||
18096 | iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPrevLeaf)) ); | ||||
18097 | pLeaf = fts5DataRead(p, iKey); | ||||
18098 | if( pLeaf ){ | ||||
18099 | i64 iRowid; | ||||
18100 | int iRowidOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)); | ||||
18101 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | ||||
18102 | if( iRowidOff>=pLeaf->szLeaf ){ | ||||
18103 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18104 | }else if( bSecureDelete==0 || iRowidOff>0 ){ | ||||
18105 | i64 iDlRowid = fts5DlidxIterRowid(pDlidx); | ||||
18106 | fts5GetVarintsqlite3Fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); | ||||
18107 | if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){ | ||||
18108 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18109 | } | ||||
18110 | } | ||||
18111 | fts5DataRelease(pLeaf); | ||||
18112 | } | ||||
18113 | } | ||||
18114 | |||||
18115 | iDlidxPrevLeaf = iPg; | ||||
18116 | fts5DlidxIterFree(pDlidx); | ||||
18117 | fts5TestDlidxReverse(p, iSegid, iIdxLeaf); | ||||
18118 | }else{ | ||||
18119 | iDlidxPrevLeaf = pSeg->pgnoLast; | ||||
18120 | /* TODO: Check there is no doclist index */ | ||||
18121 | } | ||||
18122 | |||||
18123 | iIdxPrevLeaf = iIdxLeaf; | ||||
18124 | } | ||||
18125 | |||||
18126 | rc2 = sqlite3_finalizesqlite3_api->finalize(pStmt); | ||||
18127 | if( p->rc==SQLITE_OK0 ) p->rc = rc2; | ||||
18128 | |||||
18129 | /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ | ||||
18130 | #if 0 | ||||
18131 | if( p->rc==SQLITE_OK0 && iter.iLeaf!=pSeg->pgnoLast ){ | ||||
18132 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18133 | } | ||||
18134 | #endif | ||||
18135 | } | ||||
18136 | |||||
18137 | |||||
18138 | /* | ||||
18139 | ** Run internal checks to ensure that the FTS index (a) is internally | ||||
18140 | ** consistent and (b) contains entries for which the XOR of the checksums | ||||
18141 | ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. | ||||
18142 | ** | ||||
18143 | ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the | ||||
18144 | ** checksum does not match. Return SQLITE_OK if all checks pass without | ||||
18145 | ** error, or some other SQLite error code if another error (e.g. OOM) | ||||
18146 | ** occurs. | ||||
18147 | */ | ||||
18148 | static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){ | ||||
18149 | int eDetail = p->pConfig->eDetail; | ||||
18150 | u64 cksum2 = 0; /* Checksum based on contents of indexes */ | ||||
18151 | Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ | ||||
18152 | Fts5Iter *pIter; /* Used to iterate through entire index */ | ||||
18153 | Fts5Structure *pStruct; /* Index structure */ | ||||
18154 | int iLvl, iSeg; | ||||
18155 | |||||
18156 | #ifdef SQLITE_DEBUG | ||||
18157 | /* Used by extra internal tests only run if NDEBUG is not defined */ | ||||
18158 | u64 cksum3 = 0; /* Checksum based on contents of indexes */ | ||||
18159 | Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ | ||||
18160 | #endif | ||||
18161 | const int flags = FTS5INDEX_QUERY_NOOUTPUT0x0020; | ||||
18162 | |||||
18163 | /* Load the FTS index structure */ | ||||
18164 | pStruct = fts5StructureRead(p); | ||||
18165 | if( pStruct==0 ){ | ||||
18166 | assert( p->rc!=SQLITE_OK )((void) (0)); | ||||
18167 | return fts5IndexReturn(p); | ||||
18168 | } | ||||
18169 | |||||
18170 | /* Check that the internal nodes of each segment match the leaves */ | ||||
18171 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | ||||
18172 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | ||||
18173 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | ||||
18174 | fts5IndexIntegrityCheckSegment(p, pSeg); | ||||
18175 | } | ||||
18176 | } | ||||
18177 | |||||
18178 | /* The cksum argument passed to this function is a checksum calculated | ||||
18179 | ** based on all expected entries in the FTS index (including prefix index | ||||
18180 | ** entries). This block checks that a checksum calculated based on the | ||||
18181 | ** actual contents of FTS index is identical. | ||||
18182 | ** | ||||
18183 | ** Two versions of the same checksum are calculated. The first (stack | ||||
18184 | ** variable cksum2) based on entries extracted from the full-text index | ||||
18185 | ** while doing a linear scan of each individual index in turn. | ||||
18186 | ** | ||||
18187 | ** As each term visited by the linear scans, a separate query for the | ||||
18188 | ** same term is performed. cksum3 is calculated based on the entries | ||||
18189 | ** extracted by these queries. | ||||
18190 | */ | ||||
18191 | for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter); | ||||
18192 | fts5MultiIterEof(p, pIter)==0; | ||||
18193 | fts5MultiIterNext(p, pIter, 0, 0) | ||||
18194 | ){ | ||||
18195 | int n; /* Size of term in bytes */ | ||||
18196 | i64 iPos = 0; /* Position read from poslist */ | ||||
18197 | int iOff = 0; /* Offset within poslist */ | ||||
18198 | i64 iRowid = fts5MultiIterRowid(pIter); | ||||
18199 | char *z = (char*)fts5MultiIterTerm(pIter, &n); | ||||
18200 | |||||
18201 | /* If this is a new term, query for it. Update cksum3 with the results. */ | ||||
18202 | fts5TestTerm(p, &term, z, n, cksum2, &cksum3); | ||||
18203 | if( p->rc ) break; | ||||
18204 | |||||
18205 | if( eDetail==FTS5_DETAIL_NONE1 ){ | ||||
18206 | if( 0==fts5MultiIterIsEmpty(p, pIter) ){ | ||||
18207 | cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); | ||||
18208 | } | ||||
18209 | }else{ | ||||
18210 | poslist.n = 0; | ||||
18211 | fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist); | ||||
18212 | fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0")sqlite3Fts5BufferAppendBlob(&p->rc,&poslist,4,(const u8*)"\0\0\0\0"); | ||||
18213 | while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ | ||||
18214 | int iCol = FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF); | ||||
18215 | int iTokOff = FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF); | ||||
18216 | cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); | ||||
18217 | } | ||||
18218 | } | ||||
18219 | } | ||||
18220 | fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); | ||||
18221 | |||||
18222 | fts5MultiIterFree(pIter); | ||||
18223 | if( p->rc==SQLITE_OK0 && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18224 | |||||
18225 | fts5StructureRelease(pStruct); | ||||
18226 | #ifdef SQLITE_DEBUG | ||||
18227 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | ||||
18228 | #endif | ||||
18229 | fts5BufferFree(&poslist)sqlite3Fts5BufferFree(&poslist); | ||||
18230 | return fts5IndexReturn(p); | ||||
18231 | } | ||||
18232 | |||||
18233 | /************************************************************************* | ||||
18234 | ************************************************************************** | ||||
18235 | ** Below this point is the implementation of the fts5_decode() scalar | ||||
18236 | ** function only. | ||||
18237 | */ | ||||
18238 | |||||
18239 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18240 | /* | ||||
18241 | ** Decode a segment-data rowid from the %_data table. This function is | ||||
18242 | ** the opposite of macro FTS5_SEGMENT_ROWID(). | ||||
18243 | */ | ||||
18244 | static void fts5DecodeRowid( | ||||
18245 | i64 iRowid, /* Rowid from %_data table */ | ||||
18246 | int *pbTombstone, /* OUT: Tombstone hash flag */ | ||||
18247 | int *piSegid, /* OUT: Segment id */ | ||||
18248 | int *pbDlidx, /* OUT: Dlidx flag */ | ||||
18249 | int *piHeight, /* OUT: Height */ | ||||
18250 | int *piPgno /* OUT: Page number */ | ||||
18251 | ){ | ||||
18252 | *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B31) - 1)); | ||||
18253 | iRowid >>= FTS5_DATA_PAGE_B31; | ||||
18254 | |||||
18255 | *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B5) - 1)); | ||||
18256 | iRowid >>= FTS5_DATA_HEIGHT_B5; | ||||
18257 | |||||
18258 | *pbDlidx = (int)(iRowid & 0x0001); | ||||
18259 | iRowid >>= FTS5_DATA_DLI_B1; | ||||
18260 | |||||
18261 | *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B16) - 1)); | ||||
18262 | iRowid >>= FTS5_DATA_ID_B16; | ||||
18263 | |||||
18264 | *pbTombstone = (int)(iRowid & 0x0001); | ||||
18265 | } | ||||
18266 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18267 | |||||
18268 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18269 | static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ | ||||
18270 | int iSegid, iHeight, iPgno, bDlidx, bTomb; /* Rowid components */ | ||||
18271 | fts5DecodeRowid(iKey, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno); | ||||
18272 | |||||
18273 | if( iSegid==0 ){ | ||||
18274 | if( iKey==FTS5_AVERAGES_ROWID1 ){ | ||||
18275 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} "); | ||||
18276 | }else{ | ||||
18277 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}"); | ||||
18278 | } | ||||
18279 | } | ||||
18280 | else{ | ||||
18281 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}", | ||||
18282 | bDlidx ? "dlidx " : "", | ||||
18283 | bTomb ? "tombstone " : "", | ||||
18284 | iSegid, iHeight, iPgno | ||||
18285 | ); | ||||
18286 | } | ||||
18287 | } | ||||
18288 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18289 | |||||
18290 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18291 | static void fts5DebugStructure( | ||||
18292 | int *pRc, /* IN/OUT: error code */ | ||||
18293 | Fts5Buffer *pBuf, | ||||
18294 | Fts5Structure *p | ||||
18295 | ){ | ||||
18296 | int iLvl, iSeg; /* Iterate through levels, segments */ | ||||
18297 | |||||
18298 | for(iLvl=0; iLvl<p->nLevel; iLvl++){ | ||||
18299 | Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; | ||||
18300 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, | ||||
18301 | " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg | ||||
18302 | ); | ||||
18303 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | ||||
18304 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | ||||
18305 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d", | ||||
18306 | pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast | ||||
18307 | ); | ||||
18308 | if( pSeg->iOrigin1>0 ){ | ||||
18309 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld", | ||||
18310 | pSeg->iOrigin1, pSeg->iOrigin2 | ||||
18311 | ); | ||||
18312 | } | ||||
18313 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); | ||||
18314 | } | ||||
18315 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); | ||||
18316 | } | ||||
18317 | } | ||||
18318 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18319 | |||||
18320 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18321 | /* | ||||
18322 | ** This is part of the fts5_decode() debugging aid. | ||||
18323 | ** | ||||
18324 | ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This | ||||
18325 | ** function appends a human-readable representation of the same object | ||||
18326 | ** to the buffer passed as the second argument. | ||||
18327 | */ | ||||
18328 | static void fts5DecodeStructure( | ||||
18329 | int *pRc, /* IN/OUT: error code */ | ||||
18330 | Fts5Buffer *pBuf, | ||||
18331 | const u8 *pBlob, int nBlob | ||||
18332 | ){ | ||||
18333 | int rc; /* Return code */ | ||||
18334 | Fts5Structure *p = 0; /* Decoded structure object */ | ||||
18335 | |||||
18336 | rc = fts5StructureDecode(pBlob, nBlob, 0, &p); | ||||
18337 | if( rc!=SQLITE_OK0 ){ | ||||
18338 | *pRc = rc; | ||||
18339 | return; | ||||
18340 | } | ||||
18341 | |||||
18342 | fts5DebugStructure(pRc, pBuf, p); | ||||
18343 | fts5StructureRelease(p); | ||||
18344 | } | ||||
18345 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18346 | |||||
18347 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18348 | /* | ||||
18349 | ** This is part of the fts5_decode() debugging aid. | ||||
18350 | ** | ||||
18351 | ** Arguments pBlob/nBlob contain an "averages" record. This function | ||||
18352 | ** appends a human-readable representation of record to the buffer passed | ||||
18353 | ** as the second argument. | ||||
18354 | */ | ||||
18355 | static void fts5DecodeAverages( | ||||
18356 | int *pRc, /* IN/OUT: error code */ | ||||
18357 | Fts5Buffer *pBuf, | ||||
18358 | const u8 *pBlob, int nBlob | ||||
18359 | ){ | ||||
18360 | int i = 0; | ||||
18361 | const char *zSpace = ""; | ||||
18362 | |||||
18363 | while( i<nBlob ){ | ||||
18364 | u64 iVal; | ||||
18365 | i += sqlite3Fts5GetVarint(&pBlob[i], &iVal); | ||||
18366 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal); | ||||
18367 | zSpace = " "; | ||||
18368 | } | ||||
18369 | } | ||||
18370 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18371 | |||||
18372 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18373 | /* | ||||
18374 | ** Buffer (a/n) is assumed to contain a list of serialized varints. Read | ||||
18375 | ** each varint and append its string representation to buffer pBuf. Return | ||||
18376 | ** after either the input buffer is exhausted or a 0 value is read. | ||||
18377 | ** | ||||
18378 | ** The return value is the number of bytes read from the input buffer. | ||||
18379 | */ | ||||
18380 | static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ | ||||
18381 | int iOff = 0; | ||||
18382 | while( iOff<n ){ | ||||
18383 | int iVal; | ||||
18384 | iOff += fts5GetVarint32(&a[iOff], iVal)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(iVal)); | ||||
18385 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); | ||||
18386 | } | ||||
18387 | return iOff; | ||||
18388 | } | ||||
18389 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18390 | |||||
18391 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18392 | /* | ||||
18393 | ** The start of buffer (a/n) contains the start of a doclist. The doclist | ||||
18394 | ** may or may not finish within the buffer. This function appends a text | ||||
18395 | ** representation of the part of the doclist that is present to buffer | ||||
18396 | ** pBuf. | ||||
18397 | ** | ||||
18398 | ** The return value is the number of bytes read from the input buffer. | ||||
18399 | */ | ||||
18400 | static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ | ||||
18401 | i64 iDocid = 0; | ||||
18402 | int iOff = 0; | ||||
18403 | |||||
18404 | if( n>0 ){ | ||||
18405 | iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); | ||||
18406 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); | ||||
18407 | } | ||||
18408 | while( iOff<n ){ | ||||
18409 | int nPos; | ||||
18410 | int bDel; | ||||
18411 | iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel); | ||||
18412 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":""); | ||||
18413 | iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)(((n-iOff) < (nPos)) ? (n-iOff) : (nPos))); | ||||
18414 | if( iOff<n ){ | ||||
18415 | i64 iDelta; | ||||
18416 | iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); | ||||
18417 | iDocid += iDelta; | ||||
18418 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); | ||||
18419 | } | ||||
18420 | } | ||||
18421 | |||||
18422 | return iOff; | ||||
18423 | } | ||||
18424 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18425 | |||||
18426 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18427 | /* | ||||
18428 | ** This function is part of the fts5_decode() debugging function. It is | ||||
18429 | ** only ever used with detail=none tables. | ||||
18430 | ** | ||||
18431 | ** Buffer (pData/nData) contains a doclist in the format used by detail=none | ||||
18432 | ** tables. This function appends a human-readable version of that list to | ||||
18433 | ** buffer pBuf. | ||||
18434 | ** | ||||
18435 | ** If *pRc is other than SQLITE_OK when this function is called, it is a | ||||
18436 | ** no-op. If an OOM or other error occurs within this function, *pRc is | ||||
18437 | ** set to an SQLite error code before returning. The final state of buffer | ||||
18438 | ** pBuf is undefined in this case. | ||||
18439 | */ | ||||
18440 | static void fts5DecodeRowidList( | ||||
18441 | int *pRc, /* IN/OUT: Error code */ | ||||
18442 | Fts5Buffer *pBuf, /* Buffer to append text to */ | ||||
18443 | const u8 *pData, int nData /* Data to decode list-of-rowids from */ | ||||
18444 | ){ | ||||
18445 | int i = 0; | ||||
18446 | i64 iRowid = 0; | ||||
18447 | |||||
18448 | while( i<nData ){ | ||||
18449 | const char *zApp = ""; | ||||
18450 | u64 iVal; | ||||
18451 | i += sqlite3Fts5GetVarint(&pData[i], &iVal); | ||||
18452 | iRowid += iVal; | ||||
18453 | |||||
18454 | if( i<nData && pData[i]==0x00 ){ | ||||
18455 | i++; | ||||
18456 | if( i<nData && pData[i]==0x00 ){ | ||||
18457 | i++; | ||||
18458 | zApp = "+"; | ||||
18459 | }else{ | ||||
18460 | zApp = "*"; | ||||
18461 | } | ||||
18462 | } | ||||
18463 | |||||
18464 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp); | ||||
18465 | } | ||||
18466 | } | ||||
18467 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18468 | |||||
18469 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18470 | static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){ | ||||
18471 | int ii; | ||||
18472 | fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1)( (u32)((pBuf)->n) + (u32)(pTerm->n*2 + 1) <= (u32)( (pBuf)->nSpace) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),( pTerm->n*2 + 1)+(pBuf)->n) ); | ||||
18473 | if( *pRc==SQLITE_OK0 ){ | ||||
18474 | for(ii=0; ii<pTerm->n; ii++){ | ||||
18475 | if( pTerm->p[ii]==0x00 ){ | ||||
18476 | pBuf->p[pBuf->n++] = '\\'; | ||||
18477 | pBuf->p[pBuf->n++] = '0'; | ||||
18478 | }else{ | ||||
18479 | pBuf->p[pBuf->n++] = pTerm->p[ii]; | ||||
18480 | } | ||||
18481 | } | ||||
18482 | pBuf->p[pBuf->n] = 0x00; | ||||
18483 | } | ||||
18484 | } | ||||
18485 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18486 | |||||
18487 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18488 | /* | ||||
18489 | ** The implementation of user-defined scalar function fts5_decode(). | ||||
18490 | */ | ||||
18491 | static void fts5DecodeFunction( | ||||
18492 | sqlite3_context *pCtx, /* Function call context */ | ||||
18493 | int nArg, /* Number of args (always 2) */ | ||||
18494 | sqlite3_value **apVal /* Function arguments */ | ||||
18495 | ){ | ||||
18496 | i64 iRowid; /* Rowid for record being decoded */ | ||||
18497 | int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ | ||||
18498 | int bTomb; | ||||
18499 | const u8 *aBlob; int n; /* Record to decode */ | ||||
18500 | u8 *a = 0; | ||||
18501 | Fts5Buffer s; /* Build up text to return here */ | ||||
18502 | int rc = SQLITE_OK0; /* Return code */ | ||||
18503 | sqlite3_int64 nSpace = 0; | ||||
18504 | int eDetailNone = (sqlite3_user_datasqlite3_api->user_data(pCtx)!=0); | ||||
18505 | |||||
18506 | assert( nArg==2 )((void) (0)); | ||||
18507 | UNUSED_PARAM(nArg)(void)(nArg); | ||||
18508 | memset(&s, 0, sizeof(Fts5Buffer)); | ||||
18509 | iRowid = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); | ||||
18510 | |||||
18511 | /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] | ||||
18512 | ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents | ||||
18513 | ** buffer overreads even if the record is corrupt. */ | ||||
18514 | n = sqlite3_value_bytessqlite3_api->value_bytes(apVal[1]); | ||||
18515 | aBlob = sqlite3_value_blobsqlite3_api->value_blob(apVal[1]); | ||||
18516 | nSpace = ((i64)n) + FTS5_DATA_ZERO_PADDING8; | ||||
18517 | a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); | ||||
18518 | if( a==0 ) goto decode_out; | ||||
18519 | if( n>0 ) memcpy(a, aBlob, n); | ||||
18520 | |||||
18521 | fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno); | ||||
18522 | |||||
18523 | fts5DebugRowid(&rc, &s, iRowid); | ||||
18524 | if( bDlidx ){ | ||||
18525 | Fts5Data dlidx; | ||||
18526 | Fts5DlidxLvl lvl; | ||||
18527 | |||||
18528 | dlidx.p = a; | ||||
18529 | dlidx.nn = n; | ||||
18530 | |||||
18531 | memset(&lvl, 0, sizeof(Fts5DlidxLvl)); | ||||
18532 | lvl.pData = &dlidx; | ||||
18533 | lvl.iLeafPgno = iPgno; | ||||
18534 | |||||
18535 | for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ | ||||
18536 | sqlite3Fts5BufferAppendPrintf(&rc, &s, | ||||
18537 | " %d(%lld)", lvl.iLeafPgno, lvl.iRowid | ||||
18538 | ); | ||||
18539 | } | ||||
18540 | }else if( bTomb ){ | ||||
18541 | u32 nElem = fts5GetU32(&a[4]); | ||||
18542 | int szKey = (aBlob[0]==4 || aBlob[0]==8) ? aBlob[0] : 8; | ||||
18543 | int nSlot = (n - 8) / szKey; | ||||
18544 | int ii; | ||||
18545 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem); | ||||
18546 | if( aBlob[1] ){ | ||||
18547 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0"); | ||||
18548 | } | ||||
18549 | for(ii=0; ii<nSlot; ii++){ | ||||
18550 | u64 iVal = 0; | ||||
18551 | if( szKey==4 ){ | ||||
18552 | u32 *aSlot = (u32*)&aBlob[8]; | ||||
18553 | if( aSlot[ii] ) iVal = fts5GetU32((u8*)&aSlot[ii]); | ||||
18554 | }else{ | ||||
18555 | u64 *aSlot = (u64*)&aBlob[8]; | ||||
18556 | if( aSlot[ii] ) iVal = fts5GetU64((u8*)&aSlot[ii]); | ||||
18557 | } | ||||
18558 | if( iVal!=0 ){ | ||||
18559 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal); | ||||
18560 | } | ||||
18561 | } | ||||
18562 | }else if( iSegid==0 ){ | ||||
18563 | if( iRowid==FTS5_AVERAGES_ROWID1 ){ | ||||
18564 | fts5DecodeAverages(&rc, &s, a, n); | ||||
18565 | }else{ | ||||
18566 | fts5DecodeStructure(&rc, &s, a, n); | ||||
18567 | } | ||||
18568 | }else if( eDetailNone ){ | ||||
18569 | Fts5Buffer term; /* Current term read from page */ | ||||
18570 | int szLeaf; | ||||
18571 | int iPgidxOff = szLeaf = fts5GetU16(&a[2]); | ||||
18572 | int iTermOff; | ||||
18573 | int nKeep = 0; | ||||
18574 | int iOff; | ||||
18575 | |||||
18576 | memset(&term, 0, sizeof(Fts5Buffer)); | ||||
18577 | |||||
18578 | /* Decode any entries that occur before the first term. */ | ||||
18579 | if( szLeaf<n ){ | ||||
18580 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(iTermOff )); | ||||
18581 | }else{ | ||||
18582 | iTermOff = szLeaf; | ||||
18583 | } | ||||
18584 | fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4); | ||||
18585 | |||||
18586 | iOff = iTermOff; | ||||
18587 | while( iOff<szLeaf && rc==SQLITE_OK0 ){ | ||||
18588 | int nAppend; | ||||
18589 | |||||
18590 | /* Read the term data for the next term*/ | ||||
18591 | iOff += fts5GetVarint32(&a[iOff], nAppend)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nAppend)); | ||||
18592 | term.n = nKeep; | ||||
18593 | fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff])sqlite3Fts5BufferAppendBlob(&rc,&term,nAppend,&a[ iOff]); | ||||
18594 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); | ||||
18595 | fts5BufferAppendTerm(&rc, &s, &term); | ||||
18596 | iOff += nAppend; | ||||
18597 | |||||
18598 | /* Figure out where the doclist for this term ends */ | ||||
18599 | if( iPgidxOff<n ){ | ||||
18600 | int nIncr; | ||||
18601 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nIncr)); | ||||
18602 | iTermOff += nIncr; | ||||
18603 | }else{ | ||||
18604 | iTermOff = szLeaf; | ||||
18605 | } | ||||
18606 | if( iTermOff>szLeaf ){ | ||||
18607 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18608 | }else{ | ||||
18609 | fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff); | ||||
18610 | } | ||||
18611 | iOff = iTermOff; | ||||
18612 | if( iOff<szLeaf ){ | ||||
18613 | iOff += fts5GetVarint32(&a[iOff], nKeep)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nKeep)); | ||||
18614 | } | ||||
18615 | } | ||||
18616 | |||||
18617 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | ||||
18618 | }else{ | ||||
18619 | Fts5Buffer term; /* Current term read from page */ | ||||
18620 | int szLeaf; /* Offset of pgidx in a[] */ | ||||
18621 | int iPgidxOff; | ||||
18622 | int iPgidxPrev = 0; /* Previous value read from pgidx */ | ||||
18623 | int iTermOff = 0; | ||||
18624 | int iRowidOff = 0; | ||||
18625 | int iOff; | ||||
18626 | int nDoclist; | ||||
18627 | |||||
18628 | memset(&term, 0, sizeof(Fts5Buffer)); | ||||
18629 | |||||
18630 | if( n<4 ){ | ||||
18631 | sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt"); | ||||
18632 | goto decode_out; | ||||
18633 | }else{ | ||||
18634 | iRowidOff = fts5GetU16(&a[0]); | ||||
18635 | iPgidxOff = szLeaf = fts5GetU16(&a[2]); | ||||
18636 | if( iPgidxOff<n ){ | ||||
18637 | fts5GetVarint32(&a[iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(iTermOff )); | ||||
18638 | }else if( iPgidxOff>n ){ | ||||
18639 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18640 | goto decode_out; | ||||
18641 | } | ||||
18642 | } | ||||
18643 | |||||
18644 | /* Decode the position list tail at the start of the page */ | ||||
18645 | if( iRowidOff!=0 ){ | ||||
18646 | iOff = iRowidOff; | ||||
18647 | }else if( iTermOff!=0 ){ | ||||
18648 | iOff = iTermOff; | ||||
18649 | }else{ | ||||
18650 | iOff = szLeaf; | ||||
18651 | } | ||||
18652 | if( iOff>n ){ | ||||
18653 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18654 | goto decode_out; | ||||
18655 | } | ||||
18656 | fts5DecodePoslist(&rc, &s, &a[4], iOff-4); | ||||
18657 | |||||
18658 | /* Decode any more doclist data that appears on the page before the | ||||
18659 | ** first term. */ | ||||
18660 | nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff; | ||||
18661 | if( nDoclist+iOff>n ){ | ||||
18662 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18663 | goto decode_out; | ||||
18664 | } | ||||
18665 | fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); | ||||
18666 | |||||
18667 | while( iPgidxOff<n && rc==SQLITE_OK0 ){ | ||||
18668 | int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ | ||||
18669 | int nByte; /* Bytes of data */ | ||||
18670 | int iEnd; | ||||
18671 | |||||
18672 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nByte)); | ||||
18673 | iPgidxPrev += nByte; | ||||
18674 | iOff = iPgidxPrev; | ||||
18675 | |||||
18676 | if( iPgidxOff<n ){ | ||||
18677 | fts5GetVarint32(&a[iPgidxOff], nByte)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nByte)); | ||||
18678 | iEnd = iPgidxPrev + nByte; | ||||
18679 | }else{ | ||||
18680 | iEnd = szLeaf; | ||||
18681 | } | ||||
18682 | if( iEnd>szLeaf ){ | ||||
18683 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18684 | break; | ||||
18685 | } | ||||
18686 | |||||
18687 | if( bFirst==0 ){ | ||||
18688 | iOff += fts5GetVarint32(&a[iOff], nByte)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nByte)); | ||||
18689 | if( nByte>term.n ){ | ||||
18690 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18691 | break; | ||||
18692 | } | ||||
18693 | term.n = nByte; | ||||
18694 | } | ||||
18695 | iOff += fts5GetVarint32(&a[iOff], nByte)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nByte)); | ||||
18696 | if( iOff+nByte>n ){ | ||||
18697 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
18698 | break; | ||||
18699 | } | ||||
18700 | fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff])sqlite3Fts5BufferAppendBlob(&rc,&term,nByte,&a[iOff ]); | ||||
18701 | iOff += nByte; | ||||
18702 | |||||
18703 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); | ||||
18704 | fts5BufferAppendTerm(&rc, &s, &term); | ||||
18705 | iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); | ||||
18706 | } | ||||
18707 | |||||
18708 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | ||||
18709 | } | ||||
18710 | |||||
18711 | decode_out: | ||||
18712 | sqlite3_freesqlite3_api->free(a); | ||||
18713 | if( rc==SQLITE_OK0 ){ | ||||
18714 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
18715 | }else{ | ||||
18716 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | ||||
18717 | } | ||||
18718 | fts5BufferFree(&s)sqlite3Fts5BufferFree(&s); | ||||
18719 | } | ||||
18720 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18721 | |||||
18722 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18723 | /* | ||||
18724 | ** The implementation of user-defined scalar function fts5_rowid(). | ||||
18725 | */ | ||||
18726 | static void fts5RowidFunction( | ||||
18727 | sqlite3_context *pCtx, /* Function call context */ | ||||
18728 | int nArg, /* Number of args (always 2) */ | ||||
18729 | sqlite3_value **apVal /* Function arguments */ | ||||
18730 | ){ | ||||
18731 | const char *zArg; | ||||
18732 | if( nArg==0 ){ | ||||
18733 | sqlite3_result_errorsqlite3_api->result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); | ||||
18734 | }else{ | ||||
18735 | zArg = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[0]); | ||||
18736 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zArg, "segment") ){ | ||||
18737 | i64 iRowid; | ||||
18738 | int segid, pgno; | ||||
18739 | if( nArg!=3 ){ | ||||
18740 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | ||||
18741 | "should be: fts5_rowid('segment', segid, pgno))", -1 | ||||
18742 | ); | ||||
18743 | }else{ | ||||
18744 | segid = sqlite3_value_intsqlite3_api->value_int(apVal[1]); | ||||
18745 | pgno = sqlite3_value_intsqlite3_api->value_int(apVal[2]); | ||||
18746 | iRowid = FTS5_SEGMENT_ROWID(segid, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | ||||
18747 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, iRowid); | ||||
18748 | } | ||||
18749 | }else{ | ||||
18750 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | ||||
18751 | "first arg to fts5_rowid() must be 'segment'" , -1 | ||||
18752 | ); | ||||
18753 | } | ||||
18754 | } | ||||
18755 | } | ||||
18756 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18757 | |||||
18758 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18759 | |||||
18760 | typedef struct Fts5StructVtab Fts5StructVtab; | ||||
18761 | struct Fts5StructVtab { | ||||
18762 | sqlite3_vtab base; | ||||
18763 | }; | ||||
18764 | |||||
18765 | typedef struct Fts5StructVcsr Fts5StructVcsr; | ||||
18766 | struct Fts5StructVcsr { | ||||
18767 | sqlite3_vtab_cursor base; | ||||
18768 | Fts5Structure *pStruct; | ||||
18769 | int iLevel; | ||||
18770 | int iSeg; | ||||
18771 | int iRowid; | ||||
18772 | }; | ||||
18773 | |||||
18774 | /* | ||||
18775 | ** Create a new fts5_structure() table-valued function. | ||||
18776 | */ | ||||
18777 | static int fts5structConnectMethod( | ||||
18778 | sqlite3 *db, | ||||
18779 | void *pAux, | ||||
18780 | int argc, const char *const*argv, | ||||
18781 | sqlite3_vtab **ppVtab, | ||||
18782 | char **pzErr | ||||
18783 | ){ | ||||
18784 | Fts5StructVtab *pNew = 0; | ||||
18785 | int rc = SQLITE_OK0; | ||||
18786 | |||||
18787 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, | ||||
18788 | "CREATE TABLE xyz(" | ||||
18789 | "level, segment, merge, segid, leaf1, leaf2, loc1, loc2, " | ||||
18790 | "npgtombstone, nentrytombstone, nentry, struct HIDDEN);" | ||||
18791 | ); | ||||
18792 | if( rc==SQLITE_OK0 ){ | ||||
18793 | pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | ||||
18794 | } | ||||
18795 | |||||
18796 | *ppVtab = (sqlite3_vtab*)pNew; | ||||
18797 | return rc; | ||||
18798 | } | ||||
18799 | |||||
18800 | /* | ||||
18801 | ** We must have a single struct=? constraint that will be passed through | ||||
18802 | ** into the xFilter method. If there is no valid struct=? constraint, | ||||
18803 | ** then return an SQLITE_CONSTRAINT error. | ||||
18804 | */ | ||||
18805 | static int fts5structBestIndexMethod( | ||||
18806 | sqlite3_vtab *tab, | ||||
18807 | sqlite3_index_info *pIdxInfo | ||||
18808 | ){ | ||||
18809 | int i; | ||||
18810 | int rc = SQLITE_CONSTRAINT19; | ||||
18811 | struct sqlite3_index_constraint *p; | ||||
18812 | pIdxInfo->estimatedCost = (double)100; | ||||
18813 | pIdxInfo->estimatedRows = 100; | ||||
18814 | pIdxInfo->idxNum = 0; | ||||
18815 | for(i=0, p=pIdxInfo->aConstraint; i<pIdxInfo->nConstraint; i++, p++){ | ||||
18816 | if( p->usable==0 ) continue; | ||||
18817 | if( p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && p->iColumn==11 ){ | ||||
18818 | rc = SQLITE_OK0; | ||||
18819 | pIdxInfo->aConstraintUsage[i].omit = 1; | ||||
18820 | pIdxInfo->aConstraintUsage[i].argvIndex = 1; | ||||
18821 | break; | ||||
18822 | } | ||||
18823 | } | ||||
18824 | return rc; | ||||
18825 | } | ||||
18826 | |||||
18827 | /* | ||||
18828 | ** This method is the destructor for bytecodevtab objects. | ||||
18829 | */ | ||||
18830 | static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){ | ||||
18831 | Fts5StructVtab *p = (Fts5StructVtab*)pVtab; | ||||
18832 | sqlite3_freesqlite3_api->free(p); | ||||
18833 | return SQLITE_OK0; | ||||
18834 | } | ||||
18835 | |||||
18836 | /* | ||||
18837 | ** Constructor for a new bytecodevtab_cursor object. | ||||
18838 | */ | ||||
18839 | static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){ | ||||
18840 | int rc = SQLITE_OK0; | ||||
18841 | Fts5StructVcsr *pNew = 0; | ||||
18842 | |||||
18843 | pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | ||||
18844 | *ppCsr = (sqlite3_vtab_cursor*)pNew; | ||||
18845 | |||||
18846 | return SQLITE_OK0; | ||||
18847 | } | ||||
18848 | |||||
18849 | /* | ||||
18850 | ** Destructor for a bytecodevtab_cursor. | ||||
18851 | */ | ||||
18852 | static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){ | ||||
18853 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | ||||
18854 | fts5StructureRelease(pCsr->pStruct); | ||||
18855 | sqlite3_freesqlite3_api->free(pCsr); | ||||
18856 | return SQLITE_OK0; | ||||
18857 | } | ||||
18858 | |||||
18859 | |||||
18860 | /* | ||||
18861 | ** Advance a bytecodevtab_cursor to its next row of output. | ||||
18862 | */ | ||||
18863 | static int fts5structNextMethod(sqlite3_vtab_cursor *cur){ | ||||
18864 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | ||||
18865 | Fts5Structure *p = pCsr->pStruct; | ||||
18866 | |||||
18867 | assert( pCsr->pStruct )((void) (0)); | ||||
18868 | pCsr->iSeg++; | ||||
18869 | pCsr->iRowid++; | ||||
18870 | while( pCsr->iLevel<p->nLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){ | ||||
18871 | pCsr->iLevel++; | ||||
18872 | pCsr->iSeg = 0; | ||||
18873 | } | ||||
18874 | if( pCsr->iLevel>=p->nLevel ){ | ||||
18875 | fts5StructureRelease(pCsr->pStruct); | ||||
18876 | pCsr->pStruct = 0; | ||||
18877 | } | ||||
18878 | return SQLITE_OK0; | ||||
18879 | } | ||||
18880 | |||||
18881 | /* | ||||
18882 | ** Return TRUE if the cursor has been moved off of the last | ||||
18883 | ** row of output. | ||||
18884 | */ | ||||
18885 | static int fts5structEofMethod(sqlite3_vtab_cursor *cur){ | ||||
18886 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | ||||
18887 | return pCsr->pStruct==0; | ||||
18888 | } | ||||
18889 | |||||
18890 | static int fts5structRowidMethod( | ||||
18891 | sqlite3_vtab_cursor *cur, | ||||
18892 | sqlite_int64 *piRowid | ||||
18893 | ){ | ||||
18894 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | ||||
18895 | *piRowid = pCsr->iRowid; | ||||
18896 | return SQLITE_OK0; | ||||
18897 | } | ||||
18898 | |||||
18899 | /* | ||||
18900 | ** Return values of columns for the row at which the bytecodevtab_cursor | ||||
18901 | ** is currently pointing. | ||||
18902 | */ | ||||
18903 | static int fts5structColumnMethod( | ||||
18904 | sqlite3_vtab_cursor *cur, /* The cursor */ | ||||
18905 | sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ | ||||
18906 | int i /* Which column to return */ | ||||
18907 | ){ | ||||
18908 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | ||||
18909 | Fts5Structure *p = pCsr->pStruct; | ||||
18910 | Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg]; | ||||
18911 | |||||
18912 | switch( i ){ | ||||
18913 | case 0: /* level */ | ||||
18914 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iLevel); | ||||
18915 | break; | ||||
18916 | case 1: /* segment */ | ||||
18917 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iSeg); | ||||
18918 | break; | ||||
18919 | case 2: /* merge */ | ||||
18920 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge); | ||||
18921 | break; | ||||
18922 | case 3: /* segid */ | ||||
18923 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->iSegid); | ||||
18924 | break; | ||||
18925 | case 4: /* leaf1 */ | ||||
18926 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->pgnoFirst); | ||||
18927 | break; | ||||
18928 | case 5: /* leaf2 */ | ||||
18929 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->pgnoLast); | ||||
18930 | break; | ||||
18931 | case 6: /* origin1 */ | ||||
18932 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->iOrigin1); | ||||
18933 | break; | ||||
18934 | case 7: /* origin2 */ | ||||
18935 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->iOrigin2); | ||||
18936 | break; | ||||
18937 | case 8: /* npgtombstone */ | ||||
18938 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->nPgTombstone); | ||||
18939 | break; | ||||
18940 | case 9: /* nentrytombstone */ | ||||
18941 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->nEntryTombstone); | ||||
18942 | break; | ||||
18943 | case 10: /* nentry */ | ||||
18944 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->nEntry); | ||||
18945 | break; | ||||
18946 | } | ||||
18947 | return SQLITE_OK0; | ||||
18948 | } | ||||
18949 | |||||
18950 | /* | ||||
18951 | ** Initialize a cursor. | ||||
18952 | ** | ||||
18953 | ** idxNum==0 means show all subprograms | ||||
18954 | ** idxNum==1 means show only the main bytecode and omit subprograms. | ||||
18955 | */ | ||||
18956 | static int fts5structFilterMethod( | ||||
18957 | sqlite3_vtab_cursor *pVtabCursor, | ||||
18958 | int idxNum, const char *idxStr, | ||||
18959 | int argc, sqlite3_value **argv | ||||
18960 | ){ | ||||
18961 | Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor; | ||||
18962 | int rc = SQLITE_OK0; | ||||
18963 | |||||
18964 | const u8 *aBlob = 0; | ||||
18965 | int nBlob = 0; | ||||
18966 | |||||
18967 | assert( argc==1 )((void) (0)); | ||||
18968 | fts5StructureRelease(pCsr->pStruct); | ||||
18969 | pCsr->pStruct = 0; | ||||
18970 | |||||
18971 | nBlob = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]); | ||||
18972 | aBlob = (const u8*)sqlite3_value_blobsqlite3_api->value_blob(argv[0]); | ||||
18973 | rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct); | ||||
18974 | if( rc==SQLITE_OK0 ){ | ||||
18975 | pCsr->iLevel = 0; | ||||
18976 | pCsr->iRowid = 0; | ||||
18977 | pCsr->iSeg = -1; | ||||
18978 | rc = fts5structNextMethod(pVtabCursor); | ||||
18979 | } | ||||
18980 | |||||
18981 | return rc; | ||||
18982 | } | ||||
18983 | |||||
18984 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | ||||
18985 | |||||
18986 | /* | ||||
18987 | ** This is called as part of registering the FTS5 module with database | ||||
18988 | ** connection db. It registers several user-defined scalar functions useful | ||||
18989 | ** with FTS5. | ||||
18990 | ** | ||||
18991 | ** If successful, SQLITE_OK is returned. If an error occurs, some other | ||||
18992 | ** SQLite error code is returned instead. | ||||
18993 | */ | ||||
18994 | static int sqlite3Fts5IndexInit(sqlite3 *db){ | ||||
18995 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | ||||
18996 | int rc = sqlite3_create_functionsqlite3_api->create_function( | ||||
18997 | db, "fts5_decode", 2, SQLITE_UTF81, 0, fts5DecodeFunction, 0, 0 | ||||
18998 | ); | ||||
18999 | |||||
19000 | if( rc==SQLITE_OK0 ){ | ||||
19001 | rc = sqlite3_create_functionsqlite3_api->create_function( | ||||
19002 | db, "fts5_decode_none", 2, | ||||
19003 | SQLITE_UTF81, (void*)db, fts5DecodeFunction, 0, 0 | ||||
19004 | ); | ||||
19005 | } | ||||
19006 | |||||
19007 | if( rc==SQLITE_OK0 ){ | ||||
19008 | rc = sqlite3_create_functionsqlite3_api->create_function( | ||||
19009 | db, "fts5_rowid", -1, SQLITE_UTF81, 0, fts5RowidFunction, 0, 0 | ||||
19010 | ); | ||||
19011 | } | ||||
19012 | |||||
19013 | if( rc==SQLITE_OK0 ){ | ||||
19014 | static const sqlite3_module fts5structure_module = { | ||||
19015 | 0, /* iVersion */ | ||||
19016 | 0, /* xCreate */ | ||||
19017 | fts5structConnectMethod, /* xConnect */ | ||||
19018 | fts5structBestIndexMethod, /* xBestIndex */ | ||||
19019 | fts5structDisconnectMethod, /* xDisconnect */ | ||||
19020 | 0, /* xDestroy */ | ||||
19021 | fts5structOpenMethod, /* xOpen */ | ||||
19022 | fts5structCloseMethod, /* xClose */ | ||||
19023 | fts5structFilterMethod, /* xFilter */ | ||||
19024 | fts5structNextMethod, /* xNext */ | ||||
19025 | fts5structEofMethod, /* xEof */ | ||||
19026 | fts5structColumnMethod, /* xColumn */ | ||||
19027 | fts5structRowidMethod, /* xRowid */ | ||||
19028 | 0, /* xUpdate */ | ||||
19029 | 0, /* xBegin */ | ||||
19030 | 0, /* xSync */ | ||||
19031 | 0, /* xCommit */ | ||||
19032 | 0, /* xRollback */ | ||||
19033 | 0, /* xFindFunction */ | ||||
19034 | 0, /* xRename */ | ||||
19035 | 0, /* xSavepoint */ | ||||
19036 | 0, /* xRelease */ | ||||
19037 | 0, /* xRollbackTo */ | ||||
19038 | 0, /* xShadowName */ | ||||
19039 | 0 /* xIntegrity */ | ||||
19040 | }; | ||||
19041 | rc = sqlite3_create_modulesqlite3_api->create_module(db, "fts5_structure", &fts5structure_module, 0); | ||||
19042 | } | ||||
19043 | return rc; | ||||
19044 | #else | ||||
19045 | return SQLITE_OK0; | ||||
19046 | UNUSED_PARAM(db)(void)(db); | ||||
19047 | #endif | ||||
19048 | } | ||||
19049 | |||||
19050 | |||||
19051 | static int sqlite3Fts5IndexReset(Fts5Index *p){ | ||||
19052 | assert( p->pStruct==0 || p->iStructVersion!=0 )((void) (0)); | ||||
19053 | if( fts5IndexDataVersion(p)!=p->iStructVersion ){ | ||||
19054 | fts5StructureInvalidate(p); | ||||
19055 | } | ||||
19056 | return fts5IndexReturn(p); | ||||
19057 | } | ||||
19058 | |||||
19059 | #line 1 "fts5_main.c" | ||||
19060 | /* | ||||
19061 | ** 2014 Jun 09 | ||||
19062 | ** | ||||
19063 | ** The author disclaims copyright to this source code. In place of | ||||
19064 | ** a legal notice, here is a blessing: | ||||
19065 | ** | ||||
19066 | ** May you do good and not evil. | ||||
19067 | ** May you find forgiveness for yourself and forgive others. | ||||
19068 | ** May you share freely, never taking more than you give. | ||||
19069 | ** | ||||
19070 | ****************************************************************************** | ||||
19071 | ** | ||||
19072 | ** This is an SQLite module implementing full-text search. | ||||
19073 | */ | ||||
19074 | |||||
19075 | |||||
19076 | /* #include "fts5Int.h" */ | ||||
19077 | |||||
/*
** Debug-only switch. Test scripts set this variable to false (0) when the
** on-disk fts5 structures are known not to be corrupt; it is true (1)
** otherwise. While it is false, additional assert() conditions in the fts5
** code are enabled - conditions that only hold if the fts5 database is
** guaranteed not to be corrupt.
*/
#ifdef SQLITE_DEBUG
int sqlite3_fts5_may_be_corrupt = 1;
#endif
19087 | |||||
19088 | |||||
/* Forward declarations of the object types used by this module. */
typedef struct Fts5Auxdata         Fts5Auxdata;
typedef struct Fts5Auxiliary       Fts5Auxiliary;
typedef struct Fts5Cursor          Fts5Cursor;
typedef struct Fts5FullTable       Fts5FullTable;
typedef struct Fts5Sorter          Fts5Sorter;
typedef struct Fts5TokenizerModule Fts5TokenizerModule;
19095 | |||||
19096 | /* | ||||
19097 | ** NOTES ON TRANSACTIONS: | ||||
19098 | ** | ||||
19099 | ** SQLite invokes the following virtual table methods as transactions are | ||||
19100 | ** opened and closed by the user: | ||||
19101 | ** | ||||
19102 | ** xBegin(): Start of a new transaction. | ||||
19103 | ** xSync(): Initial part of two-phase commit. | ||||
19104 | ** xCommit(): Final part of two-phase commit. | ||||
19105 | ** xRollback(): Rollback the transaction. | ||||
19106 | ** | ||||
19107 | ** Anything that is required as part of a commit that may fail is performed | ||||
19108 | ** in the xSync() callback. Current versions of SQLite ignore any errors | ||||
19109 | ** returned by xCommit(). | ||||
19110 | ** | ||||
19111 | ** And as sub-transactions are opened/closed: | ||||
19112 | ** | ||||
19113 | ** xSavepoint(int S): Open savepoint S. | ||||
19114 | ** xRelease(int S): Commit and close savepoint S. | ||||
19115 | ** xRollbackTo(int S): Rollback to start of savepoint S. | ||||
19116 | ** | ||||
19117 | ** During a write-transaction the fts5_index.c module may cache some data | ||||
19118 | ** in-memory. It is flushed to disk whenever xSync(), xRelease() or | ||||
19119 | ** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo() | ||||
19120 | ** is called. | ||||
19121 | ** | ||||
19122 | ** Additionally, if SQLITE_DEBUG is defined, an instance of the following | ||||
19123 | ** structure is used to record the current transaction state. This information | ||||
19124 | ** is not required, but it is used in the assert() statements executed by | ||||
19125 | ** function fts5CheckTransactionState() (see below). | ||||
19126 | */ | ||||
struct Fts5TransactionState {
  int eState;        /* Transaction state: 0==closed, 1==open, 2==synced */
  int iSavepoint;    /* Number of open savepoints (0 -> none) */
};
19131 | |||||
19132 | /* | ||||
19133 | ** A single object of this type is allocated when the FTS5 module is | ||||
19134 | ** registered with a database handle. It is used to store pointers to | ||||
19135 | ** all registered FTS5 extensions - tokenizers and auxiliary functions. | ||||
19136 | */ | ||||
19137 | struct Fts5Global { | ||||
19138 | fts5_api api; /* User visible part of object (see fts5.h) */ | ||||
19139 | sqlite3 *db; /* Associated database connection */ | ||||
19140 | i64 iNextId; /* Used to allocate unique cursor ids */ | ||||
19141 | Fts5Auxiliary *pAux; /* First in list of all aux. functions */ | ||||
19142 | Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ | ||||
19143 | Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ | ||||
19144 | Fts5Cursor *pCsr; /* First in list of all open cursors */ | ||||
19145 | u32 aLocaleHdr[4]; | ||||
19146 | }; | ||||
19147 | |||||
/*
** FTS5_LOCALE_HDR_SIZE is the size in bytes of the header on fts5_locale()
** values (the size of Fts5Global.aLocaleHdr). FTS5_LOCALE_HDR(pConfig)
** evaluates to a pointer to the copy of that header reachable through
** Fts5Config pointer pConfig. The macro argument is parenthesized so that
** any pointer-valued expression may be passed.
*/
#define FTS5_LOCALE_HDR_SIZE ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))
#define FTS5_LOCALE_HDR(pConfig) ((const u8*)((pConfig)->pGlobal->aLocaleHdr))

/* Value of the FTS5_INSTTOKEN_SUBTYPE constant used by this module */
#define FTS5_INSTTOKEN_SUBTYPE 73
19156 | |||||
19157 | /* | ||||
19158 | ** Each auxiliary function registered with the FTS5 module is represented | ||||
19159 | ** by an object of the following type. All such objects are stored as part | ||||
19160 | ** of the Fts5Global.pAux list. | ||||
19161 | */ | ||||
19162 | struct Fts5Auxiliary { | ||||
19163 | Fts5Global *pGlobal; /* Global context for this function */ | ||||
19164 | char *zFunc; /* Function name (nul-terminated) */ | ||||
19165 | void *pUserData; /* User-data pointer */ | ||||
19166 | fts5_extension_function xFunc; /* Callback function */ | ||||
19167 | void (*xDestroy)(void*); /* Destructor function */ | ||||
19168 | Fts5Auxiliary *pNext; /* Next registered auxiliary function */ | ||||
19169 | }; | ||||
19170 | |||||
19171 | /* | ||||
19172 | ** Each tokenizer module registered with the FTS5 module is represented | ||||
19173 | ** by an object of the following type. All such objects are stored as part | ||||
19174 | ** of the Fts5Global.pTok list. | ||||
19175 | ** | ||||
19176 | ** bV2Native: | ||||
19177 | ** True if the tokenizer was registered using xCreateTokenizer_v2(), false | ||||
19178 | ** for xCreateTokenizer(). If this variable is true, then x2 is populated | ||||
19179 | ** with the routines as supplied by the caller and x1 contains synthesized | ||||
19180 | ** wrapper routines. In this case the user-data pointer passed to | ||||
19181 | ** x1.xCreate should be a pointer to the Fts5TokenizerModule structure, | ||||
19182 | ** not a copy of pUserData. | ||||
19183 | ** | ||||
19184 | ** Of course, if bV2Native is false, then x1 contains the real routines and | ||||
19185 | ** x2 the synthesized ones. In this case a pointer to the Fts5TokenizerModule | ||||
19186 | ** object should be passed to x2.xCreate. | ||||
19187 | ** | ||||
19188 | ** The synthesized wrapper routines are necessary for xFindTokenizer(_v2) | ||||
19189 | ** calls. | ||||
19190 | */ | ||||
19191 | struct Fts5TokenizerModule { | ||||
19192 | char *zName; /* Name of tokenizer */ | ||||
19193 | void *pUserData; /* User pointer passed to xCreate() */ | ||||
19194 | int bV2Native; /* True if v2 native tokenizer */ | ||||
19195 | fts5_tokenizer x1; /* Tokenizer functions */ | ||||
19196 | fts5_tokenizer_v2 x2; /* V2 tokenizer functions */ | ||||
19197 | void (*xDestroy)(void*); /* Destructor function */ | ||||
19198 | Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ | ||||
19199 | }; | ||||
19200 | |||||
19201 | struct Fts5FullTable { | ||||
19202 | Fts5Table p; /* Public class members from fts5Int.h */ | ||||
19203 | Fts5Storage *pStorage; /* Document store */ | ||||
19204 | Fts5Global *pGlobal; /* Global (connection wide) data */ | ||||
19205 | Fts5Cursor *pSortCsr; /* Sort data from this cursor */ | ||||
19206 | int iSavepoint; /* Successful xSavepoint()+1 */ | ||||
19207 | |||||
19208 | #ifdef SQLITE_DEBUG | ||||
19209 | struct Fts5TransactionState ts; | ||||
19210 | #endif | ||||
19211 | }; | ||||
19212 | |||||
19213 | struct Fts5MatchPhrase { | ||||
19214 | Fts5Buffer *pPoslist; /* Pointer to current poslist */ | ||||
19215 | int nTerm; /* Size of phrase in terms */ | ||||
19216 | }; | ||||
19217 | |||||
19218 | /* | ||||
19219 | ** pStmt: | ||||
19220 | ** SELECT rowid, <fts> FROM <fts> ORDER BY +rank; | ||||
19221 | ** | ||||
19222 | ** aIdx[]: | ||||
19223 | ** There is one entry in the aIdx[] array for each phrase in the query, | ||||
19224 | ** the value of which is the offset within aPoslist[] following the last | ||||
19225 | ** byte of the position list for the corresponding phrase. | ||||
19226 | */ | ||||
19227 | struct Fts5Sorter { | ||||
19228 | sqlite3_stmt *pStmt; | ||||
19229 | i64 iRowid; /* Current rowid */ | ||||
19230 | const u8 *aPoslist; /* Position lists for current row */ | ||||
19231 | int nIdx; /* Number of entries in aIdx[] */ | ||||
19232 | int aIdx[FLEXARRAY]; /* Offsets into aPoslist for current row */ | ||||
19233 | }; | ||||
19234 | |||||
/* Size (in bytes) of an Fts5Sorter object with N indexes. The nIdx field
** and the N entries of aIdx[] are (N+1) ints, rounded up to a whole number
** of i64 units. N is parenthesized so that any integer expression may be
** passed as the argument. */
#define SZ_FTS5SORTER(N) (offsetof(Fts5Sorter,nIdx)+(((N)+2)/2)*sizeof(i64))
19237 | |||||
19238 | /* | ||||
19239 | ** Virtual-table cursor object. | ||||
19240 | ** | ||||
19241 | ** iSpecial: | ||||
19242 | ** If this is a 'special' query (refer to function fts5SpecialMatch()), | ||||
19243 | ** then this variable contains the result of the query. | ||||
19244 | ** | ||||
19245 | ** iFirstRowid, iLastRowid: | ||||
19246 | ** These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the | ||||
19247 | ** cursor iterates in ascending order of rowids, iFirstRowid is the lower | ||||
19248 | ** limit of rowids to return, and iLastRowid the upper. In other words, the | ||||
19249 | ** WHERE clause in the user's query might have been: | ||||
19250 | ** | ||||
19251 | ** <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid | ||||
19252 | ** | ||||
19253 | ** If the cursor iterates in descending order of rowid, iFirstRowid | ||||
19254 | ** is the upper limit (i.e. the "first" rowid visited) and iLastRowid | ||||
19255 | ** the lower. | ||||
19256 | */ | ||||
19257 | struct Fts5Cursor { | ||||
19258 | sqlite3_vtab_cursor base; /* Base class used by SQLite core */ | ||||
19259 | Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ | ||||
19260 | int *aColumnSize; /* Values for xColumnSize() */ | ||||
19261 | i64 iCsrId; /* Cursor id */ | ||||
19262 | |||||
19263 | /* Zero from this point onwards on cursor reset */ | ||||
19264 | int ePlan; /* FTS5_PLAN_XXX value */ | ||||
19265 | int bDesc; /* True for "ORDER BY rowid DESC" queries */ | ||||
19266 | i64 iFirstRowid; /* Return no rowids earlier than this */ | ||||
19267 | i64 iLastRowid; /* Return no rowids later than this */ | ||||
19268 | sqlite3_stmt *pStmt; /* Statement used to read %_content */ | ||||
19269 | Fts5Expr *pExpr; /* Expression for MATCH queries */ | ||||
19270 | Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ | ||||
19271 | int csrflags; /* Mask of cursor flags (see below) */ | ||||
19272 | i64 iSpecial; /* Result of special query */ | ||||
19273 | |||||
19274 | /* "rank" function. Populated on demand from vtab.xColumn(). */ | ||||
19275 | char *zRank; /* Custom rank function */ | ||||
19276 | char *zRankArgs; /* Custom rank function args */ | ||||
19277 | Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ | ||||
19278 | int nRankArg; /* Number of trailing arguments for rank() */ | ||||
19279 | sqlite3_value **apRankArg; /* Array of trailing arguments */ | ||||
19280 | sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */ | ||||
19281 | |||||
19282 | /* Auxiliary data storage */ | ||||
19283 | Fts5Auxiliary *pAux; /* Currently executing extension function */ | ||||
19284 | Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ | ||||
19285 | |||||
19286 | /* Cache used by auxiliary API functions xInst() and xInstCount() */ | ||||
19287 | Fts5PoslistReader *aInstIter; /* One for each phrase */ | ||||
19288 | int nInstAlloc; /* Size of aInst[] array (entries / 3) */ | ||||
19289 | int nInstCount; /* Number of phrase instances */ | ||||
19290 | int *aInst; /* 3 integers per phrase instance */ | ||||
19291 | }; | ||||
19292 | |||||
/*
** Bit values that are OR-ed together to form the "idxNum" parameter passed
** indirectly by xBestIndex() to xFilter(). The first group describes the
** constraints in use, the second the requested ordering.
*/
#define FTS5_BI_MATCH        0x0001    /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002    /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004    /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008    /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010    /* rowid >= ? */

#define FTS5_BI_ORDER_RANK   0x0020
#define FTS5_BI_ORDER_ROWID  0x0040
#define FTS5_BI_ORDER_DESC   0x0080
19306 | |||||
/*
** Bit values stored in Fts5Cursor.csrflags.
*/
#define FTS5CSR_EOF              0x01
#define FTS5CSR_REQUIRE_CONTENT  0x02
#define FTS5CSR_REQUIRE_DOCSIZE  0x04
#define FTS5CSR_REQUIRE_INST     0x08
#define FTS5CSR_FREE_ZRANK       0x10
#define FTS5CSR_REQUIRE_RESEEK   0x20
#define FTS5CSR_REQUIRE_POSLIST  0x40

/*
** BitFlagAllTest(x,y) is true if every bit of mask y is set in x.
** BitFlagTest(x,y) is true if at least one bit of mask y is set in x.
*/
#define BitFlagAllTest(x,y) (((x) & (y))==(y))
#define BitFlagTest(x,y)    (((x) & (y))!=0)


/*
** Macros to set, clear and test flags in the csrflags field of the
** cursor that pCsr points to.
*/
#define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))
19328 | |||||
19329 | struct Fts5Auxdata { | ||||
19330 | Fts5Auxiliary *pAux; /* Extension to which this belongs */ | ||||
19331 | void *pPtr; /* Pointer value */ | ||||
19332 | void(*xDelete)(void*); /* Destructor */ | ||||
19333 | Fts5Auxdata *pNext; /* Next object in linked list */ | ||||
19334 | }; | ||||
19335 | |||||
#ifdef SQLITE_DEBUG
/* Operation codes passed as the "op" argument of
** fts5CheckTransactionState(). */
#define FTS5_BEGIN      1
#define FTS5_SYNC       2
#define FTS5_COMMIT     3
#define FTS5_ROLLBACK   4
#define FTS5_SAVEPOINT  5
#define FTS5_RELEASE    6
#define FTS5_ROLLBACKTO 7

/*
** Debug-only check of the transaction state recorded in p->ts. For the
** operation identified by op (one of the FTS5_XXX codes above), assert()
** that the operation is legal in the recorded state, then update the
** record. iSavepoint is the savepoint number for the SAVEPOINT, RELEASE
** and ROLLBACKTO operations. In non-debug builds this is a no-op macro.
*/
static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){
  struct Fts5TransactionState *pTs = &p->ts;

  switch( op ){
    case FTS5_BEGIN:
      assert( pTs->eState==0 );
      pTs->eState = 1;
      pTs->iSavepoint = -1;
      break;

    case FTS5_SYNC:
      assert( pTs->eState==1 || pTs->eState==2 );
      pTs->eState = 2;
      break;

    case FTS5_COMMIT:
      assert( pTs->eState==2 );
      pTs->eState = 0;
      break;

    case FTS5_ROLLBACK:
      assert( pTs->eState==1 || pTs->eState==2 || pTs->eState==0 );
      pTs->eState = 0;
      break;

    case FTS5_SAVEPOINT:
      assert( pTs->eState>=1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint>=pTs->iSavepoint );
      pTs->iSavepoint = iSavepoint;
      break;

    case FTS5_RELEASE:
      assert( pTs->eState>=1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint<=pTs->iSavepoint );
      pTs->iSavepoint = iSavepoint-1;
      break;

    case FTS5_ROLLBACKTO:
      assert( pTs->eState>=1 );
      assert( iSavepoint>=-1 );
      /* The following assert() can fail if another vtab strikes an error
      ** within an xSavepoint() call then SQLite calls xRollbackTo() - without
      ** having called xSavepoint() on this vtab.  */
      /* assert( iSavepoint<=p->ts.iSavepoint ); */
      pTs->iSavepoint = iSavepoint;
      break;
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif
19395 | |||||
19396 | /* | ||||
19397 | ** Return true if pTab is a contentless table. If parameter bIncludeUnindexed | ||||
19398 | ** is true, this includes contentless tables that store UNINDEXED columns | ||||
19399 | ** only. | ||||
19400 | */ | ||||
19401 | static int fts5IsContentless(Fts5FullTable *pTab, int bIncludeUnindexed){ | ||||
19402 | int eContent = pTab->p.pConfig->eContent; | ||||
19403 | return ( | ||||
19404 | eContent==FTS5_CONTENT_NONE1 | ||||
19405 | || (bIncludeUnindexed && eContent==FTS5_CONTENT_UNINDEXED3) | ||||
19406 | ); | ||||
19407 | } | ||||
19408 | |||||
19409 | /* | ||||
19410 | ** Delete a virtual table handle allocated by fts5InitVtab(). | ||||
19411 | */ | ||||
19412 | static void fts5FreeVtab(Fts5FullTable *pTab){ | ||||
19413 | if( pTab ){ | ||||
19414 | sqlite3Fts5IndexClose(pTab->p.pIndex); | ||||
19415 | sqlite3Fts5StorageClose(pTab->pStorage); | ||||
19416 | sqlite3Fts5ConfigFree(pTab->p.pConfig); | ||||
19417 | sqlite3_freesqlite3_api->free(pTab); | ||||
19418 | } | ||||
19419 | } | ||||
19420 | |||||
19421 | /* | ||||
19422 | ** The xDisconnect() virtual table method. | ||||
19423 | */ | ||||
19424 | static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ | ||||
19425 | fts5FreeVtab((Fts5FullTable*)pVtab); | ||||
19426 | return SQLITE_OK0; | ||||
19427 | } | ||||
19428 | |||||
19429 | /* | ||||
19430 | ** The xDestroy() virtual table method. | ||||
19431 | */ | ||||
19432 | static int fts5DestroyMethod(sqlite3_vtab *pVtab){ | ||||
19433 | Fts5Table *pTab = (Fts5Table*)pVtab; | ||||
19434 | int rc = sqlite3Fts5DropAll(pTab->pConfig); | ||||
19435 | if( rc==SQLITE_OK0 ){ | ||||
19436 | fts5FreeVtab((Fts5FullTable*)pVtab); | ||||
19437 | } | ||||
19438 | return rc; | ||||
19439 | } | ||||
19440 | |||||
19441 | /* | ||||
19442 | ** This function is the implementation of both the xConnect and xCreate | ||||
19443 | ** methods of the FTS3 virtual table. | ||||
19444 | ** | ||||
19445 | ** The argv[] array contains the following: | ||||
19446 | ** | ||||
19447 | ** argv[0] -> module name ("fts5") | ||||
19448 | ** argv[1] -> database name | ||||
19449 | ** argv[2] -> table name | ||||
19450 | ** argv[...] -> "column name" and other module argument fields. | ||||
19451 | */ | ||||
19452 | static int fts5InitVtab( | ||||
19453 | int bCreate, /* True for xCreate, false for xConnect */ | ||||
19454 | sqlite3 *db, /* The SQLite database connection */ | ||||
19455 | void *pAux, /* Hash table containing tokenizers */ | ||||
19456 | int argc, /* Number of elements in argv array */ | ||||
19457 | const char * const *argv, /* xCreate/xConnect argument array */ | ||||
19458 | sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ | ||||
19459 | char **pzErr /* Write any error message here */ | ||||
19460 | ){ | ||||
19461 | Fts5Global *pGlobal = (Fts5Global*)pAux; | ||||
19462 | const char **azConfig = (const char**)argv; | ||||
19463 | int rc = SQLITE_OK0; /* Return code */ | ||||
19464 | Fts5Config *pConfig = 0; /* Results of parsing argc/argv */ | ||||
19465 | Fts5FullTable *pTab = 0; /* New virtual table object */ | ||||
19466 | |||||
19467 | /* Allocate the new vtab object and parse the configuration */ | ||||
19468 | pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable)); | ||||
19469 | if( rc==SQLITE_OK0 ){ | ||||
19470 | rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); | ||||
19471 | assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 )((void) (0)); | ||||
19472 | } | ||||
19473 | if( rc==SQLITE_OK0 ){ | ||||
19474 | pConfig->pzErrmsg = pzErr; | ||||
19475 | pTab->p.pConfig = pConfig; | ||||
19476 | pTab->pGlobal = pGlobal; | ||||
19477 | if( bCreate || sqlite3Fts5TokenizerPreload(&pConfig->t) ){ | ||||
19478 | rc = sqlite3Fts5LoadTokenizer(pConfig); | ||||
19479 | } | ||||
19480 | } | ||||
19481 | |||||
19482 | /* Open the index sub-system */ | ||||
19483 | if( rc==SQLITE_OK0 ){ | ||||
19484 | rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr); | ||||
19485 | } | ||||
19486 | |||||
19487 | /* Open the storage sub-system */ | ||||
19488 | if( rc==SQLITE_OK0 ){ | ||||
19489 | rc = sqlite3Fts5StorageOpen( | ||||
19490 | pConfig, pTab->p.pIndex, bCreate, &pTab->pStorage, pzErr | ||||
19491 | ); | ||||
19492 | } | ||||
19493 | |||||
19494 | /* Call sqlite3_declare_vtab() */ | ||||
19495 | if( rc==SQLITE_OK0 ){ | ||||
19496 | rc = sqlite3Fts5ConfigDeclareVtab(pConfig); | ||||
19497 | } | ||||
19498 | |||||
19499 | /* Load the initial configuration */ | ||||
19500 | if( rc==SQLITE_OK0 ){ | ||||
19501 | rc = sqlite3Fts5ConfigLoad(pTab->p.pConfig, pTab->p.pConfig->iCookie-1); | ||||
19502 | } | ||||
19503 | |||||
19504 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | ||||
19505 | rc = sqlite3_vtab_configsqlite3_api->vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT1, (int)1); | ||||
19506 | } | ||||
19507 | if( rc==SQLITE_OK0 ){ | ||||
19508 | rc = sqlite3_vtab_configsqlite3_api->vtab_config(db, SQLITE_VTAB_INNOCUOUS2); | ||||
19509 | } | ||||
19510 | |||||
19511 | if( pConfig ) pConfig->pzErrmsg = 0; | ||||
19512 | if( rc!=SQLITE_OK0 ){ | ||||
19513 | fts5FreeVtab(pTab); | ||||
19514 | pTab = 0; | ||||
19515 | }else if( bCreate ){ | ||||
19516 | fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); | ||||
19517 | } | ||||
19518 | *ppVTab = (sqlite3_vtab*)pTab; | ||||
19519 | return rc; | ||||
19520 | } | ||||
19521 | |||||
19522 | /* | ||||
19523 | ** The xConnect() and xCreate() methods for the virtual table. All the | ||||
19524 | ** work is done in function fts5InitVtab(). | ||||
19525 | */ | ||||
19526 | static int fts5ConnectMethod( | ||||
19527 | sqlite3 *db, /* Database connection */ | ||||
19528 | void *pAux, /* Pointer to tokenizer hash table */ | ||||
19529 | int argc, /* Number of elements in argv array */ | ||||
19530 | const char * const *argv, /* xCreate/xConnect argument array */ | ||||
19531 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | ||||
19532 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | ||||
19533 | ){ | ||||
19534 | return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); | ||||
19535 | } | ||||
19536 | static int fts5CreateMethod( | ||||
19537 | sqlite3 *db, /* Database connection */ | ||||
19538 | void *pAux, /* Pointer to tokenizer hash table */ | ||||
19539 | int argc, /* Number of elements in argv array */ | ||||
19540 | const char * const *argv, /* xCreate/xConnect argument array */ | ||||
19541 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | ||||
19542 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | ||||
19543 | ){ | ||||
19544 | return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); | ||||
19545 | } | ||||
19546 | |||||
/*
** The different query plans. One of these values is stored in
** Fts5Cursor.ePlan.
**
** The numeric values matter: fts5NextMethod() relies on FTS5_PLAN_MATCH
** and FTS5_PLAN_SOURCE being the only plans with a value less than 3.
*/
#define FTS5_PLAN_MATCH          1   /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SOURCE         2   /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        3   /* An internal query */
#define FTS5_PLAN_SORTED_MATCH   4   /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_SCAN           5   /* No usable constraint */
#define FTS5_PLAN_ROWID          6   /* (rowid = ?) */
19556 | |||||
19557 | /* | ||||
19558 | ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this | ||||
19559 | ** extension is currently being used by a version of SQLite too old to | ||||
19560 | ** support index-info flags. In that case this function is a no-op. | ||||
19561 | */ | ||||
19562 | static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){ | ||||
19563 | #if SQLITE_VERSION_NUMBER3050001>=3008012 | ||||
19564 | #ifndef SQLITE_CORE | ||||
19565 | if( sqlite3_libversion_numbersqlite3_api->libversion_number()>=3008012 ) | ||||
19566 | #endif | ||||
19567 | { | ||||
19568 | pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE0x00000001; | ||||
19569 | } | ||||
19570 | #endif | ||||
19571 | } | ||||
19572 | |||||
19573 | static int fts5UsePatternMatch( | ||||
19574 | Fts5Config *pConfig, | ||||
19575 | struct sqlite3_index_constraint *p | ||||
19576 | ){ | ||||
19577 | assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB )((void) (0)); | ||||
19578 | assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE )((void) (0)); | ||||
19579 | if( pConfig->t.ePattern==FTS5_PATTERN_GLOB66 && p->op==FTS5_PATTERN_GLOB66 ){ | ||||
19580 | return 1; | ||||
19581 | } | ||||
19582 | if( pConfig->t.ePattern==FTS5_PATTERN_LIKE65 | ||||
19583 | && (p->op==FTS5_PATTERN_LIKE65 || p->op==FTS5_PATTERN_GLOB66) | ||||
19584 | ){ | ||||
19585 | return 1; | ||||
19586 | } | ||||
19587 | return 0; | ||||
19588 | } | ||||
19589 | |||||
19590 | /* | ||||
19591 | ** Implementation of the xBestIndex method for FTS5 tables. Within the | ||||
19592 | ** WHERE constraint, it searches for the following: | ||||
19593 | ** | ||||
19594 | ** 1. A MATCH constraint against the table column. | ||||
19595 | ** 2. A MATCH constraint against the "rank" column. | ||||
19596 | ** 3. A MATCH constraint against some other column. | ||||
19597 | ** 4. An == constraint against the rowid column. | ||||
19598 | ** 5. A < or <= constraint against the rowid column. | ||||
19599 | ** 6. A > or >= constraint against the rowid column. | ||||
19600 | ** | ||||
19601 | ** Within the ORDER BY, the following are supported: | ||||
19602 | ** | ||||
19603 | ** 5. ORDER BY rank [ASC|DESC] | ||||
19604 | ** 6. ORDER BY rowid [ASC|DESC] | ||||
19605 | ** | ||||
19606 | ** Information for the xFilter call is passed via both the idxNum and | ||||
19607 | ** idxStr variables. Specifically, idxNum is a bitmask of the following | ||||
19608 | ** flags used to encode the ORDER BY clause: | ||||
19609 | ** | ||||
19610 | ** FTS5_BI_ORDER_RANK | ||||
19611 | ** FTS5_BI_ORDER_ROWID | ||||
19612 | ** FTS5_BI_ORDER_DESC | ||||
19613 | ** | ||||
19614 | ** idxStr is used to encode data from the WHERE clause. For each argument | ||||
19615 | ** passed to the xFilter method, the following is appended to idxStr: | ||||
19616 | ** | ||||
19617 | ** Match against table column: "m" | ||||
19618 | ** Match against rank column: "r" | ||||
19619 | ** Match against other column: "M<column-number>" | ||||
19620 | ** LIKE against other column: "L<column-number>" | ||||
19621 | ** GLOB against other column: "G<column-number>" | ||||
19622 | ** Equality constraint against the rowid: "=" | ||||
19623 | ** A < or <= against the rowid: "<" | ||||
19624 | ** A > or >= against the rowid: ">" | ||||
19625 | ** | ||||
19626 | ** This function ensures that there is at most one "r" or "=". And that if | ||||
19627 | ** there exists an "=" then there is no "<" or ">". | ||||
19628 | ** | ||||
19629 | ** If an unusable MATCH operator is present in the WHERE clause, then | ||||
19630 | ** SQLITE_CONSTRAINT is returned. | ||||
19631 | ** | ||||
19632 | ** Costs are assigned as follows: | ||||
19633 | ** | ||||
19634 | ** a) If a MATCH operator is present, the cost depends on the other | ||||
19635 | ** constraints also present. As follows: | ||||
19636 | ** | ||||
19637 | ** * No other constraints: cost=1000.0 | ||||
19638 | ** * One rowid range constraint: cost=750.0 | ||||
19639 | ** * Both rowid range constraints: cost=500.0 | ||||
19640 | ** * An == rowid constraint: cost=100.0 | ||||
19641 | ** | ||||
19642 | ** b) Otherwise, if there is no MATCH: | ||||
19643 | ** | ||||
19644 | ** * No other constraints: cost=1000000.0 | ||||
19645 | ** * One rowid range constraint: cost=750000.0 | ||||
19646 | ** * Both rowid range constraints: cost=250000.0 | ||||
19647 | ** * An == rowid constraint: cost=10.0 | ||||
19648 | ** | ||||
19649 | ** Costs are not modified by the ORDER BY clause. | ||||
19650 | */ | ||||
19651 | static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ | ||||
19652 | Fts5Table *pTab = (Fts5Table*)pVTab; | ||||
19653 | Fts5Config *pConfig = pTab->pConfig; | ||||
19654 | const int nCol = pConfig->nCol; | ||||
19655 | int idxFlags = 0; /* Parameter passed through to xFilter() */ | ||||
19656 | int i; | ||||
19657 | |||||
19658 | char *idxStr; | ||||
19659 | int iIdxStr = 0; | ||||
19660 | int iCons = 0; | ||||
19661 | |||||
19662 | int bSeenEq = 0; | ||||
19663 | int bSeenGt = 0; | ||||
19664 | int bSeenLt = 0; | ||||
19665 | int nSeenMatch = 0; | ||||
19666 | int bSeenRank = 0; | ||||
19667 | |||||
19668 | |||||
19669 | assert( SQLITE_INDEX_CONSTRAINT_EQ<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | ||||
19670 | assert( SQLITE_INDEX_CONSTRAINT_GT<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | ||||
19671 | assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | ||||
19672 | assert( SQLITE_INDEX_CONSTRAINT_GE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | ||||
19673 | assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | ||||
19674 | |||||
19675 | if( pConfig->bLock ){ | ||||
19676 | pTab->base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | ||||
19677 | "recursively defined fts5 content table" | ||||
19678 | ); | ||||
19679 | return SQLITE_ERROR1; | ||||
19680 | } | ||||
19681 | |||||
19682 | idxStr = (char*)sqlite3_mallocsqlite3_api->malloc(pInfo->nConstraint * 8 + 1); | ||||
19683 | if( idxStr==0 ) return SQLITE_NOMEM7; | ||||
19684 | pInfo->idxStr = idxStr; | ||||
19685 | pInfo->needToFreeIdxStr = 1; | ||||
19686 | |||||
19687 | for(i=0; i<pInfo->nConstraint; i++){ | ||||
19688 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | ||||
19689 | int iCol = p->iColumn; | ||||
19690 | if( p->op==SQLITE_INDEX_CONSTRAINT_MATCH64 | ||||
19691 | || (p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && iCol>=nCol) | ||||
19692 | ){ | ||||
19693 | /* A MATCH operator or equivalent */ | ||||
19694 | if( p->usable==0 || iCol<0 ){ | ||||
19695 | /* As there exists an unusable MATCH constraint this is an | ||||
19696 | ** unusable plan. Return SQLITE_CONSTRAINT. */ | ||||
19697 | idxStr[iIdxStr] = 0; | ||||
19698 | return SQLITE_CONSTRAINT19; | ||||
19699 | }else{ | ||||
19700 | if( iCol==nCol+1 ){ | ||||
19701 | if( bSeenRank ) continue; | ||||
19702 | idxStr[iIdxStr++] = 'r'; | ||||
19703 | bSeenRank = 1; | ||||
19704 | }else{ | ||||
19705 | nSeenMatch++; | ||||
19706 | idxStr[iIdxStr++] = 'M'; | ||||
19707 | sqlite3_snprintfsqlite3_api->xsnprintf(6, &idxStr[iIdxStr], "%d", iCol); | ||||
19708 | idxStr += strlen(&idxStr[iIdxStr]); | ||||
19709 | assert( idxStr[iIdxStr]=='\0' )((void) (0)); | ||||
19710 | } | ||||
19711 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | ||||
19712 | pInfo->aConstraintUsage[i].omit = 1; | ||||
19713 | } | ||||
19714 | }else if( p->usable ){ | ||||
19715 | if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){ | ||||
19716 | assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB )((void) (0)); | ||||
19717 | idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE65 ? 'L' : 'G'; | ||||
19718 | sqlite3_snprintfsqlite3_api->xsnprintf(6, &idxStr[iIdxStr], "%d", iCol); | ||||
19719 | idxStr += strlen(&idxStr[iIdxStr]); | ||||
19720 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | ||||
19721 | assert( idxStr[iIdxStr]=='\0' )((void) (0)); | ||||
19722 | nSeenMatch++; | ||||
19723 | }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && iCol<0 ){ | ||||
19724 | idxStr[iIdxStr++] = '='; | ||||
19725 | bSeenEq = 1; | ||||
19726 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | ||||
19727 | } | ||||
19728 | } | ||||
19729 | } | ||||
19730 | |||||
19731 | if( bSeenEq==0 ){ | ||||
19732 | for(i=0; i<pInfo->nConstraint; i++){ | ||||
19733 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | ||||
19734 | if( p->iColumn<0 && p->usable ){ | ||||
19735 | int op = p->op; | ||||
19736 | if( op==SQLITE_INDEX_CONSTRAINT_LT16 || op==SQLITE_INDEX_CONSTRAINT_LE8 ){ | ||||
19737 | if( bSeenLt ) continue; | ||||
19738 | idxStr[iIdxStr++] = '<'; | ||||
19739 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | ||||
19740 | bSeenLt = 1; | ||||
19741 | }else | ||||
19742 | if( op==SQLITE_INDEX_CONSTRAINT_GT4 || op==SQLITE_INDEX_CONSTRAINT_GE32 ){ | ||||
19743 | if( bSeenGt ) continue; | ||||
19744 | idxStr[iIdxStr++] = '>'; | ||||
19745 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | ||||
19746 | bSeenGt = 1; | ||||
19747 | } | ||||
19748 | } | ||||
19749 | } | ||||
19750 | } | ||||
19751 | idxStr[iIdxStr] = '\0'; | ||||
19752 | |||||
19753 | /* Set idxFlags flags for the ORDER BY clause | ||||
19754 | ** | ||||
19755 | ** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC". | ||||
19756 | */ | ||||
19757 | if( pInfo->nOrderBy==1 ){ | ||||
19758 | int iSort = pInfo->aOrderBy[0].iColumn; | ||||
19759 | if( iSort==(pConfig->nCol+1) && nSeenMatch>0 ){ | ||||
19760 | idxFlags |= FTS5_BI_ORDER_RANK0x0020; | ||||
19761 | }else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){ | ||||
19762 | idxFlags |= FTS5_BI_ORDER_ROWID0x0040; | ||||
19763 | } | ||||
19764 | if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID)(((idxFlags) & (0x0020|0x0040))!=0) ){ | ||||
19765 | pInfo->orderByConsumed = 1; | ||||
19766 | if( pInfo->aOrderBy[0].desc ){ | ||||
19767 | idxFlags |= FTS5_BI_ORDER_DESC0x0080; | ||||
19768 | } | ||||
19769 | } | ||||
19770 | } | ||||
19771 | |||||
19772 | /* Calculate the estimated cost based on the flags set in idxFlags. */ | ||||
19773 | if( bSeenEq ){ | ||||
19774 | pInfo->estimatedCost = nSeenMatch ? 1000.0 : 10.0; | ||||
19775 | if( nSeenMatch==0 ) fts5SetUniqueFlag(pInfo); | ||||
19776 | }else if( bSeenLt && bSeenGt ){ | ||||
19777 | pInfo->estimatedCost = nSeenMatch ? 5000.0 : 250000.0; | ||||
19778 | }else if( bSeenLt || bSeenGt ){ | ||||
19779 | pInfo->estimatedCost = nSeenMatch ? 7500.0 : 750000.0; | ||||
19780 | }else{ | ||||
19781 | pInfo->estimatedCost = nSeenMatch ? 10000.0 : 1000000.0; | ||||
19782 | } | ||||
19783 | for(i=1; i<nSeenMatch; i++){ | ||||
19784 | pInfo->estimatedCost *= 0.4; | ||||
19785 | } | ||||
19786 | |||||
19787 | pInfo->idxNum = idxFlags; | ||||
19788 | return SQLITE_OK0; | ||||
19789 | } | ||||
19790 | |||||
19791 | static int fts5NewTransaction(Fts5FullTable *pTab){ | ||||
19792 | Fts5Cursor *pCsr; | ||||
19793 | for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | ||||
19794 | if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK0; | ||||
19795 | } | ||||
19796 | return sqlite3Fts5StorageReset(pTab->pStorage); | ||||
19797 | } | ||||
19798 | |||||
19799 | /* | ||||
19800 | ** Implementation of xOpen method. | ||||
19801 | */ | ||||
19802 | static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ | ||||
19803 | Fts5FullTable *pTab = (Fts5FullTable*)pVTab; | ||||
19804 | Fts5Config *pConfig = pTab->p.pConfig; | ||||
19805 | Fts5Cursor *pCsr = 0; /* New cursor object */ | ||||
19806 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | ||||
19807 | int rc; /* Return code */ | ||||
19808 | |||||
19809 | rc = fts5NewTransaction(pTab); | ||||
19810 | if( rc==SQLITE_OK0 ){ | ||||
19811 | nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int); | ||||
19812 | pCsr = (Fts5Cursor*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
19813 | if( pCsr ){ | ||||
19814 | Fts5Global *pGlobal = pTab->pGlobal; | ||||
19815 | memset(pCsr, 0, (size_t)nByte); | ||||
19816 | pCsr->aColumnSize = (int*)&pCsr[1]; | ||||
19817 | pCsr->pNext = pGlobal->pCsr; | ||||
19818 | pGlobal->pCsr = pCsr; | ||||
19819 | pCsr->iCsrId = ++pGlobal->iNextId; | ||||
19820 | }else{ | ||||
19821 | rc = SQLITE_NOMEM7; | ||||
19822 | } | ||||
19823 | } | ||||
19824 | *ppCsr = (sqlite3_vtab_cursor*)pCsr; | ||||
19825 | return rc; | ||||
19826 | } | ||||
19827 | |||||
19828 | static int fts5StmtType(Fts5Cursor *pCsr){ | ||||
19829 | if( pCsr->ePlan==FTS5_PLAN_SCAN5 ){ | ||||
19830 | return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC1 : FTS5_STMT_SCAN_ASC0; | ||||
19831 | } | ||||
19832 | return FTS5_STMT_LOOKUP2; | ||||
19833 | } | ||||
19834 | |||||
19835 | /* | ||||
19836 | ** This function is called after the cursor passed as the only argument | ||||
19837 | ** is moved to point at a different row. It clears all cached data | ||||
19838 | ** specific to the previous row stored by the cursor object. | ||||
19839 | */ | ||||
19840 | static void fts5CsrNewrow(Fts5Cursor *pCsr){ | ||||
19841 | CsrFlagSet(pCsr,((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | ||||
19842 | FTS5CSR_REQUIRE_CONTENT((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | ||||
19843 | | FTS5CSR_REQUIRE_DOCSIZE((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | ||||
19844 | | FTS5CSR_REQUIRE_INST((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | ||||
19845 | | FTS5CSR_REQUIRE_POSLIST((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | ||||
19846 | )((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)); | ||||
19847 | } | ||||
19848 | |||||
19849 | static void fts5FreeCursorComponents(Fts5Cursor *pCsr){ | ||||
19850 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | ||||
19851 | Fts5Auxdata *pData; | ||||
19852 | Fts5Auxdata *pNext; | ||||
19853 | |||||
19854 | sqlite3_freesqlite3_api->free(pCsr->aInstIter); | ||||
19855 | sqlite3_freesqlite3_api->free(pCsr->aInst); | ||||
19856 | if( pCsr->pStmt ){ | ||||
19857 | int eStmt = fts5StmtType(pCsr); | ||||
19858 | sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); | ||||
19859 | } | ||||
19860 | if( pCsr->pSorter ){ | ||||
19861 | Fts5Sorter *pSorter = pCsr->pSorter; | ||||
19862 | sqlite3_finalizesqlite3_api->finalize(pSorter->pStmt); | ||||
19863 | sqlite3_freesqlite3_api->free(pSorter); | ||||
19864 | } | ||||
19865 | |||||
19866 | if( pCsr->ePlan!=FTS5_PLAN_SOURCE2 ){ | ||||
19867 | sqlite3Fts5ExprFree(pCsr->pExpr); | ||||
19868 | } | ||||
19869 | |||||
19870 | for(pData=pCsr->pAuxdata; pData; pData=pNext){ | ||||
19871 | pNext = pData->pNext; | ||||
19872 | if( pData->xDelete ) pData->xDelete(pData->pPtr); | ||||
19873 | sqlite3_freesqlite3_api->free(pData); | ||||
19874 | } | ||||
19875 | |||||
19876 | sqlite3_finalizesqlite3_api->finalize(pCsr->pRankArgStmt); | ||||
19877 | sqlite3_freesqlite3_api->free(pCsr->apRankArg); | ||||
19878 | |||||
19879 | if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK)((pCsr)->csrflags & (0x10)) ){ | ||||
19880 | sqlite3_freesqlite3_api->free(pCsr->zRank); | ||||
19881 | sqlite3_freesqlite3_api->free(pCsr->zRankArgs); | ||||
19882 | } | ||||
19883 | |||||
19884 | sqlite3Fts5IndexCloseReader(pTab->p.pIndex); | ||||
19885 | memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr)); | ||||
19886 | } | ||||
19887 | |||||
19888 | |||||
19889 | /* | ||||
19890 | ** Close the cursor. For additional information see the documentation | ||||
19891 | ** on the xClose method of the virtual table interface. | ||||
19892 | */ | ||||
19893 | static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ | ||||
19894 | if( pCursor ){ | ||||
19895 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | ||||
19896 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | ||||
19897 | Fts5Cursor **pp; | ||||
19898 | |||||
19899 | fts5FreeCursorComponents(pCsr); | ||||
19900 | /* Remove the cursor from the Fts5Global.pCsr list */ | ||||
19901 | for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); | ||||
19902 | *pp = pCsr->pNext; | ||||
19903 | |||||
19904 | sqlite3_freesqlite3_api->free(pCsr); | ||||
19905 | } | ||||
19906 | return SQLITE_OK0; | ||||
19907 | } | ||||
19908 | |||||
19909 | static int fts5SorterNext(Fts5Cursor *pCsr){ | ||||
19910 | Fts5Sorter *pSorter = pCsr->pSorter; | ||||
19911 | int rc; | ||||
19912 | |||||
19913 | rc = sqlite3_stepsqlite3_api->step(pSorter->pStmt); | ||||
19914 | if( rc==SQLITE_DONE101 ){ | ||||
19915 | rc = SQLITE_OK0; | ||||
19916 | CsrFlagSet(pCsr, FTS5CSR_EOF|FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags |= (0x01|0x02)); | ||||
19917 | }else if( rc==SQLITE_ROW100 ){ | ||||
19918 | const u8 *a; | ||||
19919 | const u8 *aBlob; | ||||
19920 | int nBlob; | ||||
19921 | int i; | ||||
19922 | int iOff = 0; | ||||
19923 | rc = SQLITE_OK0; | ||||
19924 | |||||
19925 | pSorter->iRowid = sqlite3_column_int64sqlite3_api->column_int64(pSorter->pStmt, 0); | ||||
19926 | nBlob = sqlite3_column_bytessqlite3_api->column_bytes(pSorter->pStmt, 1); | ||||
19927 | aBlob = a = sqlite3_column_blobsqlite3_api->column_blob(pSorter->pStmt, 1); | ||||
19928 | |||||
19929 | /* nBlob==0 in detail=none mode. */ | ||||
19930 | if( nBlob>0 ){ | ||||
19931 | for(i=0; i<(pSorter->nIdx-1); i++){ | ||||
19932 | int iVal; | ||||
19933 | a += fts5GetVarint32(a, iVal)sqlite3Fts5GetVarint32(a,(u32*)&(iVal)); | ||||
19934 | iOff += iVal; | ||||
19935 | pSorter->aIdx[i] = iOff; | ||||
19936 | } | ||||
19937 | pSorter->aIdx[i] = &aBlob[nBlob] - a; | ||||
19938 | pSorter->aPoslist = a; | ||||
19939 | } | ||||
19940 | |||||
19941 | fts5CsrNewrow(pCsr); | ||||
19942 | } | ||||
19943 | |||||
19944 | return rc; | ||||
19945 | } | ||||
19946 | |||||
19947 | |||||
19948 | /* | ||||
19949 | ** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors | ||||
19950 | ** open on table pTab. | ||||
19951 | */ | ||||
19952 | static void fts5TripCursors(Fts5FullTable *pTab){ | ||||
19953 | Fts5Cursor *pCsr; | ||||
19954 | for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | ||||
19955 | if( pCsr->ePlan==FTS5_PLAN_MATCH1 | ||||
19956 | && pCsr->base.pVtab==(sqlite3_vtab*)pTab | ||||
19957 | ){ | ||||
19958 | CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags |= (0x20)); | ||||
19959 | } | ||||
19960 | } | ||||
19961 | } | ||||
19962 | |||||
19963 | /* | ||||
19964 | ** If the REQUIRE_RESEEK flag is set on the cursor passed as the first | ||||
19965 | ** argument, close and reopen all Fts5IndexIter iterators that the cursor | ||||
19966 | ** is using. Then attempt to move the cursor to a rowid equal to or laster | ||||
19967 | ** (in the cursors sort order - ASC or DESC) than the current rowid. | ||||
19968 | ** | ||||
19969 | ** If the new rowid is not equal to the old, set output parameter *pbSkip | ||||
19970 | ** to 1 before returning. Otherwise, leave it unchanged. | ||||
19971 | ** | ||||
19972 | ** Return SQLITE_OK if successful or if no reseek was required, or an | ||||
19973 | ** error code if an error occurred. | ||||
19974 | */ | ||||
19975 | static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ | ||||
19976 | int rc = SQLITE_OK0; | ||||
19977 | assert( *pbSkip==0 )((void) (0)); | ||||
19978 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags & (0x20)) ){ | ||||
19979 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | ||||
19980 | int bDesc = pCsr->bDesc; | ||||
19981 | i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); | ||||
19982 | |||||
19983 | rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->p.pIndex, iRowid, bDesc); | ||||
19984 | if( rc==SQLITE_OK0 && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){ | ||||
19985 | *pbSkip = 1; | ||||
19986 | } | ||||
19987 | |||||
19988 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags &= ~(0x20)); | ||||
19989 | fts5CsrNewrow(pCsr); | ||||
19990 | if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ | ||||
19991 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | ||||
19992 | *pbSkip = 1; | ||||
19993 | } | ||||
19994 | } | ||||
19995 | return rc; | ||||
19996 | } | ||||
19997 | |||||
19998 | |||||
19999 | /* | ||||
20000 | ** Advance the cursor to the next row in the table that matches the | ||||
20001 | ** search criteria. | ||||
20002 | ** | ||||
20003 | ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned | ||||
20004 | ** even if we reach end-of-file. The fts5EofMethod() will be called | ||||
20005 | ** subsequently to determine whether or not an EOF was hit. | ||||
20006 | */ | ||||
20007 | static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ | ||||
20008 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | ||||
20009 | int rc; | ||||
20010 | |||||
20011 | assert( (pCsr->ePlan<3)==((void) (0)) | ||||
20012 | (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE)((void) (0)) | ||||
20013 | )((void) (0)); | ||||
20014 | assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) )((void) (0)); | ||||
20015 | |||||
20016 | /* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table, | ||||
20017 | ** clear any token mappings accumulated at the fts5_index.c level. In | ||||
20018 | ** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH, | ||||
20019 | ** we need to retain the mappings for the entire query. */ | ||||
20020 | if( pCsr->ePlan==FTS5_PLAN_MATCH1 | ||||
20021 | && ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata | ||||
20022 | ){ | ||||
20023 | sqlite3Fts5ExprClearTokens(pCsr->pExpr); | ||||
20024 | } | ||||
20025 | |||||
20026 | if( pCsr->ePlan<3 ){ | ||||
20027 | int bSkip = 0; | ||||
20028 | if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; | ||||
20029 | rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid); | ||||
20030 | CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr))((pCsr)->csrflags |= (sqlite3Fts5ExprEof(pCsr->pExpr))); | ||||
20031 | fts5CsrNewrow(pCsr); | ||||
20032 | }else{ | ||||
20033 | switch( pCsr->ePlan ){ | ||||
20034 | case FTS5_PLAN_SPECIAL3: { | ||||
20035 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | ||||
20036 | rc = SQLITE_OK0; | ||||
20037 | break; | ||||
20038 | } | ||||
20039 | |||||
20040 | case FTS5_PLAN_SORTED_MATCH4: { | ||||
20041 | rc = fts5SorterNext(pCsr); | ||||
20042 | break; | ||||
20043 | } | ||||
20044 | |||||
20045 | default: { | ||||
20046 | Fts5Config *pConfig = ((Fts5Table*)pCursor->pVtab)->pConfig; | ||||
20047 | pConfig->bLock++; | ||||
20048 | rc = sqlite3_stepsqlite3_api->step(pCsr->pStmt); | ||||
20049 | pConfig->bLock--; | ||||
20050 | if( rc!=SQLITE_ROW100 ){ | ||||
20051 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | ||||
20052 | rc = sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | ||||
20053 | if( rc!=SQLITE_OK0 ){ | ||||
20054 | pCursor->pVtab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | ||||
20055 | "%s", sqlite3_errmsgsqlite3_api->errmsg(pConfig->db) | ||||
20056 | ); | ||||
20057 | } | ||||
20058 | }else{ | ||||
20059 | rc = SQLITE_OK0; | ||||
20060 | CsrFlagSet(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags |= (0x04)); | ||||
20061 | } | ||||
20062 | break; | ||||
20063 | } | ||||
20064 | } | ||||
20065 | } | ||||
20066 | |||||
20067 | return rc; | ||||
20068 | } | ||||
20069 | |||||
20070 | |||||
20071 | static int fts5PrepareStatement( | ||||
20072 | sqlite3_stmt **ppStmt, | ||||
20073 | Fts5Config *pConfig, | ||||
20074 | const char *zFmt, | ||||
20075 | ... | ||||
20076 | ){ | ||||
20077 | sqlite3_stmt *pRet = 0; | ||||
20078 | int rc; | ||||
20079 | char *zSql; | ||||
20080 | va_list ap; | ||||
20081 | |||||
20082 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | ||||
20083 | zSql = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | ||||
20084 | if( zSql==0 ){ | ||||
20085 | rc = SQLITE_NOMEM7; | ||||
20086 | }else{ | ||||
20087 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pConfig->db, zSql, -1, | ||||
20088 | SQLITE_PREPARE_PERSISTENT0x01, &pRet, 0); | ||||
20089 | if( rc!=SQLITE_OK0 ){ | ||||
20090 | sqlite3Fts5ConfigErrmsg(pConfig, "%s", sqlite3_errmsgsqlite3_api->errmsg(pConfig->db)); | ||||
20091 | } | ||||
20092 | sqlite3_freesqlite3_api->free(zSql); | ||||
20093 | } | ||||
20094 | |||||
20095 | va_end(ap)__builtin_va_end(ap); | ||||
20096 | *ppStmt = pRet; | ||||
20097 | return rc; | ||||
20098 | } | ||||
20099 | |||||
20100 | static int fts5CursorFirstSorted( | ||||
20101 | Fts5FullTable *pTab, | ||||
20102 | Fts5Cursor *pCsr, | ||||
20103 | int bDesc | ||||
20104 | ){ | ||||
20105 | Fts5Config *pConfig = pTab->p.pConfig; | ||||
20106 | Fts5Sorter *pSorter; | ||||
20107 | int nPhrase; | ||||
20108 | sqlite3_int64 nByte; | ||||
20109 | int rc; | ||||
20110 | const char *zRank = pCsr->zRank; | ||||
20111 | const char *zRankArgs = pCsr->zRankArgs; | ||||
20112 | |||||
20113 | nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | ||||
20114 | nByte = SZ_FTS5SORTER(nPhrase)(__builtin_offsetof(Fts5Sorter, nIdx)+((nPhrase+2)/2)*sizeof( i64)); | ||||
20115 | pSorter = (Fts5Sorter*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
20116 | if( pSorter==0 ) return SQLITE_NOMEM7; | ||||
20117 | memset(pSorter, 0, (size_t)nByte); | ||||
20118 | pSorter->nIdx = nPhrase; | ||||
20119 | |||||
20120 | /* TODO: It would be better to have some system for reusing statement | ||||
20121 | ** handles here, rather than preparing a new one for each query. But that | ||||
20122 | ** is not possible as SQLite reference counts the virtual table objects. | ||||
20123 | ** And since the statement required here reads from this very virtual | ||||
20124 | ** table, saving it creates a circular reference. | ||||
20125 | ** | ||||
20126 | ** If SQLite a built-in statement cache, this wouldn't be a problem. */ | ||||
20127 | rc = fts5PrepareStatement(&pSorter->pStmt, pConfig, | ||||
20128 | "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(\"%w\"%s%s) %s", | ||||
20129 | pConfig->zDb, pConfig->zName, zRank, pConfig->zName, | ||||
20130 | (zRankArgs ? ", " : ""), | ||||
20131 | (zRankArgs ? zRankArgs : ""), | ||||
20132 | bDesc ? "DESC" : "ASC" | ||||
20133 | ); | ||||
20134 | |||||
20135 | pCsr->pSorter = pSorter; | ||||
20136 | if( rc==SQLITE_OK0 ){ | ||||
20137 | assert( pTab->pSortCsr==0 )((void) (0)); | ||||
20138 | pTab->pSortCsr = pCsr; | ||||
20139 | rc = fts5SorterNext(pCsr); | ||||
20140 | pTab->pSortCsr = 0; | ||||
20141 | } | ||||
20142 | |||||
20143 | if( rc!=SQLITE_OK0 ){ | ||||
20144 | sqlite3_finalizesqlite3_api->finalize(pSorter->pStmt); | ||||
20145 | sqlite3_freesqlite3_api->free(pSorter); | ||||
20146 | pCsr->pSorter = 0; | ||||
20147 | } | ||||
20148 | |||||
20149 | return rc; | ||||
20150 | } | ||||
20151 | |||||
20152 | static int fts5CursorFirst(Fts5FullTable *pTab, Fts5Cursor *pCsr, int bDesc){ | ||||
20153 | int rc; | ||||
20154 | Fts5Expr *pExpr = pCsr->pExpr; | ||||
20155 | rc = sqlite3Fts5ExprFirst(pExpr, pTab->p.pIndex, pCsr->iFirstRowid, bDesc); | ||||
20156 | if( sqlite3Fts5ExprEof(pExpr) ){ | ||||
20157 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | ||||
20158 | } | ||||
20159 | fts5CsrNewrow(pCsr); | ||||
20160 | return rc; | ||||
20161 | } | ||||
20162 | |||||
20163 | /* | ||||
20164 | ** Process a "special" query. A special query is identified as one with a | ||||
20165 | ** MATCH expression that begins with a '*' character. The remainder of | ||||
20166 | ** the text passed to the MATCH operator are used as the special query | ||||
20167 | ** parameters. | ||||
20168 | */ | ||||
20169 | static int fts5SpecialMatch( | ||||
20170 | Fts5FullTable *pTab, | ||||
20171 | Fts5Cursor *pCsr, | ||||
20172 | const char *zQuery | ||||
20173 | ){ | ||||
20174 | int rc = SQLITE_OK0; /* Return code */ | ||||
20175 | const char *z = zQuery; /* Special query text */ | ||||
20176 | int n; /* Number of bytes in text at z */ | ||||
20177 | |||||
20178 | while( z[0]==' ' ) z++; | ||||
20179 | for(n=0; z[n] && z[n]!=' '; n++); | ||||
20180 | |||||
20181 | assert( pTab->p.base.zErrMsg==0 )((void) (0)); | ||||
20182 | pCsr->ePlan = FTS5_PLAN_SPECIAL3; | ||||
20183 | |||||
20184 | if( n==5 && 0==sqlite3_strnicmpsqlite3_api->strnicmp("reads", z, n) ){ | ||||
20185 | pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->p.pIndex); | ||||
20186 | } | ||||
20187 | else if( n==2 && 0==sqlite3_strnicmpsqlite3_api->strnicmp("id", z, n) ){ | ||||
20188 | pCsr->iSpecial = pCsr->iCsrId; | ||||
20189 | } | ||||
20190 | else{ | ||||
20191 | /* An unrecognized directive. Return an error message. */ | ||||
20192 | pTab->p.base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("unknown special query: %.*s", n, z); | ||||
20193 | rc = SQLITE_ERROR1; | ||||
20194 | } | ||||
20195 | |||||
20196 | return rc; | ||||
20197 | } | ||||
20198 | |||||
20199 | /* | ||||
20200 | ** Search for an auxiliary function named zName that can be used with table | ||||
20201 | ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary | ||||
20202 | ** structure. Otherwise, if no such function exists, return NULL. | ||||
20203 | */ | ||||
20204 | static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){ | ||||
20205 | Fts5Auxiliary *pAux; | ||||
20206 | |||||
20207 | for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ | ||||
20208 | if( sqlite3_stricmpsqlite3_api->stricmp(zName, pAux->zFunc)==0 ) return pAux; | ||||
20209 | } | ||||
20210 | |||||
20211 | /* No function of the specified name was found. Return 0. */ | ||||
20212 | return 0; | ||||
20213 | } | ||||
20214 | |||||
20215 | |||||
20216 | static int fts5FindRankFunction(Fts5Cursor *pCsr){ | ||||
20217 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | ||||
20218 | Fts5Config *pConfig = pTab->p.pConfig; | ||||
20219 | int rc = SQLITE_OK0; | ||||
20220 | Fts5Auxiliary *pAux = 0; | ||||
20221 | const char *zRank = pCsr->zRank; | ||||
20222 | const char *zRankArgs = pCsr->zRankArgs; | ||||
20223 | |||||
20224 | if( zRankArgs ){ | ||||
20225 | char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs); | ||||
20226 | if( zSql ){ | ||||
20227 | sqlite3_stmt *pStmt = 0; | ||||
20228 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pConfig->db, zSql, -1, | ||||
20229 | SQLITE_PREPARE_PERSISTENT0x01, &pStmt, 0); | ||||
20230 | sqlite3_freesqlite3_api->free(zSql); | ||||
20231 | assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 )((void) (0)); | ||||
20232 | if( rc==SQLITE_OK0 ){ | ||||
20233 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pStmt) ){ | ||||
20234 | sqlite3_int64 nByte; | ||||
20235 | pCsr->nRankArg = sqlite3_column_countsqlite3_api->column_count(pStmt); | ||||
20236 | nByte = sizeof(sqlite3_value*)*pCsr->nRankArg; | ||||
20237 | pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte); | ||||
20238 | if( rc==SQLITE_OK0 ){ | ||||
20239 | int i; | ||||
20240 | for(i=0; i<pCsr->nRankArg; i++){ | ||||
20241 | pCsr->apRankArg[i] = sqlite3_column_valuesqlite3_api->column_value(pStmt, i); | ||||
20242 | } | ||||
20243 | } | ||||
20244 | pCsr->pRankArgStmt = pStmt; | ||||
20245 | }else{ | ||||
20246 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | ||||
20247 | assert( rc!=SQLITE_OK )((void) (0)); | ||||
20248 | } | ||||
20249 | } | ||||
20250 | } | ||||
20251 | } | ||||
20252 | |||||
20253 | if( rc==SQLITE_OK0 ){ | ||||
20254 | pAux = fts5FindAuxiliary(pTab, zRank); | ||||
20255 | if( pAux==0 ){ | ||||
20256 | assert( pTab->p.base.zErrMsg==0 )((void) (0)); | ||||
20257 | pTab->p.base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("no such function: %s", zRank); | ||||
20258 | rc = SQLITE_ERROR1; | ||||
20259 | } | ||||
20260 | } | ||||
20261 | |||||
20262 | pCsr->pRank = pAux; | ||||
20263 | return rc; | ||||
20264 | } | ||||
20265 | |||||
20266 | |||||
20267 | static int fts5CursorParseRank( | ||||
20268 | Fts5Config *pConfig, | ||||
20269 | Fts5Cursor *pCsr, | ||||
20270 | sqlite3_value *pRank | ||||
20271 | ){ | ||||
20272 | int rc = SQLITE_OK0; | ||||
20273 | if( pRank ){ | ||||
20274 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(pRank); | ||||
20275 | char *zRank = 0; | ||||
20276 | char *zRankArgs = 0; | ||||
20277 | |||||
20278 | if( z==0 ){ | ||||
20279 | if( sqlite3_value_typesqlite3_api->value_type(pRank)==SQLITE_NULL5 ) rc = SQLITE_ERROR1; | ||||
20280 | }else{ | ||||
20281 | rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); | ||||
20282 | } | ||||
20283 | if( rc==SQLITE_OK0 ){ | ||||
20284 | pCsr->zRank = zRank; | ||||
20285 | pCsr->zRankArgs = zRankArgs; | ||||
20286 | CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK)((pCsr)->csrflags |= (0x10)); | ||||
20287 | }else if( rc==SQLITE_ERROR1 ){ | ||||
20288 | pCsr->base.pVtab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | ||||
20289 | "parse error in rank function: %s", z | ||||
20290 | ); | ||||
20291 | } | ||||
20292 | }else{ | ||||
20293 | if( pConfig->zRank ){ | ||||
20294 | pCsr->zRank = (char*)pConfig->zRank; | ||||
20295 | pCsr->zRankArgs = (char*)pConfig->zRankArgs; | ||||
20296 | }else{ | ||||
20297 | pCsr->zRank = (char*)FTS5_DEFAULT_RANK"bm25"; | ||||
20298 | pCsr->zRankArgs = 0; | ||||
20299 | } | ||||
20300 | } | ||||
20301 | return rc; | ||||
20302 | } | ||||
20303 | |||||
20304 | static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){ | ||||
20305 | if( pVal ){ | ||||
20306 | int eType = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal); | ||||
20307 | if( eType==SQLITE_INTEGER1 ){ | ||||
20308 | return sqlite3_value_int64sqlite3_api->value_int64(pVal); | ||||
20309 | } | ||||
20310 | } | ||||
20311 | return iDefault; | ||||
20312 | } | ||||
20313 | |||||
20314 | /* | ||||
20315 | ** Set the error message on the virtual table passed as the first argument. | ||||
20316 | */ | ||||
20317 | static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){ | ||||
20318 | va_list ap; /* ... printf arguments */ | ||||
20319 | va_start(ap, zFormat)__builtin_va_start(ap, zFormat); | ||||
20320 | sqlite3_freesqlite3_api->free(p->p.base.zErrMsg); | ||||
20321 | p->p.base.zErrMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, ap); | ||||
20322 | va_end(ap)__builtin_va_end(ap); | ||||
20323 | } | ||||
20324 | |||||
20325 | /* | ||||
20326 | ** Arrange for subsequent calls to sqlite3Fts5Tokenize() to use the locale | ||||
20327 | ** specified by pLocale/nLocale. The buffer indicated by pLocale must remain | ||||
20328 | ** valid until after the final call to sqlite3Fts5Tokenize() that will use | ||||
20329 | ** the locale. | ||||
20330 | */ | ||||
20331 | static void sqlite3Fts5SetLocale( | ||||
20332 | Fts5Config *pConfig, | ||||
20333 | const char *zLocale, | ||||
20334 | int nLocale | ||||
20335 | ){ | ||||
20336 | Fts5TokenizerConfig *pT = &pConfig->t; | ||||
20337 | pT->pLocale = zLocale; | ||||
20338 | pT->nLocale = nLocale; | ||||
20339 | } | ||||
20340 | |||||
20341 | /* | ||||
20342 | ** Clear any locale configured by an earlier call to sqlite3Fts5SetLocale(). | ||||
20343 | */ | ||||
20344 | static void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ | ||||
20345 | sqlite3Fts5SetLocale(pConfig, 0, 0); | ||||
20346 | } | ||||
20347 | |||||
20348 | /* | ||||
20349 | ** Return true if the value passed as the only argument is an | ||||
20350 | ** fts5_locale() value. | ||||
20351 | */ | ||||
20352 | static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal){ | ||||
20353 | int ret = 0; | ||||
20354 | if( sqlite3_value_typesqlite3_api->value_type(pVal)==SQLITE_BLOB4 ){ | ||||
20355 | /* Call sqlite3_value_bytes() after sqlite3_value_blob() in this case. | ||||
20356 | ** If the blob was created using zeroblob(), then sqlite3_value_blob() | ||||
20357 | ** may call malloc(). If this malloc() fails, then the values returned | ||||
20358 | ** by both value_blob() and value_bytes() will be 0. If value_bytes() were | ||||
20359 | ** called first, then the NULL pointer returned by value_blob() might | ||||
20360 | ** be dereferenced. */ | ||||
20361 | const u8 *pBlob = sqlite3_value_blobsqlite3_api->value_blob(pVal); | ||||
20362 | int nBlob = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | ||||
20363 | if( nBlob>FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) | ||||
20364 | && 0==memcmp(pBlob, FTS5_LOCALE_HDR(pConfig)((const u8*)(pConfig->pGlobal->aLocaleHdr)), FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))) | ||||
20365 | ){ | ||||
20366 | ret = 1; | ||||
20367 | } | ||||
20368 | } | ||||
20369 | return ret; | ||||
20370 | } | ||||
20371 | |||||
20372 | /* | ||||
20373 | ** Value pVal is guaranteed to be an fts5_locale() value, according to | ||||
20374 | ** sqlite3Fts5IsLocaleValue(). This function extracts the text and locale | ||||
20375 | ** from the value and returns them separately. | ||||
20376 | ** | ||||
20377 | ** If successful, SQLITE_OK is returned and (*ppText) and (*ppLoc) set | ||||
20378 | ** to point to buffers containing the text and locale, as utf-8, | ||||
20379 | ** respectively. In this case output parameters (*pnText) and (*pnLoc) are | ||||
20380 | ** set to the sizes in bytes of these two buffers. | ||||
20381 | ** | ||||
20382 | ** Or, if an error occurs, then an SQLite error code is returned. The final | ||||
20383 | ** value of the four output parameters is undefined in this case. | ||||
20384 | */ | ||||
20385 | static int sqlite3Fts5DecodeLocaleValue( | ||||
20386 | sqlite3_value *pVal, | ||||
20387 | const char **ppText, | ||||
20388 | int *pnText, | ||||
20389 | const char **ppLoc, | ||||
20390 | int *pnLoc | ||||
20391 | ){ | ||||
20392 | const char *p = sqlite3_value_blobsqlite3_api->value_blob(pVal); | ||||
20393 | int n = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | ||||
20394 | int nLoc = 0; | ||||
20395 | |||||
20396 | assert( sqlite3_value_type(pVal)==SQLITE_BLOB )((void) (0)); | ||||
20397 | assert( n>FTS5_LOCALE_HDR_SIZE )((void) (0)); | ||||
20398 | |||||
20399 | for(nLoc=FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); p[nLoc]; nLoc++){ | ||||
20400 | if( nLoc==(n-1) ){ | ||||
20401 | return SQLITE_MISMATCH20; | ||||
20402 | } | ||||
20403 | } | ||||
20404 | *ppLoc = &p[FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))]; | ||||
20405 | *pnLoc = nLoc - FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); | ||||
20406 | |||||
20407 | *ppText = &p[nLoc+1]; | ||||
20408 | *pnText = n - nLoc - 1; | ||||
20409 | return SQLITE_OK0; | ||||
20410 | } | ||||
20411 | |||||
20412 | /* | ||||
20413 | ** Argument pVal is the text of a full-text search expression. It may or | ||||
20414 | ** may not have been wrapped by fts5_locale(). This function extracts | ||||
20415 | ** the text of the expression, and sets output variable (*pzText) to | ||||
20416 | ** point to a nul-terminated buffer containing the expression. | ||||
20417 | ** | ||||
20418 | ** If pVal was an fts5_locale() value, then sqlite3Fts5SetLocale() is called | ||||
20419 | ** to set the tokenizer to use the specified locale. | ||||
20420 | ** | ||||
20421 | ** If output variable (*pbFreeAndReset) is set to true, then the caller | ||||
20422 | ** is required to (a) call sqlite3Fts5ClearLocale() to reset the tokenizer | ||||
20423 | ** locale, and (b) call sqlite3_free() to free (*pzText). | ||||
20424 | */ | ||||
20425 | static int fts5ExtractExprText( | ||||
20426 | Fts5Config *pConfig, /* Fts5 configuration */ | ||||
20427 | sqlite3_value *pVal, /* Value to extract expression text from */ | ||||
20428 | char **pzText, /* OUT: nul-terminated buffer of text */ | ||||
20429 | int *pbFreeAndReset /* OUT: Free (*pzText) and clear locale */ | ||||
20430 | ){ | ||||
20431 | int rc = SQLITE_OK0; | ||||
20432 | |||||
20433 | if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | ||||
20434 | const char *pText = 0; | ||||
20435 | int nText = 0; | ||||
20436 | const char *pLoc = 0; | ||||
20437 | int nLoc = 0; | ||||
20438 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | ||||
20439 | *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, pText); | ||||
20440 | if( rc==SQLITE_OK0 ){ | ||||
20441 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | ||||
20442 | } | ||||
20443 | *pbFreeAndReset = 1; | ||||
20444 | }else{ | ||||
20445 | *pzText = (char*)sqlite3_value_textsqlite3_api->value_text(pVal); | ||||
20446 | *pbFreeAndReset = 0; | ||||
20447 | } | ||||
20448 | |||||
20449 | return rc; | ||||
20450 | } | ||||
20451 | |||||
20452 | |||||
20453 | /* | ||||
20454 | ** This is the xFilter interface for the virtual table. See | ||||
20455 | ** the virtual table xFilter method documentation for additional | ||||
20456 | ** information. | ||||
20457 | ** | ||||
20458 | ** There are three possible query strategies: | ||||
20459 | ** | ||||
20460 | ** 1. Full-text search using a MATCH operator. | ||||
20461 | ** 2. A by-rowid lookup. | ||||
20462 | ** 3. A full-table scan. | ||||
20463 | */ | ||||
20464 | static int fts5FilterMethod( | ||||
20465 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ | ||||
20466 | int idxNum, /* Strategy index */ | ||||
20467 | const char *idxStr, /* Unused */ | ||||
20468 | int nVal, /* Number of elements in apVal */ | ||||
20469 | sqlite3_value **apVal /* Arguments for the indexing scheme */ | ||||
20470 | ){ | ||||
20471 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | ||||
20472 | Fts5Config *pConfig = pTab->p.pConfig; | ||||
20473 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | ||||
20474 | int rc = SQLITE_OK0; /* Error code */ | ||||
20475 | int bDesc; /* True if ORDER BY [rank|rowid] DESC */ | ||||
20476 | int bOrderByRank; /* True if ORDER BY rank */ | ||||
20477 | sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */ | ||||
20478 | sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */ | ||||
20479 | sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */ | ||||
20480 | sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ | ||||
20481 | int iCol; /* Column on LHS of MATCH operator */ | ||||
20482 | char **pzErrmsg = pConfig->pzErrmsg; | ||||
20483 | int bPrefixInsttoken = pConfig->bPrefixInsttoken; | ||||
20484 | int i; | ||||
20485 | int iIdxStr = 0; | ||||
20486 | Fts5Expr *pExpr = 0; | ||||
20487 | |||||
20488 | assert( pConfig->bLock==0 )((void) (0)); | ||||
20489 | if( pCsr->ePlan ){ | ||||
20490 | fts5FreeCursorComponents(pCsr); | ||||
20491 | memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr)); | ||||
20492 | } | ||||
20493 | |||||
20494 | assert( pCsr->pStmt==0 )((void) (0)); | ||||
20495 | assert( pCsr->pExpr==0 )((void) (0)); | ||||
20496 | assert( pCsr->csrflags==0 )((void) (0)); | ||||
20497 | assert( pCsr->pRank==0 )((void) (0)); | ||||
20498 | assert( pCsr->zRank==0 )((void) (0)); | ||||
20499 | assert( pCsr->zRankArgs==0 )((void) (0)); | ||||
20500 | assert( pTab->pSortCsr==0 || nVal==0 )((void) (0)); | ||||
20501 | |||||
20502 | assert( pzErrmsg==0 || pzErrmsg==&pTab->p.base.zErrMsg )((void) (0)); | ||||
20503 | pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | ||||
20504 | |||||
20505 | /* Decode the arguments passed through to this function. */ | ||||
20506 | for(i=0; i<nVal; i++){ | ||||
20507 | switch( idxStr[iIdxStr++] ){ | ||||
20508 | case 'r': | ||||
20509 | pRank = apVal[i]; | ||||
20510 | break; | ||||
20511 | case 'M': { | ||||
20512 | char *zText = 0; | ||||
20513 | int bFreeAndReset = 0; | ||||
20514 | int bInternal = 0; | ||||
20515 | |||||
20516 | rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset); | ||||
20517 | if( rc!=SQLITE_OK0 ) goto filter_out; | ||||
20518 | if( zText==0 ) zText = ""; | ||||
20519 | if( sqlite3_value_subtypesqlite3_api->value_subtype(apVal[i])==FTS5_INSTTOKEN_SUBTYPE73 ){ | ||||
20520 | pConfig->bPrefixInsttoken = 1; | ||||
20521 | } | ||||
20522 | |||||
20523 | iCol = 0; | ||||
20524 | do{ | ||||
20525 | iCol = iCol*10 + (idxStr[iIdxStr]-'0'); | ||||
20526 | iIdxStr++; | ||||
20527 | }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' ); | ||||
20528 | |||||
20529 | if( zText[0]=='*' ){ | ||||
20530 | /* The user has issued a query of the form "MATCH '*...'". This | ||||
20531 | ** indicates that the MATCH expression is not a full text query, | ||||
20532 | ** but a request for an internal parameter. */ | ||||
20533 | rc = fts5SpecialMatch(pTab, pCsr, &zText[1]); | ||||
20534 | bInternal = 1; | ||||
20535 | }else{ | ||||
20536 | char **pzErr = &pTab->p.base.zErrMsg; | ||||
20537 | rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr); | ||||
20538 | if( rc==SQLITE_OK0 ){ | ||||
20539 | rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); | ||||
20540 | pExpr = 0; | ||||
20541 | } | ||||
20542 | } | ||||
20543 | |||||
20544 | if( bFreeAndReset ){ | ||||
20545 | sqlite3_freesqlite3_api->free(zText); | ||||
20546 | sqlite3Fts5ClearLocale(pConfig); | ||||
20547 | } | ||||
20548 | |||||
20549 | if( bInternal || rc!=SQLITE_OK0 ) goto filter_out; | ||||
20550 | |||||
20551 | break; | ||||
20552 | } | ||||
20553 | case 'L': | ||||
20554 | case 'G': { | ||||
20555 | int bGlob = (idxStr[iIdxStr-1]=='G'); | ||||
20556 | const char *zText = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[i]); | ||||
20557 | iCol = 0; | ||||
20558 | do{ | ||||
20559 | iCol = iCol*10 + (idxStr[iIdxStr]-'0'); | ||||
20560 | iIdxStr++; | ||||
20561 | }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' ); | ||||
20562 | if( zText ){ | ||||
20563 | rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr); | ||||
20564 | } | ||||
20565 | if( rc==SQLITE_OK0 ){ | ||||
20566 | rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); | ||||
20567 | pExpr = 0; | ||||
20568 | } | ||||
20569 | if( rc!=SQLITE_OK0 ) goto filter_out; | ||||
20570 | break; | ||||
20571 | } | ||||
20572 | case '=': | ||||
20573 | pRowidEq = apVal[i]; | ||||
20574 | break; | ||||
20575 | case '<': | ||||
20576 | pRowidLe = apVal[i]; | ||||
20577 | break; | ||||
20578 | default: assert( idxStr[iIdxStr-1]=='>' )((void) (0)); | ||||
20579 | pRowidGe = apVal[i]; | ||||
20580 | break; | ||||
20581 | } | ||||
20582 | } | ||||
20583 | bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK0x0020) ? 1 : 0); | ||||
20584 | pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC0x0080) ? 1 : 0); | ||||
20585 | |||||
20586 | /* Set the cursor upper and lower rowid limits. Only some strategies | ||||
20587 | ** actually use them. This is ok, as the xBestIndex() method leaves the | ||||
20588 | ** sqlite3_index_constraint.omit flag clear for range constraints | ||||
20589 | ** on the rowid field. */ | ||||
20590 | if( pRowidEq ){ | ||||
20591 | pRowidLe = pRowidGe = pRowidEq; | ||||
20592 | } | ||||
20593 | if( bDesc ){ | ||||
20594 | pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))); | ||||
20595 | pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32)))); | ||||
20596 | }else{ | ||||
20597 | pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))); | ||||
20598 | pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32)))); | ||||
20599 | } | ||||
20600 | |||||
20601 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | ||||
20602 | if( rc!=SQLITE_OK0 ) goto filter_out; | ||||
20603 | |||||
20604 | if( pTab->pSortCsr ){ | ||||
20605 | /* If pSortCsr is non-NULL, then this call is being made as part of | ||||
20606 | ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is | ||||
20607 | ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will | ||||
20608 | ** return results to the user for this query. The current cursor | ||||
20609 | ** (pCursor) is used to execute the query issued by function | ||||
20610 | ** fts5CursorFirstSorted() above. */ | ||||
20611 | assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 )((void) (0)); | ||||
20612 | assert( nVal==0 && bOrderByRank==0 && bDesc==0 )((void) (0)); | ||||
20613 | assert( pCsr->iLastRowid==LARGEST_INT64 )((void) (0)); | ||||
20614 | assert( pCsr->iFirstRowid==SMALLEST_INT64 )((void) (0)); | ||||
20615 | if( pTab->pSortCsr->bDesc ){ | ||||
20616 | pCsr->iLastRowid = pTab->pSortCsr->iFirstRowid; | ||||
20617 | pCsr->iFirstRowid = pTab->pSortCsr->iLastRowid; | ||||
20618 | }else{ | ||||
20619 | pCsr->iLastRowid = pTab->pSortCsr->iLastRowid; | ||||
20620 | pCsr->iFirstRowid = pTab->pSortCsr->iFirstRowid; | ||||
20621 | } | ||||
20622 | pCsr->ePlan = FTS5_PLAN_SOURCE2; | ||||
20623 | pCsr->pExpr = pTab->pSortCsr->pExpr; | ||||
20624 | rc = fts5CursorFirst(pTab, pCsr, bDesc); | ||||
20625 | }else if( pCsr->pExpr ){ | ||||
20626 | assert( rc==SQLITE_OK )((void) (0)); | ||||
20627 | rc = fts5CursorParseRank(pConfig, pCsr, pRank); | ||||
20628 | if( rc==SQLITE_OK0 ){ | ||||
20629 | if( bOrderByRank ){ | ||||
20630 | pCsr->ePlan = FTS5_PLAN_SORTED_MATCH4; | ||||
20631 | rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); | ||||
20632 | }else{ | ||||
20633 | pCsr->ePlan = FTS5_PLAN_MATCH1; | ||||
20634 | rc = fts5CursorFirst(pTab, pCsr, bDesc); | ||||
20635 | } | ||||
20636 | } | ||||
20637 | }else if( pConfig->zContent==0 ){ | ||||
20638 | fts5SetVtabError(pTab,"%s: table does not support scanning",pConfig->zName); | ||||
20639 | rc = SQLITE_ERROR1; | ||||
20640 | }else{ | ||||
20641 | /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup | ||||
20642 | ** by rowid (ePlan==FTS5_PLAN_ROWID). */ | ||||
20643 | pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID6 : FTS5_PLAN_SCAN5); | ||||
20644 | rc = sqlite3Fts5StorageStmt( | ||||
20645 | pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->p.base.zErrMsg | ||||
20646 | ); | ||||
20647 | if( rc==SQLITE_OK0 ){ | ||||
20648 | if( pRowidEq!=0 ){ | ||||
20649 | assert( pCsr->ePlan==FTS5_PLAN_ROWID )((void) (0)); | ||||
20650 | sqlite3_bind_valuesqlite3_api->bind_value(pCsr->pStmt, 1, pRowidEq); | ||||
20651 | }else{ | ||||
20652 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid); | ||||
20653 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid); | ||||
20654 | } | ||||
20655 | rc = fts5NextMethod(pCursor); | ||||
20656 | } | ||||
20657 | } | ||||
20658 | |||||
20659 | filter_out: | ||||
20660 | sqlite3Fts5ExprFree(pExpr); | ||||
20661 | pConfig->pzErrmsg = pzErrmsg; | ||||
20662 | pConfig->bPrefixInsttoken = bPrefixInsttoken; | ||||
20663 | return rc; | ||||
20664 | } | ||||
20665 | |||||
20666 | /* | ||||
20667 | ** This is the xEof method of the virtual table. SQLite calls this | ||||
20668 | ** routine to find out if it has reached the end of a result set. | ||||
20669 | */ | ||||
20670 | static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ | ||||
20671 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | ||||
20672 | return (CsrFlagTest(pCsr, FTS5CSR_EOF)((pCsr)->csrflags & (0x01)) ? 1 : 0); | ||||
20673 | } | ||||
20674 | |||||
20675 | /* | ||||
20676 | ** Return the rowid that the cursor currently points to. | ||||
20677 | */ | ||||
20678 | static i64 fts5CursorRowid(Fts5Cursor *pCsr){ | ||||
20679 | assert( pCsr->ePlan==FTS5_PLAN_MATCH((void) (0)) | ||||
20680 | || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH((void) (0)) | ||||
20681 | || pCsr->ePlan==FTS5_PLAN_SOURCE((void) (0)) | ||||
20682 | || pCsr->ePlan==FTS5_PLAN_SCAN((void) (0)) | ||||
20683 | || pCsr->ePlan==FTS5_PLAN_ROWID((void) (0)) | ||||
20684 | )((void) (0)); | ||||
20685 | if( pCsr->pSorter ){ | ||||
20686 | return pCsr->pSorter->iRowid; | ||||
20687 | }else if( pCsr->ePlan>=FTS5_PLAN_SCAN5 ){ | ||||
20688 | return sqlite3_column_int64sqlite3_api->column_int64(pCsr->pStmt, 0); | ||||
20689 | }else{ | ||||
20690 | return sqlite3Fts5ExprRowid(pCsr->pExpr); | ||||
20691 | } | ||||
20692 | } | ||||
20693 | |||||
20694 | /* | ||||
20695 | ** This is the xRowid method. The SQLite core calls this routine to | ||||
20696 | ** retrieve the rowid for the current row of the result set. fts5 | ||||
20697 | ** exposes %_content.rowid as the rowid for the virtual table. The | ||||
20698 | ** rowid should be written to *pRowid. | ||||
20699 | */ | ||||
20700 | static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ | ||||
20701 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | ||||
20702 | int ePlan = pCsr->ePlan; | ||||
20703 | |||||
20704 | assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 )((void) (0)); | ||||
20705 | if( ePlan==FTS5_PLAN_SPECIAL3 ){ | ||||
20706 | *pRowid = 0; | ||||
20707 | }else{ | ||||
20708 | *pRowid = fts5CursorRowid(pCsr); | ||||
20709 | } | ||||
20710 | |||||
20711 | return SQLITE_OK0; | ||||
20712 | } | ||||
20713 | |||||
20714 | |||||
20715 | /* | ||||
20716 | ** If the cursor requires seeking (bSeekRequired flag is set), seek it. | ||||
20717 | ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. | ||||
20718 | ** | ||||
20719 | ** If argument bErrormsg is true and an error occurs, an error message may | ||||
20720 | ** be left in sqlite3_vtab.zErrMsg. | ||||
20721 | */ | ||||
20722 | static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){ | ||||
20723 | int rc = SQLITE_OK0; | ||||
20724 | |||||
20725 | /* If the cursor does not yet have a statement handle, obtain one now. */ | ||||
20726 | if( pCsr->pStmt==0 ){ | ||||
20727 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | ||||
20728 | int eStmt = fts5StmtType(pCsr); | ||||
20729 | rc = sqlite3Fts5StorageStmt( | ||||
20730 | pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->p.base.zErrMsg:0) | ||||
20731 | ); | ||||
20732 | assert( rc!=SQLITE_OK || pTab->p.base.zErrMsg==0 )((void) (0)); | ||||
20733 | assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) )((void) (0)); | ||||
20734 | } | ||||
20735 | |||||
20736 | if( rc==SQLITE_OK0 && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags & (0x02)) ){ | ||||
20737 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | ||||
20738 | assert( pCsr->pExpr )((void) (0)); | ||||
20739 | sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | ||||
20740 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); | ||||
20741 | pTab->pConfig->bLock++; | ||||
20742 | rc = sqlite3_stepsqlite3_api->step(pCsr->pStmt); | ||||
20743 | pTab->pConfig->bLock--; | ||||
20744 | if( rc==SQLITE_ROW100 ){ | ||||
20745 | rc = SQLITE_OK0; | ||||
20746 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags &= ~(0x02)); | ||||
20747 | }else{ | ||||
20748 | rc = sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | ||||
20749 | if( rc==SQLITE_OK0 ){ | ||||
20750 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
20751 | fts5SetVtabError((Fts5FullTable*)pTab, | ||||
20752 | "fts5: missing row %lld from content table %s", | ||||
20753 | fts5CursorRowid(pCsr), | ||||
20754 | pTab->pConfig->zContent | ||||
20755 | ); | ||||
20756 | }else if( pTab->pConfig->pzErrmsg ){ | ||||
20757 | fts5SetVtabError((Fts5FullTable*)pTab, | ||||
20758 | "%s", sqlite3_errmsgsqlite3_api->errmsg(pTab->pConfig->db) | ||||
20759 | ); | ||||
20760 | } | ||||
20761 | } | ||||
20762 | } | ||||
20763 | return rc; | ||||
20764 | } | ||||
20765 | |||||
20766 | /* | ||||
20767 | ** This function is called to handle an FTS INSERT command. In other words, | ||||
20768 | ** an INSERT statement of the form: | ||||
20769 | ** | ||||
20770 | ** INSERT INTO fts(fts) VALUES($pCmd) | ||||
20771 | ** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal) | ||||
20772 | ** | ||||
20773 | ** Argument pVal is the value assigned to column "fts" by the INSERT | ||||
20774 | ** statement. This function returns SQLITE_OK if successful, or an SQLite | ||||
20775 | ** error code if an error occurs. | ||||
20776 | ** | ||||
20777 | ** The commands implemented by this function are documented in the "Special | ||||
20778 | ** INSERT Directives" section of the documentation. It should be updated if | ||||
20779 | ** more commands are added to this function. | ||||
20780 | */ | ||||
20781 | static int fts5SpecialInsert( | ||||
20782 | Fts5FullTable *pTab, /* Fts5 table object */ | ||||
20783 | const char *zCmd, /* Text inserted into table-name column */ | ||||
20784 | sqlite3_value *pVal /* Value inserted into rank column */ | ||||
20785 | ){ | ||||
20786 | Fts5Config *pConfig = pTab->p.pConfig; | ||||
20787 | int rc = SQLITE_OK0; | ||||
20788 | int bError = 0; | ||||
20789 | int bLoadConfig = 0; | ||||
20790 | |||||
20791 | if( 0==sqlite3_stricmpsqlite3_api->stricmp("delete-all", zCmd) ){ | ||||
20792 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | ||||
20793 | fts5SetVtabError(pTab, | ||||
20794 | "'delete-all' may only be used with a " | ||||
20795 | "contentless or external content fts5 table" | ||||
20796 | ); | ||||
20797 | rc = SQLITE_ERROR1; | ||||
20798 | }else{ | ||||
20799 | rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); | ||||
20800 | } | ||||
20801 | bLoadConfig = 1; | ||||
20802 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("rebuild", zCmd) ){ | ||||
20803 | if( fts5IsContentless(pTab, 1) ){ | ||||
20804 | fts5SetVtabError(pTab, | ||||
20805 | "'rebuild' may not be used with a contentless fts5 table" | ||||
20806 | ); | ||||
20807 | rc = SQLITE_ERROR1; | ||||
20808 | }else{ | ||||
20809 | rc = sqlite3Fts5StorageRebuild(pTab->pStorage); | ||||
20810 | } | ||||
20811 | bLoadConfig = 1; | ||||
20812 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("optimize", zCmd) ){ | ||||
20813 | rc = sqlite3Fts5StorageOptimize(pTab->pStorage); | ||||
20814 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("merge", zCmd) ){ | ||||
20815 | int nMerge = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
20816 | rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); | ||||
20817 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("integrity-check", zCmd) ){ | ||||
20818 | int iArg = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
20819 | rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, iArg); | ||||
20820 | #ifdef SQLITE_DEBUG | ||||
20821 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("prefix-index", zCmd) ){ | ||||
20822 | pConfig->bPrefixIndex = sqlite3_value_intsqlite3_api->value_int(pVal); | ||||
20823 | #endif | ||||
20824 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("flush", zCmd) ){ | ||||
20825 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | ||||
20826 | }else{ | ||||
20827 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | ||||
20828 | if( rc==SQLITE_OK0 ){ | ||||
20829 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | ||||
20830 | } | ||||
20831 | if( rc==SQLITE_OK0 ){ | ||||
20832 | rc = sqlite3Fts5ConfigSetValue(pTab->p.pConfig, zCmd, pVal, &bError); | ||||
20833 | } | ||||
20834 | if( rc==SQLITE_OK0 ){ | ||||
20835 | if( bError ){ | ||||
20836 | rc = SQLITE_ERROR1; | ||||
20837 | }else{ | ||||
20838 | rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0); | ||||
20839 | } | ||||
20840 | } | ||||
20841 | } | ||||
20842 | |||||
20843 | if( rc==SQLITE_OK0 && bLoadConfig ){ | ||||
20844 | pTab->p.pConfig->iCookie--; | ||||
20845 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | ||||
20846 | } | ||||
20847 | |||||
20848 | return rc; | ||||
20849 | } | ||||
20850 | |||||
20851 | static int fts5SpecialDelete( | ||||
20852 | Fts5FullTable *pTab, | ||||
20853 | sqlite3_value **apVal | ||||
20854 | ){ | ||||
20855 | int rc = SQLITE_OK0; | ||||
20856 | int eType1 = sqlite3_value_typesqlite3_api->value_type(apVal[1]); | ||||
20857 | if( eType1==SQLITE_INTEGER1 ){ | ||||
20858 | sqlite3_int64 iDel = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); | ||||
20859 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2], 0); | ||||
20860 | } | ||||
20861 | return rc; | ||||
20862 | } | ||||
20863 | |||||
20864 | static void fts5StorageInsert( | ||||
20865 | int *pRc, | ||||
20866 | Fts5FullTable *pTab, | ||||
20867 | sqlite3_value **apVal, | ||||
20868 | i64 *piRowid | ||||
20869 | ){ | ||||
20870 | int rc = *pRc; | ||||
20871 | if( rc==SQLITE_OK0 ){ | ||||
20872 | rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, 0, apVal, piRowid); | ||||
20873 | } | ||||
20874 | if( rc==SQLITE_OK0 ){ | ||||
20875 | rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid); | ||||
20876 | } | ||||
20877 | *pRc = rc; | ||||
20878 | } | ||||
20879 | |||||
20880 | /* | ||||
20881 | ** | ||||
20882 | ** This function is called when the user attempts an UPDATE on a contentless | ||||
20883 | ** table. Parameter bRowidModified is true if the UPDATE statement modifies | ||||
20884 | ** the rowid value. Parameter apVal[] contains the new values for each user | ||||
20885 | ** defined column of the fts5 table. pConfig is the configuration object of the | ||||
20886 | ** table being updated (guaranteed to be contentless). The contentless_delete=1 | ||||
20887 | ** and contentless_unindexed=1 options may or may not be set. | ||||
20888 | ** | ||||
20889 | ** This function returns SQLITE_OK if the UPDATE can go ahead, or an SQLite | ||||
20890 | ** error code if it cannot. In this case an error message is also loaded into | ||||
20891 | ** pConfig. Output parameter (*pbContent) is set to true if the caller should | ||||
20892 | ** update the %_content table only - not the FTS index or any other shadow | ||||
20893 | ** table. This occurs when an UPDATE modifies only UNINDEXED columns of the | ||||
20894 | ** table. | ||||
20895 | ** | ||||
20896 | ** An UPDATE may proceed if: | ||||
20897 | ** | ||||
20898 | ** * The only columns modified are UNINDEXED columns, or | ||||
20899 | ** | ||||
20900 | ** * The contentless_delete=1 option was specified and all of the indexed | ||||
20901 | ** columns (not a subset) have been modified. | ||||
20902 | */ | ||||
20903 | static int fts5ContentlessUpdate( | ||||
20904 | Fts5Config *pConfig, | ||||
20905 | sqlite3_value **apVal, | ||||
20906 | int bRowidModified, | ||||
20907 | int *pbContent | ||||
20908 | ){ | ||||
20909 | int ii; | ||||
20910 | int bSeenIndex = 0; /* Have seen modified indexed column */ | ||||
20911 | int bSeenIndexNC = 0; /* Have seen unmodified indexed column */ | ||||
20912 | int rc = SQLITE_OK0; | ||||
20913 | |||||
20914 | for(ii=0; ii<pConfig->nCol; ii++){ | ||||
20915 | if( pConfig->abUnindexed[ii]==0 ){ | ||||
20916 | if( sqlite3_value_nochangesqlite3_api->value_nochange(apVal[ii]) ){ | ||||
20917 | bSeenIndexNC++; | ||||
20918 | }else{ | ||||
20919 | bSeenIndex++; | ||||
20920 | } | ||||
20921 | } | ||||
20922 | } | ||||
20923 | |||||
20924 | if( bSeenIndex==0 && bRowidModified==0 ){ | ||||
20925 | *pbContent = 1; | ||||
20926 | }else{ | ||||
20927 | if( bSeenIndexNC || pConfig->bContentlessDelete==0 ){ | ||||
20928 | rc = SQLITE_ERROR1; | ||||
20929 | sqlite3Fts5ConfigErrmsg(pConfig, | ||||
20930 | (pConfig->bContentlessDelete ? | ||||
20931 | "%s a subset of columns on fts5 contentless-delete table: %s" : | ||||
20932 | "%s contentless fts5 table: %s") | ||||
20933 | , "cannot UPDATE", pConfig->zName | ||||
20934 | ); | ||||
20935 | } | ||||
20936 | } | ||||
20937 | |||||
20938 | return rc; | ||||
20939 | } | ||||
20940 | |||||
20941 | /* | ||||
20942 | ** This function is the implementation of the xUpdate callback used by | ||||
20943 | ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be | ||||
20944 | ** inserted, updated or deleted. | ||||
20945 | ** | ||||
20946 | ** A delete specifies a single argument - the rowid of the row to remove. | ||||
20947 | ** | ||||
20948 | ** Update and insert operations pass: | ||||
20949 | ** | ||||
20950 | ** 1. The "old" rowid, or NULL. | ||||
20951 | ** 2. The "new" rowid. | ||||
20952 | ** 3. Values for each of the nCol matchable columns. | ||||
20953 | ** 4. Values for the two hidden columns (<tablename> and "rank"). | ||||
20954 | */ | ||||
20955 | static int fts5UpdateMethod( | ||||
20956 | sqlite3_vtab *pVtab, /* Virtual table handle */ | ||||
20957 | int nArg, /* Size of argument array */ | ||||
20958 | sqlite3_value **apVal, /* Array of arguments */ | ||||
20959 | sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ | ||||
20960 | ){ | ||||
20961 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
20962 | Fts5Config *pConfig = pTab->p.pConfig; | ||||
20963 | int eType0; /* value_type() of apVal[0] */ | ||||
20964 | int rc = SQLITE_OK0; /* Return code */ | ||||
20965 | |||||
20966 | /* A transaction must be open when this is called. */ | ||||
20967 | assert( pTab->ts.eState==1 || pTab->ts.eState==2 )((void) (0)); | ||||
20968 | |||||
20969 | assert( pVtab->zErrMsg==0 )((void) (0)); | ||||
20970 | assert( nArg==1 || nArg==(2+pConfig->nCol+2) )((void) (0)); | ||||
20971 | assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER((void) (0)) | ||||
20972 | || sqlite3_value_type(apVal[0])==SQLITE_NULL((void) (0)) | ||||
20973 | )((void) (0)); | ||||
20974 | assert( pTab->p.pConfig->pzErrmsg==0 )((void) (0)); | ||||
20975 | if( pConfig->pgsz==0 ){ | ||||
20976 | rc = sqlite3Fts5ConfigLoad(pTab->p.pConfig, pTab->p.pConfig->iCookie); | ||||
20977 | if( rc!=SQLITE_OK0 ) return rc; | ||||
20978 | } | ||||
20979 | |||||
20980 | pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | ||||
20981 | |||||
20982 | /* Put any active cursors into REQUIRE_SEEK state. */ | ||||
20983 | fts5TripCursors(pTab); | ||||
20984 | |||||
20985 | eType0 = sqlite3_value_typesqlite3_api->value_type(apVal[0]); | ||||
20986 | if( eType0==SQLITE_NULL5 | ||||
20987 | && sqlite3_value_typesqlite3_api->value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL5 | ||||
20988 | ){ | ||||
20989 | /* A "special" INSERT op. These are handled separately. */ | ||||
20990 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[2+pConfig->nCol]); | ||||
20991 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 | ||||
20992 | && 0==sqlite3_stricmpsqlite3_api->stricmp("delete", z) | ||||
20993 | ){ | ||||
20994 | if( pConfig->bContentlessDelete ){ | ||||
20995 | fts5SetVtabError(pTab, | ||||
20996 | "'delete' may not be used with a contentless_delete=1 table" | ||||
20997 | ); | ||||
20998 | rc = SQLITE_ERROR1; | ||||
20999 | }else{ | ||||
21000 | rc = fts5SpecialDelete(pTab, apVal); | ||||
21001 | } | ||||
21002 | }else{ | ||||
21003 | rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]); | ||||
21004 | } | ||||
21005 | }else{ | ||||
21006 | /* A regular INSERT, UPDATE or DELETE statement. The trick here is that | ||||
21007 | ** any conflict on the rowid value must be detected before any | ||||
21008 | ** modifications are made to the database file. There are 4 cases: | ||||
21009 | ** | ||||
21010 | ** 1) DELETE | ||||
21011 | ** 2) UPDATE (rowid not modified) | ||||
21012 | ** 3) UPDATE (rowid modified) | ||||
21013 | ** 4) INSERT | ||||
21014 | ** | ||||
21015 | ** Cases 3 and 4 may violate the rowid constraint. | ||||
21016 | */ | ||||
21017 | int eConflict = SQLITE_ABORT4; | ||||
21018 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 || pConfig->bContentlessDelete ){ | ||||
21019 | eConflict = sqlite3_vtab_on_conflictsqlite3_api->vtab_on_conflict(pConfig->db); | ||||
21020 | } | ||||
21021 | |||||
21022 | assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL )((void) (0)); | ||||
21023 | assert( nArg!=1 || eType0==SQLITE_INTEGER )((void) (0)); | ||||
21024 | |||||
21025 | /* DELETE */ | ||||
21026 | if( nArg==1 ){ | ||||
21027 | /* It is only possible to DELETE from a contentless table if the | ||||
21028 | ** contentless_delete=1 flag is set. */ | ||||
21029 | if( fts5IsContentless(pTab, 1) && pConfig->bContentlessDelete==0 ){ | ||||
21030 | fts5SetVtabError(pTab, | ||||
21031 | "cannot DELETE from contentless fts5 table: %s", pConfig->zName | ||||
21032 | ); | ||||
21033 | rc = SQLITE_ERROR1; | ||||
21034 | }else{ | ||||
21035 | i64 iDel = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); /* Rowid to delete */ | ||||
21036 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0); | ||||
21037 | } | ||||
21038 | } | ||||
21039 | |||||
21040 | /* INSERT or UPDATE */ | ||||
21041 | else{ | ||||
21042 | int eType1 = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(apVal[1]); | ||||
21043 | |||||
21044 | /* It is an error to write an fts5_locale() value to a table without | ||||
21045 | ** the locale=1 option. */ | ||||
21046 | if( pConfig->bLocale==0 ){ | ||||
21047 | int ii; | ||||
21048 | for(ii=0; ii<pConfig->nCol; ii++){ | ||||
21049 | sqlite3_value *pVal = apVal[ii+2]; | ||||
21050 | if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | ||||
21051 | fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); | ||||
21052 | rc = SQLITE_MISMATCH20; | ||||
21053 | goto update_out; | ||||
21054 | } | ||||
21055 | } | ||||
21056 | } | ||||
21057 | |||||
21058 | if( eType0!=SQLITE_INTEGER1 ){ | ||||
21059 | /* An INSERT statement. If the conflict-mode is REPLACE, first remove | ||||
21060 | ** the current entry (if any). */ | ||||
21061 | if( eConflict==SQLITE_REPLACE5 && eType1==SQLITE_INTEGER1 ){ | ||||
21062 | i64 iNew = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); /* Rowid to delete */ | ||||
21063 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0); | ||||
21064 | } | ||||
21065 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | ||||
21066 | } | ||||
21067 | |||||
21068 | /* UPDATE */ | ||||
21069 | else{ | ||||
21070 | Fts5Storage *pStorage = pTab->pStorage; | ||||
21071 | i64 iOld = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); /* Old rowid */ | ||||
21072 | i64 iNew = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); /* New rowid */ | ||||
21073 | int bContent = 0; /* Content only update */ | ||||
21074 | |||||
21075 | /* If this is a contentless table (including contentless_unindexed=1 | ||||
21076 | ** tables), check if the UPDATE may proceed. */ | ||||
21077 | if( fts5IsContentless(pTab, 1) ){ | ||||
21078 | rc = fts5ContentlessUpdate(pConfig, &apVal[2], iOld!=iNew, &bContent); | ||||
21079 | if( rc!=SQLITE_OK0 ) goto update_out; | ||||
21080 | } | ||||
21081 | |||||
21082 | if( eType1!=SQLITE_INTEGER1 ){ | ||||
21083 | rc = SQLITE_MISMATCH20; | ||||
21084 | }else if( iOld!=iNew ){ | ||||
21085 | assert( bContent==0 )((void) (0)); | ||||
21086 | if( eConflict==SQLITE_REPLACE5 ){ | ||||
21087 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); | ||||
21088 | if( rc==SQLITE_OK0 ){ | ||||
21089 | rc = sqlite3Fts5StorageDelete(pStorage, iNew, 0, 0); | ||||
21090 | } | ||||
21091 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | ||||
21092 | }else{ | ||||
21093 | rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); | ||||
21094 | if( rc==SQLITE_OK0 ){ | ||||
21095 | rc = sqlite3Fts5StorageContentInsert(pStorage, 0, apVal, pRowid); | ||||
21096 | } | ||||
21097 | if( rc==SQLITE_OK0 ){ | ||||
21098 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 0); | ||||
21099 | } | ||||
21100 | if( rc==SQLITE_OK0 ){ | ||||
21101 | rc = sqlite3Fts5StorageIndexInsert(pStorage, apVal, *pRowid); | ||||
21102 | } | ||||
21103 | } | ||||
21104 | }else if( bContent ){ | ||||
21105 | /* This occurs when an UPDATE on a contentless table affects *only* | ||||
21106 | ** UNINDEXED columns. This is a no-op for contentless_unindexed=0 | ||||
21107 | ** tables, or a write to the %_content table only for =1 tables. */ | ||||
21108 | assert( fts5IsContentless(pTab, 1) )((void) (0)); | ||||
21109 | rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); | ||||
21110 | if( rc==SQLITE_OK0 ){ | ||||
21111 | rc = sqlite3Fts5StorageContentInsert(pStorage, 1, apVal, pRowid); | ||||
21112 | } | ||||
21113 | }else{ | ||||
21114 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); | ||||
21115 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | ||||
21116 | } | ||||
21117 | sqlite3Fts5StorageReleaseDeleteRow(pStorage); | ||||
21118 | } | ||||
21119 | } | ||||
21120 | } | ||||
21121 | |||||
21122 | update_out: | ||||
21123 | pTab->p.pConfig->pzErrmsg = 0; | ||||
21124 | return rc; | ||||
21125 | } | ||||
21126 | |||||
21127 | /* | ||||
21128 | ** Implementation of xSync() method. | ||||
21129 | */ | ||||
21130 | static int fts5SyncMethod(sqlite3_vtab *pVtab){ | ||||
21131 | int rc; | ||||
21132 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
21133 | fts5CheckTransactionState(pTab, FTS5_SYNC, 0); | ||||
21134 | pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | ||||
21135 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | ||||
21136 | pTab->p.pConfig->pzErrmsg = 0; | ||||
21137 | return rc; | ||||
21138 | } | ||||
21139 | |||||
21140 | /* | ||||
21141 | ** Implementation of xBegin() method. | ||||
21142 | */ | ||||
21143 | static int fts5BeginMethod(sqlite3_vtab *pVtab){ | ||||
21144 | int rc = fts5NewTransaction((Fts5FullTable*)pVtab); | ||||
21145 | if( rc==SQLITE_OK0 ){ | ||||
21146 | fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_BEGIN, 0); | ||||
21147 | } | ||||
21148 | return rc; | ||||
21149 | } | ||||
21150 | |||||
21151 | /* | ||||
21152 | ** Implementation of xCommit() method. This is a no-op. The contents of | ||||
21153 | ** the pending-terms hash-table have already been flushed into the database | ||||
21154 | ** by fts5SyncMethod(). | ||||
21155 | */ | ||||
21156 | static int fts5CommitMethod(sqlite3_vtab *pVtab){ | ||||
21157 | UNUSED_PARAM(pVtab)(void)(pVtab); /* Call below is a no-op for NDEBUG builds */ | ||||
21158 | fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_COMMIT, 0); | ||||
21159 | return SQLITE_OK0; | ||||
21160 | } | ||||
21161 | |||||
21162 | /* | ||||
21163 | ** Implementation of xRollback(). Discard the contents of the pending-terms | ||||
21164 | ** hash-table. Any changes made to the database are reverted by SQLite. | ||||
21165 | */ | ||||
21166 | static int fts5RollbackMethod(sqlite3_vtab *pVtab){ | ||||
21167 | int rc; | ||||
21168 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
21169 | fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); | ||||
21170 | rc = sqlite3Fts5StorageRollback(pTab->pStorage); | ||||
21171 | pTab->p.pConfig->pgsz = 0; | ||||
21172 | return rc; | ||||
21173 | } | ||||
21174 | |||||
21175 | static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*); | ||||
21176 | |||||
21177 | static void *fts5ApiUserData(Fts5Context *pCtx){ | ||||
21178 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21179 | return pCsr->pAux->pUserData; | ||||
21180 | } | ||||
21181 | |||||
21182 | static int fts5ApiColumnCount(Fts5Context *pCtx){ | ||||
21183 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21184 | return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; | ||||
21185 | } | ||||
21186 | |||||
21187 | static int fts5ApiColumnTotalSize( | ||||
21188 | Fts5Context *pCtx, | ||||
21189 | int iCol, | ||||
21190 | sqlite3_int64 *pnToken | ||||
21191 | ){ | ||||
21192 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21193 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | ||||
21194 | return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); | ||||
21195 | } | ||||
21196 | |||||
21197 | static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ | ||||
21198 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21199 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | ||||
21200 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); | ||||
21201 | } | ||||
21202 | |||||
21203 | /* | ||||
21204 | ** Implementation of xTokenize_v2() API. | ||||
21205 | */ | ||||
21206 | static int fts5ApiTokenize_v2( | ||||
21207 | Fts5Context *pCtx, | ||||
21208 | const char *pText, int nText, | ||||
21209 | const char *pLoc, int nLoc, | ||||
21210 | void *pUserData, | ||||
21211 | int (*xToken)(void*, int, const char*, int, int, int) | ||||
21212 | ){ | ||||
21213 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21214 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | ||||
21215 | int rc = SQLITE_OK0; | ||||
21216 | |||||
21217 | sqlite3Fts5SetLocale(pTab->pConfig, pLoc, nLoc); | ||||
21218 | rc = sqlite3Fts5Tokenize(pTab->pConfig, | ||||
21219 | FTS5_TOKENIZE_AUX0x0008, pText, nText, pUserData, xToken | ||||
21220 | ); | ||||
21221 | sqlite3Fts5SetLocale(pTab->pConfig, 0, 0); | ||||
21222 | |||||
21223 | return rc; | ||||
21224 | } | ||||
21225 | |||||
21226 | /* | ||||
21227 | ** Implementation of xTokenize() API. This is just xTokenize_v2() with NULL/0 | ||||
21228 | ** passed as the locale. | ||||
21229 | */ | ||||
21230 | static int fts5ApiTokenize( | ||||
21231 | Fts5Context *pCtx, | ||||
21232 | const char *pText, int nText, | ||||
21233 | void *pUserData, | ||||
21234 | int (*xToken)(void*, int, const char*, int, int, int) | ||||
21235 | ){ | ||||
21236 | return fts5ApiTokenize_v2(pCtx, pText, nText, 0, 0, pUserData, xToken); | ||||
21237 | } | ||||
21238 | |||||
21239 | static int fts5ApiPhraseCount(Fts5Context *pCtx){ | ||||
21240 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21241 | return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | ||||
21242 | } | ||||
21243 | |||||
21244 | static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ | ||||
21245 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21246 | return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); | ||||
21247 | } | ||||
21248 | |||||
21249 | /* | ||||
21250 | ** Argument pStmt is an SQL statement of the type used by Fts5Cursor. This | ||||
21251 | ** function extracts the text value of column iCol of the current row. | ||||
21252 | ** Additionally, if there is an associated locale, it invokes | ||||
21253 | ** sqlite3Fts5SetLocale() to configure the tokenizer. In all cases the caller | ||||
21254 | ** should invoke sqlite3Fts5ClearLocale() to clear the locale at some point | ||||
21255 | ** after this function returns. | ||||
21256 | ** | ||||
21257 | ** If successful, (*ppText) is set to point to a buffer containing the text | ||||
21258 | ** value as utf-8 and SQLITE_OK returned. (*pnText) is set to the size of that | ||||
21259 | ** buffer in bytes. It is not guaranteed to be nul-terminated. If an error | ||||
21260 | ** occurs, an SQLite error code is returned. The final values of the two | ||||
21261 | ** output parameters are undefined in this case. | ||||
21262 | */ | ||||
21263 | static int fts5TextFromStmt( | ||||
21264 | Fts5Config *pConfig, | ||||
21265 | sqlite3_stmt *pStmt, | ||||
21266 | int iCol, | ||||
21267 | const char **ppText, | ||||
21268 | int *pnText | ||||
21269 | ){ | ||||
21270 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pStmt, iCol+1); | ||||
21271 | const char *pLoc = 0; | ||||
21272 | int nLoc = 0; | ||||
21273 | int rc = SQLITE_OK0; | ||||
21274 | |||||
21275 | if( pConfig->bLocale | ||||
21276 | && pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | ||||
21277 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | ||||
21278 | ){ | ||||
21279 | rc = sqlite3Fts5DecodeLocaleValue(pVal, ppText, pnText, &pLoc, &nLoc); | ||||
21280 | }else{ | ||||
21281 | *ppText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | ||||
21282 | *pnText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | ||||
21283 | if( pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | ||||
21284 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pStmt, iCol+1+pConfig->nCol); | ||||
21285 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pStmt, iCol+1+pConfig->nCol); | ||||
21286 | } | ||||
21287 | } | ||||
21288 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | ||||
21289 | return rc; | ||||
21290 | } | ||||
21291 | |||||
21292 | static int fts5ApiColumnText( | ||||
21293 | Fts5Context *pCtx, | ||||
21294 | int iCol, | ||||
21295 | const char **pz, | ||||
21296 | int *pn | ||||
21297 | ){ | ||||
21298 | int rc = SQLITE_OK0; | ||||
21299 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21300 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | ||||
21301 | |||||
21302 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | ||||
21303 | if( iCol<0 || iCol>=pTab->pConfig->nCol ){ | ||||
21304 | rc = SQLITE_RANGE25; | ||||
21305 | }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab), 0) ){ | ||||
21306 | *pz = 0; | ||||
21307 | *pn = 0; | ||||
21308 | }else{ | ||||
21309 | rc = fts5SeekCursor(pCsr, 0); | ||||
21310 | if( rc==SQLITE_OK0 ){ | ||||
21311 | rc = fts5TextFromStmt(pTab->pConfig, pCsr->pStmt, iCol, pz, pn); | ||||
21312 | sqlite3Fts5ClearLocale(pTab->pConfig); | ||||
21313 | } | ||||
21314 | } | ||||
21315 | return rc; | ||||
21316 | } | ||||
21317 | |||||
21318 | /* | ||||
21319 | ** This is called by various API functions - xInst, xPhraseFirst, | ||||
21320 | ** xPhraseFirstColumn etc. - to obtain the position list for phrase iPhrase | ||||
21321 | ** of the current row. This function works for both detail=full tables (in | ||||
21322 | ** which case the position-list was read from the fts index) or for other | ||||
21323 | ** detail= modes if the row content is available. | ||||
21324 | */ | ||||
21325 | static int fts5CsrPoslist( | ||||
21326 | Fts5Cursor *pCsr, /* Fts5 cursor object */ | ||||
21327 | int iPhrase, /* Phrase to find position list for */ | ||||
21328 | const u8 **pa, /* OUT: Pointer to position list buffer */ | ||||
21329 | int *pn /* OUT: Size of (*pa) in bytes */ | ||||
21330 | ){ | ||||
21331 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | ||||
21332 | int rc = SQLITE_OK0; | ||||
21333 | int bLive = (pCsr->pSorter==0); | ||||
21334 | |||||
21335 | if( iPhrase<0 || iPhrase>=sqlite3Fts5ExprPhraseCount(pCsr->pExpr) ){ | ||||
21336 | rc = SQLITE_RANGE25; | ||||
21337 | }else if( pConfig->eDetail!=FTS5_DETAIL_FULL0 | ||||
21338 | && fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) | ||||
21339 | ){ | ||||
21340 | *pa = 0; | ||||
21341 | *pn = 0; | ||||
21342 | return SQLITE_OK0; | ||||
21343 | }else if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST)((pCsr)->csrflags & (0x40)) ){ | ||||
21344 | if( pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | ||||
21345 | Fts5PoslistPopulator *aPopulator; | ||||
21346 | int i; | ||||
21347 | |||||
21348 | aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive); | ||||
21349 | if( aPopulator==0 ) rc = SQLITE_NOMEM7; | ||||
21350 | if( rc==SQLITE_OK0 ){ | ||||
21351 | rc = fts5SeekCursor(pCsr, 0); | ||||
21352 | } | ||||
21353 | for(i=0; i<pConfig->nCol && rc==SQLITE_OK0; i++){ | ||||
21354 | const char *z = 0; | ||||
21355 | int n = 0; | ||||
21356 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); | ||||
21357 | if( rc==SQLITE_OK0 ){ | ||||
21358 | rc = sqlite3Fts5ExprPopulatePoslists( | ||||
21359 | pConfig, pCsr->pExpr, aPopulator, i, z, n | ||||
21360 | ); | ||||
21361 | } | ||||
21362 | sqlite3Fts5ClearLocale(pConfig); | ||||
21363 | } | ||||
21364 | sqlite3_freesqlite3_api->free(aPopulator); | ||||
21365 | |||||
21366 | if( pCsr->pSorter ){ | ||||
21367 | sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid); | ||||
21368 | } | ||||
21369 | } | ||||
21370 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST)((pCsr)->csrflags &= ~(0x40)); | ||||
21371 | } | ||||
21372 | |||||
21373 | if( rc==SQLITE_OK0 ){ | ||||
21374 | if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | ||||
21375 | Fts5Sorter *pSorter = pCsr->pSorter; | ||||
21376 | int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); | ||||
21377 | *pn = pSorter->aIdx[iPhrase] - i1; | ||||
21378 | *pa = &pSorter->aPoslist[i1]; | ||||
21379 | }else{ | ||||
21380 | *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); | ||||
21381 | } | ||||
21382 | }else{ | ||||
21383 | *pa = 0; | ||||
21384 | *pn = 0; | ||||
21385 | } | ||||
21386 | |||||
21387 | return rc; | ||||
21388 | } | ||||
21389 | |||||
21390 | /* | ||||
21391 | ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated | ||||
21392 | ** correctly for the current view. Return SQLITE_OK if successful, or an | ||||
21393 | ** SQLite error code otherwise. | ||||
21394 | */ | ||||
21395 | static int fts5CacheInstArray(Fts5Cursor *pCsr){ | ||||
21396 | int rc = SQLITE_OK0; | ||||
21397 | Fts5PoslistReader *aIter; /* One iterator for each phrase */ | ||||
21398 | int nIter; /* Number of iterators/phrases */ | ||||
21399 | int nCol = ((Fts5Table*)pCsr->base.pVtab)->pConfig->nCol; | ||||
21400 | |||||
21401 | nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | ||||
21402 | if( pCsr->aInstIter==0 ){ | ||||
21403 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nIter; | ||||
21404 | pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); | ||||
21405 | } | ||||
21406 | aIter = pCsr->aInstIter; | ||||
21407 | |||||
21408 | if( aIter ){ | ||||
21409 | int nInst = 0; /* Number instances seen so far */ | ||||
21410 | int i; | ||||
21411 | |||||
21412 | /* Initialize all iterators */ | ||||
21413 | for(i=0; i<nIter && rc==SQLITE_OK0; i++){ | ||||
21414 | const u8 *a; | ||||
21415 | int n; | ||||
21416 | rc = fts5CsrPoslist(pCsr, i, &a, &n); | ||||
21417 | if( rc==SQLITE_OK0 ){ | ||||
21418 | sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); | ||||
21419 | } | ||||
21420 | } | ||||
21421 | |||||
21422 | if( rc==SQLITE_OK0 ){ | ||||
21423 | while( 1 ){ | ||||
21424 | int *aInst; | ||||
21425 | int iBest = -1; | ||||
21426 | for(i=0; i<nIter; i++){ | ||||
21427 | if( (aIter[i].bEof==0) | ||||
21428 | && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) | ||||
21429 | ){ | ||||
21430 | iBest = i; | ||||
21431 | } | ||||
21432 | } | ||||
21433 | if( iBest<0 ) break; | ||||
21434 | |||||
21435 | nInst++; | ||||
21436 | if( nInst>=pCsr->nInstAlloc ){ | ||||
21437 | int nNewSize = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32; | ||||
21438 | aInst = (int*)sqlite3_realloc64sqlite3_api->realloc64( | ||||
21439 | pCsr->aInst, nNewSize*sizeof(int)*3 | ||||
21440 | ); | ||||
21441 | if( aInst ){ | ||||
21442 | pCsr->aInst = aInst; | ||||
21443 | pCsr->nInstAlloc = nNewSize; | ||||
21444 | }else{ | ||||
21445 | nInst--; | ||||
21446 | rc = SQLITE_NOMEM7; | ||||
21447 | break; | ||||
21448 | } | ||||
21449 | } | ||||
21450 | |||||
21451 | aInst = &pCsr->aInst[3 * (nInst-1)]; | ||||
21452 | aInst[0] = iBest; | ||||
21453 | aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos)(int)((aIter[iBest].iPos >> 32) & 0x7FFFFFFF); | ||||
21454 | aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos)(int)(aIter[iBest].iPos & 0x7FFFFFFF); | ||||
21455 | assert( aInst[1]>=0 )((void) (0)); | ||||
21456 | if( aInst[1]>=nCol ){ | ||||
21457 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
21458 | break; | ||||
21459 | } | ||||
21460 | sqlite3Fts5PoslistReaderNext(&aIter[iBest]); | ||||
21461 | } | ||||
21462 | } | ||||
21463 | |||||
21464 | pCsr->nInstCount = nInst; | ||||
21465 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags &= ~(0x08)); | ||||
21466 | } | ||||
21467 | return rc; | ||||
21468 | } | ||||
21469 | |||||
21470 | static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){ | ||||
21471 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21472 | int rc = SQLITE_OK0; | ||||
21473 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | ||||
21474 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) ){ | ||||
21475 | *pnInst = pCsr->nInstCount; | ||||
21476 | } | ||||
21477 | return rc; | ||||
21478 | } | ||||
21479 | |||||
21480 | static int fts5ApiInst( | ||||
21481 | Fts5Context *pCtx, | ||||
21482 | int iIdx, | ||||
21483 | int *piPhrase, | ||||
21484 | int *piCol, | ||||
21485 | int *piOff | ||||
21486 | ){ | ||||
21487 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21488 | int rc = SQLITE_OK0; | ||||
21489 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | ||||
21490 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) | ||||
21491 | ){ | ||||
21492 | if( iIdx<0 || iIdx>=pCsr->nInstCount ){ | ||||
21493 | rc = SQLITE_RANGE25; | ||||
21494 | }else{ | ||||
21495 | *piPhrase = pCsr->aInst[iIdx*3]; | ||||
21496 | *piCol = pCsr->aInst[iIdx*3 + 1]; | ||||
21497 | *piOff = pCsr->aInst[iIdx*3 + 2]; | ||||
21498 | } | ||||
21499 | } | ||||
21500 | return rc; | ||||
21501 | } | ||||
21502 | |||||
21503 | static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ | ||||
21504 | return fts5CursorRowid((Fts5Cursor*)pCtx); | ||||
21505 | } | ||||
21506 | |||||
21507 | static int fts5ColumnSizeCb( | ||||
21508 | void *pContext, /* Pointer to int */ | ||||
21509 | int tflags, | ||||
21510 | const char *pUnused, /* Buffer containing token */ | ||||
21511 | int nUnused, /* Size of token in bytes */ | ||||
21512 | int iUnused1, /* Start offset of token */ | ||||
21513 | int iUnused2 /* End offset of token */ | ||||
21514 | ){ | ||||
21515 | int *pCnt = (int*)pContext; | ||||
21516 | UNUSED_PARAM2(pUnused, nUnused)(void)(pUnused), (void)(nUnused); | ||||
21517 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | ||||
21518 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ){ | ||||
21519 | (*pCnt)++; | ||||
21520 | } | ||||
21521 | return SQLITE_OK0; | ||||
21522 | } | ||||
21523 | |||||
21524 | static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ | ||||
21525 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21526 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | ||||
21527 | Fts5Config *pConfig = pTab->p.pConfig; | ||||
21528 | int rc = SQLITE_OK0; | ||||
21529 | |||||
21530 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags & (0x04)) ){ | ||||
21531 | if( pConfig->bColumnsize ){ | ||||
21532 | i64 iRowid = fts5CursorRowid(pCsr); | ||||
21533 | rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); | ||||
21534 | }else if( !pConfig->zContent || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 ){ | ||||
21535 | int i; | ||||
21536 | for(i=0; i<pConfig->nCol; i++){ | ||||
21537 | if( pConfig->abUnindexed[i]==0 ){ | ||||
21538 | pCsr->aColumnSize[i] = -1; | ||||
21539 | } | ||||
21540 | } | ||||
21541 | }else{ | ||||
21542 | int i; | ||||
21543 | rc = fts5SeekCursor(pCsr, 0); | ||||
21544 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | ||||
21545 | if( pConfig->abUnindexed[i]==0 ){ | ||||
21546 | const char *z = 0; | ||||
21547 | int n = 0; | ||||
21548 | pCsr->aColumnSize[i] = 0; | ||||
21549 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); | ||||
21550 | if( rc==SQLITE_OK0 ){ | ||||
21551 | rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX0x0008, | ||||
21552 | z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb | ||||
21553 | ); | ||||
21554 | } | ||||
21555 | sqlite3Fts5ClearLocale(pConfig); | ||||
21556 | } | ||||
21557 | } | ||||
21558 | } | ||||
21559 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags &= ~(0x04)); | ||||
21560 | } | ||||
21561 | if( iCol<0 ){ | ||||
21562 | int i; | ||||
21563 | *pnToken = 0; | ||||
21564 | for(i=0; i<pConfig->nCol; i++){ | ||||
21565 | *pnToken += pCsr->aColumnSize[i]; | ||||
21566 | } | ||||
21567 | }else if( iCol<pConfig->nCol ){ | ||||
21568 | *pnToken = pCsr->aColumnSize[iCol]; | ||||
21569 | }else{ | ||||
21570 | *pnToken = 0; | ||||
21571 | rc = SQLITE_RANGE25; | ||||
21572 | } | ||||
21573 | return rc; | ||||
21574 | } | ||||
21575 | |||||
21576 | /* | ||||
21577 | ** Implementation of the xSetAuxdata() method. | ||||
21578 | */ | ||||
21579 | static int fts5ApiSetAuxdata( | ||||
21580 | Fts5Context *pCtx, /* Fts5 context */ | ||||
21581 | void *pPtr, /* Pointer to save as auxdata */ | ||||
21582 | void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ | ||||
21583 | ){ | ||||
21584 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21585 | Fts5Auxdata *pData; | ||||
21586 | |||||
21587 | /* Search through the cursors list of Fts5Auxdata objects for one that | ||||
21588 | ** corresponds to the currently executing auxiliary function. */ | ||||
21589 | for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ | ||||
21590 | if( pData->pAux==pCsr->pAux ) break; | ||||
21591 | } | ||||
21592 | |||||
21593 | if( pData ){ | ||||
21594 | if( pData->xDelete ){ | ||||
21595 | pData->xDelete(pData->pPtr); | ||||
21596 | } | ||||
21597 | }else{ | ||||
21598 | int rc = SQLITE_OK0; | ||||
21599 | pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata)); | ||||
21600 | if( pData==0 ){ | ||||
21601 | if( xDelete ) xDelete(pPtr); | ||||
21602 | return rc; | ||||
21603 | } | ||||
21604 | pData->pAux = pCsr->pAux; | ||||
21605 | pData->pNext = pCsr->pAuxdata; | ||||
21606 | pCsr->pAuxdata = pData; | ||||
21607 | } | ||||
21608 | |||||
21609 | pData->xDelete = xDelete; | ||||
21610 | pData->pPtr = pPtr; | ||||
21611 | return SQLITE_OK0; | ||||
21612 | } | ||||
21613 | |||||
21614 | static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ | ||||
21615 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21616 | Fts5Auxdata *pData; | ||||
21617 | void *pRet = 0; | ||||
21618 | |||||
21619 | for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ | ||||
21620 | if( pData->pAux==pCsr->pAux ) break; | ||||
21621 | } | ||||
21622 | |||||
21623 | if( pData ){ | ||||
21624 | pRet = pData->pPtr; | ||||
21625 | if( bClear ){ | ||||
21626 | pData->pPtr = 0; | ||||
21627 | pData->xDelete = 0; | ||||
21628 | } | ||||
21629 | } | ||||
21630 | |||||
21631 | return pRet; | ||||
21632 | } | ||||
21633 | |||||
21634 | static void fts5ApiPhraseNext( | ||||
21635 | Fts5Context *pCtx, | ||||
21636 | Fts5PhraseIter *pIter, | ||||
21637 | int *piCol, int *piOff | ||||
21638 | ){ | ||||
21639 | if( pIter->a>=pIter->b ){ | ||||
21640 | *piCol = -1; | ||||
21641 | *piOff = -1; | ||||
21642 | }else{ | ||||
21643 | int iVal; | ||||
21644 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | ||||
21645 | if( iVal==1 ){ | ||||
21646 | /* Avoid returning a (*piCol) value that is too large for the table, | ||||
21647 | ** even if the position-list is corrupt. The caller might not be | ||||
21648 | ** expecting it. */ | ||||
21649 | int nCol = ((Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab))->pConfig->nCol; | ||||
21650 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | ||||
21651 | *piCol = (iVal>=nCol ? nCol-1 : iVal); | ||||
21652 | *piOff = 0; | ||||
21653 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | ||||
21654 | } | ||||
21655 | *piOff += (iVal-2); | ||||
21656 | } | ||||
21657 | } | ||||
21658 | |||||
21659 | static int fts5ApiPhraseFirst( | ||||
21660 | Fts5Context *pCtx, | ||||
21661 | int iPhrase, | ||||
21662 | Fts5PhraseIter *pIter, | ||||
21663 | int *piCol, int *piOff | ||||
21664 | ){ | ||||
21665 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21666 | int n; | ||||
21667 | int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); | ||||
21668 | if( rc==SQLITE_OK0 ){ | ||||
21669 | assert( pIter->a || n==0 )((void) (0)); | ||||
21670 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | ||||
21671 | *piCol = 0; | ||||
21672 | *piOff = 0; | ||||
21673 | fts5ApiPhraseNext(pCtx, pIter, piCol, piOff); | ||||
21674 | } | ||||
21675 | return rc; | ||||
21676 | } | ||||
21677 | |||||
21678 | static void fts5ApiPhraseNextColumn( | ||||
21679 | Fts5Context *pCtx, | ||||
21680 | Fts5PhraseIter *pIter, | ||||
21681 | int *piCol | ||||
21682 | ){ | ||||
21683 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21684 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | ||||
21685 | |||||
21686 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | ||||
21687 | if( pIter->a>=pIter->b ){ | ||||
21688 | *piCol = -1; | ||||
21689 | }else{ | ||||
21690 | int iIncr; | ||||
21691 | pIter->a += fts5GetVarint32(&pIter->a[0], iIncr)sqlite3Fts5GetVarint32(&pIter->a[0],(u32*)&(iIncr) ); | ||||
21692 | *piCol += (iIncr-2); | ||||
21693 | } | ||||
21694 | }else{ | ||||
21695 | while( 1 ){ | ||||
21696 | int dummy; | ||||
21697 | if( pIter->a>=pIter->b ){ | ||||
21698 | *piCol = -1; | ||||
21699 | return; | ||||
21700 | } | ||||
21701 | if( pIter->a[0]==0x01 ) break; | ||||
21702 | pIter->a += fts5GetVarint32(pIter->a, dummy)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(dummy)); | ||||
21703 | } | ||||
21704 | pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol)sqlite3Fts5GetVarint32(&pIter->a[1],(u32*)&(*piCol )); | ||||
21705 | } | ||||
21706 | } | ||||
21707 | |||||
21708 | static int fts5ApiPhraseFirstColumn( | ||||
21709 | Fts5Context *pCtx, | ||||
21710 | int iPhrase, | ||||
21711 | Fts5PhraseIter *pIter, | ||||
21712 | int *piCol | ||||
21713 | ){ | ||||
21714 | int rc = SQLITE_OK0; | ||||
21715 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21716 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | ||||
21717 | |||||
21718 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | ||||
21719 | Fts5Sorter *pSorter = pCsr->pSorter; | ||||
21720 | int n; | ||||
21721 | if( pSorter ){ | ||||
21722 | int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); | ||||
21723 | n = pSorter->aIdx[iPhrase] - i1; | ||||
21724 | pIter->a = &pSorter->aPoslist[i1]; | ||||
21725 | }else{ | ||||
21726 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n); | ||||
21727 | } | ||||
21728 | if( rc==SQLITE_OK0 ){ | ||||
21729 | assert( pIter->a || n==0 )((void) (0)); | ||||
21730 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | ||||
21731 | *piCol = 0; | ||||
21732 | fts5ApiPhraseNextColumn(pCtx, pIter, piCol); | ||||
21733 | } | ||||
21734 | }else{ | ||||
21735 | int n; | ||||
21736 | rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); | ||||
21737 | if( rc==SQLITE_OK0 ){ | ||||
21738 | assert( pIter->a || n==0 )((void) (0)); | ||||
21739 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | ||||
21740 | if( n<=0 ){ | ||||
21741 | *piCol = -1; | ||||
21742 | }else if( pIter->a[0]==0x01 ){ | ||||
21743 | pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol)sqlite3Fts5GetVarint32(&pIter->a[1],(u32*)&(*piCol )); | ||||
21744 | }else{ | ||||
21745 | *piCol = 0; | ||||
21746 | } | ||||
21747 | } | ||||
21748 | } | ||||
21749 | |||||
21750 | return rc; | ||||
21751 | } | ||||
21752 | |||||
21753 | /* | ||||
21754 | ** xQueryToken() API implemenetation. | ||||
21755 | */ | ||||
21756 | static int fts5ApiQueryToken( | ||||
21757 | Fts5Context* pCtx, | ||||
21758 | int iPhrase, | ||||
21759 | int iToken, | ||||
21760 | const char **ppOut, | ||||
21761 | int *pnOut | ||||
21762 | ){ | ||||
21763 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21764 | return sqlite3Fts5ExprQueryToken(pCsr->pExpr, iPhrase, iToken, ppOut, pnOut); | ||||
21765 | } | ||||
21766 | |||||
21767 | /* | ||||
21768 | ** xInstToken() API implemenetation. | ||||
21769 | */ | ||||
21770 | static int fts5ApiInstToken( | ||||
21771 | Fts5Context *pCtx, | ||||
21772 | int iIdx, | ||||
21773 | int iToken, | ||||
21774 | const char **ppOut, int *pnOut | ||||
21775 | ){ | ||||
21776 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21777 | int rc = SQLITE_OK0; | ||||
21778 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | ||||
21779 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) | ||||
21780 | ){ | ||||
21781 | if( iIdx<0 || iIdx>=pCsr->nInstCount ){ | ||||
21782 | rc = SQLITE_RANGE25; | ||||
21783 | }else{ | ||||
21784 | int iPhrase = pCsr->aInst[iIdx*3]; | ||||
21785 | int iCol = pCsr->aInst[iIdx*3 + 1]; | ||||
21786 | int iOff = pCsr->aInst[iIdx*3 + 2]; | ||||
21787 | i64 iRowid = fts5CursorRowid(pCsr); | ||||
21788 | rc = sqlite3Fts5ExprInstToken( | ||||
21789 | pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut | ||||
21790 | ); | ||||
21791 | } | ||||
21792 | } | ||||
21793 | return rc; | ||||
21794 | } | ||||
21795 | |||||
21796 | |||||
21797 | static int fts5ApiQueryPhrase(Fts5Context*, int, void*, | ||||
21798 | int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) | ||||
21799 | ); | ||||
21800 | |||||
21801 | /* | ||||
21802 | ** The xColumnLocale() API. | ||||
21803 | */ | ||||
21804 | static int fts5ApiColumnLocale( | ||||
21805 | Fts5Context *pCtx, | ||||
21806 | int iCol, | ||||
21807 | const char **pzLocale, | ||||
21808 | int *pnLocale | ||||
21809 | ){ | ||||
21810 | int rc = SQLITE_OK0; | ||||
21811 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21812 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | ||||
21813 | |||||
21814 | *pzLocale = 0; | ||||
21815 | *pnLocale = 0; | ||||
21816 | |||||
21817 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | ||||
21818 | if( iCol<0 || iCol>=pConfig->nCol ){ | ||||
21819 | rc = SQLITE_RANGE25; | ||||
21820 | }else if( | ||||
21821 | pConfig->abUnindexed[iCol]==0 | ||||
21822 | && 0==fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) | ||||
21823 | && pConfig->bLocale | ||||
21824 | ){ | ||||
21825 | rc = fts5SeekCursor(pCsr, 0); | ||||
21826 | if( rc==SQLITE_OK0 ){ | ||||
21827 | const char *zDummy = 0; | ||||
21828 | int nDummy = 0; | ||||
21829 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &zDummy, &nDummy); | ||||
21830 | if( rc==SQLITE_OK0 ){ | ||||
21831 | *pzLocale = pConfig->t.pLocale; | ||||
21832 | *pnLocale = pConfig->t.nLocale; | ||||
21833 | } | ||||
21834 | sqlite3Fts5ClearLocale(pConfig); | ||||
21835 | } | ||||
21836 | } | ||||
21837 | |||||
21838 | return rc; | ||||
21839 | } | ||||
21840 | |||||
21841 | static const Fts5ExtensionApi sFts5Api = { /* Method table passed to auxiliary functions by fts5ApiInvoke() and to xQueryPhrase callbacks. Positional initializer: entry order must match the member order of Fts5ExtensionApi. */ | ||||
21842 | 4, /* iVersion */ | ||||
21843 | fts5ApiUserData, | ||||
21844 | fts5ApiColumnCount, | ||||
21845 | fts5ApiRowCount, | ||||
21846 | fts5ApiColumnTotalSize, | ||||
21847 | fts5ApiTokenize, | ||||
21848 | fts5ApiPhraseCount, | ||||
21849 | fts5ApiPhraseSize, | ||||
21850 | fts5ApiInstCount, | ||||
21851 | fts5ApiInst, | ||||
21852 | fts5ApiRowid, | ||||
21853 | fts5ApiColumnText, | ||||
21854 | fts5ApiColumnSize, | ||||
21855 | fts5ApiQueryPhrase, /* xQueryPhrase() */ | ||||
21856 | fts5ApiSetAuxdata, /* xSetAuxdata() */ | ||||
21857 | fts5ApiGetAuxdata, | ||||
21858 | fts5ApiPhraseFirst, | ||||
21859 | fts5ApiPhraseNext, | ||||
21860 | fts5ApiPhraseFirstColumn, | ||||
21861 | fts5ApiPhraseNextColumn, | ||||
21862 | fts5ApiQueryToken, /* xQueryToken() */ | ||||
21863 | fts5ApiInstToken, /* xInstToken() */ | ||||
21864 | fts5ApiColumnLocale, /* xColumnLocale() */ | ||||
21865 | fts5ApiTokenize_v2 | ||||
21866 | }; | ||||
21867 | |||||
21868 | /* | ||||
21869 | ** Implementation of API function xQueryPhrase(). | ||||
21870 | */ | ||||
21871 | static int fts5ApiQueryPhrase( | ||||
21872 | Fts5Context *pCtx, | ||||
21873 | int iPhrase, | ||||
21874 | void *pUserData, | ||||
21875 | int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) | ||||
21876 | ){ | ||||
21877 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | ||||
21878 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | ||||
21879 | int rc; | ||||
21880 | Fts5Cursor *pNew = 0; | ||||
21881 | |||||
21882 | rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); | ||||
21883 | if( rc==SQLITE_OK0 ){ | ||||
21884 | pNew->ePlan = FTS5_PLAN_MATCH1; | ||||
21885 | pNew->iFirstRowid = SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))); | ||||
21886 | pNew->iLastRowid = LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | ||||
21887 | pNew->base.pVtab = (sqlite3_vtab*)pTab; | ||||
21888 | rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr); | ||||
21889 | } | ||||
21890 | |||||
21891 | if( rc==SQLITE_OK0 ){ | ||||
21892 | for(rc = fts5CursorFirst(pTab, pNew, 0); | ||||
21893 | rc==SQLITE_OK0 && CsrFlagTest(pNew, FTS5CSR_EOF)((pNew)->csrflags & (0x01))==0; | ||||
21894 | rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) | ||||
21895 | ){ | ||||
21896 | rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); | ||||
21897 | if( rc!=SQLITE_OK0 ){ | ||||
21898 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; | ||||
21899 | break; | ||||
21900 | } | ||||
21901 | } | ||||
21902 | } | ||||
21903 | |||||
21904 | fts5CloseMethod((sqlite3_vtab_cursor*)pNew); | ||||
21905 | return rc; | ||||
21906 | } | ||||
21907 | |||||
21908 | static void fts5ApiInvoke( | ||||
21909 | Fts5Auxiliary *pAux, | ||||
21910 | Fts5Cursor *pCsr, | ||||
21911 | sqlite3_context *context, | ||||
21912 | int argc, | ||||
21913 | sqlite3_value **argv | ||||
21914 | ){ | ||||
21915 | assert( pCsr->pAux==0 )((void) (0)); | ||||
21916 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | ||||
21917 | pCsr->pAux = pAux; | ||||
21918 | pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); | ||||
21919 | pCsr->pAux = 0; | ||||
21920 | } | ||||
21921 | |||||
21922 | static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ | ||||
21923 | Fts5Cursor *pCsr; | ||||
21924 | for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | ||||
21925 | if( pCsr->iCsrId==iCsrId ) break; | ||||
21926 | } | ||||
21927 | return pCsr; | ||||
21928 | } | ||||
21929 | |||||
21930 | /* | ||||
21931 | ** Parameter zFmt is a printf() style formatting string. This function | ||||
21932 | ** formats it using the trailing arguments and returns the result as | ||||
21933 | ** an error message to the context passed as the first argument. | ||||
21934 | */ | ||||
21935 | static void fts5ResultError(sqlite3_context *pCtx, const char *zFmt, ...){ | ||||
21936 | char *zErr = 0; | ||||
21937 | va_list ap; | ||||
21938 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | ||||
21939 | zErr = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | ||||
21940 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | ||||
21941 | sqlite3_freesqlite3_api->free(zErr); | ||||
21942 | va_end(ap)__builtin_va_end(ap); | ||||
21943 | } | ||||
21944 | |||||
21945 | static void fts5ApiCallback( | ||||
21946 | sqlite3_context *context, | ||||
21947 | int argc, | ||||
21948 | sqlite3_value **argv | ||||
21949 | ){ | ||||
21950 | |||||
21951 | Fts5Auxiliary *pAux; | ||||
21952 | Fts5Cursor *pCsr; | ||||
21953 | i64 iCsrId; | ||||
21954 | |||||
21955 | assert( argc>=1 )((void) (0)); | ||||
21956 | pAux = (Fts5Auxiliary*)sqlite3_user_datasqlite3_api->user_data(context); | ||||
21957 | iCsrId = sqlite3_value_int64sqlite3_api->value_int64(argv[0]); | ||||
21958 | |||||
21959 | pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); | ||||
21960 | if( pCsr==0 || (pCsr->ePlan==0 || pCsr->ePlan==FTS5_PLAN_SPECIAL3) ){ | ||||
21961 | fts5ResultError(context, "no such cursor: %lld", iCsrId); | ||||
21962 | }else{ | ||||
21963 | sqlite3_vtab *pTab = pCsr->base.pVtab; | ||||
21964 | fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); | ||||
21965 | sqlite3_freesqlite3_api->free(pTab->zErrMsg); | ||||
21966 | pTab->zErrMsg = 0; | ||||
21967 | } | ||||
21968 | } | ||||
21969 | |||||
21970 | |||||
21971 | /* | ||||
21972 | ** Given cursor id iId, return a pointer to the corresponding Fts5Table | ||||
21973 | ** object. Or NULL If the cursor id does not exist. | ||||
21974 | */ | ||||
21975 | static Fts5Table *sqlite3Fts5TableFromCsrid( | ||||
21976 | Fts5Global *pGlobal, /* FTS5 global context for db handle */ | ||||
21977 | i64 iCsrId /* Id of cursor to find */ | ||||
21978 | ){ | ||||
21979 | Fts5Cursor *pCsr; | ||||
21980 | pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); | ||||
21981 | if( pCsr ){ | ||||
21982 | return (Fts5Table*)pCsr->base.pVtab; | ||||
21983 | } | ||||
21984 | return 0; | ||||
21985 | } | ||||
21986 | |||||
21987 | /* | ||||
21988 | ** Return a "position-list blob" corresponding to the current position of | ||||
21989 | ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains | ||||
21990 | ** the current position-list for each phrase in the query associated with | ||||
21991 | ** cursor pCsr. | ||||
21992 | ** | ||||
21993 | ** A position-list blob begins with (nPhrase-1) varints, where nPhrase is | ||||
21994 | ** the number of phrases in the query. Following the varints are the | ||||
21995 | ** concatenated position lists for each phrase, in order. | ||||
21996 | ** | ||||
21997 | ** The first varint (if it exists) contains the size of the position list | ||||
21998 | ** for phrase 0. The second (same disclaimer) contains the size of position | ||||
21999 | ** list 1. And so on. There is no size field for the final position list, | ||||
22000 | ** as it can be derived from the total size of the blob. | ||||
22001 | */ | ||||
22002 | static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ | ||||
22003 | int i; | ||||
22004 | int rc = SQLITE_OK0; | ||||
22005 | int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | ||||
22006 | Fts5Buffer val; | ||||
22007 | |||||
22008 | memset(&val, 0, sizeof(Fts5Buffer)); | ||||
22009 | switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){ | ||||
22010 | case FTS5_DETAIL_FULL0: | ||||
22011 | |||||
22012 | /* Append the varints */ | ||||
22013 | for(i=0; i<(nPhrase-1); i++){ | ||||
22014 | const u8 *dummy; | ||||
22015 | int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); | ||||
22016 | sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); | ||||
22017 | } | ||||
22018 | |||||
22019 | /* Append the position lists */ | ||||
22020 | for(i=0; i<nPhrase; i++){ | ||||
22021 | const u8 *pPoslist; | ||||
22022 | int nPoslist; | ||||
22023 | nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist); | ||||
22024 | sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); | ||||
22025 | } | ||||
22026 | break; | ||||
22027 | |||||
22028 | case FTS5_DETAIL_COLUMNS2: | ||||
22029 | |||||
22030 | /* Append the varints */ | ||||
22031 | for(i=0; rc==SQLITE_OK0 && i<(nPhrase-1); i++){ | ||||
22032 | const u8 *dummy; | ||||
22033 | int nByte; | ||||
22034 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte); | ||||
22035 | sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); | ||||
22036 | } | ||||
22037 | |||||
22038 | /* Append the position lists */ | ||||
22039 | for(i=0; rc==SQLITE_OK0 && i<nPhrase; i++){ | ||||
22040 | const u8 *pPoslist; | ||||
22041 | int nPoslist; | ||||
22042 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist); | ||||
22043 | sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); | ||||
22044 | } | ||||
22045 | break; | ||||
22046 | |||||
22047 | default: | ||||
22048 | break; | ||||
22049 | } | ||||
22050 | |||||
22051 | sqlite3_result_blobsqlite3_api->result_blob(pCtx, val.p, val.n, sqlite3_freesqlite3_api->free); | ||||
22052 | return rc; | ||||
22053 | } | ||||
22054 | |||||
22055 | /* | ||||
22056 | ** This is the xColumn method, called by SQLite to request a value from | ||||
22057 | ** the row that the supplied cursor currently points to. | ||||
22058 | */ | ||||
22059 | static int fts5ColumnMethod( | ||||
22060 | sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ | ||||
22061 | sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ | ||||
22062 | int iCol /* Index of column to read value from */ | ||||
22063 | ){ | ||||
22064 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | ||||
22065 | Fts5Config *pConfig = pTab->p.pConfig; | ||||
22066 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | ||||
22067 | int rc = SQLITE_OK0; | ||||
22068 | |||||
22069 | assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 )((void) (0)); | ||||
22070 | |||||
22071 | if( pCsr->ePlan==FTS5_PLAN_SPECIAL3 ){ | ||||
22072 | if( iCol==pConfig->nCol ){ | ||||
22073 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->iSpecial); | ||||
22074 | } | ||||
22075 | }else | ||||
22076 | |||||
22077 | if( iCol==pConfig->nCol ){ | ||||
22078 | /* User is requesting the value of the special column with the same name | ||||
22079 | ** as the table. Return the cursor integer id number. This value is only | ||||
22080 | ** useful in that it may be passed as the first argument to an FTS5 | ||||
22081 | ** auxiliary function. */ | ||||
22082 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->iCsrId); | ||||
22083 | }else if( iCol==pConfig->nCol+1 ){ | ||||
22084 | /* The value of the "rank" column. */ | ||||
22085 | |||||
22086 | if( pCsr->ePlan==FTS5_PLAN_SOURCE2 ){ | ||||
22087 | fts5PoslistBlob(pCtx, pCsr); | ||||
22088 | }else if( | ||||
22089 | pCsr->ePlan==FTS5_PLAN_MATCH1 | ||||
22090 | || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH4 | ||||
22091 | ){ | ||||
22092 | if( pCsr->pRank || SQLITE_OK0==(rc = fts5FindRankFunction(pCsr)) ){ | ||||
22093 | fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); | ||||
22094 | } | ||||
22095 | } | ||||
22096 | }else{ | ||||
22097 | if( !sqlite3_vtab_nochangesqlite3_api->vtab_nochange(pCtx) && pConfig->eContent!=FTS5_CONTENT_NONE1 ){ | ||||
22098 | pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | ||||
22099 | rc = fts5SeekCursor(pCsr, 1); | ||||
22100 | if( rc==SQLITE_OK0 ){ | ||||
22101 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pCsr->pStmt, iCol+1); | ||||
22102 | if( pConfig->bLocale | ||||
22103 | && pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | ||||
22104 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | ||||
22105 | ){ | ||||
22106 | const char *z = 0; | ||||
22107 | int n = 0; | ||||
22108 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &z, &n); | ||||
22109 | if( rc==SQLITE_OK0 ){ | ||||
22110 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
22111 | } | ||||
22112 | sqlite3Fts5ClearLocale(pConfig); | ||||
22113 | }else{ | ||||
22114 | sqlite3_result_valuesqlite3_api->result_value(pCtx, pVal); | ||||
22115 | } | ||||
22116 | } | ||||
22117 | |||||
22118 | pConfig->pzErrmsg = 0; | ||||
22119 | } | ||||
22120 | } | ||||
22121 | |||||
22122 | return rc; | ||||
22123 | } | ||||
22124 | |||||
22125 | |||||
22126 | /* | ||||
22127 | ** This routine implements the xFindFunction method for the FTS3 | ||||
22128 | ** virtual table. | ||||
22129 | */ | ||||
22130 | static int fts5FindFunctionMethod( | ||||
22131 | sqlite3_vtab *pVtab, /* Virtual table handle */ | ||||
22132 | int nUnused, /* Number of SQL function arguments */ | ||||
22133 | const char *zName, /* Name of SQL function */ | ||||
22134 | void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ | ||||
22135 | void **ppArg /* OUT: User data for *pxFunc */ | ||||
22136 | ){ | ||||
22137 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
22138 | Fts5Auxiliary *pAux; | ||||
22139 | |||||
22140 | UNUSED_PARAM(nUnused)(void)(nUnused); | ||||
22141 | pAux = fts5FindAuxiliary(pTab, zName); | ||||
22142 | if( pAux ){ | ||||
22143 | *pxFunc = fts5ApiCallback; | ||||
22144 | *ppArg = (void*)pAux; | ||||
22145 | return 1; | ||||
22146 | } | ||||
22147 | |||||
22148 | /* No function of the specified name was found. Return 0. */ | ||||
22149 | return 0; | ||||
22150 | } | ||||
22151 | |||||
22152 | /* | ||||
22153 | ** Implementation of FTS5 xRename method. Rename an fts5 table. | ||||
22154 | */ | ||||
22155 | static int fts5RenameMethod( | ||||
22156 | sqlite3_vtab *pVtab, /* Virtual table handle */ | ||||
22157 | const char *zName /* New name of table */ | ||||
22158 | ){ | ||||
22159 | int rc; | ||||
22160 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
22161 | rc = sqlite3Fts5StorageRename(pTab->pStorage, zName); | ||||
22162 | return rc; | ||||
22163 | } | ||||
22164 | |||||
22165 | static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){ | ||||
22166 | fts5TripCursors((Fts5FullTable*)pTab); | ||||
22167 | return sqlite3Fts5StorageSync(((Fts5FullTable*)pTab)->pStorage); | ||||
22168 | } | ||||
22169 | |||||
22170 | /* | ||||
22171 | ** The xSavepoint() method. | ||||
22172 | ** | ||||
22173 | ** Flush the contents of the pending-terms table to disk. | ||||
22174 | */ | ||||
22175 | static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ | ||||
22176 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
22177 | int rc = SQLITE_OK0; | ||||
22178 | |||||
22179 | fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint); | ||||
22180 | rc = sqlite3Fts5FlushToDisk((Fts5Table*)pVtab); | ||||
22181 | if( rc==SQLITE_OK0 ){ | ||||
22182 | pTab->iSavepoint = iSavepoint+1; | ||||
22183 | } | ||||
22184 | return rc; | ||||
22185 | } | ||||
22186 | |||||
22187 | /* | ||||
22188 | ** The xRelease() method. | ||||
22189 | ** | ||||
22190 | ** This is a no-op. | ||||
22191 | */ | ||||
22192 | static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ | ||||
22193 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
22194 | int rc = SQLITE_OK0; | ||||
22195 | fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint); | ||||
22196 | if( (iSavepoint+1)<pTab->iSavepoint ){ | ||||
22197 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | ||||
22198 | if( rc==SQLITE_OK0 ){ | ||||
22199 | pTab->iSavepoint = iSavepoint; | ||||
22200 | } | ||||
22201 | } | ||||
22202 | return rc; | ||||
22203 | } | ||||
22204 | |||||
22205 | /* | ||||
22206 | ** The xRollbackTo() method. | ||||
22207 | ** | ||||
22208 | ** Discard the contents of the pending terms table. | ||||
22209 | */ | ||||
22210 | static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ | ||||
22211 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
22212 | int rc = SQLITE_OK0; | ||||
22213 | fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); | ||||
22214 | fts5TripCursors(pTab); | ||||
22215 | if( (iSavepoint+1)<=pTab->iSavepoint ){ | ||||
22216 | pTab->p.pConfig->pgsz = 0; | ||||
22217 | rc = sqlite3Fts5StorageRollback(pTab->pStorage); | ||||
22218 | } | ||||
22219 | return rc; | ||||
22220 | } | ||||
22221 | |||||
22222 | /* | ||||
22223 | ** Register a new auxiliary function with global context pGlobal. | ||||
22224 | */ | ||||
22225 | static int fts5CreateAux( | ||||
22226 | fts5_api *pApi, /* Global context (one per db handle) */ | ||||
22227 | const char *zName, /* Name of new function */ | ||||
22228 | void *pUserData, /* User data for aux. function */ | ||||
22229 | fts5_extension_function xFunc, /* Aux. function implementation */ | ||||
22230 | void(*xDestroy)(void*) /* Destructor for pUserData */ | ||||
22231 | ){ | ||||
22232 | Fts5Global *pGlobal = (Fts5Global*)pApi; | ||||
22233 | int rc = sqlite3_overload_functionsqlite3_api->overload_function(pGlobal->db, zName, -1); | ||||
22234 | if( rc==SQLITE_OK0 ){ | ||||
22235 | Fts5Auxiliary *pAux; | ||||
22236 | sqlite3_int64 nName; /* Size of zName in bytes, including \0 */ | ||||
22237 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | ||||
22238 | |||||
22239 | nName = strlen(zName) + 1; | ||||
22240 | nByte = sizeof(Fts5Auxiliary) + nName; | ||||
22241 | pAux = (Fts5Auxiliary*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
22242 | if( pAux ){ | ||||
22243 | memset(pAux, 0, (size_t)nByte); | ||||
22244 | pAux->zFunc = (char*)&pAux[1]; | ||||
22245 | memcpy(pAux->zFunc, zName, nName); | ||||
22246 | pAux->pGlobal = pGlobal; | ||||
22247 | pAux->pUserData = pUserData; | ||||
22248 | pAux->xFunc = xFunc; | ||||
22249 | pAux->xDestroy = xDestroy; | ||||
22250 | pAux->pNext = pGlobal->pAux; | ||||
22251 | pGlobal->pAux = pAux; | ||||
22252 | }else{ | ||||
22253 | rc = SQLITE_NOMEM7; | ||||
22254 | } | ||||
22255 | } | ||||
22256 | |||||
22257 | return rc; | ||||
22258 | } | ||||
22259 | |||||
22260 | /* | ||||
22261 | ** This function is used by xCreateTokenizer_v2() and xCreateTokenizer(). | ||||
22262 | ** It allocates and partially populates a new Fts5TokenizerModule object. | ||||
22263 | ** The new object is already linked into the Fts5Global context before | ||||
22264 | ** returning. | ||||
22265 | ** | ||||
22266 | ** If successful, SQLITE_OK is returned and a pointer to the new | ||||
22267 | ** Fts5TokenizerModule object returned via output parameter (*ppNew). All | ||||
22268 | ** that is required is for the caller to fill in the methods in | ||||
22269 | ** Fts5TokenizerModule.x1 and x2, and to set Fts5TokenizerModule.bV2Native | ||||
22270 | ** as appropriate. | ||||
22271 | ** | ||||
22272 | ** If an error occurs, an SQLite error code is returned and the final value | ||||
22273 | ** of (*ppNew) undefined. | ||||
22274 | */ | ||||
22275 | static int fts5NewTokenizerModule( | ||||
22276 | Fts5Global *pGlobal, /* Global context (one per db handle) */ | ||||
22277 | const char *zName, /* Name of new function */ | ||||
22278 | void *pUserData, /* User data for aux. function */ | ||||
22279 | void(*xDestroy)(void*), /* Destructor for pUserData */ | ||||
22280 | Fts5TokenizerModule **ppNew | ||||
22281 | ){ | ||||
22282 | int rc = SQLITE_OK0; | ||||
22283 | Fts5TokenizerModule *pNew; | ||||
22284 | sqlite3_int64 nName; /* Size of zName and its \0 terminator */ | ||||
22285 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | ||||
22286 | |||||
22287 | nName = strlen(zName) + 1; | ||||
22288 | nByte = sizeof(Fts5TokenizerModule) + nName; | ||||
22289 | *ppNew = pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte); | ||||
22290 | if( pNew ){ | ||||
22291 | pNew->zName = (char*)&pNew[1]; | ||||
22292 | memcpy(pNew->zName, zName, nName); | ||||
22293 | pNew->pUserData = pUserData; | ||||
22294 | pNew->xDestroy = xDestroy; | ||||
22295 | pNew->pNext = pGlobal->pTok; | ||||
22296 | pGlobal->pTok = pNew; | ||||
22297 | if( pNew->pNext==0 ){ | ||||
22298 | pGlobal->pDfltTok = pNew; | ||||
22299 | } | ||||
22300 | } | ||||
22301 | |||||
22302 | return rc; | ||||
22303 | } | ||||
22304 | |||||
22305 | /* | ||||
22306 | ** An instance of this type is used as the Fts5Tokenizer object for | ||||
22307 | ** wrapper tokenizers - those that provide access to a v1 tokenizer via | ||||
22308 | ** the fts5_tokenizer_v2 API, and those that provide access to a v2 tokenizer | ||||
22309 | ** via the fts5_tokenizer API. | ||||
22310 | */ | ||||
22311 | typedef struct Fts5VtoVTokenizer Fts5VtoVTokenizer; | ||||
22312 | struct Fts5VtoVTokenizer { | ||||
22313 | int bV2Native; /* True if v2 native tokenizer */ | ||||
22314 | fts5_tokenizer x1; /* Tokenizer functions */ | ||||
22315 | fts5_tokenizer_v2 x2; /* V2 tokenizer functions */ | ||||
22316 | Fts5Tokenizer *pReal; | ||||
22317 | }; | ||||
22318 | |||||
22319 | /* | ||||
22320 | ** Create a wrapper tokenizer. The context argument pCtx points to the | ||||
22321 | ** Fts5TokenizerModule object. | ||||
22322 | */ | ||||
22323 | static int fts5VtoVCreate( | ||||
22324 | void *pCtx, | ||||
22325 | const char **azArg, | ||||
22326 | int nArg, | ||||
22327 | Fts5Tokenizer **ppOut | ||||
22328 | ){ | ||||
22329 | Fts5TokenizerModule *pMod = (Fts5TokenizerModule*)pCtx; | ||||
22330 | Fts5VtoVTokenizer *pNew = 0; | ||||
22331 | int rc = SQLITE_OK0; | ||||
22332 | |||||
22333 | pNew = (Fts5VtoVTokenizer*)sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | ||||
22334 | if( rc==SQLITE_OK0 ){ | ||||
22335 | pNew->x1 = pMod->x1; | ||||
22336 | pNew->x2 = pMod->x2; | ||||
22337 | pNew->bV2Native = pMod->bV2Native; | ||||
22338 | if( pMod->bV2Native ){ | ||||
22339 | rc = pMod->x2.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal); | ||||
22340 | }else{ | ||||
22341 | rc = pMod->x1.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal); | ||||
22342 | } | ||||
22343 | if( rc!=SQLITE_OK0 ){ | ||||
22344 | sqlite3_freesqlite3_api->free(pNew); | ||||
22345 | pNew = 0; | ||||
22346 | } | ||||
22347 | } | ||||
22348 | |||||
22349 | *ppOut = (Fts5Tokenizer*)pNew; | ||||
22350 | return rc; | ||||
22351 | } | ||||
22352 | |||||
22353 | /* | ||||
22354 | ** Delete an Fts5VtoVTokenizer wrapper tokenizer. | ||||
22355 | */ | ||||
22356 | static void fts5VtoVDelete(Fts5Tokenizer *pTok){ | ||||
22357 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | ||||
22358 | if( p ){ | ||||
22359 | if( p->bV2Native ){ | ||||
22360 | p->x2.xDelete(p->pReal); | ||||
22361 | }else{ | ||||
22362 | p->x1.xDelete(p->pReal); | ||||
22363 | } | ||||
22364 | sqlite3_freesqlite3_api->free(p); | ||||
22365 | } | ||||
22366 | } | ||||
22367 | |||||
22368 | |||||
22369 | /* | ||||
22370 | ** xTokenizer method for a wrapper tokenizer that offers the v1 interface | ||||
22371 | ** (no support for locales). | ||||
22372 | */ | ||||
22373 | static int fts5V1toV2Tokenize( | ||||
22374 | Fts5Tokenizer *pTok, | ||||
22375 | void *pCtx, int flags, | ||||
22376 | const char *pText, int nText, | ||||
22377 | int (*xToken)(void*, int, const char*, int, int, int) | ||||
22378 | ){ | ||||
22379 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | ||||
22380 | assert( p->bV2Native )((void) (0)); | ||||
22381 | return p->x2.xTokenize(p->pReal, pCtx, flags, pText, nText, 0, 0, xToken); | ||||
22382 | } | ||||
22383 | |||||
22384 | /* | ||||
22385 | ** xTokenizer method for a wrapper tokenizer that offers the v2 interface | ||||
22386 | ** (with locale support). | ||||
22387 | */ | ||||
22388 | static int fts5V2toV1Tokenize( | ||||
22389 | Fts5Tokenizer *pTok, | ||||
22390 | void *pCtx, int flags, | ||||
22391 | const char *pText, int nText, | ||||
22392 | const char *pLocale, int nLocale, | ||||
22393 | int (*xToken)(void*, int, const char*, int, int, int) | ||||
22394 | ){ | ||||
22395 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | ||||
22396 | assert( p->bV2Native==0 )((void) (0)); | ||||
22397 | UNUSED_PARAM2(pLocale,nLocale)(void)(pLocale), (void)(nLocale); | ||||
22398 | return p->x1.xTokenize(p->pReal, pCtx, flags, pText, nText, xToken); | ||||
22399 | } | ||||
22400 | |||||
22401 | /* | ||||
22402 | ** Register a new tokenizer. This is the implementation of the | ||||
22403 | ** fts5_api.xCreateTokenizer_v2() method. | ||||
22404 | */ | ||||
22405 | static int fts5CreateTokenizer_v2( | ||||
22406 | fts5_api *pApi, /* Global context (one per db handle) */ | ||||
22407 | const char *zName, /* Name of new function */ | ||||
22408 | void *pUserData, /* User data for aux. function */ | ||||
22409 | fts5_tokenizer_v2 *pTokenizer, /* Tokenizer implementation */ | ||||
22410 | void(*xDestroy)(void*) /* Destructor for pUserData */ | ||||
22411 | ){ | ||||
22412 | Fts5Global *pGlobal = (Fts5Global*)pApi; | ||||
22413 | int rc = SQLITE_OK0; | ||||
22414 | |||||
22415 | if( pTokenizer->iVersion>2 ){ | ||||
22416 | rc = SQLITE_ERROR1; | ||||
22417 | }else{ | ||||
22418 | Fts5TokenizerModule *pNew = 0; | ||||
22419 | rc = fts5NewTokenizerModule(pGlobal, zName, pUserData, xDestroy, &pNew); | ||||
22420 | if( pNew ){ | ||||
22421 | pNew->x2 = *pTokenizer; | ||||
22422 | pNew->bV2Native = 1; | ||||
22423 | pNew->x1.xCreate = fts5VtoVCreate; | ||||
22424 | pNew->x1.xTokenize = fts5V1toV2Tokenize; | ||||
22425 | pNew->x1.xDelete = fts5VtoVDelete; | ||||
22426 | } | ||||
22427 | } | ||||
22428 | |||||
22429 | return rc; | ||||
22430 | } | ||||
22431 | |||||
22432 | /* | ||||
22433 | ** The fts5_api.xCreateTokenizer() method. | ||||
22434 | */ | ||||
22435 | static int fts5CreateTokenizer( | ||||
22436 | fts5_api *pApi, /* Global context (one per db handle) */ | ||||
22437 | const char *zName, /* Name of new function */ | ||||
22438 | void *pUserData, /* User data for aux. function */ | ||||
22439 | fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ | ||||
22440 | void(*xDestroy)(void*) /* Destructor for pUserData */ | ||||
22441 | ){ | ||||
22442 | Fts5TokenizerModule *pNew = 0; | ||||
22443 | int rc = SQLITE_OK0; | ||||
22444 | |||||
22445 | rc = fts5NewTokenizerModule( | ||||
22446 | (Fts5Global*)pApi, zName, pUserData, xDestroy, &pNew | ||||
22447 | ); | ||||
22448 | if( pNew ){ | ||||
22449 | pNew->x1 = *pTokenizer; | ||||
22450 | pNew->x2.xCreate = fts5VtoVCreate; | ||||
22451 | pNew->x2.xTokenize = fts5V2toV1Tokenize; | ||||
22452 | pNew->x2.xDelete = fts5VtoVDelete; | ||||
22453 | } | ||||
22454 | return rc; | ||||
22455 | } | ||||
22456 | |||||
22457 | /* | ||||
22458 | ** Search the global context passed as the first argument for a tokenizer | ||||
22459 | ** module named zName. If found, return a pointer to the Fts5TokenizerModule | ||||
22460 | ** object. Otherwise, return NULL. | ||||
22461 | */ | ||||
22462 | static Fts5TokenizerModule *fts5LocateTokenizer( | ||||
22463 | Fts5Global *pGlobal, /* Global (one per db handle) object */ | ||||
22464 | const char *zName /* Name of tokenizer module to find */ | ||||
22465 | ){ | ||||
22466 | Fts5TokenizerModule *pMod = 0; | ||||
22467 | |||||
22468 | if( zName==0 ){ | ||||
22469 | pMod = pGlobal->pDfltTok; | ||||
22470 | }else{ | ||||
22471 | for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ | ||||
22472 | if( sqlite3_stricmpsqlite3_api->stricmp(zName, pMod->zName)==0 ) break; | ||||
22473 | } | ||||
22474 | } | ||||
22475 | |||||
22476 | return pMod; | ||||
22477 | } | ||||
22478 | |||||
22479 | /* | ||||
22480 | ** Find a tokenizer. This is the implementation of the | ||||
22481 | ** fts5_api.xFindTokenizer_v2() method. | ||||
22482 | */ | ||||
22483 | static int fts5FindTokenizer_v2( | ||||
22484 | fts5_api *pApi, /* Global context (one per db handle) */ | ||||
22485 | const char *zName, /* Name of tokenizer */ | ||||
22486 | void **ppUserData, | ||||
22487 | fts5_tokenizer_v2 **ppTokenizer /* Populate this object */ | ||||
22488 | ){ | ||||
22489 | int rc = SQLITE_OK0; | ||||
22490 | Fts5TokenizerModule *pMod; | ||||
22491 | |||||
22492 | pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); | ||||
22493 | if( pMod ){ | ||||
22494 | if( pMod->bV2Native ){ | ||||
22495 | *ppUserData = pMod->pUserData; | ||||
22496 | }else{ | ||||
22497 | *ppUserData = (void*)pMod; | ||||
22498 | } | ||||
22499 | *ppTokenizer = &pMod->x2; | ||||
22500 | }else{ | ||||
22501 | *ppTokenizer = 0; | ||||
22502 | *ppUserData = 0; | ||||
22503 | rc = SQLITE_ERROR1; | ||||
22504 | } | ||||
22505 | |||||
22506 | return rc; | ||||
22507 | } | ||||
22508 | |||||
22509 | /* | ||||
22510 | ** Find a tokenizer. This is the implementation of the | ||||
22511 | ** fts5_api.xFindTokenizer() method. | ||||
22512 | */ | ||||
22513 | static int fts5FindTokenizer( | ||||
22514 | fts5_api *pApi, /* Global context (one per db handle) */ | ||||
22515 | const char *zName, /* Name of new function */ | ||||
22516 | void **ppUserData, | ||||
22517 | fts5_tokenizer *pTokenizer /* Populate this object */ | ||||
22518 | ){ | ||||
22519 | int rc = SQLITE_OK0; | ||||
22520 | Fts5TokenizerModule *pMod; | ||||
22521 | |||||
22522 | pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); | ||||
22523 | if( pMod ){ | ||||
22524 | if( pMod->bV2Native==0 ){ | ||||
22525 | *ppUserData = pMod->pUserData; | ||||
22526 | }else{ | ||||
22527 | *ppUserData = (void*)pMod; | ||||
22528 | } | ||||
22529 | *pTokenizer = pMod->x1; | ||||
22530 | }else{ | ||||
22531 | memset(pTokenizer, 0, sizeof(*pTokenizer)); | ||||
22532 | *ppUserData = 0; | ||||
22533 | rc = SQLITE_ERROR1; | ||||
22534 | } | ||||
22535 | |||||
22536 | return rc; | ||||
22537 | } | ||||
22538 | |||||
22539 | /* | ||||
22540 | ** Attempt to instantiate the tokenizer. | ||||
22541 | */ | ||||
22542 | static int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){ | ||||
22543 | const char **azArg = pConfig->t.azArg; | ||||
22544 | const int nArg = pConfig->t.nArg; | ||||
22545 | Fts5TokenizerModule *pMod = 0; | ||||
22546 | int rc = SQLITE_OK0; | ||||
22547 | |||||
22548 | pMod = fts5LocateTokenizer(pConfig->pGlobal, nArg==0 ? 0 : azArg[0]); | ||||
22549 | if( pMod==0 ){ | ||||
22550 | assert( nArg>0 )((void) (0)); | ||||
22551 | rc = SQLITE_ERROR1; | ||||
22552 | sqlite3Fts5ConfigErrmsg(pConfig, "no such tokenizer: %s", azArg[0]); | ||||
22553 | }else{ | ||||
22554 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**) = 0; | ||||
22555 | if( pMod->bV2Native ){ | ||||
22556 | xCreate = pMod->x2.xCreate; | ||||
22557 | pConfig->t.pApi2 = &pMod->x2; | ||||
22558 | }else{ | ||||
22559 | pConfig->t.pApi1 = &pMod->x1; | ||||
22560 | xCreate = pMod->x1.xCreate; | ||||
22561 | } | ||||
22562 | |||||
22563 | rc = xCreate(pMod->pUserData, | ||||
22564 | (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok | ||||
22565 | ); | ||||
22566 | |||||
22567 | if( rc!=SQLITE_OK0 ){ | ||||
22568 | if( rc!=SQLITE_NOMEM7 ){ | ||||
22569 | sqlite3Fts5ConfigErrmsg(pConfig, "error in tokenizer constructor"); | ||||
22570 | } | ||||
22571 | }else if( pMod->bV2Native==0 ){ | ||||
22572 | pConfig->t.ePattern = sqlite3Fts5TokenizerPattern( | ||||
22573 | pMod->x1.xCreate, pConfig->t.pTok | ||||
22574 | ); | ||||
22575 | } | ||||
22576 | } | ||||
22577 | |||||
22578 | if( rc!=SQLITE_OK0 ){ | ||||
22579 | pConfig->t.pApi1 = 0; | ||||
22580 | pConfig->t.pApi2 = 0; | ||||
22581 | pConfig->t.pTok = 0; | ||||
22582 | } | ||||
22583 | |||||
22584 | return rc; | ||||
22585 | } | ||||
22586 | |||||
22587 | |||||
22588 | /* | ||||
22589 | ** xDestroy callback passed to sqlite3_create_module(). This is invoked | ||||
22590 | ** when the db handle is being closed. Free memory associated with | ||||
22591 | ** tokenizers and aux functions registered with this db handle. | ||||
22592 | */ | ||||
22593 | static void fts5ModuleDestroy(void *pCtx){ | ||||
22594 | Fts5TokenizerModule *pTok, *pNextTok; | ||||
22595 | Fts5Auxiliary *pAux, *pNextAux; | ||||
22596 | Fts5Global *pGlobal = (Fts5Global*)pCtx; | ||||
22597 | |||||
22598 | for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ | ||||
22599 | pNextAux = pAux->pNext; | ||||
22600 | if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); | ||||
22601 | sqlite3_freesqlite3_api->free(pAux); | ||||
22602 | } | ||||
22603 | |||||
22604 | for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ | ||||
22605 | pNextTok = pTok->pNext; | ||||
22606 | if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); | ||||
22607 | sqlite3_freesqlite3_api->free(pTok); | ||||
22608 | } | ||||
22609 | |||||
22610 | sqlite3_freesqlite3_api->free(pGlobal); | ||||
22611 | } | ||||
22612 | |||||
22613 | /* | ||||
22614 | ** Implementation of the fts5() function used by clients to obtain the | ||||
22615 | ** API pointer. | ||||
22616 | */ | ||||
22617 | static void fts5Fts5Func( | ||||
22618 | sqlite3_context *pCtx, /* Function call context */ | ||||
22619 | int nArg, /* Number of args */ | ||||
22620 | sqlite3_value **apArg /* Function arguments */ | ||||
22621 | ){ | ||||
22622 | Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | ||||
22623 | fts5_api **ppApi; | ||||
22624 | UNUSED_PARAM(nArg)(void)(nArg); | ||||
22625 | assert( nArg==1 )((void) (0)); | ||||
22626 | ppApi = (fts5_api**)sqlite3_value_pointersqlite3_api->value_pointer(apArg[0], "fts5_api_ptr"); | ||||
22627 | if( ppApi ) *ppApi = &pGlobal->api; | ||||
22628 | } | ||||
22629 | |||||
22630 | /* | ||||
22631 | ** Implementation of fts5_source_id() function. | ||||
22632 | */ | ||||
22633 | static void fts5SourceIdFunc( | ||||
22634 | sqlite3_context *pCtx, /* Function call context */ | ||||
22635 | int nArg, /* Number of args */ | ||||
22636 | sqlite3_value **apUnused /* Function arguments */ | ||||
22637 | ){ | ||||
22638 | assert( nArg==0 )((void) (0)); | ||||
22639 | UNUSED_PARAM2(nArg, apUnused)(void)(nArg), (void)(apUnused); | ||||
22640 | sqlite3_result_textsqlite3_api->result_text(pCtx, "fts5: 2025-06-06 14:52:32 b77dc5e0f596d2140d9ac682b2893ff65d3a4140aa86067a3efebe29dc914c95", -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
22641 | } | ||||
22642 | |||||
22643 | /* | ||||
22644 | ** Implementation of fts5_locale(LOCALE, TEXT) function. | ||||
22645 | ** | ||||
22646 | ** If parameter LOCALE is NULL, or a zero-length string, then a copy of | ||||
22647 | ** TEXT is returned. Otherwise, both LOCALE and TEXT are interpreted as | ||||
22648 | ** text, and the value returned is a blob consisting of: | ||||
22649 | ** | ||||
22650 | ** * The 4 bytes 0x00, 0xE0, 0xB2, 0xEb (FTS5_LOCALE_HEADER). | ||||
22651 | ** * The LOCALE, as utf-8 text, followed by | ||||
22652 | ** * 0x00, followed by | ||||
22653 | ** * The TEXT, as utf-8 text. | ||||
22654 | ** | ||||
22655 | ** There is no final nul-terminator following the TEXT value. | ||||
22656 | */ | ||||
22657 | static void fts5LocaleFunc( | ||||
22658 | sqlite3_context *pCtx, /* Function call context */ | ||||
22659 | int nArg, /* Number of args */ | ||||
22660 | sqlite3_value **apArg /* Function arguments */ | ||||
22661 | ){ | ||||
22662 | const char *zLocale = 0; | ||||
22663 | int nLocale = 0; | ||||
22664 | const char *zText = 0; | ||||
22665 | int nText = 0; | ||||
22666 | |||||
22667 | assert( nArg==2 )((void) (0)); | ||||
22668 | UNUSED_PARAM(nArg)(void)(nArg); | ||||
22669 | |||||
22670 | zLocale = (const char*)sqlite3_value_textsqlite3_api->value_text(apArg[0]); | ||||
22671 | nLocale = sqlite3_value_bytessqlite3_api->value_bytes(apArg[0]); | ||||
22672 | |||||
22673 | zText = (const char*)sqlite3_value_textsqlite3_api->value_text(apArg[1]); | ||||
22674 | nText = sqlite3_value_bytessqlite3_api->value_bytes(apArg[1]); | ||||
22675 | |||||
22676 | if( zLocale==0 || zLocale[0]=='\0' ){ | ||||
22677 | sqlite3_result_textsqlite3_api->result_text(pCtx, zText, nText, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
22678 | }else{ | ||||
22679 | Fts5Global *p = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | ||||
22680 | u8 *pBlob = 0; | ||||
22681 | u8 *pCsr = 0; | ||||
22682 | int nBlob = 0; | ||||
22683 | |||||
22684 | nBlob = FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) + nLocale + 1 + nText; | ||||
22685 | pBlob = (u8*)sqlite3_mallocsqlite3_api->malloc(nBlob); | ||||
22686 | if( pBlob==0 ){ | ||||
22687 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(pCtx); | ||||
22688 | return; | ||||
22689 | } | ||||
22690 | |||||
22691 | pCsr = pBlob; | ||||
22692 | memcpy(pCsr, (const u8*)p->aLocaleHdr, FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))); | ||||
22693 | pCsr += FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); | ||||
22694 | memcpy(pCsr, zLocale, nLocale); | ||||
22695 | pCsr += nLocale; | ||||
22696 | (*pCsr++) = 0x00; | ||||
22697 | if( zText ) memcpy(pCsr, zText, nText); | ||||
22698 | assert( &pCsr[nText]==&pBlob[nBlob] )((void) (0)); | ||||
22699 | |||||
22700 | sqlite3_result_blobsqlite3_api->result_blob(pCtx, pBlob, nBlob, sqlite3_freesqlite3_api->free); | ||||
22701 | } | ||||
22702 | } | ||||
22703 | |||||
22704 | /* | ||||
22705 | ** Implementation of fts5_insttoken() function. | ||||
22706 | */ | ||||
22707 | static void fts5InsttokenFunc( | ||||
22708 | sqlite3_context *pCtx, /* Function call context */ | ||||
22709 | int nArg, /* Number of args */ | ||||
22710 | sqlite3_value **apArg /* Function arguments */ | ||||
22711 | ){ | ||||
22712 | assert( nArg==1 )((void) (0)); | ||||
22713 | (void)nArg; | ||||
22714 | sqlite3_result_valuesqlite3_api->result_value(pCtx, apArg[0]); | ||||
22715 | sqlite3_result_subtypesqlite3_api->result_subtype(pCtx, FTS5_INSTTOKEN_SUBTYPE73); | ||||
22716 | } | ||||
22717 | |||||
/*
** Return 1 if zName matches (using sqlite3_stricmp()) the extension of
** one of the shadow tables used by this module: "config", "content",
** "data", "docsize" or "idx". Return 0 otherwise.
*/
static int fts5ShadowName(const char *zName){
  static const char *azName[] = {
    "config", "content", "data", "docsize", "idx"
  };
  const unsigned int nName = sizeof(azName)/sizeof(azName[0]);
  unsigned int iName = 0;

  while( iName<nName ){
    if( sqlite3_stricmp(zName, azName[iName])==0 ){
      return 1;
    }
    iName++;
  }
  return 0;
}
22732 | |||||
22733 | /* | ||||
22734 | ** Run an integrity check on the FTS5 data structures. Return a string | ||||
22735 | ** if anything is found amiss. Return a NULL pointer if everything is | ||||
22736 | ** OK. | ||||
22737 | */ | ||||
22738 | static int fts5IntegrityMethod( | ||||
22739 | sqlite3_vtab *pVtab, /* the FTS5 virtual table to check */ | ||||
22740 | const char *zSchema, /* Name of schema in which this table lives */ | ||||
22741 | const char *zTabname, /* Name of the table itself */ | ||||
22742 | int isQuick, /* True if this is a quick-check */ | ||||
22743 | char **pzErr /* Write error message here */ | ||||
22744 | ){ | ||||
22745 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | ||||
22746 | int rc; | ||||
22747 | |||||
22748 | assert( pzErr!=0 && *pzErr==0 )((void) (0)); | ||||
22749 | UNUSED_PARAM(isQuick)(void)(isQuick); | ||||
22750 | assert( pTab->p.pConfig->pzErrmsg==0 )((void) (0)); | ||||
22751 | pTab->p.pConfig->pzErrmsg = pzErr; | ||||
22752 | rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, 0); | ||||
22753 | if( *pzErr==0 && rc!=SQLITE_OK0 ){ | ||||
22754 | if( (rc&0xff)==SQLITE_CORRUPT11 ){ | ||||
22755 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed inverted index for FTS5 table %s.%s", | ||||
22756 | zSchema, zTabname); | ||||
22757 | rc = (*pzErr) ? SQLITE_OK0 : SQLITE_NOMEM7; | ||||
22758 | }else{ | ||||
22759 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unable to validate the inverted index for" | ||||
22760 | " FTS5 table %s.%s: %s", | ||||
22761 | zSchema, zTabname, sqlite3_errstrsqlite3_api->errstr(rc)); | ||||
22762 | } | ||||
22763 | } | ||||
22764 | |||||
22765 | sqlite3Fts5IndexCloseReader(pTab->p.pIndex); | ||||
22766 | pTab->p.pConfig->pzErrmsg = 0; | ||||
22767 | |||||
22768 | return rc; | ||||
22769 | } | ||||
22770 | |||||
22771 | static int fts5Init(sqlite3 *db){ | ||||
22772 | static const sqlite3_module fts5Mod = { | ||||
22773 | /* iVersion */ 4, | ||||
22774 | /* xCreate */ fts5CreateMethod, | ||||
22775 | /* xConnect */ fts5ConnectMethod, | ||||
22776 | /* xBestIndex */ fts5BestIndexMethod, | ||||
22777 | /* xDisconnect */ fts5DisconnectMethod, | ||||
22778 | /* xDestroy */ fts5DestroyMethod, | ||||
22779 | /* xOpen */ fts5OpenMethod, | ||||
22780 | /* xClose */ fts5CloseMethod, | ||||
22781 | /* xFilter */ fts5FilterMethod, | ||||
22782 | /* xNext */ fts5NextMethod, | ||||
22783 | /* xEof */ fts5EofMethod, | ||||
22784 | /* xColumn */ fts5ColumnMethod, | ||||
22785 | /* xRowid */ fts5RowidMethod, | ||||
22786 | /* xUpdate */ fts5UpdateMethod, | ||||
22787 | /* xBegin */ fts5BeginMethod, | ||||
22788 | /* xSync */ fts5SyncMethod, | ||||
22789 | /* xCommit */ fts5CommitMethod, | ||||
22790 | /* xRollback */ fts5RollbackMethod, | ||||
22791 | /* xFindFunction */ fts5FindFunctionMethod, | ||||
22792 | /* xRename */ fts5RenameMethod, | ||||
22793 | /* xSavepoint */ fts5SavepointMethod, | ||||
22794 | /* xRelease */ fts5ReleaseMethod, | ||||
22795 | /* xRollbackTo */ fts5RollbackToMethod, | ||||
22796 | /* xShadowName */ fts5ShadowName, | ||||
22797 | /* xIntegrity */ fts5IntegrityMethod | ||||
22798 | }; | ||||
22799 | |||||
22800 | int rc; | ||||
22801 | Fts5Global *pGlobal = 0; | ||||
22802 | |||||
22803 | pGlobal = (Fts5Global*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Global)); | ||||
22804 | if( pGlobal==0 ){ | ||||
22805 | rc = SQLITE_NOMEM7; | ||||
22806 | }else{ | ||||
22807 | void *p = (void*)pGlobal; | ||||
22808 | memset(pGlobal, 0, sizeof(Fts5Global)); | ||||
22809 | pGlobal->db = db; | ||||
22810 | pGlobal->api.iVersion = 3; | ||||
22811 | pGlobal->api.xCreateFunction = fts5CreateAux; | ||||
22812 | pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; | ||||
22813 | pGlobal->api.xFindTokenizer = fts5FindTokenizer; | ||||
22814 | pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2; | ||||
22815 | pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2; | ||||
22816 | |||||
22817 | /* Initialize pGlobal->aLocaleHdr[] to a 128-bit pseudo-random vector. | ||||
22818 | ** The constants below were generated randomly. */ | ||||
22819 | sqlite3_randomnesssqlite3_api->randomness(sizeof(pGlobal->aLocaleHdr), pGlobal->aLocaleHdr); | ||||
22820 | pGlobal->aLocaleHdr[0] ^= 0xF924976D; | ||||
22821 | pGlobal->aLocaleHdr[1] ^= 0x16596E13; | ||||
22822 | pGlobal->aLocaleHdr[2] ^= 0x7C80BEAA; | ||||
22823 | pGlobal->aLocaleHdr[3] ^= 0x9B03A67F; | ||||
22824 | assert( sizeof(pGlobal->aLocaleHdr)==16 )((void) (0)); | ||||
22825 | |||||
22826 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); | ||||
22827 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5IndexInit(db); | ||||
22828 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5ExprInit(pGlobal, db); | ||||
22829 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5AuxInit(&pGlobal->api); | ||||
22830 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); | ||||
22831 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5VocabInit(pGlobal, db); | ||||
22832 | if( rc==SQLITE_OK0 ){ | ||||
22833 | rc = sqlite3_create_functionsqlite3_api->create_function( | ||||
22834 | db, "fts5", 1, SQLITE_UTF81, p, fts5Fts5Func, 0, 0 | ||||
22835 | ); | ||||
22836 | } | ||||
22837 | if( rc==SQLITE_OK0 ){ | ||||
22838 | rc = sqlite3_create_functionsqlite3_api->create_function( | ||||
22839 | db, "fts5_source_id", 0, | ||||
22840 | SQLITE_UTF81|SQLITE_DETERMINISTIC0x000000800|SQLITE_INNOCUOUS0x000200000, | ||||
22841 | p, fts5SourceIdFunc, 0, 0 | ||||
22842 | ); | ||||
22843 | } | ||||
22844 | if( rc==SQLITE_OK0 ){ | ||||
22845 | rc = sqlite3_create_functionsqlite3_api->create_function( | ||||
22846 | db, "fts5_locale", 2, | ||||
22847 | SQLITE_UTF81|SQLITE_INNOCUOUS0x000200000|SQLITE_RESULT_SUBTYPE0x001000000|SQLITE_SUBTYPE0x000100000, | ||||
22848 | p, fts5LocaleFunc, 0, 0 | ||||
22849 | ); | ||||
22850 | } | ||||
22851 | if( rc==SQLITE_OK0 ){ | ||||
22852 | rc = sqlite3_create_functionsqlite3_api->create_function( | ||||
22853 | db, "fts5_insttoken", 1, | ||||
22854 | SQLITE_UTF81|SQLITE_INNOCUOUS0x000200000|SQLITE_RESULT_SUBTYPE0x001000000, | ||||
22855 | p, fts5InsttokenFunc, 0, 0 | ||||
22856 | ); | ||||
22857 | } | ||||
22858 | } | ||||
22859 | |||||
22860 | /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file | ||||
22861 | ** fts5_test_mi.c is compiled and linked into the executable. And call | ||||
22862 | ** its entry point to enable the matchinfo() demo. */ | ||||
22863 | #ifdef SQLITE_FTS5_ENABLE_TEST_MI | ||||
22864 | if( rc==SQLITE_OK0 ){ | ||||
22865 | extern int sqlite3Fts5TestRegisterMatchinfoAPI(fts5_api*); | ||||
22866 | rc = sqlite3Fts5TestRegisterMatchinfoAPI(&pGlobal->api); | ||||
22867 | } | ||||
22868 | #endif | ||||
22869 | |||||
22870 | return rc; | ||||
22871 | } | ||||
22872 | |||||
22873 | /* | ||||
22874 | ** The following functions are used to register the module with SQLite. If | ||||
22875 | ** this module is being built as part of the SQLite core (SQLITE_CORE is | ||||
22876 | ** defined), then sqlite3_open() will call sqlite3Fts5Init() directly. | ||||
22877 | ** | ||||
22878 | ** Or, if this module is being built as a loadable extension, | ||||
22879 | ** sqlite3Fts5Init() is omitted and the two standard entry points | ||||
22880 | ** sqlite3_fts_init() and sqlite3_fts5_init() defined instead. | ||||
22881 | */ | ||||
22882 | #ifndef SQLITE_CORE | ||||
22883 | #ifdef _WIN32 | ||||
22884 | __declspec(dllexport) | ||||
22885 | #endif | ||||
22886 | int sqlite3_fts_init( | ||||
22887 | sqlite3 *db, | ||||
22888 | char **pzErrMsg, | ||||
22889 | const sqlite3_api_routines *pApi | ||||
22890 | ){ | ||||
22891 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | ||||
22892 | (void)pzErrMsg; /* Unused parameter */ | ||||
22893 | return fts5Init(db); | ||||
22894 | } | ||||
22895 | |||||
22896 | #ifdef _WIN32 | ||||
22897 | __declspec(dllexport) | ||||
22898 | #endif | ||||
22899 | int sqlite3_fts5_init( | ||||
22900 | sqlite3 *db, | ||||
22901 | char **pzErrMsg, | ||||
22902 | const sqlite3_api_routines *pApi | ||||
22903 | ){ | ||||
22904 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | ||||
22905 | (void)pzErrMsg; /* Unused parameter */ | ||||
22906 | return fts5Init(db); | ||||
22907 | } | ||||
22908 | #else | ||||
22909 | int sqlite3Fts5Init(sqlite3 *db){ | ||||
22910 | return fts5Init(db); | ||||
22911 | } | ||||
22912 | #endif | ||||
22913 | |||||
22914 | #line 1 "fts5_storage.c" | ||||
22915 | /* | ||||
22916 | ** 2014 May 31 | ||||
22917 | ** | ||||
22918 | ** The author disclaims copyright to this source code. In place of | ||||
22919 | ** a legal notice, here is a blessing: | ||||
22920 | ** | ||||
22921 | ** May you do good and not evil. | ||||
22922 | ** May you find forgiveness for yourself and forgive others. | ||||
22923 | ** May you share freely, never taking more than you give. | ||||
22924 | ** | ||||
22925 | ****************************************************************************** | ||||
22926 | ** | ||||
22927 | */ | ||||
22928 | |||||
22929 | |||||
22930 | |||||
22931 | /* #include "fts5Int.h" */ | ||||
22932 | |||||
22933 | /* | ||||
22934 | ** pSavedRow: | ||||
22935 | ** SQL statement FTS5_STMT_LOOKUP2 is a copy of FTS5_STMT_LOOKUP, it | ||||
22936 | ** does a by-rowid lookup to retrieve a single row from the %_content | ||||
22937 | ** table or equivalent external-content table/view. | ||||
22938 | ** | ||||
22939 | ** However, FTS5_STMT_LOOKUP2 is only used when retrieving the original | ||||
22940 | ** values for a row being UPDATEd. In that case, the SQL statement is | ||||
22941 | ** not reset and pSavedRow is set to point at it. This is so that the | ||||
22942 | ** insert operation that follows the delete may access the original | ||||
22943 | ** row values for any new values for which sqlite3_value_nochange() returns | ||||
22944 | ** true. i.e. if the user executes: | ||||
22945 | ** | ||||
22946 | ** CREATE VIRTUAL TABLE ft USING fts5(a, b, c, locale=1); | ||||
22947 | ** ... | ||||
22948 | ** UPDATE fts SET a=?, b=? WHERE rowid=?; | ||||
22949 | ** | ||||
22950 | ** then the value passed to the xUpdate() method of this table as the | ||||
22951 | ** new.c value is an sqlite3_value_nochange() value. So in this case it | ||||
22952 | ** must be read from the saved row stored in Fts5Storage.pSavedRow. | ||||
22953 | ** | ||||
22954 | ** This is necessary - using sqlite3_value_nochange() instead of just having | ||||
22955 | ** SQLite pass the original value back via xUpdate() - so as not to discard | ||||
22956 | ** any locale information associated with such values. | ||||
22957 | ** | ||||
22958 | */ | ||||
22959 | struct Fts5Storage { | ||||
22960 | Fts5Config *pConfig; | ||||
22961 | Fts5Index *pIndex; | ||||
22962 | int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ | ||||
22963 | i64 nTotalRow; /* Total number of rows in FTS table */ | ||||
22964 | i64 *aTotalSize; /* Total sizes of each column */ | ||||
22965 | sqlite3_stmt *pSavedRow; | ||||
22966 | sqlite3_stmt *aStmt[12]; | ||||
22967 | }; | ||||
22968 | |||||
22969 | |||||
22970 | #if FTS5_STMT_SCAN_ASC0!=0 | ||||
22971 | # error "FTS5_STMT_SCAN_ASC mismatch" | ||||
22972 | #endif | ||||
22973 | #if FTS5_STMT_SCAN_DESC1!=1 | ||||
22974 | # error "FTS5_STMT_SCAN_DESC mismatch" | ||||
22975 | #endif | ||||
22976 | #if FTS5_STMT_LOOKUP2!=2 | ||||
22977 | # error "FTS5_STMT_LOOKUP mismatch" | ||||
22978 | #endif | ||||
22979 | |||||
22980 | #define FTS5_STMT_LOOKUP23 3 | ||||
22981 | #define FTS5_STMT_INSERT_CONTENT4 4 | ||||
22982 | #define FTS5_STMT_REPLACE_CONTENT5 5 | ||||
22983 | #define FTS5_STMT_DELETE_CONTENT6 6 | ||||
22984 | #define FTS5_STMT_REPLACE_DOCSIZE7 7 | ||||
22985 | #define FTS5_STMT_DELETE_DOCSIZE8 8 | ||||
22986 | #define FTS5_STMT_LOOKUP_DOCSIZE9 9 | ||||
22987 | #define FTS5_STMT_REPLACE_CONFIG10 10 | ||||
22988 | #define FTS5_STMT_SCAN11 11 | ||||
22989 | |||||
22990 | /* | ||||
22991 | ** Prepare the two insert statements - Fts5Storage.pInsertContent and | ||||
22992 | ** Fts5Storage.pInsertDocsize - if they have not already been prepared. | ||||
22993 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | ||||
22994 | ** occurs. | ||||
22995 | */ | ||||
22996 | static int fts5StorageGetStmt( | ||||
22997 | Fts5Storage *p, /* Storage handle */ | ||||
22998 | int eStmt, /* FTS5_STMT_XXX constant */ | ||||
22999 | sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */ | ||||
23000 | char **pzErrMsg /* OUT: Error message (if any) */ | ||||
23001 | ){ | ||||
23002 | int rc = SQLITE_OK0; | ||||
23003 | |||||
23004 | /* If there is no %_docsize table, there should be no requests for | ||||
23005 | ** statements to operate on it. */ | ||||
23006 | assert( p->pConfig->bColumnsize || (((void) (0)) | ||||
23007 | eStmt!=FTS5_STMT_REPLACE_DOCSIZE((void) (0)) | ||||
23008 | && eStmt!=FTS5_STMT_DELETE_DOCSIZE((void) (0)) | ||||
23009 | && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE((void) (0)) | ||||
23010 | ))((void) (0)); | ||||
23011 | |||||
23012 | assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) )((void) (0)); | ||||
23013 | if( p->aStmt[eStmt]==0 ){ | ||||
23014 | const char *azStmt[] = { | ||||
23015 | "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC", | ||||
23016 | "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC", | ||||
23017 | "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ | ||||
23018 | "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP2 */ | ||||
23019 | |||||
23020 | "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ | ||||
23021 | "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ | ||||
23022 | "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ | ||||
23023 | "REPLACE INTO %Q.'%q_docsize' VALUES(?,?%s)", /* REPLACE_DOCSIZE */ | ||||
23024 | "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ | ||||
23025 | |||||
23026 | "SELECT sz%s FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ | ||||
23027 | |||||
23028 | "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ | ||||
23029 | "SELECT %s FROM %s AS T", /* SCAN */ | ||||
23030 | }; | ||||
23031 | Fts5Config *pC = p->pConfig; | ||||
23032 | char *zSql = 0; | ||||
23033 | |||||
23034 | assert( ArraySize(azStmt)==ArraySize(p->aStmt) )((void) (0)); | ||||
23035 | |||||
23036 | switch( eStmt ){ | ||||
23037 | case FTS5_STMT_SCAN11: | ||||
23038 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | ||||
23039 | pC->zContentExprlist, pC->zContent | ||||
23040 | ); | ||||
23041 | break; | ||||
23042 | |||||
23043 | case FTS5_STMT_SCAN_ASC0: | ||||
23044 | case FTS5_STMT_SCAN_DESC1: | ||||
23045 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zContentExprlist, | ||||
23046 | pC->zContent, pC->zContentRowid, pC->zContentRowid, | ||||
23047 | pC->zContentRowid | ||||
23048 | ); | ||||
23049 | break; | ||||
23050 | |||||
23051 | case FTS5_STMT_LOOKUP2: | ||||
23052 | case FTS5_STMT_LOOKUP23: | ||||
23053 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | ||||
23054 | pC->zContentExprlist, pC->zContent, pC->zContentRowid | ||||
23055 | ); | ||||
23056 | break; | ||||
23057 | |||||
23058 | case FTS5_STMT_INSERT_CONTENT4: | ||||
23059 | case FTS5_STMT_REPLACE_CONTENT5: { | ||||
23060 | char *zBind = 0; | ||||
23061 | int i; | ||||
23062 | |||||
23063 | assert( pC->eContent==FTS5_CONTENT_NORMAL((void) (0)) | ||||
23064 | || pC->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | ||||
23065 | )((void) (0)); | ||||
23066 | |||||
23067 | /* Add bindings for the "c*" columns - those that store the actual | ||||
23068 | ** table content. If eContent==NORMAL, then there is one binding | ||||
23069 | ** for each column. Or, if eContent==UNINDEXED, then there are only | ||||
23070 | ** bindings for the UNINDEXED columns. */ | ||||
23071 | for(i=0; rc==SQLITE_OK0 && i<(pC->nCol+1); i++){ | ||||
23072 | if( !i || pC->eContent==FTS5_CONTENT_NORMAL0 || pC->abUnindexed[i-1] ){ | ||||
23073 | zBind = sqlite3Fts5Mprintf(&rc, "%z%s?%d", zBind, zBind?",":"",i+1); | ||||
23074 | } | ||||
23075 | } | ||||
23076 | |||||
23077 | /* Add bindings for any "l*" columns. Only non-UNINDEXED columns | ||||
23078 | ** require these. */ | ||||
23079 | if( pC->bLocale && pC->eContent==FTS5_CONTENT_NORMAL0 ){ | ||||
23080 | for(i=0; rc==SQLITE_OK0 && i<pC->nCol; i++){ | ||||
23081 | if( pC->abUnindexed[i]==0 ){ | ||||
23082 | zBind = sqlite3Fts5Mprintf(&rc, "%z,?%d", zBind, pC->nCol+i+2); | ||||
23083 | } | ||||
23084 | } | ||||
23085 | } | ||||
23086 | |||||
23087 | zSql = sqlite3Fts5Mprintf(&rc, azStmt[eStmt], pC->zDb, pC->zName,zBind); | ||||
23088 | sqlite3_freesqlite3_api->free(zBind); | ||||
23089 | break; | ||||
23090 | } | ||||
23091 | |||||
23092 | case FTS5_STMT_REPLACE_DOCSIZE7: | ||||
23093 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zDb, pC->zName, | ||||
23094 | (pC->bContentlessDelete ? ",?" : "") | ||||
23095 | ); | ||||
23096 | break; | ||||
23097 | |||||
23098 | case FTS5_STMT_LOOKUP_DOCSIZE9: | ||||
23099 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | ||||
23100 | (pC->bContentlessDelete ? ",origin" : ""), | ||||
23101 | pC->zDb, pC->zName | ||||
23102 | ); | ||||
23103 | break; | ||||
23104 | |||||
23105 | default: | ||||
23106 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zDb, pC->zName); | ||||
23107 | break; | ||||
23108 | } | ||||
23109 | |||||
23110 | if( zSql==0 ){ | ||||
23111 | rc = SQLITE_NOMEM7; | ||||
23112 | }else{ | ||||
23113 | int f = SQLITE_PREPARE_PERSISTENT0x01; | ||||
23114 | if( eStmt>FTS5_STMT_LOOKUP23 ) f |= SQLITE_PREPARE_NO_VTAB0x04; | ||||
23115 | p->pConfig->bLock++; | ||||
23116 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0); | ||||
23117 | p->pConfig->bLock--; | ||||
23118 | sqlite3_freesqlite3_api->free(zSql); | ||||
23119 | if( rc!=SQLITE_OK0 && pzErrMsg ){ | ||||
23120 | *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("%s", sqlite3_errmsgsqlite3_api->errmsg(pC->db)); | ||||
23121 | } | ||||
23122 | if( rc==SQLITE_ERROR1 && eStmt>FTS5_STMT_LOOKUP23 && eStmt<FTS5_STMT_SCAN11 ){ | ||||
23123 | /* One of the internal tables - not the %_content table - is missing. | ||||
23124 | ** This counts as a corrupted table. */ | ||||
23125 | rc = SQLITE_CORRUPT11; | ||||
23126 | } | ||||
23127 | } | ||||
23128 | } | ||||
23129 | |||||
23130 | *ppStmt = p->aStmt[eStmt]; | ||||
23131 | sqlite3_resetsqlite3_api->reset(*ppStmt); | ||||
23132 | return rc; | ||||
23133 | } | ||||
23134 | |||||
23135 | |||||
23136 | static int fts5ExecPrintf( | ||||
23137 | sqlite3 *db, | ||||
23138 | char **pzErr, | ||||
23139 | const char *zFormat, | ||||
23140 | ... | ||||
23141 | ){ | ||||
23142 | int rc; | ||||
23143 | va_list ap; /* ... printf arguments */ | ||||
23144 | char *zSql; | ||||
23145 | |||||
23146 | va_start(ap, zFormat)__builtin_va_start(ap, zFormat); | ||||
23147 | zSql = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, ap); | ||||
23148 | |||||
23149 | if( zSql==0 ){ | ||||
23150 | rc = SQLITE_NOMEM7; | ||||
23151 | }else{ | ||||
23152 | rc = sqlite3_execsqlite3_api->exec(db, zSql, 0, 0, pzErr); | ||||
23153 | sqlite3_freesqlite3_api->free(zSql); | ||||
23154 | } | ||||
23155 | |||||
23156 | va_end(ap)__builtin_va_end(ap); | ||||
23157 | return rc; | ||||
23158 | } | ||||
23159 | |||||
23160 | /* | ||||
23161 | ** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error | ||||
23162 | ** code otherwise. | ||||
23163 | */ | ||||
23164 | static int sqlite3Fts5DropAll(Fts5Config *pConfig){ | ||||
23165 | int rc = fts5ExecPrintf(pConfig->db, 0, | ||||
23166 | "DROP TABLE IF EXISTS %Q.'%q_data';" | ||||
23167 | "DROP TABLE IF EXISTS %Q.'%q_idx';" | ||||
23168 | "DROP TABLE IF EXISTS %Q.'%q_config';", | ||||
23169 | pConfig->zDb, pConfig->zName, | ||||
23170 | pConfig->zDb, pConfig->zName, | ||||
23171 | pConfig->zDb, pConfig->zName | ||||
23172 | ); | ||||
23173 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | ||||
23174 | rc = fts5ExecPrintf(pConfig->db, 0, | ||||
23175 | "DROP TABLE IF EXISTS %Q.'%q_docsize';", | ||||
23176 | pConfig->zDb, pConfig->zName | ||||
23177 | ); | ||||
23178 | } | ||||
23179 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | ||||
23180 | rc = fts5ExecPrintf(pConfig->db, 0, | ||||
23181 | "DROP TABLE IF EXISTS %Q.'%q_content';", | ||||
23182 | pConfig->zDb, pConfig->zName | ||||
23183 | ); | ||||
23184 | } | ||||
23185 | return rc; | ||||
23186 | } | ||||
23187 | |||||
23188 | static void fts5StorageRenameOne( | ||||
23189 | Fts5Config *pConfig, /* Current FTS5 configuration */ | ||||
23190 | int *pRc, /* IN/OUT: Error code */ | ||||
23191 | const char *zTail, /* Tail of table name e.g. "data", "config" */ | ||||
23192 | const char *zName /* New name of FTS5 table */ | ||||
23193 | ){ | ||||
23194 | if( *pRc==SQLITE_OK0 ){ | ||||
23195 | *pRc = fts5ExecPrintf(pConfig->db, 0, | ||||
23196 | "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';", | ||||
23197 | pConfig->zDb, pConfig->zName, zTail, zName, zTail | ||||
23198 | ); | ||||
23199 | } | ||||
23200 | } | ||||
23201 | |||||
23202 | static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){ | ||||
23203 | Fts5Config *pConfig = pStorage->pConfig; | ||||
23204 | int rc = sqlite3Fts5StorageSync(pStorage); | ||||
23205 | |||||
23206 | fts5StorageRenameOne(pConfig, &rc, "data", zName); | ||||
23207 | fts5StorageRenameOne(pConfig, &rc, "idx", zName); | ||||
23208 | fts5StorageRenameOne(pConfig, &rc, "config", zName); | ||||
23209 | if( pConfig->bColumnsize ){ | ||||
23210 | fts5StorageRenameOne(pConfig, &rc, "docsize", zName); | ||||
23211 | } | ||||
23212 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | ||||
23213 | fts5StorageRenameOne(pConfig, &rc, "content", zName); | ||||
23214 | } | ||||
23215 | return rc; | ||||
23216 | } | ||||
23217 | |||||
23218 | /* | ||||
23219 | ** Create the shadow table named zPost, with definition zDefn. Return | ||||
23220 | ** SQLITE_OK if successful, or an SQLite error code otherwise. | ||||
23221 | */ | ||||
23222 | static int sqlite3Fts5CreateTable( | ||||
23223 | Fts5Config *pConfig, /* FTS5 configuration */ | ||||
23224 | const char *zPost, /* Shadow table to create (e.g. "content") */ | ||||
23225 | const char *zDefn, /* Columns etc. for shadow table */ | ||||
23226 | int bWithout, /* True for without rowid */ | ||||
23227 | char **pzErr /* OUT: Error message */ | ||||
23228 | ){ | ||||
23229 | int rc; | ||||
23230 | char *zErr = 0; | ||||
23231 | |||||
23232 | rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s", | ||||
23233 | pConfig->zDb, pConfig->zName, zPost, zDefn, | ||||
23234 | #ifndef SQLITE_FTS5_NO_WITHOUT_ROWID | ||||
23235 | bWithout?" WITHOUT ROWID": | ||||
23236 | #endif | ||||
23237 | "" | ||||
23238 | ); | ||||
23239 | if( zErr ){ | ||||
23240 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | ||||
23241 | "fts5: error creating shadow table %q_%s: %s", | ||||
23242 | pConfig->zName, zPost, zErr | ||||
23243 | ); | ||||
23244 | sqlite3_freesqlite3_api->free(zErr); | ||||
23245 | } | ||||
23246 | |||||
23247 | return rc; | ||||
23248 | } | ||||
23249 | |||||
23250 | /* | ||||
23251 | ** Open a new Fts5Index handle. If the bCreate argument is true, create | ||||
23252 | ** and initialize the underlying tables | ||||
23253 | ** | ||||
23254 | ** If successful, set *pp to point to the new object and return SQLITE_OK. | ||||
23255 | ** Otherwise, set *pp to NULL and return an SQLite error code. | ||||
23256 | */ | ||||
23257 | static int sqlite3Fts5StorageOpen( | ||||
23258 | Fts5Config *pConfig, | ||||
23259 | Fts5Index *pIndex, | ||||
23260 | int bCreate, | ||||
23261 | Fts5Storage **pp, | ||||
23262 | char **pzErr /* OUT: Error message */ | ||||
23263 | ){ | ||||
23264 | int rc = SQLITE_OK0; | ||||
23265 | Fts5Storage *p; /* New object */ | ||||
23266 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | ||||
23267 | |||||
23268 | nByte = sizeof(Fts5Storage) /* Fts5Storage object */ | ||||
23269 | + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */ | ||||
23270 | *pp = p = (Fts5Storage*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | ||||
23271 | if( !p ) return SQLITE_NOMEM7; | ||||
23272 | |||||
23273 | memset(p, 0, (size_t)nByte); | ||||
23274 | p->aTotalSize = (i64*)&p[1]; | ||||
23275 | p->pConfig = pConfig; | ||||
23276 | p->pIndex = pIndex; | ||||
23277 | |||||
23278 | if( bCreate ){ | ||||
23279 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | ||||
23280 | || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | ||||
23281 | ){ | ||||
23282 | int nDefn = 32 + pConfig->nCol*10; | ||||
23283 | char *zDefn = sqlite3_malloc64sqlite3_api->malloc64(32 + (sqlite3_int64)pConfig->nCol * 20); | ||||
23284 | if( zDefn==0 ){ | ||||
23285 | rc = SQLITE_NOMEM7; | ||||
23286 | }else{ | ||||
23287 | int i; | ||||
23288 | int iOff; | ||||
23289 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); | ||||
23290 | iOff = (int)strlen(zDefn); | ||||
23291 | for(i=0; i<pConfig->nCol; i++){ | ||||
23292 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | ||||
23293 | || pConfig->abUnindexed[i] | ||||
23294 | ){ | ||||
23295 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); | ||||
23296 | iOff += (int)strlen(&zDefn[iOff]); | ||||
23297 | } | ||||
23298 | } | ||||
23299 | if( pConfig->bLocale ){ | ||||
23300 | for(i=0; i<pConfig->nCol; i++){ | ||||
23301 | if( pConfig->abUnindexed[i]==0 ){ | ||||
23302 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn-iOff, &zDefn[iOff], ", l%d", i); | ||||
23303 | iOff += (int)strlen(&zDefn[iOff]); | ||||
23304 | } | ||||
23305 | } | ||||
23306 | } | ||||
23307 | rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); | ||||
23308 | } | ||||
23309 | sqlite3_freesqlite3_api->free(zDefn); | ||||
23310 | } | ||||
23311 | |||||
23312 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | ||||
23313 | const char *zCols = "id INTEGER PRIMARY KEY, sz BLOB"; | ||||
23314 | if( pConfig->bContentlessDelete ){ | ||||
23315 | zCols = "id INTEGER PRIMARY KEY, sz BLOB, origin INTEGER"; | ||||
23316 | } | ||||
23317 | rc = sqlite3Fts5CreateTable(pConfig, "docsize", zCols, 0, pzErr); | ||||
23318 | } | ||||
23319 | if( rc==SQLITE_OK0 ){ | ||||
23320 | rc = sqlite3Fts5CreateTable( | ||||
23321 | pConfig, "config", "k PRIMARY KEY, v", 1, pzErr | ||||
23322 | ); | ||||
23323 | } | ||||
23324 | if( rc==SQLITE_OK0 ){ | ||||
23325 | rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION4); | ||||
23326 | } | ||||
23327 | } | ||||
23328 | |||||
23329 | if( rc ){ | ||||
23330 | sqlite3Fts5StorageClose(p); | ||||
23331 | *pp = 0; | ||||
23332 | } | ||||
23333 | return rc; | ||||
23334 | } | ||||
23335 | |||||
23336 | /* | ||||
23337 | ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen(). | ||||
23338 | */ | ||||
23339 | static int sqlite3Fts5StorageClose(Fts5Storage *p){ | ||||
23340 | int rc = SQLITE_OK0; | ||||
23341 | if( p ){ | ||||
23342 | int i; | ||||
23343 | |||||
23344 | /* Finalize all SQL statements */ | ||||
23345 | for(i=0; i<ArraySize(p->aStmt)((int)(sizeof(p->aStmt) / sizeof(p->aStmt[0]))); i++){ | ||||
23346 | sqlite3_finalizesqlite3_api->finalize(p->aStmt[i]); | ||||
23347 | } | ||||
23348 | |||||
23349 | sqlite3_freesqlite3_api->free(p); | ||||
23350 | } | ||||
23351 | return rc; | ||||
23352 | } | ||||
23353 | |||||
23354 | typedef struct Fts5InsertCtx Fts5InsertCtx; | ||||
23355 | struct Fts5InsertCtx { | ||||
23356 | Fts5Storage *pStorage; | ||||
23357 | int iCol; | ||||
23358 | int szCol; /* Size of column value in tokens */ | ||||
23359 | }; | ||||
23360 | |||||
23361 | /* | ||||
23362 | ** Tokenization callback used when inserting tokens into the FTS index. | ||||
23363 | */ | ||||
23364 | static int fts5StorageInsertCallback( | ||||
23365 | void *pContext, /* Pointer to Fts5InsertCtx object */ | ||||
23366 | int tflags, | ||||
23367 | const char *pToken, /* Buffer containing token */ | ||||
23368 | int nToken, /* Size of token in bytes */ | ||||
23369 | int iUnused1, /* Start offset of token */ | ||||
23370 | int iUnused2 /* End offset of token */ | ||||
23371 | ){ | ||||
23372 | Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; | ||||
23373 | Fts5Index *pIdx = pCtx->pStorage->pIndex; | ||||
23374 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | ||||
23375 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | ||||
23376 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 || pCtx->szCol==0 ){ | ||||
23377 | pCtx->szCol++; | ||||
23378 | } | ||||
23379 | return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); | ||||
23380 | } | ||||
23381 | |||||
23382 | /* | ||||
23383 | ** This function is used as part of an UPDATE statement that modifies the | ||||
23384 | ** rowid of a row. In that case, this function is called first to set | ||||
23385 | ** Fts5Storage.pSavedRow to point to a statement that may be used to | ||||
23386 | ** access the original values of the row being deleted - iDel. | ||||
23387 | ** | ||||
23388 | ** SQLITE_OK is returned if successful, or an SQLite error code otherwise. | ||||
23389 | ** It is not considered an error if row iDel does not exist. In this case | ||||
23390 | ** pSavedRow is not set and SQLITE_OK returned. | ||||
23391 | */ | ||||
23392 | static int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel){ | ||||
23393 | int rc = SQLITE_OK0; | ||||
23394 | sqlite3_stmt *pSeek = 0; | ||||
23395 | |||||
23396 | assert( p->pSavedRow==0 )((void) (0)); | ||||
23397 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2+1, &pSeek, 0); | ||||
23398 | if( rc==SQLITE_OK0 ){ | ||||
23399 | sqlite3_bind_int64sqlite3_api->bind_int64(pSeek, 1, iDel); | ||||
23400 | if( sqlite3_stepsqlite3_api->step(pSeek)!=SQLITE_ROW100 ){ | ||||
23401 | rc = sqlite3_resetsqlite3_api->reset(pSeek); | ||||
23402 | }else{ | ||||
23403 | p->pSavedRow = pSeek; | ||||
23404 | } | ||||
23405 | } | ||||
23406 | |||||
23407 | return rc; | ||||
23408 | } | ||||
23409 | |||||
23410 | /* | ||||
23411 | ** If a row with rowid iDel is present in the %_content table, add the | ||||
23412 | ** delete-markers to the FTS index necessary to delete it. Do not actually | ||||
23413 | ** remove the %_content row at this time though. | ||||
23414 | ** | ||||
23415 | ** If parameter bSaveRow is true, then Fts5Storage.pSavedRow is left | ||||
23416 | ** pointing to a statement (FTS5_STMT_LOOKUP2) that may be used to access | ||||
23417 | ** the original values of the row being deleted. This is used by UPDATE | ||||
23418 | ** statements. | ||||
23419 | */ | ||||
23420 | static int fts5StorageDeleteFromIndex( | ||||
23421 | Fts5Storage *p, | ||||
23422 | i64 iDel, | ||||
23423 | sqlite3_value **apVal, | ||||
23424 | int bSaveRow /* True to set pSavedRow */ | ||||
23425 | ){ | ||||
23426 | Fts5Config *pConfig = p->pConfig; | ||||
23427 | sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */ | ||||
23428 | int rc = SQLITE_OK0; /* Return code */ | ||||
23429 | int rc2; /* sqlite3_reset() return code */ | ||||
23430 | int iCol; | ||||
23431 | Fts5InsertCtx ctx; | ||||
23432 | |||||
23433 | assert( bSaveRow==0 || apVal==0 )((void) (0)); | ||||
23434 | assert( bSaveRow==0 || bSaveRow==1 )((void) (0)); | ||||
23435 | assert( FTS5_STMT_LOOKUP2==FTS5_STMT_LOOKUP+1 )((void) (0)); | ||||
23436 | |||||
23437 | if( apVal==0 ){ | ||||
23438 | if( p->pSavedRow && bSaveRow ){ | ||||
23439 | pSeek = p->pSavedRow; | ||||
23440 | p->pSavedRow = 0; | ||||
23441 | }else{ | ||||
23442 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2+bSaveRow, &pSeek, 0); | ||||
23443 | if( rc!=SQLITE_OK0 ) return rc; | ||||
23444 | sqlite3_bind_int64sqlite3_api->bind_int64(pSeek, 1, iDel); | ||||
23445 | if( sqlite3_stepsqlite3_api->step(pSeek)!=SQLITE_ROW100 ){ | ||||
23446 | return sqlite3_resetsqlite3_api->reset(pSeek); | ||||
23447 | } | ||||
23448 | } | ||||
23449 | } | ||||
23450 | |||||
23451 | ctx.pStorage = p; | ||||
23452 | ctx.iCol = -1; | ||||
23453 | for(iCol=1; rc==SQLITE_OK0 && iCol<=pConfig->nCol; iCol++){ | ||||
23454 | if( pConfig->abUnindexed[iCol-1]==0 ){ | ||||
23455 | sqlite3_value *pVal = 0; | ||||
23456 | const char *pText = 0; | ||||
23457 | int nText = 0; | ||||
23458 | const char *pLoc = 0; | ||||
23459 | int nLoc = 0; | ||||
23460 | |||||
23461 | assert( pSeek==0 || apVal==0 )((void) (0)); | ||||
23462 | assert( pSeek!=0 || apVal!=0 )((void) (0)); | ||||
23463 | if( pSeek ){ | ||||
23464 | pVal = sqlite3_column_valuesqlite3_api->column_value(pSeek, iCol); | ||||
23465 | }else{ | ||||
23466 | pVal = apVal[iCol-1]; | ||||
23467 | } | ||||
23468 | |||||
23469 | if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | ||||
23470 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | ||||
23471 | }else{ | ||||
23472 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | ||||
23473 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | ||||
23474 | if( pConfig->bLocale && pSeek ){ | ||||
23475 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pSeek, iCol + pConfig->nCol); | ||||
23476 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pSeek, iCol + pConfig->nCol); | ||||
23477 | } | ||||
23478 | } | ||||
23479 | |||||
23480 | if( rc==SQLITE_OK0 ){ | ||||
23481 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | ||||
23482 | ctx.szCol = 0; | ||||
23483 | rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT0x0004, | ||||
23484 | pText, nText, (void*)&ctx, fts5StorageInsertCallback | ||||
23485 | ); | ||||
23486 | p->aTotalSize[iCol-1] -= (i64)ctx.szCol; | ||||
23487 | if( rc==SQLITE_OK0 && p->aTotalSize[iCol-1]<0 ){ | ||||
23488 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
23489 | } | ||||
23490 | sqlite3Fts5ClearLocale(pConfig); | ||||
23491 | } | ||||
23492 | } | ||||
23493 | } | ||||
23494 | if( rc==SQLITE_OK0 && p->nTotalRow<1 ){ | ||||
23495 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
23496 | }else{ | ||||
23497 | p->nTotalRow--; | ||||
23498 | } | ||||
23499 | |||||
23500 | if( rc==SQLITE_OK0 && bSaveRow ){ | ||||
23501 | assert( p->pSavedRow==0 )((void) (0)); | ||||
23502 | p->pSavedRow = pSeek; | ||||
23503 | }else{ | ||||
23504 | rc2 = sqlite3_resetsqlite3_api->reset(pSeek); | ||||
23505 | if( rc==SQLITE_OK0 ) rc = rc2; | ||||
23506 | } | ||||
23507 | return rc; | ||||
23508 | } | ||||
23509 | |||||
23510 | /* | ||||
23511 | ** Reset any saved statement pSavedRow. Zero pSavedRow as well. This | ||||
23512 | ** should be called by the xUpdate() method of the fts5 table before | ||||
23513 | ** returning from any operation that may have set Fts5Storage.pSavedRow. | ||||
23514 | */ | ||||
23515 | static void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage *pStorage){ | ||||
23516 | assert( pStorage->pSavedRow==0((void) (0)) | ||||
23517 | || pStorage->pSavedRow==pStorage->aStmt[FTS5_STMT_LOOKUP2]((void) (0)) | ||||
23518 | )((void) (0)); | ||||
23519 | sqlite3_resetsqlite3_api->reset(pStorage->pSavedRow); | ||||
23520 | pStorage->pSavedRow = 0; | ||||
23521 | } | ||||
23522 | |||||
23523 | /* | ||||
23524 | ** This function is called to process a DELETE on a contentless_delete=1 | ||||
23525 | ** table. It adds the tombstone required to delete the entry with rowid | ||||
23526 | ** iDel. If successful, SQLITE_OK is returned. Or, if an error occurs, | ||||
23527 | ** an SQLite error code. | ||||
23528 | */ | ||||
23529 | static int fts5StorageContentlessDelete(Fts5Storage *p, i64 iDel){ | ||||
23530 | i64 iOrigin = 0; | ||||
23531 | sqlite3_stmt *pLookup = 0; | ||||
23532 | int rc = SQLITE_OK0; | ||||
23533 | |||||
23534 | assert( p->pConfig->bContentlessDelete )((void) (0)); | ||||
23535 | assert( p->pConfig->eContent==FTS5_CONTENT_NONE((void) (0)) | ||||
23536 | || p->pConfig->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | ||||
23537 | )((void) (0)); | ||||
23538 | |||||
23539 | /* Look up the origin of the document in the %_docsize table. Store | ||||
23540 | ** this in stack variable iOrigin. */ | ||||
23541 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE9, &pLookup, 0); | ||||
23542 | if( rc==SQLITE_OK0 ){ | ||||
23543 | sqlite3_bind_int64sqlite3_api->bind_int64(pLookup, 1, iDel); | ||||
23544 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pLookup) ){ | ||||
23545 | iOrigin = sqlite3_column_int64sqlite3_api->column_int64(pLookup, 1); | ||||
23546 | } | ||||
23547 | rc = sqlite3_resetsqlite3_api->reset(pLookup); | ||||
23548 | } | ||||
23549 | |||||
23550 | if( rc==SQLITE_OK0 && iOrigin!=0 ){ | ||||
23551 | rc = sqlite3Fts5IndexContentlessDelete(p->pIndex, iOrigin, iDel); | ||||
23552 | } | ||||
23553 | |||||
23554 | return rc; | ||||
23555 | } | ||||
23556 | |||||
23557 | /* | ||||
23558 | ** Insert a record into the %_docsize table. Specifically, do: | ||||
23559 | ** | ||||
23560 | ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); | ||||
23561 | ** | ||||
23562 | ** If there is no %_docsize table (as happens if the columnsize=0 option | ||||
23563 | ** is specified when the FTS5 table is created), this function is a no-op. | ||||
23564 | */ | ||||
23565 | static int fts5StorageInsertDocsize( | ||||
23566 | Fts5Storage *p, /* Storage module to write to */ | ||||
23567 | i64 iRowid, /* id value */ | ||||
23568 | Fts5Buffer *pBuf /* sz value */ | ||||
23569 | ){ | ||||
23570 | int rc = SQLITE_OK0; | ||||
23571 | if( p->pConfig->bColumnsize ){ | ||||
23572 | sqlite3_stmt *pReplace = 0; | ||||
23573 | rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE7, &pReplace, 0); | ||||
23574 | if( rc==SQLITE_OK0 ){ | ||||
23575 | sqlite3_bind_int64sqlite3_api->bind_int64(pReplace, 1, iRowid); | ||||
23576 | if( p->pConfig->bContentlessDelete ){ | ||||
23577 | i64 iOrigin = 0; | ||||
23578 | rc = sqlite3Fts5IndexGetOrigin(p->pIndex, &iOrigin); | ||||
23579 | sqlite3_bind_int64sqlite3_api->bind_int64(pReplace, 3, iOrigin); | ||||
23580 | } | ||||
23581 | } | ||||
23582 | if( rc==SQLITE_OK0 ){ | ||||
23583 | sqlite3_bind_blobsqlite3_api->bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
23584 | sqlite3_stepsqlite3_api->step(pReplace); | ||||
23585 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | ||||
23586 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 2); | ||||
23587 | } | ||||
23588 | } | ||||
23589 | return rc; | ||||
23590 | } | ||||
23591 | |||||
23592 | /* | ||||
23593 | ** Load the contents of the "averages" record from disk into the | ||||
23594 | ** p->nTotalRow and p->aTotalSize[] variables. If successful, and if | ||||
23595 | ** argument bCache is true, set the p->bTotalsValid flag to indicate | ||||
23596 | ** that the contents of aTotalSize[] and nTotalRow are valid until | ||||
23597 | ** further notice. | ||||
23598 | ** | ||||
23599 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | ||||
23600 | ** occurs. | ||||
23601 | */ | ||||
23602 | static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ | ||||
23603 | int rc = SQLITE_OK0; | ||||
23604 | if( p->bTotalsValid==0 ){ | ||||
23605 | rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); | ||||
23606 | p->bTotalsValid = bCache; | ||||
23607 | } | ||||
23608 | return rc; | ||||
23609 | } | ||||
23610 | |||||
23611 | /* | ||||
23612 | ** Store the current contents of the p->nTotalRow and p->aTotalSize[] | ||||
23613 | ** variables in the "averages" record on disk. | ||||
23614 | ** | ||||
23615 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | ||||
23616 | ** occurs. | ||||
23617 | */ | ||||
23618 | static int fts5StorageSaveTotals(Fts5Storage *p){ | ||||
23619 | int nCol = p->pConfig->nCol; | ||||
23620 | int i; | ||||
23621 | Fts5Buffer buf; | ||||
23622 | int rc = SQLITE_OK0; | ||||
23623 | memset(&buf, 0, sizeof(buf)); | ||||
23624 | |||||
23625 | sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow); | ||||
23626 | for(i=0; i<nCol; i++){ | ||||
23627 | sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]); | ||||
23628 | } | ||||
23629 | if( rc==SQLITE_OK0 ){ | ||||
23630 | rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n); | ||||
23631 | } | ||||
23632 | sqlite3_freesqlite3_api->free(buf.p); | ||||
23633 | |||||
23634 | return rc; | ||||
23635 | } | ||||
23636 | |||||
23637 | /* | ||||
23638 | ** Remove a row from the FTS table. | ||||
23639 | */ | ||||
23640 | static int sqlite3Fts5StorageDelete( | ||||
23641 | Fts5Storage *p, /* Storage object */ | ||||
23642 | i64 iDel, /* Rowid to delete from table */ | ||||
23643 | sqlite3_value **apVal, /* Optional - values to remove from index */ | ||||
23644 | int bSaveRow /* If true, set pSavedRow for deleted row */ | ||||
23645 | ){ | ||||
23646 | Fts5Config *pConfig = p->pConfig; | ||||
23647 | int rc; | ||||
23648 | sqlite3_stmt *pDel = 0; | ||||
23649 | |||||
23650 | assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 )((void) (0)); | ||||
23651 | rc = fts5StorageLoadTotals(p, 1); | ||||
23652 | |||||
23653 | /* Delete the index records */ | ||||
23654 | if( rc==SQLITE_OK0 ){ | ||||
23655 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); | ||||
23656 | } | ||||
23657 | |||||
23658 | if( rc==SQLITE_OK0 ){ | ||||
23659 | if( p->pConfig->bContentlessDelete ){ | ||||
23660 | rc = fts5StorageContentlessDelete(p, iDel); | ||||
23661 | if( rc==SQLITE_OK0 | ||||
23662 | && bSaveRow | ||||
23663 | && p->pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | ||||
23664 | ){ | ||||
23665 | rc = sqlite3Fts5StorageFindDeleteRow(p, iDel); | ||||
23666 | } | ||||
23667 | }else{ | ||||
23668 | rc = fts5StorageDeleteFromIndex(p, iDel, apVal, bSaveRow); | ||||
23669 | } | ||||
23670 | } | ||||
23671 | |||||
23672 | /* Delete the %_docsize record */ | ||||
23673 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | ||||
23674 | rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE8, &pDel, 0); | ||||
23675 | if( rc==SQLITE_OK0 ){ | ||||
23676 | sqlite3_bind_int64sqlite3_api->bind_int64(pDel, 1, iDel); | ||||
23677 | sqlite3_stepsqlite3_api->step(pDel); | ||||
23678 | rc = sqlite3_resetsqlite3_api->reset(pDel); | ||||
23679 | } | ||||
23680 | } | ||||
23681 | |||||
23682 | /* Delete the %_content record */ | ||||
23683 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | ||||
23684 | || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | ||||
23685 | ){ | ||||
23686 | if( rc==SQLITE_OK0 ){ | ||||
23687 | rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT6, &pDel, 0); | ||||
23688 | } | ||||
23689 | if( rc==SQLITE_OK0 ){ | ||||
23690 | sqlite3_bind_int64sqlite3_api->bind_int64(pDel, 1, iDel); | ||||
23691 | sqlite3_stepsqlite3_api->step(pDel); | ||||
23692 | rc = sqlite3_resetsqlite3_api->reset(pDel); | ||||
23693 | } | ||||
23694 | } | ||||
23695 | |||||
23696 | return rc; | ||||
23697 | } | ||||
23698 | |||||
23699 | /* | ||||
23700 | ** Delete all entries in the FTS5 index. | ||||
23701 | */ | ||||
23702 | static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ | ||||
23703 | Fts5Config *pConfig = p->pConfig; | ||||
23704 | int rc; | ||||
23705 | |||||
23706 | p->bTotalsValid = 0; | ||||
23707 | |||||
23708 | /* Delete the contents of the %_data and %_docsize tables. */ | ||||
23709 | rc = fts5ExecPrintf(pConfig->db, 0, | ||||
23710 | "DELETE FROM %Q.'%q_data';" | ||||
23711 | "DELETE FROM %Q.'%q_idx';", | ||||
23712 | pConfig->zDb, pConfig->zName, | ||||
23713 | pConfig->zDb, pConfig->zName | ||||
23714 | ); | ||||
23715 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | ||||
23716 | rc = fts5ExecPrintf(pConfig->db, 0, | ||||
23717 | "DELETE FROM %Q.'%q_docsize';", pConfig->zDb, pConfig->zName | ||||
23718 | ); | ||||
23719 | } | ||||
23720 | |||||
23721 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_UNINDEXED3 ){ | ||||
23722 | rc = fts5ExecPrintf(pConfig->db, 0, | ||||
23723 | "DELETE FROM %Q.'%q_content';", pConfig->zDb, pConfig->zName | ||||
23724 | ); | ||||
23725 | } | ||||
23726 | |||||
23727 | /* Reinitialize the %_data table. This call creates the initial structure | ||||
23728 | ** and averages records. */ | ||||
23729 | if( rc==SQLITE_OK0 ){ | ||||
23730 | rc = sqlite3Fts5IndexReinit(p->pIndex); | ||||
23731 | } | ||||
23732 | if( rc==SQLITE_OK0 ){ | ||||
23733 | rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION4); | ||||
23734 | } | ||||
23735 | return rc; | ||||
23736 | } | ||||
23737 | |||||
23738 | static int sqlite3Fts5StorageRebuild(Fts5Storage *p){ | ||||
23739 | Fts5Buffer buf = {0,0,0}; | ||||
23740 | Fts5Config *pConfig = p->pConfig; | ||||
23741 | sqlite3_stmt *pScan = 0; | ||||
23742 | Fts5InsertCtx ctx; | ||||
23743 | int rc, rc2; | ||||
23744 | |||||
23745 | memset(&ctx, 0, sizeof(Fts5InsertCtx)); | ||||
23746 | ctx.pStorage = p; | ||||
23747 | rc = sqlite3Fts5StorageDeleteAll(p); | ||||
23748 | if( rc==SQLITE_OK0 ){ | ||||
23749 | rc = fts5StorageLoadTotals(p, 1); | ||||
23750 | } | ||||
23751 | |||||
23752 | if( rc==SQLITE_OK0 ){ | ||||
23753 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN11, &pScan, pConfig->pzErrmsg); | ||||
23754 | } | ||||
23755 | |||||
23756 | while( rc==SQLITE_OK0 && SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pScan) ){ | ||||
23757 | i64 iRowid = sqlite3_column_int64sqlite3_api->column_int64(pScan, 0); | ||||
23758 | |||||
23759 | sqlite3Fts5BufferZero(&buf); | ||||
23760 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); | ||||
23761 | for(ctx.iCol=0; rc==SQLITE_OK0 && ctx.iCol<pConfig->nCol; ctx.iCol++){ | ||||
23762 | ctx.szCol = 0; | ||||
23763 | if( pConfig->abUnindexed[ctx.iCol]==0 ){ | ||||
23764 | int nText = 0; /* Size of pText in bytes */ | ||||
23765 | const char *pText = 0; /* Pointer to buffer containing text value */ | ||||
23766 | int nLoc = 0; /* Size of pLoc in bytes */ | ||||
23767 | const char *pLoc = 0; /* Pointer to buffer containing text value */ | ||||
23768 | |||||
23769 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pScan, ctx.iCol+1); | ||||
23770 | if( pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | ||||
23771 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | ||||
23772 | ){ | ||||
23773 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | ||||
23774 | }else{ | ||||
23775 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | ||||
23776 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | ||||
23777 | if( pConfig->bLocale ){ | ||||
23778 | int iCol = ctx.iCol + 1 + pConfig->nCol; | ||||
23779 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pScan, iCol); | ||||
23780 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pScan, iCol); | ||||
23781 | } | ||||
23782 | } | ||||
23783 | |||||
23784 | if( rc==SQLITE_OK0 ){ | ||||
23785 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | ||||
23786 | rc = sqlite3Fts5Tokenize(pConfig, | ||||
23787 | FTS5_TOKENIZE_DOCUMENT0x0004, | ||||
23788 | pText, nText, | ||||
23789 | (void*)&ctx, | ||||
23790 | fts5StorageInsertCallback | ||||
23791 | ); | ||||
23792 | sqlite3Fts5ClearLocale(pConfig); | ||||
23793 | } | ||||
23794 | } | ||||
23795 | sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | ||||
23796 | p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; | ||||
23797 | } | ||||
23798 | p->nTotalRow++; | ||||
23799 | |||||
23800 | if( rc==SQLITE_OK0 ){ | ||||
23801 | rc = fts5StorageInsertDocsize(p, iRowid, &buf); | ||||
23802 | } | ||||
23803 | } | ||||
23804 | sqlite3_freesqlite3_api->free(buf.p); | ||||
23805 | rc2 = sqlite3_resetsqlite3_api->reset(pScan); | ||||
23806 | if( rc==SQLITE_OK0 ) rc = rc2; | ||||
23807 | |||||
23808 | /* Write the averages record */ | ||||
23809 | if( rc==SQLITE_OK0 ){ | ||||
23810 | rc = fts5StorageSaveTotals(p); | ||||
23811 | } | ||||
23812 | return rc; | ||||
23813 | } | ||||
23814 | |||||
23815 | static int sqlite3Fts5StorageOptimize(Fts5Storage *p){ | ||||
23816 | return sqlite3Fts5IndexOptimize(p->pIndex); | ||||
23817 | } | ||||
23818 | |||||
23819 | static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ | ||||
23820 | return sqlite3Fts5IndexMerge(p->pIndex, nMerge); | ||||
23821 | } | ||||
23822 | |||||
23823 | static int sqlite3Fts5StorageReset(Fts5Storage *p){ | ||||
23824 | return sqlite3Fts5IndexReset(p->pIndex); | ||||
23825 | } | ||||
23826 | |||||
23827 | /* | ||||
23828 | ** Allocate a new rowid. This is used for "external content" tables when | ||||
23829 | ** a NULL value is inserted into the rowid column. The new rowid is allocated | ||||
23830 | ** by inserting a dummy row into the %_docsize table. The dummy will be | ||||
23831 | ** overwritten later. | ||||
23832 | ** | ||||
23833 | ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In | ||||
23834 | ** this case the user is required to provide a rowid explicitly. | ||||
23835 | */ | ||||
23836 | static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ | ||||
23837 | int rc = SQLITE_MISMATCH20; | ||||
23838 | if( p->pConfig->bColumnsize ){ | ||||
23839 | sqlite3_stmt *pReplace = 0; | ||||
23840 | rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE7, &pReplace, 0); | ||||
23841 | if( rc==SQLITE_OK0 ){ | ||||
23842 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 1); | ||||
23843 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 2); | ||||
23844 | sqlite3_stepsqlite3_api->step(pReplace); | ||||
23845 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | ||||
23846 | } | ||||
23847 | if( rc==SQLITE_OK0 ){ | ||||
23848 | *piRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->pConfig->db); | ||||
23849 | } | ||||
23850 | } | ||||
23851 | return rc; | ||||
23852 | } | ||||
23853 | |||||
23854 | /* | ||||
23855 | ** Insert a new row into the FTS content table. | ||||
23856 | */ | ||||
23857 | static int sqlite3Fts5StorageContentInsert( | ||||
23858 | Fts5Storage *p, | ||||
23859 | int bReplace, /* True to use REPLACE instead of INSERT */ | ||||
23860 | sqlite3_value **apVal, | ||||
23861 | i64 *piRowid | ||||
23862 | ){ | ||||
23863 | Fts5Config *pConfig = p->pConfig; | ||||
23864 | int rc = SQLITE_OK0; | ||||
23865 | |||||
23866 | /* Insert the new row into the %_content table. */ | ||||
23867 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 | ||||
23868 | && pConfig->eContent!=FTS5_CONTENT_UNINDEXED3 | ||||
23869 | ){ | ||||
23870 | if( sqlite3_value_typesqlite3_api->value_type(apVal[1])==SQLITE_INTEGER1 ){ | ||||
23871 | *piRowid = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); | ||||
23872 | }else{ | ||||
23873 | rc = fts5StorageNewRowid(p, piRowid); | ||||
23874 | } | ||||
23875 | }else{ | ||||
23876 | sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */ | ||||
23877 | int i; /* Counter variable */ | ||||
23878 | |||||
23879 | assert( FTS5_STMT_INSERT_CONTENT+1==FTS5_STMT_REPLACE_CONTENT )((void) (0)); | ||||
23880 | assert( bReplace==0 || bReplace==1 )((void) (0)); | ||||
23881 | rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT4+bReplace, &pInsert, 0); | ||||
23882 | if( pInsert ) sqlite3_clear_bindingssqlite3_api->clear_bindings(pInsert); | ||||
23883 | |||||
23884 | /* Bind the rowid value */ | ||||
23885 | sqlite3_bind_valuesqlite3_api->bind_value(pInsert, 1, apVal[1]); | ||||
23886 | |||||
23887 | /* Loop through values for user-defined columns. i=2 is the leftmost | ||||
23888 | ** user-defined column. As is column 1 of pSavedRow. */ | ||||
23889 | for(i=2; rc==SQLITE_OK0 && i<=pConfig->nCol+1; i++){ | ||||
23890 | int bUnindexed = pConfig->abUnindexed[i-2]; | ||||
23891 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 || bUnindexed ){ | ||||
23892 | sqlite3_value *pVal = apVal[i]; | ||||
23893 | |||||
23894 | if( sqlite3_value_nochangesqlite3_api->value_nochange(pVal) && p->pSavedRow ){ | ||||
23895 | /* This is an UPDATE statement, and user-defined column (i-2) was not | ||||
23896 | ** modified. Retrieve the value from Fts5Storage.pSavedRow. */ | ||||
23897 | pVal = sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, i-1); | ||||
23898 | if( pConfig->bLocale && bUnindexed==0 ){ | ||||
23899 | sqlite3_bind_valuesqlite3_api->bind_value(pInsert, pConfig->nCol + i, | ||||
23900 | sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, pConfig->nCol + i - 1) | ||||
23901 | ); | ||||
23902 | } | ||||
23903 | }else if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | ||||
23904 | const char *pText = 0; | ||||
23905 | const char *pLoc = 0; | ||||
23906 | int nText = 0; | ||||
23907 | int nLoc = 0; | ||||
23908 | assert( pConfig->bLocale )((void) (0)); | ||||
23909 | |||||
23910 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | ||||
23911 | if( rc==SQLITE_OK0 ){ | ||||
23912 | sqlite3_bind_textsqlite3_api->bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
23913 | if( bUnindexed==0 ){ | ||||
23914 | int iLoc = pConfig->nCol + i; | ||||
23915 | sqlite3_bind_textsqlite3_api->bind_text(pInsert, iLoc, pLoc, nLoc, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | ||||
23916 | } | ||||
23917 | } | ||||
23918 | |||||
23919 | continue; | ||||
23920 | } | ||||
23921 | |||||
23922 | rc = sqlite3_bind_valuesqlite3_api->bind_value(pInsert, i, pVal); | ||||
23923 | } | ||||
23924 | } | ||||
23925 | if( rc==SQLITE_OK0 ){ | ||||
23926 | sqlite3_stepsqlite3_api->step(pInsert); | ||||
23927 | rc = sqlite3_resetsqlite3_api->reset(pInsert); | ||||
23928 | } | ||||
23929 | *piRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(pConfig->db); | ||||
23930 | } | ||||
23931 | |||||
23932 | return rc; | ||||
23933 | } | ||||
23934 | |||||
23935 | /* | ||||
23936 | ** Insert new entries into the FTS index and %_docsize table. | ||||
23937 | */ | ||||
23938 | static int sqlite3Fts5StorageIndexInsert( | ||||
23939 | Fts5Storage *p, | ||||
23940 | sqlite3_value **apVal, | ||||
23941 | i64 iRowid | ||||
23942 | ){ | ||||
23943 | Fts5Config *pConfig = p->pConfig; | ||||
23944 | int rc = SQLITE_OK0; /* Return code */ | ||||
23945 | Fts5InsertCtx ctx; /* Tokenization callback context object */ | ||||
23946 | Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ | ||||
23947 | |||||
23948 | memset(&buf, 0, sizeof(Fts5Buffer)); | ||||
23949 | ctx.pStorage = p; | ||||
23950 | rc = fts5StorageLoadTotals(p, 1); | ||||
23951 | |||||
23952 | if( rc==SQLITE_OK0 ){ | ||||
23953 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); | ||||
23954 | } | ||||
23955 | for(ctx.iCol=0; rc==SQLITE_OK0 && ctx.iCol<pConfig->nCol; ctx.iCol++){ | ||||
23956 | ctx.szCol = 0; | ||||
23957 | if( pConfig->abUnindexed[ctx.iCol]==0 ){ | ||||
23958 | int nText = 0; /* Size of pText in bytes */ | ||||
23959 | const char *pText = 0; /* Pointer to buffer containing text value */ | ||||
23960 | int nLoc = 0; /* Size of pText in bytes */ | ||||
23961 | const char *pLoc = 0; /* Pointer to buffer containing text value */ | ||||
23962 | |||||
23963 | sqlite3_value *pVal = apVal[ctx.iCol+2]; | ||||
23964 | if( p->pSavedRow && sqlite3_value_nochangesqlite3_api->value_nochange(pVal) ){ | ||||
23965 | pVal = sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, ctx.iCol+1); | ||||
23966 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 && pConfig->bLocale ){ | ||||
23967 | int iCol = ctx.iCol + 1 + pConfig->nCol; | ||||
23968 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(p->pSavedRow, iCol); | ||||
23969 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(p->pSavedRow, iCol); | ||||
23970 | } | ||||
23971 | }else{ | ||||
23972 | pVal = apVal[ctx.iCol+2]; | ||||
23973 | } | ||||
23974 | |||||
23975 | if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | ||||
23976 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | ||||
23977 | }else{ | ||||
23978 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | ||||
23979 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | ||||
23980 | } | ||||
23981 | |||||
23982 | if( rc==SQLITE_OK0 ){ | ||||
23983 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | ||||
23984 | rc = sqlite3Fts5Tokenize(pConfig, | ||||
23985 | FTS5_TOKENIZE_DOCUMENT0x0004, pText, nText, (void*)&ctx, | ||||
23986 | fts5StorageInsertCallback | ||||
23987 | ); | ||||
23988 | sqlite3Fts5ClearLocale(pConfig); | ||||
23989 | } | ||||
23990 | } | ||||
23991 | sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | ||||
23992 | p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; | ||||
23993 | } | ||||
23994 | p->nTotalRow++; | ||||
23995 | |||||
23996 | /* Write the %_docsize record */ | ||||
23997 | if( rc==SQLITE_OK0 ){ | ||||
23998 | rc = fts5StorageInsertDocsize(p, iRowid, &buf); | ||||
23999 | } | ||||
24000 | sqlite3_freesqlite3_api->free(buf.p); | ||||
24001 | |||||
24002 | return rc; | ||||
24003 | } | ||||
24004 | |||||
24005 | static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){ | ||||
24006 | Fts5Config *pConfig = p->pConfig; | ||||
24007 | char *zSql; | ||||
24008 | int rc; | ||||
24009 | |||||
24010 | zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT count(*) FROM %Q.'%q_%s'", | ||||
24011 | pConfig->zDb, pConfig->zName, zSuffix | ||||
24012 | ); | ||||
24013 | if( zSql==0 ){ | ||||
24014 | rc = SQLITE_NOMEM7; | ||||
24015 | }else{ | ||||
24016 | sqlite3_stmt *pCnt = 0; | ||||
24017 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pConfig->db, zSql, -1, &pCnt, 0); | ||||
24018 | if( rc==SQLITE_OK0 ){ | ||||
24019 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pCnt) ){ | ||||
24020 | *pnRow = sqlite3_column_int64sqlite3_api->column_int64(pCnt, 0); | ||||
24021 | } | ||||
24022 | rc = sqlite3_finalizesqlite3_api->finalize(pCnt); | ||||
24023 | } | ||||
24024 | } | ||||
24025 | |||||
24026 | sqlite3_freesqlite3_api->free(zSql); | ||||
24027 | return rc; | ||||
24028 | } | ||||
24029 | |||||
24030 | /* | ||||
24031 | ** Context object used by sqlite3Fts5StorageIntegrity(). | ||||
24032 | */ | ||||
24033 | typedef struct Fts5IntegrityCtx Fts5IntegrityCtx; | ||||
24034 | struct Fts5IntegrityCtx { | ||||
24035 | i64 iRowid; | ||||
24036 | int iCol; | ||||
24037 | int szCol; | ||||
24038 | u64 cksum; | ||||
24039 | Fts5Termset *pTermset; | ||||
24040 | Fts5Config *pConfig; | ||||
24041 | }; | ||||
24042 | |||||
24043 | |||||
24044 | /* | ||||
24045 | ** Tokenization callback used by integrity check. | ||||
24046 | */ | ||||
24047 | static int fts5StorageIntegrityCallback( | ||||
24048 | void *pContext, /* Pointer to Fts5IntegrityCtx object */ | ||||
24049 | int tflags, | ||||
24050 | const char *pToken, /* Buffer containing token */ | ||||
24051 | int nToken, /* Size of token in bytes */ | ||||
24052 | int iUnused1, /* Start offset of token */ | ||||
24053 | int iUnused2 /* End offset of token */ | ||||
24054 | ){ | ||||
24055 | Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; | ||||
24056 | Fts5Termset *pTermset = pCtx->pTermset; | ||||
24057 | int bPresent; | ||||
24058 | int ii; | ||||
24059 | int rc = SQLITE_OK0; | ||||
24060 | int iPos; | ||||
24061 | int iCol; | ||||
24062 | |||||
24063 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | ||||
24064 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | ||||
24065 | |||||
24066 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 || pCtx->szCol==0 ){ | ||||
24067 | pCtx->szCol++; | ||||
24068 | } | ||||
24069 | |||||
24070 | switch( pCtx->pConfig->eDetail ){ | ||||
24071 | case FTS5_DETAIL_FULL0: | ||||
24072 | iPos = pCtx->szCol-1; | ||||
24073 | iCol = pCtx->iCol; | ||||
24074 | break; | ||||
24075 | |||||
24076 | case FTS5_DETAIL_COLUMNS2: | ||||
24077 | iPos = pCtx->iCol; | ||||
24078 | iCol = 0; | ||||
24079 | break; | ||||
24080 | |||||
24081 | default: | ||||
24082 | assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | ||||
24083 | iPos = 0; | ||||
24084 | iCol = 0; | ||||
24085 | break; | ||||
24086 | } | ||||
24087 | |||||
24088 | rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); | ||||
24089 | if( rc==SQLITE_OK0 && bPresent==0 ){ | ||||
24090 | pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( | ||||
24091 | pCtx->iRowid, iCol, iPos, 0, pToken, nToken | ||||
24092 | ); | ||||
24093 | } | ||||
24094 | |||||
24095 | for(ii=0; rc==SQLITE_OK0 && ii<pCtx->pConfig->nPrefix; ii++){ | ||||
24096 | const int nChar = pCtx->pConfig->aPrefix[ii]; | ||||
24097 | int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); | ||||
24098 | if( nByte ){ | ||||
24099 | rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); | ||||
24100 | if( bPresent==0 ){ | ||||
24101 | pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( | ||||
24102 | pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte | ||||
24103 | ); | ||||
24104 | } | ||||
24105 | } | ||||
24106 | } | ||||
24107 | |||||
24108 | return rc; | ||||
24109 | } | ||||
24110 | |||||
24111 | /* | ||||
24112 | ** Check that the contents of the FTS index match that of the %_content | ||||
24113 | ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return | ||||
24114 | ** some other SQLite error code if an error occurs while attempting to | ||||
24115 | ** determine this. | ||||
24116 | */ | ||||
24117 | static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){ | ||||
24118 | Fts5Config *pConfig = p->pConfig; | ||||
24119 | int rc = SQLITE_OK0; /* Return code */ | ||||
24120 | int *aColSize; /* Array of size pConfig->nCol */ | ||||
24121 | i64 *aTotalSize; /* Array of size pConfig->nCol */ | ||||
24122 | Fts5IntegrityCtx ctx; | ||||
24123 | sqlite3_stmt *pScan; | ||||
24124 | int bUseCksum; | ||||
24125 | |||||
24126 | memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); | ||||
24127 | ctx.pConfig = p->pConfig; | ||||
24128 | aTotalSize = (i64*)sqlite3_malloc64sqlite3_api->malloc64(pConfig->nCol*(sizeof(int)+sizeof(i64))); | ||||
24129 | if( !aTotalSize ) return SQLITE_NOMEM7; | ||||
24130 | aColSize = (int*)&aTotalSize[pConfig->nCol]; | ||||
24131 | memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol); | ||||
24132 | |||||
24133 | bUseCksum = (pConfig->eContent==FTS5_CONTENT_NORMAL0 | ||||
24134 | || (pConfig->eContent==FTS5_CONTENT_EXTERNAL2 && iArg) | ||||
24135 | ); | ||||
24136 | if( bUseCksum ){ | ||||
24137 | /* Generate the expected index checksum based on the contents of the | ||||
24138 | ** %_content table. This block stores the checksum in ctx.cksum. */ | ||||
24139 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN11, &pScan, 0); | ||||
24140 | if( rc==SQLITE_OK0 ){ | ||||
24141 | int rc2; | ||||
24142 | while( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pScan) ){ | ||||
24143 | int i; | ||||
24144 | ctx.iRowid = sqlite3_column_int64sqlite3_api->column_int64(pScan, 0); | ||||
24145 | ctx.szCol = 0; | ||||
24146 | if( pConfig->bColumnsize ){ | ||||
24147 | rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); | ||||
24148 | } | ||||
24149 | if( rc==SQLITE_OK0 && pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | ||||
24150 | rc = sqlite3Fts5TermsetNew(&ctx.pTermset); | ||||
24151 | } | ||||
24152 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | ||||
24153 | if( pConfig->abUnindexed[i]==0 ){ | ||||
24154 | const char *pText = 0; | ||||
24155 | int nText = 0; | ||||
24156 | const char *pLoc = 0; | ||||
24157 | int nLoc = 0; | ||||
24158 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pScan, i+1); | ||||
24159 | |||||
24160 | if( pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | ||||
24161 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | ||||
24162 | ){ | ||||
24163 | rc = sqlite3Fts5DecodeLocaleValue( | ||||
24164 | pVal, &pText, &nText, &pLoc, &nLoc | ||||
24165 | ); | ||||
24166 | }else{ | ||||
24167 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 && pConfig->bLocale ){ | ||||
24168 | int iCol = i + 1 + pConfig->nCol; | ||||
24169 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pScan, iCol); | ||||
24170 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pScan, iCol); | ||||
24171 | } | ||||
24172 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | ||||
24173 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | ||||
24174 | } | ||||
24175 | |||||
24176 | ctx.iCol = i; | ||||
24177 | ctx.szCol = 0; | ||||
24178 | |||||
24179 | if( rc==SQLITE_OK0 && pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | ||||
24180 | rc = sqlite3Fts5TermsetNew(&ctx.pTermset); | ||||
24181 | } | ||||
24182 | |||||
24183 | if( rc==SQLITE_OK0 ){ | ||||
24184 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | ||||
24185 | rc = sqlite3Fts5Tokenize(pConfig, | ||||
24186 | FTS5_TOKENIZE_DOCUMENT0x0004, | ||||
24187 | pText, nText, | ||||
24188 | (void*)&ctx, | ||||
24189 | fts5StorageIntegrityCallback | ||||
24190 | ); | ||||
24191 | sqlite3Fts5ClearLocale(pConfig); | ||||
24192 | } | ||||
24193 | |||||
24194 | /* If this is not a columnsize=0 database, check that the number | ||||
24195 | ** of tokens in the value matches the aColSize[] value read from | ||||
24196 | ** the %_docsize table. */ | ||||
24197 | if( rc==SQLITE_OK0 | ||||
24198 | && pConfig->bColumnsize | ||||
24199 | && ctx.szCol!=aColSize[i] | ||||
24200 | ){ | ||||
24201 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
24202 | } | ||||
24203 | aTotalSize[i] += ctx.szCol; | ||||
24204 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | ||||
24205 | sqlite3Fts5TermsetFree(ctx.pTermset); | ||||
24206 | ctx.pTermset = 0; | ||||
24207 | } | ||||
24208 | } | ||||
24209 | } | ||||
24210 | sqlite3Fts5TermsetFree(ctx.pTermset); | ||||
24211 | ctx.pTermset = 0; | ||||
24212 | |||||
24213 | if( rc!=SQLITE_OK0 ) break; | ||||
24214 | } | ||||
24215 | rc2 = sqlite3_resetsqlite3_api->reset(pScan); | ||||
24216 | if( rc==SQLITE_OK0 ) rc = rc2; | ||||
24217 | } | ||||
24218 | |||||
24219 | /* Test that the "totals" (sometimes called "averages") record looks Ok */ | ||||
24220 | if( rc==SQLITE_OK0 ){ | ||||
24221 | int i; | ||||
24222 | rc = fts5StorageLoadTotals(p, 0); | ||||
24223 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | ||||
24224 | if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
24225 | } | ||||
24226 | } | ||||
24227 | |||||
24228 | /* Check that the %_docsize and %_content tables contain the expected | ||||
24229 | ** number of rows. */ | ||||
24230 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | ||||
24231 | i64 nRow = 0; | ||||
24232 | rc = fts5StorageCount(p, "content", &nRow); | ||||
24233 | if( rc==SQLITE_OK0 && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
24234 | } | ||||
24235 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | ||||
24236 | i64 nRow = 0; | ||||
24237 | rc = fts5StorageCount(p, "docsize", &nRow); | ||||
24238 | if( rc==SQLITE_OK0 && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
24239 | } | ||||
24240 | } | ||||
24241 | |||||
24242 | /* Pass the expected checksum down to the FTS index module. It will | ||||
24243 | ** verify, amongst other things, that it matches the checksum generated by | ||||
24244 | ** inspecting the index itself. */ | ||||
24245 | if( rc==SQLITE_OK0 ){ | ||||
24246 | rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum, bUseCksum); | ||||
24247 | } | ||||
24248 | |||||
24249 | sqlite3_freesqlite3_api->free(aTotalSize); | ||||
24250 | return rc; | ||||
24251 | } | ||||
24252 | |||||
24253 | /* | ||||
24254 | ** Obtain an SQLite statement handle that may be used to read data from the | ||||
24255 | ** %_content table. | ||||
24256 | */ | ||||
24257 | static int sqlite3Fts5StorageStmt( | ||||
24258 | Fts5Storage *p, | ||||
24259 | int eStmt, | ||||
24260 | sqlite3_stmt **pp, | ||||
24261 | char **pzErrMsg | ||||
24262 | ){ | ||||
24263 | int rc; | ||||
24264 | assert( eStmt==FTS5_STMT_SCAN_ASC((void) (0)) | ||||
24265 | || eStmt==FTS5_STMT_SCAN_DESC((void) (0)) | ||||
24266 | || eStmt==FTS5_STMT_LOOKUP((void) (0)) | ||||
24267 | )((void) (0)); | ||||
24268 | rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg); | ||||
24269 | if( rc==SQLITE_OK0 ){ | ||||
24270 | assert( p->aStmt[eStmt]==*pp )((void) (0)); | ||||
24271 | p->aStmt[eStmt] = 0; | ||||
24272 | } | ||||
24273 | return rc; | ||||
24274 | } | ||||
24275 | |||||
24276 | /* | ||||
24277 | ** Release an SQLite statement handle obtained via an earlier call to | ||||
24278 | ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function | ||||
24279 | ** must match that passed to the sqlite3Fts5StorageStmt() call. | ||||
24280 | */ | ||||
24281 | static void sqlite3Fts5StorageStmtRelease( | ||||
24282 | Fts5Storage *p, | ||||
24283 | int eStmt, | ||||
24284 | sqlite3_stmt *pStmt | ||||
24285 | ){ | ||||
24286 | assert( eStmt==FTS5_STMT_SCAN_ASC((void) (0)) | ||||
24287 | || eStmt==FTS5_STMT_SCAN_DESC((void) (0)) | ||||
24288 | || eStmt==FTS5_STMT_LOOKUP((void) (0)) | ||||
24289 | )((void) (0)); | ||||
24290 | if( p->aStmt[eStmt]==0 ){ | ||||
24291 | sqlite3_resetsqlite3_api->reset(pStmt); | ||||
24292 | p->aStmt[eStmt] = pStmt; | ||||
24293 | }else{ | ||||
24294 | sqlite3_finalizesqlite3_api->finalize(pStmt); | ||||
24295 | } | ||||
24296 | } | ||||
24297 | |||||
24298 | static int fts5StorageDecodeSizeArray( | ||||
24299 | int *aCol, int nCol, /* Array to populate */ | ||||
24300 | const u8 *aBlob, int nBlob /* Record to read varints from */ | ||||
24301 | ){ | ||||
24302 | int i; | ||||
24303 | int iOff = 0; | ||||
24304 | for(i=0; i<nCol; i++){ | ||||
24305 | if( iOff>=nBlob ) return 1; | ||||
24306 | iOff += fts5GetVarint32(&aBlob[iOff], aCol[i])sqlite3Fts5GetVarint32(&aBlob[iOff],(u32*)&(aCol[i])); | ||||
24307 | } | ||||
24308 | return (iOff!=nBlob); | ||||
24309 | } | ||||
24310 | |||||
24311 | /* | ||||
24312 | ** Argument aCol points to an array of integers containing one entry for | ||||
24313 | ** each table column. This function reads the %_docsize record for the | ||||
24314 | ** specified rowid and populates aCol[] with the results. | ||||
24315 | ** | ||||
24316 | ** An SQLite error code is returned if an error occurs, or SQLITE_OK | ||||
24317 | ** otherwise. | ||||
24318 | */ | ||||
24319 | static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ | ||||
24320 | int nCol = p->pConfig->nCol; /* Number of user columns in table */ | ||||
24321 | sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ | ||||
24322 | int rc; /* Return Code */ | ||||
24323 | |||||
24324 | assert( p->pConfig->bColumnsize )((void) (0)); | ||||
24325 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE9, &pLookup, 0); | ||||
24326 | if( pLookup ){ | ||||
24327 | int bCorrupt = 1; | ||||
24328 | assert( rc==SQLITE_OK )((void) (0)); | ||||
24329 | sqlite3_bind_int64sqlite3_api->bind_int64(pLookup, 1, iRowid); | ||||
24330 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pLookup) ){ | ||||
24331 | const u8 *aBlob = sqlite3_column_blobsqlite3_api->column_blob(pLookup, 0); | ||||
24332 | int nBlob = sqlite3_column_bytessqlite3_api->column_bytes(pLookup, 0); | ||||
24333 | if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ | ||||
24334 | bCorrupt = 0; | ||||
24335 | } | ||||
24336 | } | ||||
24337 | rc = sqlite3_resetsqlite3_api->reset(pLookup); | ||||
24338 | if( bCorrupt && rc==SQLITE_OK0 ){ | ||||
24339 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
24340 | } | ||||
24341 | }else{ | ||||
24342 | assert( rc!=SQLITE_OK )((void) (0)); | ||||
24343 | } | ||||
24344 | |||||
24345 | return rc; | ||||
24346 | } | ||||
24347 | |||||
24348 | static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ | ||||
24349 | int rc = fts5StorageLoadTotals(p, 0); | ||||
24350 | if( rc==SQLITE_OK0 ){ | ||||
24351 | *pnToken = 0; | ||||
24352 | if( iCol<0 ){ | ||||
24353 | int i; | ||||
24354 | for(i=0; i<p->pConfig->nCol; i++){ | ||||
24355 | *pnToken += p->aTotalSize[i]; | ||||
24356 | } | ||||
24357 | }else if( iCol<p->pConfig->nCol ){ | ||||
24358 | *pnToken = p->aTotalSize[iCol]; | ||||
24359 | }else{ | ||||
24360 | rc = SQLITE_RANGE25; | ||||
24361 | } | ||||
24362 | } | ||||
24363 | return rc; | ||||
24364 | } | ||||
24365 | |||||
24366 | static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ | ||||
24367 | int rc = fts5StorageLoadTotals(p, 0); | ||||
24368 | if( rc==SQLITE_OK0 ){ | ||||
24369 | /* nTotalRow being zero does not necessarily indicate a corrupt | ||||
24370 | ** database - it might be that the FTS5 table really does contain zero | ||||
24371 | ** rows. However this function is only called from the xRowCount() API, | ||||
24372 | ** and there is no way for that API to be invoked if the table contains | ||||
24373 | ** no rows. Hence the FTS5_CORRUPT return. */ | ||||
24374 | *pnRow = p->nTotalRow; | ||||
24375 | if( p->nTotalRow<=0 ) rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
24376 | } | ||||
24377 | return rc; | ||||
24378 | } | ||||
24379 | |||||
24380 | /* | ||||
24381 | ** Flush any data currently held in-memory to disk. | ||||
24382 | */ | ||||
24383 | static int sqlite3Fts5StorageSync(Fts5Storage *p){ | ||||
24384 | int rc = SQLITE_OK0; | ||||
24385 | i64 iLastRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->pConfig->db); | ||||
24386 | if( p->bTotalsValid ){ | ||||
24387 | rc = fts5StorageSaveTotals(p); | ||||
24388 | if( rc==SQLITE_OK0 ){ | ||||
24389 | p->bTotalsValid = 0; | ||||
24390 | } | ||||
24391 | } | ||||
24392 | if( rc==SQLITE_OK0 ){ | ||||
24393 | rc = sqlite3Fts5IndexSync(p->pIndex); | ||||
24394 | } | ||||
24395 | sqlite3_set_last_insert_rowidsqlite3_api->set_last_insert_rowid(p->pConfig->db, iLastRowid); | ||||
24396 | return rc; | ||||
24397 | } | ||||
24398 | |||||
24399 | static int sqlite3Fts5StorageRollback(Fts5Storage *p){ | ||||
24400 | p->bTotalsValid = 0; | ||||
24401 | return sqlite3Fts5IndexRollback(p->pIndex); | ||||
24402 | } | ||||
24403 | |||||
24404 | static int sqlite3Fts5StorageConfigValue( | ||||
24405 | Fts5Storage *p, | ||||
24406 | const char *z, | ||||
24407 | sqlite3_value *pVal, | ||||
24408 | int iVal | ||||
24409 | ){ | ||||
24410 | sqlite3_stmt *pReplace = 0; | ||||
24411 | int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG10, &pReplace, 0); | ||||
24412 | if( rc==SQLITE_OK0 ){ | ||||
24413 | sqlite3_bind_textsqlite3_api->bind_text(pReplace, 1, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
24414 | if( pVal ){ | ||||
24415 | sqlite3_bind_valuesqlite3_api->bind_value(pReplace, 2, pVal); | ||||
24416 | }else{ | ||||
24417 | sqlite3_bind_intsqlite3_api->bind_int(pReplace, 2, iVal); | ||||
24418 | } | ||||
24419 | sqlite3_stepsqlite3_api->step(pReplace); | ||||
24420 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | ||||
24421 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 1); | ||||
24422 | } | ||||
24423 | if( rc==SQLITE_OK0 && pVal ){ | ||||
24424 | int iNew = p->pConfig->iCookie + 1; | ||||
24425 | rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); | ||||
24426 | if( rc==SQLITE_OK0 ){ | ||||
24427 | p->pConfig->iCookie = iNew; | ||||
24428 | } | ||||
24429 | } | ||||
24430 | return rc; | ||||
24431 | } | ||||
24432 | |||||
24433 | #line 1 "fts5_tokenize.c" | ||||
24434 | /* | ||||
24435 | ** 2014 May 31 | ||||
24436 | ** | ||||
24437 | ** The author disclaims copyright to this source code. In place of | ||||
24438 | ** a legal notice, here is a blessing: | ||||
24439 | ** | ||||
24440 | ** May you do good and not evil. | ||||
24441 | ** May you find forgiveness for yourself and forgive others. | ||||
24442 | ** May you share freely, never taking more than you give. | ||||
24443 | ** | ||||
24444 | ****************************************************************************** | ||||
24445 | */ | ||||
24446 | |||||
24447 | |||||
24448 | /* #include "fts5Int.h" */ | ||||
24449 | |||||
24450 | /************************************************************************** | ||||
24451 | ** Start of ascii tokenizer implementation. | ||||
24452 | */ | ||||
24453 | |||||
/*
** Default token-character table for the "ascii" tokenizer. Entry i is 1 if
** ASCII code point i is a token character and 0 if it is a separator. By
** default the token characters are exactly the ASCII alphanumerics:
** '0'..'9', 'A'..'Z' and 'a'..'z'.
*/
static unsigned char aAsciiTokenChar[128] = {
  /* 0x00..0x0F: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10..0x1F: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20..0x2F: space and punctuation */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30..0x3F: '0'..'9', then punctuation */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 0, 0, 0, 0, 0, 0,
  /* 0x40..0x4F: '@', then 'A'..'O' */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50..0x5F: 'P'..'Z', then punctuation */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,
  /* 0x60..0x6F: '`', then 'a'..'o' */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70..0x7F: 'p'..'z', then punctuation and DEL */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,
};
24468 | |||||
/*
** State of an "ascii" tokenizer: a lookup table with one entry for each
** ASCII code point, non-zero if that code point is a token character.
*/
typedef struct AsciiTokenizer {
  unsigned char aTokenChar[128];
} AsciiTokenizer;
24473 | |||||
24474 | static void fts5AsciiAddExceptions( | ||||
24475 | AsciiTokenizer *p, | ||||
24476 | const char *zArg, | ||||
24477 | int bTokenChars | ||||
24478 | ){ | ||||
24479 | int i; | ||||
24480 | for(i=0; zArg[i]; i++){ | ||||
24481 | if( (zArg[i] & 0x80)==0 ){ | ||||
24482 | p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars; | ||||
24483 | } | ||||
24484 | } | ||||
24485 | } | ||||
24486 | |||||
24487 | /* | ||||
24488 | ** Delete a "ascii" tokenizer. | ||||
24489 | */ | ||||
24490 | static void fts5AsciiDelete(Fts5Tokenizer *p){ | ||||
24491 | sqlite3_freesqlite3_api->free(p); | ||||
24492 | } | ||||
24493 | |||||
24494 | /* | ||||
24495 | ** Create an "ascii" tokenizer. | ||||
24496 | */ | ||||
24497 | static int fts5AsciiCreate( | ||||
24498 | void *pUnused, | ||||
24499 | const char **azArg, int nArg, | ||||
24500 | Fts5Tokenizer **ppOut | ||||
24501 | ){ | ||||
24502 | int rc = SQLITE_OK0; | ||||
24503 | AsciiTokenizer *p = 0; | ||||
24504 | UNUSED_PARAM(pUnused)(void)(pUnused); | ||||
24505 | if( nArg%2 ){ | ||||
24506 | rc = SQLITE_ERROR1; | ||||
24507 | }else{ | ||||
24508 | p = sqlite3_mallocsqlite3_api->malloc(sizeof(AsciiTokenizer)); | ||||
24509 | if( p==0 ){ | ||||
24510 | rc = SQLITE_NOMEM7; | ||||
24511 | }else{ | ||||
24512 | int i; | ||||
24513 | memset(p, 0, sizeof(AsciiTokenizer)); | ||||
24514 | memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); | ||||
24515 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | ||||
24516 | const char *zArg = azArg[i+1]; | ||||
24517 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "tokenchars") ){ | ||||
24518 | fts5AsciiAddExceptions(p, zArg, 1); | ||||
24519 | }else | ||||
24520 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "separators") ){ | ||||
24521 | fts5AsciiAddExceptions(p, zArg, 0); | ||||
24522 | }else{ | ||||
24523 | rc = SQLITE_ERROR1; | ||||
24524 | } | ||||
24525 | } | ||||
24526 | if( rc!=SQLITE_OK0 ){ | ||||
24527 | fts5AsciiDelete((Fts5Tokenizer*)p); | ||||
24528 | p = 0; | ||||
24529 | } | ||||
24530 | } | ||||
24531 | } | ||||
24532 | |||||
24533 | *ppOut = (Fts5Tokenizer*)p; | ||||
24534 | return rc; | ||||
24535 | } | ||||
24536 | |||||
24537 | |||||
/*
** Copy nByte bytes from aIn to aOut, mapping the upper-case ASCII letters
** 'A'..'Z' to lower case. All other bytes are copied unchanged. Nothing
** is written if nByte is zero or negative.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int iByte = 0;
  while( iByte<nByte ){
    char ch = aIn[iByte];
    aOut[iByte] = (ch>='A' && ch<='Z') ? (char)(ch + 32) : ch;
    iByte++;
  }
}
24546 | |||||
24547 | /* | ||||
24548 | ** Tokenize some text using the ascii tokenizer. | ||||
24549 | */ | ||||
24550 | static int fts5AsciiTokenize( | ||||
24551 | Fts5Tokenizer *pTokenizer, | ||||
24552 | void *pCtx, | ||||
24553 | int iUnused, | ||||
24554 | const char *pText, int nText, | ||||
24555 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | ||||
24556 | ){ | ||||
24557 | AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; | ||||
24558 | int rc = SQLITE_OK0; | ||||
24559 | int ie; | ||||
24560 | int is = 0; | ||||
24561 | |||||
24562 | char aFold[64]; | ||||
24563 | int nFold = sizeof(aFold); | ||||
24564 | char *pFold = aFold; | ||||
24565 | unsigned char *a = p->aTokenChar; | ||||
24566 | |||||
24567 | UNUSED_PARAM(iUnused)(void)(iUnused); | ||||
24568 | |||||
24569 | while( is<nText && rc==SQLITE_OK0 ){ | ||||
24570 | int nByte; | ||||
24571 | |||||
24572 | /* Skip any leading divider characters. */ | ||||
24573 | while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){ | ||||
24574 | is++; | ||||
24575 | } | ||||
24576 | if( is==nText ) break; | ||||
24577 | |||||
24578 | /* Count the token characters */ | ||||
24579 | ie = is+1; | ||||
24580 | while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){ | ||||
24581 | ie++; | ||||
24582 | } | ||||
24583 | |||||
24584 | /* Fold to lower case */ | ||||
24585 | nByte = ie-is; | ||||
24586 | if( nByte>nFold ){ | ||||
24587 | if( pFold!=aFold ) sqlite3_freesqlite3_api->free(pFold); | ||||
24588 | pFold = sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)nByte*2); | ||||
24589 | if( pFold==0 ){ | ||||
24590 | rc = SQLITE_NOMEM7; | ||||
24591 | break; | ||||
24592 | } | ||||
24593 | nFold = nByte*2; | ||||
24594 | } | ||||
24595 | asciiFold(pFold, &pText[is], nByte); | ||||
24596 | |||||
24597 | /* Invoke the token callback */ | ||||
24598 | rc = xToken(pCtx, 0, pFold, nByte, is, ie); | ||||
24599 | is = ie+1; | ||||
24600 | } | ||||
24601 | |||||
24602 | if( pFold!=aFold ) sqlite3_freesqlite3_api->free(pFold); | ||||
24603 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; | ||||
24604 | return rc; | ||||
24605 | } | ||||
24606 | |||||
24607 | /************************************************************************** | ||||
24608 | ** Start of unicode61 tokenizer implementation. | ||||
24609 | */ | ||||
24610 | |||||
24611 | |||||
/*
** UTF-8 helpers READ_UTF8 and WRITE_UTF8, copied from the sqlite3 source
** file utf.c. They are only needed when this file is not compiled as part
** of the amalgamation.
**
** READ_UTF8(zIn, zTerm, c): decode one character starting at zIn into c
** and advance zIn past it, never reading continuation bytes at or beyond
** zTerm. Results below 0x80 from a multi-byte sequence, surrogates
** (0xD800..0xDFFF) and 0xFFFE/0xFFFF are replaced by 0xFFFD.
**
** WRITE_UTF8(zOut, c): encode c as 1 to 4 bytes at zOut and advance zOut.
**
** sqlite3Utf8Trans1[] maps a lead byte, less 0xC0, to the payload bits
** that byte contributes to the decoded value.
*/
#ifndef SQLITE_AMALGAMATION

static const unsigned char sqlite3Utf8Trans1[] = {
  /* Lead bytes 0xC0..0xCF */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* Lead bytes 0xD0..0xDF */
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  /* Lead bytes 0xE0..0xEF */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* Lead bytes 0xF0..0xFF */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

#define READ_UTF8(zIn, zTerm, c)                                   \
  c = *(zIn++);                                                    \
  if( c>=0xc0 ){                                                   \
    c = sqlite3Utf8Trans1[c-0xc0];                                 \
    while( zIn<zTerm && (*zIn & 0xc0)==0x80 ){                     \
      c = (c<<6) + (0x3f & *(zIn++));                              \
    }                                                              \
    if( c<0x80                                                     \
     || (c&0xFFFFF800)==0xD800                                     \
     || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; }                    \
  }


#define WRITE_UTF8(zOut, c) {                                      \
  if( c<0x00080 ){                                                 \
    *zOut++ = (unsigned char)(c&0xFF);                             \
  }                                                                \
  else if( c<0x00800 ){                                            \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);                 \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);                    \
  }                                                                \
  else if( c<0x10000 ){                                            \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);                \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);               \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);                    \
  }else{                                                           \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);              \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);              \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);               \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);                    \
  }                                                                \
}

#endif /* ifndef SQLITE_AMALGAMATION */
24664 | |||||
/*
** Advance pointer zIn past one UTF-8 encoded character: step over the
** first byte and, if that byte is 0xC0 or greater, over every following
** continuation byte (10xxxxxx). No end-of-buffer check is made.
*/
#define FTS5_SKIP_UTF8(zIn) {                                      \
  if( ((unsigned char)(*(zIn++)))>=0xc0 ){                         \
    while( (((unsigned char)*zIn) & 0xc0)==0x80 ){ zIn++; }        \
  }                                                                \
}
24670 | |||||
/*
** State for one "unicode61" tokenizer instance.
*/
typedef struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* Non-zero for ASCII token characters */
  char *aFold;                    /* Output buffer tokens are folded into */
  int nFold;                      /* Allocated size of aFold[] in bytes */
  int eRemoveDiacritic;           /* An FTS5_REMOVE_DIACRITICS_XXX value */
  int nException;                 /* Number of entries in aiException[] */
  int *aiException;               /* Sorted array of exception codepoints */

  unsigned char aCategory[32];    /* True for token char categories */
} Unicode61Tokenizer;

/* Values for eRemoveDiacritic (must match internals of fts5_unicode2.c) */
#define FTS5_REMOVE_DIACRITICS_NONE    0
#define FTS5_REMOVE_DIACRITICS_SIMPLE  1
#define FTS5_REMOVE_DIACRITICS_COMPLEX 2
24687 | |||||
24688 | static int fts5UnicodeAddExceptions( | ||||
24689 | Unicode61Tokenizer *p, /* Tokenizer object */ | ||||
24690 | const char *z, /* Characters to treat as exceptions */ | ||||
24691 | int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */ | ||||
24692 | ){ | ||||
24693 | int rc = SQLITE_OK0; | ||||
24694 | int n = (int)strlen(z); | ||||
24695 | int *aNew; | ||||
24696 | |||||
24697 | if( n>0 ){ | ||||
24698 | aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(p->aiException, | ||||
24699 | (n+p->nException)*sizeof(int)); | ||||
24700 | if( aNew ){ | ||||
24701 | int nNew = p->nException; | ||||
24702 | const unsigned char *zCsr = (const unsigned char*)z; | ||||
24703 | const unsigned char *zTerm = (const unsigned char*)&z[n]; | ||||
24704 | while( zCsr<zTerm ){ | ||||
24705 | u32 iCode; | ||||
24706 | int bToken; | ||||
24707 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | ||||
24708 | if( iCode<128 ){ | ||||
24709 | p->aTokenChar[iCode] = (unsigned char)bTokenChars; | ||||
24710 | }else{ | ||||
24711 | bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)]; | ||||
24712 | assert( (bToken==0 || bToken==1) )((void) (0)); | ||||
24713 | assert( (bTokenChars==0 || bTokenChars==1) )((void) (0)); | ||||
24714 | if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){ | ||||
24715 | int i; | ||||
24716 | for(i=0; i<nNew; i++){ | ||||
24717 | if( (u32)aNew[i]>iCode ) break; | ||||
24718 | } | ||||
24719 | memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); | ||||
24720 | aNew[i] = iCode; | ||||
24721 | nNew++; | ||||
24722 | } | ||||
24723 | } | ||||
24724 | } | ||||
24725 | p->aiException = aNew; | ||||
24726 | p->nException = nNew; | ||||
24727 | }else{ | ||||
24728 | rc = SQLITE_NOMEM7; | ||||
24729 | } | ||||
24730 | } | ||||
24731 | |||||
24732 | return rc; | ||||
24733 | } | ||||
24734 | |||||
24735 | /* | ||||
24736 | ** Return true if the p->aiException[] array contains the value iCode. | ||||
24737 | */ | ||||
24738 | static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ | ||||
24739 | if( p->nException>0 ){ | ||||
24740 | int *a = p->aiException; | ||||
24741 | int iLo = 0; | ||||
24742 | int iHi = p->nException-1; | ||||
24743 | |||||
24744 | while( iHi>=iLo ){ | ||||
24745 | int iTest = (iHi + iLo) / 2; | ||||
24746 | if( iCode==a[iTest] ){ | ||||
24747 | return 1; | ||||
24748 | }else if( iCode>a[iTest] ){ | ||||
24749 | iLo = iTest+1; | ||||
24750 | }else{ | ||||
24751 | iHi = iTest-1; | ||||
24752 | } | ||||
24753 | } | ||||
24754 | } | ||||
24755 | |||||
24756 | return 0; | ||||
24757 | } | ||||
24758 | |||||
24759 | /* | ||||
24760 | ** Delete a "unicode61" tokenizer. | ||||
24761 | */ | ||||
24762 | static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ | ||||
24763 | if( pTok ){ | ||||
24764 | Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; | ||||
24765 | sqlite3_freesqlite3_api->free(p->aiException); | ||||
24766 | sqlite3_freesqlite3_api->free(p->aFold); | ||||
24767 | sqlite3_freesqlite3_api->free(p); | ||||
24768 | } | ||||
24769 | return; | ||||
24770 | } | ||||
24771 | |||||
24772 | static int unicodeSetCategories(Unicode61Tokenizer *p, const char *zCat){ | ||||
24773 | const char *z = zCat; | ||||
24774 | |||||
24775 | while( *z ){ | ||||
24776 | while( *z==' ' || *z=='\t' ) z++; | ||||
24777 | if( *z && sqlite3Fts5UnicodeCatParse(z, p->aCategory) ){ | ||||
24778 | return SQLITE_ERROR1; | ||||
24779 | } | ||||
24780 | while( *z!=' ' && *z!='\t' && *z!='\0' ) z++; | ||||
24781 | } | ||||
24782 | |||||
24783 | sqlite3Fts5UnicodeAscii(p->aCategory, p->aTokenChar); | ||||
24784 | return SQLITE_OK0; | ||||
24785 | } | ||||
24786 | |||||
24787 | /* | ||||
24788 | ** Create a "unicode61" tokenizer. | ||||
24789 | */ | ||||
24790 | static int fts5UnicodeCreate( | ||||
24791 | void *pUnused, | ||||
24792 | const char **azArg, int nArg, | ||||
24793 | Fts5Tokenizer **ppOut | ||||
24794 | ){ | ||||
24795 | int rc = SQLITE_OK0; /* Return code */ | ||||
24796 | Unicode61Tokenizer *p = 0; /* New tokenizer object */ | ||||
24797 | |||||
24798 | UNUSED_PARAM(pUnused)(void)(pUnused); | ||||
24799 | |||||
24800 | if( nArg%2 ){ | ||||
24801 | rc = SQLITE_ERROR1; | ||||
24802 | }else{ | ||||
24803 | p = (Unicode61Tokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(Unicode61Tokenizer)); | ||||
24804 | if( p ){ | ||||
24805 | const char *zCat = "L* N* Co"; | ||||
24806 | int i; | ||||
24807 | memset(p, 0, sizeof(Unicode61Tokenizer)); | ||||
24808 | |||||
24809 | p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE1; | ||||
24810 | p->nFold = 64; | ||||
24811 | p->aFold = sqlite3_malloc64sqlite3_api->malloc64(p->nFold * sizeof(char)); | ||||
24812 | if( p->aFold==0 ){ | ||||
24813 | rc = SQLITE_NOMEM7; | ||||
24814 | } | ||||
24815 | |||||
24816 | /* Search for a "categories" argument */ | ||||
24817 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | ||||
24818 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "categories") ){ | ||||
24819 | zCat = azArg[i+1]; | ||||
24820 | } | ||||
24821 | } | ||||
24822 | if( rc==SQLITE_OK0 ){ | ||||
24823 | rc = unicodeSetCategories(p, zCat); | ||||
24824 | } | ||||
24825 | |||||
24826 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | ||||
24827 | const char *zArg = azArg[i+1]; | ||||
24828 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "remove_diacritics") ){ | ||||
24829 | if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ | ||||
24830 | rc = SQLITE_ERROR1; | ||||
24831 | }else{ | ||||
24832 | p->eRemoveDiacritic = (zArg[0] - '0'); | ||||
24833 | assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE((void) (0)) | ||||
24834 | || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE((void) (0)) | ||||
24835 | || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX((void) (0)) | ||||
24836 | )((void) (0)); | ||||
24837 | } | ||||
24838 | }else | ||||
24839 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "tokenchars") ){ | ||||
24840 | rc = fts5UnicodeAddExceptions(p, zArg, 1); | ||||
24841 | }else | ||||
24842 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "separators") ){ | ||||
24843 | rc = fts5UnicodeAddExceptions(p, zArg, 0); | ||||
24844 | }else | ||||
24845 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "categories") ){ | ||||
24846 | /* no-op */ | ||||
24847 | }else{ | ||||
24848 | rc = SQLITE_ERROR1; | ||||
24849 | } | ||||
24850 | } | ||||
24851 | }else{ | ||||
24852 | rc = SQLITE_NOMEM7; | ||||
24853 | } | ||||
24854 | if( rc!=SQLITE_OK0 ){ | ||||
24855 | fts5UnicodeDelete((Fts5Tokenizer*)p); | ||||
24856 | p = 0; | ||||
24857 | } | ||||
24858 | *ppOut = (Fts5Tokenizer*)p; | ||||
24859 | } | ||||
24860 | return rc; | ||||
24861 | } | ||||
24862 | |||||
24863 | /* | ||||
24864 | ** Return true if, for the purposes of tokenizing with the tokenizer | ||||
24865 | ** passed as the first argument, codepoint iCode is considered a token | ||||
24866 | ** character (not a separator). | ||||
24867 | */ | ||||
24868 | static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ | ||||
24869 | return ( | ||||
24870 | p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)] | ||||
24871 | ^ fts5UnicodeIsException(p, iCode) | ||||
24872 | ); | ||||
24873 | } | ||||
24874 | |||||
24875 | static int fts5UnicodeTokenize( | ||||
24876 | Fts5Tokenizer *pTokenizer, | ||||
24877 | void *pCtx, | ||||
24878 | int iUnused, | ||||
24879 | const char *pText, int nText, | ||||
24880 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | ||||
24881 | ){ | ||||
24882 | Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; | ||||
24883 | int rc = SQLITE_OK0; | ||||
24884 | unsigned char *a = p->aTokenChar; | ||||
24885 | |||||
24886 | unsigned char *zTerm = (unsigned char*)&pText[nText]; | ||||
24887 | unsigned char *zCsr = (unsigned char *)pText; | ||||
24888 | |||||
24889 | /* Output buffer */ | ||||
24890 | char *aFold = p->aFold; | ||||
24891 | int nFold = p->nFold; | ||||
24892 | const char *pEnd = &aFold[nFold-6]; | ||||
24893 | |||||
24894 | UNUSED_PARAM(iUnused)(void)(iUnused); | ||||
24895 | |||||
24896 | /* Each iteration of this loop gobbles up a contiguous run of separators, | ||||
24897 | ** then the next token. */ | ||||
24898 | while( rc==SQLITE_OK0 ){ | ||||
24899 | u32 iCode; /* non-ASCII codepoint read from input */ | ||||
24900 | char *zOut = aFold; | ||||
24901 | int is; | ||||
24902 | int ie; | ||||
24903 | |||||
24904 | /* Skip any separator characters. */ | ||||
24905 | while( 1 ){ | ||||
24906 | if( zCsr>=zTerm ) goto tokenize_done; | ||||
24907 | if( *zCsr & 0x80 ) { | ||||
24908 | /* A character outside of the ascii range. Skip past it if it is | ||||
24909 | ** a separator character. Or break out of the loop if it is not. */ | ||||
24910 | is = zCsr - (unsigned char*)pText; | ||||
24911 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | ||||
24912 | if( fts5UnicodeIsAlnum(p, iCode) ){ | ||||
24913 | goto non_ascii_tokenchar; | ||||
24914 | } | ||||
24915 | }else{ | ||||
24916 | if( a[*zCsr] ){ | ||||
24917 | is = zCsr - (unsigned char*)pText; | ||||
24918 | goto ascii_tokenchar; | ||||
24919 | } | ||||
24920 | zCsr++; | ||||
24921 | } | ||||
24922 | } | ||||
24923 | |||||
24924 | /* Run through the tokenchars. Fold them into the output buffer along | ||||
24925 | ** the way. */ | ||||
24926 | while( zCsr<zTerm ){ | ||||
24927 | |||||
24928 | /* Grow the output buffer so that there is sufficient space to fit the | ||||
24929 | ** largest possible utf-8 character. */ | ||||
24930 | if( zOut>pEnd ){ | ||||
24931 | aFold = sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)nFold*2); | ||||
24932 | if( aFold==0 ){ | ||||
24933 | rc = SQLITE_NOMEM7; | ||||
24934 | goto tokenize_done; | ||||
24935 | } | ||||
24936 | zOut = &aFold[zOut - p->aFold]; | ||||
24937 | memcpy(aFold, p->aFold, nFold); | ||||
24938 | sqlite3_freesqlite3_api->free(p->aFold); | ||||
24939 | p->aFold = aFold; | ||||
24940 | p->nFold = nFold = nFold*2; | ||||
24941 | pEnd = &aFold[nFold-6]; | ||||
24942 | } | ||||
24943 | |||||
24944 | if( *zCsr & 0x80 ){ | ||||
24945 | /* An non-ascii-range character. Fold it into the output buffer if | ||||
24946 | ** it is a token character, or break out of the loop if it is not. */ | ||||
24947 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | ||||
24948 | if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){ | ||||
24949 | non_ascii_tokenchar: | ||||
24950 | iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic); | ||||
24951 | if( iCode ) WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | ||||
24952 | }else{ | ||||
24953 | break; | ||||
24954 | } | ||||
24955 | }else if( a[*zCsr]==0 ){ | ||||
24956 | /* An ascii-range separator character. End of token. */ | ||||
24957 | break; | ||||
24958 | }else{ | ||||
24959 | ascii_tokenchar: | ||||
24960 | if( *zCsr>='A' && *zCsr<='Z' ){ | ||||
24961 | *zOut++ = *zCsr + 32; | ||||
24962 | }else{ | ||||
24963 | *zOut++ = *zCsr; | ||||
24964 | } | ||||
24965 | zCsr++; | ||||
24966 | } | ||||
24967 | ie = zCsr - (unsigned char*)pText; | ||||
24968 | } | ||||
24969 | |||||
24970 | /* Invoke the token callback */ | ||||
24971 | rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); | ||||
24972 | } | ||||
24973 | |||||
24974 | tokenize_done: | ||||
24975 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; | ||||
24976 | return rc; | ||||
24977 | } | ||||
24978 | |||||
24979 | /************************************************************************** | ||||
24980 | ** Start of porter stemmer implementation. | ||||
24981 | */ | ||||
24982 | |||||
24983 | /* Any tokens larger than this (in bytes) are passed through without | ||||
24984 | ** stemming. */ | ||||
24985 | #define FTS5_PORTER_MAX_TOKEN64 64 | ||||
24986 | |||||
24987 | typedef struct PorterTokenizer PorterTokenizer; | ||||
24988 | struct PorterTokenizer { | ||||
24989 | fts5_tokenizer_v2 tokenizer_v2; /* Parent tokenizer module */ | ||||
24990 | Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */ | ||||
24991 | char aBuf[FTS5_PORTER_MAX_TOKEN64 + 64]; | ||||
24992 | }; | ||||
24993 | |||||
24994 | /* | ||||
24995 | ** Delete a "porter" tokenizer. | ||||
24996 | */ | ||||
24997 | static void fts5PorterDelete(Fts5Tokenizer *pTok){ | ||||
24998 | if( pTok ){ | ||||
24999 | PorterTokenizer *p = (PorterTokenizer*)pTok; | ||||
25000 | if( p->pTokenizer ){ | ||||
25001 | p->tokenizer_v2.xDelete(p->pTokenizer); | ||||
25002 | } | ||||
25003 | sqlite3_freesqlite3_api->free(p); | ||||
25004 | } | ||||
25005 | } | ||||
25006 | |||||
25007 | /* | ||||
25008 | ** Create a "porter" tokenizer. | ||||
25009 | */ | ||||
25010 | static int fts5PorterCreate( | ||||
25011 | void *pCtx, | ||||
25012 | const char **azArg, int nArg, | ||||
25013 | Fts5Tokenizer **ppOut | ||||
25014 | ){ | ||||
25015 | fts5_api *pApi = (fts5_api*)pCtx; | ||||
25016 | int rc = SQLITE_OK0; | ||||
25017 | PorterTokenizer *pRet; | ||||
25018 | void *pUserdata = 0; | ||||
25019 | const char *zBase = "unicode61"; | ||||
25020 | fts5_tokenizer_v2 *pV2 = 0; | ||||
25021 | |||||
25022 | if( nArg>0 ){ | ||||
25023 | zBase = azArg[0]; | ||||
25024 | } | ||||
25025 | |||||
25026 | pRet = (PorterTokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(PorterTokenizer)); | ||||
25027 | if( pRet ){ | ||||
25028 | memset(pRet, 0, sizeof(PorterTokenizer)); | ||||
25029 | rc = pApi->xFindTokenizer_v2(pApi, zBase, &pUserdata, &pV2); | ||||
25030 | }else{ | ||||
25031 | rc = SQLITE_NOMEM7; | ||||
25032 | } | ||||
25033 | if( rc==SQLITE_OK0 ){ | ||||
25034 | int nArg2 = (nArg>0 ? nArg-1 : 0); | ||||
25035 | const char **az2 = (nArg2 ? &azArg[1] : 0); | ||||
25036 | memcpy(&pRet->tokenizer_v2, pV2, sizeof(fts5_tokenizer_v2)); | ||||
25037 | rc = pRet->tokenizer_v2.xCreate(pUserdata, az2, nArg2, &pRet->pTokenizer); | ||||
25038 | } | ||||
25039 | |||||
25040 | if( rc!=SQLITE_OK0 ){ | ||||
25041 | fts5PorterDelete((Fts5Tokenizer*)pRet); | ||||
25042 | pRet = 0; | ||||
25043 | } | ||||
25044 | *ppOut = (Fts5Tokenizer*)pRet; | ||||
25045 | return rc; | ||||
25046 | } | ||||
25047 | |||||
/*
** Bundle of a token callback, the context pointer to pass to it, and a
** character buffer.
*/
typedef struct PorterContext {
  void *pCtx;                     /* First argument for xToken */
  int (*xToken)(void*, int, const char*, int, int, int);
  char *aBuf;                     /* Character buffer */
} PorterContext;
25054 | |||||
/*
** One suffix-rewriting rule: a suffix and its length, an optional
** condition callback taking the stem, and the replacement text and its
** length.
*/
typedef struct PorterRule {
  const char *zSuffix;            /* Suffix to match */
  int nSuffix;                    /* Length of zSuffix in bytes */
  int (*xCond)(char *zStem, int nStem);
  const char *zOutput;            /* Replacement text */
  int nOutput;                    /* Length of zOutput in bytes */
} PorterRule;
25063 | |||||
#if 0
/*
** Disabled code. Find the first rule in the zSuffix-terminated array
** aRule[] whose suffix matches the end of aBuf[0..*pnBuf). If that rule
** has no condition, or its condition accepts the stem, replace the suffix
** with the rule's output, update *pnBuf and return the rule's index.
** Otherwise return -1.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int iRule = -1;
  int nBuf = *pnBuf;
  PorterRule *pRule = aRule;

  while( pRule->zSuffix ){
    assert( strlen(pRule->zSuffix)==pRule->nSuffix );
    assert( strlen(pRule->zOutput)==pRule->nOutput );
    if( nBuf>=pRule->nSuffix
     && 0==memcmp(&aBuf[nBuf - pRule->nSuffix], pRule->zSuffix, pRule->nSuffix)
    ){
      break;
    }
    pRule++;
  }

  if( pRule->zSuffix ){
    int nStem = nBuf - pRule->nSuffix;
    if( pRule->xCond==0 || pRule->xCond(aBuf, nStem) ){
      memcpy(&aBuf[nStem], pRule->zOutput, pRule->nOutput);
      *pnBuf = nStem + pRule->nOutput;
      iRule = pRule - aRule;
    }
  }

  return iRule;
}
#endif
25089 | |||||
/*
** Return 1 if c is one of the lower-case vowels a, e, i, o or u, or if
** c is 'y' and bYIsVowel is non-zero. Return 0 otherwise.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return bYIsVowel ? 1 : 0;
    default:
      return 0;
  }
}
25095 | |||||
/*
** Scan zStem[0..nStem) for the first vowel and then for the first
** consonant that follows it. Return the offset just past that consonant,
** or 0 if no such vowel/consonant sequence exists.
**
** bPrevCons says whether the character before zStem[0] was a consonant;
** a 'y' is counted as a vowel only when it follows a consonant.
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int iPos = 0;
  int bIsCons = bPrevCons;

  /* Find the first vowel. */
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons==0 ) break;
    iPos++;
  }

  /* Find the first consonant after that vowel. */
  iPos++;
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons ) return iPos+1;
    iPos++;
  }
  return 0;
}
25111 | |||||
/*
** Porter rule condition (m > 0): true if the stem contains at least one
** vowel followed by a consonant, as found by fts5PorterGobbleVC().
*/
static int fts5Porter_MGt0(char *zStem, int nStem){
  return fts5PorterGobbleVC(zStem, nStem, 0)!=0;
}
25116 | |||||
/*
** Porter rule condition (m > 1): true if fts5PorterGobbleVC() finds two
** successive vowel-consonant sequences in the stem.
*/
static int fts5Porter_MGt1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0;
}
25126 | |||||
/*
** Porter rule condition (m = 1): true if fts5PorterGobbleVC() finds one
** vowel-consonant sequence in the stem but not a second.
*/
static int fts5Porter_MEq1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)==0;
}
25136 | |||||
/*
** Porter rule condition (*o): true if the stem ends in the sequence
** consonant-vowel-consonant and the final consonant is not 'w', 'x'
** or 'y'.
**
** Returns 0 for an empty (or negative-length) stem without reading from
** zStem. Only the classification of the last three characters affects
** the result, so the running mask is truncated to three bits on each
** step; this keeps the left shift well-defined however long the stem is.
*/
static int fts5Porter_Ostar(char *zStem, int nStem){
  if( nStem<1 ){
    return 0;
  }
  if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){
    return 0;
  }else{
    int i;
    int mask = 0;                 /* Bit set for each of last 3 consonants */
    int bCons = 0;
    for(i=0; i<nStem; i++){
      bCons = !fts5PorterIsVowel(zStem[i], bCons);
      assert( bCons==0 || bCons==1 );
      mask = ((mask << 1) + bCons) & 0x0007;
    }
    return (mask==0x0005);
  }
}
25153 | |||||
/*
** Porter rule condition (m > 1 and (*S or *T)): true if the stem ends in
** 's' or 't' and also satisfies fts5Porter_MGt1(). nStem must be greater
** than zero.
*/
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ) return 0;
  return fts5Porter_MGt1(zStem, nStem)!=0;
}
25160 | |||||
/*
** Porter rule condition (*v*): true if the stem contains a vowel. A 'y'
** counts as a vowel anywhere except in the first position.
*/
static int fts5Porter_Vowel(char *zStem, int nStem){
  int iPos = 0;
  while( iPos<nStem ){
    if( fts5PorterIsVowel(zStem[iPos], iPos>0) ) return 1;
    iPos++;
  }
  return 0;
}
25171 | |||||
25172 | |||||
25173 | /************************************************************************** | ||||
25174 | *************************************************************************** | ||||
25175 | ** GENERATED CODE STARTS HERE (mkportersteps.tcl) | ||||
25176 | */ | ||||
25177 | |||||
/*
** Porter stemmer step 4. aBuf[0..*pnBuf) holds the word being stemmed.
**
** The suffix to test is selected by the second-to-last character. Within
** a case the first suffix that matches is the only one considered: if
** its condition holds (fts5Porter_MGt1() on the remaining stem, or
** fts5Porter_MGt1_and_S_or_T() for "ion") the suffix is removed by
** reducing *pnBuf. aBuf itself is never modified.
**
** Every suffix requires more than two bytes of input, so a buffer shorter
** than three bytes is returned untouched. Checking that up front also
** stops the switch from indexing before the start of aBuf.
**
** Always returns 0.
*/
static int fts5PorterStep4(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  if( nBuf<3 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'e':
      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'l':
      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'o':
      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 's':
      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 't':
      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'z':
      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

  }
  return ret;
}
25310 | |||||
25311 | |||||
/*
** Second half of Porter step 1b. If the nBuf-byte word in aBuf[] ends
** in "at", "bl" or "iz", append an 'e' to it (giving "ate", "ble" or
** "ize"), add one to *pnBuf and return 1. Otherwise leave both aBuf[]
** and *pnBuf untouched and return 0.
**
** The buffer must have room for one byte more than *pnBuf.
**
** NOTE: this function sits inside the region generated by
** mkportersteps.tcl; any change made here should be mirrored in the
** generator.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* Every rule requires a non-empty stem in front of a two byte suffix.
  ** Returning early also keeps the aBuf[nBuf-2] lookup below from
  ** indexing before the start of the buffer when nBuf is 0 or 1. */
  if( nBuf<3 ) return 0;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( 0==memcmp("at", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ate", 3);
        *pnBuf = nBuf + 1;
        ret = 1;
      }
      break;

    case 'b':
      if( 0==memcmp("bl", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ble", 3);
        *pnBuf = nBuf + 1;
        ret = 1;
      }
      break;

    case 'i':
      if( 0==memcmp("iz", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ize", 3);
        *pnBuf = nBuf + 1;
        ret = 1;
      }
      break;

  }
  return ret;
}
25344 | |||||
25345 | |||||
/*
** Porter step 2: map double suffixes to single ones ("ational" -> "ate",
** "iveness" -> "ive", and so on).
**
** The rules in aRule[] are tried in order. The first rule whose suffix
** matches the end of the nBuf-byte word in aBuf[], with at least one
** byte of stem left in front of it, is selected and no further rules
** are considered. The selected rule is applied - the suffix overwritten
** with the replacement and *pnBuf updated - only if
** fts5Porter_MGt0() returns true for the stem.
**
** Rule order matters where one suffix is the tail of another (for
** example "ational"/"tional" and "ization"/"ation"): the longer suffix
** must come first.
**
** Unlike a switch on aBuf[nBuf-2], the loop below never touches aBuf[]
** before checking that the word is long enough, so it is safe for words
** of 0 or 1 bytes.
**
** Always returns 0.
**
** NOTE: this function sits inside the region generated by
** mkportersteps.tcl; any change made here should be mirrored in the
** generator.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  static const struct PorterStep2Rule {
    const char *zSuffix;            /* Suffix to look for */
    const char *zRepl;              /* Text that replaces the suffix */
    unsigned char nSuffix;          /* Size of zSuffix in bytes */
    unsigned char nRepl;            /* Size of zRepl in bytes */
  } aRule[] = {
    { "ational", "ate",  7, 3 },
    { "tional",  "tion", 6, 4 },
    { "enci",    "ence", 4, 4 },
    { "anci",    "ance", 4, 4 },
    { "izer",    "ize",  4, 3 },
    { "logi",    "log",  4, 3 },
    { "bli",     "ble",  3, 3 },
    { "alli",    "al",   4, 2 },
    { "entli",   "ent",  5, 3 },
    { "eli",     "e",    3, 1 },
    { "ousli",   "ous",  5, 3 },
    { "ization", "ize",  7, 3 },
    { "ation",   "ate",  5, 3 },
    { "ator",    "ate",  4, 3 },
    { "alism",   "al",   5, 2 },
    { "iveness", "ive",  7, 3 },
    { "fulness", "ful",  7, 3 },
    { "ousness", "ous",  7, 3 },
    { "aliti",   "al",   5, 2 },
    { "iviti",   "ive",  5, 3 },
    { "biliti",  "ble",  6, 3 },
  };
  const int nBuf = *pnBuf;
  int i;

  for(i=0; i<(int)(sizeof(aRule)/sizeof(aRule[0])); i++){
    const struct PorterStep2Rule *pRule = &aRule[i];
    const int nStem = nBuf - (int)pRule->nSuffix;

    if( nStem<=0 ) continue;        /* Word too short for this suffix */

    /* Cheap filter on the second-to-last byte before the full compare.
    ** nStem>0 and nSuffix>=3 guarantee that nBuf-2 is a valid index. */
    if( aBuf[nBuf-2]!=pRule->zSuffix[pRule->nSuffix-2] ) continue;
    if( memcmp(pRule->zSuffix, &aBuf[nStem], pRule->nSuffix)!=0 ) continue;

    if( fts5Porter_MGt0(aBuf, nStem) ){
      memcpy(&aBuf[nStem], pRule->zRepl, pRule->nRepl);
      *pnBuf = nStem + (int)pRule->nRepl;
    }
    break;                          /* First matching suffix wins */
  }

  return 0;
}
25491 | |||||
25492 | |||||
/*
** Porter step 3: strip or shorten suffixes such as "icate", "ative",
** "alize", "ical", "ful" and "ness".
**
** The rules in aRule[] are tried in order. The first rule whose suffix
** matches the end of the nBuf-byte word in aBuf[], with at least one
** byte of stem left in front of it, is selected and no further rules
** are considered. The selected rule is applied - the suffix overwritten
** with the (possibly empty) replacement and *pnBuf updated - only if
** fts5Porter_MGt0() returns true for the stem.
**
** Unlike a switch on aBuf[nBuf-2], the loop below never touches aBuf[]
** before checking that the word is long enough, so it is safe for words
** of 0 or 1 bytes.
**
** Always returns 0.
**
** NOTE: this function sits inside the region generated by
** mkportersteps.tcl; any change made here should be mirrored in the
** generator.
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  static const struct PorterStep3Rule {
    const char *zSuffix;            /* Suffix to look for */
    const char *zRepl;              /* Text that replaces the suffix */
    unsigned char nSuffix;          /* Size of zSuffix in bytes */
    unsigned char nRepl;            /* Size of zRepl in bytes */
  } aRule[] = {
    { "ical",  "ic", 4, 2 },
    { "ness",  "",   4, 0 },
    { "icate", "ic", 5, 2 },
    { "iciti", "ic", 5, 2 },
    { "ful",   "",   3, 0 },
    { "ative", "",   5, 0 },
    { "alize", "al", 5, 2 },
  };
  const int nBuf = *pnBuf;
  int i;

  for(i=0; i<(int)(sizeof(aRule)/sizeof(aRule[0])); i++){
    const struct PorterStep3Rule *pRule = &aRule[i];
    const int nStem = nBuf - (int)pRule->nSuffix;

    if( nStem<=0 ) continue;        /* Word too short for this suffix */

    /* Cheap filter on the second-to-last byte before the full compare.
    ** nStem>0 and nSuffix>=3 guarantee that nBuf-2 is a valid index. */
    if( aBuf[nBuf-2]!=pRule->zSuffix[pRule->nSuffix-2] ) continue;
    if( memcmp(pRule->zSuffix, &aBuf[nStem], pRule->nSuffix)!=0 ) continue;

    if( fts5Porter_MGt0(aBuf, nStem) ){
      memcpy(&aBuf[nStem], pRule->zRepl, pRule->nRepl);
      *pnBuf = nStem + (int)pRule->nRepl;
    }
    break;                          /* First matching suffix wins */
  }

  return 0;
}
25557 | |||||
25558 | |||||
/*
** First half of Porter step 1b. Operates on the nBuf-byte word in
** aBuf[]:
**
**   * "eed" suffix: if fts5Porter_MGt0() is true for the stem, the
**     word is shortened by one byte ("eed" -> "ee"). Returns 0 either
**     way, and the "ed" rule is not tried.
**
**   * "ed" suffix: if fts5Porter_Vowel() is true for the stem, the
**     suffix is removed and 1 is returned.
**
**   * "ing" suffix: if fts5Porter_Vowel() is true for the stem, the
**     suffix is removed and 1 is returned.
**
** In all other cases the word is left alone and 0 is returned. The
** visible caller runs the follow-up step (fts5PorterStep1B2) only when
** this function returns 1.
**
** NOTE: this function sits inside the region generated by
** mkportersteps.tcl; any change made here should be mirrored in the
** generator.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* Each rule needs a non-empty stem in front of a suffix of two or
  ** more bytes. Returning early also keeps the aBuf[nBuf-2] lookup
  ** below inside the buffer when nBuf is 0 or 1. */
  if( nBuf<3 ) return 0;

  switch( aBuf[nBuf-2] ){

    case 'e':
      if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          /* The first two bytes of the suffix are already "ee", so
          ** only the length needs to change. */
          *pnBuf = nBuf - 1;
        }
      }else if( 0==memcmp("ed", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_Vowel(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
          ret = 1;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_Vowel(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
          ret = 1;
        }
      }
      break;

  }
  return ret;
}
25590 | |||||
25591 | /* | ||||
25592 | ** GENERATED CODE ENDS HERE (mkportersteps.tcl) | ||||
25593 | *************************************************************************** | ||||
25594 | **************************************************************************/ | ||||
25595 | |||||
/*
** Porter step 1a (plural handling). For the nBuf-byte word in aBuf[]:
**
**   "...sses" (nBuf>4)  ->  drop the final two bytes
**   "...ies"  (nBuf>3)  ->  drop the final two bytes
**   any other "...es"   ->  drop the final 's'
**   "...ss"             ->  unchanged
**   any other "...s"    ->  drop the final 's'
**
** Only *pnBuf is modified; the contents of aBuf[] are not written. The
** caller must supply a word of at least two bytes.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  const int n = *pnBuf;
  int nDrop = 0;                  /* Number of trailing bytes to remove */

  if( aBuf[n-1]!='s' ) return;

  if( aBuf[n-2]=='e' ){
    const int bSses = (n>4 && aBuf[n-4]=='s' && aBuf[n-3]=='s');
    const int bIes = (n>3 && aBuf[n-3]=='i');
    nDrop = (bSses || bIes) ? 2 : 1;
  }else if( aBuf[n-2]!='s' ){
    nDrop = 1;
  }

  *pnBuf = n - nDrop;
}
25613 | |||||
25614 | static int fts5PorterCb( | ||||
25615 | void *pCtx, | ||||
25616 | int tflags, | ||||
25617 | const char *pToken, | ||||
25618 | int nToken, | ||||
25619 | int iStart, | ||||
25620 | int iEnd | ||||
25621 | ){ | ||||
25622 | PorterContext *p = (PorterContext*)pCtx; | ||||
25623 | |||||
25624 | char *aBuf; | ||||
25625 | int nBuf; | ||||
25626 | |||||
25627 | if( nToken>FTS5_PORTER_MAX_TOKEN64 || nToken<3 ) goto pass_through; | ||||
25628 | aBuf = p->aBuf; | ||||
25629 | nBuf = nToken; | ||||
25630 | memcpy(aBuf, pToken, nBuf); | ||||
25631 | |||||
25632 | /* Step 1. */ | ||||
25633 | fts5PorterStep1A(aBuf, &nBuf); | ||||
25634 | if( fts5PorterStep1B(aBuf, &nBuf) ){ | ||||
25635 | if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){ | ||||
25636 | char c = aBuf[nBuf-1]; | ||||
25637 | if( fts5PorterIsVowel(c, 0)==0 | ||||
25638 | && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] | ||||
25639 | ){ | ||||
25640 | nBuf--; | ||||
25641 | }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){ | ||||
25642 | aBuf[nBuf++] = 'e'; | ||||
25643 | } | ||||
25644 | } | ||||
25645 | } | ||||
25646 | |||||
25647 | /* Step 1C. */ | ||||
25648 | if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){ | ||||
25649 | aBuf[nBuf-1] = 'i'; | ||||
25650 | } | ||||
25651 | |||||
25652 | /* Steps 2 through 4. */ | ||||
25653 | fts5PorterStep2(aBuf, &nBuf); | ||||
25654 | fts5PorterStep3(aBuf, &nBuf); | ||||
25655 | fts5PorterStep4(aBuf, &nBuf); | ||||
25656 | |||||
25657 | /* Step 5a. */ | ||||
25658 | assert( nBuf>0 )((void) (0)); | ||||
25659 | if( aBuf[nBuf-1]=='e' ){ | ||||
25660 | if( fts5Porter_MGt1(aBuf, nBuf-1) | ||||
25661 | || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) | ||||
25662 | ){ | ||||
25663 | nBuf--; | ||||
25664 | } | ||||
25665 | } | ||||
25666 | |||||
25667 | /* Step 5b. */ | ||||
25668 | if( nBuf>1 && aBuf[nBuf-1]=='l' | ||||
25669 | && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) | ||||
25670 | ){ | ||||
25671 | nBuf--; | ||||
25672 | } | ||||
25673 | |||||
25674 | return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); | ||||
25675 | |||||
25676 | pass_through: | ||||
25677 | return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); | ||||
25678 | } | ||||
25679 | |||||
25680 | /* | ||||
25681 | ** Tokenize using the porter tokenizer. | ||||
25682 | */ | ||||
25683 | static int fts5PorterTokenize( | ||||
25684 | Fts5Tokenizer *pTokenizer, | ||||
25685 | void *pCtx, | ||||
25686 | int flags, | ||||
25687 | const char *pText, int nText, | ||||
25688 | const char *pLoc, int nLoc, | ||||
25689 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | ||||
25690 | ){ | ||||
25691 | PorterTokenizer *p = (PorterTokenizer*)pTokenizer; | ||||
25692 | PorterContext sCtx; | ||||
25693 | sCtx.xToken = xToken; | ||||
25694 | sCtx.pCtx = pCtx; | ||||
25695 | sCtx.aBuf = p->aBuf; | ||||
25696 | return p->tokenizer_v2.xTokenize( | ||||
25697 | p->pTokenizer, (void*)&sCtx, flags, pText, nText, pLoc, nLoc, fts5PorterCb | ||||
25698 | ); | ||||
25699 | } | ||||
25700 | |||||
25701 | /************************************************************************** | ||||
25702 | ** Start of trigram implementation. | ||||
25703 | */ | ||||
/*
** Configuration of a single trigram tokenizer instance.
*/
typedef struct TrigramTokenizer {
  int bFold;                      /* True to fold to lower-case */
  int iFoldParam;                 /* 2nd argument for sqlite3Fts5UnicodeFold() */
} TrigramTokenizer;
25709 | |||||
25710 | /* | ||||
25711 | ** Free a trigram tokenizer. | ||||
25712 | */ | ||||
25713 | static void fts5TriDelete(Fts5Tokenizer *p){ | ||||
25714 | sqlite3_freesqlite3_api->free(p); | ||||
25715 | } | ||||
25716 | |||||
25717 | /* | ||||
25718 | ** Allocate a trigram tokenizer. | ||||
25719 | */ | ||||
25720 | static int fts5TriCreate( | ||||
25721 | void *pUnused, | ||||
25722 | const char **azArg, | ||||
25723 | int nArg, | ||||
25724 | Fts5Tokenizer **ppOut | ||||
25725 | ){ | ||||
25726 | int rc = SQLITE_OK0; | ||||
25727 | TrigramTokenizer *pNew = 0; | ||||
25728 | UNUSED_PARAM(pUnused)(void)(pUnused); | ||||
25729 | if( nArg%2 ){ | ||||
25730 | rc = SQLITE_ERROR1; | ||||
25731 | }else{ | ||||
25732 | int i; | ||||
25733 | pNew = (TrigramTokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); | ||||
25734 | if( pNew==0 ){ | ||||
25735 | rc = SQLITE_NOMEM7; | ||||
25736 | }else{ | ||||
25737 | pNew->bFold = 1; | ||||
25738 | pNew->iFoldParam = 0; | ||||
25739 | |||||
25740 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | ||||
25741 | const char *zArg = azArg[i+1]; | ||||
25742 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "case_sensitive") ){ | ||||
25743 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){ | ||||
25744 | rc = SQLITE_ERROR1; | ||||
25745 | }else{ | ||||
25746 | pNew->bFold = (zArg[0]=='0'); | ||||
25747 | } | ||||
25748 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "remove_diacritics") ){ | ||||
25749 | if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ | ||||
25750 | rc = SQLITE_ERROR1; | ||||
25751 | }else{ | ||||
25752 | pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; | ||||
25753 | } | ||||
25754 | }else{ | ||||
25755 | rc = SQLITE_ERROR1; | ||||
25756 | } | ||||
25757 | } | ||||
25758 | |||||
25759 | if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ | ||||
25760 | rc = SQLITE_ERROR1; | ||||
25761 | } | ||||
25762 | |||||
25763 | if( rc!=SQLITE_OK0 ){ | ||||
25764 | fts5TriDelete((Fts5Tokenizer*)pNew); | ||||
25765 | pNew = 0; | ||||
25766 | } | ||||
25767 | } | ||||
25768 | } | ||||
25769 | *ppOut = (Fts5Tokenizer*)pNew; | ||||
25770 | return rc; | ||||
25771 | } | ||||
25772 | |||||
25773 | /* | ||||
25774 | ** Trigram tokenizer tokenize routine. | ||||
25775 | */ | ||||
25776 | static int fts5TriTokenize( | ||||
25777 | Fts5Tokenizer *pTok, | ||||
25778 | void *pCtx, | ||||
25779 | int unusedFlags, | ||||
25780 | const char *pText, int nText, | ||||
25781 | int (*xToken)(void*, int, const char*, int, int, int) | ||||
25782 | ){ | ||||
25783 | TrigramTokenizer *p = (TrigramTokenizer*)pTok; | ||||
25784 | int rc = SQLITE_OK0; | ||||
25785 | char aBuf[32]; | ||||
25786 | char *zOut = aBuf; | ||||
25787 | int ii; | ||||
25788 | const unsigned char *zIn = (const unsigned char*)pText; | ||||
25789 | const unsigned char *zEof = (zIn ? &zIn[nText] : 0); | ||||
25790 | u32 iCode = 0; | ||||
25791 | int aStart[3]; /* Input offset of each character in aBuf[] */ | ||||
25792 | |||||
25793 | UNUSED_PARAM(unusedFlags)(void)(unusedFlags); | ||||
25794 | |||||
25795 | /* Populate aBuf[] with the characters for the first trigram. */ | ||||
25796 | for(ii=0; ii<3; ii++){ | ||||
25797 | do { | ||||
25798 | aStart[ii] = zIn - (const unsigned char*)pText; | ||||
25799 | if( zIn>=zEof ) return SQLITE_OK0; | ||||
25800 | READ_UTF8(zIn, zEof, iCode)iCode = *(zIn++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zIn<zEof && (*zIn & 0xc0) ==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zIn++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | ||||
25801 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); | ||||
25802 | }while( iCode==0 ); | ||||
25803 | WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | ||||
25804 | } | ||||
25805 | |||||
25806 | /* At the start of each iteration of this loop: | ||||
25807 | ** | ||||
25808 | ** aBuf: Contains 3 characters. The 3 characters of the next trigram. | ||||
25809 | ** zOut: Points to the byte following the last character in aBuf. | ||||
25810 | ** aStart[3]: Contains the byte offset in the input text corresponding | ||||
25811 | ** to the start of each of the three characters in the buffer. | ||||
25812 | */ | ||||
25813 | assert( zIn<=zEof )((void) (0)); | ||||
25814 | while( 1 ){ | ||||
25815 | int iNext; /* Start of character following current tri */ | ||||
25816 | const char *z1; | ||||
25817 | |||||
25818 | /* Read characters from the input up until the first non-diacritic */ | ||||
25819 | do { | ||||
25820 | iNext = zIn - (const unsigned char*)pText; | ||||
25821 | if( zIn>=zEof ){ | ||||
25822 | iCode = 0; | ||||
25823 | break; | ||||
25824 | } | ||||
25825 | READ_UTF8(zIn, zEof, iCode)iCode = *(zIn++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zIn<zEof && (*zIn & 0xc0) ==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zIn++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | ||||
25826 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); | ||||
25827 | }while( iCode==0 ); | ||||
25828 | |||||
25829 | /* Pass the current trigram back to fts5 */ | ||||
25830 | rc = xToken(pCtx, 0, aBuf, zOut-aBuf, aStart[0], iNext); | ||||
25831 | if( iCode==0 || rc!=SQLITE_OK0 ) break; | ||||
25832 | |||||
25833 | /* Remove the first character from buffer aBuf[]. Append the character | ||||
25834 | ** with codepoint iCode. */ | ||||
25835 | z1 = aBuf; | ||||
25836 | FTS5_SKIP_UTF8(z1){ if( ((unsigned char)(*(z1++)))>=0xc0 ){ while( (((unsigned char)*z1) & 0xc0)==0x80 ){ z1++; } } }; | ||||
25837 | memmove(aBuf, z1, zOut - z1); | ||||
25838 | zOut -= (z1 - aBuf); | ||||
25839 | WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | ||||
25840 | |||||
25841 | /* Update the aStart[] array */ | ||||
25842 | aStart[0] = aStart[1]; | ||||
25843 | aStart[1] = aStart[2]; | ||||
25844 | aStart[2] = iNext; | ||||
25845 | } | ||||
25846 | |||||
25847 | return rc; | ||||
25848 | } | ||||
25849 | |||||
25850 | /* | ||||
25851 | ** Argument xCreate is a pointer to a constructor function for a tokenizer. | ||||
25852 | ** pTok is a tokenizer previously created using the same method. This function | ||||
25853 | ** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB | ||||
25854 | ** indicating the style of pattern matching that the tokenizer can support. | ||||
25855 | ** In practice, this is: | ||||
25856 | ** | ||||
25857 | ** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB | ||||
25858 | ** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE | ||||
25859 | ** all other tokenizers - FTS5_PATTERN_NONE | ||||
25860 | */ | ||||
25861 | static int sqlite3Fts5TokenizerPattern( | ||||
25862 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), | ||||
25863 | Fts5Tokenizer *pTok | ||||
25864 | ){ | ||||
25865 | if( xCreate==fts5TriCreate ){ | ||||
25866 | TrigramTokenizer *p = (TrigramTokenizer*)pTok; | ||||
25867 | if( p->iFoldParam==0 ){ | ||||
25868 | return p->bFold ? FTS5_PATTERN_LIKE65 : FTS5_PATTERN_GLOB66; | ||||
25869 | } | ||||
25870 | } | ||||
25871 | return FTS5_PATTERN_NONE0; | ||||
25872 | } | ||||
25873 | |||||
25874 | /* | ||||
25875 | ** Return true if the tokenizer described by p->azArg[] is the trigram | ||||
25876 | ** tokenizer. This tokenizer needs to be loaded before xBestIndex is | ||||
25877 | ** called for the first time in order to correctly handle LIKE/GLOB. | ||||
25878 | */ | ||||
25879 | static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig *p){ | ||||
25880 | return (p->nArg>=1 && 0==sqlite3_stricmpsqlite3_api->stricmp(p->azArg[0], "trigram")); | ||||
25881 | } | ||||
25882 | |||||
25883 | |||||
25884 | /* | ||||
25885 | ** Register all built-in tokenizers with FTS5. | ||||
25886 | */ | ||||
25887 | static int sqlite3Fts5TokenizerInit(fts5_api *pApi){ | ||||
25888 | struct BuiltinTokenizer { | ||||
25889 | const char *zName; | ||||
25890 | fts5_tokenizer x; | ||||
25891 | } aBuiltin[] = { | ||||
25892 | { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, | ||||
25893 | { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, | ||||
25894 | { "trigram", {fts5TriCreate, fts5TriDelete, fts5TriTokenize}}, | ||||
25895 | }; | ||||
25896 | |||||
25897 | int rc = SQLITE_OK0; /* Return code */ | ||||
25898 | int i; /* To iterate through builtin functions */ | ||||
25899 | |||||
25900 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aBuiltin)((int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]))); i++){ | ||||
25901 | rc = pApi->xCreateTokenizer(pApi, | ||||
25902 | aBuiltin[i].zName, | ||||
25903 | (void*)pApi, | ||||
25904 | &aBuiltin[i].x, | ||||
25905 | 0 | ||||
25906 | ); | ||||
25907 | } | ||||
25908 | if( rc==SQLITE_OK0 ){ | ||||
25909 | fts5_tokenizer_v2 sPorter = { | ||||
25910 | 2, | ||||
25911 | fts5PorterCreate, | ||||
25912 | fts5PorterDelete, | ||||
25913 | fts5PorterTokenize | ||||
25914 | }; | ||||
25915 | rc = pApi->xCreateTokenizer_v2(pApi, | ||||
25916 | "porter", | ||||
25917 | (void*)pApi, | ||||
25918 | &sPorter, | ||||
25919 | 0 | ||||
25920 | ); | ||||
25921 | } | ||||
25922 | return rc; | ||||
25923 | } | ||||
25924 | |||||
25925 | #line 1 "fts5_unicode2.c" | ||||
25926 | /* | ||||
25927 | ** 2012-05-25 | ||||
25928 | ** | ||||
25929 | ** The author disclaims copyright to this source code. In place of | ||||
25930 | ** a legal notice, here is a blessing: | ||||
25931 | ** | ||||
25932 | ** May you do good and not evil. | ||||
25933 | ** May you find forgiveness for yourself and forgive others. | ||||
25934 | ** May you share freely, never taking more than you give. | ||||
25935 | ** | ||||
25936 | ****************************************************************************** | ||||
25937 | */ | ||||
25938 | |||||
25939 | /* | ||||
25940 | ** DO NOT EDIT THIS MACHINE GENERATED FILE. | ||||
25941 | */ | ||||
25942 | |||||
25943 | |||||
25944 | #include <assert.h> | ||||
25945 | |||||
25946 | |||||
25947 | |||||
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** Codepoints that are not covered by the tables are returned unchanged.
** Some table entries are flagged as "complex" (HIBIT set in aChar[]);
** these are only mapped when bComplex is non-zero, otherwise c is
** returned unchanged.
**
** Table layout: each aDia[] entry describes a range of codepoints. The
** upper 13 bits hold the first codepoint of the range and the lower 3
** bits the number of further codepoints in it. aChar[i] is the ASCII
** letter that the range described by aDia[i] maps to.
*/
static int fts5_remove_diacritic(int c, int bComplex){
  /* The tables never change, so they are "static const" rather than
  ** automatic arrays that would be re-initialized on every call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
#define HIBIT ((unsigned char)0x80)
  static const unsigned char aChar[] = {
    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  /* Search key: c in the upper bits with all three low bits set, so that
  ** it compares >= to any aDia[] entry whose range starts at or before c. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Binary search for the last entry of aDia[] that is <= key. */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* Entries flagged with HIBIT are only applied in "complex" mode. */
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;

  /* Map c only if it lies within the range described by aDia[iRes]. */
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}
26017 | |||||
26018 | |||||
/*
** Return non-zero if the argument interpreted as a unicode codepoint
** is a diacritical modifier character, or zero otherwise.
**
** Only codepoints 768..817 are considered; they are looked up in a pair
** of 32-bit masks (one bit per codepoint). The non-zero value returned
** is the matching mask bit itself, not necessarily 1.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  unsigned int mask;              /* Bitmask covering the codepoint */
  unsigned int iBit;              /* Bit within mask to test */

  if( c<768 || c>817 ) return 0;

  iBit = (unsigned int)(c - 768);
  if( iBit<32 ){
    mask = 0x08029FDF;            /* Codepoints 768..799 */
  }else{
    mask = 0x000361F8;            /* Codepoints 800..817 */
    iBit -= 32;
  }
  return (int)(mask & ((unsigned int)1 << iBit));
}
26031 | |||||
26032 | |||||
/*
** Fold unicode codepoint c to lower case and return the result. If c
** has no lower case mapping known to this function, c itself is
** returned.
**
**   * c < 128:            'A'..'Z' map to 'a'..'z'; all else unchanged.
**   * 128 <= c < 65536:   folded using the rule table below. Afterwards,
**                         if eRemoveDiacritic is non-zero, the result is
**                         passed through fts5_remove_diacritic() with
**                         its second argument set to (eRemoveDiacritic==2).
**   * 66560 <= c < 66600: mapped to c+40.
**   * anything else:      unchanged.
**
** Diacritic removal is only attempted in the 128..65535 case.
**
** The results are undefined if c is negative.
*/
static int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
  /* Each aEntry[] element is a folding rule covering nRange consecutive
  ** codepoints beginning at iCode.
  **
  ** When the least significant bit of flags is clear, every codepoint in
  ** the range is folded. When it is set, only every second codepoint is
  ** folded, beginning with iCode itself (i.e. those codepoints whose low
  ** bit matches the low bit of iCode).
  **
  ** The remaining 7 bits of flags (flags>>1) index aiOff[]. A codepoint C
  ** that needs folding becomes ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The table is generated from the CaseFolding.txt file distributed as
  ** part of the "Unicode Character Database". See http://www.unicode.org
  ** for details. Entries are sorted by iCode, which the binary search
  ** below depends upon.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   37,    38,    40,    48,    63,    64,    69,    71,
   79,    80,    116,   202,   203,   205,   206,   207,
   209,   210,   211,   213,   214,   217,   218,   219,
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
   65514, 65521, 65527, 65528, 65529,
  };

  const struct TableEntry *pRule;   /* Last rule with iCode<=c */
  int iFirst;                       /* Lower bound of search window */
  int iEnd;                         /* One past upper bound of search window */
  int bInRange;                     /* True if c lies within pRule's range */
  int bSkipped;                     /* True if pRule skips c (alternate rule) */
  int folded = c;                   /* Value to return */

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* ASCII: only the 26 upper case letters change. */
  if( c<128 ){
    return (c>='A' && c<='Z') ? c + ('a' - 'A') : c;
  }

  /* Outside the 16-bit range only 66560..66599 is folded. */
  if( c>=65536 ){
    return (c>=66560 && c<66600) ? c + 40 : c;
  }

  /* Binary search over the half-open window [iFirst,iEnd) for the first
  ** rule whose iCode is greater than c. The rule just before it is the
  ** last one that starts at or before c. Since c>=128 here and the first
  ** rule starts at 65, such a rule always exists. */
  assert( c>aEntry[0].iCode );
  iFirst = 0;
  iEnd = (int)(sizeof(aEntry)/sizeof(aEntry[0]));
  while( iFirst<iEnd ){
    int iMid = iFirst + (iEnd - iFirst)/2;
    if( c>=(int)aEntry[iMid].iCode ){
      iFirst = iMid + 1;
    }else{
      iEnd = iMid;
    }
  }
  assert( iFirst>0 && c>=aEntry[iFirst-1].iCode );
  pRule = &aEntry[iFirst-1];

  /* Apply the rule if c is inside its range and, for "every second
  ** codepoint" rules, c has the same low bit as the start of the range. */
  bInRange = c<(pRule->iCode + pRule->nRange);
  bSkipped = 0x01 & pRule->flags & (pRule->iCode ^ c);
  if( bInRange && !bSkipped ){
    folded = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
    assert( folded>0 );
  }

  if( eRemoveDiacritic ){
    folded = fts5_remove_diacritic(folded, eRemoveDiacritic==2);
  }
  return folded;
}
26176 | |||||
26177 | |||||
26178 | static int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ | ||||
26179 | aArray[0] = 1; | ||||
26180 | switch( zCat[0] ){ | ||||
26181 | case 'C': | ||||
26182 | switch( zCat[1] ){ | ||||
26183 | case 'c': aArray[1] = 1; break; | ||||
26184 | case 'f': aArray[2] = 1; break; | ||||
26185 | case 'n': aArray[3] = 1; break; | ||||
26186 | case 's': aArray[4] = 1; break; | ||||
26187 | case 'o': aArray[31] = 1; break; | ||||
26188 | case '*': | ||||
26189 | aArray[1] = 1; | ||||
26190 | aArray[2] = 1; | ||||
26191 | aArray[3] = 1; | ||||
26192 | aArray[4] = 1; | ||||
26193 | aArray[31] = 1; | ||||
26194 | break; | ||||
26195 | default: return 1; } | ||||
26196 | break; | ||||
26197 | |||||
26198 | case 'L': | ||||
26199 | switch( zCat[1] ){ | ||||
26200 | case 'l': aArray[5] = 1; break; | ||||
26201 | case 'm': aArray[6] = 1; break; | ||||
26202 | case 'o': aArray[7] = 1; break; | ||||
26203 | case 't': aArray[8] = 1; break; | ||||
26204 | case 'u': aArray[9] = 1; break; | ||||
26205 | case 'C': aArray[30] = 1; break; | ||||
26206 | case '*': | ||||
26207 | aArray[5] = 1; | ||||
26208 | aArray[6] = 1; | ||||
26209 | aArray[7] = 1; | ||||
26210 | aArray[8] = 1; | ||||
26211 | aArray[9] = 1; | ||||
26212 | aArray[30] = 1; | ||||
26213 | break; | ||||
26214 | default: return 1; } | ||||
26215 | break; | ||||
26216 | |||||
26217 | case 'M': | ||||
26218 | switch( zCat[1] ){ | ||||
26219 | case 'c': aArray[10] = 1; break; | ||||
26220 | case 'e': aArray[11] = 1; break; | ||||
26221 | case 'n': aArray[12] = 1; break; | ||||
26222 | case '*': | ||||
26223 | aArray[10] = 1; | ||||
26224 | aArray[11] = 1; | ||||
26225 | aArray[12] = 1; | ||||
26226 | break; | ||||
26227 | default: return 1; } | ||||
26228 | break; | ||||
26229 | |||||
26230 | case 'N': | ||||
26231 | switch( zCat[1] ){ | ||||
26232 | case 'd': aArray[13] = 1; break; | ||||
26233 | case 'l': aArray[14] = 1; break; | ||||
26234 | case 'o': aArray[15] = 1; break; | ||||
26235 | case '*': | ||||
26236 | aArray[13] = 1; | ||||
26237 | aArray[14] = 1; | ||||
26238 | aArray[15] = 1; | ||||
26239 | break; | ||||
26240 | default: return 1; } | ||||
26241 | break; | ||||
26242 | |||||
26243 | case 'P': | ||||
26244 | switch( zCat[1] ){ | ||||
26245 | case 'c': aArray[16] = 1; break; | ||||
26246 | case 'd': aArray[17] = 1; break; | ||||
26247 | case 'e': aArray[18] = 1; break; | ||||
26248 | case 'f': aArray[19] = 1; break; | ||||
26249 | case 'i': aArray[20] = 1; break; | ||||
26250 | case 'o': aArray[21] = 1; break; | ||||
26251 | case 's': aArray[22] = 1; break; | ||||
26252 | case '*': | ||||
26253 | aArray[16] = 1; | ||||
26254 | aArray[17] = 1; | ||||
26255 | aArray[18] = 1; | ||||
26256 | aArray[19] = 1; | ||||
26257 | aArray[20] = 1; | ||||
26258 | aArray[21] = 1; | ||||
26259 | aArray[22] = 1; | ||||
26260 | break; | ||||
26261 | default: return 1; } | ||||
26262 | break; | ||||
26263 | |||||
26264 | case 'S': | ||||
26265 | switch( zCat[1] ){ | ||||
26266 | case 'c': aArray[23] = 1; break; | ||||
26267 | case 'k': aArray[24] = 1; break; | ||||
26268 | case 'm': aArray[25] = 1; break; | ||||
26269 | case 'o': aArray[26] = 1; break; | ||||
26270 | case '*': | ||||
26271 | aArray[23] = 1; | ||||
26272 | aArray[24] = 1; | ||||
26273 | aArray[25] = 1; | ||||
26274 | aArray[26] = 1; | ||||
26275 | break; | ||||
26276 | default: return 1; } | ||||
26277 | break; | ||||
26278 | |||||
26279 | case 'Z': | ||||
26280 | switch( zCat[1] ){ | ||||
26281 | case 'l': aArray[27] = 1; break; | ||||
26282 | case 'p': aArray[28] = 1; break; | ||||
26283 | case 's': aArray[29] = 1; break; | ||||
26284 | case '*': | ||||
26285 | aArray[27] = 1; | ||||
26286 | aArray[28] = 1; | ||||
26287 | aArray[29] = 1; | ||||
26288 | break; | ||||
26289 | default: return 1; } | ||||
26290 | break; | ||||
26291 | |||||
26292 | |||||
26293 | default: | ||||
26294 | return 1; | ||||
26295 | } | ||||
26296 | return 0; | ||||
26297 | } | ||||
26298 | |||||
/*
** aFts5UnicodeBlock[] holds 17 non-decreasing values running from 0 up
** to 1765.
**
** NOTE(review): the distinct values (0, 1471, 1753, 1760, 1763, 1765)
** coincide with the indexes at which aFts5UnicodeMap[] restarts its
** ascending sequence, plus its total length. Presumably entry i is the
** offset in aFts5UnicodeMap[] of the data for the i'th block of 65536
** codepoints, with entry i+1 marking its end - the lookup code is
** outside this view, so confirm before relying on that.
*/
26299 | static u16 aFts5UnicodeBlock[] = { | ||||
26300 | 0, 1471, 1753, 1760, 1760, 1760, 1760, 1760, 1760, 1760, | ||||
26301 | 1760, 1760, 1760, 1760, 1760, 1763, 1765, | ||||
26302 | }; | ||||
/*
** aFts5UnicodeMap[] is a generated table of 16-bit values. It consists
** of several strictly ascending runs laid end to end: the sequence
** restarts from a small value at indexes 1471, 1753, 1760 and 1763, and
** the array holds 1765 entries in total.
**
** NOTE(review): presumably each run lists the low 16 bits of the first
** codepoint of successive ranges within one 65536-codepoint block, and
** the runs are delimited by aFts5UnicodeBlock[] - the code that reads
** this table is outside this view, so confirm there. Do not edit by
** hand; the values are position-sensitive.
*/
26303 | static u16 aFts5UnicodeMap[] = { | ||||
26304 | 0, 32, 33, 36, 37, 40, 41, 42, 43, 44, | ||||
26305 | 45, 46, 48, 58, 60, 63, 65, 91, 92, 93, | ||||
26306 | 94, 95, 96, 97, 123, 124, 125, 126, 127, 160, | ||||
26307 | 161, 162, 166, 167, 168, 169, 170, 171, 172, 173, | ||||
26308 | 174, 175, 176, 177, 178, 180, 181, 182, 184, 185, | ||||
26309 | 186, 187, 188, 191, 192, 215, 216, 223, 247, 248, | ||||
26310 | 256, 312, 313, 329, 330, 377, 383, 385, 387, 388, | ||||
26311 | 391, 394, 396, 398, 402, 403, 405, 406, 409, 412, | ||||
26312 | 414, 415, 417, 418, 423, 427, 428, 431, 434, 436, | ||||
26313 | 437, 440, 442, 443, 444, 446, 448, 452, 453, 454, | ||||
26314 | 455, 456, 457, 458, 459, 460, 461, 477, 478, 496, | ||||
26315 | 497, 498, 499, 500, 503, 505, 506, 564, 570, 572, | ||||
26316 | 573, 575, 577, 580, 583, 584, 592, 660, 661, 688, | ||||
26317 | 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, | ||||
26318 | 880, 884, 885, 886, 890, 891, 894, 900, 902, 903, | ||||
26319 | 904, 908, 910, 912, 913, 931, 940, 975, 977, 978, | ||||
26320 | 981, 984, 1008, 1012, 1014, 1015, 1018, 1020, 1021, 1072, | ||||
26321 | 1120, 1154, 1155, 1160, 1162, 1217, 1231, 1232, 1329, 1369, | ||||
26322 | 1370, 1377, 1417, 1418, 1423, 1425, 1470, 1471, 1472, 1473, | ||||
26323 | 1475, 1476, 1478, 1479, 1488, 1520, 1523, 1536, 1542, 1545, | ||||
26324 | 1547, 1548, 1550, 1552, 1563, 1566, 1568, 1600, 1601, 1611, | ||||
26325 | 1632, 1642, 1646, 1648, 1649, 1748, 1749, 1750, 1757, 1758, | ||||
26326 | 1759, 1765, 1767, 1769, 1770, 1774, 1776, 1786, 1789, 1791, | ||||
26327 | 1792, 1807, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1984, | ||||
26328 | 1994, 2027, 2036, 2038, 2039, 2042, 2048, 2070, 2074, 2075, | ||||
26329 | 2084, 2085, 2088, 2089, 2096, 2112, 2137, 2142, 2208, 2210, | ||||
26330 | 2276, 2304, 2307, 2308, 2362, 2363, 2364, 2365, 2366, 2369, | ||||
26331 | 2377, 2381, 2382, 2384, 2385, 2392, 2402, 2404, 2406, 2416, | ||||
26332 | 2417, 2418, 2425, 2433, 2434, 2437, 2447, 2451, 2474, 2482, | ||||
26333 | 2486, 2492, 2493, 2494, 2497, 2503, 2507, 2509, 2510, 2519, | ||||
26334 | 2524, 2527, 2530, 2534, 2544, 2546, 2548, 2554, 2555, 2561, | ||||
26335 | 2563, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622, | ||||
26336 | 2625, 2631, 2635, 2641, 2649, 2654, 2662, 2672, 2674, 2677, | ||||
26337 | 2689, 2691, 2693, 2703, 2707, 2730, 2738, 2741, 2748, 2749, | ||||
26338 | 2750, 2753, 2759, 2761, 2763, 2765, 2768, 2784, 2786, 2790, | ||||
26339 | 2800, 2801, 2817, 2818, 2821, 2831, 2835, 2858, 2866, 2869, | ||||
26340 | 2876, 2877, 2878, 2879, 2880, 2881, 2887, 2891, 2893, 2902, | ||||
26341 | 2903, 2908, 2911, 2914, 2918, 2928, 2929, 2930, 2946, 2947, | ||||
26342 | 2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006, | ||||
26343 | 3008, 3009, 3014, 3018, 3021, 3024, 3031, 3046, 3056, 3059, | ||||
26344 | 3065, 3066, 3073, 3077, 3086, 3090, 3114, 3125, 3133, 3134, | ||||
26345 | 3137, 3142, 3146, 3157, 3160, 3168, 3170, 3174, 3192, 3199, | ||||
26346 | 3202, 3205, 3214, 3218, 3242, 3253, 3260, 3261, 3262, 3263, | ||||
26347 | 3264, 3270, 3271, 3274, 3276, 3285, 3294, 3296, 3298, 3302, | ||||
26348 | 3313, 3330, 3333, 3342, 3346, 3389, 3390, 3393, 3398, 3402, | ||||
26349 | 3405, 3406, 3415, 3424, 3426, 3430, 3440, 3449, 3450, 3458, | ||||
26350 | 3461, 3482, 3507, 3517, 3520, 3530, 3535, 3538, 3542, 3544, | ||||
26351 | 3570, 3572, 3585, 3633, 3634, 3636, 3647, 3648, 3654, 3655, | ||||
26352 | 3663, 3664, 3674, 3713, 3716, 3719, 3722, 3725, 3732, 3737, | ||||
26353 | 3745, 3749, 3751, 3754, 3757, 3761, 3762, 3764, 3771, 3773, | ||||
26354 | 3776, 3782, 3784, 3792, 3804, 3840, 3841, 3844, 3859, 3860, | ||||
26355 | 3861, 3864, 3866, 3872, 3882, 3892, 3893, 3894, 3895, 3896, | ||||
26356 | 3897, 3898, 3899, 3900, 3901, 3902, 3904, 3913, 3953, 3967, | ||||
26357 | 3968, 3973, 3974, 3976, 3981, 3993, 4030, 4038, 4039, 4046, | ||||
26358 | 4048, 4053, 4057, 4096, 4139, 4141, 4145, 4146, 4152, 4153, | ||||
26359 | 4155, 4157, 4159, 4160, 4170, 4176, 4182, 4184, 4186, 4190, | ||||
26360 | 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4227, 4229, | ||||
26361 | 4231, 4237, 4238, 4239, 4240, 4250, 4253, 4254, 4256, 4295, | ||||
26362 | 4301, 4304, 4347, 4348, 4349, 4682, 4688, 4696, 4698, 4704, | ||||
26363 | 4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888, | ||||
26364 | 4957, 4960, 4969, 4992, 5008, 5024, 5120, 5121, 5741, 5743, | ||||
26365 | 5760, 5761, 5787, 5788, 5792, 5867, 5870, 5888, 5902, 5906, | ||||
26366 | 5920, 5938, 5941, 5952, 5970, 5984, 5998, 6002, 6016, 6068, | ||||
26367 | 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6103, 6104, 6107, | ||||
26368 | 6108, 6109, 6112, 6128, 6144, 6150, 6151, 6155, 6158, 6160, | ||||
26369 | 6176, 6211, 6212, 6272, 6313, 6314, 6320, 6400, 6432, 6435, | ||||
26370 | 6439, 6441, 6448, 6450, 6451, 6457, 6464, 6468, 6470, 6480, | ||||
26371 | 6512, 6528, 6576, 6593, 6600, 6608, 6618, 6622, 6656, 6679, | ||||
26372 | 6681, 6686, 6688, 6741, 6742, 6743, 6744, 6752, 6753, 6754, | ||||
26373 | 6755, 6757, 6765, 6771, 6783, 6784, 6800, 6816, 6823, 6824, | ||||
26374 | 6912, 6916, 6917, 6964, 6965, 6966, 6971, 6972, 6973, 6978, | ||||
26375 | 6979, 6981, 6992, 7002, 7009, 7019, 7028, 7040, 7042, 7043, | ||||
26376 | 7073, 7074, 7078, 7080, 7082, 7083, 7084, 7086, 7088, 7098, | ||||
26377 | 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7164, 7168, | ||||
26378 | 7204, 7212, 7220, 7222, 7227, 7232, 7245, 7248, 7258, 7288, | ||||
26379 | 7294, 7360, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406, | ||||
26380 | 7410, 7412, 7413, 7424, 7468, 7531, 7544, 7545, 7579, 7616, | ||||
26381 | 7676, 7680, 7830, 7838, 7936, 7944, 7952, 7960, 7968, 7976, | ||||
26382 | 7984, 7992, 8000, 8008, 8016, 8025, 8027, 8029, 8031, 8033, | ||||
26383 | 8040, 8048, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8118, | ||||
26384 | 8120, 8124, 8125, 8126, 8127, 8130, 8134, 8136, 8140, 8141, | ||||
26385 | 8144, 8150, 8152, 8157, 8160, 8168, 8173, 8178, 8182, 8184, | ||||
26386 | 8188, 8189, 8192, 8203, 8208, 8214, 8216, 8217, 8218, 8219, | ||||
26387 | 8221, 8222, 8223, 8224, 8232, 8233, 8234, 8239, 8240, 8249, | ||||
26388 | 8250, 8251, 8255, 8257, 8260, 8261, 8262, 8263, 8274, 8275, | ||||
26389 | 8276, 8277, 8287, 8288, 8298, 8304, 8305, 8308, 8314, 8317, | ||||
26390 | 8318, 8319, 8320, 8330, 8333, 8334, 8336, 8352, 8400, 8413, | ||||
26391 | 8417, 8418, 8421, 8448, 8450, 8451, 8455, 8456, 8458, 8459, | ||||
26392 | 8462, 8464, 8467, 8468, 8469, 8470, 8472, 8473, 8478, 8484, | ||||
26393 | 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8496, 8500, | ||||
26394 | 8501, 8505, 8506, 8508, 8510, 8512, 8517, 8519, 8522, 8523, | ||||
26395 | 8524, 8526, 8527, 8528, 8544, 8579, 8581, 8585, 8592, 8597, | ||||
26396 | 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, | ||||
26397 | 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972, | ||||
26398 | 8992, 8994, 9001, 9002, 9003, 9084, 9085, 9115, 9140, 9180, | ||||
26399 | 9186, 9216, 9280, 9312, 9372, 9450, 9472, 9655, 9656, 9665, | ||||
26400 | 9666, 9720, 9728, 9839, 9840, 9985, 10088, 10089, 10090, 10091, | ||||
26401 | 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101, | ||||
26402 | 10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217, | ||||
26403 | 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10240, 10496, 10627, | ||||
26404 | 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, | ||||
26405 | 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, | ||||
26406 | 10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750, | ||||
26407 | 11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365, | ||||
26408 | 11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393, | ||||
26409 | 11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520, | ||||
26410 | 11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696, | ||||
26411 | 11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780, | ||||
26412 | 11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800, | ||||
26413 | 11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812, | ||||
26414 | 11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904, | ||||
26415 | 11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296, | ||||
26416 | 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306, | ||||
26417 | 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317, | ||||
26418 | 12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347, | ||||
26419 | 12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449, | ||||
26420 | 12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736, | ||||
26421 | 12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, | ||||
26422 | 12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981, | ||||
26423 | 40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528, | ||||
26424 | 42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624, | ||||
26425 | 42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800, | ||||
26426 | 42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912, | ||||
26427 | 43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043, | ||||
26428 | 43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136, | ||||
26429 | 43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 43259, 43264, | ||||
26430 | 43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395, | ||||
26431 | 43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472, | ||||
26432 | 43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588, | ||||
26433 | 43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643, | ||||
26434 | 43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, | ||||
26435 | 43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762, | ||||
26436 | 43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003, | ||||
26437 | 44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203, | ||||
26438 | 55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112, | ||||
26439 | 64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320, | ||||
26440 | 64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020, | ||||
26441 | 65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075, | ||||
26442 | 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, | ||||
26443 | 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097, | ||||
26444 | 65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118, | ||||
26445 | 65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279, | ||||
26446 | 65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294, | ||||
26447 | 65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343, | ||||
26448 | 65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378, | ||||
26449 | 65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490, | ||||
26450 | 65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529, | ||||
26451 | 65532, 0, 13, 40, 60, 63, 80, 128, 256, 263, | ||||
26452 | 311, 320, 373, 377, 394, 400, 464, 509, 640, 672, | ||||
26453 | 768, 800, 816, 833, 834, 842, 896, 927, 928, 968, | ||||
26454 | 976, 977, 1024, 1064, 1104, 1184, 2048, 2056, 2058, 2103, | ||||
26455 | 2108, 2111, 2135, 2136, 2304, 2326, 2335, 2336, 2367, 2432, | ||||
26456 | 2494, 2560, 2561, 2565, 2572, 2576, 2581, 2585, 2616, 2623, | ||||
26457 | 2624, 2640, 2656, 2685, 2687, 2816, 2873, 2880, 2904, 2912, | ||||
26458 | 2936, 3072, 3680, 4096, 4097, 4098, 4099, 4152, 4167, 4178, | ||||
26459 | 4198, 4224, 4226, 4227, 4272, 4275, 4279, 4281, 4283, 4285, | ||||
26460 | 4286, 4304, 4336, 4352, 4355, 4391, 4396, 4397, 4406, 4416, | ||||
26461 | 4480, 4482, 4483, 4531, 4534, 4543, 4545, 4549, 4560, 5760, | ||||
26462 | 5803, 5804, 5805, 5806, 5808, 5814, 5815, 5824, 8192, 9216, | ||||
26463 | 9328, 12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248, | ||||
26464 | 53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637, | ||||
26465 | 53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298, | ||||
26466 | 54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441, | ||||
26467 | 54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541, | ||||
26468 | 54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662, | ||||
26469 | 54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922, | ||||
26470 | 54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062, | ||||
26471 | 55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178, | ||||
26472 | 55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961, | ||||
26473 | 60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003, | ||||
26474 | 61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028, | ||||
26475 | 61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099, | ||||
26476 | 61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744, | ||||
26477 | 61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368, | ||||
26478 | 62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971, | ||||
26479 | 63045, 63104, 63232, 0, 42710, 42752, 46900, 46912, 47133, 63488, | ||||
26480 | 1, 32, 256, 0, 65533, | ||||
26481 | }; | ||||
26482 | static u16 aFts5UnicodeData[] = { | ||||
26483 | 1025, 61, 117, 55, 117, 54, 50, 53, 57, 53, | ||||
26484 | 49, 85, 333, 85, 121, 85, 841, 54, 53, 50, | ||||
26485 | 56, 48, 56, 837, 54, 57, 50, 57, 1057, 61, | ||||
26486 | 53, 151, 58, 53, 56, 58, 39, 52, 57, 34, | ||||
26487 | 58, 56, 58, 57, 79, 56, 37, 85, 56, 47, | ||||
26488 | 39, 51, 111, 53, 745, 57, 233, 773, 57, 261, | ||||
26489 | 1822, 37, 542, 37, 1534, 222, 69, 73, 37, 126, | ||||
26490 | 126, 73, 69, 137, 37, 73, 37, 105, 101, 73, | ||||
26491 | 37, 73, 37, 190, 158, 37, 126, 126, 73, 37, | ||||
26492 | 126, 94, 37, 39, 94, 69, 135, 41, 40, 37, | ||||
26493 | 41, 40, 37, 41, 40, 37, 542, 37, 606, 37, | ||||
26494 | 41, 40, 37, 126, 73, 37, 1886, 197, 73, 37, | ||||
26495 | 73, 69, 126, 105, 37, 286, 2181, 39, 869, 582, | ||||
26496 | 152, 390, 472, 166, 248, 38, 56, 38, 568, 3596, | ||||
26497 | 158, 38, 56, 94, 38, 101, 53, 88, 41, 53, | ||||
26498 | 105, 41, 73, 37, 553, 297, 1125, 94, 37, 105, | ||||
26499 | 101, 798, 133, 94, 57, 126, 94, 37, 1641, 1541, | ||||
26500 | 1118, 58, 172, 75, 1790, 478, 37, 2846, 1225, 38, | ||||
26501 | 213, 1253, 53, 49, 55, 1452, 49, 44, 53, 76, | ||||
26502 | 53, 76, 53, 44, 871, 103, 85, 162, 121, 85, | ||||
26503 | 55, 85, 90, 364, 53, 85, 1031, 38, 327, 684, | ||||
26504 | 333, 149, 71, 44, 3175, 53, 39, 236, 34, 58, | ||||
26505 | 204, 70, 76, 58, 140, 71, 333, 103, 90, 39, | ||||
26506 | 469, 34, 39, 44, 967, 876, 2855, 364, 39, 333, | ||||
26507 | 1063, 300, 70, 58, 117, 38, 711, 140, 38, 300, | ||||
26508 | 38, 108, 38, 172, 501, 807, 108, 53, 39, 359, | ||||
26509 | 876, 108, 42, 1735, 44, 42, 44, 39, 106, 268, | ||||
26510 | 138, 44, 74, 39, 236, 327, 76, 85, 333, 53, | ||||
26511 | 38, 199, 231, 44, 74, 263, 71, 711, 231, 39, | ||||
26512 | 135, 44, 39, 106, 140, 74, 74, 44, 39, 42, | ||||
26513 | 71, 103, 76, 333, 71, 87, 207, 58, 55, 76, | ||||
26514 | 42, 199, 71, 711, 231, 71, 71, 71, 44, 106, | ||||
26515 | 76, 76, 108, 44, 135, 39, 333, 76, 103, 44, | ||||
26516 | 76, 42, 295, 103, 711, 231, 71, 167, 44, 39, | ||||
26517 | 106, 172, 76, 42, 74, 44, 39, 71, 76, 333, | ||||
26518 | 53, 55, 44, 74, 263, 71, 711, 231, 71, 167, | ||||
26519 | 44, 39, 42, 44, 42, 140, 74, 74, 44, 44, | ||||
26520 | 42, 71, 103, 76, 333, 58, 39, 207, 44, 39, | ||||
26521 | 199, 103, 135, 71, 39, 71, 71, 103, 391, 74, | ||||
26522 | 44, 74, 106, 106, 44, 39, 42, 333, 111, 218, | ||||
26523 | 55, 58, 106, 263, 103, 743, 327, 167, 39, 108, | ||||
26524 | 138, 108, 140, 76, 71, 71, 76, 333, 239, 58, | ||||
26525 | 74, 263, 103, 743, 327, 167, 44, 39, 42, 44, | ||||
26526 | 170, 44, 74, 74, 76, 74, 39, 71, 76, 333, | ||||
26527 | 71, 74, 263, 103, 1319, 39, 106, 140, 106, 106, | ||||
26528 | 44, 39, 42, 71, 76, 333, 207, 58, 199, 74, | ||||
26529 | 583, 775, 295, 39, 231, 44, 106, 108, 44, 266, | ||||
26530 | 74, 53, 1543, 44, 71, 236, 55, 199, 38, 268, | ||||
26531 | 53, 333, 85, 71, 39, 71, 39, 39, 135, 231, | ||||
26532 | 103, 39, 39, 71, 135, 44, 71, 204, 76, 39, | ||||
26533 | 167, 38, 204, 333, 135, 39, 122, 501, 58, 53, | ||||
26534 | 122, 76, 218, 333, 335, 58, 44, 58, 44, 58, | ||||
26535 | 44, 54, 50, 54, 50, 74, 263, 1159, 460, 42, | ||||
26536 | 172, 53, 76, 167, 364, 1164, 282, 44, 218, 90, | ||||
26537 | 181, 154, 85, 1383, 74, 140, 42, 204, 42, 76, | ||||
26538 | 74, 76, 39, 333, 213, 199, 74, 76, 135, 108, | ||||
26539 | 39, 106, 71, 234, 103, 140, 423, 44, 74, 76, | ||||
26540 | 202, 44, 39, 42, 333, 106, 44, 90, 1225, 41, | ||||
26541 | 41, 1383, 53, 38, 10631, 135, 231, 39, 135, 1319, | ||||
26542 | 135, 1063, 135, 231, 39, 135, 487, 1831, 135, 2151, | ||||
26543 | 108, 309, 655, 519, 346, 2727, 49, 19847, 85, 551, | ||||
26544 | 61, 839, 54, 50, 2407, 117, 110, 423, 135, 108, | ||||
26545 | 583, 108, 85, 583, 76, 423, 103, 76, 1671, 76, | ||||
26546 | 42, 236, 266, 44, 74, 364, 117, 38, 117, 55, | ||||
26547 | 39, 44, 333, 335, 213, 49, 149, 108, 61, 333, | ||||
26548 | 1127, 38, 1671, 1319, 44, 39, 2247, 935, 108, 138, | ||||
26549 | 76, 106, 74, 44, 202, 108, 58, 85, 333, 967, | ||||
26550 | 167, 1415, 554, 231, 74, 333, 47, 1114, 743, 76, | ||||
26551 | 106, 85, 1703, 42, 44, 42, 236, 44, 42, 44, | ||||
26552 | 74, 268, 202, 332, 44, 333, 333, 245, 38, 213, | ||||
26553 | 140, 42, 1511, 44, 42, 172, 42, 44, 170, 44, | ||||
26554 | 74, 231, 333, 245, 346, 300, 314, 76, 42, 967, | ||||
26555 | 42, 140, 74, 76, 42, 44, 74, 71, 333, 1415, | ||||
26556 | 44, 42, 76, 106, 44, 42, 108, 74, 149, 1159, | ||||
26557 | 266, 268, 74, 76, 181, 333, 103, 333, 967, 198, | ||||
26558 | 85, 277, 108, 53, 428, 42, 236, 135, 44, 135, | ||||
26559 | 74, 44, 71, 1413, 2022, 421, 38, 1093, 1190, 1260, | ||||
26560 | 140, 4830, 261, 3166, 261, 265, 197, 201, 261, 265, | ||||
26561 | 261, 265, 197, 201, 261, 41, 41, 41, 94, 229, | ||||
26562 | 265, 453, 261, 264, 261, 264, 261, 264, 165, 69, | ||||
26563 | 137, 40, 56, 37, 120, 101, 69, 137, 40, 120, | ||||
26564 | 133, 69, 137, 120, 261, 169, 120, 101, 69, 137, | ||||
26565 | 40, 88, 381, 162, 209, 85, 52, 51, 54, 84, | ||||
26566 | 51, 54, 52, 277, 59, 60, 162, 61, 309, 52, | ||||
26567 | 51, 149, 80, 117, 57, 54, 50, 373, 57, 53, | ||||
26568 | 48, 341, 61, 162, 194, 47, 38, 207, 121, 54, | ||||
26569 | 50, 38, 335, 121, 54, 50, 422, 855, 428, 139, | ||||
26570 | 44, 107, 396, 90, 41, 154, 41, 90, 37, 105, | ||||
26571 | 69, 105, 37, 58, 41, 90, 57, 169, 218, 41, | ||||
26572 | 58, 41, 58, 41, 58, 137, 58, 37, 137, 37, | ||||
26573 | 135, 37, 90, 69, 73, 185, 94, 101, 58, 57, | ||||
26574 | 90, 37, 58, 527, 1134, 94, 142, 47, 185, 186, | ||||
26575 | 89, 154, 57, 90, 57, 90, 57, 250, 57, 1018, | ||||
26576 | 89, 90, 57, 58, 57, 1018, 8601, 282, 153, 666, | ||||
26577 | 89, 250, 54, 50, 2618, 57, 986, 825, 1306, 217, | ||||
26578 | 602, 1274, 378, 1935, 2522, 719, 5882, 57, 314, 57, | ||||
26579 | 1754, 281, 3578, 57, 4634, 3322, 54, 50, 54, 50, | ||||
26580 | 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, | ||||
26581 | 975, 1434, 185, 54, 50, 1017, 54, 50, 54, 50, | ||||
26582 | 54, 50, 54, 50, 54, 50, 537, 8218, 4217, 54, | ||||
26583 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, | ||||
26584 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, | ||||
26585 | 50, 2041, 54, 50, 54, 50, 1049, 54, 50, 8281, | ||||
26586 | 1562, 697, 90, 217, 346, 1513, 1509, 126, 73, 69, | ||||
26587 | 254, 105, 37, 94, 37, 94, 165, 70, 105, 37, | ||||
26588 | 3166, 37, 218, 158, 108, 94, 149, 47, 85, 1221, | ||||
26589 | 37, 37, 1799, 38, 53, 44, 743, 231, 231, 231, | ||||
26590 | 231, 231, 231, 231, 231, 1036, 85, 52, 51, 52, | ||||
26591 | 51, 117, 52, 51, 53, 52, 51, 309, 49, 85, | ||||
26592 | 49, 53, 52, 51, 85, 52, 51, 54, 50, 54, | ||||
26593 | 50, 54, 50, 54, 50, 181, 38, 341, 81, 858, | ||||
26594 | 2874, 6874, 410, 61, 117, 58, 38, 39, 46, 54, | ||||
26595 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 90, | ||||
26596 | 54, 50, 54, 50, 54, 50, 54, 50, 49, 54, | ||||
26597 | 82, 58, 302, 140, 74, 49, 166, 90, 110, 38, | ||||
26598 | 39, 53, 90, 2759, 76, 88, 70, 39, 49, 2887, | ||||
26599 | 53, 102, 39, 1319, 3015, 90, 143, 346, 871, 1178, | ||||
26600 | 519, 1018, 335, 986, 271, 58, 495, 1050, 335, 1274, | ||||
26601 | 495, 2042, 8218, 39, 39, 2074, 39, 39, 679, 38, | ||||
26602 | 36583, 1786, 1287, 198, 85, 8583, 38, 117, 519, 333, | ||||
26603 | 71, 1502, 39, 44, 107, 53, 332, 53, 38, 798, | ||||
26604 | 44, 2247, 334, 76, 213, 760, 294, 88, 478, 69, | ||||
26605 | 2014, 38, 261, 190, 350, 38, 88, 158, 158, 382, | ||||
26606 | 70, 37, 231, 44, 103, 44, 135, 44, 743, 74, | ||||
26607 | 76, 42, 154, 207, 90, 55, 58, 1671, 149, 74, | ||||
26608 | 1607, 522, 44, 85, 333, 588, 199, 117, 39, 333, | ||||
26609 | 903, 268, 85, 743, 364, 74, 53, 935, 108, 42, | ||||
26610 | 1511, 44, 74, 140, 74, 44, 138, 437, 38, 333, | ||||
26611 | 85, 1319, 204, 74, 76, 74, 76, 103, 44, 263, | ||||
26612 | 44, 42, 333, 149, 519, 38, 199, 122, 39, 42, | ||||
26613 | 1543, 44, 39, 108, 71, 76, 167, 76, 39, 44, | ||||
26614 | 39, 71, 38, 85, 359, 42, 76, 74, 85, 39, | ||||
26615 | 70, 42, 44, 199, 199, 199, 231, 231, 1127, 74, | ||||
26616 | 44, 74, 44, 74, 53, 42, 44, 333, 39, 39, | ||||
26617 | 743, 1575, 36, 68, 68, 36, 63, 63, 11719, 3399, | ||||
26618 | 229, 165, 39, 44, 327, 57, 423, 167, 39, 71, | ||||
26619 | 71, 3463, 536, 11623, 54, 50, 2055, 1735, 391, 55, | ||||
26620 | 58, 524, 245, 54, 50, 53, 236, 53, 81, 80, | ||||
26621 | 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, | ||||
26622 | 54, 50, 54, 50, 54, 50, 85, 54, 50, 149, | ||||
26623 | 112, 117, 149, 49, 54, 50, 54, 50, 54, 50, | ||||
26624 | 117, 57, 49, 121, 53, 55, 85, 167, 4327, 34, | ||||
26625 | 117, 55, 117, 54, 50, 53, 57, 53, 49, 85, | ||||
26626 | 333, 85, 121, 85, 841, 54, 53, 50, 56, 48, | ||||
26627 | 56, 837, 54, 57, 50, 57, 54, 50, 53, 54, | ||||
26628 | 50, 85, 327, 38, 1447, 70, 999, 199, 199, 199, | ||||
26629 | 103, 87, 57, 56, 58, 87, 58, 153, 90, 98, | ||||
26630 | 90, 391, 839, 615, 71, 487, 455, 3943, 117, 1455, | ||||
26631 | 314, 1710, 143, 570, 47, 410, 1466, 44, 935, 1575, | ||||
26632 | 999, 143, 551, 46, 263, 46, 967, 53, 1159, 263, | ||||
26633 | 53, 174, 1289, 1285, 2503, 333, 199, 39, 1415, 71, | ||||
26634 | 39, 743, 53, 271, 711, 207, 53, 839, 53, 1799, | ||||
26635 | 71, 39, 108, 76, 140, 135, 103, 871, 108, 44, | ||||
26636 | 271, 309, 935, 79, 53, 1735, 245, 711, 271, 615, | ||||
26637 | 271, 2343, 1007, 42, 44, 42, 1703, 492, 245, 655, | ||||
26638 | 333, 76, 42, 1447, 106, 140, 74, 76, 85, 34, | ||||
26639 | 149, 807, 333, 108, 1159, 172, 42, 268, 333, 149, | ||||
26640 | 76, 42, 1543, 106, 300, 74, 135, 149, 333, 1383, | ||||
26641 | 44, 42, 44, 74, 204, 42, 44, 333, 28135, 3182, | ||||
26642 | 149, 34279, 18215, 2215, 39, 1482, 140, 422, 71, 7898, | ||||
26643 | 1274, 1946, 74, 108, 122, 202, 258, 268, 90, 236, | ||||
26644 | 986, 140, 1562, 2138, 108, 58, 2810, 591, 841, 837, | ||||
26645 | 841, 229, 581, 841, 837, 41, 73, 41, 73, 137, | ||||
26646 | 265, 133, 37, 229, 357, 841, 837, 73, 137, 265, | ||||
26647 | 233, 837, 73, 137, 169, 41, 233, 837, 841, 837, | ||||
26648 | 841, 837, 841, 837, 841, 837, 841, 837, 841, 901, | ||||
26649 | 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, | ||||
26650 | 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, | ||||
26651 | 809, 57, 805, 57, 197, 94, 1613, 135, 871, 71, | ||||
26652 | 39, 39, 327, 135, 39, 39, 39, 39, 39, 39, | ||||
26653 | 103, 71, 39, 39, 39, 39, 39, 39, 71, 39, | ||||
26654 | 135, 231, 135, 135, 39, 327, 551, 103, 167, 551, | ||||
26655 | 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946, | ||||
26656 | 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210, | ||||
26657 | 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266, | ||||
26658 | 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351, | ||||
26659 | 34, 3074, 7692, 63, 63, | ||||
26660 | }; | ||||
26661 | |||||
26662 | static int sqlite3Fts5UnicodeCategory(u32 iCode) { | ||||
26663 | int iRes = -1; | ||||
26664 | int iHi; | ||||
26665 | int iLo; | ||||
26666 | int ret; | ||||
26667 | u16 iKey; | ||||
26668 | |||||
26669 | if( iCode>=(1<<20) ){ | ||||
26670 | return 0; | ||||
26671 | } | ||||
26672 | iLo = aFts5UnicodeBlock[(iCode>>16)]; | ||||
26673 | iHi = aFts5UnicodeBlock[1+(iCode>>16)]; | ||||
26674 | iKey = (iCode & 0xFFFF); | ||||
26675 | while( iHi>iLo ){ | ||||
26676 | int iTest = (iHi + iLo) / 2; | ||||
26677 | assert( iTest>=iLo && iTest<iHi )((void) (0)); | ||||
26678 | if( iKey>=aFts5UnicodeMap[iTest] ){ | ||||
26679 | iRes = iTest; | ||||
26680 | iLo = iTest+1; | ||||
26681 | }else{ | ||||
26682 | iHi = iTest; | ||||
26683 | } | ||||
26684 | } | ||||
26685 | |||||
26686 | if( iRes<0 ) return 0; | ||||
26687 | if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0; | ||||
26688 | ret = aFts5UnicodeData[iRes] & 0x1F; | ||||
26689 | if( ret!=30 ) return ret; | ||||
26690 | return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 5 : 9; | ||||
26691 | } | ||||
26692 | |||||
26693 | static void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){ | ||||
26694 | int i = 0; | ||||
26695 | int iTbl = 0; | ||||
26696 | while( i<128 ){ | ||||
26697 | int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ]; | ||||
26698 | int n = (aFts5UnicodeData[iTbl] >> 5) + i; | ||||
26699 | for(; i<128 && i<n; i++){ | ||||
26700 | aAscii[i] = (u8)bToken; | ||||
26701 | } | ||||
26702 | iTbl++; | ||||
26703 | } | ||||
26704 | aAscii[0] = 0; /* 0x00 is never a token character */ | ||||
26705 | } | ||||
26706 | |||||
26707 | #line 1 "fts5_varint.c" | ||||
26708 | /* | ||||
26709 | ** 2015 May 30 | ||||
26710 | ** | ||||
26711 | ** The author disclaims copyright to this source code. In place of | ||||
26712 | ** a legal notice, here is a blessing: | ||||
26713 | ** | ||||
26714 | ** May you do good and not evil. | ||||
26715 | ** May you find forgiveness for yourself and forgive others. | ||||
26716 | ** May you share freely, never taking more than you give. | ||||
26717 | ** | ||||
26718 | ****************************************************************************** | ||||
26719 | ** | ||||
26720 | ** Routines for varint serialization and deserialization. | ||||
26721 | */ | ||||
26722 | |||||
26723 | |||||
26724 | /* #include "fts5Int.h" */ | ||||
26725 | |||||
26726 | /* | ||||
26727 | ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. | ||||
26728 | ** Except, this version does handle the single byte case that the core | ||||
26729 | ** version depends on being handled before its function is called. | ||||
26730 | */ | ||||
26731 | static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ | ||||
26732 | u32 a,b; | ||||
26733 | |||||
26734 | /* The 1-byte case. Overwhelmingly the most common. */ | ||||
26735 | a = *p; | ||||
26736 | /* a: p0 (unmasked) */ | ||||
26737 | if (!(a&0x80)) | ||||
26738 | { | ||||
26739 | /* Values between 0 and 127 */ | ||||
26740 | *v = a; | ||||
26741 | return 1; | ||||
26742 | } | ||||
26743 | |||||
26744 | /* The 2-byte case */ | ||||
26745 | p++; | ||||
26746 | b = *p; | ||||
26747 | /* b: p1 (unmasked) */ | ||||
26748 | if (!(b&0x80)) | ||||
26749 | { | ||||
26750 | /* Values between 128 and 16383 */ | ||||
26751 | a &= 0x7f; | ||||
26752 | a = a<<7; | ||||
26753 | *v = a | b; | ||||
26754 | return 2; | ||||
26755 | } | ||||
26756 | |||||
26757 | /* The 3-byte case */ | ||||
26758 | p++; | ||||
26759 | a = a<<14; | ||||
26760 | a |= *p; | ||||
26761 | /* a: p0<<14 | p2 (unmasked) */ | ||||
26762 | if (!(a&0x80)) | ||||
26763 | { | ||||
26764 | /* Values between 16384 and 2097151 */ | ||||
26765 | a &= (0x7f<<14)|(0x7f); | ||||
26766 | b &= 0x7f; | ||||
26767 | b = b<<7; | ||||
26768 | *v = a | b; | ||||
26769 | return 3; | ||||
26770 | } | ||||
26771 | |||||
26772 | /* A 32-bit varint is used to store size information in btrees. | ||||
26773 | ** Objects are rarely larger than 2MiB limit of a 3-byte varint. | ||||
26774 | ** A 3-byte varint is sufficient, for example, to record the size | ||||
26775 | ** of a 1048569-byte BLOB or string. | ||||
26776 | ** | ||||
26777 | ** We only unroll the first 1-, 2-, and 3- byte cases. The very | ||||
26778 | ** rare larger cases can be handled by the slower 64-bit varint | ||||
26779 | ** routine. | ||||
26780 | */ | ||||
26781 | { | ||||
26782 | u64 v64; | ||||
26783 | u8 n; | ||||
26784 | p -= 2; | ||||
26785 | n = sqlite3Fts5GetVarint(p, &v64); | ||||
26786 | *v = ((u32)v64) & 0x7FFFFFFF; | ||||
26787 | assert( n>3 && n<=9 )((void) (0)); | ||||
26788 | return n; | ||||
26789 | } | ||||
26790 | } | ||||
26791 | |||||
26792 | |||||
26793 | /* | ||||
26794 | ** Bitmasks used by sqlite3Fts5GetVarint(). These precomputed constants | ||||
26795 | ** are defined here rather than simply putting the constant expressions | ||||
26796 | ** inline in order to work around bugs in the RVT compiler. | ||||
26797 | ** | ||||
26798 | ** SLOT_2_0 A mask for (0x7f<<14) | 0x7f | ||||
26799 | ** | ||||
26800 | ** SLOT_4_2_0 A mask for (0xfU<<28) | SLOT_2_0, i.e. the 32-bit value of (0x7f<<28) | SLOT_2_0 | ||||
26801 | */ | ||||
26802 | #define SLOT_2_00x001fc07f 0x001fc07f | ||||
26803 | #define SLOT_4_2_00xf01fc07f 0xf01fc07f | ||||
26804 | |||||
26805 | /* | ||||
26806 | ** Read a 64-bit variable-length integer from memory starting at p[0]. | ||||
26807 | ** Return the number of bytes read. The value is stored in *v. | ||||
      | ** | ||||
      | ** Bytes are consumed until one with the 0x80 bit clear is found, up to | ||||
      | ** a maximum of nine, so the return value is between 1 and 9. If a ninth | ||||
      | ** byte is reached it contributes all 8 of its bits. | ||||
      | ** | ||||
      | ** The decode is fully unrolled. Variables a and b accumulate alternate | ||||
      | ** input bytes (a: p0,p2,p4,p6,p8 and b: p1,p3,p5,p7) and s collects the | ||||
      | ** bits that are shifted into the upper 32 bits of the result. The | ||||
      | ** order of the statements matters, since each stage reuses the partially | ||||
      | ** shifted and masked values left behind by the previous one. | ||||
26808 | */ | ||||
26809 | static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){ | ||||
26810 | u32 a,b,s; | ||||
26811 | |||||
26812 | a = *p; | ||||
26813 | /* a: p0 (unmasked) */ | ||||
26814 | if (!(a&0x80)) | ||||
26815 | { | ||||
26816 | *v = a; | ||||
26817 | return 1; | ||||
26818 | } | ||||
26819 | |||||
26820 | p++; | ||||
26821 | b = *p; | ||||
26822 | /* b: p1 (unmasked) */ | ||||
26823 | if (!(b&0x80)) | ||||
26824 | { | ||||
26825 | a &= 0x7f; | ||||
26826 | a = a<<7; | ||||
26827 | a |= b; | ||||
26828 | *v = a; | ||||
26829 | return 2; | ||||
26830 | } | ||||
26831 | |||||
26832 | /* Verify that constants are precomputed correctly */ | ||||
26833 | assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) )((void) (0)); | ||||
26834 | assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) )((void) (0)); | ||||
26835 | |||||
26836 | p++; | ||||
26837 | a = a<<14; | ||||
26838 | a |= *p; | ||||
26839 | /* a: p0<<14 | p2 (unmasked) */ | ||||
26840 | if (!(a&0x80)) | ||||
26841 | { | ||||
26842 | a &= SLOT_2_00x001fc07f; | ||||
26843 | b &= 0x7f; | ||||
26844 | b = b<<7; | ||||
26845 | a |= b; | ||||
26846 | *v = a; | ||||
26847 | return 3; | ||||
26848 | } | ||||
26849 | |||||
26850 | /* CSE1 from below: this mask of a is shared by the 4-byte and longer cases */ | ||||
26851 | a &= SLOT_2_00x001fc07f; | ||||
26852 | p++; | ||||
26853 | b = b<<14; | ||||
26854 | b |= *p; | ||||
26855 | /* b: p1<<14 | p3 (unmasked) */ | ||||
26856 | if (!(b&0x80)) | ||||
26857 | { | ||||
26858 | b &= SLOT_2_00x001fc07f; | ||||
26859 | /* moved CSE1 up */ | ||||
26860 | /* a &= (0x7f<<14)|(0x7f); */ | ||||
26861 | a = a<<7; | ||||
26862 | a |= b; | ||||
26863 | *v = a; | ||||
26864 | return 4; | ||||
26865 | } | ||||
26866 | |||||
26867 | /* a: p0<<14 | p2 (masked) */ | ||||
26868 | /* b: p1<<14 | p3 (unmasked) */ | ||||
26869 | /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | ||||
26870 | /* moved CSE1 up */ | ||||
26871 | /* a &= (0x7f<<14)|(0x7f); */ | ||||
26872 | b &= SLOT_2_00x001fc07f; | ||||
26873 | s = a; | ||||
26874 | /* s: p0<<14 | p2 (masked) */ | ||||
26875 | |||||
26876 | p++; | ||||
26877 | a = a<<14; | ||||
26878 | a |= *p; | ||||
26879 | /* a: p0<<28 | p2<<14 | p4 (unmasked) */ | ||||
26880 | if (!(a&0x80)) | ||||
26881 | { | ||||
26882 | /* we can skip these because they were (effectively) done above in calculating s */ | ||||
26883 | /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ | ||||
26884 | /* b &= (0x7f<<14)|(0x7f); */ | ||||
26885 | b = b<<7; | ||||
26886 | a |= b; | ||||
26887 | s = s>>18; | ||||
26888 | *v = ((u64)s)<<32 | a; | ||||
26889 | return 5; | ||||
26890 | } | ||||
26891 | |||||
26892 | /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | ||||
26893 | s = s<<7; | ||||
26894 | s |= b; | ||||
26895 | /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | ||||
26896 | |||||
26897 | p++; | ||||
26898 | b = b<<14; | ||||
26899 | b |= *p; | ||||
26900 | /* b: p1<<28 | p3<<14 | p5 (unmasked) */ | ||||
26901 | if (!(b&0x80)) | ||||
26902 | { | ||||
26903 | /* we can skip this because it was (effectively) done above in calculating s */ | ||||
26904 | /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ | ||||
26905 | a &= SLOT_2_00x001fc07f; | ||||
26906 | a = a<<7; | ||||
26907 | a |= b; | ||||
26908 | s = s>>18; | ||||
26909 | *v = ((u64)s)<<32 | a; | ||||
26910 | return 6; | ||||
26911 | } | ||||
26912 | |||||
26913 | p++; | ||||
26914 | a = a<<14; | ||||
26915 | a |= *p; | ||||
26916 | /* a: p2<<28 | p4<<14 | p6 (unmasked) */ | ||||
26917 | if (!(a&0x80)) | ||||
26918 | { | ||||
26919 | a &= SLOT_4_2_00xf01fc07f; | ||||
26920 | b &= SLOT_2_00x001fc07f; | ||||
26921 | b = b<<7; | ||||
26922 | a |= b; | ||||
26923 | s = s>>11; | ||||
26924 | *v = ((u64)s)<<32 | a; | ||||
26925 | return 7; | ||||
26926 | } | ||||
26927 | |||||
26928 | /* CSE2 from below: this mask of a is shared by the 8-byte and 9-byte cases */ | ||||
26929 | a &= SLOT_2_00x001fc07f; | ||||
26930 | p++; | ||||
26931 | b = b<<14; | ||||
26932 | b |= *p; | ||||
26933 | /* b: p3<<28 | p5<<14 | p7 (unmasked) */ | ||||
26934 | if (!(b&0x80)) | ||||
26935 | { | ||||
26936 | b &= SLOT_4_2_00xf01fc07f; | ||||
26937 | /* moved CSE2 up */ | ||||
26938 | /* a &= (0x7f<<14)|(0x7f); */ | ||||
26939 | a = a<<7; | ||||
26940 | a |= b; | ||||
26941 | s = s>>4; | ||||
26942 | *v = ((u64)s)<<32 | a; | ||||
26943 | return 8; | ||||
26944 | } | ||||
26945 | |||||
26946 | p++; | ||||
26947 | a = a<<15; | ||||
26948 | a |= *p; | ||||
26949 | /* a: p4<<29 | p6<<15 | p8 (unmasked); the ninth byte is taken whole, with no flag test */ | ||||
26950 | |||||
26951 | /* moved CSE2 up */ | ||||
26952 | /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */ | ||||
26953 | b &= SLOT_2_00x001fc07f; | ||||
26954 | b = b<<8; | ||||
26955 | a |= b; | ||||
26956 | |||||
26957 | s = s<<4; | ||||
26958 | b = p[-4]; /* p has been advanced eight times, so p[-4] re-reads the fifth input byte (p4) */ | ||||
26959 | b &= 0x7f; | ||||
26960 | b = b>>3; | ||||
26961 | s |= b; | ||||
26962 | |||||
26963 | *v = ((u64)s)<<32 | a; | ||||
26964 | |||||
26965 | return 9; | ||||
26966 | } | ||||
26967 | |||||
26968 | /* | ||||
26969 | ** The variable-length integer encoding is as follows: | ||||
26970 | ** | ||||
26971 | ** KEY: | ||||
26972 | ** A = 0xxxxxxx 7 bits of data and one flag bit | ||||
26973 | ** B = 1xxxxxxx 7 bits of data and one flag bit | ||||
26974 | ** C = xxxxxxxx 8 bits of data | ||||
26975 | ** | ||||
26976 | ** 7 bits - A | ||||
26977 | ** 14 bits - BA | ||||
26978 | ** 21 bits - BBA | ||||
26979 | ** 28 bits - BBBA | ||||
26980 | ** 35 bits - BBBBA | ||||
26981 | ** 42 bits - BBBBBA | ||||
26982 | ** 49 bits - BBBBBBA | ||||
26983 | ** 56 bits - BBBBBBBA | ||||
26984 | ** 64 bits - BBBBBBBBC | ||||
26985 | */ | ||||
26986 | |||||
26987 | #ifdef SQLITE_NOINLINE | ||||
26988 | # define FTS5_NOINLINE SQLITE_NOINLINE | ||||
26989 | #else | ||||
26990 | # define FTS5_NOINLINE | ||||
26991 | #endif | ||||
26992 | |||||
26993 | /* | ||||
26994 | ** Write a 64-bit variable-length integer to memory starting at p[0]. | ||||
26995 | ** The length of data write will be between 1 and 9 bytes. The number | ||||
26996 | ** of bytes written is returned. | ||||
26997 | ** | ||||
26998 | ** A variable-length integer consists of the lower 7 bits of each byte | ||||
26999 | ** for all bytes that have the 8th bit set and one byte with the 8th | ||||
27000 | ** bit clear. Except, if we get to the 9th byte, it stores the full | ||||
27001 | ** 8 bits and is the last byte. | ||||
27002 | */ | ||||
27003 | static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){ | ||||
27004 | int i, j, n; | ||||
27005 | u8 buf[10]; | ||||
27006 | if( v & (((u64)0xff000000)<<32) ){ | ||||
27007 | p[8] = (u8)v; | ||||
27008 | v >>= 8; | ||||
27009 | for(i=7; i>=0; i--){ | ||||
27010 | p[i] = (u8)((v & 0x7f) | 0x80); | ||||
27011 | v >>= 7; | ||||
27012 | } | ||||
27013 | return 9; | ||||
27014 | } | ||||
27015 | n = 0; | ||||
27016 | do{ | ||||
27017 | buf[n++] = (u8)((v & 0x7f) | 0x80); | ||||
27018 | v >>= 7; | ||||
27019 | }while( v!=0 ); | ||||
27020 | buf[0] &= 0x7f; | ||||
27021 | assert( n<=9 )((void) (0)); | ||||
27022 | for(i=0, j=n-1; j>=0; j--, i++){ | ||||
27023 | p[i] = buf[j]; | ||||
27024 | } | ||||
27025 | return n; | ||||
27026 | } | ||||
27027 | |||||
27028 | static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){ | ||||
27029 | if( v<=0x7f ){ | ||||
27030 | p[0] = v&0x7f; | ||||
27031 | return 1; | ||||
27032 | } | ||||
27033 | if( v<=0x3fff ){ | ||||
27034 | p[0] = ((v>>7)&0x7f)|0x80; | ||||
27035 | p[1] = v&0x7f; | ||||
27036 | return 2; | ||||
27037 | } | ||||
27038 | return fts5PutVarint64(p,v); | ||||
27039 | } | ||||
27040 | |||||
27041 | |||||
27042 | static int sqlite3Fts5GetVarintLen(u32 iVal){ | ||||
27043 | #if 0 | ||||
27044 | if( iVal<(1 << 7 ) ) return 1; | ||||
27045 | #endif | ||||
27046 | assert( iVal>=(1 << 7) )((void) (0)); | ||||
27047 | if( iVal<(1 << 14) ) return 2; | ||||
27048 | if( iVal<(1 << 21) ) return 3; | ||||
27049 | if( iVal<(1 << 28) ) return 4; | ||||
27050 | return 5; | ||||
27051 | } | ||||
27052 | |||||
27053 | #line 1 "fts5_vocab.c" | ||||
27054 | /* | ||||
27055 | ** 2015 May 08 | ||||
27056 | ** | ||||
27057 | ** The author disclaims copyright to this source code. In place of | ||||
27058 | ** a legal notice, here is a blessing: | ||||
27059 | ** | ||||
27060 | ** May you do good and not evil. | ||||
27061 | ** May you find forgiveness for yourself and forgive others. | ||||
27062 | ** May you share freely, never taking more than you give. | ||||
27063 | ** | ||||
27064 | ****************************************************************************** | ||||
27065 | ** | ||||
27066 | ** This is an SQLite virtual table module implementing direct access to an | ||||
27067 | ** existing FTS5 index. The module may create several different types of | ||||
27068 | ** tables: | ||||
27069 | ** | ||||
27070 | ** col: | ||||
27071 | ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col)); | ||||
27072 | ** | ||||
27073 | ** One row for each term/column combination. The value of $doc is set to | ||||
27074 | ** the number of fts5 rows that contain at least one instance of term | ||||
27075 | ** $term within column $col. Field $cnt is set to the total number of | ||||
27076 | ** instances of term $term in column $col (in any row of the fts5 table). | ||||
27077 | ** | ||||
27078 | ** row: | ||||
27079 | ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term)); | ||||
27080 | ** | ||||
27081 | ** One row for each term in the database. The value of $doc is set to | ||||
27082 | ** the number of fts5 rows that contain at least one instance of term | ||||
27083 | ** $term. Field $cnt is set to the total number of instances of term | ||||
27084 | ** $term in the database. | ||||
27085 | ** | ||||
27086 | ** instance: | ||||
27087 | ** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>)); | ||||
27088 | ** | ||||
27089 | ** One row for each term instance in the database. | ||||
27090 | */ | ||||
27091 | |||||
27092 | |||||
27093 | /* #include "fts5Int.h" */ | ||||
27094 | |||||
27095 | |||||
27096 | typedef struct Fts5VocabTable Fts5VocabTable; | ||||
27097 | typedef struct Fts5VocabCursor Fts5VocabCursor; | ||||
27098 | |||||
27099 | struct Fts5VocabTable { /* One fts5vocab virtual table; the vtab methods cast sqlite3_vtab* to this type */ | ||||
27100 | sqlite3_vtab base; /* sqlite3_vtab header */ | ||||
27101 | char *zFts5Tbl; /* Name of fts5 table (dequoted copy stored after this struct) */ | ||||
27102 | char *zFts5Db; /* Db containing fts5 table (dequoted copy stored after zFts5Tbl) */ | ||||
27103 | sqlite3 *db; /* Database handle */ | ||||
27104 | Fts5Global *pGlobal; /* FTS5 global object for this database */ | ||||
27105 | int eType; /* FTS5_VOCAB_COL, ROW or INSTANCE */ | ||||
27106 | unsigned bBusy; /* True while fts5VocabOpenMethod() runs its lookup query; a nested open then fails with "recursive definition" */ | ||||
27107 | }; | ||||
27108 | |||||
27109 | struct Fts5VocabCursor { /* Cursor over an fts5vocab table; handed to SQLite as a sqlite3_vtab_cursor* */ | ||||
27110 | sqlite3_vtab_cursor base; /* sqlite3_vtab_cursor header */ | ||||
27111 | sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */ | ||||
27112 | Fts5Table *pFts5; /* Associated FTS5 table */ | ||||
27113 | |||||
27114 | int bEof; /* True if this cursor is at EOF */ | ||||
27115 | Fts5IndexIter *pIter; /* Term/rowid iterator object */ | ||||
27116 | void *pStruct; /* From sqlite3Fts5StructureRef() */ | ||||
27117 | |||||
27118 | int nLeTerm; /* Size of zLeTerm in bytes (-1 after fts5VocabResetCursor()) */ | ||||
27119 | char *zLeTerm; /* (term <= $zLeTerm) parameter, or NULL */ | ||||
27120 | int colUsed; /* Copy of sqlite3_index_info.colUsed */ | ||||
27121 | |||||
27122 | /* These are used by 'col' tables only */ | ||||
27123 | int iCol; | ||||
27124 | i64 *aCnt; /* Array of pConfig->nCol values, located directly after this struct */ | ||||
27125 | i64 *aDoc; /* Array of pConfig->nCol values, located directly after aCnt[] */ | ||||
27126 | |||||
27127 | /* Output values used by all tables. */ | ||||
27128 | i64 rowid; /* This table's current rowid value */ | ||||
27129 | Fts5Buffer term; /* Current value of 'term' column */ | ||||
27130 | |||||
27131 | /* Output values used by 'instance' tables only */ | ||||
27132 | i64 iInstPos; | ||||
27133 | int iInstOff; | ||||
27134 | }; | ||||
27135 | |||||
27136 | #define FTS5_VOCAB_COL0 0 /* Table types; also the index into azSchema[] in fts5VocabInitVtab() */ | ||||
27137 | #define FTS5_VOCAB_ROW1 1 | ||||
27138 | #define FTS5_VOCAB_INSTANCE2 2 | ||||
27139 | |||||
27140 | #define FTS5_VOCAB_COL_SCHEMA"term, col, doc, cnt" "term, col, doc, cnt" /* Column lists declared via sqlite3_declare_vtab() */ | ||||
27141 | #define FTS5_VOCAB_ROW_SCHEMA"term, doc, cnt" "term, doc, cnt" | ||||
27142 | #define FTS5_VOCAB_INST_SCHEMA"term, doc, col, offset" "term, doc, col, offset" | ||||
27143 | |||||
27144 | /* | ||||
27145 | ** Bits for the mask used as the idxNum value by xBestIndex/xFilter. | ||||
      | ** fts5VocabBestIndexMethod() starts idxNum from sqlite3_index_info.colUsed | ||||
      | ** (asserted to fit within FTS5_VOCAB_COLUSED_MASK) and ORs these flags in. | ||||
27146 | */ | ||||
27147 | #define FTS5_VOCAB_TERM_EQ0x0100 0x0100 | ||||
27148 | #define FTS5_VOCAB_TERM_GE0x0200 0x0200 | ||||
27149 | #define FTS5_VOCAB_TERM_LE0x0400 0x0400 | ||||
27150 | |||||
27151 | #define FTS5_VOCAB_COLUSED_MASK0xFF 0xFF /* Bits of idxNum that carry colUsed */ | ||||
27152 | |||||
27153 | |||||
27154 | /* | ||||
27155 | ** Translate a string containing an fts5vocab table type to an | ||||
27156 | ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output | ||||
27157 | ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message | ||||
27158 | ** and return SQLITE_ERROR. | ||||
27159 | */ | ||||
27160 | static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ | ||||
27161 | int rc = SQLITE_OK0; | ||||
27162 | char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1); | ||||
27163 | if( rc==SQLITE_OK0 ){ | ||||
27164 | sqlite3Fts5Dequote(zCopy); | ||||
27165 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "col")==0 ){ | ||||
27166 | *peType = FTS5_VOCAB_COL0; | ||||
27167 | }else | ||||
27168 | |||||
27169 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "row")==0 ){ | ||||
27170 | *peType = FTS5_VOCAB_ROW1; | ||||
27171 | }else | ||||
27172 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "instance")==0 ){ | ||||
27173 | *peType = FTS5_VOCAB_INSTANCE2; | ||||
27174 | }else | ||||
27175 | { | ||||
27176 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("fts5vocab: unknown table type: %Q", zCopy); | ||||
27177 | rc = SQLITE_ERROR1; | ||||
27178 | } | ||||
27179 | sqlite3_freesqlite3_api->free(zCopy); | ||||
27180 | } | ||||
27181 | |||||
27182 | return rc; | ||||
27183 | } | ||||
27184 | |||||
27185 | |||||
27186 | /* | ||||
27187 | ** The xDisconnect() virtual table method. | ||||
27188 | */ | ||||
27189 | static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){ | ||||
27190 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; | ||||
27191 | sqlite3_freesqlite3_api->free(pTab); | ||||
27192 | return SQLITE_OK0; | ||||
27193 | } | ||||
27194 | |||||
27195 | /* | ||||
27196 | ** The xDestroy() virtual table method. | ||||
27197 | */ | ||||
27198 | static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ | ||||
27199 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; | ||||
27200 | sqlite3_freesqlite3_api->free(pTab); | ||||
27201 | return SQLITE_OK0; | ||||
27202 | } | ||||
27203 | |||||
27204 | /* | ||||
27205 | ** This function is the implementation of both the xConnect and xCreate | ||||
27206 | ** methods of the FTS3 virtual table. | ||||
27207 | ** | ||||
27208 | ** The argv[] array contains the following: | ||||
27209 | ** | ||||
27210 | ** argv[0] -> module name ("fts5vocab") | ||||
27211 | ** argv[1] -> database name | ||||
27212 | ** argv[2] -> table name | ||||
27213 | ** | ||||
27214 | ** then: | ||||
27215 | ** | ||||
27216 | ** argv[3] -> name of fts5 table | ||||
27217 | ** argv[4] -> type of fts5vocab table | ||||
27218 | ** | ||||
27219 | ** or, for tables in the TEMP schema only. | ||||
27220 | ** | ||||
27221 | ** argv[3] -> name of fts5 tables database | ||||
27222 | ** argv[4] -> name of fts5 table | ||||
27223 | ** argv[5] -> type of fts5vocab table | ||||
27224 | */ | ||||
27225 | static int fts5VocabInitVtab( | ||||
27226 | sqlite3 *db, /* The SQLite database connection */ | ||||
27227 | void *pAux, /* Pointer to Fts5Global object */ | ||||
27228 | int argc, /* Number of elements in argv array */ | ||||
27229 | const char * const *argv, /* xCreate/xConnect argument array */ | ||||
27230 | sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ | ||||
27231 | char **pzErr /* Write any error message here */ | ||||
27232 | ){ | ||||
27233 | const char *azSchema[] = { | ||||
27234 | "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA"term, col, doc, cnt" ")", | ||||
27235 | "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA"term, doc, cnt" ")", | ||||
27236 | "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA"term, doc, col, offset" ")" | ||||
27237 | }; | ||||
27238 | |||||
27239 | Fts5VocabTable *pRet = 0; | ||||
27240 | int rc = SQLITE_OK0; /* Return code */ | ||||
27241 | int bDb; | ||||
27242 | |||||
27243 | bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0); | ||||
27244 | |||||
27245 | if( argc!=5 && bDb==0 ){ | ||||
27246 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("wrong number of vtable arguments"); | ||||
27247 | rc = SQLITE_ERROR1; | ||||
27248 | }else{ | ||||
27249 | i64 nByte; /* Bytes of space to allocate */ | ||||
27250 | const char *zDb = bDb ? argv[3] : argv[1]; | ||||
27251 | const char *zTab = bDb ? argv[4] : argv[3]; | ||||
27252 | const char *zType = bDb ? argv[5] : argv[4]; | ||||
27253 | i64 nDb = strlen(zDb)+1; | ||||
27254 | i64 nTab = strlen(zTab)+1; | ||||
27255 | int eType = 0; | ||||
27256 | |||||
27257 | rc = fts5VocabTableType(zType, pzErr, &eType); | ||||
27258 | if( rc==SQLITE_OK0 ){ | ||||
27259 | assert( eType>=0 && eType<ArraySize(azSchema) )((void) (0)); | ||||
27260 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, azSchema[eType]); | ||||
27261 | } | ||||
27262 | |||||
27263 | nByte = sizeof(Fts5VocabTable) + nDb + nTab; | ||||
27264 | pRet = sqlite3Fts5MallocZero(&rc, nByte); | ||||
27265 | if( pRet ){ | ||||
27266 | pRet->pGlobal = (Fts5Global*)pAux; | ||||
27267 | pRet->eType = eType; | ||||
27268 | pRet->db = db; | ||||
27269 | pRet->zFts5Tbl = (char*)&pRet[1]; | ||||
27270 | pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; | ||||
27271 | memcpy(pRet->zFts5Tbl, zTab, nTab); | ||||
27272 | memcpy(pRet->zFts5Db, zDb, nDb); | ||||
27273 | sqlite3Fts5Dequote(pRet->zFts5Tbl); | ||||
27274 | sqlite3Fts5Dequote(pRet->zFts5Db); | ||||
27275 | } | ||||
27276 | } | ||||
27277 | |||||
27278 | *ppVTab = (sqlite3_vtab*)pRet; | ||||
27279 | return rc; | ||||
27280 | } | ||||
27281 | |||||
27282 | |||||
27283 | /* | ||||
27284 | ** The xConnect() and xCreate() methods for the virtual table. All the | ||||
27285 | ** work is done in function fts5VocabInitVtab(). | ||||
27286 | */ | ||||
27287 | static int fts5VocabConnectMethod( | ||||
27288 | sqlite3 *db, /* Database connection */ | ||||
27289 | void *pAux, /* Pointer to tokenizer hash table */ | ||||
27290 | int argc, /* Number of elements in argv array */ | ||||
27291 | const char * const *argv, /* xCreate/xConnect argument array */ | ||||
27292 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | ||||
27293 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | ||||
27294 | ){ | ||||
27295 | return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); | ||||
27296 | } | ||||
27297 | static int fts5VocabCreateMethod( | ||||
27298 | sqlite3 *db, /* Database connection */ | ||||
27299 | void *pAux, /* Pointer to tokenizer hash table */ | ||||
27300 | int argc, /* Number of elements in argv array */ | ||||
27301 | const char * const *argv, /* xCreate/xConnect argument array */ | ||||
27302 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | ||||
27303 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | ||||
27304 | ){ | ||||
27305 | return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); | ||||
27306 | } | ||||
27307 | |||||
27308 | /* | ||||
27309 | ** Implementation of the xBestIndex method. | ||||
27310 | ** | ||||
27311 | ** Only constraints of the form: | ||||
27312 | ** | ||||
27313 | ** term <= ? | ||||
27314 | ** term == ? | ||||
27315 | ** term >= ? | ||||
27316 | ** | ||||
27317 | ** are interpreted. Less-than and less-than-or-equal are treated | ||||
27318 | ** identically, as are greater-than and greater-than-or-equal. | ||||
27319 | */ | ||||
27320 | static int fts5VocabBestIndexMethod( | ||||
27321 | sqlite3_vtab *pUnused, | ||||
27322 | sqlite3_index_info *pInfo | ||||
27323 | ){ | ||||
27324 | int i; | ||||
27325 | int iTermEq = -1; | ||||
27326 | int iTermGe = -1; | ||||
27327 | int iTermLe = -1; | ||||
27328 | int idxNum = (int)pInfo->colUsed; | ||||
27329 | int nArg = 0; | ||||
27330 | |||||
27331 | UNUSED_PARAM(pUnused)(void)(pUnused); | ||||
27332 | |||||
27333 | assert( (pInfo->colUsed & FTS5_VOCAB_COLUSED_MASK)==pInfo->colUsed )((void) (0)); | ||||
27334 | |||||
27335 | for(i=0; i<pInfo->nConstraint; i++){ | ||||
27336 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | ||||
27337 | if( p->usable==0 ) continue; | ||||
27338 | if( p->iColumn==0 ){ /* term column */ | ||||
27339 | if( p->op==SQLITE_INDEX_CONSTRAINT_EQ2 ) iTermEq = i; | ||||
27340 | if( p->op==SQLITE_INDEX_CONSTRAINT_LE8 ) iTermLe = i; | ||||
27341 | if( p->op==SQLITE_INDEX_CONSTRAINT_LT16 ) iTermLe = i; | ||||
27342 | if( p->op==SQLITE_INDEX_CONSTRAINT_GE32 ) iTermGe = i; | ||||
27343 | if( p->op==SQLITE_INDEX_CONSTRAINT_GT4 ) iTermGe = i; | ||||
27344 | } | ||||
27345 | } | ||||
27346 | |||||
27347 | if( iTermEq>=0 ){ | ||||
27348 | idxNum |= FTS5_VOCAB_TERM_EQ0x0100; | ||||
27349 | pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg; | ||||
27350 | pInfo->estimatedCost = 100; | ||||
27351 | }else{ | ||||
27352 | pInfo->estimatedCost = 1000000; | ||||
27353 | if( iTermGe>=0 ){ | ||||
27354 | idxNum |= FTS5_VOCAB_TERM_GE0x0200; | ||||
27355 | pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg; | ||||
27356 | pInfo->estimatedCost = pInfo->estimatedCost / 2; | ||||
27357 | } | ||||
27358 | if( iTermLe>=0 ){ | ||||
27359 | idxNum |= FTS5_VOCAB_TERM_LE0x0400; | ||||
27360 | pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg; | ||||
27361 | pInfo->estimatedCost = pInfo->estimatedCost / 2; | ||||
27362 | } | ||||
27363 | } | ||||
27364 | |||||
27365 | /* This virtual table always delivers results in ascending order of | ||||
27366 | ** the "term" column (column 0). So if the user has requested this | ||||
27367 | ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the | ||||
27368 | ** sqlite3_index_info.orderByConsumed flag to tell the core the results | ||||
27369 | ** are already in sorted order. */ | ||||
27370 | if( pInfo->nOrderBy==1 | ||||
27371 | && pInfo->aOrderBy[0].iColumn==0 | ||||
27372 | && pInfo->aOrderBy[0].desc==0 | ||||
27373 | ){ | ||||
27374 | pInfo->orderByConsumed = 1; | ||||
27375 | } | ||||
27376 | |||||
27377 | pInfo->idxNum = idxNum; | ||||
27378 | return SQLITE_OK0; | ||||
27379 | } | ||||
27380 | |||||
27381 | /* | ||||
27382 | ** Implementation of xOpen method. | ||||
27383 | */ | ||||
27384 | static int fts5VocabOpenMethod( | ||||
27385 | sqlite3_vtab *pVTab, | ||||
27386 | sqlite3_vtab_cursor **ppCsr | ||||
27387 | ){ | ||||
27388 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; | ||||
27389 | Fts5Table *pFts5 = 0; | ||||
27390 | Fts5VocabCursor *pCsr = 0; | ||||
27391 | int rc = SQLITE_OK0; | ||||
27392 | sqlite3_stmt *pStmt = 0; | ||||
27393 | char *zSql = 0; | ||||
27394 | |||||
27395 | if( pTab->bBusy ){ | ||||
27396 | pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | ||||
27397 | "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl | ||||
27398 | ); | ||||
27399 | return SQLITE_ERROR1; | ||||
27400 | } | ||||
27401 | zSql = sqlite3Fts5Mprintf(&rc, | ||||
27402 | "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", | ||||
27403 | pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl | ||||
27404 | ); | ||||
27405 | if( zSql ){ | ||||
27406 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pTab->db, zSql, -1, &pStmt, 0); | ||||
27407 | } | ||||
27408 | sqlite3_freesqlite3_api->free(zSql); | ||||
27409 | assert( rc==SQLITE_OK || pStmt==0 )((void) (0)); | ||||
27410 | if( rc==SQLITE_ERROR1 ) rc = SQLITE_OK0; | ||||
27411 | |||||
27412 | pTab->bBusy = 1; | ||||
27413 | if( pStmt && sqlite3_stepsqlite3_api->step(pStmt)==SQLITE_ROW100 ){ | ||||
27414 | i64 iId = sqlite3_column_int64sqlite3_api->column_int64(pStmt, 0); | ||||
27415 | pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId); | ||||
27416 | } | ||||
27417 | pTab->bBusy = 0; | ||||
27418 | |||||
27419 | if( rc==SQLITE_OK0 ){ | ||||
27420 | if( pFts5==0 ){ | ||||
27421 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | ||||
27422 | pStmt = 0; | ||||
27423 | if( rc==SQLITE_OK0 ){ | ||||
27424 | pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | ||||
27425 | "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl | ||||
27426 | ); | ||||
27427 | rc = SQLITE_ERROR1; | ||||
27428 | } | ||||
27429 | }else{ | ||||
27430 | rc = sqlite3Fts5FlushToDisk(pFts5); | ||||
27431 | } | ||||
27432 | } | ||||
27433 | |||||
27434 | if( rc==SQLITE_OK0 ){ | ||||
27435 | i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor); | ||||
27436 | pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); | ||||
27437 | } | ||||
27438 | |||||
27439 | if( pCsr ){ | ||||
27440 | pCsr->pFts5 = pFts5; | ||||
27441 | pCsr->pStmt = pStmt; | ||||
27442 | pCsr->aCnt = (i64*)&pCsr[1]; | ||||
27443 | pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol]; | ||||
27444 | }else{ | ||||
27445 | sqlite3_finalizesqlite3_api->finalize(pStmt); | ||||
27446 | } | ||||
27447 | |||||
27448 | *ppCsr = (sqlite3_vtab_cursor*)pCsr; | ||||
27449 | return rc; | ||||
27450 | } | ||||
27451 | |||||
27452 | static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ | ||||
27453 | pCsr->rowid = 0; | ||||
27454 | sqlite3Fts5IterClose(pCsr->pIter); | ||||
27455 | sqlite3Fts5StructureRelease(pCsr->pStruct); | ||||
27456 | pCsr->pStruct = 0; | ||||
27457 | pCsr->pIter = 0; | ||||
27458 | sqlite3_freesqlite3_api->free(pCsr->zLeTerm); | ||||
27459 | pCsr->nLeTerm = -1; | ||||
27460 | pCsr->zLeTerm = 0; | ||||
27461 | pCsr->bEof = 0; | ||||
27462 | } | ||||
27463 | |||||
27464 | /* | ||||
27465 | ** Close the cursor. For additional information see the documentation | ||||
27466 | ** on the xClose method of the virtual table interface. | ||||
27467 | */ | ||||
27468 | static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ | ||||
27469 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | ||||
27470 | fts5VocabResetCursor(pCsr); | ||||
27471 | sqlite3Fts5BufferFree(&pCsr->term); | ||||
27472 | sqlite3_finalizesqlite3_api->finalize(pCsr->pStmt); | ||||
27473 | sqlite3_freesqlite3_api->free(pCsr); | ||||
27474 | return SQLITE_OK0; | ||||
27475 | } | ||||
27476 | |||||
27477 | static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){ | ||||
27478 | int rc = SQLITE_OK0; | ||||
27479 | |||||
27480 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ){ | ||||
27481 | pCsr->bEof = 1; | ||||
27482 | }else{ | ||||
27483 | const char *zTerm; | ||||
27484 | int nTerm; | ||||
27485 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | ||||
27486 | if( pCsr->nLeTerm>=0 ){ | ||||
27487 | int nCmp = MIN(nTerm, pCsr->nLeTerm)(((nTerm) < (pCsr->nLeTerm)) ? (nTerm) : (pCsr->nLeTerm )); | ||||
27488 | int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); | ||||
27489 | if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ | ||||
27490 | pCsr->bEof = 1; | ||||
27491 | } | ||||
27492 | } | ||||
27493 | |||||
27494 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); | ||||
27495 | } | ||||
27496 | return rc; | ||||
27497 | } | ||||
27498 | |||||
27499 | static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){ | ||||
27500 | int eDetail = pCsr->pFts5->pConfig->eDetail; | ||||
27501 | int rc = SQLITE_OK0; | ||||
27502 | Fts5IndexIter *pIter = pCsr->pIter; | ||||
27503 | i64 *pp = &pCsr->iInstPos; | ||||
27504 | int *po = &pCsr->iInstOff; | ||||
27505 | |||||
27506 | assert( sqlite3Fts5IterEof(pIter)==0 )((void) (0)); | ||||
27507 | assert( pCsr->bEof==0 )((void) (0)); | ||||
27508 | while( eDetail==FTS5_DETAIL_NONE1 | ||||
27509 | || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp) | ||||
27510 | ){ | ||||
27511 | pCsr->iInstPos = 0; | ||||
27512 | pCsr->iInstOff = 0; | ||||
27513 | |||||
27514 | rc = sqlite3Fts5IterNextScan(pCsr->pIter); | ||||
27515 | if( rc==SQLITE_OK0 ){ | ||||
27516 | rc = fts5VocabInstanceNewTerm(pCsr); | ||||
27517 | if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE1 ) break; | ||||
27518 | } | ||||
27519 | if( rc ){ | ||||
27520 | pCsr->bEof = 1; | ||||
27521 | break; | ||||
27522 | } | ||||
27523 | } | ||||
27524 | |||||
27525 | return rc; | ||||
27526 | } | ||||
27527 | |||||
27528 | /* | ||||
27529 | ** Advance the cursor to the next row in the table. | ||||
27530 | */ | ||||
27531 | static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ | ||||
27532 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | ||||
27533 | Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; | ||||
27534 | int nCol = pCsr->pFts5->pConfig->nCol; | ||||
27535 | int rc; | ||||
27536 | |||||
27537 | rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct); | ||||
27538 | if( rc!=SQLITE_OK ) return rc; | ||||
27539 | pCsr->rowid++; | ||||
27540 | |||||
27541 | if( pTab->eType==FTS5_VOCAB_INSTANCE ){ | ||||
27542 | return fts5VocabInstanceNext(pCsr); | ||||
27543 | } | ||||
27544 | |||||
27545 | if( pTab->eType==FTS5_VOCAB_COL0 ){ | ||||
27546 | for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){ | ||||
27547 | if( pCsr->aDoc[pCsr->iCol] ) break; | ||||
27548 | } | ||||
27549 | } | ||||
27550 | |||||
27551 | if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){ | ||||
27552 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ){ | ||||
| |||||
27553 | pCsr->bEof = 1; | ||||
27554 | }else{ | ||||
27555 | const char *zTerm; | ||||
27556 | int nTerm; | ||||
27557 | |||||
27558 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | ||||
27559 | assert( nTerm>=0 )((void) (0)); | ||||
27560 | if( pCsr->nLeTerm>=0 ){ | ||||
27561 | int nCmp = MIN(nTerm, pCsr->nLeTerm)(((nTerm) < (pCsr->nLeTerm)) ? (nTerm) : (pCsr->nLeTerm )); | ||||
27562 | int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); | ||||
27563 | if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ | ||||
27564 | pCsr->bEof = 1; | ||||
27565 | return SQLITE_OK0; | ||||
27566 | } | ||||
27567 | } | ||||
27568 | |||||
27569 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); | ||||
27570 | memset(pCsr->aCnt, 0, nCol * sizeof(i64)); | ||||
27571 | memset(pCsr->aDoc, 0, nCol * sizeof(i64)); | ||||
27572 | pCsr->iCol = 0; | ||||
27573 | |||||
27574 | assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW )((void) (0)); | ||||
27575 | while( rc==SQLITE_OK0 ){ | ||||
27576 | int eDetail = pCsr->pFts5->pConfig->eDetail; | ||||
27577 | const u8 *pPos; int nPos; /* Position list */ | ||||
27578 | i64 iPos = 0; /* 64-bit position read from poslist */ | ||||
27579 | int iOff = 0; /* Current offset within position list */ | ||||
27580 | |||||
27581 | pPos = pCsr->pIter->pData; | ||||
27582 | nPos = pCsr->pIter->nData; | ||||
27583 | |||||
27584 | switch( pTab->eType ){ | ||||
27585 | case FTS5_VOCAB_ROW1: | ||||
27586 | /* Do not bother counting the number of instances if the "cnt" | ||||
27587 | ** column is not being read (according to colUsed). */ | ||||
27588 | if( eDetail==FTS5_DETAIL_FULL0 && (pCsr->colUsed & 0x04) ){ | ||||
27589 | while( iPos<nPos ){ | ||||
27590 | u32 ii; | ||||
27591 | fts5FastGetVarint32(pPos, iPos, ii){ ii = (pPos)[iPos++]; if( ii & 0x80 ){ iPos--; iPos += sqlite3Fts5GetVarint32 (&(pPos)[iPos],(u32*)&(ii)); } }; | ||||
27592 | if( ii==1 ){ | ||||
27593 | /* New column in the position list */ | ||||
27594 | fts5FastGetVarint32(pPos, iPos, ii){ ii = (pPos)[iPos++]; if( ii & 0x80 ){ iPos--; iPos += sqlite3Fts5GetVarint32 (&(pPos)[iPos],(u32*)&(ii)); } }; | ||||
27595 | }else{ | ||||
27596 | /* An instance - increment pCsr->aCnt[] */ | ||||
27597 | pCsr->aCnt[0]++; | ||||
27598 | } | ||||
27599 | } | ||||
27600 | } | ||||
27601 | pCsr->aDoc[0]++; | ||||
27602 | break; | ||||
27603 | |||||
27604 | case FTS5_VOCAB_COL0: | ||||
27605 | if( eDetail==FTS5_DETAIL_FULL0 ){ | ||||
27606 | int iCol = -1; | ||||
27607 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ | ||||
27608 | int ii = FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF); | ||||
27609 | if( iCol!=ii ){ | ||||
27610 | if( ii>=nCol ){ | ||||
27611 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
27612 | break; | ||||
27613 | } | ||||
27614 | pCsr->aDoc[ii]++; | ||||
27615 | iCol = ii; | ||||
27616 | } | ||||
27617 | pCsr->aCnt[ii]++; | ||||
27618 | } | ||||
27619 | }else if( eDetail==FTS5_DETAIL_COLUMNS2 ){ | ||||
27620 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ | ||||
27621 | assert_nc( iPos>=0 && iPos<nCol )((void) (0)); | ||||
27622 | if( iPos>=nCol ){ | ||||
27623 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
27624 | break; | ||||
27625 | } | ||||
27626 | pCsr->aDoc[iPos]++; | ||||
27627 | } | ||||
27628 | }else{ | ||||
27629 | assert( eDetail==FTS5_DETAIL_NONE )((void) (0)); | ||||
27630 | pCsr->aDoc[0]++; | ||||
27631 | } | ||||
27632 | break; | ||||
27633 | |||||
27634 | default: | ||||
27635 | assert( pTab->eType==FTS5_VOCAB_INSTANCE )((void) (0)); | ||||
27636 | break; | ||||
27637 | } | ||||
27638 | |||||
27639 | if( rc==SQLITE_OK0 ){ | ||||
27640 | rc = sqlite3Fts5IterNextScan(pCsr->pIter); | ||||
27641 | } | ||||
27642 | if( pTab->eType==FTS5_VOCAB_INSTANCE2 ) break; | ||||
27643 | |||||
27644 | if( rc==SQLITE_OK0 ){ | ||||
27645 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | ||||
27646 | if( nTerm!=pCsr->term.n | ||||
27647 | || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm)) | ||||
27648 | ){ | ||||
27649 | break; | ||||
27650 | } | ||||
27651 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ) break; | ||||
27652 | } | ||||
27653 | } | ||||
27654 | } | ||||
27655 | } | ||||
27656 | |||||
27657 | if( rc==SQLITE_OK0 && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL0 ){ | ||||
27658 | for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++); | ||||
27659 | if( pCsr->iCol==nCol ){ | ||||
27660 | rc = FTS5_CORRUPT(11 | (1<<8)); | ||||
27661 | } | ||||
27662 | } | ||||
27663 | return rc; | ||||
27664 | } | ||||
27665 | |||||
27666 | /* | ||||
27667 | ** This is the xFilter implementation for the virtual table. | ||||
27668 | */ | ||||
27669 | static int fts5VocabFilterMethod( | ||||
27670 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ | ||||
27671 | int idxNum, /* Strategy index */ | ||||
27672 | const char *zUnused, /* Unused */ | ||||
27673 | int nUnused, /* Number of elements in apVal */ | ||||
27674 | sqlite3_value **apVal /* Arguments for the indexing scheme */ | ||||
27675 | ){ | ||||
27676 | Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; | ||||
27677 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | ||||
27678 | int eType = pTab->eType; | ||||
27679 | int rc = SQLITE_OK0; | ||||
27680 | |||||
27681 | int iVal = 0; | ||||
27682 | int f = FTS5INDEX_QUERY_SCAN0x0008; | ||||
27683 | const char *zTerm = 0; | ||||
27684 | int nTerm = 0; | ||||
27685 | |||||
27686 | sqlite3_value *pEq = 0; | ||||
27687 | sqlite3_value *pGe = 0; | ||||
27688 | sqlite3_value *pLe = 0; | ||||
27689 | |||||
27690 | UNUSED_PARAM2(zUnused, nUnused)(void)(zUnused), (void)(nUnused); | ||||
27691 | |||||
27692 | fts5VocabResetCursor(pCsr); | ||||
| |||||
27693 | if( idxNum & FTS5_VOCAB_TERM_EQ0x0100 ) pEq = apVal[iVal++]; | ||||
27694 | if( idxNum & FTS5_VOCAB_TERM_GE0x0200 ) pGe = apVal[iVal++]; | ||||
27695 | if( idxNum & FTS5_VOCAB_TERM_LE0x0400 ) pLe = apVal[iVal++]; | ||||
27696 | pCsr->colUsed = (idxNum & FTS5_VOCAB_COLUSED_MASK0xFF); | ||||
27697 | |||||
27698 | if( pEq ){ | ||||
27699 | zTerm = (const char *)sqlite3_value_textsqlite3_api->value_text(pEq); | ||||
27700 | nTerm = sqlite3_value_bytessqlite3_api->value_bytes(pEq); | ||||
27701 | f = FTS5INDEX_QUERY_NOTOKENDATA0x0080; | ||||
27702 | }else{ | ||||
27703 | if( pGe ){ | ||||
27704 | zTerm = (const char *)sqlite3_value_textsqlite3_api->value_text(pGe); | ||||
27705 | nTerm = sqlite3_value_bytessqlite3_api->value_bytes(pGe); | ||||
27706 | } | ||||
27707 | if( pLe ){ | ||||
27708 | const char *zCopy = (const char *)sqlite3_value_textsqlite3_api->value_text(pLe); | ||||
27709 | if( zCopy==0 ) zCopy = ""; | ||||
27710 | pCsr->nLeTerm = sqlite3_value_bytessqlite3_api->value_bytes(pLe); | ||||
27711 | pCsr->zLeTerm = sqlite3_mallocsqlite3_api->malloc(pCsr->nLeTerm+1); | ||||
27712 | if( pCsr->zLeTerm==0 ){ | ||||
27713 | rc = SQLITE_NOMEM7; | ||||
27714 | }else{ | ||||
27715 | memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1); | ||||
27716 | } | ||||
27717 | } | ||||
27718 | } | ||||
27719 | |||||
27720 | if( rc==SQLITE_OK ){ | ||||
27721 | Fts5Index *pIndex = pCsr->pFts5->pIndex; | ||||
27722 | rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); | ||||
27723 | if( rc==SQLITE_OK0 ){ | ||||
27724 | pCsr->pStruct = sqlite3Fts5StructureRef(pIndex); | ||||
27725 | } | ||||
27726 | } | ||||
27727 | if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){ | ||||
27728 | rc = fts5VocabInstanceNewTerm(pCsr); | ||||
27729 | } | ||||
27730 | if( rc==SQLITE_OK && !pCsr->bEof | ||||
27731 | && (eType!=FTS5_VOCAB_INSTANCE | ||||
27732 | || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE1) | ||||
27733 | ){ | ||||
27734 | rc = fts5VocabNextMethod(pCursor); | ||||
27735 | } | ||||
27736 | |||||
27737 | return rc; | ||||
27738 | } | ||||
27739 | |||||
27740 | /* | ||||
27741 | ** This is the xEof method of the virtual table. SQLite calls this | ||||
27742 | ** routine to find out if it has reached the end of a result set. | ||||
27743 | */ | ||||
27744 | static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){ | ||||
27745 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | ||||
27746 | return pCsr->bEof; | ||||
27747 | } | ||||
27748 | |||||
27749 | static int fts5VocabColumnMethod( | ||||
27750 | sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ | ||||
27751 | sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ | ||||
27752 | int iCol /* Index of column to read value from */ | ||||
27753 | ){ | ||||
27754 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | ||||
27755 | int eDetail = pCsr->pFts5->pConfig->eDetail; | ||||
27756 | int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType; | ||||
27757 | i64 iVal = 0; | ||||
27758 | |||||
27759 | if( iCol==0 ){ | ||||
27760 | sqlite3_result_textsqlite3_api->result_text( | ||||
27761 | pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1) | ||||
27762 | ); | ||||
27763 | }else if( eType==FTS5_VOCAB_COL0 ){ | ||||
27764 | assert( iCol==1 || iCol==2 || iCol==3 )((void) (0)); | ||||
27765 | if( iCol==1 ){ | ||||
27766 | if( eDetail!=FTS5_DETAIL_NONE1 ){ | ||||
27767 | const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol]; | ||||
27768 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
27769 | } | ||||
27770 | }else if( iCol==2 ){ | ||||
27771 | iVal = pCsr->aDoc[pCsr->iCol]; | ||||
27772 | }else{ | ||||
27773 | iVal = pCsr->aCnt[pCsr->iCol]; | ||||
27774 | } | ||||
27775 | }else if( eType==FTS5_VOCAB_ROW1 ){ | ||||
27776 | assert( iCol==1 || iCol==2 )((void) (0)); | ||||
27777 | if( iCol==1 ){ | ||||
27778 | iVal = pCsr->aDoc[0]; | ||||
27779 | }else{ | ||||
27780 | iVal = pCsr->aCnt[0]; | ||||
27781 | } | ||||
27782 | }else{ | ||||
27783 | assert( eType==FTS5_VOCAB_INSTANCE )((void) (0)); | ||||
27784 | switch( iCol ){ | ||||
27785 | case 1: | ||||
27786 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->pIter->iRowid); | ||||
27787 | break; | ||||
27788 | case 2: { | ||||
27789 | int ii = -1; | ||||
27790 | if( eDetail==FTS5_DETAIL_FULL0 ){ | ||||
27791 | ii = FTS5_POS2COLUMN(pCsr->iInstPos)(int)((pCsr->iInstPos >> 32) & 0x7FFFFFFF); | ||||
27792 | }else if( eDetail==FTS5_DETAIL_COLUMNS2 ){ | ||||
27793 | ii = (int)pCsr->iInstPos; | ||||
27794 | } | ||||
27795 | if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){ | ||||
27796 | const char *z = pCsr->pFts5->pConfig->azCol[ii]; | ||||
27797 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | ||||
27798 | } | ||||
27799 | break; | ||||
27800 | } | ||||
27801 | default: { | ||||
27802 | assert( iCol==3 )((void) (0)); | ||||
27803 | if( eDetail==FTS5_DETAIL_FULL0 ){ | ||||
27804 | int ii = FTS5_POS2OFFSET(pCsr->iInstPos)(int)(pCsr->iInstPos & 0x7FFFFFFF); | ||||
27805 | sqlite3_result_intsqlite3_api->result_int(pCtx, ii); | ||||
27806 | } | ||||
27807 | break; | ||||
27808 | } | ||||
27809 | } | ||||
27810 | } | ||||
27811 | |||||
27812 | if( iVal>0 ) sqlite3_result_int64sqlite3_api->result_int64(pCtx, iVal); | ||||
27813 | return SQLITE_OK0; | ||||
27814 | } | ||||
27815 | |||||
27816 | /* | ||||
27817 | ** This is the xRowid method. The SQLite core calls this routine to | ||||
27818 | ** retrieve the rowid for the current row of the result set. The | ||||
27819 | ** rowid should be written to *pRowid. | ||||
27820 | */ | ||||
27821 | static int fts5VocabRowidMethod( | ||||
27822 | sqlite3_vtab_cursor *pCursor, | ||||
27823 | sqlite_int64 *pRowid | ||||
27824 | ){ | ||||
27825 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | ||||
27826 | *pRowid = pCsr->rowid; | ||||
27827 | return SQLITE_OK0; | ||||
27828 | } | ||||
27829 | |||||
27830 | static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ | ||||
27831 | static const sqlite3_module fts5Vocab = { | ||||
27832 | /* iVersion */ 2, | ||||
27833 | /* xCreate */ fts5VocabCreateMethod, | ||||
27834 | /* xConnect */ fts5VocabConnectMethod, | ||||
27835 | /* xBestIndex */ fts5VocabBestIndexMethod, | ||||
27836 | /* xDisconnect */ fts5VocabDisconnectMethod, | ||||
27837 | /* xDestroy */ fts5VocabDestroyMethod, | ||||
27838 | /* xOpen */ fts5VocabOpenMethod, | ||||
27839 | /* xClose */ fts5VocabCloseMethod, | ||||
27840 | /* xFilter */ fts5VocabFilterMethod, | ||||
27841 | /* xNext */ fts5VocabNextMethod, | ||||
27842 | /* xEof */ fts5VocabEofMethod, | ||||
27843 | /* xColumn */ fts5VocabColumnMethod, | ||||
27844 | /* xRowid */ fts5VocabRowidMethod, | ||||
27845 | /* xUpdate */ 0, | ||||
27846 | /* xBegin */ 0, | ||||
27847 | /* xSync */ 0, | ||||
27848 | /* xCommit */ 0, | ||||
27849 | /* xRollback */ 0, | ||||
27850 | /* xFindFunction */ 0, | ||||
27851 | /* xRename */ 0, | ||||
27852 | /* xSavepoint */ 0, | ||||
27853 | /* xRelease */ 0, | ||||
27854 | /* xRollbackTo */ 0, | ||||
27855 | /* xShadowName */ 0, | ||||
27856 | /* xIntegrity */ 0 | ||||
27857 | }; | ||||
27858 | void *p = (void*)pGlobal; | ||||
27859 | |||||
27860 | return sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0); | ||||
27861 | } | ||||
27862 | |||||
27863 | |||||
27864 | /* Here ends the fts5.c composite file. */ | ||||
27865 | #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ |