File: | root/firefox-clang/third_party/sqlite3/ext/fts5.c |
Warning: | line 17109, column 16 Array access (via field 'p') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | ||||
2 | /* | |||
3 | ** This, the "fts5.c" source file, is a composite file that is itself | |||
4 | ** assembled from the following files: | |||
5 | ** | |||
6 | ** fts5.h | |||
7 | ** fts5Int.h | |||
8 | ** fts5parse.h <--- Generated from fts5parse.y by Lemon | |||
9 | ** fts5parse.c <--- Generated from fts5parse.y by Lemon | |||
10 | ** fts5_aux.c | |||
11 | ** fts5_buffer.c | |||
12 | ** fts5_config.c | |||
13 | ** fts5_expr.c | |||
14 | ** fts5_hash.c | |||
15 | ** fts5_index.c | |||
16 | ** fts5_main.c | |||
17 | ** fts5_storage.c | |||
18 | ** fts5_tokenize.c | |||
19 | ** fts5_unicode2.c | |||
20 | ** fts5_varint.c | |||
21 | ** fts5_vocab.c | |||
22 | */ | |||
23 | #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) | |||
24 | ||||
25 | #if !defined(NDEBUG1) && !defined(SQLITE_DEBUG) | |||
26 | # define NDEBUG1 1 | |||
27 | #endif | |||
28 | #if defined(NDEBUG1) && defined(SQLITE_DEBUG) | |||
29 | # undef NDEBUG1 | |||
30 | #endif | |||
31 | ||||
32 | #ifdef HAVE_STDINT_H1 | |||
33 | #include <stdint.h> | |||
34 | #endif | |||
35 | #ifdef HAVE_INTTYPES_H1 | |||
36 | #include <inttypes.h> | |||
37 | #endif | |||
38 | #line 1 "fts5.h" | |||
39 | /* | |||
40 | ** 2014 May 31 | |||
41 | ** | |||
42 | ** The author disclaims copyright to this source code. In place of | |||
43 | ** a legal notice, here is a blessing: | |||
44 | ** | |||
45 | ** May you do good and not evil. | |||
46 | ** May you find forgiveness for yourself and forgive others. | |||
47 | ** May you share freely, never taking more than you give. | |||
48 | ** | |||
49 | ****************************************************************************** | |||
50 | ** | |||
51 | ** Interfaces to extend FTS5. Using the interfaces defined in this file, | |||
52 | ** FTS5 may be extended with: | |||
53 | ** | |||
54 | ** * custom tokenizers, and | |||
55 | ** * custom auxiliary functions. | |||
56 | */ | |||
57 | ||||
58 | ||||
59 | #ifndef _FTS5_H | |||
60 | #define _FTS5_H | |||
61 | ||||
62 | #include "sqlite3.h" | |||
63 | ||||
64 | #ifdef __cplusplus | |||
65 | extern "C" { | |||
66 | #endif | |||
67 | ||||
68 | /************************************************************************* | |||
69 | ** CUSTOM AUXILIARY FUNCTIONS | |||
70 | ** | |||
71 | ** Virtual table implementations may overload SQL functions by implementing | |||
72 | ** the sqlite3_module.xFindFunction() method. | |||
73 | */ | |||
74 | ||||
75 | typedef struct Fts5ExtensionApi Fts5ExtensionApi; /* Method table passed to extension functions as pApi */ | |||
76 | typedef struct Fts5Context Fts5Context; /* Handle passed as the first argument to pApi methods */ | |||
77 | typedef struct Fts5PhraseIter Fts5PhraseIter; /* Iterator state used with xPhraseFirst()/xPhraseNext() */ | |||
78 | ||||
79 | typedef void (*fts5_extension_function)( /* Signature of a custom auxiliary (extension) function */ | |||
80 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
81 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
82 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
83 | int nVal, /* Number of values in apVal[] array */ | |||
84 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
85 | ); | |||
86 | ||||
87 | struct Fts5PhraseIter { /* Iterator state; applications should not modify it directly */ | |||
88 | const unsigned char *a; | |||
89 | const unsigned char *b; | |||
90 | }; | |||
91 | ||||
92 | /* | |||
93 | ** EXTENSION API FUNCTIONS | |||
94 | ** | |||
95 | ** xUserData(pFts): | |||
96 | ** Return a copy of the pUserData pointer passed to the xCreateFunction() | |||
97 | ** API when the extension function was registered. | |||
98 | ** | |||
99 | ** xColumnTotalSize(pFts, iCol, pnToken): | |||
100 | ** If parameter iCol is less than zero, set output variable *pnToken | |||
101 | ** to the total number of tokens in the FTS5 table. Or, if iCol is | |||
102 | ** non-negative but less than the number of columns in the table, set | |||
103 | ** *pnToken to the total number of tokens in column iCol, considering all rows in | |||
104 | ** the FTS5 table. | |||
105 | ** | |||
106 | ** If parameter iCol is greater than or equal to the number of columns | |||
107 | ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. | |||
108 | ** an OOM condition or IO error), an appropriate SQLite error code is | |||
109 | ** returned. | |||
110 | ** | |||
111 | ** xColumnCount(pFts): | |||
112 | ** Return the number of columns in the table. | |||
113 | ** | |||
114 | ** xColumnSize(pFts, iCol, pnToken): | |||
115 | ** If parameter iCol is less than zero, set output variable *pnToken | |||
116 | ** to the total number of tokens in the current row. Or, if iCol is | |||
117 | ** non-negative but less than the number of columns in the table, set | |||
118 | ** *pnToken to the number of tokens in column iCol of the current row. | |||
119 | ** | |||
120 | ** If parameter iCol is greater than or equal to the number of columns | |||
121 | ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. | |||
122 | ** an OOM condition or IO error), an appropriate SQLite error code is | |||
123 | ** returned. | |||
124 | ** | |||
125 | ** This function may be quite inefficient if used with an FTS5 table | |||
126 | ** created with the "columnsize=0" option. | |||
127 | ** | |||
128 | ** xColumnText: | |||
129 | ** If parameter iCol is less than zero, or greater than or equal to the | |||
130 | ** number of columns in the table, SQLITE_RANGE is returned. | |||
131 | ** | |||
132 | ** Otherwise, this function attempts to retrieve the text of column iCol of | |||
133 | ** the current document. If successful, (*pz) is set to point to a buffer | |||
134 | ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes | |||
135 | ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, | |||
136 | ** if an error occurs, an SQLite error code is returned and the final values | |||
137 | ** of (*pz) and (*pn) are undefined. | |||
138 | ** | |||
139 | ** xPhraseCount: | |||
140 | ** Returns the number of phrases in the current query expression. | |||
141 | ** | |||
142 | ** xPhraseSize: | |||
143 | ** If parameter iPhrase is less than zero, or greater than or equal to the | |||
144 | ** number of phrases in the current query, as returned by xPhraseCount, | |||
145 | ** 0 is returned. Otherwise, this function returns the number of tokens in | |||
146 | ** phrase iPhrase of the query. Phrases are numbered starting from zero. | |||
147 | ** | |||
148 | ** xInstCount: | |||
149 | ** Set *pnInst to the total number of occurrences of all phrases within | |||
150 | ** the query within the current row. Return SQLITE_OK if successful, or | |||
151 | ** an error code (e.g. SQLITE_NOMEM) if an error occurs. | |||
152 | ** | |||
153 | ** This API can be quite slow if used with an FTS5 table created with the | |||
154 | ** "detail=none" or "detail=column" option. If the FTS5 table is created | |||
155 | ** with either "detail=none" or "detail=column" and "content=" option | |||
156 | ** (i.e. if it is a contentless table), then this API always returns 0. | |||
157 | ** | |||
158 | ** xInst: | |||
159 | ** Query for the details of phrase match iIdx within the current row. | |||
160 | ** Phrase matches are numbered starting from zero, so the iIdx argument | |||
161 | ** should be greater than or equal to zero and smaller than the value | |||
162 | ** output by xInstCount(). If iIdx is less than zero or greater than | |||
163 | ** or equal to the value returned by xInstCount(), SQLITE_RANGE is returned. | |||
164 | ** | |||
165 | ** Otherwise, output parameter *piPhrase is set to the phrase number, *piCol | |||
166 | ** to the column in which it occurs and *piOff the token offset of the | |||
167 | ** first token of the phrase. SQLITE_OK is returned if successful, or an | |||
168 | ** error code (e.g. SQLITE_NOMEM) if an error occurs. | |||
169 | ** | |||
170 | ** This API can be quite slow if used with an FTS5 table created with the | |||
171 | ** "detail=none" or "detail=column" option. | |||
172 | ** | |||
173 | ** xRowid: | |||
174 | ** Returns the rowid of the current row. | |||
175 | ** | |||
176 | ** xTokenize: | |||
177 | ** Tokenize text using the tokenizer belonging to the FTS5 table. | |||
178 | ** | |||
179 | ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): | |||
180 | ** This API function is used to query the FTS table for phrase iPhrase | |||
181 | ** of the current query. Specifically, a query equivalent to: | |||
182 | ** | |||
183 | ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid | |||
184 | ** | |||
185 | ** with $p set to a phrase equivalent to the phrase iPhrase of the | |||
186 | ** current query is executed. Any column filter that applies to | |||
187 | ** phrase iPhrase of the current query is included in $p. For each | |||
188 | ** row visited, the callback function passed as the fourth argument | |||
189 | ** is invoked. The context and API objects passed to the callback | |||
190 | ** function may be used to access the properties of each matched row. | |||
191 | ** Invoking Api.xUserData() returns a copy of the pointer passed as | |||
192 | ** the third argument (pUserData) to xQueryPhrase(). | |||
193 | ** | |||
194 | ** If parameter iPhrase is less than zero, or greater than or equal to | |||
195 | ** the number of phrases in the query, as returned by xPhraseCount(), | |||
196 | ** this function returns SQLITE_RANGE. | |||
197 | ** | |||
198 | ** If the callback function returns any value other than SQLITE_OK, the | |||
199 | ** query is abandoned and the xQueryPhrase function returns immediately. | |||
200 | ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. | |||
201 | ** Otherwise, the error code is propagated upwards. | |||
202 | ** | |||
203 | ** If the query runs to completion without incident, SQLITE_OK is returned. | |||
204 | ** Or, if some error occurs before the query completes or is aborted by | |||
205 | ** the callback, an SQLite error code is returned. | |||
206 | ** | |||
207 | ** | |||
208 | ** xSetAuxdata(pFts5, pAux, xDelete) | |||
209 | ** | |||
210 | ** Save the pointer passed as the second argument as the extension function's | |||
211 | ** "auxiliary data". The pointer may then be retrieved by the current or any | |||
212 | ** future invocation of the same fts5 extension function made as part of | |||
213 | ** the same MATCH query using the xGetAuxdata() API. | |||
214 | ** | |||
215 | ** Each extension function is allocated a single auxiliary data slot for | |||
216 | ** each FTS query (MATCH expression). If the extension function is invoked | |||
217 | ** more than once for a single FTS query, then all invocations share a | |||
218 | ** single auxiliary data context. | |||
219 | ** | |||
220 | ** If there is already an auxiliary data pointer when this function is | |||
221 | ** invoked, then it is replaced by the new pointer. If an xDelete callback | |||
222 | ** was specified along with the original pointer, it is invoked at this | |||
223 | ** point. | |||
224 | ** | |||
225 | ** The xDelete callback, if one is specified, is also invoked on the | |||
226 | ** auxiliary data pointer after the FTS5 query has finished. | |||
227 | ** | |||
228 | ** If an error (e.g. an OOM condition) occurs within this function, | |||
229 | ** the auxiliary data is set to NULL and an error code returned. If the | |||
230 | ** xDelete parameter was not NULL, it is invoked on the auxiliary data | |||
231 | ** pointer before returning. | |||
232 | ** | |||
233 | ** | |||
234 | ** xGetAuxdata(pFts5, bClear) | |||
235 | ** | |||
236 | ** Returns the current auxiliary data pointer for the fts5 extension | |||
237 | ** function. See the xSetAuxdata() method for details. | |||
238 | ** | |||
239 | ** If the bClear argument is non-zero, then the auxiliary data is cleared | |||
240 | ** (set to NULL) before this function returns. In this case the xDelete, | |||
241 | ** if any, is not invoked. | |||
242 | ** | |||
243 | ** | |||
244 | ** xRowCount(pFts5, pnRow) | |||
245 | ** | |||
246 | ** This function is used to retrieve the total number of rows in the table. | |||
247 | ** In other words, the same value that would be returned by: | |||
248 | ** | |||
249 | ** SELECT count(*) FROM ftstable; | |||
250 | ** | |||
251 | ** xPhraseFirst() | |||
252 | ** This function is used, along with type Fts5PhraseIter and the xPhraseNext | |||
253 | ** method, to iterate through all instances of a single query phrase within | |||
254 | ** the current row. This is the same information as is accessible via the | |||
255 | ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient | |||
256 | ** to use, this API may be faster under some circumstances. To iterate | |||
257 | ** through instances of phrase iPhrase, use the following code: | |||
258 | ** | |||
259 | ** Fts5PhraseIter iter; | |||
260 | ** int iCol, iOff; | |||
261 | ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); | |||
262 | ** iCol>=0; | |||
263 | ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) | |||
264 | ** ){ | |||
265 | ** // An instance of phrase iPhrase at offset iOff of column iCol | |||
266 | ** } | |||
267 | ** | |||
268 | ** The Fts5PhraseIter structure is defined above. Applications should not | |||
269 | ** modify this structure directly - it should only be used as shown above | |||
270 | ** with the xPhraseFirst() and xPhraseNext() API methods (and by | |||
271 | ** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below). | |||
272 | ** | |||
273 | ** This API can be quite slow if used with an FTS5 table created with the | |||
274 | ** "detail=none" or "detail=column" option. If the FTS5 table is created | |||
275 | ** with either "detail=none" or "detail=column" and "content=" option | |||
276 | ** (i.e. if it is a contentless table), then this API always iterates | |||
277 | ** through an empty set (all calls to xPhraseFirst() set iCol to -1). | |||
278 | ** | |||
279 | ** In all cases, matches are visited in (column ASC, offset ASC) order. | |||
280 | ** i.e. all those in column 0, sorted by offset, followed by those in | |||
281 | ** column 1, etc. | |||
282 | ** | |||
283 | ** xPhraseNext() | |||
284 | ** See xPhraseFirst above. | |||
285 | ** | |||
286 | ** xPhraseFirstColumn() | |||
287 | ** This function and xPhraseNextColumn() are similar to the xPhraseFirst() | |||
288 | ** and xPhraseNext() APIs described above. The difference is that instead | |||
289 | ** of iterating through all instances of a phrase in the current row, these | |||
290 | ** APIs are used to iterate through the set of columns in the current row | |||
291 | ** that contain one or more instances of a specified phrase. For example: | |||
292 | ** | |||
293 | ** Fts5PhraseIter iter; | |||
294 | ** int iCol; | |||
295 | ** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); | |||
296 | ** iCol>=0; | |||
297 | ** pApi->xPhraseNextColumn(pFts, &iter, &iCol) | |||
298 | ** ){ | |||
299 | ** // Column iCol contains at least one instance of phrase iPhrase | |||
300 | ** } | |||
301 | ** | |||
302 | ** This API can be quite slow if used with an FTS5 table created with the | |||
303 | ** "detail=none" option. If the FTS5 table is created with both the | |||
304 | ** "detail=none" and "content=" options (i.e. if it is a contentless table), | |||
305 | ** then this API always iterates through an empty set (all calls to | |||
306 | ** xPhraseFirstColumn() set iCol to -1). | |||
307 | ** | |||
308 | ** The information accessed using this API and its companion | |||
309 | ** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext | |||
310 | ** (or xInst/xInstCount). The chief advantage of this API is that it is | |||
311 | ** significantly more efficient than those alternatives when used with | |||
312 | ** "detail=column" tables. | |||
313 | ** | |||
314 | ** xPhraseNextColumn() | |||
315 | ** See xPhraseFirstColumn above. | |||
316 | ** | |||
317 | ** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken) | |||
318 | ** This is used to access token iToken of phrase iPhrase of the current | |||
319 | ** query. Before returning, output parameter *ppToken is set to point | |||
320 | ** to a buffer containing the requested token, and *pnToken to the | |||
321 | ** size of this buffer in bytes. | |||
322 | ** | |||
323 | ** If iPhrase or iToken are less than zero, or if iPhrase is greater than | |||
324 | ** or equal to the number of phrases in the query as reported by | |||
325 | ** xPhraseCount(), or if iToken is equal to or greater than the number of | |||
326 | ** tokens in the phrase, SQLITE_RANGE is returned and *ppToken and *pnToken | |||
327 | ** are both zeroed. | |||
328 | ** | |||
329 | ** The output text is not a copy of the query text that specified the | |||
330 | ** token. It is the output of the tokenizer module. For tokendata=1 | |||
331 | ** tables, this includes any embedded 0x00 and trailing data. | |||
332 | ** | |||
333 | ** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken) | |||
334 | ** This is used to access token iToken of phrase hit iIdx within the | |||
335 | ** current row. If iIdx is less than zero or greater than or equal to the | |||
336 | ** value returned by xInstCount(), SQLITE_RANGE is returned. Otherwise, | |||
337 | ** output variable (*ppToken) is set to point to a buffer containing the | |||
338 | ** matching document token, and (*pnToken) to the size of that buffer in | |||
339 | ** bytes. | |||
340 | ** | |||
341 | ** The output text is not a copy of the document text that was tokenized. | |||
342 | ** It is the output of the tokenizer module. For tokendata=1 tables, this | |||
343 | ** includes any embedded 0x00 and trailing data. | |||
344 | ** | |||
345 | ** This API may be slow in some cases if the token identified by parameters | |||
346 | ** iIdx and iToken matched a prefix token in the query. In most cases, the | |||
347 | ** first call to this API for each prefix token in the query is forced | |||
348 | ** to scan the portion of the full-text index that matches the prefix | |||
349 | ** token to collect the extra data required by this API. If the prefix | |||
350 | ** token matches a large number of token instances in the document set, | |||
351 | ** this may be a performance problem. | |||
352 | ** | |||
353 | ** If the user knows in advance that a query may use this API for a | |||
354 | ** prefix token, FTS5 may be configured to collect all required data as part | |||
355 | ** of the initial querying of the full-text index, avoiding the second scan | |||
356 | ** entirely. This also causes prefix queries that do not use this API to | |||
357 | ** run more slowly and use more memory. FTS5 may be configured in this way | |||
358 | ** either on a per-table basis using the [FTS5 insttoken | 'insttoken'] | |||
359 | ** option, or on a per-query basis using the | |||
360 | ** [fts5_insttoken | fts5_insttoken()] user function. | |||
361 | ** | |||
362 | ** This API can be quite slow if used with an FTS5 table created with the | |||
363 | ** "detail=none" or "detail=column" option. | |||
364 | ** | |||
365 | ** xColumnLocale(pFts5, iCol, pzLocale, pnLocale) | |||
366 | ** If parameter iCol is less than zero, or greater than or equal to the | |||
367 | ** number of columns in the table, SQLITE_RANGE is returned. | |||
368 | ** | |||
369 | ** Otherwise, this function attempts to retrieve the locale associated | |||
370 | ** with column iCol of the current row. Usually, there is no associated | |||
371 | ** locale, and output parameters (*pzLocale) and (*pnLocale) are set | |||
372 | ** to NULL and 0, respectively. However, if the fts5_locale() function | |||
373 | ** was used to associate a locale with the value when it was inserted | |||
374 | ** into the fts5 table, then (*pzLocale) is set to point to a nul-terminated | |||
375 | ** buffer containing the name of the locale in utf-8 encoding. (*pnLocale) | |||
376 | ** is set to the size in bytes of the buffer, not including the | |||
377 | ** nul-terminator. | |||
378 | ** | |||
379 | ** If successful, SQLITE_OK is returned. Or, if an error occurs, an | |||
380 | ** SQLite error code is returned. The final value of the output parameters | |||
381 | ** is undefined in this case. | |||
382 | ** | |||
383 | ** xTokenize_v2: | |||
384 | ** Tokenize text using the tokenizer belonging to the FTS5 table. This | |||
385 | ** API is the same as the xTokenize() API, except that it allows a tokenizer | |||
386 | ** locale to be specified. | |||
387 | */ | |||
388 | struct Fts5ExtensionApi { /* Methods available to auxiliary functions; see the comment above */ | |||
389 | int iVersion; /* Currently always set to 4 */ | |||
390 | ||||
391 | void *(*xUserData)(Fts5Context*); /* pUserData pointer supplied at registration */ | |||
392 | ||||
393 | int (*xColumnCount)(Fts5Context*); /* Number of columns in the table */ | |||
394 | int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); /* Total number of rows in the table */ | |||
395 | int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); /* Token total over all rows */ | |||
396 | ||||
397 | int (*xTokenize)(Fts5Context*, | |||
398 | const char *pText, int nText, /* Text to tokenize */ | |||
399 | void *pCtx, /* Context passed to xToken() */ | |||
400 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | |||
401 | ); | |||
402 | ||||
403 | int (*xPhraseCount)(Fts5Context*); /* Number of phrases in the current query */ | |||
404 | int (*xPhraseSize)(Fts5Context*, int iPhrase); /* Number of tokens in phrase iPhrase */ | |||
405 | ||||
406 | int (*xInstCount)(Fts5Context*, int *pnInst); /* Phrase occurrences within the current row */ | |||
407 | int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); /* Details of match iIdx */ | |||
408 | ||||
409 | sqlite3_int64 (*xRowid)(Fts5Context*); /* Rowid of the current row */ | |||
410 | int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); /* utf-8 text of column iCol */ | |||
411 | int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); /* Token count in the current row */ | |||
412 | ||||
413 | int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, | |||
414 | int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) | |||
415 | ); | |||
416 | int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); /* Store per-query auxiliary data */ | |||
417 | void *(*xGetAuxdata)(Fts5Context*, int bClear); /* Fetch auxiliary data; clear it if bClear!=0 */ | |||
418 | ||||
419 | int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); /* Start instance iteration */ | |||
420 | void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); /* Advance instance iteration */ | |||
421 | ||||
422 | int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); /* Start column iteration */ | |||
423 | void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); /* Advance column iteration */ | |||
424 | ||||
425 | /* Below this point are iVersion>=3 only */ | |||
426 | int (*xQueryToken)(Fts5Context*, | |||
427 | int iPhrase, int iToken, | |||
428 | const char **ppToken, int *pnToken | |||
429 | ); | |||
430 | int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*); /* Token iToken of phrase hit iIdx */ | |||
431 | ||||
432 | /* Below this point are iVersion>=4 only */ | |||
433 | int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn); /* Locale of column iCol, if any */ | |||
434 | int (*xTokenize_v2)(Fts5Context*, | |||
435 | const char *pText, int nText, /* Text to tokenize */ | |||
436 | const char *pLocale, int nLocale, /* Locale to pass to tokenizer */ | |||
437 | void *pCtx, /* Context passed to xToken() */ | |||
438 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | |||
439 | ); | |||
440 | }; | |||
441 | ||||
442 | /* | |||
443 | ** CUSTOM AUXILIARY FUNCTIONS | |||
444 | *************************************************************************/ | |||
445 | ||||
446 | /************************************************************************* | |||
447 | ** CUSTOM TOKENIZERS | |||
448 | ** | |||
449 | ** Applications may also register custom tokenizer types. A tokenizer | |||
450 | ** is registered by providing fts5 with a populated instance of the | |||
451 | ** following structure. All structure methods must be defined, setting | |||
452 | ** any member of the fts5_tokenizer struct to NULL leads to undefined | |||
453 | ** behaviour. The structure methods are expected to function as follows: | |||
454 | ** | |||
455 | ** xCreate: | |||
456 | ** This function is used to allocate and initialize a tokenizer instance. | |||
457 | ** A tokenizer instance is required to actually tokenize text. | |||
458 | ** | |||
459 | ** The first argument passed to this function is a copy of the (void*) | |||
460 | ** pointer provided by the application when the fts5_tokenizer_v2 object | |||
461 | ** was registered with FTS5 (the third argument to xCreateTokenizer()). | |||
462 | ** The second and third arguments are an array of nul-terminated strings | |||
463 | ** containing the tokenizer arguments, if any, specified following the | |||
464 | ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used | |||
465 | ** to create the FTS5 table. | |||
466 | ** | |||
467 | ** The final argument is an output variable. If successful, (*ppOut) | |||
468 | ** should be set to point to the new tokenizer handle and SQLITE_OK | |||
469 | ** returned. If an error occurs, some value other than SQLITE_OK should | |||
470 | ** be returned. In this case, fts5 assumes that the final value of *ppOut | |||
471 | ** is undefined. | |||
472 | ** | |||
473 | ** xDelete: | |||
474 | ** This function is invoked to delete a tokenizer handle previously | |||
475 | ** allocated using xCreate(). Fts5 guarantees that this function will | |||
476 | ** be invoked exactly once for each successful call to xCreate(). | |||
477 | ** | |||
478 | ** xTokenize: | |||
479 | ** This function is expected to tokenize the nText byte string indicated | |||
480 | ** by argument pText. pText may or may not be nul-terminated. The first | |||
481 | ** argument passed to this function is a pointer to an Fts5Tokenizer object | |||
482 | ** returned by an earlier call to xCreate(). | |||
483 | ** | |||
484 | ** The third argument indicates the reason that FTS5 is requesting | |||
485 | ** tokenization of the supplied text. This is always one of the following | |||
486 | ** four values: | |||
487 | ** | |||
488 | ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into | |||
489 | ** or removed from the FTS table. The tokenizer is being invoked to | |||
490 | ** determine the set of tokens to add to (or delete from) the | |||
491 | ** FTS index. | |||
492 | ** | |||
493 | ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed | |||
494 | ** against the FTS index. The tokenizer is being called to tokenize | |||
495 | ** a bareword or quoted string specified as part of the query. | |||
496 | ** | |||
497 | ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as | |||
498 | ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is | |||
499 | ** followed by a "*" character, indicating that the last token | |||
500 | ** returned by the tokenizer will be treated as a token prefix. | |||
501 | ** | |||
502 | ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to | |||
503 | ** satisfy an Fts5ExtensionApi.xTokenize() request made by an auxiliary | |||
504 | ** function. Or an Fts5ExtensionApi.xColumnSize() request made by the same | |||
505 | ** on a columnsize=0 database. | |||
506 | ** </ul> | |||
507 | ** | |||
508 | ** The sixth and seventh arguments passed to xTokenize() - pLocale and | |||
509 | ** nLocale - are a pointer to a buffer containing the locale to use for | |||
510 | ** tokenization (e.g. "en_US") and its size in bytes, respectively. The | |||
511 | ** pLocale buffer is not nul-terminated. pLocale may be passed NULL (in | |||
512 | ** which case nLocale is always 0) to indicate that the tokenizer should | |||
513 | ** use its default locale. | |||
514 | ** | |||
515 | ** For each token in the input string, the supplied callback xToken() must | |||
516 | ** be invoked. The first argument to it should be a copy of the pointer | |||
517 | ** passed as the second argument to xTokenize(). The third and fourth | |||
518 | ** arguments are a pointer to a buffer containing the token text, and the | |||
519 | ** size of the token in bytes. The fifth and sixth arguments are the byte offsets | |||
520 | ** of the first byte of and first byte immediately following the text from | |||
521 | ** which the token is derived within the input. | |||
522 | ** | |||
523 | ** The second argument passed to the xToken() callback ("tflags") should | |||
524 | ** normally be set to 0. The exception is if the tokenizer supports | |||
525 | ** synonyms. In this case see the discussion below for details. | |||
526 | ** | |||
527 | ** FTS5 assumes the xToken() callback is invoked for each token in the | |||
528 | ** order that they occur within the input text. | |||
529 | ** | |||
530 | ** If an xToken() callback returns any value other than SQLITE_OK, then | |||
531 | ** the tokenization should be abandoned and the xTokenize() method should | |||
532 | ** immediately return a copy of the xToken() return value. Or, if the | |||
533 | ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, | |||
534 | ** if an error occurs with the xTokenize() implementation itself, it | |||
535 | ** may abandon the tokenization and return any error code other than | |||
536 | ** SQLITE_OK or SQLITE_DONE. | |||
537 | ** | |||
538 | ** If the tokenizer is registered using an fts5_tokenizer_v2 object, | |||
539 | ** then the xTokenize() method has two additional arguments - pLocale | |||
540 | ** and nLocale. These specify the locale that the tokenizer should use | |||
541 | ** for the current request. If pLocale and nLocale are both 0, then the | |||
542 | ** tokenizer should use its default locale. Otherwise, pLocale points to | |||
543 | ** an nLocale byte buffer containing the name of the locale to use as utf-8 | |||
544 | ** text. pLocale is not nul-terminated. | |||
545 | ** | |||
546 | ** FTS5_TOKENIZER | |||
547 | ** | |||
548 | ** There is also an fts5_tokenizer object. This is an older, deprecated, | |||
549 | ** version of fts5_tokenizer_v2. It is similar except that: | |||
550 | ** | |||
551 | ** <ul> | |||
552 | ** <li> There is no "iVersion" field, and | |||
553 | ** <li> The xTokenize() method does not take a locale argument. | |||
554 | ** </ul> | |||
555 | ** | |||
556 | ** Legacy fts5_tokenizer tokenizers must be registered using the | |||
557 | ** legacy xCreateTokenizer() function, instead of xCreateTokenizer_v2(). | |||
558 | ** | |||
559 | ** Tokenizer implementations registered using either API may be retrieved | |||
560 | ** using both xFindTokenizer() and xFindTokenizer_v2(). | |||
561 | ** | |||
562 | ** SYNONYM SUPPORT | |||
563 | ** | |||
564 | ** Custom tokenizers may also support synonyms. Consider a case in which a | |||
565 | ** user wishes to query for a phrase such as "first place". Using the | |||
566 | ** built-in tokenizers, the FTS5 query 'first + place' will match instances | |||
567 | ** of "first place" within the document set, but not alternative forms | |||
568 | ** such as "1st place". In some applications, it would be better to match | |||
569 | ** all instances of "first place" or "1st place" regardless of which form | |||
570 | ** the user specified in the MATCH query text. | |||
571 | ** | |||
572 | ** There are several ways to approach this in FTS5: | |||
573 | ** | |||
574 | ** <ol><li> By mapping all synonyms to a single token. In this case, using | |||
575 | ** the above example, this means that the tokenizer returns the | |||
576 | ** same token for inputs "first" and "1st". Say that token is in | |||
577 | ** fact "first", so that when the user inserts the document "I won | |||
578 | ** 1st place" entries are added to the index for tokens "i", "won", | |||
579 | ** "first" and "place". If the user then queries for '1st + place', | |||
580 | ** the tokenizer substitutes "first" for "1st" and the query works | |||
581 | ** as expected. | |||
582 | ** | |||
583 | ** <li> By querying the index for all synonyms of each query term | |||
584 | ** separately. In this case, when tokenizing query text, the | |||
585 | ** tokenizer may provide multiple synonyms for a single term | |||
586 | ** within the document. FTS5 then queries the index for each | |||
587 | ** synonym individually. For example, faced with the query: | |||
588 | ** | |||
589 | ** <codeblock> | |||
590 | ** ... MATCH 'first place'</codeblock> | |||
591 | ** | |||
592 | ** the tokenizer offers both "1st" and "first" as synonyms for the | |||
593 | ** first token in the MATCH query and FTS5 effectively runs a query | |||
594 | ** similar to: | |||
595 | ** | |||
596 | ** <codeblock> | |||
597 | ** ... MATCH '(first OR 1st) place'</codeblock> | |||
598 | ** | |||
599 | ** except that, for the purposes of auxiliary functions, the query | |||
600 | ** still appears to contain just two phrases - "(first OR 1st)" | |||
601 | ** being treated as a single phrase. | |||
602 | ** | |||
603 | ** <li> By adding multiple synonyms for a single term to the FTS index. | |||
604 | ** Using this method, when tokenizing document text, the tokenizer | |||
605 | ** provides multiple synonyms for each token. So that when a | |||
606 | ** document such as "I won first place" is tokenized, entries are | |||
607 | ** added to the FTS index for "i", "won", "first", "1st" and | |||
608 | ** "place". | |||
609 | ** | |||
610 | ** This way, even if the tokenizer does not provide synonyms | |||
611 | ** when tokenizing query text (it should not - to do so would be | |||
612 | ** inefficient), it doesn't matter if the user queries for | |||
613 | ** 'first + place' or '1st + place', as there are entries in the | |||
614 | ** FTS index corresponding to both forms of the first token. | |||
615 | ** </ol> | |||
616 | ** | |||
617 | ** Whether it is parsing document or query text, any call to xToken that | |||
618 | ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit | |||
619 | ** is considered to supply a synonym for the previous token. For example, | |||
620 | ** when parsing the document "I won first place", a tokenizer that supports | |||
621 | ** synonyms would call xToken() 5 times, as follows: | |||
622 | ** | |||
623 | ** <codeblock> | |||
624 | ** xToken(pCtx, 0, "i", 1, 0, 1); | |||
625 | ** xToken(pCtx, 0, "won", 3, 2, 5); | |||
626 | ** xToken(pCtx, 0, "first", 5, 6, 11); | |||
627 | ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); | |||
628 | ** xToken(pCtx, 0, "place", 5, 12, 17); | |||
629 | **</codeblock> | |||
630 | ** | |||
631 | ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time | |||
632 | ** xToken() is called. Multiple synonyms may be specified for a single token | |||
633 | ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. | |||
634 | ** There is no limit to the number of synonyms that may be provided for a | |||
635 | ** single token. | |||
636 | ** | |||
637 | ** In many cases, method (1) above is the best approach. It does not add | |||
638 | ** extra data to the FTS index or require FTS5 to query for multiple terms, | |||
639 | ** so it is efficient in terms of disk space and query speed. However, it | |||
640 | ** does not support prefix queries very well. If, as suggested above, the | |||
641 | ** token "first" is substituted for "1st" by the tokenizer, then the query: | |||
642 | ** | |||
643 | ** <codeblock> | |||
644 | ** ... MATCH '1s*'</codeblock> | |||
645 | ** | |||
646 | ** will not match documents that contain the token "1st" (as the tokenizer | |||
647 | ** will probably not map "1s" to any prefix of "first"). | |||
648 | ** | |||
649 | ** For full prefix support, method (3) may be preferred. In this case, | |||
650 | ** because the index contains entries for both "first" and "1st", prefix | |||
651 | ** queries such as 'fi*' or '1s*' will match correctly. However, because | |||
652 | ** extra entries are added to the FTS index, this method uses more space | |||
653 | ** within the database. | |||
654 | ** | |||
655 | ** Method (2) offers a midpoint between (1) and (3). Using this method, | |||
656 | ** a query such as '1s*' will match documents that contain the literal | |||
657 | ** token "1st", but not "first" (assuming the tokenizer is not able to | |||
658 | ** provide synonyms for prefixes). However, a non-prefix query like '1st' | |||
659 | ** will match against "1st" and "first". This method does not require | |||
660 | ** extra disk space, as no extra entries are added to the FTS index. | |||
661 | ** On the other hand, it may require more CPU cycles to run MATCH queries, | |||
662 | ** as separate queries of the FTS index are required for each synonym. | |||
663 | ** | |||
664 | ** When using methods (2) or (3), it is important that the tokenizer only | |||
665 | ** provide synonyms when tokenizing document text (method (3)) or query | |||
666 | ** text (method (2)), not both. Doing so will not cause any errors, but is | |||
667 | ** inefficient. | |||
668 | */ | |||
669 | typedef struct Fts5Tokenizer Fts5Tokenizer; | |||
670 | typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2; | |||
671 | struct fts5_tokenizer_v2 { /* Version 2 tokenizer method table */ | |||
672 | int iVersion; /* Currently always 2 */ | |||
673 | ||||
674 | int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); /* Presumably returns the new tokenizer handle via *ppOut - confirm */ | |||
675 | void (*xDelete)(Fts5Tokenizer*); /* Presumably releases a handle obtained from xCreate - confirm */ | |||
676 | int (*xTokenize)(Fts5Tokenizer*, | |||
677 | void *pCtx, | |||
678 | int flags, /* Mask of FTS5_TOKENIZE_* flags */ | |||
679 | const char *pText, int nText, | |||
680 | const char *pLocale, int nLocale, /* Locale arguments; the legacy fts5_tokenizer.xTokenize has none */ | |||
681 | int (*xToken)( | |||
682 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | |||
683 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
684 | const char *pToken, /* Pointer to buffer containing token */ | |||
685 | int nToken, /* Size of token in bytes */ | |||
686 | int iStart, /* Byte offset of token within input text */ | |||
687 | int iEnd /* Byte offset of end of token within input text */ | |||
688 | ) | |||
689 | ); | |||
690 | }; | |||
691 | ||||
692 | /* | |||
693 | ** New code should use the fts5_tokenizer_v2 type to define tokenizer | |||
694 | ** implementations. The following type is included for legacy applications | |||
695 | ** that still use it. | |||
696 | */ | |||
697 | typedef struct fts5_tokenizer fts5_tokenizer; | |||
698 | struct fts5_tokenizer { /* Legacy method table: no iVersion field and no locale arguments */ | |||
699 | int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); /* Same signature as fts5_tokenizer_v2.xCreate */ | |||
700 | void (*xDelete)(Fts5Tokenizer*); /* Same signature as fts5_tokenizer_v2.xDelete */ | |||
701 | int (*xTokenize)(Fts5Tokenizer*, | |||
702 | void *pCtx, | |||
703 | int flags, /* Mask of FTS5_TOKENIZE_* flags */ | |||
704 | const char *pText, int nText, /* Unlike the v2 method, no pLocale/nLocale pair follows */ | |||
705 | int (*xToken)( | |||
706 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | |||
707 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
708 | const char *pToken, /* Pointer to buffer containing token */ | |||
709 | int nToken, /* Size of token in bytes */ | |||
710 | int iStart, /* Byte offset of token within input text */ | |||
711 | int iEnd /* Byte offset of end of token within input text */ | |||
712 | ) | |||
713 | ); | |||
714 | }; | |||
715 | ||||
716 | ||||
717 | /* Flags that may be passed as the third argument to xTokenize() */ | |||
718 | #define FTS5_TOKENIZE_QUERY0x0001 0x0001 | |||
719 | #define FTS5_TOKENIZE_PREFIX0x0002 0x0002 | |||
720 | #define FTS5_TOKENIZE_DOCUMENT0x0004 0x0004 | |||
721 | #define FTS5_TOKENIZE_AUX0x0008 0x0008 | |||
722 | ||||
723 | /* Flags that may be passed by the tokenizer implementation back to FTS5 | |||
724 | ** as the second argument to the supplied xToken callback. */ | |||
725 | #define FTS5_TOKEN_COLOCATED0x0001 0x0001 /* Same position as prev. token */ | |||
726 | ||||
727 | /* | |||
728 | ** END OF CUSTOM TOKENIZERS | |||
729 | *************************************************************************/ | |||
730 | ||||
731 | /************************************************************************* | |||
732 | ** FTS5 EXTENSION REGISTRATION API | |||
733 | */ | |||
734 | typedef struct fts5_api fts5_api; | |||
735 | struct fts5_api { | |||
736 | int iVersion; /* Currently always set to 3 */ | |||
737 | ||||
738 | /* Create a new tokenizer using the legacy fts5_tokenizer interface */ | |||
739 | int (*xCreateTokenizer)( | |||
740 | fts5_api *pApi, | |||
741 | const char *zName, | |||
742 | void *pUserData, | |||
743 | fts5_tokenizer *pTokenizer, | |||
744 | void (*xDestroy)(void*) | |||
745 | ); | |||
746 | ||||
747 | /* Find an existing tokenizer, reported through the legacy fts5_tokenizer type */ | |||
748 | int (*xFindTokenizer)( | |||
749 | fts5_api *pApi, | |||
750 | const char *zName, | |||
751 | void **ppUserData, | |||
752 | fts5_tokenizer *pTokenizer | |||
753 | ); | |||
754 | ||||
755 | /* Create a new auxiliary function */ | |||
756 | int (*xCreateFunction)( | |||
757 | fts5_api *pApi, | |||
758 | const char *zName, | |||
759 | void *pUserData, | |||
760 | fts5_extension_function xFunction, | |||
761 | void (*xDestroy)(void*) | |||
762 | ); | |||
763 | ||||
764 | /* APIs below this point are only available if iVersion>=3 */ | |||
765 | ||||
766 | /* Create a new tokenizer using the fts5_tokenizer_v2 interface */ | |||
767 | int (*xCreateTokenizer_v2)( | |||
768 | fts5_api *pApi, | |||
769 | const char *zName, | |||
770 | void *pUserData, | |||
771 | fts5_tokenizer_v2 *pTokenizer, | |||
772 | void (*xDestroy)(void*) | |||
773 | ); | |||
774 | ||||
775 | /* Find an existing tokenizer, reported through the fts5_tokenizer_v2 type. | |||
776 | int (*xFindTokenizer_v2)( | |||
777 | fts5_api *pApi, | |||
778 | const char *zName, | |||
779 | void **ppUserData, | |||
780 | fts5_tokenizer_v2 **ppTokenizer | |||
781 | ); | |||
782 | }; | |||
783 | ||||
784 | /* | |||
785 | ** END OF REGISTRATION API | |||
786 | *************************************************************************/ | |||
787 | ||||
788 | #ifdef __cplusplus | |||
789 | } /* end of the 'extern "C"' block */ | |||
790 | #endif | |||
791 | ||||
792 | #endif /* _FTS5_H */ | |||
793 | ||||
794 | #line 1 "fts5Int.h" | |||
795 | /* | |||
796 | ** 2014 May 31 | |||
797 | ** | |||
798 | ** The author disclaims copyright to this source code. In place of | |||
799 | ** a legal notice, here is a blessing: | |||
800 | ** | |||
801 | ** May you do good and not evil. | |||
802 | ** May you find forgiveness for yourself and forgive others. | |||
803 | ** May you share freely, never taking more than you give. | |||
804 | ** | |||
805 | ****************************************************************************** | |||
806 | ** | |||
807 | */ | |||
808 | #ifndef _FTS5INT_H | |||
809 | #define _FTS5INT_H | |||
810 | ||||
811 | /* #include "fts5.h" */ | |||
812 | #include "sqlite3ext.h" | |||
813 | SQLITE_EXTENSION_INIT1const sqlite3_api_routines *sqlite3_api=0; | |||
814 | ||||
815 | #include <string.h> | |||
816 | #include <assert.h> | |||
817 | #include <stddef.h> | |||
818 | ||||
819 | #ifndef SQLITE_AMALGAMATION | |||
820 | ||||
821 | typedef unsigned char u8; | |||
822 | typedef unsigned int u32; | |||
823 | typedef unsigned short u16; | |||
824 | typedef short i16; | |||
825 | typedef sqlite3_int64 i64; | |||
826 | typedef sqlite3_uint64 u64; | |||
827 | ||||
828 | #ifndef ArraySize | |||
829 | # define ArraySize(x)((int)(sizeof(x) / sizeof(x[0]))) ((int)(sizeof(x) / sizeof(x[0]))) | |||
830 | #endif | |||
831 | ||||
832 | #define testcase(x) | |||
833 | ||||
834 | #if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST) /* Coverage and mutation-test builds omit the auxiliary checks */ | |||
835 | # define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1 | |||
836 | #endif | |||
837 | #if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS) /* Checks omitted: ALWAYS() is the constant 1, NEVER() the constant 0; X is not evaluated */ | |||
838 | # define ALWAYS(X)(X) (1) | |||
839 | # define NEVER(X)(X) (0) | |||
840 | #elif !defined(NDEBUG1) /* Assertions enabled: X is evaluated and assert(0) fires if it has the unexpected value */ | |||
841 | # define ALWAYS(X)(X) ((X)?1:(assert(0)((void) (0)),0)) | |||
842 | # define NEVER(X)(X) ((X)?(assert(0)((void) (0)),1):0) | |||
843 | #else /* Otherwise both macros simply pass X through */ | |||
844 | # define ALWAYS(X)(X) (X) | |||
845 | # define NEVER(X)(X) (X) | |||
846 | #endif | |||
847 | ||||
848 | #define MIN(x,y)(((x) < (y)) ? (x) : (y)) (((x) < (y)) ? (x) : (y)) | |||
849 | #define MAX(x,y)(((x) > (y)) ? (x) : (y)) (((x) > (y)) ? (x) : (y)) | |||
850 | ||||
851 | /* | |||
852 | ** Constants for the largest and smallest possible 64-bit signed integers. | |||
853 | */ | |||
854 | # define LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) (0xffffffff|(((i64)0x7fffffff)<<32)) | |||
855 | # define SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))) (((i64)-1) - LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))) | |||
856 | ||||
857 | /* The uptr type is an unsigned integer large enough to hold a pointer | |||
858 | */ | |||
859 | #if defined(HAVE_STDINT_H1) | |||
860 | typedef uintptr_t uptr; | |||
861 | #elif SQLITE_PTRSIZE==4 | |||
862 | typedef u32 uptr; | |||
863 | #else | |||
864 | typedef u64 uptr; | |||
865 | #endif | |||
866 | ||||
867 | #ifdef SQLITE_4_BYTE_ALIGNED_MALLOC | |||
868 | # define EIGHT_BYTE_ALIGNMENT(X)((((uptr)(X) - (uptr)0)&7)==0) ((((uptr)(X) - (uptr)0)&3)==0) | |||
869 | #else | |||
870 | # define EIGHT_BYTE_ALIGNMENT(X)((((uptr)(X) - (uptr)0)&7)==0) ((((uptr)(X) - (uptr)0)&7)==0) | |||
871 | #endif | |||
872 | ||||
873 | /* | |||
874 | ** Macros needed to provide flexible arrays in a portable way | |||
875 | */ | |||
876 | #ifndef offsetof | |||
877 | # define offsetof(STRUCTURE,FIELD)__builtin_offsetof(STRUCTURE, FIELD) ((size_t)((char*)&((STRUCTURE*)0)->FIELD)) | |||
878 | #endif | |||
879 | #if defined(__STDC_VERSION__201710L) && (__STDC_VERSION__201710L >= 199901L) | |||
880 | # define FLEXARRAY | |||
881 | #else | |||
882 | # define FLEXARRAY 1 | |||
883 | #endif | |||
884 | ||||
885 | #endif | |||
886 | ||||
887 | /* Truncate very long tokens to this many bytes. Hard limit is | |||
888 | ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset | |||
889 | ** field that occurs at the start of each leaf page (see fts5_index.c). */ | |||
890 | #define FTS5_MAX_TOKEN_SIZE32768 32768 | |||
891 | ||||
892 | /* | |||
893 | ** Maximum number of prefix indexes on single FTS5 table. This must be | |||
894 | ** less than 32. If it is set to anything larger than that, an #error | |||
895 | ** directive in fts5_index.c will cause the build to fail. | |||
896 | */ | |||
897 | #define FTS5_MAX_PREFIX_INDEXES31 31 | |||
898 | ||||
899 | /* | |||
900 | ** Maximum segments permitted in a single index | |||
901 | */ | |||
902 | #define FTS5_MAX_SEGMENT2000 2000 | |||
903 | ||||
904 | #define FTS5_DEFAULT_NEARDIST10 10 | |||
905 | #define FTS5_DEFAULT_RANK"bm25" "bm25" | |||
906 | ||||
907 | /* Name of rank and rowid columns */ | |||
908 | #define FTS5_RANK_NAME"rank" "rank" | |||
909 | #define FTS5_ROWID_NAME"rowid" "rowid" | |||
910 | ||||
911 | #ifdef SQLITE_DEBUG | |||
912 | # define FTS5_CORRUPT(11 | (1<<8)) sqlite3Fts5Corrupt() | |||
913 | static int sqlite3Fts5Corrupt(void); | |||
914 | #else | |||
915 | # define FTS5_CORRUPT(11 | (1<<8)) SQLITE_CORRUPT_VTAB(11 | (1<<8)) | |||
916 | #endif | |||
917 | ||||
918 | /* | |||
919 | ** The assert_nc() macro is similar to the assert() macro, except that it | |||
920 | ** is used for assert() conditions that are true only if it can be | |||
921 | ** guaranteed that the database is not corrupt. | |||
922 | */ | |||
923 | #ifdef SQLITE_DEBUG | |||
924 | extern int sqlite3_fts5_may_be_corrupt; | |||
925 | # define assert_nc(x)((void) (0)) assert(sqlite3_fts5_may_be_corrupt || (x))((void) (0)) | |||
926 | #else | |||
927 | # define assert_nc(x)((void) (0)) assert(x)((void) (0)) | |||
928 | #endif | |||
929 | ||||
930 | /* | |||
931 | ** A version of memcmp() that does not cause asan errors if one of the pointer | |||
932 | ** parameters is NULL and the number of bytes to compare is zero. | |||
933 | */ | |||
934 | #define fts5Memcmp(s1, s2, n)((n)<=0 ? 0 : memcmp((s1), (s2), (n))) ((n)<=0 ? 0 : memcmp((s1), (s2), (n))) | |||
935 | ||||
936 | /* Mark a function parameter as unused, to suppress nuisance compiler | |||
937 | ** warnings. */ | |||
938 | #ifndef UNUSED_PARAM | |||
939 | # define UNUSED_PARAM(X)(void)(X) (void)(X) | |||
940 | #endif | |||
941 | ||||
942 | #ifndef UNUSED_PARAM2 | |||
943 | # define UNUSED_PARAM2(X, Y)(void)(X), (void)(Y) (void)(X), (void)(Y) | |||
944 | #endif | |||
945 | ||||
946 | typedef struct Fts5Global Fts5Global; | |||
947 | typedef struct Fts5Colset Fts5Colset; | |||
948 | ||||
949 | /* If a NEAR() clump or phrase may only match a specific set of columns, | |||
950 | ** then an object of the following type is used to record the set of columns. | |||
951 | ** Each entry in the aiCol[] array is a column that may be matched. | |||
952 | ** | |||
953 | ** This object is used by fts5_expr.c and fts5_index.c. | |||
954 | */ | |||
955 | struct Fts5Colset { | |||
956 | int nCol; /* Presumably the number of entries in aiCol[] - confirm */ | |||
957 | int aiCol[FLEXARRAY]; /* Columns that may be matched; FLEXARRAY is empty on C99 or later, 1 otherwise */ | |||
958 | }; | |||
959 | ||||
960 | /* Size (in bytes) of a complete Fts5Colset object with N columns. */ | |||
961 | #define SZ_FTS5COLSET(N)(sizeof(i64)*((N+2)/2)) (sizeof(i64)*((N+2)/2)) | |||
962 | ||||
963 | /************************************************************************** | |||
964 | ** Interface to code in fts5_config.c. fts5_config.c contains code | |||
965 | ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. | |||
966 | */ | |||
967 | ||||
968 | typedef struct Fts5Config Fts5Config; | |||
969 | typedef struct Fts5TokenizerConfig Fts5TokenizerConfig; | |||
970 | ||||
971 | struct Fts5TokenizerConfig { | |||
972 | Fts5Tokenizer *pTok; /* Tokenizer handle (the type xCreate() outputs) */ | |||
973 | fts5_tokenizer_v2 *pApi2; /* Version 2 tokenizer methods */ | |||
974 | fts5_tokenizer *pApi1; /* Legacy tokenizer methods */ | |||
975 | const char **azArg; /* Presumably the arguments handed to xCreate() - confirm */ | |||
976 | int nArg; /* Presumably the number of entries in azArg[] - confirm */ | |||
977 | int ePattern; /* FTS5_PATTERN_XXX constant */ | |||
978 | const char *pLocale; /* Current locale to use */ | |||
979 | int nLocale; /* Size of pLocale in bytes */ | |||
980 | }; | |||
981 | ||||
982 | /* | |||
983 | ** An instance of the following structure encodes all information that can | |||
984 | ** be gleaned from the CREATE VIRTUAL TABLE statement. | |||
985 | ** | |||
986 | ** And all information loaded from the %_config table. | |||
987 | ** | |||
988 | ** nAutomerge: | |||
989 | ** The minimum number of segments that an auto-merge operation should | |||
990 | ** attempt to merge together. A value of 1 sets the object to use the | |||
991 | ** compile time default. Zero disables auto-merge altogether. | |||
992 | ** | |||
993 | ** bContentlessDelete: | |||
994 | ** True if the contentless_delete option was present in the CREATE | |||
995 | ** VIRTUAL TABLE statement. | |||
996 | ** | |||
997 | ** zContent: | |||
998 | ** | |||
999 | ** zContentRowid: | |||
1000 | ** The value of the content_rowid= option, if one was specified. Or | |||
1001 | ** the string "rowid" otherwise. This text is not quoted - if it is | |||
1002 | ** used as part of an SQL statement it needs to be quoted appropriately. | |||
1003 | ** | |||
1004 | ** zContentExprlist: | |||
1005 | ** | |||
1006 | ** pzErrmsg: | |||
1007 | ** This exists in order to allow the fts5_index.c module to return a | |||
1008 | ** decent error message if it encounters a file-format version it does | |||
1009 | ** not understand. | |||
1010 | ** | |||
1011 | ** bColumnsize: | |||
1012 | ** True if the %_docsize table is created. | |||
1013 | ** | |||
1014 | ** bPrefixIndex: | |||
1015 | ** This is only used for debugging. If set to false, any prefix indexes | |||
1016 | ** are ignored. This value is configured using: | |||
1017 | ** | |||
1018 | ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); | |||
1019 | ** | |||
1020 | ** bLocale: | |||
1021 | ** Set to true if locale=1 was specified when the table was created. | |||
1022 | */ | |||
1023 | struct Fts5Config { | |||
1024 | sqlite3 *db; /* Database handle */ | |||
1025 | Fts5Global *pGlobal; /* Global fts5 object for handle db */ | |||
1026 | char *zDb; /* Database holding FTS index (e.g. "main") */ | |||
1027 | char *zName; /* Name of FTS index */ | |||
1028 | int nCol; /* Number of columns */ | |||
1029 | char **azCol; /* Column names */ | |||
1030 | u8 *abUnindexed; /* True for unindexed columns */ | |||
1031 | int nPrefix; /* Number of prefix indexes */ | |||
1032 | int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ | |||
1033 | int eContent; /* An FTS5_CONTENT value */ | |||
1034 | int bContentlessDelete; /* "contentless_delete=" option (dflt==0) */ | |||
1035 | int bContentlessUnindexed; /* "contentless_unindexed=" option (dflt=0) */ | |||
1036 | char *zContent; /* content table */ | |||
1037 | char *zContentRowid; /* "content_rowid=" option value */ | |||
1038 | int bColumnsize; /* "columnsize=" option value (dflt==1) */ | |||
1039 | int bTokendata; /* "tokendata=" option value (dflt==0) */ | |||
1040 | int bLocale; /* "locale=" option value (dflt==0) */ | |||
1041 | int eDetail; /* FTS5_DETAIL_XXX value */ | |||
1042 | char *zContentExprlist; | |||
1043 | Fts5TokenizerConfig t; | |||
1044 | int bLock; /* True when table is preparing statement */ | |||
1045 | ||||
1046 | ||||
1047 | /* Values loaded from the %_config table */ | |||
1048 | int iVersion; /* fts5 file format 'version' */ | |||
1049 | int iCookie; /* Incremented when %_config is modified */ | |||
1050 | int pgsz; /* Approximate page size used in %_data */ | |||
1051 | int nAutomerge; /* 'automerge' setting */ | |||
1052 | int nCrisisMerge; /* Maximum allowed segments per level */ | |||
1053 | int nUsermerge; /* 'usermerge' setting */ | |||
1054 | int nHashSize; /* Bytes of memory for in-memory hash */ | |||
1055 | char *zRank; /* Name of rank function */ | |||
1056 | char *zRankArgs; /* Arguments to rank function */ | |||
1057 | int bSecureDelete; /* 'secure-delete' */ | |||
1058 | int nDeleteMerge; /* 'deletemerge' */ | |||
1059 | int bPrefixInsttoken; /* 'prefix-insttoken' */ | |||
1060 | ||||
1061 | /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ | |||
1062 | char **pzErrmsg; | |||
1063 | ||||
1064 | #ifdef SQLITE_DEBUG | |||
1065 | int bPrefixIndex; /* True to use prefix-indexes */ | |||
1066 | #endif | |||
1067 | }; | |||
1068 | ||||
1069 | /* Current expected value of %_config table 'version' field. And | |||
1070 | ** the expected version if the 'secure-delete' option has ever been | |||
1071 | ** set on the table. */ | |||
1072 | #define FTS5_CURRENT_VERSION4 4 | |||
1073 | #define FTS5_CURRENT_VERSION_SECUREDELETE5 5 | |||
1074 | ||||
1075 | #define FTS5_CONTENT_NORMAL0 0 | |||
1076 | #define FTS5_CONTENT_NONE1 1 | |||
1077 | #define FTS5_CONTENT_EXTERNAL2 2 | |||
1078 | #define FTS5_CONTENT_UNINDEXED3 3 | |||
1079 | ||||
1080 | #define FTS5_DETAIL_FULL0 0 | |||
1081 | #define FTS5_DETAIL_NONE1 1 | |||
1082 | #define FTS5_DETAIL_COLUMNS2 2 | |||
1083 | ||||
1084 | #define FTS5_PATTERN_NONE0 0 | |||
1085 | #define FTS5_PATTERN_LIKE65 65 /* matches SQLITE_INDEX_CONSTRAINT_LIKE */ | |||
1086 | #define FTS5_PATTERN_GLOB66 66 /* matches SQLITE_INDEX_CONSTRAINT_GLOB */ | |||
1087 | ||||
1088 | static int sqlite3Fts5ConfigParse( | |||
1089 | Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** | |||
1090 | ); | |||
1091 | static void sqlite3Fts5ConfigFree(Fts5Config*); | |||
1092 | ||||
1093 | static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); | |||
1094 | ||||
1095 | static int sqlite3Fts5Tokenize( | |||
1096 | Fts5Config *pConfig, /* FTS5 Configuration object */ | |||
1097 | int flags, /* FTS5_TOKENIZE_* flags */ | |||
1098 | const char *pText, int nText, /* Text to tokenize */ | |||
1099 | void *pCtx, /* Context passed to xToken() */ | |||
1100 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | |||
1101 | ); | |||
1102 | ||||
1103 | static void sqlite3Fts5Dequote(char *z); | |||
1104 | ||||
1105 | /* Load the contents of the %_config table */ | |||
1106 | static int sqlite3Fts5ConfigLoad(Fts5Config*, int); | |||
1107 | ||||
1108 | /* Set the value of a single config attribute */ | |||
1109 | static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); | |||
1110 | ||||
1111 | static int sqlite3Fts5ConfigParseRank(const char*, char**, char**); | |||
1112 | ||||
1113 | static void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...); | |||
1114 | ||||
1115 | /* | |||
1116 | ** End of interface to code in fts5_config.c. | |||
1117 | **************************************************************************/ | |||
1118 | ||||
1119 | /************************************************************************** | |||
1120 | ** Interface to code in fts5_buffer.c. | |||
1121 | */ | |||
1122 | ||||
1123 | /* | |||
1124 | ** Buffer object for the incremental building of string data. | |||
1125 | */ | |||
1126 | typedef struct Fts5Buffer Fts5Buffer; | |||
1127 | struct Fts5Buffer { | |||
1128 | u8 *p; /* Buffer contents */ | |||
1129 | int n; /* Bytes currently in use; fts5BufferGrow() adds the requested size to this */ | |||
1130 | int nSpace; /* Bytes available at p; fts5BufferGrow() resizes when n plus the request exceeds it */ | |||
1131 | }; | |||
1132 | ||||
1133 | static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32); | |||
1134 | static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64); | |||
1135 | static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*); | |||
1136 | static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*); | |||
1137 | static void sqlite3Fts5BufferFree(Fts5Buffer*); | |||
1138 | static void sqlite3Fts5BufferZero(Fts5Buffer*); | |||
1139 | static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); | |||
1140 | static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); | |||
1141 | ||||
1142 | static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); | |||
1143 | ||||
1144 | #define fts5BufferZero(x)sqlite3Fts5BufferZero(x) sqlite3Fts5BufferZero(x) | |||
1145 | #define fts5BufferAppendVarint(a,b,c)sqlite3Fts5BufferAppendVarint(a,b,(i64)c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c) | |||
1146 | #define fts5BufferFree(a)sqlite3Fts5BufferFree(a) sqlite3Fts5BufferFree(a) | |||
1147 | #define fts5BufferAppendBlob(a,b,c,d)sqlite3Fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) | |||
1148 | #define fts5BufferSet(a,b,c,d)sqlite3Fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) | |||
1149 | ||||
1150 | #define fts5BufferGrow(pRc,pBuf,nn)( (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) ) ( \ | |||
1151 | (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \ | |||
1152 | sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \ | |||
1153 | ) | |||
1154 | ||||
1155 | /* Write and decode big-endian 32-bit integer values */ | |||
1156 | static void sqlite3Fts5Put32(u8*, int); | |||
1157 | static int sqlite3Fts5Get32(const u8*); | |||
1158 | ||||
1159 | #define FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF) (int)((iPos >> 32) & 0x7FFFFFFF) | |||
1160 | #define FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF) (int)(iPos & 0x7FFFFFFF) | |||
1161 | ||||
1162 | typedef struct Fts5PoslistReader Fts5PoslistReader; | |||
1163 | struct Fts5PoslistReader { | |||
1164 | /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ | |||
1165 | const u8 *a; /* Position list to iterate through */ | |||
1166 | int n; /* Size of buffer at a[] in bytes */ | |||
1167 | int i; /* Current offset in a[] */ | |||
1168 | ||||
1169 | u8 bFlag; /* For client use (any custom purpose) */ | |||
1170 | ||||
1171 | /* Output variables */ | |||
1172 | u8 bEof; /* Set to true at EOF */ | |||
1173 | i64 iPos; /* (iCol<<32) + iPos */ | |||
1174 | }; | |||
1175 | static int sqlite3Fts5PoslistReaderInit( | |||
1176 | const u8 *a, int n, /* Poslist buffer to iterate through */ | |||
1177 | Fts5PoslistReader *pIter /* Iterator object to initialize */ | |||
1178 | ); | |||
1179 | static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*); | |||
1180 | ||||
1181 | typedef struct Fts5PoslistWriter Fts5PoslistWriter; | |||
1182 | struct Fts5PoslistWriter { /* State passed to sqlite3Fts5PoslistWriterAppend() */ | |||
1183 | i64 iPrev; /* NOTE(review): presumably the previous position appended - confirm in fts5_buffer.c */ | |||
1184 | }; | |||
1185 | static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); | |||
1186 | static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64); | |||
1187 | ||||
1188 | static int sqlite3Fts5PoslistNext64( | |||
1189 | const u8 *a, int n, /* Buffer containing poslist */ | |||
1190 | int *pi, /* IN/OUT: Offset within a[] */ | |||
1191 | i64 *piOff /* IN/OUT: Current offset */ | |||
1192 | ); | |||
1193 | ||||
1194 | /* Malloc utility */ | |||
1195 | static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte); | |||
1196 | static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); | |||
1197 | ||||
1198 | /* Character set tests (like isspace(), isalpha() etc.) */ | |||
1199 | static int sqlite3Fts5IsBareword(char t); | |||
1200 | ||||
1201 | ||||
1202 | /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ | |||
1203 | typedef struct Fts5Termset Fts5Termset; | |||
1204 | static int sqlite3Fts5TermsetNew(Fts5Termset**); | |||
1205 | static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent); | |||
1206 | static void sqlite3Fts5TermsetFree(Fts5Termset*); | |||
1207 | ||||
1208 | /* | |||
1209 | ** End of interface to code in fts5_buffer.c. | |||
1210 | **************************************************************************/ | |||
1211 | ||||
1212 | /************************************************************************** | |||
1213 | ** Interface to code in fts5_index.c. fts5_index.c contains code | |||
1214 | ** to access the data stored in the %_data table. | |||
1215 | */ | |||
1216 | ||||
1217 | typedef struct Fts5Index Fts5Index; | |||
1218 | typedef struct Fts5IndexIter Fts5IndexIter; | |||
1219 | ||||
1220 | struct Fts5IndexIter { /* Iterator handle produced by sqlite3Fts5IndexQuery() */ | |||
1221 | i64 iRowid; /* NOTE(review): presumably the rowid of the current entry - confirm */ | |||
1222 | const u8 *pData; /* NOTE(review): presumably data for the current entry - confirm */ | |||
1223 | int nData; /* NOTE(review): presumably the size of pData in bytes - confirm */ | |||
1224 | u8 bEof; /* Value returned by the sqlite3Fts5IterEof() macro */ | |||
1225 | }; | |||
1226 | ||||
1227 | #define sqlite3Fts5IterEof(x)((x)->bEof) ((x)->bEof) | |||
1228 | ||||
1229 | /* | |||
1230 | ** Values used as part of the flags argument passed to IndexQuery(). | |||
1231 | */ | |||
1232 | #define FTS5INDEX_QUERY_PREFIX0x0001 0x0001 /* Prefix query */ | |||
1233 | #define FTS5INDEX_QUERY_DESC0x0002 0x0002 /* Docs in descending rowid order */ | |||
1234 | #define FTS5INDEX_QUERY_TEST_NOIDX0x0004 0x0004 /* Do not use prefix index */ | |||
1235 | #define FTS5INDEX_QUERY_SCAN0x0008 0x0008 /* Scan query (fts5vocab) */ | |||
1236 | ||||
1237 | /* The following are used internally by the fts5_index.c module. They are | |||
1238 | ** defined here only to make it easier to avoid clashes with the flags | |||
1239 | ** above. */ | |||
1240 | #define FTS5INDEX_QUERY_SKIPEMPTY0x0010 0x0010 | |||
1241 | #define FTS5INDEX_QUERY_NOOUTPUT0x0020 0x0020 | |||
1242 | #define FTS5INDEX_QUERY_SKIPHASH0x0040 0x0040 | |||
1243 | #define FTS5INDEX_QUERY_NOTOKENDATA0x0080 0x0080 | |||
1244 | #define FTS5INDEX_QUERY_SCANONETERM0x0100 0x0100 | |||
1245 | ||||
1246 | /* | |||
1247 | ** Create/destroy an Fts5Index object. | |||
1248 | */ | |||
1249 | static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); | |||
1250 | static int sqlite3Fts5IndexClose(Fts5Index *p); | |||
1251 | ||||
1252 | /* | |||
1253 | ** Return a simple checksum value based on the arguments. | |||
1254 | */ | |||
1255 | static u64 sqlite3Fts5IndexEntryCksum( | |||
1256 | i64 iRowid, | |||
1257 | int iCol, | |||
1258 | int iPos, | |||
1259 | int iIdx, | |||
1260 | const char *pTerm, | |||
1261 | int nTerm | |||
1262 | ); | |||
1263 | ||||
1264 | /* | |||
1265 | ** Argument p points to a buffer containing utf-8 text that is nByte bytes in | |||
1266 | ** size. Return the number of bytes in the nChar character prefix of the | |||
1267 | ** buffer, or 0 if there are fewer than nChar characters in total. | |||
1268 | */ | |||
1269 | static int sqlite3Fts5IndexCharlenToBytelen( | |||
1270 | const char *p, | |||
1271 | int nByte, | |||
1272 | int nChar | |||
1273 | ); | |||
1274 | ||||
1275 | /* | |||
1276 | ** Open a new iterator to iterate through all rowids that match the | |||
1277 | ** specified token or token prefix. | |||
1278 | */ | |||
1279 | static int sqlite3Fts5IndexQuery( | |||
1280 | Fts5Index *p, /* FTS index to query */ | |||
1281 | const char *pToken, int nToken, /* Token (or prefix) to query for */ | |||
1282 | int flags, /* Mask of FTS5INDEX_QUERY_X flags */ | |||
1283 | Fts5Colset *pColset, /* Match these columns only */ | |||
1284 | Fts5IndexIter **ppIter /* OUT: New iterator object */ | |||
1285 | ); | |||
1286 | ||||
1287 | /* | |||
1288 | ** The various operations on open token or token prefix iterators opened | |||
1289 | ** using sqlite3Fts5IndexQuery(). | |||
1290 | */ | |||
1291 | static int sqlite3Fts5IterNext(Fts5IndexIter*); | |||
1292 | static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); | |||
1293 | ||||
1294 | /* | |||
1295 | ** Close an iterator opened by sqlite3Fts5IndexQuery(). | |||
1296 | */ | |||
1297 | static void sqlite3Fts5IterClose(Fts5IndexIter*); | |||
1298 | ||||
1299 | /* | |||
1300 | ** Close the reader blob handle, if it is open. | |||
1301 | */ | |||
1302 | static void sqlite3Fts5IndexCloseReader(Fts5Index*); | |||
1303 | ||||
1304 | /* | |||
1305 | ** This interface is used by the fts5vocab module. | |||
1306 | */ | |||
1307 | static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*); | |||
1308 | static int sqlite3Fts5IterNextScan(Fts5IndexIter*); | |||
1309 | static void *sqlite3Fts5StructureRef(Fts5Index*); | |||
1310 | static void sqlite3Fts5StructureRelease(void*); | |||
1311 | static int sqlite3Fts5StructureTest(Fts5Index*, void*); | |||
1312 | ||||
1313 | /* | |||
1314 | ** Used by xInstToken(): | |||
1315 | */ | |||
1316 | static int sqlite3Fts5IterToken( | |||
1317 | Fts5IndexIter *pIndexIter, | |||
1318 | const char *pToken, int nToken, | |||
1319 | i64 iRowid, | |||
1320 | int iCol, | |||
1321 | int iOff, | |||
1322 | const char **ppOut, int *pnOut | |||
1323 | ); | |||
1324 | ||||
1325 | /* | |||
1326 | ** Insert or remove data to or from the index. Each time a document is | |||
1327 | ** added to or removed from the index, this function is called one or more | |||
1328 | ** times. | |||
1329 | ** | |||
1330 | ** For an insert, it must be called once for each token in the new document. | |||
1331 | ** If the operation is a delete, it must be called (at least) once for each | |||
1332 | ** unique token in the document with an iCol value less than zero. The iPos | |||
1333 | ** argument is ignored for a delete. | |||
1334 | */ | |||
1335 | static int sqlite3Fts5IndexWrite( | |||
1336 | Fts5Index *p, /* Index to write to */ | |||
1337 | int iCol, /* Column token appears in (-ve -> delete) */ | |||
1338 | int iPos, /* Position of token within column */ | |||
1339 | const char *pToken, int nToken /* Token to add or remove to or from index */ | |||
1340 | ); | |||
1341 | ||||
1342 | /* | |||
1343 | ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to | |||
1344 | ** document iDocid. | |||
1345 | */ | |||
1346 | static int sqlite3Fts5IndexBeginWrite( | |||
1347 | Fts5Index *p, /* Index to write to */ | |||
1348 | int bDelete, /* True if current operation is a delete */ | |||
1349 | i64 iDocid /* Docid to add or remove data from */ | |||
1350 | ); | |||
1351 | ||||
1352 | /* | |||
1353 | ** Flush any data stored in the in-memory hash tables to the database. | |||
1354 | ** Also close any open blob handles. | |||
1355 | */ | |||
1356 | static int sqlite3Fts5IndexSync(Fts5Index *p); | |||
1357 | ||||
1358 | /* | |||
1359 | ** Discard any data stored in the in-memory hash tables. Do not write it | |||
1360 | ** to the database. Additionally, assume that the contents of the %_data | |||
1361 | ** table may have changed on disk. So any in-memory caches of %_data | |||
1362 | ** records must be invalidated. | |||
1363 | */ | |||
1364 | static int sqlite3Fts5IndexRollback(Fts5Index *p); | |||
1365 | ||||
1366 | /* | |||
1367 | ** Get or set the "averages" values. | |||
1368 | */ | |||
1369 | static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize); | |||
1370 | static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); | |||
1371 | ||||
1372 | /* | |||
1373 | ** Functions called by the storage module as part of integrity-check. | |||
1374 | */ | |||
1375 | static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum); | |||
1376 | ||||
1377 | /* | |||
1378 | ** Called during virtual module initialization to register UDF | |||
1379 | ** fts5_decode() with SQLite | |||
1380 | */ | |||
1381 | static int sqlite3Fts5IndexInit(sqlite3*); | |||
1382 | ||||
1383 | static int sqlite3Fts5IndexSetCookie(Fts5Index*, int); | |||
1384 | ||||
1385 | /* | |||
1386 | ** Return the total number of entries read from the %_data table by | |||
1387 | ** this connection since it was created. | |||
1388 | */ | |||
1389 | static int sqlite3Fts5IndexReads(Fts5Index *p); | |||
1390 | ||||
1391 | static int sqlite3Fts5IndexReinit(Fts5Index *p); | |||
1392 | static int sqlite3Fts5IndexOptimize(Fts5Index *p); | |||
1393 | static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); | |||
1394 | static int sqlite3Fts5IndexReset(Fts5Index *p); | |||
1395 | ||||
1396 | static int sqlite3Fts5IndexLoadConfig(Fts5Index *p); | |||
1397 | ||||
1398 | static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin); | |||
1399 | static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); | |||
1400 | ||||
1401 | static void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); | |||
1402 | ||||
1403 | /* Used to populate hash tables for xInstToken in detail=none/column mode. */ | |||
1404 | static int sqlite3Fts5IndexIterWriteTokendata( | |||
1405 | Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff | |||
1406 | ); | |||
1407 | ||||
1408 | /* | |||
1409 | ** End of interface to code in fts5_index.c. | |||
1410 | **************************************************************************/ | |||
1411 | ||||
1412 | /************************************************************************** | |||
1413 | ** Interface to code in fts5_varint.c. | |||
1414 | */ | |||
1415 | static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); | |||
1416 | static int sqlite3Fts5GetVarintLen(u32 iVal); | |||
1417 | static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*); | |||
1418 | static int sqlite3Fts5PutVarint(unsigned char *p, u64 v); | |||
1419 | ||||
1420 | #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&(b)) | |||
1421 | #define fts5GetVarint sqlite3Fts5GetVarint | |||
1422 | ||||
1423 | #define fts5FastGetVarint32(a, iOff, nVal) { /* read varint at a[iOff] into nVal, advancing iOff */ \ | |||
1424 | nVal = (a)[iOff++]; \ | |||
1425 | if( nVal & 0x80 ){ /* high bit set: not a single-byte value, use the general decoder */ \ | |||
1426 | iOff--; \ | |||
1427 | iOff += fts5GetVarint32(&(a)[iOff], nVal); \ | |||
1428 | } \ | |||
1429 | } | |||
1430 | ||||
1431 | ||||
1432 | /* | |||
1433 | ** End of interface to code in fts5_varint.c. | |||
1434 | **************************************************************************/ | |||
1435 | ||||
1436 | ||||
1437 | /************************************************************************** | |||
1438 | ** Interface to code in fts5_main.c. | |||
1439 | */ | |||
1440 | ||||
1441 | /* | |||
1442 | ** Virtual-table object. | |||
1443 | */ | |||
1444 | typedef struct Fts5Table Fts5Table; | |||
1445 | struct Fts5Table { /* Groups the SQLite vtab base with this table's config and index handles */ | |||
1446 | sqlite3_vtab base; /* Base class used by SQLite core. NOTE(review): presumably kept first so the object can be handed to SQLite as a sqlite3_vtab* - confirm */ | |||
1447 | Fts5Config *pConfig; /* Virtual table configuration */ | |||
1448 | Fts5Index *pIndex; /* Full-text index */ | |||
1449 | }; | |||
1450 | ||||
1451 | static int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig); | |||
1452 | ||||
1453 | static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64); | |||
1454 | ||||
1455 | static int sqlite3Fts5FlushToDisk(Fts5Table*); | |||
1456 | ||||
1457 | static void sqlite3Fts5ClearLocale(Fts5Config *pConfig); | |||
1458 | static void sqlite3Fts5SetLocale(Fts5Config *pConfig, const char *pLoc, int nLoc); | |||
1459 | ||||
1460 | static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal); | |||
1461 | static int sqlite3Fts5DecodeLocaleValue(sqlite3_value *pVal, | |||
1462 | const char **ppText, int *pnText, const char **ppLoc, int *pnLoc | |||
1463 | ); | |||
1464 | ||||
1465 | /* | |||
1466 | ** End of interface to code in fts5_main.c. | |||
1467 | **************************************************************************/ | |||
1468 | ||||
1469 | /************************************************************************** | |||
1470 | ** Interface to code in fts5_hash.c. | |||
1471 | */ | |||
1472 | typedef struct Fts5Hash Fts5Hash; | |||
1473 | ||||
1474 | /* | |||
1475 | ** Create a hash table, free a hash table. | |||
1476 | */ | |||
1477 | static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize); | |||
1478 | static void sqlite3Fts5HashFree(Fts5Hash*); | |||
1479 | ||||
1480 | static int sqlite3Fts5HashWrite( | |||
1481 | Fts5Hash*, | |||
1482 | i64 iRowid, /* Rowid for this entry */ | |||
1483 | int iCol, /* Column token appears in (-ve -> delete) */ | |||
1484 | int iPos, /* Position of token within column */ | |||
1485 | char bByte, | |||
1486 | const char *pToken, int nToken /* Token to add or remove to or from index */ | |||
1487 | ); | |||
1488 | ||||
1489 | /* | |||
1490 | ** Empty (but do not delete) a hash table. | |||
1491 | */ | |||
1492 | static void sqlite3Fts5HashClear(Fts5Hash*); | |||
1493 | ||||
1494 | /* | |||
1495 | ** Return true if the hash is empty, false otherwise. | |||
1496 | */ | |||
1497 | static int sqlite3Fts5HashIsEmpty(Fts5Hash*); | |||
1498 | ||||
1499 | static int sqlite3Fts5HashQuery( | |||
1500 | Fts5Hash*, /* Hash table to query */ | |||
1501 | int nPre, | |||
1502 | const char *pTerm, int nTerm, /* Query term */ | |||
1503 | void **ppObj, /* OUT: Pointer to doclist for pTerm */ | |||
1504 | int *pnDoclist /* OUT: Size of doclist in bytes */ | |||
1505 | ); | |||
1506 | ||||
1507 | static int sqlite3Fts5HashScanInit( | |||
1508 | Fts5Hash*, /* Hash table to query */ | |||
1509 | const char *pTerm, int nTerm /* Query prefix */ | |||
1510 | ); | |||
1511 | static void sqlite3Fts5HashScanNext(Fts5Hash*); | |||
1512 | static int sqlite3Fts5HashScanEof(Fts5Hash*); | |||
1513 | static void sqlite3Fts5HashScanEntry(Fts5Hash *, | |||
1514 | const char **pzTerm, /* OUT: term (nul-terminated) */ | |||
1515 | int *pnTerm, /* OUT: Size of term in bytes */ | |||
1516 | const u8 **ppDoclist, /* OUT: pointer to doclist */ | |||
1517 | int *pnDoclist /* OUT: size of doclist in bytes */ | |||
1518 | ); | |||
1519 | ||||
1520 | ||||
1521 | ||||
1522 | /* | |||
1523 | ** End of interface to code in fts5_hash.c. | |||
1524 | **************************************************************************/ | |||
1525 | ||||
1526 | /************************************************************************** | |||
1527 | ** Interface to code in fts5_storage.c. fts5_storage.c contains | |||
1528 | ** code to access the data stored in the %_content and %_docsize tables. | |||
1529 | */ | |||
1530 | ||||
1531 | #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ | |||
1532 | #define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ | |||
1533 | #define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */ | |||
1534 | ||||
1535 | typedef struct Fts5Storage Fts5Storage; | |||
1536 | ||||
1537 | static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); | |||
1538 | static int sqlite3Fts5StorageClose(Fts5Storage *p); | |||
1539 | static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName); | |||
1540 | ||||
1541 | static int sqlite3Fts5DropAll(Fts5Config*); | |||
1542 | static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); | |||
1543 | ||||
1544 | static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int); | |||
1545 | static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, int, sqlite3_value**, i64*); | |||
1546 | static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64); | |||
1547 | ||||
1548 | static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg); | |||
1549 | ||||
1550 | static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**); | |||
1551 | static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); | |||
1552 | ||||
1553 | static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); | |||
1554 | static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); | |||
1555 | static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); | |||
1556 | ||||
1557 | static int sqlite3Fts5StorageSync(Fts5Storage *p); | |||
1558 | static int sqlite3Fts5StorageRollback(Fts5Storage *p); | |||
1559 | ||||
1560 | static int sqlite3Fts5StorageConfigValue( | |||
1561 | Fts5Storage *p, const char*, sqlite3_value*, int | |||
1562 | ); | |||
1563 | ||||
1564 | static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); | |||
1565 | static int sqlite3Fts5StorageRebuild(Fts5Storage *p); | |||
1566 | static int sqlite3Fts5StorageOptimize(Fts5Storage *p); | |||
1567 | static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge); | |||
1568 | static int sqlite3Fts5StorageReset(Fts5Storage *p); | |||
1569 | ||||
1570 | static void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage*); | |||
1571 | static int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel); | |||
1572 | ||||
1573 | /* | |||
1574 | ** End of interface to code in fts5_storage.c. | |||
1575 | **************************************************************************/ | |||
1576 | ||||
1577 | ||||
1578 | /************************************************************************** | |||
1579 | ** Interface to code in fts5_expr.c. | |||
1580 | */ | |||
1581 | typedef struct Fts5Expr Fts5Expr; | |||
1582 | typedef struct Fts5ExprNode Fts5ExprNode; | |||
1583 | typedef struct Fts5Parse Fts5Parse; | |||
1584 | typedef struct Fts5Token Fts5Token; | |||
1585 | typedef struct Fts5ExprPhrase Fts5ExprPhrase; | |||
1586 | typedef struct Fts5ExprNearset Fts5ExprNearset; | |||
1587 | ||||
1588 | struct Fts5Token { /* A (pointer, length) reference to token text; also named as the parser's terminal-symbol type (sqlite3Fts5ParserFTS5TOKENTYPE) */ | |||
1589 | const char *p; /* Token text (not NULL terminated) */ | |||
1590 | int n; /* Size of buffer p in bytes */ | |||
1591 | }; | |||
1592 | ||||
1593 | /* Parse a MATCH expression. */ | |||
1594 | static int sqlite3Fts5ExprNew( | |||
1595 | Fts5Config *pConfig, | |||
1596 | int bPhraseToAnd, | |||
1597 | int iCol, /* Column on LHS of MATCH operator */ | |||
1598 | const char *zExpr, | |||
1599 | Fts5Expr **ppNew, | |||
1600 | char **pzErr | |||
1601 | ); | |||
1602 | static int sqlite3Fts5ExprPattern( | |||
1603 | Fts5Config *pConfig, | |||
1604 | int bGlob, | |||
1605 | int iCol, | |||
1606 | const char *zText, | |||
1607 | Fts5Expr **pp | |||
1608 | ); | |||
1609 | ||||
1610 | /* | |||
1611 | ** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc); | |||
1612 | ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); | |||
1613 | ** rc = sqlite3Fts5ExprNext(pExpr, iMax) | |||
1614 | ** ){ | |||
1615 | ** // The document with rowid iRowid matches the expression! | |||
1616 | ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); | |||
1617 | ** } | |||
1618 | */ | |||
1619 | static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc); | |||
1620 | static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax); | |||
1621 | static int sqlite3Fts5ExprEof(Fts5Expr*); | |||
1622 | static i64 sqlite3Fts5ExprRowid(Fts5Expr*); | |||
1623 | ||||
1624 | static void sqlite3Fts5ExprFree(Fts5Expr*); | |||
1625 | static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2); | |||
1626 | ||||
1627 | /* Called during startup to register a UDF with SQLite */ | |||
1628 | static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); | |||
1629 | ||||
1630 | static int sqlite3Fts5ExprPhraseCount(Fts5Expr*); | |||
1631 | static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); | |||
1632 | static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); | |||
1633 | ||||
1634 | typedef struct Fts5PoslistPopulator Fts5PoslistPopulator; | |||
1635 | static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int); | |||
1636 | static int sqlite3Fts5ExprPopulatePoslists( | |||
1637 | Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int | |||
1638 | ); | |||
1639 | static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64); | |||
1640 | ||||
1641 | static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); | |||
1642 | ||||
1643 | static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); | |||
1644 | ||||
1645 | static int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*); | |||
1646 | static int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*); | |||
1647 | static void sqlite3Fts5ExprClearTokens(Fts5Expr*); | |||
1648 | ||||
1649 | /******************************************* | |||
1650 | ** The fts5_expr.c API above this point is used by the other hand-written | |||
1651 | ** C code in this module. The interfaces below this point are called by | |||
1652 | ** the parser code in fts5parse.y. */ | |||
1653 | ||||
1654 | static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); | |||
1655 | ||||
1656 | static Fts5ExprNode *sqlite3Fts5ParseNode( | |||
1657 | Fts5Parse *pParse, | |||
1658 | int eType, | |||
1659 | Fts5ExprNode *pLeft, | |||
1660 | Fts5ExprNode *pRight, | |||
1661 | Fts5ExprNearset *pNear | |||
1662 | ); | |||
1663 | ||||
1664 | static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( | |||
1665 | Fts5Parse *pParse, | |||
1666 | Fts5ExprNode *pLeft, | |||
1667 | Fts5ExprNode *pRight | |||
1668 | ); | |||
1669 | ||||
1670 | static Fts5ExprPhrase *sqlite3Fts5ParseTerm( | |||
1671 | Fts5Parse *pParse, | |||
1672 | Fts5ExprPhrase *pPhrase, | |||
1673 | Fts5Token *pToken, | |||
1674 | int bPrefix | |||
1675 | ); | |||
1676 | ||||
1677 | static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*); | |||
1678 | ||||
1679 | static Fts5ExprNearset *sqlite3Fts5ParseNearset( | |||
1680 | Fts5Parse*, | |||
1681 | Fts5ExprNearset*, | |||
1682 | Fts5ExprPhrase* | |||
1683 | ); | |||
1684 | ||||
1685 | static Fts5Colset *sqlite3Fts5ParseColset( | |||
1686 | Fts5Parse*, | |||
1687 | Fts5Colset*, | |||
1688 | Fts5Token * | |||
1689 | ); | |||
1690 | ||||
1691 | static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); | |||
1692 | static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); | |||
1693 | static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); | |||
1694 | ||||
1695 | static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); | |||
1696 | static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*); | |||
1697 | static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*); | |||
1698 | static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); | |||
1699 | static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); | |||
1700 | ||||
1701 | /* | |||
1702 | ** End of interface to code in fts5_expr.c. | |||
1703 | **************************************************************************/ | |||
1704 | ||||
1705 | ||||
1706 | ||||
1707 | /************************************************************************** | |||
1708 | ** Interface to code in fts5_aux.c. | |||
1709 | */ | |||
1710 | ||||
1711 | static int sqlite3Fts5AuxInit(fts5_api*); | |||
1712 | /* | |||
1713 | ** End of interface to code in fts5_aux.c. | |||
1714 | **************************************************************************/ | |||
1715 | ||||
1716 | /************************************************************************** | |||
1717 | ** Interface to code in fts5_tokenize.c. | |||
1718 | */ | |||
1719 | ||||
1720 | static int sqlite3Fts5TokenizerInit(fts5_api*); | |||
1721 | static int sqlite3Fts5TokenizerPattern( | |||
1722 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), | |||
1723 | Fts5Tokenizer *pTok | |||
1724 | ); | |||
1725 | static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig*); | |||
1726 | /* | |||
1727 | ** End of interface to code in fts5_tokenize.c. | |||
1728 | **************************************************************************/ | |||
1729 | ||||
1730 | /************************************************************************** | |||
1731 | ** Interface to code in fts5_vocab.c. | |||
1732 | */ | |||
1733 | ||||
1734 | static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*); | |||
1735 | ||||
1736 | /* | |||
1737 | ** End of interface to code in fts5_vocab.c. | |||
1738 | **************************************************************************/ | |||
1739 | ||||
1740 | ||||
1741 | /************************************************************************** | |||
1742 | ** Interface to automatically generated code in fts5_unicode2.c. | |||
1743 | */ | |||
1744 | static int sqlite3Fts5UnicodeIsdiacritic(int c); | |||
1745 | static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); | |||
1746 | ||||
1747 | static int sqlite3Fts5UnicodeCatParse(const char*, u8*); | |||
1748 | static int sqlite3Fts5UnicodeCategory(u32 iCode); | |||
1749 | static void sqlite3Fts5UnicodeAscii(u8*, u8*); | |||
1750 | /* | |||
1751 | ** End of interface to code in fts5_unicode2.c. | |||
1752 | **************************************************************************/ | |||
1753 | ||||
1754 | #endif | |||
1755 | ||||
1756 | #line 1 "fts5parse.h" | |||
1757 | #define FTS5_OR 1 | |||
1758 | #define FTS5_AND 2 | |||
1759 | #define FTS5_NOT 3 | |||
1760 | #define FTS5_TERM 4 | |||
1761 | #define FTS5_COLON 5 | |||
1762 | #define FTS5_MINUS 6 | |||
1763 | #define FTS5_LCP 7 | |||
1764 | #define FTS5_RCP 8 | |||
1765 | #define FTS5_STRING 9 | |||
1766 | #define FTS5_LP 10 | |||
1767 | #define FTS5_RP 11 | |||
1768 | #define FTS5_CARET 12 | |||
1769 | #define FTS5_COMMA 13 | |||
1770 | #define FTS5_PLUS 14 | |||
1771 | #define FTS5_STAR 15 | |||
1772 | ||||
1773 | #line 1 "fts5parse.c" | |||
1774 | /* This file is automatically generated by Lemon from input grammar | |||
1775 | ** source file "fts5parse.y". | |||
1776 | */ | |||
1777 | /* | |||
1778 | ** 2000-05-29 | |||
1779 | ** | |||
1780 | ** The author disclaims copyright to this source code. In place of | |||
1781 | ** a legal notice, here is a blessing: | |||
1782 | ** | |||
1783 | ** May you do good and not evil. | |||
1784 | ** May you find forgiveness for yourself and forgive others. | |||
1785 | ** May you share freely, never taking more than you give. | |||
1786 | ** | |||
1787 | ************************************************************************* | |||
1788 | ** Driver template for the LEMON parser generator. | |||
1789 | ** | |||
1790 | ** The "lemon" program processes an LALR(1) input grammar file, then uses | |||
1791 | ** this template to construct a parser. The "lemon" program inserts text | |||
1792 | ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the | |||
1793 | ** interstitial "-" characters) contained in this template is changed into | |||
1794 | ** the value of the %name directive from the grammar. Otherwise, the content | |||
1795 | ** of this template is copied straight through into the generated parser | |||
1796 | ** source file. | |||
1797 | ** | |||
1798 | ** The following is the concatenation of all %include directives from the | |||
1799 | ** input grammar file: | |||
1800 | */ | |||
1801 | /************ Begin %include sections from the grammar ************************/ | |||
1802 | #line 47 "fts5parse.y" | |||
1803 | ||||
1804 | /* #include "fts5Int.h" */ | |||
1805 | /* #include "fts5parse.h" */ | |||
1806 | ||||
1807 | /* | |||
1808 | ** Disable all error recovery processing in the parser push-down | |||
1809 | ** automaton. | |||
1810 | */ | |||
1811 | #define fts5YYNOERRORRECOVERY 1 | |||
1812 | ||||
1813 | /* | |||
1814 | ** Make fts5yytestcase() the same as testcase() | |||
1815 | */ | |||
1816 | #define fts5yytestcase(X) testcase(X) | |||
1817 | ||||
1818 | /* | |||
1819 | ** Indicate that sqlite3ParserFree() will never be called with a null | |||
1820 | ** pointer. | |||
1821 | */ | |||
1822 | #define fts5YYPARSEFREENOTNULL 1 | |||
1823 | ||||
1824 | /* | |||
1825 | ** Alternative datatype for the argument to the malloc() routine passed | |||
1826 | ** into sqlite3ParserAlloc(). The default is size_t. | |||
1827 | */ | |||
1828 | #define fts5YYMALLOCARGTYPE u64 | |||
1829 | ||||
1830 | #line 58 "fts5parse.sql" | |||
1831 | /**************** End of %include directives **********************************/ | |||
1832 | /* These constants specify the various numeric values for terminal symbols. | |||
1833 | ***************** Begin token definitions *************************************/ | |||
1834 | #ifndef FTS5_OR | |||
1835 | #define FTS5_OR 1 | |||
1836 | #define FTS5_AND 2 | |||
1837 | #define FTS5_NOT 3 | |||
1838 | #define FTS5_TERM 4 | |||
1839 | #define FTS5_COLON 5 | |||
1840 | #define FTS5_MINUS 6 | |||
1841 | #define FTS5_LCP 7 | |||
1842 | #define FTS5_RCP 8 | |||
1843 | #define FTS5_STRING 9 | |||
1844 | #define FTS5_LP 10 | |||
1845 | #define FTS5_RP 11 | |||
1846 | #define FTS5_CARET 12 | |||
1847 | #define FTS5_COMMA 13 | |||
1848 | #define FTS5_PLUS 14 | |||
1849 | #define FTS5_STAR 15 | |||
1850 | #endif | |||
1851 | /**************** End token definitions ***************************************/ | |||
1852 | ||||
1853 | /* The next section is a series of control #defines that govern | |||
1854 | ** various aspects of the generated parser. | |||
1855 | ** fts5YYCODETYPE is the data type used to store the integer codes | |||
1856 | ** that represent terminal and non-terminal symbols. | |||
1857 | ** "unsigned char" is used if there are fewer than | |||
1858 | ** 256 symbols. Larger types otherwise. | |||
1859 | ** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for | |||
1860 | ** any terminal or nonterminal symbol. | |||
1861 | ** fts5YYFALLBACK If defined, this indicates that one or more tokens | |||
1862 | ** (also known as: "terminal symbols") have fall-back | |||
1863 | ** values which should be used if the original symbol | |||
1864 | ** would not parse. This permits keywords to sometimes | |||
1865 | ** be used as identifiers, for example. | |||
1866 | ** fts5YYACTIONTYPE is the data type used for "action codes" - numbers | |||
1867 | ** that indicate what to do in response to the next | |||
1868 | ** token. | |||
1869 | ** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal | |||
1870 | ** symbols. Background: A "minor type" is a semantic | |||
1871 | ** value associated with a terminal or non-terminal | |||
1872 | ** symbols. For example, for an "ID" terminal symbol, | |||
1873 | ** the minor type might be the name of the identifier. | |||
1874 | ** Each non-terminal can have a different minor type. | |||
1875 | ** Terminal symbols all have the same minor type, though. | |||
1876 | ** This macro defines the minor type for terminal | |||
1877 | ** symbols. | |||
1878 | ** fts5YYMINORTYPE is the data type used for all minor types. | |||
1879 | ** This is typically a union of many types, one of | |||
1880 | ** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union | |||
1881 | ** for terminal symbols is called "fts5yy0". | |||
1882 | ** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If | |||
1883 | ** zero the stack is dynamically sized using realloc() | |||
1884 | ** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument | |||
1885 | ** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument | |||
1886 | ** sqlite3Fts5ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter | |||
1887 | ** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser | |||
1888 | ** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser | |||
1889 | ** sqlite3Fts5ParserCTX_* As sqlite3Fts5ParserARG_ except for %extra_context | |||
1890 | ** fts5YYREALLOC Name of the realloc() function to use | |||
1891 | ** fts5YYFREE Name of the free() function to use | |||
1892 | ** fts5YYDYNSTACK True if stack space should be extended on heap | |||
1893 | ** fts5YYERRORSYMBOL is the code number of the error symbol. If not | |||
1894 | ** defined, then do no error processing. | |||
1895 | ** fts5YYNSTATE the combined number of states. | |||
1896 | ** fts5YYNRULE the number of rules in the grammar | |||
1897 | ** fts5YYNFTS5TOKEN Number of terminal symbols | |||
1898 | ** fts5YY_MAX_SHIFT Maximum value for shift actions | |||
1899 | ** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions | |||
1900 | ** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions | |||
1901 | ** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error | |||
1902 | ** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept | |||
1903 | ** fts5YY_NO_ACTION The fts5yy_action[] code for no-op | |||
1904 | ** fts5YY_MIN_REDUCE Minimum value for reduce actions | |||
1905 | ** fts5YY_MAX_REDUCE Maximum value for reduce actions | |||
1906 | ** fts5YY_MIN_DSTRCTR Minimum symbol value that has a destructor | |||
1907 | ** fts5YY_MAX_DSTRCTR Maximum symbol value that has a destructor | |||
1908 | */ | |||
1909 | #ifndef INTERFACE | |||
1910 | # define INTERFACE 1 | |||
1911 | #endif | |||
1912 | /************* Begin control #defines *****************************************/ | |||
1913 | #define fts5YYCODETYPE unsigned char | |||
1914 | #define fts5YYNOCODE 27 | |||
1915 | #define fts5YYACTIONTYPE unsigned char | |||
1916 | #define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token | |||
1917 | typedef union { | |||
1918 | int fts5yyinit; | |||
1919 | sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0; /* Entry used for terminal symbols */ | |||
1920 | int fts5yy4; | |||
1921 | Fts5Colset* fts5yy11; | |||
1922 | Fts5ExprNode* fts5yy24; | |||
1923 | Fts5ExprNearset* fts5yy46; | |||
1924 | Fts5ExprPhrase* fts5yy53; | |||
1925 | } fts5YYMINORTYPE; | |||
1926 | #ifndef fts5YYSTACKDEPTH | |||
1927 | #define fts5YYSTACKDEPTH 100 | |||
1928 | #endif | |||
1929 | #define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse; | |||
1930 | #define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse | |||
1931 | #define sqlite3Fts5ParserARG_PARAM ,pParse | |||
1932 | #define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse=fts5yypParser->pParse; | |||
1933 | #define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse=pParse; | |||
1934 | #define fts5YYREALLOC realloc | |||
1935 | #define fts5YYFREE free | |||
1936 | #define fts5YYDYNSTACK 0 | |||
1937 | #define sqlite3Fts5ParserCTX_SDECL | |||
1938 | #define sqlite3Fts5ParserCTX_PDECL | |||
1939 | #define sqlite3Fts5ParserCTX_PARAM | |||
1940 | #define sqlite3Fts5ParserCTX_FETCH | |||
1941 | #define sqlite3Fts5ParserCTX_STORE | |||
1942 | #define fts5YYNSTATE 35 | |||
1943 | #define fts5YYNRULE 28 | |||
1944 | #define fts5YYNRULE_WITH_ACTION 28 | |||
1945 | #define fts5YYNFTS5TOKEN 16 | |||
1946 | #define fts5YY_MAX_SHIFT 34 | |||
1947 | #define fts5YY_MIN_SHIFTREDUCE 52 | |||
1948 | #define fts5YY_MAX_SHIFTREDUCE 79 | |||
1949 | #define fts5YY_ERROR_ACTION 80 | |||
1950 | #define fts5YY_ACCEPT_ACTION 81 | |||
1951 | #define fts5YY_NO_ACTION 82 | |||
1952 | #define fts5YY_MIN_REDUCE 83 | |||
1953 | #define fts5YY_MAX_REDUCE 110 | |||
1954 | #define fts5YY_MIN_DSTRCTR 16 | |||
1955 | #define fts5YY_MAX_DSTRCTR 24 | |||
1956 | /************* End control #defines *******************************************/ | |||
1957 | #define fts5YY_NLOOKAHEAD ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0]))) | |||
1958 | ||||
1959 | /* Define the fts5yytestcase() macro to be a no-op if it is not already defined | |||
1960 | ** otherwise. | |||
1961 | ** | |||
1962 | ** Applications can choose to define fts5yytestcase() in the %include section | |||
1963 | ** to a macro that can assist in verifying code coverage. For production | |||
1964 | ** code the fts5yytestcase() macro should be turned off. But it is useful | |||
1965 | ** for testing. | |||
1966 | */ | |||
1967 | #ifndef fts5yytestcase | |||
1968 | # define fts5yytestcase(X) | |||
1969 | #endif | |||
1970 | ||||
/* Macro to determine if stack space has the ability to grow using
** heap memory.  The stack is growable when no fixed depth was requested
** (fts5YYSTACKDEPTH<=0) or when fts5YYDYNSTACK is non-zero.
*/
#if fts5YYSTACKDEPTH<=0 || fts5YYDYNSTACK
# define fts5YYGROWABLESTACK 1
#else
# define fts5YYGROWABLESTACK 0
#endif
1979 | ||||
/* Guarantee a minimum number of initial stack slots.  The embedded
** fts5yystk0[] array in fts5yyParser is sized by this macro, so it must
** be positive.
*/
#if fts5YYSTACKDEPTH<=0
# undef fts5YYSTACKDEPTH
# define fts5YYSTACKDEPTH 2  /* Need a minimum stack size */
#endif
1986 | ||||
1987 | ||||
1988 | /* Next are the tables used to determine what action to take based on the | |||
1989 | ** current state and lookahead token. These tables are used to implement | |||
1990 | ** functions that take a state number and lookahead value and return an | |||
1991 | ** action integer. | |||
1992 | ** | |||
1993 | ** Suppose the action integer is N. Then the action is determined as | |||
1994 | ** follows | |||
1995 | ** | |||
1996 | ** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead | |||
1997 | ** token onto the stack and goto state N. | |||
1998 | ** | |||
1999 | ** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then | |||
2000 | ** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE. | |||
2001 | ** | |||
2002 | ** N == fts5YY_ERROR_ACTION A syntax error has occurred. | |||
2003 | ** | |||
2004 | ** N == fts5YY_ACCEPT_ACTION The parser accepts its input. | |||
2005 | ** | |||
2006 | ** N == fts5YY_NO_ACTION No such action. Denotes unused | |||
2007 | ** slots in the fts5yy_action[] table. | |||
2008 | ** | |||
2009 | ** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE | |||
2010 | ** and fts5YY_MAX_REDUCE | |||
2011 | ** | |||
2012 | ** The action table is constructed as a single large table named fts5yy_action[]. | |||
2013 | ** Given state S and lookahead X, the action is computed as either: | |||
2014 | ** | |||
2015 | ** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ] | |||
2016 | ** (B) N = fts5yy_default[S] | |||
2017 | ** | |||
2018 | ** The (A) formula is preferred. The B formula is used instead if | |||
2019 | ** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X. | |||
2020 | ** | |||
2021 | ** The formulas above are for computing the action when the lookahead is | |||
2022 | ** a terminal symbol. If the lookahead is a non-terminal (as occurs after | |||
2023 | ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of | |||
2024 | ** the fts5yy_shift_ofst[] array. | |||
2025 | ** | |||
2026 | ** The following are the tables generated in this section: | |||
2027 | ** | |||
2028 | ** fts5yy_action[] A single table containing all actions. | |||
2029 | ** fts5yy_lookahead[] A table containing the lookahead for each entry in | |||
2030 | ** fts5yy_action. Used to detect hash collisions. | |||
2031 | ** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for | |||
2032 | ** shifting terminals. | |||
2033 | ** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for | |||
2034 | ** shifting non-terminals after a reduce. | |||
2035 | ** fts5yy_default[] Default action for each state. | |||
2036 | ** | |||
2037 | *********** Begin parsing tables **********************************************/ | |||
/* fts5yy_action[]: every action of the parser packed into one array.
** It is indexed by (offset-for-state + lookahead symbol), where the offset
** comes from fts5yy_shift_ofst[] or fts5yy_reduce_ofst[]. */
#define fts5YY_ACTTAB_COUNT (105)
static const fts5YYACTIONTYPE fts5yy_action[] = {
 /*     0 */    81,   20,   96,    6,   28,   99,   98,   26,   26,   18,
 /*    10 */    96,    6,   28,   17,   98,   56,   26,   19,   96,    6,
 /*    20 */    28,   14,   98,   14,   26,   31,   92,   96,    6,   28,
 /*    30 */   108,   98,   25,   26,   21,   96,    6,   28,   78,   98,
 /*    40 */    58,   26,   29,   96,    6,   28,  107,   98,   22,   26,
 /*    50 */    24,   16,   12,   11,    1,   13,   13,   24,   16,   23,
 /*    60 */    11,   33,   34,   13,   97,    8,   27,   32,   98,    7,
 /*    70 */    26,    3,    4,    5,    3,    4,    5,    3,   83,    4,
 /*    80 */     5,    3,   63,    5,    3,   62,   12,    2,   86,   13,
 /*    90 */     9,   30,   10,   10,   54,   57,   75,   78,   78,   53,
 /*   100 */    57,   15,   82,   82,   71,
};
/* fts5yy_lookahead[]: for each slot of fts5yy_action[], the symbol that
** slot belongs to.  A lookup is valid only if the stored symbol equals the
** symbol being looked up; otherwise fts5yy_default[] applies.  The table is
** longer than fts5yy_action[] so that offset+symbol stays in bounds (see the
** assert()s in fts5yy_find_shift_action()). */
static const fts5YYCODETYPE fts5yy_lookahead[] = {
 /*     0 */    16,   17,   18,   19,   20,   22,   22,   24,   24,   17,
 /*    10 */    18,   19,   20,    7,   22,    9,   24,   17,   18,   19,
 /*    20 */    20,    9,   22,    9,   24,   13,   17,   18,   19,   20,
 /*    30 */    26,   22,   24,   24,   17,   18,   19,   20,   15,   22,
 /*    40 */     9,   24,   17,   18,   19,   20,   26,   22,   21,   24,
 /*    50 */     6,    7,    9,    9,   10,   12,   12,    6,    7,   21,
 /*    60 */     9,   24,   25,   12,   18,    5,   20,   14,   22,    5,
 /*    70 */    24,    3,    1,    2,    3,    1,    2,    3,    0,    1,
 /*    80 */     2,    3,   11,    2,    3,   11,    9,   10,    5,   12,
 /*    90 */    23,   24,   10,   10,    8,    9,    9,   15,   15,    8,
 /*   100 */     9,    9,   27,   27,   11,   27,   27,   27,   27,   27,
 /*   110 */    27,   27,   27,   27,   27,   27,   27,   27,   27,   27,
 /*   120 */    27,
};
/* fts5yy_shift_ofst[S]: base offset into fts5yy_action[]/fts5yy_lookahead[]
** used when state S is looked up with a terminal lookahead. */
#define fts5YY_SHIFT_COUNT    (34)
#define fts5YY_SHIFT_MIN      (0)
#define fts5YY_SHIFT_MAX      (93)
static const unsigned char fts5yy_shift_ofst[] = {
 /*     0 */    44,   44,   44,   44,   44,   44,   51,   77,   43,   12,
 /*    10 */    14,   83,   82,   14,   23,   23,   31,   31,   71,   74,
 /*    20 */    78,   81,   86,   91,    6,   53,   53,   60,   64,   68,
 /*    30 */    53,   87,   92,   53,   93,
};
/* fts5yy_reduce_ofst[S]: base offset into fts5yy_action[] used when state S
** is looked up with a non-terminal (after a reduce).  Entries may be
** negative, hence the signed element type. */
#define fts5YY_REDUCE_COUNT (17)
#define fts5YY_REDUCE_MIN   (-17)
#define fts5YY_REDUCE_MAX   (67)
static const signed char fts5yy_reduce_ofst[] = {
 /*     0 */   -16,   -8,    0,    9,   17,   25,   46,  -17,  -17,   37,
 /*    10 */    67,    4,    4,    8,    4,   20,   27,   38,
};
/* fts5yy_default[S]: the action taken in state S when the packed tables
** hold no entry for the lookahead (80 is fts5YY_ERROR_ACTION). */
static const fts5YYACTIONTYPE fts5yy_default[] = {
 /*     0 */    80,   80,   80,   80,   80,   80,   95,   80,   80,  105,
 /*    10 */    80,  110,  110,   80,  110,  110,   80,   80,   80,   80,
 /*    20 */    80,   91,   80,   80,   80,  101,  100,   80,   80,   90,
 /*    30 */   103,   80,   80,  104,   80,
};
2089 | /********** End of lemon-generated parsing tables *****************************/ | |||
2090 | ||||
/* The next table maps tokens (terminal symbols) into fallback tokens.
** If a construct like the following:
**
**      %fallback ID X Y Z.
**
** appears in the grammar, then ID becomes a fallback token for X, Y,
** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
** but it does not parse, the type of the token is changed to ID and
** the parse is retried before an error is thrown.
**
** This feature can be used, for example, to cause some keywords in a language
** to revert to identifiers if the keyword does not apply in the context where
** it appears.
**
** The table is empty here and is compiled only when fts5YYFALLBACK is
** defined.
*/
#ifdef fts5YYFALLBACK
static const fts5YYCODETYPE fts5yyFallback[] = {
};
#endif /* fts5YYFALLBACK */
2109 | ||||
/* The following structure represents a single element of the
** parser's stack.  Information stored includes:
**
**   +  The state number for the parser at this level of the stack.
**
**   +  The value of the token stored at this level of the stack.
**      (In other words, the "major" token.)
**
**   +  The semantic value stored at this level of the stack.  This is
**      the information used by the action routines in the grammar.
**      It is sometimes called the "minor" token.
**
** After the "shift" half of a SHIFTREDUCE action, the stateno field
** actually contains the reduce action for the second half of the
** SHIFTREDUCE (fts5yy_shift() performs that conversion before storing).
*/
struct fts5yyStackEntry {
  fts5YYACTIONTYPE stateno;  /* The state-number, or reduce action in SHIFTREDUCE */
  fts5YYCODETYPE major;      /* The major token value.  This is the code
                             ** number for the token at this stack level */
  fts5YYMINORTYPE minor;     /* The user-supplied minor token value.  This
                             ** is the value of the token  */
};
typedef struct fts5yyStackEntry fts5yyStackEntry;
2134 | ||||
/* The state of the parser is completely contained in an instance of
** the following structure.
**
** fts5yystack initially points at the embedded fts5yystk0[] array (see
** sqlite3Fts5ParserInit()); it is only repointed at heap memory by
** fts5yyGrowStack(), which exists in fts5YYGROWABLESTACK builds only.
*/
struct fts5yyParser {
  fts5yyStackEntry *fts5yytos;          /* Pointer to top element of the stack */
#ifdef fts5YYTRACKMAXSTACKDEPTH
  int fts5yyhwm;                    /* High-water mark of the stack */
#endif
#ifndef fts5YYNOERRORRECOVERY
  int fts5yyerrcnt;                 /* Shifts left before out of the error */
#endif
  sqlite3Fts5ParserARG_SDECL                /* A place to hold %extra_argument */
  sqlite3Fts5ParserCTX_SDECL                /* A place to hold %extra_context */
  fts5yyStackEntry *fts5yystackEnd;           /* Last entry in the stack */
  fts5yyStackEntry *fts5yystack;              /* The parser stack */
  fts5yyStackEntry fts5yystk0[fts5YYSTACKDEPTH];  /* Initial stack space */
};
typedef struct fts5yyParser fts5yyParser;
2152 | ||||
2153 | #include <assert.h> | |||
#ifndef NDEBUG
#include <stdio.h>
/* Destination stream and line prefix for parser trace output.  Tracing is
** active only while both are non-NULL. */
static FILE *fts5yyTraceFILE = 0;
static char *fts5yyTracePrompt = 0;

/*
** Enable or disable parser tracing.
**
** Inputs:
** <ul>
** <li> TraceFILE: stream that receives the trace output.
** <li> zTracePrompt: prefix written at the start of every trace line.
** </ul>
**
** If either argument is NULL, tracing is switched off and both trace
** variables are reset to NULL.  Otherwise both are recorded as given.
**
** Outputs:
** None.
*/
static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
  if( TraceFILE==0 || zTracePrompt==0 ){
    fts5yyTraceFILE = 0;
    fts5yyTracePrompt = 0;
  }else{
    fts5yyTraceFILE = TraceFILE;
    fts5yyTracePrompt = zTracePrompt;
  }
}
#endif /* NDEBUG */
2185 | ||||
#if defined(fts5YYCOVERAGE) || !defined(NDEBUG)
/* For tracing shifts, the names of all terminals and nonterminals
** are required.  The following table supplies these names.  It is
** indexed by symbol code, i.e. the "major" value of a stack entry. */
static const char *const fts5yyTokenName[] = {
  /*    0 */ "$",
  /*    1 */ "OR",
  /*    2 */ "AND",
  /*    3 */ "NOT",
  /*    4 */ "TERM",
  /*    5 */ "COLON",
  /*    6 */ "MINUS",
  /*    7 */ "LCP",
  /*    8 */ "RCP",
  /*    9 */ "STRING",
  /*   10 */ "LP",
  /*   11 */ "RP",
  /*   12 */ "CARET",
  /*   13 */ "COMMA",
  /*   14 */ "PLUS",
  /*   15 */ "STAR",
  /*   16 */ "input",
  /*   17 */ "expr",
  /*   18 */ "cnearset",
  /*   19 */ "exprlist",
  /*   20 */ "colset",
  /*   21 */ "colsetlist",
  /*   22 */ "nearset",
  /*   23 */ "nearphrases",
  /*   24 */ "phrase",
  /*   25 */ "neardist_opt",
  /*   26 */ "star_opt",
};
#endif /* defined(fts5YYCOVERAGE) || !defined(NDEBUG) */
2219 | ||||
#ifndef NDEBUG
/* For tracing reduce actions, the names of all rules are required.
** Each string is the grammar production for the rule number shown in
** the comment beside it.
*/
static const char *const fts5yyRuleName[] = {
 /*   0 */ "input ::= expr",
 /*   1 */ "colset ::= MINUS LCP colsetlist RCP",
 /*   2 */ "colset ::= LCP colsetlist RCP",
 /*   3 */ "colset ::= STRING",
 /*   4 */ "colset ::= MINUS STRING",
 /*   5 */ "colsetlist ::= colsetlist STRING",
 /*   6 */ "colsetlist ::= STRING",
 /*   7 */ "expr ::= expr AND expr",
 /*   8 */ "expr ::= expr OR expr",
 /*   9 */ "expr ::= expr NOT expr",
 /*  10 */ "expr ::= colset COLON LP expr RP",
 /*  11 */ "expr ::= LP expr RP",
 /*  12 */ "expr ::= exprlist",
 /*  13 */ "exprlist ::= cnearset",
 /*  14 */ "exprlist ::= exprlist cnearset",
 /*  15 */ "cnearset ::= nearset",
 /*  16 */ "cnearset ::= colset COLON nearset",
 /*  17 */ "nearset ::= phrase",
 /*  18 */ "nearset ::= CARET phrase",
 /*  19 */ "nearset ::= STRING LP nearphrases neardist_opt RP",
 /*  20 */ "nearphrases ::= phrase",
 /*  21 */ "nearphrases ::= nearphrases phrase",
 /*  22 */ "neardist_opt ::=",
 /*  23 */ "neardist_opt ::= COMMA STRING",
 /*  24 */ "phrase ::= phrase PLUS STRING star_opt",
 /*  25 */ "phrase ::= STRING star_opt",
 /*  26 */ "star_opt ::= STAR",
 /*  27 */ "star_opt ::=",
};
#endif /* NDEBUG */
2254 | ||||
2255 | ||||
#if fts5YYGROWABLESTACK
/*
** Enlarge the parser stack to (2*current + 100) entries.
**
** While the stack still lives in the embedded fts5yystk0[] array, a fresh
** heap block is obtained and the live entries are copied into it;
** afterwards the heap block is simply resized.  On success the stack
** base, top-of-stack and end-of-stack pointers are all rebased onto the
** new block.
**
** Returns 0 on success, or 1 if the allocation fails (in which case the
** parser is left untouched).
*/
static int fts5yyGrowStack(fts5yyParser *p){
  fts5yyStackEntry *pOld = p->fts5yystack;
  const int bEmbedded = (pOld==p->fts5yystk0);
  const int nOld = 1 + (int)(p->fts5yystackEnd - pOld);
  const int nNew = nOld*2 + 100;
  const int iTop = (int)(p->fts5yytos - pOld);
  fts5yyStackEntry *pGrown;

  /* A NULL first argument makes the reallocator behave as an allocator. */
  pGrown = fts5YYREALLOC(bEmbedded ? 0 : pOld, nNew*sizeof(pGrown[0]));
  if( pGrown==0 ) return 1;
  if( bEmbedded ){
    memcpy(pGrown, pOld, nOld*sizeof(pGrown[0]));
  }

  p->fts5yystack = pGrown;
  p->fts5yytos = &pGrown[iTop];
  p->fts5yystackEnd = &pGrown[nNew-1];
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n",
            fts5yyTracePrompt, nOld, nNew);
  }
#endif
  return 0;
}
#endif /* fts5YYGROWABLESTACK */
2289 | ||||
#if !fts5YYGROWABLESTACK
  /* For builds that do not have a growable stack, fts5yyGrowStack always
  ** returns an error (non-zero), so a full stack is reported as a stack
  ** overflow by the caller.
  */
# define fts5yyGrowStack(X) 1
#endif
2296 | ||||
/* Datatype of the argument taken by the memory allocator that is passed
** as the first argument to sqlite3Fts5ParserAlloc() below.  This can be
** changed by putting an appropriate #define in the %include section of
** the input grammar.
*/
#ifndef fts5YYMALLOCARGTYPE
# define fts5YYMALLOCARGTYPE size_t
#endif
2305 | ||||
2306 | /* Initialize a new parser that has already been allocated. | |||
2307 | */ | |||
2308 | static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){ | |||
2309 | fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser; | |||
2310 | sqlite3Fts5ParserCTX_STORE | |||
2311 | #ifdef fts5YYTRACKMAXSTACKDEPTH | |||
2312 | fts5yypParser->fts5yyhwm = 0; | |||
2313 | #endif | |||
2314 | fts5yypParser->fts5yystack = fts5yypParser->fts5yystk0; | |||
2315 | fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH100-1]; | |||
2316 | #ifndef fts5YYNOERRORRECOVERY1 | |||
2317 | fts5yypParser->fts5yyerrcnt = -1; | |||
2318 | #endif | |||
2319 | fts5yypParser->fts5yytos = fts5yypParser->fts5yystack; | |||
2320 | fts5yypParser->fts5yystack[0].stateno = 0; | |||
2321 | fts5yypParser->fts5yystack[0].major = 0; | |||
2322 | } | |||
2323 | ||||
2324 | #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK | |||
2325 | /* | |||
2326 | ** This function allocates a new parser. | |||
2327 | ** The only argument is a pointer to a function which works like | |||
2328 | ** malloc. | |||
2329 | ** | |||
2330 | ** Inputs: | |||
2331 | ** A pointer to the function used to allocate memory. | |||
2332 | ** | |||
2333 | ** Outputs: | |||
2334 | ** A pointer to a parser. This pointer is used in subsequent calls | |||
2335 | ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree. | |||
2336 | */ | |||
2337 | static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPEu64) sqlite3Fts5ParserCTX_PDECL){ | |||
2338 | fts5yyParser *fts5yypParser; | |||
2339 | fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPEu64)sizeof(fts5yyParser) ); | |||
2340 | if( fts5yypParser ){ | |||
2341 | sqlite3Fts5ParserCTX_STORE | |||
2342 | sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM); | |||
2343 | } | |||
2344 | return (void*)fts5yypParser; | |||
2345 | } | |||
2346 | #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */ | |||
2347 | ||||
2348 | ||||
/* The following function deletes the "minor type" or semantic value
** associated with a symbol.  The symbol can be either a terminal
** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is
** a pointer to the value to be deleted.  The code used to do the
** deletions is derived from the %destructor and/or %token_destructor
** directives of the input grammar.
**
** Symbol codes without a case below (including every code under 16)
** fall through to the default branch and nothing is released.
*/
static void fts5yy_destructor(
  fts5yyParser *fts5yypParser,    /* The parser */
  fts5YYCODETYPE fts5yymajor,     /* Type code for object to destroy */
  fts5YYMINORTYPE *fts5yypminor   /* The object to be destroyed */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
  switch( fts5yymajor ){
    /* Here is inserted the actions which take place when a
    ** terminal or non-terminal is destroyed.  This can happen
    ** when the symbol is popped from the stack during a
    ** reduce or during error processing or when a parser is
    ** being destroyed before it is finished parsing.
    **
    ** Note: during a reduce, the only symbols destroyed are those
    ** which appear on the RHS of the rule, but which are *not* used
    ** inside the C code.
    */
/********* Begin destructor definitions ***************************************/
    case 16: /* input */
{
#line 83 "fts5parse.y"
 (void)pParse; 
#line 606 "fts5parse.sql"
}
      break;
    case 17: /* expr */
    case 18: /* cnearset */
    case 19: /* exprlist */
{
#line 89 "fts5parse.y"
 sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24)); 
#line 615 "fts5parse.sql"
}
      break;
    case 20: /* colset */
    case 21: /* colsetlist */
{
#line 93 "fts5parse.y"
 sqlite3_free((fts5yypminor->fts5yy11)); 
#line 623 "fts5parse.sql"
}
      break;
    case 22: /* nearset */
    case 23: /* nearphrases */
{
#line 148 "fts5parse.y"
 sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46)); 
#line 631 "fts5parse.sql"
}
      break;
    case 24: /* phrase */
{
#line 183 "fts5parse.y"
 sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53)); 
#line 638 "fts5parse.sql"
}
      break;
/********* End destructor definitions *****************************************/
    default:  break;   /* If no destructor action specified: do nothing */
  }
}
2418 | ||||
2419 | /* | |||
2420 | ** Pop the parser's stack once. | |||
2421 | ** | |||
2422 | ** If there is a destructor routine associated with the token which | |||
2423 | ** is popped from the stack, then call it. | |||
2424 | */ | |||
2425 | static void fts5yy_pop_parser_stack(fts5yyParser *pParser){ | |||
2426 | fts5yyStackEntry *fts5yytos; | |||
2427 | assert( pParser->fts5yytos!=0 )((void) (0)); | |||
2428 | assert( pParser->fts5yytos > pParser->fts5yystack )((void) (0)); | |||
2429 | fts5yytos = pParser->fts5yytos--; | |||
2430 | #ifndef NDEBUG1 | |||
2431 | if( fts5yyTraceFILE ){ | |||
2432 | fprintf(fts5yyTraceFILE,"%sPopping %s\n", | |||
2433 | fts5yyTracePrompt, | |||
2434 | fts5yyTokenName[fts5yytos->major]); | |||
2435 | } | |||
2436 | #endif | |||
2437 | fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); | |||
2438 | } | |||
2439 | ||||
2440 | /* | |||
2441 | ** Clear all secondary memory allocations from the parser | |||
2442 | */ | |||
2443 | static void sqlite3Fts5ParserFinalize(void *p){ | |||
2444 | fts5yyParser *pParser = (fts5yyParser*)p; | |||
2445 | ||||
2446 | /* In-lined version of calling fts5yy_pop_parser_stack() for each | |||
2447 | ** element left in the stack */ | |||
2448 | fts5yyStackEntry *fts5yytos = pParser->fts5yytos; | |||
2449 | while( fts5yytos>pParser->fts5yystack ){ | |||
2450 | #ifndef NDEBUG1 | |||
2451 | if( fts5yyTraceFILE ){ | |||
2452 | fprintf(fts5yyTraceFILE,"%sPopping %s\n", | |||
2453 | fts5yyTracePrompt, | |||
2454 | fts5yyTokenName[fts5yytos->major]); | |||
2455 | } | |||
2456 | #endif | |||
2457 | if( fts5yytos->major>=fts5YY_MIN_DSTRCTR16 ){ | |||
2458 | fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); | |||
2459 | } | |||
2460 | fts5yytos--; | |||
2461 | } | |||
2462 | ||||
2463 | #if fts5YYGROWABLESTACK0 | |||
2464 | if( pParser->fts5yystack!=pParser->fts5yystk0 ) fts5YYFREEfree(pParser->fts5yystack); | |||
2465 | #endif | |||
2466 | } | |||
2467 | ||||
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
/*
** Destroy a parser: run sqlite3Fts5ParserFinalize() on it and then hand
** the object to freeProc.
**
** A NULL parser is silently ignored unless the fts5YYPARSEFREENEVERNULL
** macro is defined (for example in a %include section of the input
** grammar), in which case the pointer is assumed never to be NULL.
*/
static void sqlite3Fts5ParserFree(
  void *p,                    /* The parser to be deleted */
  void (*freeProc)(void*)     /* Function used to reclaim memory */
){
#ifndef fts5YYPARSEFREENEVERNULL
  if( p!=0 )
#endif
  {
    sqlite3Fts5ParserFinalize(p);
    freeProc(p);
  }
}
#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2488 | ||||
/*
** Report the deepest the parser stack has been (the recorded high-water
** mark).  Available only when fts5YYTRACKMAXSTACKDEPTH is defined.
*/
#ifdef fts5YYTRACKMAXSTACKDEPTH
static int sqlite3Fts5ParserStackPeak(void *p){
  return ((fts5yyParser*)p)->fts5yyhwm;
}
#endif
2498 | ||||
/* This array of booleans keeps track of the parser statement
** coverage.  The element fts5yycoverage[X][Y] is set when the parser
** is in state X and has a lookahead token Y.  In a well-tested
** system, every element of this matrix should end up being set.
*/
#if defined(fts5YYCOVERAGE)
static unsigned char fts5yycoverage[fts5YYNSTATE][fts5YYNFTS5TOKEN];
#endif
2507 | ||||
/*
** Report on parser coverage.  For every state/lookahead combination that
** has an entry in the packed tables (i.e. is not a syntax error), write
** one line to "out" saying whether the parser has visited it ("ok") or
** not ("missed").  Nothing is written when "out" is NULL.
**
** Returns the number of such combinations the parser has not visited.
*/
#if defined(fts5YYCOVERAGE)
static int sqlite3Fts5ParserCoverage(FILE *out){
  int nMissed = 0;
  int iState;
  for(iState=0; iState<fts5YYNSTATE; iState++){
    const int iBase = fts5yy_shift_ofst[iState];
    int iTok;
    for(iTok=0; iTok<fts5YYNFTS5TOKEN; iTok++){
      int bSeen;
      /* Slot owned by another symbol: no action for this pair. */
      if( fts5yy_lookahead[iBase+iTok]!=iTok ) continue;
      bSeen = fts5yycoverage[iState][iTok]!=0;
      if( !bSeen ) nMissed++;
      if( out ){
        fprintf(out,"State %d lookahead %s %s\n", iState,
                fts5yyTokenName[iTok],
                bSeen ? "ok" : "missed");
      }
    }
  }
  return nMissed;
}
#endif
2535 | ||||
/*
** Find the appropriate action for a parser given the terminal
** look-ahead token iLookAhead.
**
** A stateno greater than fts5YY_MAX_SHIFT is not a real state (it is a
** pending action) and is returned unchanged.  Otherwise the packed
** tables are consulted: if the slot at fts5yy_shift_ofst[stateno] +
** iLookAhead belongs to iLookAhead, its action is returned; if not, the
** optional fallback and wildcard mechanisms are tried, and finally the
** state's default action is returned.
*/
static fts5YYACTIONTYPE fts5yy_find_shift_action(
  fts5YYCODETYPE iLookAhead,    /* The look-ahead token */
  fts5YYACTIONTYPE stateno      /* Current state number */
){
  int i;

  if( stateno>fts5YY_MAX_SHIFT ) return stateno;
  assert( stateno <= fts5YY_SHIFT_COUNT );
#if defined(fts5YYCOVERAGE)
  fts5yycoverage[stateno][iLookAhead] = 1;
#endif
  /* The loop only repeats when a fallback token is substituted. */
  do{
    i = fts5yy_shift_ofst[stateno];
    assert( i>=0 );
    assert( i<=fts5YY_ACTTAB_COUNT );
    assert( i+fts5YYNFTS5TOKEN<=(int)fts5YY_NLOOKAHEAD );
    assert( iLookAhead!=fts5YYNOCODE );
    assert( iLookAhead < fts5YYNFTS5TOKEN );
    i += iLookAhead;
    assert( i<(int)fts5YY_NLOOKAHEAD );
    if( fts5yy_lookahead[i]!=iLookAhead ){
#ifdef fts5YYFALLBACK
      fts5YYCODETYPE iFallback;            /* Fallback token */
      assert( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) );
      iFallback = fts5yyFallback[iLookAhead];
      if( iFallback!=0 ){
#ifndef NDEBUG
        if( fts5yyTraceFILE ){
          fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n",
             fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]);
        }
#endif
        assert( fts5yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
        iLookAhead = iFallback;
        continue;
      }
#endif
#ifdef fts5YYWILDCARD
      {
        /* Slot the wildcard symbol would occupy for this state. */
        int j = i - iLookAhead + fts5YYWILDCARD;
        assert( j<(int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])) );
        if( fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 ){
#ifndef NDEBUG
          if( fts5yyTraceFILE ){
            fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n",
               fts5yyTracePrompt, fts5yyTokenName[iLookAhead],
               fts5yyTokenName[fts5YYWILDCARD]);
          }
#endif /* NDEBUG */
          return fts5yy_action[j];
        }
      }
#endif /* fts5YYWILDCARD */
      return fts5yy_default[stateno];
    }else{
      assert( i>=0 && i<(int)(sizeof(fts5yy_action)/sizeof(fts5yy_action[0])) );
      return fts5yy_action[i];
    }
  }while(1);
}
2600 | ||||
2601 | /* | |||
2602 | ** Find the appropriate action for a parser given the non-terminal | |||
2603 | ** look-ahead token iLookAhead. | |||
2604 | */ | |||
2605 | static fts5YYACTIONTYPEunsigned char fts5yy_find_reduce_action( | |||
2606 | fts5YYACTIONTYPEunsigned char stateno, /* Current state number */ | |||
2607 | fts5YYCODETYPEunsigned char iLookAhead /* The look-ahead token */ | |||
2608 | ){ | |||
2609 | int i; | |||
2610 | #ifdef fts5YYERRORSYMBOL | |||
2611 | if( stateno>fts5YY_REDUCE_COUNT(17) ){ | |||
2612 | return fts5yy_default[stateno]; | |||
2613 | } | |||
2614 | #else | |||
2615 | assert( stateno<=fts5YY_REDUCE_COUNT )((void) (0)); | |||
2616 | #endif | |||
2617 | i = fts5yy_reduce_ofst[stateno]; | |||
2618 | assert( iLookAhead!=fts5YYNOCODE )((void) (0)); | |||
2619 | i += iLookAhead; | |||
2620 | #ifdef fts5YYERRORSYMBOL | |||
2621 | if( i<0 || i>=fts5YY_ACTTAB_COUNT(105) || fts5yy_lookahead[i]!=iLookAhead ){ | |||
2622 | return fts5yy_default[stateno]; | |||
2623 | } | |||
2624 | #else | |||
2625 | assert( i>=0 && i<fts5YY_ACTTAB_COUNT )((void) (0)); | |||
2626 | assert( fts5yy_lookahead[i]==iLookAhead )((void) (0)); | |||
2627 | #endif | |||
2628 | return fts5yy_action[i]; | |||
2629 | } | |||
2630 | ||||
/*
** The following routine is called if the stack overflows.  It pops every
** entry above the bottom of the stack (running their destructors) and
** then reports "fts5: parser stack overflow" through
** sqlite3Fts5ParseError().
*/
static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){
   sqlite3Fts5ParserARG_FETCH
   sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
   if( fts5yyTraceFILE ){
     fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt);
   }
#endif
   while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
   /* Here code is inserted which will execute if the parser
   ** stack ever overflows */
/******** Begin %stack_overflow code ******************************************/
#line 36 "fts5parse.y"

  sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
#line 876 "fts5parse.sql"
/******** End %stack_overflow code ********************************************/
   sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument var */
   sqlite3Fts5ParserCTX_STORE
}
2654 | ||||
2655 | /* | |||
2656 | ** Print tracing information for a SHIFT action | |||
     | ** | |||
     | ** Nothing is printed unless fts5yyTraceFILE is set. A new state below | |||
     | ** fts5YYNSTATE is reported as "go to state N"; any other value is reported | |||
     | ** as "pending reduce N", with N taken relative to fts5YY_MIN_REDUCE. The | |||
     | ** token name printed is that of the entry currently on top of the parser | |||
     | ** stack, and zTag is printed as the leading label. When the debug guard | |||
     | ** below excludes the function, fts5yyTraceShift() is instead a macro that | |||
     | ** expands to nothing. | |||
2657 | */ | |||
2658 | #ifndef NDEBUG1 | |||
2659 | static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, const char *zTag){ | |||
2660 | if( fts5yyTraceFILE ){ | |||
2661 | if( fts5yyNewState<fts5YYNSTATE35 ){ | |||
2662 | fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n", | |||
2663 | fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], | |||
2664 | fts5yyNewState); | |||
2665 | }else{ | |||
2666 | fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n", | |||
2667 | fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], | |||
2668 | fts5yyNewState - fts5YY_MIN_REDUCE83); | |||
2669 | } | |||
2670 | } | |||
2671 | } | |||
2672 | #else | |||
2673 | # define fts5yyTraceShift(X,Y,Z) | |||
2674 | #endif | |||
2675 | ||||
2676 | /* | |||
2677 | ** Perform a shift action. | |||
     | ** | |||
     | ** The top-of-stack pointer is advanced first. If it then lies beyond | |||
     | ** fts5yystackEnd and fts5yyGrowStack() reports failure, the advance is | |||
     | ** undone, fts5yyStackOverflow() is called and the function returns without | |||
     | ** shifting anything. Otherwise the new state, major token and minor token | |||
     | ** are stored in the new top entry. A new state above fts5YY_MAX_SHIFT is | |||
     | ** first rebased by (fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE); presumably | |||
     | ** this turns a shift-reduce action into the matching reduce action - | |||
     | ** confirm against the Lemon template. | |||
     | ** | |||
     | ** NOTE(review): in this listing macro names appear fused with their | |||
     | ** expansions (for example the parameter types below); this looks like an | |||
     | ** artifact of how the listing was rendered - confirm against the source. | |||
2678 | */ | |||
2679 | static void fts5yy_shift( | |||
2680 | fts5yyParser *fts5yypParser, /* The parser to be shifted */ | |||
2681 | fts5YYACTIONTYPEunsigned char fts5yyNewState, /* The new state to shift in */ | |||
2682 | fts5YYCODETYPEunsigned char fts5yyMajor, /* The major token to shift in */ | |||
2683 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyMinor /* The minor token to shift in */ | |||
2684 | ){ | |||
2685 | fts5yyStackEntry *fts5yytos; | |||
2686 | fts5yypParser->fts5yytos++; | |||
2687 | #ifdef fts5YYTRACKMAXSTACKDEPTH | |||
2688 | if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){ | |||
2689 | fts5yypParser->fts5yyhwm++; | |||
2690 | assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) )((void) (0)); | |||
2691 | } | |||
2692 | #endif | |||
2693 | fts5yytos = fts5yypParser->fts5yytos; | |||
2694 | if( fts5yytos>fts5yypParser->fts5yystackEnd ){ | |||
2695 | if( fts5yyGrowStack(fts5yypParser)1 ){ | |||
2696 | fts5yypParser->fts5yytos--; | |||
2697 | fts5yyStackOverflow(fts5yypParser); | |||
2698 | return; | |||
2699 | } | |||
2700 | fts5yytos = fts5yypParser->fts5yytos; | |||
2701 | assert( fts5yytos <= fts5yypParser->fts5yystackEnd )((void) (0)); | |||
2702 | } | |||
2703 | if( fts5yyNewState > fts5YY_MAX_SHIFT34 ){ | |||
2704 | fts5yyNewState += fts5YY_MIN_REDUCE83 - fts5YY_MIN_SHIFTREDUCE52; | |||
2705 | } | |||
2706 | fts5yytos->stateno = fts5yyNewState; | |||
2707 | fts5yytos->major = fts5yyMajor; | |||
2708 | fts5yytos->minor.fts5yy0 = fts5yyMinor; | |||
2709 | fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift"); | |||
2710 | } | |||
2711 | ||||
2712 | /* For rule J, fts5yyRuleInfoLhs[J] contains the symbol on the left-hand side | |||
2713 | ** of that rule. The table holds one entry per grammar rule, (0) through | |||
     | ** (27), in the same order as fts5yyRuleInfoNRhs[]. fts5yy_reduce() reads | |||
     | ** it to obtain the symbol used for the goto lookup that follows each | |||
     | ** reduction. */ | |||
2714 | static const fts5YYCODETYPEunsigned char fts5yyRuleInfoLhs[] = { | |||
2715 | 16, /* (0) input ::= expr */ | |||
2716 | 20, /* (1) colset ::= MINUS LCP colsetlist RCP */ | |||
2717 | 20, /* (2) colset ::= LCP colsetlist RCP */ | |||
2718 | 20, /* (3) colset ::= STRING */ | |||
2719 | 20, /* (4) colset ::= MINUS STRING */ | |||
2720 | 21, /* (5) colsetlist ::= colsetlist STRING */ | |||
2721 | 21, /* (6) colsetlist ::= STRING */ | |||
2722 | 17, /* (7) expr ::= expr AND expr */ | |||
2723 | 17, /* (8) expr ::= expr OR expr */ | |||
2724 | 17, /* (9) expr ::= expr NOT expr */ | |||
2725 | 17, /* (10) expr ::= colset COLON LP expr RP */ | |||
2726 | 17, /* (11) expr ::= LP expr RP */ | |||
2727 | 17, /* (12) expr ::= exprlist */ | |||
2728 | 19, /* (13) exprlist ::= cnearset */ | |||
2729 | 19, /* (14) exprlist ::= exprlist cnearset */ | |||
2730 | 18, /* (15) cnearset ::= nearset */ | |||
2731 | 18, /* (16) cnearset ::= colset COLON nearset */ | |||
2732 | 22, /* (17) nearset ::= phrase */ | |||
2733 | 22, /* (18) nearset ::= CARET phrase */ | |||
2734 | 22, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */ | |||
2735 | 23, /* (20) nearphrases ::= phrase */ | |||
2736 | 23, /* (21) nearphrases ::= nearphrases phrase */ | |||
2737 | 25, /* (22) neardist_opt ::= */ | |||
2738 | 25, /* (23) neardist_opt ::= COMMA STRING */ | |||
2739 | 24, /* (24) phrase ::= phrase PLUS STRING star_opt */ | |||
2740 | 24, /* (25) phrase ::= STRING star_opt */ | |||
2741 | 26, /* (26) star_opt ::= STAR */ | |||
2742 | 26, /* (27) star_opt ::= */ | |||
2743 | }; | |||
2744 | ||||
2745 | /* For rule J, fts5yyRuleInfoNRhs[J] contains the negative of the number | |||
2746 | ** of symbols on the right-hand side of that rule. Rules with an empty | |||
     | ** right-hand side, (22) and (27), therefore have the value 0. | |||
     | ** fts5yy_reduce() adds the value to the top-of-stack pointer to find the | |||
     | ** entry at which the right-hand side begins, and sqlite3Fts5Parser() tests | |||
     | ** for 0 to decide whether the stack needs room for one more entry before | |||
     | ** reducing. */ | |||
2747 | static const signed char fts5yyRuleInfoNRhs[] = { | |||
2748 | -1, /* (0) input ::= expr */ | |||
2749 | -4, /* (1) colset ::= MINUS LCP colsetlist RCP */ | |||
2750 | -3, /* (2) colset ::= LCP colsetlist RCP */ | |||
2751 | -1, /* (3) colset ::= STRING */ | |||
2752 | -2, /* (4) colset ::= MINUS STRING */ | |||
2753 | -2, /* (5) colsetlist ::= colsetlist STRING */ | |||
2754 | -1, /* (6) colsetlist ::= STRING */ | |||
2755 | -3, /* (7) expr ::= expr AND expr */ | |||
2756 | -3, /* (8) expr ::= expr OR expr */ | |||
2757 | -3, /* (9) expr ::= expr NOT expr */ | |||
2758 | -5, /* (10) expr ::= colset COLON LP expr RP */ | |||
2759 | -3, /* (11) expr ::= LP expr RP */ | |||
2760 | -1, /* (12) expr ::= exprlist */ | |||
2761 | -1, /* (13) exprlist ::= cnearset */ | |||
2762 | -2, /* (14) exprlist ::= exprlist cnearset */ | |||
2763 | -1, /* (15) cnearset ::= nearset */ | |||
2764 | -3, /* (16) cnearset ::= colset COLON nearset */ | |||
2765 | -1, /* (17) nearset ::= phrase */ | |||
2766 | -2, /* (18) nearset ::= CARET phrase */ | |||
2767 | -5, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */ | |||
2768 | -1, /* (20) nearphrases ::= phrase */ | |||
2769 | -2, /* (21) nearphrases ::= nearphrases phrase */ | |||
2770 | 0, /* (22) neardist_opt ::= */ | |||
2771 | -2, /* (23) neardist_opt ::= COMMA STRING */ | |||
2772 | -4, /* (24) phrase ::= phrase PLUS STRING star_opt */ | |||
2773 | -2, /* (25) phrase ::= STRING star_opt */ | |||
2774 | -1, /* (26) star_opt ::= STAR */ | |||
2775 | 0, /* (27) star_opt ::= */ | |||
2776 | }; | |||
2777 | ||||
2778 | static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */ | |||
2779 | ||||
2780 | /* | |||
2781 | ** Perform a reduce action and the shift that must immediately | |||
2782 | ** follow the reduce. | |||
2783 | ** | |||
2784 | ** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions | |||
2785 | ** access to the lookahead token (if any). The fts5yyLookahead will be fts5YYNOCODE | |||
2786 | ** if the lookahead token has already been consumed. As this procedure is | |||
2787 | ** only called from one place, optimizing compilers will in-line it, which | |||
2788 | ** means that the extra parameters have no performance impact. | |||
     | ** | |||
     | ** Outline of the body: | |||
     | ** | |||
     | **   1. The switch runs the grammar action for rule fts5yyruleno. Each | |||
     | **      action reads right-hand-side values from stack entries at or below | |||
     | **      the top (fts5yymsp[0], fts5yymsp[-1], ...) and writes the result | |||
     | **      into the entry where the right-hand side begins. | |||
     | **   2. The two rules with an empty right-hand side, (22) and (27), write | |||
     | **      to fts5yymsp[1], one entry above the current top. The caller, | |||
     | **      sqlite3Fts5Parser(), checks that this entry exists before calling. | |||
     | **   3. After the switch, the left-hand-side symbol and right-hand-side | |||
     | **      size are read from fts5yyRuleInfoLhs[] and fts5yyRuleInfoNRhs[], | |||
     | **      the next action is looked up with fts5yy_find_reduce_action() using | |||
     | **      the state of the entry just below the right-hand side, and the top | |||
     | **      of stack is moved to the result entry, which receives that action | |||
     | **      as its state and the left-hand-side symbol as its major code. | |||
     | ** | |||
     | ** The return value is the action found in step 3. | |||
2789 | */ | |||
2790 | static fts5YYACTIONTYPEunsigned char fts5yy_reduce( | |||
2791 | fts5yyParser *fts5yypParser, /* The parser */ | |||
2792 | unsigned int fts5yyruleno, /* Number of the rule by which to reduce */ | |||
2793 | int fts5yyLookahead, /* Lookahead token, or fts5YYNOCODE if none */ | |||
2794 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyLookaheadToken /* Value of the lookahead token */ | |||
2795 | sqlite3Fts5ParserCTX_PDECL /* %extra_context */ | |||
2796 | ){ | |||
2797 | int fts5yygoto; /* The next state */ | |||
2798 | fts5YYACTIONTYPEunsigned char fts5yyact; /* The next action */ | |||
2799 | fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */ | |||
2800 | int fts5yysize; /* Amount to pop the stack */ | |||
2801 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
2802 | (void)fts5yyLookahead; | |||
2803 | (void)fts5yyLookaheadToken; | |||
2804 | fts5yymsp = fts5yypParser->fts5yytos; | |||
2805 | ||||
2806 | switch( fts5yyruleno ){ | |||
2807 | /* Beginning here are the reduction cases. A typical example | |||
2808 | ** follows: | |||
2809 | ** case 0: | |||
2810 | ** #line <lineno> <grammarfile> | |||
2811 | ** { ... } // User supplied code | |||
2812 | ** #line <lineno> <thisfile> | |||
2813 | ** break; | |||
2814 | */ | |||
2815 | /********** Begin reduce actions **********************************************/ | |||
2816 | fts5YYMINORTYPE fts5yylhsminor; | |||
2817 | case 0: /* input ::= expr */ | |||
2818 | #line 82 "fts5parse.y" | |||
2819 | { sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); } | |||
2820 | #line 1047 "fts5parse.sql" | |||
2821 | break; | |||
2822 | case 1: /* colset ::= MINUS LCP colsetlist RCP */ | |||
2823 | #line 97 "fts5parse.y" | |||
2824 | { | |||
2825 | fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11); | |||
2826 | } | |||
2827 | #line 1054 "fts5parse.sql" | |||
2828 | break; | |||
2829 | case 2: /* colset ::= LCP colsetlist RCP */ | |||
2830 | #line 100 "fts5parse.y" | |||
2831 | { fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; } | |||
2832 | #line 1059 "fts5parse.sql" | |||
2833 | break; | |||
2834 | case 3: /* colset ::= STRING */ | |||
2835 | #line 101 "fts5parse.y" | |||
2836 | { | |||
2837 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | |||
2838 | } | |||
2839 | #line 1066 "fts5parse.sql" | |||
2840 | fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | |||
2841 | break; | |||
2842 | case 4: /* colset ::= MINUS STRING */ | |||
2843 | #line 104 "fts5parse.y" | |||
2844 | { | |||
2845 | fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | |||
2846 | fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11); | |||
2847 | } | |||
2848 | #line 1075 "fts5parse.sql" | |||
2849 | break; | |||
2850 | case 5: /* colsetlist ::= colsetlist STRING */ | |||
2851 | #line 109 "fts5parse.y" | |||
2852 | { | |||
2853 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy11, &fts5yymsp[0].minor.fts5yy0); } | |||
2854 | #line 1081 "fts5parse.sql" | |||
2855 | fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | |||
2856 | break; | |||
2857 | case 6: /* colsetlist ::= STRING */ | |||
2858 | #line 111 "fts5parse.y" | |||
2859 | { | |||
2860 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | |||
2861 | } | |||
2862 | #line 1089 "fts5parse.sql" | |||
2863 | fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | |||
2864 | break; | |||
2865 | case 7: /* expr ::= expr AND expr */ | |||
2866 | #line 115 "fts5parse.y" | |||
2867 | { | |||
2868 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND2, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | |||
2869 | } | |||
2870 | #line 1097 "fts5parse.sql" | |||
2871 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
2872 | break; | |||
2873 | case 8: /* expr ::= expr OR expr */ | |||
2874 | #line 118 "fts5parse.y" | |||
2875 | { | |||
2876 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR1, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | |||
2877 | } | |||
2878 | #line 1105 "fts5parse.sql" | |||
2879 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
2880 | break; | |||
2881 | case 9: /* expr ::= expr NOT expr */ | |||
2882 | #line 121 "fts5parse.y" | |||
2883 | { | |||
2884 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT3, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | |||
2885 | } | |||
2886 | #line 1113 "fts5parse.sql" | |||
2887 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
2888 | break; | |||
2889 | case 10: /* expr ::= colset COLON LP expr RP */ | |||
2890 | #line 125 "fts5parse.y" | |||
2891 | { | |||
2892 | sqlite3Fts5ParseSetColset(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[-4].minor.fts5yy11); | |||
2893 | fts5yylhsminor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24; | |||
2894 | } | |||
2895 | #line 1122 "fts5parse.sql" | |||
2896 | fts5yymsp[-4].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
2897 | break; | |||
2898 | case 11: /* expr ::= LP expr RP */ | |||
2899 | #line 129 "fts5parse.y" | |||
2900 | {fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;} | |||
2901 | #line 1128 "fts5parse.sql" | |||
2902 | break; | |||
2903 | case 12: /* expr ::= exprlist */ | |||
2904 | case 13: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==13); | |||
2905 | #line 130 "fts5parse.y" | |||
2906 | {fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;} | |||
2907 | #line 1134 "fts5parse.sql" | |||
2908 | fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
2909 | break; | |||
2910 | case 14: /* exprlist ::= exprlist cnearset */ | |||
2911 | #line 133 "fts5parse.y" | |||
2912 | { | |||
2913 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24); | |||
2914 | } | |||
2915 | #line 1142 "fts5parse.sql" | |||
2916 | fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
2917 | break; | |||
2918 | case 15: /* cnearset ::= nearset */ | |||
2919 | #line 137 "fts5parse.y" | |||
2920 | { | |||
2921 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, 0, 0, fts5yymsp[0].minor.fts5yy46); | |||
2922 | } | |||
2923 | #line 1150 "fts5parse.sql" | |||
2924 | fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
2925 | break; | |||
2926 | case 16: /* cnearset ::= colset COLON nearset */ | |||
2927 | #line 140 "fts5parse.y" | |||
2928 | { | |||
2929 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, 0, 0, fts5yymsp[0].minor.fts5yy46); | |||
2930 | sqlite3Fts5ParseSetColset(pParse, fts5yylhsminor.fts5yy24, fts5yymsp[-2].minor.fts5yy11); | |||
2931 | } | |||
2932 | #line 1159 "fts5parse.sql" | |||
2933 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
2934 | break; | |||
2935 | case 17: /* nearset ::= phrase */ | |||
2936 | #line 151 "fts5parse.y" | |||
2937 | { fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); } | |||
2938 | #line 1165 "fts5parse.sql" | |||
2939 | fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | |||
2940 | break; | |||
2941 | case 18: /* nearset ::= CARET phrase */ | |||
2942 | #line 152 "fts5parse.y" | |||
2943 | { | |||
2944 | sqlite3Fts5ParseSetCaret(fts5yymsp[0].minor.fts5yy53); | |||
2945 | fts5yymsp[-1].minor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); | |||
2946 | } | |||
2947 | #line 1174 "fts5parse.sql" | |||
2948 | break; | |||
2949 | case 19: /* nearset ::= STRING LP nearphrases neardist_opt RP */ | |||
2950 | #line 156 "fts5parse.y" | |||
2951 | { | |||
2952 | sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0); | |||
2953 | sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-1].minor.fts5yy0); | |||
2954 | fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46; | |||
2955 | } | |||
2956 | #line 1183 "fts5parse.sql" | |||
2957 | fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | |||
2958 | break; | |||
2959 | case 20: /* nearphrases ::= phrase */ | |||
2960 | #line 162 "fts5parse.y" | |||
2961 | { | |||
2962 | fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); | |||
2963 | } | |||
2964 | #line 1191 "fts5parse.sql" | |||
2965 | fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | |||
2966 | break; | |||
2967 | case 21: /* nearphrases ::= nearphrases phrase */ | |||
2968 | #line 165 "fts5parse.y" | |||
2969 | { | |||
2970 | fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy46, fts5yymsp[0].minor.fts5yy53); | |||
2971 | } | |||
2972 | #line 1199 "fts5parse.sql" | |||
2973 | fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | |||
2974 | break; | |||
2975 | case 22: /* neardist_opt ::= */ | |||
2976 | #line 172 "fts5parse.y" | |||
2977 | { fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; } | |||
2978 | #line 1205 "fts5parse.sql" | |||
2979 | break; | |||
2980 | case 23: /* neardist_opt ::= COMMA STRING */ | |||
2981 | #line 173 "fts5parse.y" | |||
2982 | { fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; } | |||
2983 | #line 1210 "fts5parse.sql" | |||
2984 | break; | |||
2985 | case 24: /* phrase ::= phrase PLUS STRING star_opt */ | |||
2986 | #line 185 "fts5parse.y" | |||
2987 | { | |||
2988 | fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4); | |||
2989 | } | |||
2990 | #line 1217 "fts5parse.sql" | |||
2991 | fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53; | |||
2992 | break; | |||
2993 | case 25: /* phrase ::= STRING star_opt */ | |||
2994 | #line 188 "fts5parse.y" | |||
2995 | { | |||
2996 | fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4); | |||
2997 | } | |||
2998 | #line 1225 "fts5parse.sql" | |||
2999 | fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53; | |||
3000 | break; | |||
3001 | case 26: /* star_opt ::= STAR */ | |||
3002 | #line 196 "fts5parse.y" | |||
3003 | { fts5yymsp[0].minor.fts5yy4 = 1; } | |||
3004 | #line 1231 "fts5parse.sql" | |||
3005 | break; | |||
3006 | case 27: /* star_opt ::= */ | |||
3007 | #line 197 "fts5parse.y" | |||
3008 | { fts5yymsp[1].minor.fts5yy4 = 0; } | |||
3009 | #line 1236 "fts5parse.sql" | |||
3010 | break; | |||
3011 | default: | |||
3012 | break; | |||
3013 | /********** End reduce actions ************************************************/ | |||
3014 | }; | |||
3015 | assert( fts5yyruleno<sizeof(fts5yyRuleInfoLhs)/sizeof(fts5yyRuleInfoLhs[0]) )((void) (0)); | |||
3016 | fts5yygoto = fts5yyRuleInfoLhs[fts5yyruleno]; | |||
3017 | fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno]; | |||
3018 | fts5yyact = fts5yy_find_reduce_action(fts5yymsp[fts5yysize].stateno,(fts5YYCODETYPEunsigned char)fts5yygoto); | |||
3019 | ||||
3020 | /* There are no SHIFTREDUCE actions on nonterminals because the table | |||
3021 | ** generator has simplified them to pure REDUCE actions. */ | |||
3022 | assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) )((void) (0)); | |||
3023 | ||||
3024 | /* It is not possible for a REDUCE to be followed by an error */ | |||
3025 | assert( fts5yyact!=fts5YY_ERROR_ACTION )((void) (0)); | |||
3026 | ||||
3027 | fts5yymsp += fts5yysize+1; | |||
3028 | fts5yypParser->fts5yytos = fts5yymsp; | |||
3029 | fts5yymsp->stateno = (fts5YYACTIONTYPEunsigned char)fts5yyact; | |||
3030 | fts5yymsp->major = (fts5YYCODETYPEunsigned char)fts5yygoto; | |||
3031 | fts5yyTraceShift(fts5yypParser, fts5yyact, "... then shift"); | |||
3032 | return fts5yyact; | |||
3033 | } | |||
3034 | ||||
3035 | /* | |||
3036 | ** The following code executes when the parse fails | |||
     | ** | |||
     | ** It pops every entry above the base of the parser stack with | |||
     | ** fts5yy_pop_parser_stack(). The %parse_failure section below is empty, | |||
     | ** so no message is reported from here. The whole function is only | |||
     | ** compiled when the fts5YYNOERRORRECOVERY guard below is not defined. | |||
3037 | */ | |||
3038 | #ifndef fts5YYNOERRORRECOVERY1 | |||
3039 | static void fts5yy_parse_failed( | |||
3040 | fts5yyParser *fts5yypParser /* The parser */ | |||
3041 | ){ | |||
3042 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
3043 | sqlite3Fts5ParserCTX_FETCH | |||
3044 | #ifndef NDEBUG1 | |||
3045 | if( fts5yyTraceFILE ){ | |||
3046 | fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt); | |||
3047 | } | |||
3048 | #endif | |||
3049 | while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser); | |||
3050 | /* Here code is inserted which will be executed whenever the | |||
3051 | ** parser fails */ | |||
3052 | /************ Begin %parse_failure code ***************************************/ | |||
3053 | /************ End %parse_failure code *****************************************/ | |||
3054 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | |||
3055 | sqlite3Fts5ParserCTX_STORE | |||
3056 | } | |||
3057 | #endif /* fts5YYNOERRORRECOVERY */ | |||
3058 | ||||
3059 | /* | |||
3060 | ** The following code executes when a syntax error first occurs. | |||
     | ** | |||
     | ** The %syntax_error code below reports the message | |||
     | ** "fts5: syntax error near <token>" through sqlite3Fts5ParseError(), where | |||
     | ** <token> is the text of the offending token: the first fts5yyminor.n | |||
     | ** bytes at fts5yyminor.p, printed with a %.*s conversion. The major token | |||
     | ** code is not used. | |||
3061 | */ | |||
3062 | static void fts5yy_syntax_error( | |||
3063 | fts5yyParser *fts5yypParser, /* The parser */ | |||
3064 | int fts5yymajor, /* The major type of the error token */ | |||
3065 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyminor /* The minor type of the error token */ | |||
3066 | ){ | |||
3067 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
3068 | sqlite3Fts5ParserCTX_FETCH | |||
3069 | #define FTS5TOKENfts5yyminor fts5yyminor | |||
3070 | /************ Begin %syntax_error code ****************************************/ | |||
3071 | #line 30 "fts5parse.y" | |||
3072 | ||||
3073 | UNUSED_PARAM(fts5yymajor)(void)(fts5yymajor); /* Silence a compiler warning */ | |||
3074 | sqlite3Fts5ParseError( | |||
3075 | pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKENfts5yyminor.n,FTS5TOKENfts5yyminor.p | |||
3076 | ); | |||
3077 | #line 1304 "fts5parse.sql" | |||
3078 | /************ End %syntax_error code ******************************************/ | |||
3079 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | |||
3080 | sqlite3Fts5ParserCTX_STORE | |||
3081 | } | |||
3082 | ||||
3083 | /* | |||
3084 | ** The following is executed when the parser accepts | |||
     | ** | |||
     | ** The function asserts that the parser stack is back at its base entry. | |||
     | ** Unless the fts5YYNOERRORRECOVERY guard below excludes the statement, it | |||
     | ** also sets the error counter fts5yyerrcnt to -1. The %parse_accept | |||
     | ** section below is empty. In this part of the file the only caller is | |||
     | ** sqlite3Fts5Parser(), which pops the top entry before calling. | |||
3085 | */ | |||
3086 | static void fts5yy_accept( | |||
3087 | fts5yyParser *fts5yypParser /* The parser */ | |||
3088 | ){ | |||
3089 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
3090 | sqlite3Fts5ParserCTX_FETCH | |||
3091 | #ifndef NDEBUG1 | |||
3092 | if( fts5yyTraceFILE ){ | |||
3093 | fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt); | |||
3094 | } | |||
3095 | #endif | |||
3096 | #ifndef fts5YYNOERRORRECOVERY1 | |||
3097 | fts5yypParser->fts5yyerrcnt = -1; | |||
3098 | #endif | |||
3099 | assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack )((void) (0)); | |||
3100 | /* Here code is inserted which will be executed whenever the | |||
3101 | ** parser accepts */ | |||
3102 | /*********** Begin %parse_accept code *****************************************/ | |||
3103 | /*********** End %parse_accept code *******************************************/ | |||
3104 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | |||
3105 | sqlite3Fts5ParserCTX_STORE | |||
3106 | } | |||
3107 | ||||
3108 | /* The main parser program. | |||
3109 | ** The first argument is a pointer to a structure obtained from | |||
3110 | ** "sqlite3Fts5ParserAlloc" which describes the current state of the parser. | |||
3111 | ** The second argument is the major token number. The third is | |||
3112 | ** the minor token. The fourth optional argument is whatever the | |||
3113 | ** user wants (and specified in the grammar) and is available for | |||
3114 | ** use by the action routines. | |||
3115 | ** | |||
3116 | ** Inputs: | |||
3117 | ** <ul> | |||
3118 | ** <li> A pointer to the parser (an opaque structure.) | |||
3119 | ** <li> The major token number. | |||
3120 | ** <li> The minor token number. | |||
3121 | ** <li> An optional argument of a grammar-specified type. | |||
3122 | ** </ul> | |||
3123 | ** | |||
3124 | ** Outputs: | |||
3125 | ** None. | |||
     | ** | |||
     | ** Each call feeds one token to the parser. Starting from the state on top | |||
     | ** of the stack, the loop looks up an action with fts5yy_find_shift_action() | |||
     | ** and dispatches on its value: | |||
     | ** | |||
     | **   * fts5YY_MIN_REDUCE or above: reduce by the corresponding rule with | |||
     | **     fts5yy_reduce() and loop again with the action it returns. For a | |||
     | **     rule with an empty right-hand side the stack is first checked for | |||
     | **     room for one more entry; if there is none and fts5yyGrowStack() | |||
     | **     reports failure, fts5yyStackOverflow() is called and the loop ends. | |||
     | **   * fts5YY_MAX_SHIFTREDUCE or below: shift the token with fts5yy_shift() | |||
     | **     and leave the loop. | |||
     | **   * fts5YY_ACCEPT_ACTION: pop the top entry, call fts5yy_accept() and | |||
     | **     return immediately. | |||
     | **   * anything else: a syntax error, handled by one of three compile-time | |||
     | **     variants selected by fts5YYERRORSYMBOL and fts5YYNOERRORRECOVERY. | |||
     | ** | |||
     | ** NOTE(review): in this listing macro names appear fused with their | |||
     | ** expansions (for example the parameter list below); this looks like an | |||
     | ** artifact of how the listing was rendered - confirm against the source. | |||
3126 | */ | |||
3127 | static void sqlite3Fts5Parser( | |||
3128 | void *fts5yyp, /* The parser */ | |||
3129 | int fts5yymajor, /* The major token code number */ | |||
3130 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyminor /* The value for the token */ | |||
3131 | sqlite3Fts5ParserARG_PDECL,Fts5Parse *pParse /* Optional %extra_argument parameter */ | |||
3132 | ){ | |||
3133 | fts5YYMINORTYPE fts5yyminorunion; | |||
3134 | fts5YYACTIONTYPEunsigned char fts5yyact; /* The parser action. */ | |||
3135 | #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY1) | |||
3136 | int fts5yyendofinput; /* True if we are at the end of input */ | |||
3137 | #endif | |||
3138 | #ifdef fts5YYERRORSYMBOL | |||
3139 | int fts5yyerrorhit = 0; /* True if fts5yymajor has invoked an error */ | |||
3140 | #endif | |||
3141 | fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp; /* The parser */ | |||
3142 | sqlite3Fts5ParserCTX_FETCH | |||
3143 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; | |||
3144 | ||||
3145 | assert( fts5yypParser->fts5yytos!=0 )((void) (0)); | |||
3146 | #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY1) | |||
3147 | fts5yyendofinput = (fts5yymajor==0); | |||
3148 | #endif | |||
3149 | ||||
3150 | fts5yyact = fts5yypParser->fts5yytos->stateno; | |||
3151 | #ifndef NDEBUG1 | |||
3152 | if( fts5yyTraceFILE ){ | |||
3153 | if( fts5yyact < fts5YY_MIN_REDUCE83 ){ | |||
3154 | fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n", | |||
3155 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact); | |||
3156 | }else{ | |||
3157 | fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n", | |||
3158 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE83); | |||
3159 | } | |||
3160 | } | |||
3161 | #endif | |||
3162 | ||||
3163 | while(1){ /* Exit by "break" */ | |||
3164 | assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack )((void) (0)); | |||
3165 | assert( fts5yyact==fts5yypParser->fts5yytos->stateno )((void) (0)); | |||
3166 | fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPEunsigned char)fts5yymajor,fts5yyact); | |||
3167 | if( fts5yyact >= fts5YY_MIN_REDUCE83 ){ | |||
3168 | unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE83; /* Reduce by this rule */ | |||
3169 | #ifndef NDEBUG1 | |||
3170 | assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) )((void) (0)); | |||
3171 | if( fts5yyTraceFILE ){ | |||
3172 | int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno]; | |||
3173 | if( fts5yysize ){ | |||
3174 | fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n", | |||
3175 | fts5yyTracePrompt, | |||
3176 | fts5yyruleno, fts5yyRuleName[fts5yyruleno], | |||
3177 | fts5yyruleno<fts5YYNRULE_WITH_ACTION28 ? "" : " without external action", | |||
3178 | fts5yypParser->fts5yytos[fts5yysize].stateno); | |||
3179 | }else{ | |||
3180 | fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n", | |||
3181 | fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno], | |||
3182 | fts5yyruleno<fts5YYNRULE_WITH_ACTION28 ? "" : " without external action"); | |||
3183 | } | |||
3184 | } | |||
3185 | #endif /* NDEBUG */ | |||
3186 | ||||
3187 | /* Check that the stack is large enough to grow by a single entry | |||
3188 | ** if the RHS of the rule is empty. This ensures that there is room | |||
3189 | ** enough on the stack to push the LHS value */ | |||
3190 | if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){ | |||
3191 | #ifdef fts5YYTRACKMAXSTACKDEPTH | |||
3192 | if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){ | |||
3193 | fts5yypParser->fts5yyhwm++; | |||
3194 | assert( fts5yypParser->fts5yyhwm ==((void) (0)) | |||
3195 | (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack))((void) (0)); | |||
3196 | } | |||
3197 | #endif | |||
3198 | if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){ | |||
3199 | if( fts5yyGrowStack(fts5yypParser)1 ){ | |||
3200 | fts5yyStackOverflow(fts5yypParser); | |||
3201 | break; | |||
3202 | } | |||
3203 | } | |||
3204 | } | |||
3205 | fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM); | |||
3206 | }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE79 ){ | |||
3207 | fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPEunsigned char)fts5yymajor,fts5yyminor); | |||
3208 | #ifndef fts5YYNOERRORRECOVERY1 | |||
3209 | fts5yypParser->fts5yyerrcnt--; | |||
3210 | #endif | |||
3211 | break; | |||
3212 | }else if( fts5yyact==fts5YY_ACCEPT_ACTION81 ){ | |||
3213 | fts5yypParser->fts5yytos--; | |||
3214 | fts5yy_accept(fts5yypParser); | |||
3215 | return; | |||
3216 | }else{ | |||
3217 | assert( fts5yyact == fts5YY_ERROR_ACTION )((void) (0)); | |||
3218 | fts5yyminorunion.fts5yy0 = fts5yyminor; | |||
3219 | #ifdef fts5YYERRORSYMBOL | |||
3220 | int fts5yymx; | |||
3221 | #endif | |||
3222 | #ifndef NDEBUG1 | |||
3223 | if( fts5yyTraceFILE ){ | |||
3224 | fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt); | |||
3225 | } | |||
3226 | #endif | |||
3227 | #ifdef fts5YYERRORSYMBOL | |||
3228 | /* A syntax error has occurred. | |||
3229 | ** The response to an error depends upon whether or not the | |||
3230 | ** grammar defines an error token "ERROR". | |||
3231 | ** | |||
3232 | ** This is what we do if the grammar does define ERROR: | |||
3233 | ** | |||
3234 | ** * Call the %syntax_error function. | |||
3235 | ** | |||
3236 | ** * Begin popping the stack until we enter a state where | |||
3237 | ** it is legal to shift the error symbol, then shift | |||
3238 | ** the error symbol. | |||
3239 | ** | |||
3240 | ** * Set the error count to three. | |||
3241 | ** | |||
3242 | ** * Begin accepting and shifting new tokens. No new error | |||
3243 | ** processing will occur until three tokens have been | |||
3244 | ** shifted successfully. | |||
3245 | ** | |||
3246 | */ | |||
3247 | if( fts5yypParser->fts5yyerrcnt<0 ){ | |||
3248 | fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor); | |||
3249 | } | |||
3250 | fts5yymx = fts5yypParser->fts5yytos->major; | |||
3251 | if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){ | |||
3252 | #ifndef NDEBUG1 | |||
3253 | if( fts5yyTraceFILE ){ | |||
3254 | fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n", | |||
3255 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]); | |||
3256 | } | |||
3257 | #endif | |||
3258 | fts5yy_destructor(fts5yypParser, (fts5YYCODETYPEunsigned char)fts5yymajor, &fts5yyminorunion); | |||
3259 | fts5yymajor = fts5YYNOCODE27; | |||
3260 | }else{ | |||
3261 | while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){ | |||
3262 | fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno, | |||
3263 | fts5YYERRORSYMBOL); | |||
3264 | if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE79 ) break; | |||
3265 | fts5yy_pop_parser_stack(fts5yypParser); | |||
3266 | } | |||
3267 | if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){ | |||
3268 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | |||
3269 | fts5yy_parse_failed(fts5yypParser); | |||
3270 | #ifndef fts5YYNOERRORRECOVERY1 | |||
3271 | fts5yypParser->fts5yyerrcnt = -1; | |||
3272 | #endif | |||
3273 | fts5yymajor = fts5YYNOCODE27; | |||
3274 | }else if( fts5yymx!=fts5YYERRORSYMBOL ){ | |||
3275 | fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor); | |||
3276 | } | |||
3277 | } | |||
3278 | fts5yypParser->fts5yyerrcnt = 3; | |||
3279 | fts5yyerrorhit = 1; | |||
3280 | if( fts5yymajor==fts5YYNOCODE27 ) break; | |||
3281 | fts5yyact = fts5yypParser->fts5yytos->stateno; | |||
3282 | #elif defined(fts5YYNOERRORRECOVERY1) | |||
3283 | /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to | |||
3284 | ** do any kind of error recovery. Instead, simply invoke the syntax | |||
3285 | ** error routine and continue going as if nothing had happened. | |||
3286 | ** | |||
3287 | ** Applications can set this macro (for example inside %include) if | |||
3288 | ** they intend to abandon the parse upon the first syntax error seen. | |||
3289 | */ | |||
3290 | fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor); | |||
3291 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | |||
3292 | break; | |||
3293 | #else /* fts5YYERRORSYMBOL is not defined */ | |||
3294 | /* This is what we do if the grammar does not define ERROR: | |||
3295 | ** | |||
3296 | ** * Report an error message, and throw away the input token. | |||
3297 | ** | |||
3298 | ** * If the input token is $, then fail the parse. | |||
3299 | ** | |||
3300 | ** As before, subsequent error messages are suppressed until | |||
3301 | ** three input tokens have been successfully shifted. | |||
3302 | */ | |||
3303 | if( fts5yypParser->fts5yyerrcnt<=0 ){ | |||
3304 | fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor); | |||
3305 | } | |||
3306 | fts5yypParser->fts5yyerrcnt = 3; | |||
3307 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | |||
3308 | if( fts5yyendofinput ){ | |||
3309 | fts5yy_parse_failed(fts5yypParser); | |||
3310 | #ifndef fts5YYNOERRORRECOVERY1 | |||
3311 | fts5yypParser->fts5yyerrcnt = -1; | |||
3312 | #endif | |||
3313 | } | |||
3314 | break; | |||
3315 | #endif | |||
3316 | } | |||
3317 | } | |||
3318 | #ifndef NDEBUG1 | |||
3319 | if( fts5yyTraceFILE ){ | |||
3320 | fts5yyStackEntry *i; | |||
3321 | char cDiv = '['; | |||
3322 | fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt); | |||
3323 | for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){ | |||
3324 | fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]); | |||
3325 | cDiv = ' '; | |||
3326 | } | |||
3327 | fprintf(fts5yyTraceFILE,"]\n"); | |||
3328 | } | |||
3329 | #endif | |||
3330 | return; | |||
3331 | } | |||
3332 | ||||
/*
** Map canonical token code iToken to its fallback token code. The value
** returned is 0 if iToken has no fallback, which is always the case for
** builds in which fts5YYFALLBACK is not defined.
*/
static int sqlite3Fts5ParserFallback(int iToken){
#ifndef fts5YYFALLBACK
  /* No fallback table compiled in - the argument is unused */
  (void)iToken;
  return 0;
#else
  assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) );
  return fts5yyFallback[iToken];
#endif
}
3346 | ||||
3347 | #line 1 "fts5_aux.c" | |||
3348 | /* | |||
3349 | ** 2014 May 31 | |||
3350 | ** | |||
3351 | ** The author disclaims copyright to this source code. In place of | |||
3352 | ** a legal notice, here is a blessing: | |||
3353 | ** | |||
3354 | ** May you do good and not evil. | |||
3355 | ** May you find forgiveness for yourself and forgive others. | |||
3356 | ** May you share freely, never taking more than you give. | |||
3357 | ** | |||
3358 | ****************************************************************************** | |||
3359 | */ | |||
3360 | ||||
3361 | ||||
3362 | /* #include "fts5Int.h" */ | |||
3363 | #include <math.h> /* amalgamator: keep */ | |||
3364 | ||||
3365 | /* | |||
3366 | ** Object used to iterate through all "coalesced phrase instances" in | |||
3367 | ** a single column of the current row. If the phrase instances in the | |||
3368 | ** column being considered do not overlap, this object simply iterates | |||
3369 | ** through them. Or, if they do overlap (share one or more tokens in | |||
3370 | ** common), each set of overlapping instances is treated as a single | |||
3371 | ** match. See documentation for the highlight() auxiliary function for | |||
3372 | ** details. | |||
3373 | ** | |||
3374 | ** Usage is: | |||
3375 | ** | |||
3376 | ** for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter); | |||
3377 | ** rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); | |||
3378 | ** rc = fts5CInstIterNext(&iter) | |||
3379 | ** ){ | |||
3380 | ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); | |||
3381 | ** } | |||
3382 | ** | |||
3383 | */ | |||
3384 | typedef struct CInstIter CInstIter; | |||
3385 | struct CInstIter { | |||
3386 | const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ | |||
3387 | Fts5Context *pFts; /* First arg to pass to pApi functions */ | |||
3388 | int iCol; /* Column to search */ | |||
3389 | int iInst; /* Next phrase instance index */ | |||
3390 | int nInst; /* Total number of phrase instances */ | |||
3391 | ||||
3392 | /* Output variables */ | |||
3393 | int iStart; /* First token in coalesced phrase instance */ | |||
3394 | int iEnd; /* Last token in coalesced phrase instance */ | |||
3395 | }; | |||
3396 | ||||
3397 | /* | |||
3398 | ** Advance the iterator to the next coalesced phrase instance. Return | |||
3399 | ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. | |||
3400 | */ | |||
3401 | static int fts5CInstIterNext(CInstIter *pIter){ | |||
3402 | int rc = SQLITE_OK0; | |||
3403 | pIter->iStart = -1; | |||
3404 | pIter->iEnd = -1; | |||
3405 | ||||
3406 | while( rc==SQLITE_OK0 && pIter->iInst<pIter->nInst ){ | |||
3407 | int ip; int ic; int io; | |||
3408 | rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); | |||
3409 | if( rc==SQLITE_OK0 ){ | |||
3410 | if( ic==pIter->iCol ){ | |||
3411 | int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); | |||
3412 | if( pIter->iStart<0 ){ | |||
3413 | pIter->iStart = io; | |||
3414 | pIter->iEnd = iEnd; | |||
3415 | }else if( io<=pIter->iEnd ){ | |||
3416 | if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; | |||
3417 | }else{ | |||
3418 | break; | |||
3419 | } | |||
3420 | } | |||
3421 | pIter->iInst++; | |||
3422 | } | |||
3423 | } | |||
3424 | ||||
3425 | return rc; | |||
3426 | } | |||
3427 | ||||
3428 | /* | |||
3429 | ** Initialize the iterator object indicated by the final parameter to | |||
3430 | ** iterate through coalesced phrase instances in column iCol. | |||
3431 | */ | |||
3432 | static int fts5CInstIterInit( | |||
3433 | const Fts5ExtensionApi *pApi, | |||
3434 | Fts5Context *pFts, | |||
3435 | int iCol, | |||
3436 | CInstIter *pIter | |||
3437 | ){ | |||
3438 | int rc; | |||
3439 | ||||
3440 | memset(pIter, 0, sizeof(CInstIter)); | |||
3441 | pIter->pApi = pApi; | |||
3442 | pIter->pFts = pFts; | |||
3443 | pIter->iCol = iCol; | |||
3444 | rc = pApi->xInstCount(pFts, &pIter->nInst); | |||
3445 | ||||
3446 | if( rc==SQLITE_OK0 ){ | |||
3447 | rc = fts5CInstIterNext(pIter); | |||
3448 | } | |||
3449 | ||||
3450 | return rc; | |||
3451 | } | |||
3452 | ||||
3453 | ||||
3454 | ||||
3455 | /************************************************************************* | |||
3456 | ** Start of highlight() implementation. | |||
3457 | */ | |||
3458 | typedef struct HighlightContext HighlightContext; | |||
3459 | struct HighlightContext { | |||
3460 | /* Constant parameters to fts5HighlightCb() */ | |||
3461 | int iRangeStart; /* First token to include */ | |||
3462 | int iRangeEnd; /* If non-zero, last token to include */ | |||
3463 | const char *zOpen; /* Opening highlight */ | |||
3464 | const char *zClose; /* Closing highlight */ | |||
3465 | const char *zIn; /* Input text */ | |||
3466 | int nIn; /* Size of input text in bytes */ | |||
3467 | ||||
3468 | /* Variables modified by fts5HighlightCb() */ | |||
3469 | CInstIter iter; /* Coalesced Instance Iterator */ | |||
3470 | int iPos; /* Current token offset in zIn[] */ | |||
3471 | int iOff; /* Have copied up to this offset in zIn[] */ | |||
3472 | int bOpen; /* True if highlight is open */ | |||
3473 | char *zOut; /* Output value */ | |||
3474 | }; | |||
3475 | ||||
3476 | /* | |||
3477 | ** Append text to the HighlightContext output string - p->zOut. Argument | |||
3478 | ** z points to a buffer containing n bytes of text to append. If n is | |||
3479 | ** negative, everything up until the first '\0' is appended to the output. | |||
3480 | ** | |||
3481 | ** If *pRc is set to any value other than SQLITE_OK when this function is | |||
3482 | ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, | |||
3483 | ** *pRc is set to an error code before returning. | |||
3484 | */ | |||
3485 | static void fts5HighlightAppend( | |||
3486 | int *pRc, | |||
3487 | HighlightContext *p, | |||
3488 | const char *z, int n | |||
3489 | ){ | |||
3490 | if( *pRc==SQLITE_OK0 && z ){ | |||
3491 | if( n<0 ) n = (int)strlen(z); | |||
3492 | p->zOut = sqlite3_mprintfsqlite3_api->mprintf("%z%.*s", p->zOut, n, z); | |||
3493 | if( p->zOut==0 ) *pRc = SQLITE_NOMEM7; | |||
3494 | } | |||
3495 | } | |||
3496 | ||||
3497 | /* | |||
3498 | ** Tokenizer callback used by implementation of highlight() function. | |||
3499 | */ | |||
3500 | static int fts5HighlightCb( | |||
3501 | void *pContext, /* Pointer to HighlightContext object */ | |||
3502 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
3503 | const char *pToken, /* Buffer containing token */ | |||
3504 | int nToken, /* Size of token in bytes */ | |||
3505 | int iStartOff, /* Start byte offset of token */ | |||
3506 | int iEndOff /* End byte offset of token */ | |||
3507 | ){ | |||
3508 | HighlightContext *p = (HighlightContext*)pContext; | |||
3509 | int rc = SQLITE_OK0; | |||
3510 | int iPos; | |||
3511 | ||||
3512 | UNUSED_PARAM2(pToken, nToken)(void)(pToken), (void)(nToken); | |||
3513 | ||||
3514 | if( tflags & FTS5_TOKEN_COLOCATED0x0001 ) return SQLITE_OK0; | |||
3515 | iPos = p->iPos++; | |||
3516 | ||||
3517 | if( p->iRangeEnd>=0 ){ | |||
3518 | if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK0; | |||
3519 | if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; | |||
3520 | } | |||
3521 | ||||
3522 | /* If the parenthesis is open, and this token is not part of the current | |||
3523 | ** phrase, and the starting byte offset of this token is past the point | |||
3524 | ** that has currently been copied into the output buffer, close the | |||
3525 | ** parenthesis. */ | |||
3526 | if( p->bOpen | |||
3527 | && (iPos<=p->iter.iStart || p->iter.iStart<0) | |||
3528 | && iStartOff>p->iOff | |||
3529 | ){ | |||
3530 | fts5HighlightAppend(&rc, p, p->zClose, -1); | |||
3531 | p->bOpen = 0; | |||
3532 | } | |||
3533 | ||||
3534 | /* If this is the start of a new phrase, and the highlight is not open: | |||
3535 | ** | |||
3536 | ** * copy text from the input up to the start of the phrase, and | |||
3537 | ** * open the highlight. | |||
3538 | */ | |||
3539 | if( iPos==p->iter.iStart && p->bOpen==0 ){ | |||
3540 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); | |||
3541 | fts5HighlightAppend(&rc, p, p->zOpen, -1); | |||
3542 | p->iOff = iStartOff; | |||
3543 | p->bOpen = 1; | |||
3544 | } | |||
3545 | ||||
3546 | if( iPos==p->iter.iEnd ){ | |||
3547 | if( p->bOpen==0 ){ | |||
3548 | assert( p->iRangeEnd>=0 )((void) (0)); | |||
3549 | fts5HighlightAppend(&rc, p, p->zOpen, -1); | |||
3550 | p->bOpen = 1; | |||
3551 | } | |||
3552 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | |||
3553 | p->iOff = iEndOff; | |||
3554 | ||||
3555 | if( rc==SQLITE_OK0 ){ | |||
3556 | rc = fts5CInstIterNext(&p->iter); | |||
3557 | } | |||
3558 | } | |||
3559 | ||||
3560 | if( iPos==p->iRangeEnd ){ | |||
3561 | if( p->bOpen ){ | |||
3562 | if( p->iter.iStart>=0 && iPos>=p->iter.iStart ){ | |||
3563 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | |||
3564 | p->iOff = iEndOff; | |||
3565 | } | |||
3566 | fts5HighlightAppend(&rc, p, p->zClose, -1); | |||
3567 | p->bOpen = 0; | |||
3568 | } | |||
3569 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | |||
3570 | p->iOff = iEndOff; | |||
3571 | } | |||
3572 | ||||
3573 | return rc; | |||
3574 | } | |||
3575 | ||||
3576 | ||||
3577 | /* | |||
3578 | ** Implementation of highlight() function. | |||
3579 | */ | |||
3580 | static void fts5HighlightFunction( | |||
3581 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
3582 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
3583 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
3584 | int nVal, /* Number of values in apVal[] array */ | |||
3585 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
3586 | ){ | |||
3587 | HighlightContext ctx; | |||
3588 | int rc; | |||
3589 | int iCol; | |||
3590 | ||||
3591 | if( nVal!=3 ){ | |||
3592 | const char *zErr = "wrong number of arguments to function highlight()"; | |||
3593 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
3594 | return; | |||
3595 | } | |||
3596 | ||||
3597 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
3598 | memset(&ctx, 0, sizeof(HighlightContext)); | |||
3599 | ctx.zOpen = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[1]); | |||
3600 | ctx.zClose = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[2]); | |||
3601 | ctx.iRangeEnd = -1; | |||
3602 | rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); | |||
3603 | if( rc==SQLITE_RANGE25 ){ | |||
3604 | sqlite3_result_textsqlite3_api->result_text(pCtx, "", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
3605 | rc = SQLITE_OK0; | |||
3606 | }else if( ctx.zIn ){ | |||
3607 | const char *pLoc = 0; /* Locale of column iCol */ | |||
3608 | int nLoc = 0; /* Size of pLoc in bytes */ | |||
3609 | if( rc==SQLITE_OK0 ){ | |||
3610 | rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); | |||
3611 | } | |||
3612 | ||||
3613 | if( rc==SQLITE_OK0 ){ | |||
3614 | rc = pApi->xColumnLocale(pFts, iCol, &pLoc, &nLoc); | |||
3615 | } | |||
3616 | if( rc==SQLITE_OK0 ){ | |||
3617 | rc = pApi->xTokenize_v2( | |||
3618 | pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx, fts5HighlightCb | |||
3619 | ); | |||
3620 | } | |||
3621 | if( ctx.bOpen ){ | |||
3622 | fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); | |||
3623 | } | |||
3624 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); | |||
3625 | ||||
3626 | if( rc==SQLITE_OK0 ){ | |||
3627 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
3628 | } | |||
3629 | sqlite3_freesqlite3_api->free(ctx.zOut); | |||
3630 | } | |||
3631 | if( rc!=SQLITE_OK0 ){ | |||
3632 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
3633 | } | |||
3634 | } | |||
3635 | /* | |||
3636 | ** End of highlight() implementation. | |||
3637 | **************************************************************************/ | |||
3638 | ||||
/*
** Context object handed to fts5SentenceFinderCb(). It accumulates the
** token positions at which sentences begin.
*/
typedef struct Fts5SFinder {
  int iPos;                       /* Position of the next token */
  int nFirstAlloc;                /* Number of slots allocated in aFirst[] */
  int nFirst;                     /* Number of slots of aFirst[] in use */
  int *aFirst;                    /* Position of first token of each sentence */
  const char *zDoc;               /* Text of the document being tokenized */
} Fts5SFinder;
3650 | ||||
3651 | /* | |||
3652 | ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if | |||
3653 | ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an | |||
3654 | ** error occurs. | |||
3655 | */ | |||
3656 | static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ | |||
3657 | if( p->nFirstAlloc==p->nFirst ){ | |||
3658 | int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; | |||
3659 | int *aNew; | |||
3660 | ||||
3661 | aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(p->aFirst, nNew*sizeof(int)); | |||
3662 | if( aNew==0 ) return SQLITE_NOMEM7; | |||
3663 | p->aFirst = aNew; | |||
3664 | p->nFirstAlloc = nNew; | |||
3665 | } | |||
3666 | p->aFirst[p->nFirst++] = iAdd; | |||
3667 | return SQLITE_OK0; | |||
3668 | } | |||
3669 | ||||
3670 | /* | |||
3671 | ** This function is an xTokenize() callback used by the auxiliary snippet() | |||
3672 | ** function. Its job is to identify tokens that are the first in a sentence. | |||
3673 | ** For each such token, an entry is added to the SFinder.aFirst[] array. | |||
3674 | */ | |||
3675 | static int fts5SentenceFinderCb( | |||
3676 | void *pContext, /* Pointer to HighlightContext object */ | |||
3677 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
3678 | const char *pToken, /* Buffer containing token */ | |||
3679 | int nToken, /* Size of token in bytes */ | |||
3680 | int iStartOff, /* Start offset of token */ | |||
3681 | int iEndOff /* End offset of token */ | |||
3682 | ){ | |||
3683 | int rc = SQLITE_OK0; | |||
3684 | ||||
3685 | UNUSED_PARAM2(pToken, nToken)(void)(pToken), (void)(nToken); | |||
3686 | UNUSED_PARAM(iEndOff)(void)(iEndOff); | |||
3687 | ||||
3688 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ){ | |||
3689 | Fts5SFinder *p = (Fts5SFinder*)pContext; | |||
3690 | if( p->iPos>0 ){ | |||
3691 | int i; | |||
3692 | char c = 0; | |||
3693 | for(i=iStartOff-1; i>=0; i--){ | |||
3694 | c = p->zDoc[i]; | |||
3695 | if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break; | |||
3696 | } | |||
3697 | if( i!=iStartOff-1 && (c=='.' || c==':') ){ | |||
3698 | rc = fts5SentenceFinderAdd(p, p->iPos); | |||
3699 | } | |||
3700 | }else{ | |||
3701 | rc = fts5SentenceFinderAdd(p, 0); | |||
3702 | } | |||
3703 | p->iPos++; | |||
3704 | } | |||
3705 | return rc; | |||
3706 | } | |||
3707 | ||||
3708 | static int fts5SnippetScore( | |||
3709 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
3710 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
3711 | int nDocsize, /* Size of column in tokens */ | |||
3712 | unsigned char *aSeen, /* Array with one element per query phrase */ | |||
3713 | int iCol, /* Column to score */ | |||
3714 | int iPos, /* Starting offset to score */ | |||
3715 | int nToken, /* Max tokens per snippet */ | |||
3716 | int *pnScore, /* OUT: Score */ | |||
3717 | int *piPos /* OUT: Adjusted offset */ | |||
3718 | ){ | |||
3719 | int rc; | |||
3720 | int i; | |||
3721 | int ip = 0; | |||
3722 | int ic = 0; | |||
3723 | int iOff = 0; | |||
3724 | int iFirst = -1; | |||
3725 | int nInst; | |||
3726 | int nScore = 0; | |||
3727 | int iLast = 0; | |||
3728 | sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken; | |||
3729 | ||||
3730 | rc = pApi->xInstCount(pFts, &nInst); | |||
3731 | for(i=0; i<nInst && rc==SQLITE_OK0; i++){ | |||
3732 | rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); | |||
3733 | if( rc==SQLITE_OK0 && ic==iCol && iOff>=iPos && iOff<iEnd ){ | |||
3734 | nScore += (aSeen[ip] ? 1 : 1000); | |||
3735 | aSeen[ip] = 1; | |||
3736 | if( iFirst<0 ) iFirst = iOff; | |||
3737 | iLast = iOff + pApi->xPhraseSize(pFts, ip); | |||
3738 | } | |||
3739 | } | |||
3740 | ||||
3741 | *pnScore = nScore; | |||
3742 | if( piPos ){ | |||
3743 | sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2; | |||
3744 | if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken; | |||
3745 | if( iAdj<0 ) iAdj = 0; | |||
3746 | *piPos = (int)iAdj; | |||
3747 | } | |||
3748 | ||||
3749 | return rc; | |||
3750 | } | |||
3751 | ||||
3752 | /* | |||
3753 | ** Return the value in pVal interpreted as utf-8 text. Except, if pVal | |||
3754 | ** contains a NULL value, return a pointer to a static string zero | |||
3755 | ** bytes in length instead of a NULL pointer. | |||
3756 | */ | |||
3757 | static const char *fts5ValueToText(sqlite3_value *pVal){ | |||
3758 | const char *zRet = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
3759 | return zRet ? zRet : ""; | |||
3760 | } | |||
3761 | ||||
3762 | /* | |||
3763 | ** Implementation of snippet() function. | |||
3764 | */ | |||
3765 | static void fts5SnippetFunction( | |||
3766 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
3767 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
3768 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
3769 | int nVal, /* Number of values in apVal[] array */ | |||
3770 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
3771 | ){ | |||
3772 | HighlightContext ctx; | |||
3773 | int rc = SQLITE_OK0; /* Return code */ | |||
3774 | int iCol; /* 1st argument to snippet() */ | |||
3775 | const char *zEllips; /* 4th argument to snippet() */ | |||
3776 | int nToken; /* 5th argument to snippet() */ | |||
3777 | int nInst = 0; /* Number of instance matches this row */ | |||
3778 | int i; /* Used to iterate through instances */ | |||
3779 | int nPhrase; /* Number of phrases in query */ | |||
3780 | unsigned char *aSeen; /* Array of "seen instance" flags */ | |||
3781 | int iBestCol; /* Column containing best snippet */ | |||
3782 | int iBestStart = 0; /* First token of best snippet */ | |||
3783 | int nBestScore = 0; /* Score of best snippet */ | |||
3784 | int nColSize = 0; /* Total size of iBestCol in tokens */ | |||
3785 | Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ | |||
3786 | int nCol; | |||
3787 | ||||
3788 | if( nVal!=5 ){ | |||
3789 | const char *zErr = "wrong number of arguments to function snippet()"; | |||
3790 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
3791 | return; | |||
3792 | } | |||
3793 | ||||
3794 | nCol = pApi->xColumnCount(pFts); | |||
3795 | memset(&ctx, 0, sizeof(HighlightContext)); | |||
3796 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
3797 | ctx.zOpen = fts5ValueToText(apVal[1]); | |||
3798 | ctx.zClose = fts5ValueToText(apVal[2]); | |||
3799 | ctx.iRangeEnd = -1; | |||
3800 | zEllips = fts5ValueToText(apVal[3]); | |||
3801 | nToken = sqlite3_value_intsqlite3_api->value_int(apVal[4]); | |||
3802 | ||||
3803 | iBestCol = (iCol>=0 ? iCol : 0); | |||
3804 | nPhrase = pApi->xPhraseCount(pFts); | |||
3805 | aSeen = sqlite3_mallocsqlite3_api->malloc(nPhrase); | |||
3806 | if( aSeen==0 ){ | |||
3807 | rc = SQLITE_NOMEM7; | |||
3808 | } | |||
3809 | if( rc==SQLITE_OK0 ){ | |||
3810 | rc = pApi->xInstCount(pFts, &nInst); | |||
3811 | } | |||
3812 | ||||
3813 | memset(&sFinder, 0, sizeof(Fts5SFinder)); | |||
3814 | for(i=0; i<nCol; i++){ | |||
3815 | if( iCol<0 || iCol==i ){ | |||
3816 | const char *pLoc = 0; /* Locale of column iCol */ | |||
3817 | int nLoc = 0; /* Size of pLoc in bytes */ | |||
3818 | int nDoc; | |||
3819 | int nDocsize; | |||
3820 | int ii; | |||
3821 | sFinder.iPos = 0; | |||
3822 | sFinder.nFirst = 0; | |||
3823 | rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); | |||
3824 | if( rc!=SQLITE_OK0 ) break; | |||
3825 | rc = pApi->xColumnLocale(pFts, i, &pLoc, &nLoc); | |||
3826 | if( rc!=SQLITE_OK0 ) break; | |||
3827 | rc = pApi->xTokenize_v2(pFts, | |||
3828 | sFinder.zDoc, nDoc, pLoc, nLoc, (void*)&sFinder, fts5SentenceFinderCb | |||
3829 | ); | |||
3830 | if( rc!=SQLITE_OK0 ) break; | |||
3831 | rc = pApi->xColumnSize(pFts, i, &nDocsize); | |||
3832 | if( rc!=SQLITE_OK0 ) break; | |||
3833 | ||||
3834 | for(ii=0; rc==SQLITE_OK0 && ii<nInst; ii++){ | |||
3835 | int ip, ic, io; | |||
3836 | int iAdj; | |||
3837 | int nScore; | |||
3838 | int jj; | |||
3839 | ||||
3840 | rc = pApi->xInst(pFts, ii, &ip, &ic, &io); | |||
3841 | if( ic!=i ) continue; | |||
3842 | if( io>nDocsize ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
3843 | if( rc!=SQLITE_OK0 ) continue; | |||
3844 | memset(aSeen, 0, nPhrase); | |||
3845 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, | |||
3846 | io, nToken, &nScore, &iAdj | |||
3847 | ); | |||
3848 | if( rc==SQLITE_OK0 && nScore>nBestScore ){ | |||
3849 | nBestScore = nScore; | |||
3850 | iBestCol = i; | |||
3851 | iBestStart = iAdj; | |||
3852 | nColSize = nDocsize; | |||
3853 | } | |||
3854 | ||||
3855 | if( rc==SQLITE_OK0 && sFinder.nFirst && nDocsize>nToken ){ | |||
3856 | for(jj=0; jj<(sFinder.nFirst-1); jj++){ | |||
3857 | if( sFinder.aFirst[jj+1]>io ) break; | |||
3858 | } | |||
3859 | ||||
3860 | if( sFinder.aFirst[jj]<io ){ | |||
3861 | memset(aSeen, 0, nPhrase); | |||
3862 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, | |||
3863 | sFinder.aFirst[jj], nToken, &nScore, 0 | |||
3864 | ); | |||
3865 | ||||
3866 | nScore += (sFinder.aFirst[jj]==0 ? 120 : 100); | |||
3867 | if( rc==SQLITE_OK0 && nScore>nBestScore ){ | |||
3868 | nBestScore = nScore; | |||
3869 | iBestCol = i; | |||
3870 | iBestStart = sFinder.aFirst[jj]; | |||
3871 | nColSize = nDocsize; | |||
3872 | } | |||
3873 | } | |||
3874 | } | |||
3875 | } | |||
3876 | } | |||
3877 | } | |||
3878 | ||||
3879 | if( rc==SQLITE_OK0 ){ | |||
3880 | rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); | |||
3881 | } | |||
3882 | if( rc==SQLITE_OK0 && nColSize==0 ){ | |||
3883 | rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); | |||
3884 | } | |||
3885 | if( ctx.zIn ){ | |||
3886 | const char *pLoc = 0; /* Locale of column iBestCol */ | |||
3887 | int nLoc = 0; /* Bytes in pLoc */ | |||
3888 | ||||
3889 | if( rc==SQLITE_OK0 ){ | |||
3890 | rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); | |||
3891 | } | |||
3892 | ||||
3893 | ctx.iRangeStart = iBestStart; | |||
3894 | ctx.iRangeEnd = iBestStart + nToken - 1; | |||
3895 | ||||
3896 | if( iBestStart>0 ){ | |||
3897 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); | |||
3898 | } | |||
3899 | ||||
3900 | /* Advance iterator ctx.iter so that it points to the first coalesced | |||
3901 | ** phrase instance at or following position iBestStart. */ | |||
3902 | while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK0 ){ | |||
3903 | rc = fts5CInstIterNext(&ctx.iter); | |||
3904 | } | |||
3905 | ||||
3906 | if( rc==SQLITE_OK0 ){ | |||
3907 | rc = pApi->xColumnLocale(pFts, iBestCol, &pLoc, &nLoc); | |||
3908 | } | |||
3909 | if( rc==SQLITE_OK0 ){ | |||
3910 | rc = pApi->xTokenize_v2( | |||
3911 | pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx,fts5HighlightCb | |||
3912 | ); | |||
3913 | } | |||
3914 | if( ctx.bOpen ){ | |||
3915 | fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); | |||
3916 | } | |||
3917 | if( ctx.iRangeEnd>=(nColSize-1) ){ | |||
3918 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); | |||
3919 | }else{ | |||
3920 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); | |||
3921 | } | |||
3922 | } | |||
3923 | if( rc==SQLITE_OK0 ){ | |||
3924 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
3925 | }else{ | |||
3926 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
3927 | } | |||
3928 | sqlite3_freesqlite3_api->free(ctx.zOut); | |||
3929 | sqlite3_freesqlite3_api->free(aSeen); | |||
3930 | sqlite3_freesqlite3_api->free(sFinder.aFirst); | |||
3931 | } | |||
3932 | ||||
3933 | /************************************************************************/ | |||
3934 | ||||
/*
** An instance of this structure is allocated and populated the first time
** bm25() is invoked for a query. It holds the values that are calculated
** once per query rather than once per row.
*/
typedef struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in the query */
  double avgdl;                   /* Average number of tokens per row */
  double *aIDF;                   /* One IDF value for each phrase */
  double *aFreq;                  /* Scratch array for phrase frequencies */
} Fts5Bm25Data;
3946 | ||||
3947 | /* | |||
3948 | ** Callback used by fts5Bm25GetData() to count the number of rows in the | |||
3949 | ** table matched by each individual phrase within the query. | |||
3950 | */ | |||
3951 | static int fts5CountCb( | |||
3952 | const Fts5ExtensionApi *pApi, | |||
3953 | Fts5Context *pFts, | |||
3954 | void *pUserData /* Pointer to sqlite3_int64 variable */ | |||
3955 | ){ | |||
3956 | sqlite3_int64 *pn = (sqlite3_int64*)pUserData; | |||
3957 | UNUSED_PARAM2(pApi, pFts)(void)(pApi), (void)(pFts); | |||
3958 | (*pn)++; | |||
3959 | return SQLITE_OK0; | |||
3960 | } | |||
3961 | ||||
3962 | /* | |||
3963 | ** Set *ppData to point to the Fts5Bm25Data object for the current query. | |||
3964 | ** If the object has not already been allocated, allocate and populate it | |||
3965 | ** now. | |||
3966 | */ | |||
3967 | static int fts5Bm25GetData( | |||
3968 | const Fts5ExtensionApi *pApi, | |||
3969 | Fts5Context *pFts, | |||
3970 | Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ | |||
3971 | ){ | |||
3972 | int rc = SQLITE_OK0; /* Return code */ | |||
3973 | Fts5Bm25Data *p; /* Object to return */ | |||
3974 | ||||
3975 | p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0); | |||
3976 | if( p==0 ){ | |||
3977 | int nPhrase; /* Number of phrases in query */ | |||
3978 | sqlite3_int64 nRow = 0; /* Number of rows in table */ | |||
3979 | sqlite3_int64 nToken = 0; /* Number of tokens in table */ | |||
3980 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
3981 | int i; | |||
3982 | ||||
3983 | /* Allocate the Fts5Bm25Data object */ | |||
3984 | nPhrase = pApi->xPhraseCount(pFts); | |||
3985 | nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); | |||
3986 | p = (Fts5Bm25Data*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
3987 | if( p==0 ){ | |||
3988 | rc = SQLITE_NOMEM7; | |||
3989 | }else{ | |||
3990 | memset(p, 0, (size_t)nByte); | |||
3991 | p->nPhrase = nPhrase; | |||
3992 | p->aIDF = (double*)&p[1]; | |||
3993 | p->aFreq = &p->aIDF[nPhrase]; | |||
3994 | } | |||
3995 | ||||
3996 | /* Calculate the average document length for this FTS5 table */ | |||
3997 | if( rc==SQLITE_OK0 ) rc = pApi->xRowCount(pFts, &nRow); | |||
3998 | assert( rc!=SQLITE_OK || nRow>0 )((void) (0)); | |||
3999 | if( rc==SQLITE_OK0 ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); | |||
4000 | if( rc==SQLITE_OK0 ) p->avgdl = (double)nToken / (double)nRow; | |||
4001 | ||||
4002 | /* Calculate an IDF for each phrase in the query */ | |||
4003 | for(i=0; rc==SQLITE_OK0 && i<nPhrase; i++){ | |||
4004 | sqlite3_int64 nHit = 0; | |||
4005 | rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); | |||
4006 | if( rc==SQLITE_OK0 ){ | |||
4007 | /* Calculate the IDF (Inverse Document Frequency) for phrase i. | |||
4008 | ** This is done using the standard BM25 formula as found on wikipedia: | |||
4009 | ** | |||
4010 | ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) | |||
4011 | ** | |||
4012 | ** where "N" is the total number of documents in the set and nHit | |||
4013 | ** is the number that contain at least one instance of the phrase | |||
4014 | ** under consideration. | |||
4015 | ** | |||
4016 | ** The problem with this is that if (N < 2*nHit), the IDF is | |||
4017 | ** negative. Which is undesirable. So the minimum allowable IDF is | |||
4018 | ** (1e-6) - roughly the same as a term that appears in just over | |||
4019 | ** half of set of 5,000,000 documents. */ | |||
4020 | double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); | |||
4021 | if( idf<=0.0 ) idf = 1e-6; | |||
4022 | p->aIDF[i] = idf; | |||
4023 | } | |||
4024 | } | |||
4025 | ||||
4026 | if( rc!=SQLITE_OK0 ){ | |||
4027 | sqlite3_freesqlite3_api->free(p); | |||
4028 | }else{ | |||
4029 | rc = pApi->xSetAuxdata(pFts, p, sqlite3_freesqlite3_api->free); | |||
4030 | } | |||
4031 | if( rc!=SQLITE_OK0 ) p = 0; | |||
4032 | } | |||
4033 | *ppData = p; | |||
4034 | return rc; | |||
4035 | } | |||
4036 | ||||
4037 | /* | |||
4038 | ** Implementation of bm25() function. | |||
4039 | */ | |||
4040 | static void fts5Bm25Function( | |||
4041 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
4042 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
4043 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
4044 | int nVal, /* Number of values in apVal[] array */ | |||
4045 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
4046 | ){ | |||
4047 | const double k1 = 1.2; /* Constant "k1" from BM25 formula */ | |||
4048 | const double b = 0.75; /* Constant "b" from BM25 formula */ | |||
4049 | int rc; /* Error code */ | |||
4050 | double score = 0.0; /* SQL function return value */ | |||
4051 | Fts5Bm25Data *pData; /* Values allocated/calculated once only */ | |||
4052 | int i; /* Iterator variable */ | |||
4053 | int nInst = 0; /* Value returned by xInstCount() */ | |||
4054 | double D = 0.0; /* Total number of tokens in row */ | |||
4055 | double *aFreq = 0; /* Array of phrase freq. for current row */ | |||
4056 | ||||
4057 | /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) | |||
4058 | ** for each phrase in the query for the current row. */ | |||
4059 | rc = fts5Bm25GetData(pApi, pFts, &pData); | |||
4060 | if( rc==SQLITE_OK0 ){ | |||
4061 | aFreq = pData->aFreq; | |||
4062 | memset(aFreq, 0, sizeof(double) * pData->nPhrase); | |||
4063 | rc = pApi->xInstCount(pFts, &nInst); | |||
4064 | } | |||
4065 | for(i=0; rc==SQLITE_OK0 && i<nInst; i++){ | |||
4066 | int ip; int ic; int io; | |||
4067 | rc = pApi->xInst(pFts, i, &ip, &ic, &io); | |||
4068 | if( rc==SQLITE_OK0 ){ | |||
4069 | double w = (nVal > ic) ? sqlite3_value_doublesqlite3_api->value_double(apVal[ic]) : 1.0; | |||
4070 | aFreq[ip] += w; | |||
4071 | } | |||
4072 | } | |||
4073 | ||||
4074 | /* Figure out the total size of the current row in tokens. */ | |||
4075 | if( rc==SQLITE_OK0 ){ | |||
4076 | int nTok; | |||
4077 | rc = pApi->xColumnSize(pFts, -1, &nTok); | |||
4078 | D = (double)nTok; | |||
4079 | } | |||
4080 | ||||
4081 | /* Determine and return the BM25 score for the current row. Or, if an | |||
4082 | ** error has occurred, throw an exception. */ | |||
4083 | if( rc==SQLITE_OK0 ){ | |||
4084 | for(i=0; i<pData->nPhrase; i++){ | |||
4085 | score += pData->aIDF[i] * ( | |||
4086 | ( aFreq[i] * (k1 + 1.0) ) / | |||
4087 | ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) | |||
4088 | ); | |||
4089 | } | |||
4090 | sqlite3_result_doublesqlite3_api->result_double(pCtx, -1.0 * score); | |||
4091 | }else{ | |||
4092 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
4093 | } | |||
4094 | } | |||
4095 | ||||
4096 | /* | |||
4097 | ** Implementation of fts5_get_locale() function. | |||
4098 | */ | |||
4099 | static void fts5GetLocaleFunction( | |||
4100 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
4101 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
4102 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
4103 | int nVal, /* Number of values in apVal[] array */ | |||
4104 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
4105 | ){ | |||
4106 | int iCol = 0; | |||
4107 | int eType = 0; | |||
4108 | int rc = SQLITE_OK0; | |||
4109 | const char *zLocale = 0; | |||
4110 | int nLocale = 0; | |||
4111 | ||||
4112 | /* xColumnLocale() must be available */ | |||
4113 | assert( pApi->iVersion>=4 )((void) (0)); | |||
4114 | ||||
4115 | if( nVal!=1 ){ | |||
4116 | const char *z = "wrong number of arguments to function fts5_get_locale()"; | |||
4117 | sqlite3_result_errorsqlite3_api->result_error(pCtx, z, -1); | |||
4118 | return; | |||
4119 | } | |||
4120 | ||||
4121 | eType = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(apVal[0]); | |||
4122 | if( eType!=SQLITE_INTEGER1 ){ | |||
4123 | const char *z = "non-integer argument passed to function fts5_get_locale()"; | |||
4124 | sqlite3_result_errorsqlite3_api->result_error(pCtx, z, -1); | |||
4125 | return; | |||
4126 | } | |||
4127 | ||||
4128 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
4129 | if( iCol<0 || iCol>=pApi->xColumnCount(pFts) ){ | |||
4130 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, SQLITE_RANGE25); | |||
4131 | return; | |||
4132 | } | |||
4133 | ||||
4134 | rc = pApi->xColumnLocale(pFts, iCol, &zLocale, &nLocale); | |||
4135 | if( rc!=SQLITE_OK0 ){ | |||
4136 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
4137 | return; | |||
4138 | } | |||
4139 | ||||
4140 | sqlite3_result_textsqlite3_api->result_text(pCtx, zLocale, nLocale, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
4141 | } | |||
4142 | ||||
4143 | static int sqlite3Fts5AuxInit(fts5_api *pApi){ | |||
4144 | struct Builtin { | |||
4145 | const char *zFunc; /* Function name (nul-terminated) */ | |||
4146 | void *pUserData; /* User-data pointer */ | |||
4147 | fts5_extension_function xFunc;/* Callback function */ | |||
4148 | void (*xDestroy)(void*); /* Destructor function */ | |||
4149 | } aBuiltin [] = { | |||
4150 | { "snippet", 0, fts5SnippetFunction, 0 }, | |||
4151 | { "highlight", 0, fts5HighlightFunction, 0 }, | |||
4152 | { "bm25", 0, fts5Bm25Function, 0 }, | |||
4153 | { "fts5_get_locale", 0, fts5GetLocaleFunction, 0 }, | |||
4154 | }; | |||
4155 | int rc = SQLITE_OK0; /* Return code */ | |||
4156 | int i; /* To iterate through builtin functions */ | |||
4157 | ||||
4158 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aBuiltin)((int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]))); i++){ | |||
4159 | rc = pApi->xCreateFunction(pApi, | |||
4160 | aBuiltin[i].zFunc, | |||
4161 | aBuiltin[i].pUserData, | |||
4162 | aBuiltin[i].xFunc, | |||
4163 | aBuiltin[i].xDestroy | |||
4164 | ); | |||
4165 | } | |||
4166 | ||||
4167 | return rc; | |||
4168 | } | |||
4169 | ||||
4170 | #line 1 "fts5_buffer.c" | |||
4171 | /* | |||
4172 | ** 2014 May 31 | |||
4173 | ** | |||
4174 | ** The author disclaims copyright to this source code. In place of | |||
4175 | ** a legal notice, here is a blessing: | |||
4176 | ** | |||
4177 | ** May you do good and not evil. | |||
4178 | ** May you find forgiveness for yourself and forgive others. | |||
4179 | ** May you share freely, never taking more than you give. | |||
4180 | ** | |||
4181 | ****************************************************************************** | |||
4182 | */ | |||
4183 | ||||
4184 | ||||
4185 | ||||
4186 | /* #include "fts5Int.h" */ | |||
4187 | ||||
4188 | static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){ | |||
4189 | if( (u32)pBuf->nSpace<nByte ){ | |||
4190 | u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64; | |||
4191 | u8 *pNew; | |||
4192 | while( nNew<nByte ){ | |||
4193 | nNew = nNew * 2; | |||
4194 | } | |||
4195 | pNew = sqlite3_realloc64sqlite3_api->realloc64(pBuf->p, nNew); | |||
4196 | if( pNew==0 ){ | |||
4197 | *pRc = SQLITE_NOMEM7; | |||
4198 | return 1; | |||
4199 | }else{ | |||
4200 | pBuf->nSpace = (int)nNew; | |||
4201 | pBuf->p = pNew; | |||
4202 | } | |||
4203 | } | |||
4204 | return 0; | |||
4205 | } | |||
4206 | ||||
4207 | ||||
4208 | /* | |||
4209 | ** Encode value iVal as an SQLite varint and append it to the buffer object | |||
4210 | ** pBuf. If an OOM error occurs, set the error code in p. | |||
4211 | */ | |||
4212 | static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ | |||
4213 | if( fts5BufferGrow(pRc, pBuf, 9)( (u32)((pBuf)->n) + (u32)(9) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(9)+(pBuf)->n) ) ) return; | |||
4214 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal); | |||
4215 | } | |||
4216 | ||||
4217 | static void sqlite3Fts5Put32(u8 *aBuf, int iVal){ | |||
4218 | aBuf[0] = (iVal>>24) & 0x00FF; | |||
4219 | aBuf[1] = (iVal>>16) & 0x00FF; | |||
4220 | aBuf[2] = (iVal>> 8) & 0x00FF; | |||
4221 | aBuf[3] = (iVal>> 0) & 0x00FF; | |||
4222 | } | |||
4223 | ||||
4224 | static int sqlite3Fts5Get32(const u8 *aBuf){ | |||
4225 | return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]); | |||
4226 | } | |||
4227 | ||||
4228 | /* | |||
4229 | ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set | |||
4230 | ** the error code in p. If an error has already occurred when this function | |||
4231 | ** is called, it is a no-op. | |||
4232 | */ | |||
4233 | static void sqlite3Fts5BufferAppendBlob( | |||
4234 | int *pRc, | |||
4235 | Fts5Buffer *pBuf, | |||
4236 | u32 nData, | |||
4237 | const u8 *pData | |||
4238 | ){ | |||
4239 | if( nData ){ | |||
4240 | if( fts5BufferGrow(pRc, pBuf, nData)( (u32)((pBuf)->n) + (u32)(nData) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(nData)+(pBuf)-> n) ) ) return; | |||
4241 | assert( pBuf->p!=0 )((void) (0)); | |||
4242 | memcpy(&pBuf->p[pBuf->n], pData, nData); | |||
4243 | pBuf->n += nData; | |||
4244 | } | |||
4245 | } | |||
4246 | ||||
4247 | /* | |||
4248 | ** Append the nul-terminated string zStr to the buffer pBuf. This function | |||
4249 | ** ensures that the byte following the buffer data is set to 0x00, even | |||
4250 | ** though this byte is not included in the pBuf->n count. | |||
4251 | */ | |||
4252 | static void sqlite3Fts5BufferAppendString( | |||
4253 | int *pRc, | |||
4254 | Fts5Buffer *pBuf, | |||
4255 | const char *zStr | |||
4256 | ){ | |||
4257 | int nStr = (int)strlen(zStr); | |||
4258 | sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr); | |||
4259 | pBuf->n--; | |||
4260 | } | |||
4261 | ||||
4262 | /* | |||
4263 | ** Argument zFmt is a printf() style format string. This function performs | |||
4264 | ** the printf() style processing, then appends the results to buffer pBuf. | |||
4265 | ** | |||
4266 | ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte | |||
4267 | ** following the buffer data is set to 0x00, even though this byte is not | |||
4268 | ** included in the pBuf->n count. | |||
4269 | */ | |||
4270 | static void sqlite3Fts5BufferAppendPrintf( | |||
4271 | int *pRc, | |||
4272 | Fts5Buffer *pBuf, | |||
4273 | char *zFmt, ... | |||
4274 | ){ | |||
4275 | if( *pRc==SQLITE_OK0 ){ | |||
4276 | char *zTmp; | |||
4277 | va_list ap; | |||
4278 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
4279 | zTmp = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
4280 | va_end(ap)__builtin_va_end(ap); | |||
4281 | ||||
4282 | if( zTmp==0 ){ | |||
4283 | *pRc = SQLITE_NOMEM7; | |||
4284 | }else{ | |||
4285 | sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); | |||
4286 | sqlite3_freesqlite3_api->free(zTmp); | |||
4287 | } | |||
4288 | } | |||
4289 | } | |||
4290 | ||||
4291 | static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){ | |||
4292 | char *zRet = 0; | |||
4293 | if( *pRc==SQLITE_OK0 ){ | |||
4294 | va_list ap; | |||
4295 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
4296 | zRet = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
4297 | va_end(ap)__builtin_va_end(ap); | |||
4298 | if( zRet==0 ){ | |||
4299 | *pRc = SQLITE_NOMEM7; | |||
4300 | } | |||
4301 | } | |||
4302 | return zRet; | |||
4303 | } | |||
4304 | ||||
4305 | ||||
4306 | /* | |||
4307 | ** Free any buffer allocated by pBuf. Zero the structure before returning. | |||
4308 | */ | |||
4309 | static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ | |||
4310 | sqlite3_freesqlite3_api->free(pBuf->p); | |||
4311 | memset(pBuf, 0, sizeof(Fts5Buffer)); | |||
4312 | } | |||
4313 | ||||
4314 | /* | |||
4315 | ** Zero the contents of the buffer object. But do not free the associated | |||
4316 | ** memory allocation. | |||
4317 | */ | |||
4318 | static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){ | |||
4319 | pBuf->n = 0; | |||
4320 | } | |||
4321 | ||||
4322 | /* | |||
4323 | ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an | |||
4324 | ** the error code in p. If an error has already occurred when this function | |||
4325 | ** is called, it is a no-op. | |||
4326 | */ | |||
4327 | static void sqlite3Fts5BufferSet( | |||
4328 | int *pRc, | |||
4329 | Fts5Buffer *pBuf, | |||
4330 | int nData, | |||
4331 | const u8 *pData | |||
4332 | ){ | |||
4333 | pBuf->n = 0; | |||
4334 | sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); | |||
4335 | } | |||
4336 | ||||
4337 | static int sqlite3Fts5PoslistNext64( | |||
4338 | const u8 *a, int n, /* Buffer containing poslist */ | |||
4339 | int *pi, /* IN/OUT: Offset within a[] */ | |||
4340 | i64 *piOff /* IN/OUT: Current offset */ | |||
4341 | ){ | |||
4342 | int i = *pi; | |||
4343 | assert( a!=0 || i==0 )((void) (0)); | |||
4344 | if( i>=n ){ | |||
4345 | /* EOF */ | |||
4346 | *piOff = -1; | |||
4347 | return 1; | |||
4348 | }else{ | |||
4349 | i64 iOff = *piOff; | |||
4350 | u32 iVal; | |||
4351 | assert( a!=0 )((void) (0)); | |||
4352 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | |||
4353 | if( iVal<=1 ){ | |||
4354 | if( iVal==0 ){ | |||
4355 | *pi = i; | |||
4356 | return 0; | |||
4357 | } | |||
4358 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | |||
4359 | iOff = ((i64)iVal) << 32; | |||
4360 | assert( iOff>=0 )((void) (0)); | |||
4361 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | |||
4362 | if( iVal<2 ){ | |||
4363 | /* This is a corrupt record. So stop parsing it here. */ | |||
4364 | *piOff = -1; | |||
4365 | return 1; | |||
4366 | } | |||
4367 | *piOff = iOff + ((iVal-2) & 0x7FFFFFFF); | |||
4368 | }else{ | |||
4369 | *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF); | |||
4370 | } | |||
4371 | *pi = i; | |||
4372 | assert_nc( *piOff>=iOff )((void) (0)); | |||
4373 | return 0; | |||
4374 | } | |||
4375 | } | |||
4376 | ||||
4377 | ||||
4378 | /* | |||
4379 | ** Advance the iterator object passed as the only argument. Return true | |||
4380 | ** if the iterator reaches EOF, or false otherwise. | |||
4381 | */ | |||
4382 | static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ | |||
4383 | if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){ | |||
4384 | pIter->bEof = 1; | |||
4385 | } | |||
4386 | return pIter->bEof; | |||
4387 | } | |||
4388 | ||||
4389 | static int sqlite3Fts5PoslistReaderInit( | |||
4390 | const u8 *a, int n, /* Poslist buffer to iterate through */ | |||
4391 | Fts5PoslistReader *pIter /* Iterator object to initialize */ | |||
4392 | ){ | |||
4393 | memset(pIter, 0, sizeof(*pIter)); | |||
4394 | pIter->a = a; | |||
4395 | pIter->n = n; | |||
4396 | sqlite3Fts5PoslistReaderNext(pIter); | |||
4397 | return pIter->bEof; | |||
4398 | } | |||
4399 | ||||
4400 | /* | |||
4401 | ** Append position iPos to the position list being accumulated in buffer | |||
4402 | ** pBuf, which must be already be large enough to hold the new data. | |||
4403 | ** The previous position written to this list is *piPrev. *piPrev is set | |||
4404 | ** to iPos before returning. | |||
4405 | */ | |||
4406 | static void sqlite3Fts5PoslistSafeAppend( | |||
4407 | Fts5Buffer *pBuf, | |||
4408 | i64 *piPrev, | |||
4409 | i64 iPos | |||
4410 | ){ | |||
4411 | if( iPos>=*piPrev ){ | |||
4412 | static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32; | |||
4413 | if( (iPos & colmask) != (*piPrev & colmask) ){ | |||
4414 | pBuf->p[pBuf->n++] = 1; | |||
4415 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32)); | |||
4416 | *piPrev = (iPos & colmask); | |||
4417 | } | |||
4418 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2); | |||
4419 | *piPrev = iPos; | |||
4420 | } | |||
4421 | } | |||
4422 | ||||
4423 | static int sqlite3Fts5PoslistWriterAppend( | |||
4424 | Fts5Buffer *pBuf, | |||
4425 | Fts5PoslistWriter *pWriter, | |||
4426 | i64 iPos | |||
4427 | ){ | |||
4428 | int rc = 0; /* Initialized only to suppress erroneous warning from Clang */ | |||
4429 | if( fts5BufferGrow(&rc, pBuf, 5+5+5)( (u32)((pBuf)->n) + (u32)(5+5+5) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((&rc),(pBuf),(5+5+5)+(pBuf) ->n) ) ) return rc; | |||
4430 | sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos); | |||
4431 | return SQLITE_OK0; | |||
4432 | } | |||
4433 | ||||
4434 | static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){ | |||
4435 | void *pRet = 0; | |||
4436 | if( *pRc==SQLITE_OK0 ){ | |||
4437 | pRet = sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
4438 | if( pRet==0 ){ | |||
4439 | if( nByte>0 ) *pRc = SQLITE_NOMEM7; | |||
4440 | }else{ | |||
4441 | memset(pRet, 0, (size_t)nByte); | |||
4442 | } | |||
4443 | } | |||
4444 | return pRet; | |||
4445 | } | |||
4446 | ||||
4447 | /* | |||
4448 | ** Return a nul-terminated copy of the string indicated by pIn. If nIn | |||
4449 | ** is non-negative, then it is the length of the string in bytes. Otherwise, | |||
4450 | ** the length of the string is determined using strlen(). | |||
4451 | ** | |||
4452 | ** It is the responsibility of the caller to eventually free the returned | |||
4453 | ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. | |||
4454 | */ | |||
4455 | static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ | |||
4456 | char *zRet = 0; | |||
4457 | if( *pRc==SQLITE_OK0 ){ | |||
4458 | if( nIn<0 ){ | |||
4459 | nIn = (int)strlen(pIn); | |||
4460 | } | |||
4461 | zRet = (char*)sqlite3_mallocsqlite3_api->malloc(nIn+1); | |||
4462 | if( zRet ){ | |||
4463 | memcpy(zRet, pIn, nIn); | |||
4464 | zRet[nIn] = '\0'; | |||
4465 | }else{ | |||
4466 | *pRc = SQLITE_NOMEM7; | |||
4467 | } | |||
4468 | } | |||
4469 | return zRet; | |||
4470 | } | |||
4471 | ||||
4472 | ||||
4473 | /* | |||
4474 | ** Return true if character 't' may be part of an FTS5 bareword, or false | |||
4475 | ** otherwise. Characters that may be part of barewords: | |||
4476 | ** | |||
4477 | ** * All non-ASCII characters, | |||
4478 | ** * The 52 upper and lower case ASCII characters, and | |||
4479 | ** * The 10 integer ASCII characters. | |||
4480 | ** * The underscore character "_" (0x5F). | |||
4481 | ** * The unicode "substitute" character (0x1A). | |||
4482 | */ | |||
4483 | static int sqlite3Fts5IsBareword(char t){ | |||
4484 | u8 aBareword[128] = { | |||
4485 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 .. 0x0F */ | |||
4486 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x10 .. 0x1F */ | |||
4487 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 .. 0x2F */ | |||
4488 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 .. 0x3F */ | |||
4489 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 .. 0x4F */ | |||
4490 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */ | |||
4491 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */ | |||
4492 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */ | |||
4493 | }; | |||
4494 | ||||
4495 | return (t & 0x80) || aBareword[(int)t]; | |||
4496 | } | |||
4497 | ||||
4498 | ||||
4499 | /************************************************************************* | |||
4500 | */ | |||
/*
** A term-set is a hash table of (iIdx, term) pairs. Each pair is stored
** in an Fts5TermsetEntry, and entries that hash to the same bucket are
** chained together through pNext.
*/
typedef struct Fts5TermsetEntry Fts5TermsetEntry;
struct Fts5TermsetEntry {
  char *pTerm;                    /* Term data (nTerm bytes) */
  int nTerm;                      /* Size of pTerm in bytes */
  int iIdx;                       /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext;        /* Next entry in the same hash bucket */
};

struct Fts5Termset {
  Fts5TermsetEntry *apHash[512];  /* Hash buckets (heads of entry lists) */
};
4512 | ||||
4513 | static int sqlite3Fts5TermsetNew(Fts5Termset **pp){ | |||
4514 | int rc = SQLITE_OK0; | |||
4515 | *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); | |||
4516 | return rc; | |||
4517 | } | |||
4518 | ||||
4519 | static int sqlite3Fts5TermsetAdd( | |||
4520 | Fts5Termset *p, | |||
4521 | int iIdx, | |||
4522 | const char *pTerm, int nTerm, | |||
4523 | int *pbPresent | |||
4524 | ){ | |||
4525 | int rc = SQLITE_OK0; | |||
4526 | *pbPresent = 0; | |||
4527 | if( p ){ | |||
4528 | int i; | |||
4529 | u32 hash = 13; | |||
4530 | Fts5TermsetEntry *pEntry; | |||
4531 | ||||
4532 | /* Calculate a hash value for this term. This is the same hash checksum | |||
4533 | ** used by the fts5_hash.c module. This is not important for correct | |||
4534 | ** operation of the module, but is necessary to ensure that some tests | |||
4535 | ** designed to produce hash table collisions really do work. */ | |||
4536 | for(i=nTerm-1; i>=0; i--){ | |||
4537 | hash = (hash << 3) ^ hash ^ pTerm[i]; | |||
4538 | } | |||
4539 | hash = (hash << 3) ^ hash ^ iIdx; | |||
4540 | hash = hash % ArraySize(p->apHash)((int)(sizeof(p->apHash) / sizeof(p->apHash[0]))); | |||
4541 | ||||
4542 | for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ | |||
4543 | if( pEntry->iIdx==iIdx | |||
4544 | && pEntry->nTerm==nTerm | |||
4545 | && memcmp(pEntry->pTerm, pTerm, nTerm)==0 | |||
4546 | ){ | |||
4547 | *pbPresent = 1; | |||
4548 | break; | |||
4549 | } | |||
4550 | } | |||
4551 | ||||
4552 | if( pEntry==0 ){ | |||
4553 | pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); | |||
4554 | if( pEntry ){ | |||
4555 | pEntry->pTerm = (char*)&pEntry[1]; | |||
4556 | pEntry->nTerm = nTerm; | |||
4557 | pEntry->iIdx = iIdx; | |||
4558 | memcpy(pEntry->pTerm, pTerm, nTerm); | |||
4559 | pEntry->pNext = p->apHash[hash]; | |||
4560 | p->apHash[hash] = pEntry; | |||
4561 | } | |||
4562 | } | |||
4563 | } | |||
4564 | ||||
4565 | return rc; | |||
4566 | } | |||
4567 | ||||
4568 | static void sqlite3Fts5TermsetFree(Fts5Termset *p){ | |||
4569 | if( p ){ | |||
4570 | u32 i; | |||
4571 | for(i=0; i<ArraySize(p->apHash)((int)(sizeof(p->apHash) / sizeof(p->apHash[0]))); i++){ | |||
4572 | Fts5TermsetEntry *pEntry = p->apHash[i]; | |||
4573 | while( pEntry ){ | |||
4574 | Fts5TermsetEntry *pDel = pEntry; | |||
4575 | pEntry = pEntry->pNext; | |||
4576 | sqlite3_freesqlite3_api->free(pDel); | |||
4577 | } | |||
4578 | } | |||
4579 | sqlite3_freesqlite3_api->free(p); | |||
4580 | } | |||
4581 | } | |||
4582 | ||||
4583 | #line 1 "fts5_config.c" | |||
4584 | /* | |||
4585 | ** 2014 Jun 09 | |||
4586 | ** | |||
4587 | ** The author disclaims copyright to this source code. In place of | |||
4588 | ** a legal notice, here is a blessing: | |||
4589 | ** | |||
4590 | ** May you do good and not evil. | |||
4591 | ** May you find forgiveness for yourself and forgive others. | |||
4592 | ** May you share freely, never taking more than you give. | |||
4593 | ** | |||
4594 | ****************************************************************************** | |||
4595 | ** | |||
4596 | ** This is an SQLite module implementing full-text search. | |||
4597 | */ | |||
4598 | ||||
4599 | ||||
4600 | /* #include "fts5Int.h" */ | |||
4601 | ||||
/* Default values for FTS5 configuration options */
#define FTS5_DEFAULT_PAGE_SIZE        4050
#define FTS5_DEFAULT_AUTOMERGE           4
#define FTS5_DEFAULT_USERMERGE           4
#define FTS5_DEFAULT_CRISISMERGE        16
#define FTS5_DEFAULT_HASHSIZE    (1024*1024)

#define FTS5_DEFAULT_DELETE_AUTOMERGE   10      /* default 10% */

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (64*1024)
4612 | ||||
/*
** Return 1 if x is the ASCII space character (0x20), or 0 otherwise. No
** other character is considered to be whitespace by this function.
*/
static int fts5_iswhitespace(char x){
  return x==' ' ? 1 : 0;
}
4616 | ||||
/*
** Return 1 if x is one of the four characters that may open a quoted
** string - double-quote, single-quote, '[' or back-tick - or 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
4620 | ||||
/*
** Argument pIn points to a character that is part of a nul-terminated
** string. Return a pointer to the first character at or following *pIn
** for which fts5_iswhitespace() is false. If pIn is NULL, NULL is
** returned.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z = pIn;
  if( z==0 ) return 0;
  while( fts5_iswhitespace(*z) ){
    z++;
  }
  return z;
}
4633 | ||||
/*
** Argument pIn points to a character that is part of a nul-terminated
** string. Return a pointer to the first character at or following *pIn
** that is not a "bareword" character according to
** sqlite3Fts5IsBareword(). If *pIn itself is not a bareword character
** (i.e. the bareword would be empty), NULL is returned instead.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *z;
  for(z=pIn; sqlite3Fts5IsBareword(*z); z++);
  return (z==pIn) ? 0 : z;
}
4645 | ||||
/*
** Return 1 if a is one of the ASCII digits '0'..'9', or 0 otherwise.
*/
static int fts5_isdigit(char a){
  return !(a<'0' || a>'9');
}
4649 | ||||
4650 | ||||
4651 | ||||
/*
** Argument pIn points to the first character of what may be an SQL
** literal within a nul-terminated string. If a literal is recognized,
** return a pointer to the character immediately following it. Otherwise,
** return NULL.
**
** The forms recognized are:
**
**   * The keyword NULL (case-insensitive, first 4 characters only),
**   * A blob literal: x'...' or X'...' containing an even number of
**     hexadecimal digits,
**   * A string literal: '...' in which a quote character is escaped by
**     doubling it ('')
**   * A number: an optional '+' or '-' sign, decimal digits and an
**     optional fractional part (a '.' followed by at least one digit).
**
** This function never reads past the nul-terminator of the string. In
** particular, a string literal that is missing its closing quote -
** including the degenerate case of an opening quote that is the final
** character of the input - is rejected by returning NULL.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) counts the "x'" prefix plus the hex digits, so it is
        ** even if and only if the number of hex digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      for(;;){
        /* Check for the end of the input before advancing, so that the
        ** cursor can never step over the nul-terminator. */
        if( *p==0 ){
          p = 0;                  /* Unterminated string literal */
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;   /* That was the closing quote */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by at least one digit. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
4714 | ||||
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function dequotes the string in place: the text between the open
** quote and the corresponding close quote is moved to the start of the
** buffer, each doubled close-quote character within it is replaced by a
** single one, and a new nul-terminator is written after the result. For
** an opening '[' the close-quote character is ']'; otherwise it is the
** same as the open-quote character.
**
** The value returned is the byte offset, within the original string, of
** the character immediately following the close-quote. If no close-quote
** is found, everything after the open quote is treated as the quoted text
** and the offset of the original nul-terminator is returned.
*/
static int fts5Dequote(char *z){
  int iRead = 1;                  /* Next input offset (skips open quote) */
  int iWrite = 0;                 /* Next output offset */
  char cClose = z[0];             /* Close-quote character */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  for(;;){
    char c = z[iRead];
    if( c=='\0' ) break;

    if( c!=cClose ){
      /* Ordinary character. Copy it to the output. */
      z[iWrite++] = c;
      iRead++;
      continue;
    }

    if( z[iRead+1]!=cClose ){
      /* Character iRead was the close quote. */
      iRead++;
      break;
    }

    /* Characters iRead and iRead+1 form an escaped quote character. Skip
    ** the input cursor past both and emit a single quote character. */
    iRead += 2;
    z[iWrite++] = cClose;
  }

  z[iWrite] = '\0';
  return iRead;
}
4759 | ||||
/*
** Convert an SQL-style quoted string into a normal string by removing
** the quote characters. The conversion is done in-place. If the
** input does not begin with a quote character, then this routine
** is a no-op.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
static void sqlite3Fts5Dequote(char *z){
  /* The caller must have skipped any leading whitespace already */
  assert( 0==fts5_iswhitespace(z[0]) );
  if( fts5_isopenquote(z[0]) ){
    fts5Dequote(z);
  }
}
4782 | ||||
4783 | ||||
/*
** One entry in a table mapping option names to integer values. Tables of
** these are searched by fts5ConfigSetEnum(), which expects the array to be
** terminated by an entry with zName==0.
*/
typedef struct Fts5Enum Fts5Enum;
struct Fts5Enum {
  const char *zName;              /* Name of the enumeration value */
  int eVal;                       /* Integer value associated with zName */
};
4789 | ||||
4790 | static int fts5ConfigSetEnum( | |||
4791 | const Fts5Enum *aEnum, | |||
4792 | const char *zEnum, | |||
4793 | int *peVal | |||
4794 | ){ | |||
4795 | int nEnum = (int)strlen(zEnum); | |||
4796 | int i; | |||
4797 | int iVal = -1; | |||
4798 | ||||
4799 | for(i=0; aEnum[i].zName; i++){ | |||
4800 | if( sqlite3_strnicmpsqlite3_api->strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){ | |||
4801 | if( iVal>=0 ) return SQLITE_ERROR1; | |||
4802 | iVal = aEnum[i].eVal; | |||
4803 | } | |||
4804 | } | |||
4805 | ||||
4806 | *peVal = iVal; | |||
4807 | return iVal<0 ? SQLITE_ERROR1 : SQLITE_OK0; | |||
4808 | } | |||
4809 | ||||
4810 | /* | |||
4811 | ** Parse a "special" CREATE VIRTUAL TABLE directive and update | |||
4812 | ** configuration object pConfig as appropriate. | |||
4813 | ** | |||
4814 | ** If successful, object pConfig is updated and SQLITE_OK returned. If | |||
4815 | ** an error occurs, an SQLite error code is returned and an error message | |||
4816 | ** may be left in *pzErr. It is the responsibility of the caller to | |||
4817 | ** eventually free any such error message using sqlite3_free(). | |||
4818 | */ | |||
4819 | static int fts5ConfigParseSpecial( | |||
4820 | Fts5Config *pConfig, /* Configuration object to update */ | |||
4821 | const char *zCmd, /* Special command to parse */ | |||
4822 | const char *zArg, /* Argument to parse */ | |||
4823 | char **pzErr /* OUT: Error message */ | |||
4824 | ){ | |||
4825 | int rc = SQLITE_OK0; | |||
4826 | int nCmd = (int)strlen(zCmd); | |||
4827 | ||||
4828 | if( sqlite3_strnicmpsqlite3_api->strnicmp("prefix", zCmd, nCmd)==0 ){ | |||
4829 | const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES31; | |||
4830 | const char *p; | |||
4831 | int bFirst = 1; | |||
4832 | if( pConfig->aPrefix==0 ){ | |||
4833 | pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte); | |||
4834 | if( rc ) return rc; | |||
4835 | } | |||
4836 | ||||
4837 | p = zArg; | |||
4838 | while( 1 ){ | |||
4839 | int nPre = 0; | |||
4840 | ||||
4841 | while( p[0]==' ' ) p++; | |||
4842 | if( bFirst==0 && p[0]==',' ){ | |||
4843 | p++; | |||
4844 | while( p[0]==' ' ) p++; | |||
4845 | }else if( p[0]=='\0' ){ | |||
4846 | break; | |||
4847 | } | |||
4848 | if( p[0]<'0' || p[0]>'9' ){ | |||
4849 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed prefix=... directive"); | |||
4850 | rc = SQLITE_ERROR1; | |||
4851 | break; | |||
4852 | } | |||
4853 | ||||
4854 | if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES31 ){ | |||
4855 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
4856 | "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES31 | |||
4857 | ); | |||
4858 | rc = SQLITE_ERROR1; | |||
4859 | break; | |||
4860 | } | |||
4861 | ||||
4862 | while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ | |||
4863 | nPre = nPre*10 + (p[0] - '0'); | |||
4864 | p++; | |||
4865 | } | |||
4866 | ||||
4867 | if( nPre<=0 || nPre>=1000 ){ | |||
4868 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("prefix length out of range (max 999)"); | |||
4869 | rc = SQLITE_ERROR1; | |||
4870 | break; | |||
4871 | } | |||
4872 | ||||
4873 | pConfig->aPrefix[pConfig->nPrefix] = nPre; | |||
4874 | pConfig->nPrefix++; | |||
4875 | bFirst = 0; | |||
4876 | } | |||
4877 | assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES )((void) (0)); | |||
4878 | return rc; | |||
4879 | } | |||
4880 | ||||
4881 | if( sqlite3_strnicmpsqlite3_api->strnicmp("tokenize", zCmd, nCmd)==0 ){ | |||
4882 | const char *p = (const char*)zArg; | |||
4883 | sqlite3_int64 nArg = strlen(zArg) + 1; | |||
4884 | char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg); | |||
4885 | ||||
4886 | if( azArg ){ | |||
4887 | char *pSpace = (char*)&azArg[nArg]; | |||
4888 | if( pConfig->t.azArg ){ | |||
4889 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple tokenize=... directives"); | |||
4890 | rc = SQLITE_ERROR1; | |||
4891 | }else{ | |||
4892 | for(nArg=0; p && *p; nArg++){ | |||
4893 | const char *p2 = fts5ConfigSkipWhitespace(p); | |||
4894 | if( *p2=='\'' ){ | |||
4895 | p = fts5ConfigSkipLiteral(p2); | |||
4896 | }else{ | |||
4897 | p = fts5ConfigSkipBareword(p2); | |||
4898 | } | |||
4899 | if( p ){ | |||
4900 | memcpy(pSpace, p2, p-p2); | |||
4901 | azArg[nArg] = pSpace; | |||
4902 | sqlite3Fts5Dequote(pSpace); | |||
4903 | pSpace += (p - p2) + 1; | |||
4904 | p = fts5ConfigSkipWhitespace(p); | |||
4905 | } | |||
4906 | } | |||
4907 | if( p==0 ){ | |||
4908 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("parse error in tokenize directive"); | |||
4909 | rc = SQLITE_ERROR1; | |||
4910 | }else{ | |||
4911 | pConfig->t.azArg = (const char**)azArg; | |||
4912 | pConfig->t.nArg = nArg; | |||
4913 | azArg = 0; | |||
4914 | } | |||
4915 | } | |||
4916 | } | |||
4917 | sqlite3_freesqlite3_api->free(azArg); | |||
4918 | ||||
4919 | return rc; | |||
4920 | } | |||
4921 | ||||
4922 | if( sqlite3_strnicmpsqlite3_api->strnicmp("content", zCmd, nCmd)==0 ){ | |||
4923 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 ){ | |||
4924 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple content=... directives"); | |||
4925 | rc = SQLITE_ERROR1; | |||
4926 | }else{ | |||
4927 | if( zArg[0] ){ | |||
4928 | pConfig->eContent = FTS5_CONTENT_EXTERNAL2; | |||
4929 | pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg); | |||
4930 | }else{ | |||
4931 | pConfig->eContent = FTS5_CONTENT_NONE1; | |||
4932 | } | |||
4933 | } | |||
4934 | return rc; | |||
4935 | } | |||
4936 | ||||
4937 | if( sqlite3_strnicmpsqlite3_api->strnicmp("contentless_delete", zCmd, nCmd)==0 ){ | |||
4938 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
4939 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed contentless_delete=... directive"); | |||
4940 | rc = SQLITE_ERROR1; | |||
4941 | }else{ | |||
4942 | pConfig->bContentlessDelete = (zArg[0]=='1'); | |||
4943 | } | |||
4944 | return rc; | |||
4945 | } | |||
4946 | ||||
4947 | if( sqlite3_strnicmpsqlite3_api->strnicmp("contentless_unindexed", zCmd, nCmd)==0 ){ | |||
4948 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
4949 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed contentless_delete=... directive"); | |||
4950 | rc = SQLITE_ERROR1; | |||
4951 | }else{ | |||
4952 | pConfig->bContentlessUnindexed = (zArg[0]=='1'); | |||
4953 | } | |||
4954 | return rc; | |||
4955 | } | |||
4956 | ||||
4957 | if( sqlite3_strnicmpsqlite3_api->strnicmp("content_rowid", zCmd, nCmd)==0 ){ | |||
4958 | if( pConfig->zContentRowid ){ | |||
4959 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple content_rowid=... directives"); | |||
4960 | rc = SQLITE_ERROR1; | |||
4961 | }else{ | |||
4962 | pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1); | |||
4963 | } | |||
4964 | return rc; | |||
4965 | } | |||
4966 | ||||
4967 | if( sqlite3_strnicmpsqlite3_api->strnicmp("columnsize", zCmd, nCmd)==0 ){ | |||
4968 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
4969 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed columnsize=... directive"); | |||
4970 | rc = SQLITE_ERROR1; | |||
4971 | }else{ | |||
4972 | pConfig->bColumnsize = (zArg[0]=='1'); | |||
4973 | } | |||
4974 | return rc; | |||
4975 | } | |||
4976 | ||||
4977 | if( sqlite3_strnicmpsqlite3_api->strnicmp("locale", zCmd, nCmd)==0 ){ | |||
4978 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
4979 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed locale=... directive"); | |||
4980 | rc = SQLITE_ERROR1; | |||
4981 | }else{ | |||
4982 | pConfig->bLocale = (zArg[0]=='1'); | |||
4983 | } | |||
4984 | return rc; | |||
4985 | } | |||
4986 | ||||
4987 | if( sqlite3_strnicmpsqlite3_api->strnicmp("detail", zCmd, nCmd)==0 ){ | |||
4988 | const Fts5Enum aDetail[] = { | |||
4989 | { "none", FTS5_DETAIL_NONE1 }, | |||
4990 | { "full", FTS5_DETAIL_FULL0 }, | |||
4991 | { "columns", FTS5_DETAIL_COLUMNS2 }, | |||
4992 | { 0, 0 } | |||
4993 | }; | |||
4994 | ||||
4995 | if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){ | |||
4996 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed detail=... directive"); | |||
4997 | } | |||
4998 | return rc; | |||
4999 | } | |||
5000 | ||||
5001 | if( sqlite3_strnicmpsqlite3_api->strnicmp("tokendata", zCmd, nCmd)==0 ){ | |||
5002 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
5003 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed tokendata=... directive"); | |||
5004 | rc = SQLITE_ERROR1; | |||
5005 | }else{ | |||
5006 | pConfig->bTokendata = (zArg[0]=='1'); | |||
5007 | } | |||
5008 | return rc; | |||
5009 | } | |||
5010 | ||||
5011 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); | |||
5012 | return SQLITE_ERROR1; | |||
5013 | } | |||
5014 | ||||
5015 | /* | |||
5016 | ** Gobble up the first bareword or quoted word from the input buffer zIn. | |||
5017 | ** Return a pointer to the character immediately following the last in | |||
5018 | ** the gobbled word if successful, or a NULL pointer otherwise (failed | |||
5019 | ** to find close-quote character). | |||
5020 | ** | |||
5021 | ** Before returning, set pzOut to point to a new buffer containing a | |||
5022 | ** nul-terminated, dequoted copy of the gobbled word. If the word was | |||
5023 | ** quoted, *pbQuoted is also set to 1 before returning. | |||
5024 | ** | |||
5025 | ** If *pRc is other than SQLITE_OK when this function is called, it is | |||
5026 | ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this | |||
5027 | ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* | |||
5028 | ** set if a parse error (failed to find close quote) occurs. | |||
5029 | */ | |||
5030 | static const char *fts5ConfigGobbleWord( | |||
5031 | int *pRc, /* IN/OUT: Error code */ | |||
5032 | const char *zIn, /* Buffer to gobble string/bareword from */ | |||
5033 | char **pzOut, /* OUT: malloc'd buffer containing str/bw */ | |||
5034 | int *pbQuoted /* OUT: Set to true if dequoting required */ | |||
5035 | ){ | |||
5036 | const char *zRet = 0; | |||
5037 | ||||
5038 | sqlite3_int64 nIn = strlen(zIn); | |||
5039 | char *zOut = sqlite3_malloc64sqlite3_api->malloc64(nIn+1); | |||
5040 | ||||
5041 | assert( *pRc==SQLITE_OK )((void) (0)); | |||
5042 | *pbQuoted = 0; | |||
5043 | *pzOut = 0; | |||
5044 | ||||
5045 | if( zOut==0 ){ | |||
5046 | *pRc = SQLITE_NOMEM7; | |||
5047 | }else{ | |||
5048 | memcpy(zOut, zIn, (size_t)(nIn+1)); | |||
5049 | if( fts5_isopenquote(zOut[0]) ){ | |||
5050 | int ii = fts5Dequote(zOut); | |||
5051 | zRet = &zIn[ii]; | |||
5052 | *pbQuoted = 1; | |||
5053 | }else{ | |||
5054 | zRet = fts5ConfigSkipBareword(zIn); | |||
5055 | if( zRet ){ | |||
5056 | zOut[zRet-zIn] = '\0'; | |||
5057 | } | |||
5058 | } | |||
5059 | } | |||
5060 | ||||
5061 | if( zRet==0 ){ | |||
5062 | sqlite3_freesqlite3_api->free(zOut); | |||
5063 | }else{ | |||
5064 | *pzOut = zOut; | |||
5065 | } | |||
5066 | ||||
5067 | return zRet; | |||
5068 | } | |||
5069 | ||||
5070 | static int fts5ConfigParseColumn( | |||
5071 | Fts5Config *p, | |||
5072 | char *zCol, | |||
5073 | char *zArg, | |||
5074 | char **pzErr, | |||
5075 | int *pbUnindexed | |||
5076 | ){ | |||
5077 | int rc = SQLITE_OK0; | |||
5078 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zCol, FTS5_RANK_NAME"rank") | |||
5079 | || 0==sqlite3_stricmpsqlite3_api->stricmp(zCol, FTS5_ROWID_NAME"rowid") | |||
5080 | ){ | |||
5081 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("reserved fts5 column name: %s", zCol); | |||
5082 | rc = SQLITE_ERROR1; | |||
5083 | }else if( zArg ){ | |||
5084 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zArg, "unindexed") ){ | |||
5085 | p->abUnindexed[p->nCol] = 1; | |||
5086 | *pbUnindexed = 1; | |||
5087 | }else{ | |||
5088 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unrecognized column option: %s", zArg); | |||
5089 | rc = SQLITE_ERROR1; | |||
5090 | } | |||
5091 | } | |||
5092 | ||||
5093 | p->azCol[p->nCol++] = zCol; | |||
5094 | return rc; | |||
5095 | } | |||
5096 | ||||
5097 | /* | |||
5098 | ** Populate the Fts5Config.zContentExprlist string. | |||
5099 | */ | |||
5100 | static int fts5ConfigMakeExprlist(Fts5Config *p){ | |||
5101 | int i; | |||
5102 | int rc = SQLITE_OK0; | |||
5103 | Fts5Buffer buf = {0, 0, 0}; | |||
5104 | ||||
5105 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); | |||
5106 | if( p->eContent!=FTS5_CONTENT_NONE1 ){ | |||
5107 | assert( p->eContent==FTS5_CONTENT_EXTERNAL((void) (0)) | |||
5108 | || p->eContent==FTS5_CONTENT_NORMAL((void) (0)) | |||
5109 | || p->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | |||
5110 | )((void) (0)); | |||
5111 | for(i=0; i<p->nCol; i++){ | |||
5112 | if( p->eContent==FTS5_CONTENT_EXTERNAL2 ){ | |||
5113 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); | |||
5114 | }else if( p->eContent==FTS5_CONTENT_NORMAL0 || p->abUnindexed[i] ){ | |||
5115 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); | |||
5116 | }else{ | |||
5117 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); | |||
5118 | } | |||
5119 | } | |||
5120 | } | |||
5121 | if( p->eContent==FTS5_CONTENT_NORMAL0 && p->bLocale ){ | |||
5122 | for(i=0; i<p->nCol; i++){ | |||
5123 | if( p->abUnindexed[i]==0 ){ | |||
5124 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.l%d", i); | |||
5125 | }else{ | |||
5126 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); | |||
5127 | } | |||
5128 | } | |||
5129 | } | |||
5130 | ||||
5131 | assert( p->zContentExprlist==0 )((void) (0)); | |||
5132 | p->zContentExprlist = (char*)buf.p; | |||
5133 | return rc; | |||
5134 | } | |||
5135 | ||||
5136 | /* | |||
5137 | ** Arguments nArg/azArg contain the string arguments passed to the xCreate | |||
5138 | ** or xConnect method of the virtual table. This function attempts to | |||
5139 | ** allocate an instance of Fts5Config containing the results of parsing | |||
5140 | ** those arguments. | |||
5141 | ** | |||
5142 | ** If successful, SQLITE_OK is returned and *ppOut is set to point to the | |||
5143 | ** new Fts5Config object. If an error occurs, an SQLite error code is | |||
5144 | ** returned, *ppOut is set to NULL and an error message may be left in | |||
5145 | ** *pzErr. It is the responsibility of the caller to eventually free any | |||
5146 | ** such error message using sqlite3_free(). | |||
5147 | */ | |||
5148 | static int sqlite3Fts5ConfigParse( | |||
5149 | Fts5Global *pGlobal, | |||
5150 | sqlite3 *db, | |||
5151 | int nArg, /* Number of arguments */ | |||
5152 | const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ | |||
5153 | Fts5Config **ppOut, /* OUT: Results of parse */ | |||
5154 | char **pzErr /* OUT: Error message */ | |||
5155 | ){ | |||
5156 | int rc = SQLITE_OK0; /* Return code */ | |||
5157 | Fts5Config *pRet; /* New object to return */ | |||
5158 | int i; | |||
5159 | sqlite3_int64 nByte; | |||
5160 | int bUnindexed = 0; /* True if there are one or more UNINDEXED */ | |||
5161 | ||||
5162 | *ppOut = pRet = (Fts5Config*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Config)); | |||
5163 | if( pRet==0 ) return SQLITE_NOMEM7; | |||
5164 | memset(pRet, 0, sizeof(Fts5Config)); | |||
5165 | pRet->pGlobal = pGlobal; | |||
5166 | pRet->db = db; | |||
5167 | pRet->iCookie = -1; | |||
5168 | ||||
5169 | nByte = nArg * (sizeof(char*) + sizeof(u8)); | |||
5170 | pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); | |||
5171 | pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0; | |||
5172 | pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); | |||
5173 | pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); | |||
5174 | pRet->bColumnsize = 1; | |||
5175 | pRet->eDetail = FTS5_DETAIL_FULL0; | |||
5176 | #ifdef SQLITE_DEBUG | |||
5177 | pRet->bPrefixIndex = 1; | |||
5178 | #endif | |||
5179 | if( rc==SQLITE_OK0 && sqlite3_stricmpsqlite3_api->stricmp(pRet->zName, FTS5_RANK_NAME"rank")==0 ){ | |||
5180 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("reserved fts5 table name: %s", pRet->zName); | |||
5181 | rc = SQLITE_ERROR1; | |||
5182 | } | |||
5183 | ||||
5184 | assert( (pRet->abUnindexed && pRet->azCol) || rc!=SQLITE_OK )((void) (0)); | |||
5185 | for(i=3; rc==SQLITE_OK0 && i<nArg; i++){ | |||
5186 | const char *zOrig = azArg[i]; | |||
5187 | const char *z; | |||
5188 | char *zOne = 0; | |||
5189 | char *zTwo = 0; | |||
5190 | int bOption = 0; | |||
5191 | int bMustBeCol = 0; | |||
5192 | ||||
5193 | z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); | |||
5194 | z = fts5ConfigSkipWhitespace(z); | |||
5195 | if( z && *z=='=' ){ | |||
5196 | bOption = 1; | |||
5197 | assert( zOne!=0 )((void) (0)); | |||
5198 | z++; | |||
5199 | if( bMustBeCol ) z = 0; | |||
5200 | } | |||
5201 | z = fts5ConfigSkipWhitespace(z); | |||
5202 | if( z && z[0] ){ | |||
5203 | int bDummy; | |||
5204 | z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); | |||
5205 | if( z && z[0] ) z = 0; | |||
5206 | } | |||
5207 | ||||
5208 | if( rc==SQLITE_OK0 ){ | |||
5209 | if( z==0 ){ | |||
5210 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("parse error in \"%s\"", zOrig); | |||
5211 | rc = SQLITE_ERROR1; | |||
5212 | }else{ | |||
5213 | if( bOption ){ | |||
5214 | rc = fts5ConfigParseSpecial(pRet, | |||
5215 | ALWAYS(zOne)(zOne)?zOne:"", | |||
5216 | zTwo?zTwo:"", | |||
5217 | pzErr | |||
5218 | ); | |||
5219 | }else{ | |||
5220 | rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr, &bUnindexed); | |||
5221 | zOne = 0; | |||
5222 | } | |||
5223 | } | |||
5224 | } | |||
5225 | ||||
5226 | sqlite3_freesqlite3_api->free(zOne); | |||
5227 | sqlite3_freesqlite3_api->free(zTwo); | |||
5228 | } | |||
5229 | ||||
5230 | /* We only allow contentless_delete=1 if the table is indeed contentless. */ | |||
5231 | if( rc==SQLITE_OK0 | |||
5232 | && pRet->bContentlessDelete | |||
5233 | && pRet->eContent!=FTS5_CONTENT_NONE1 | |||
5234 | ){ | |||
5235 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
5236 | "contentless_delete=1 requires a contentless table" | |||
5237 | ); | |||
5238 | rc = SQLITE_ERROR1; | |||
5239 | } | |||
5240 | ||||
5241 | /* We only allow contentless_delete=1 if columnsize=0 is not present. | |||
5242 | ** | |||
5243 | ** This restriction may be removed at some point. | |||
5244 | */ | |||
5245 | if( rc==SQLITE_OK0 && pRet->bContentlessDelete && pRet->bColumnsize==0 ){ | |||
5246 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
5247 | "contentless_delete=1 is incompatible with columnsize=0" | |||
5248 | ); | |||
5249 | rc = SQLITE_ERROR1; | |||
5250 | } | |||
5251 | ||||
5252 | /* We only allow contentless_unindexed=1 if the table is actually a | |||
5253 | ** contentless one. | |||
5254 | */ | |||
5255 | if( rc==SQLITE_OK0 | |||
5256 | && pRet->bContentlessUnindexed | |||
5257 | && pRet->eContent!=FTS5_CONTENT_NONE1 | |||
5258 | ){ | |||
5259 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
5260 | "contentless_unindexed=1 requires a contentless table" | |||
5261 | ); | |||
5262 | rc = SQLITE_ERROR1; | |||
5263 | } | |||
5264 | ||||
5265 | /* If no zContent option was specified, fill in the default values. */ | |||
5266 | if( rc==SQLITE_OK0 && pRet->zContent==0 ){ | |||
5267 | const char *zTail = 0; | |||
5268 | assert( pRet->eContent==FTS5_CONTENT_NORMAL((void) (0)) | |||
5269 | || pRet->eContent==FTS5_CONTENT_NONE((void) (0)) | |||
5270 | )((void) (0)); | |||
5271 | if( pRet->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
5272 | zTail = "content"; | |||
5273 | }else if( bUnindexed && pRet->bContentlessUnindexed ){ | |||
5274 | pRet->eContent = FTS5_CONTENT_UNINDEXED3; | |||
5275 | zTail = "content"; | |||
5276 | }else if( pRet->bColumnsize ){ | |||
5277 | zTail = "docsize"; | |||
5278 | } | |||
5279 | ||||
5280 | if( zTail ){ | |||
5281 | pRet->zContent = sqlite3Fts5Mprintf( | |||
5282 | &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail | |||
5283 | ); | |||
5284 | } | |||
5285 | } | |||
5286 | ||||
5287 | if( rc==SQLITE_OK0 && pRet->zContentRowid==0 ){ | |||
5288 | pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); | |||
5289 | } | |||
5290 | ||||
5291 | /* Formulate the zContentExprlist text */ | |||
5292 | if( rc==SQLITE_OK0 ){ | |||
5293 | rc = fts5ConfigMakeExprlist(pRet); | |||
5294 | } | |||
5295 | ||||
5296 | if( rc!=SQLITE_OK0 ){ | |||
5297 | sqlite3Fts5ConfigFree(pRet); | |||
5298 | *ppOut = 0; | |||
5299 | } | |||
5300 | return rc; | |||
5301 | } | |||
5302 | ||||
5303 | /* | |||
5304 | ** Free the configuration object passed as the only argument. | |||
5305 | */ | |||
5306 | static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ | |||
5307 | if( pConfig ){ | |||
5308 | int i; | |||
5309 | if( pConfig->t.pTok ){ | |||
5310 | if( pConfig->t.pApi1 ){ | |||
5311 | pConfig->t.pApi1->xDelete(pConfig->t.pTok); | |||
5312 | }else{ | |||
5313 | pConfig->t.pApi2->xDelete(pConfig->t.pTok); | |||
5314 | } | |||
5315 | } | |||
5316 | sqlite3_freesqlite3_api->free((char*)pConfig->t.azArg); | |||
5317 | sqlite3_freesqlite3_api->free(pConfig->zDb); | |||
5318 | sqlite3_freesqlite3_api->free(pConfig->zName); | |||
5319 | for(i=0; i<pConfig->nCol; i++){ | |||
5320 | sqlite3_freesqlite3_api->free(pConfig->azCol[i]); | |||
5321 | } | |||
5322 | sqlite3_freesqlite3_api->free(pConfig->azCol); | |||
5323 | sqlite3_freesqlite3_api->free(pConfig->aPrefix); | |||
5324 | sqlite3_freesqlite3_api->free(pConfig->zRank); | |||
5325 | sqlite3_freesqlite3_api->free(pConfig->zRankArgs); | |||
5326 | sqlite3_freesqlite3_api->free(pConfig->zContent); | |||
5327 | sqlite3_freesqlite3_api->free(pConfig->zContentRowid); | |||
5328 | sqlite3_freesqlite3_api->free(pConfig->zContentExprlist); | |||
5329 | sqlite3_freesqlite3_api->free(pConfig); | |||
5330 | } | |||
5331 | } | |||
5332 | ||||
5333 | /* | |||
5334 | ** Call sqlite3_declare_vtab() based on the contents of the configuration | |||
5335 | ** object passed as the only argument. Return SQLITE_OK if successful, or | |||
5336 | ** an SQLite error code if an error occurs. | |||
5337 | */ | |||
5338 | static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ | |||
5339 | int i; | |||
5340 | int rc = SQLITE_OK0; | |||
5341 | char *zSql; | |||
5342 | ||||
5343 | zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); | |||
5344 | for(i=0; zSql && i<pConfig->nCol; i++){ | |||
5345 | const char *zSep = (i==0?"":", "); | |||
5346 | zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]); | |||
5347 | } | |||
5348 | zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", | |||
5349 | zSql, pConfig->zName, FTS5_RANK_NAME"rank" | |||
5350 | ); | |||
5351 | ||||
5352 | assert( zSql || rc==SQLITE_NOMEM )((void) (0)); | |||
5353 | if( zSql ){ | |||
5354 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(pConfig->db, zSql); | |||
5355 | sqlite3_freesqlite3_api->free(zSql); | |||
5356 | } | |||
5357 | ||||
5358 | return rc; | |||
5359 | } | |||
5360 | ||||
5361 | /* | |||
5362 | ** Tokenize the text passed via the second and third arguments. | |||
5363 | ** | |||
5364 | ** The callback is invoked once for each token in the input text. The | |||
5365 | ** arguments passed to it are, in order: | |||
5366 | ** | |||
5367 | ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() | |||
5368 | ** const char *pToken // Pointer to buffer containing token | |||
5369 | ** int nToken // Size of token in bytes | |||
5370 | ** int iStart // Byte offset of start of token within input text | |||
5371 | ** int iEnd // Byte offset of end of token within input text | |||
5372 | ** int iPos // Position of token in input (first token is 0) | |||
5373 | ** | |||
5374 | ** If the callback returns a non-zero value the tokenization is abandoned | |||
5375 | ** and no further callbacks are issued. | |||
5376 | ** | |||
5377 | ** This function returns SQLITE_OK if successful or an SQLite error code | |||
5378 | ** if an error occurs. If the tokenization was abandoned early because | |||
5379 | ** the callback returned SQLITE_DONE, this is not an error and this function | |||
5380 | ** still returns SQLITE_OK. Or, if the tokenization was abandoned early | |||
5381 | ** because the callback returned another non-zero value, it is assumed | |||
5382 | ** to be an SQLite error code and returned to the caller. | |||
5383 | */ | |||
5384 | static int sqlite3Fts5Tokenize( | |||
5385 | Fts5Config *pConfig, /* FTS5 Configuration object */ | |||
5386 | int flags, /* FTS5_TOKENIZE_* flags */ | |||
5387 | const char *pText, int nText, /* Text to tokenize */ | |||
5388 | void *pCtx, /* Context passed to xToken() */ | |||
5389 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | |||
5390 | ){ | |||
5391 | int rc = SQLITE_OK0; | |||
5392 | if( pText ){ | |||
5393 | if( pConfig->t.pTok==0 ){ | |||
5394 | rc = sqlite3Fts5LoadTokenizer(pConfig); | |||
5395 | } | |||
5396 | if( rc==SQLITE_OK0 ){ | |||
5397 | if( pConfig->t.pApi1 ){ | |||
5398 | rc = pConfig->t.pApi1->xTokenize( | |||
5399 | pConfig->t.pTok, pCtx, flags, pText, nText, xToken | |||
5400 | ); | |||
5401 | }else{ | |||
5402 | rc = pConfig->t.pApi2->xTokenize(pConfig->t.pTok, pCtx, flags, | |||
5403 | pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken | |||
5404 | ); | |||
5405 | } | |||
5406 | } | |||
5407 | } | |||
5408 | return rc; | |||
5409 | } | |||
5410 | ||||
/*
** pIn points to the first character of what should be a comma-separated
** list of SQL literals terminated by a ')' character. Optional whitespace
** is skipped before and after each literal.
**
** If the input has that form, return a pointer to the ')'. Otherwise
** return NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;

  for(;;){
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);

    if( z==0 ) return 0;          /* Not a literal */
    if( *z==')' ) return z;       /* End of list */
    if( *z!=',' ) return 0;       /* Unexpected character after literal */
    z++;                          /* Step over the comma */
  }
}
5434 | ||||
5435 | /* | |||
5436 | ** Parameter zIn contains a rank() function specification. The format of | |||
5437 | ** this is: | |||
5438 | ** | |||
5439 | ** + Bareword (function name) | |||
5440 | ** + Open parenthesis - "(" | |||
5441 | ** + Zero or more SQL literals in a comma separated list | |||
5442 | ** + Close parenthesis - ")" | |||
5443 | */ | |||
5444 | static int sqlite3Fts5ConfigParseRank( | |||
5445 | const char *zIn, /* Input string */ | |||
5446 | char **pzRank, /* OUT: Rank function name */ | |||
5447 | char **pzRankArgs /* OUT: Rank function arguments */ | |||
5448 | ){ | |||
5449 | const char *p = zIn; | |||
5450 | const char *pRank; | |||
5451 | char *zRank = 0; | |||
5452 | char *zRankArgs = 0; | |||
5453 | int rc = SQLITE_OK0; | |||
5454 | ||||
5455 | *pzRank = 0; | |||
5456 | *pzRankArgs = 0; | |||
5457 | ||||
5458 | if( p==0 ){ | |||
5459 | rc = SQLITE_ERROR1; | |||
5460 | }else{ | |||
5461 | p = fts5ConfigSkipWhitespace(p); | |||
5462 | pRank = p; | |||
5463 | p = fts5ConfigSkipBareword(p); | |||
5464 | ||||
5465 | if( p ){ | |||
5466 | zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); | |||
5467 | if( zRank ) memcpy(zRank, pRank, p-pRank); | |||
5468 | }else{ | |||
5469 | rc = SQLITE_ERROR1; | |||
5470 | } | |||
5471 | ||||
5472 | if( rc==SQLITE_OK0 ){ | |||
5473 | p = fts5ConfigSkipWhitespace(p); | |||
5474 | if( *p!='(' ) rc = SQLITE_ERROR1; | |||
5475 | p++; | |||
5476 | } | |||
5477 | if( rc==SQLITE_OK0 ){ | |||
5478 | const char *pArgs; | |||
5479 | p = fts5ConfigSkipWhitespace(p); | |||
5480 | pArgs = p; | |||
5481 | if( *p!=')' ){ | |||
5482 | p = fts5ConfigSkipArgs(p); | |||
5483 | if( p==0 ){ | |||
5484 | rc = SQLITE_ERROR1; | |||
5485 | }else{ | |||
5486 | zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); | |||
5487 | if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); | |||
5488 | } | |||
5489 | } | |||
5490 | } | |||
5491 | } | |||
5492 | ||||
5493 | if( rc!=SQLITE_OK0 ){ | |||
5494 | sqlite3_freesqlite3_api->free(zRank); | |||
5495 | assert( zRankArgs==0 )((void) (0)); | |||
5496 | }else{ | |||
5497 | *pzRank = zRank; | |||
5498 | *pzRankArgs = zRankArgs; | |||
5499 | } | |||
5500 | return rc; | |||
5501 | } | |||
5502 | ||||
5503 | static int sqlite3Fts5ConfigSetValue( | |||
5504 | Fts5Config *pConfig, | |||
5505 | const char *zKey, | |||
5506 | sqlite3_value *pVal, | |||
5507 | int *pbBadkey | |||
5508 | ){ | |||
5509 | int rc = SQLITE_OK0; | |||
5510 | ||||
5511 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "pgsz") ){ | |||
5512 | int pgsz = 0; | |||
5513 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
5514 | pgsz = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5515 | } | |||
5516 | if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE(64*1024) ){ | |||
5517 | *pbBadkey = 1; | |||
5518 | }else{ | |||
5519 | pConfig->pgsz = pgsz; | |||
5520 | } | |||
5521 | } | |||
5522 | ||||
5523 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "hashsize") ){ | |||
5524 | int nHashSize = -1; | |||
5525 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
5526 | nHashSize = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5527 | } | |||
5528 | if( nHashSize<=0 ){ | |||
5529 | *pbBadkey = 1; | |||
5530 | }else{ | |||
5531 | pConfig->nHashSize = nHashSize; | |||
5532 | } | |||
5533 | } | |||
5534 | ||||
5535 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "automerge") ){ | |||
5536 | int nAutomerge = -1; | |||
5537 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
5538 | nAutomerge = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5539 | } | |||
5540 | if( nAutomerge<0 || nAutomerge>64 ){ | |||
5541 | *pbBadkey = 1; | |||
5542 | }else{ | |||
5543 | if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE4; | |||
5544 | pConfig->nAutomerge = nAutomerge; | |||
5545 | } | |||
5546 | } | |||
5547 | ||||
5548 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "usermerge") ){ | |||
5549 | int nUsermerge = -1; | |||
5550 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
5551 | nUsermerge = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5552 | } | |||
5553 | if( nUsermerge<2 || nUsermerge>16 ){ | |||
5554 | *pbBadkey = 1; | |||
5555 | }else{ | |||
5556 | pConfig->nUsermerge = nUsermerge; | |||
5557 | } | |||
5558 | } | |||
5559 | ||||
5560 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "crisismerge") ){ | |||
5561 | int nCrisisMerge = -1; | |||
5562 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
5563 | nCrisisMerge = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5564 | } | |||
5565 | if( nCrisisMerge<0 ){ | |||
5566 | *pbBadkey = 1; | |||
5567 | }else{ | |||
5568 | if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE16; | |||
5569 | if( nCrisisMerge>=FTS5_MAX_SEGMENT2000 ) nCrisisMerge = FTS5_MAX_SEGMENT2000-1; | |||
5570 | pConfig->nCrisisMerge = nCrisisMerge; | |||
5571 | } | |||
5572 | } | |||
5573 | ||||
5574 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "deletemerge") ){ | |||
5575 | int nVal = -1; | |||
5576 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
5577 | nVal = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5578 | }else{ | |||
5579 | *pbBadkey = 1; | |||
5580 | } | |||
5581 | if( nVal<0 ) nVal = FTS5_DEFAULT_DELETE_AUTOMERGE10; | |||
5582 | if( nVal>100 ) nVal = 0; | |||
5583 | pConfig->nDeleteMerge = nVal; | |||
5584 | } | |||
5585 | ||||
5586 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "rank") ){ | |||
5587 | const char *zIn = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
5588 | char *zRank; | |||
5589 | char *zRankArgs; | |||
5590 | rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); | |||
5591 | if( rc==SQLITE_OK0 ){ | |||
5592 | sqlite3_freesqlite3_api->free(pConfig->zRank); | |||
5593 | sqlite3_freesqlite3_api->free(pConfig->zRankArgs); | |||
5594 | pConfig->zRank = zRank; | |||
5595 | pConfig->zRankArgs = zRankArgs; | |||
5596 | }else if( rc==SQLITE_ERROR1 ){ | |||
5597 | rc = SQLITE_OK0; | |||
5598 | *pbBadkey = 1; | |||
5599 | } | |||
5600 | } | |||
5601 | ||||
5602 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "secure-delete") ){ | |||
5603 | int bVal = -1; | |||
5604 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
5605 | bVal = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5606 | } | |||
5607 | if( bVal<0 ){ | |||
5608 | *pbBadkey = 1; | |||
5609 | }else{ | |||
5610 | pConfig->bSecureDelete = (bVal ? 1 : 0); | |||
5611 | } | |||
5612 | } | |||
5613 | ||||
5614 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "insttoken") ){ | |||
5615 | int bVal = -1; | |||
5616 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
5617 | bVal = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5618 | } | |||
5619 | if( bVal<0 ){ | |||
5620 | *pbBadkey = 1; | |||
5621 | }else{ | |||
5622 | pConfig->bPrefixInsttoken = (bVal ? 1 : 0); | |||
5623 | } | |||
5624 | ||||
5625 | }else{ | |||
5626 | *pbBadkey = 1; | |||
5627 | } | |||
5628 | return rc; | |||
5629 | } | |||
5630 | ||||
5631 | /* | |||
5632 | ** Load the contents of the %_config table into memory. | |||
5633 | */ | |||
5634 | static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ | |||
5635 | const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; | |||
5636 | char *zSql; | |||
5637 | sqlite3_stmt *p = 0; | |||
5638 | int rc = SQLITE_OK0; | |||
5639 | int iVersion = 0; | |||
5640 | ||||
5641 | /* Set default values */ | |||
5642 | pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE4050; | |||
5643 | pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE4; | |||
5644 | pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE4; | |||
5645 | pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE16; | |||
5646 | pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE(1024*1024); | |||
5647 | pConfig->nDeleteMerge = FTS5_DEFAULT_DELETE_AUTOMERGE10; | |||
5648 | ||||
5649 | zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); | |||
5650 | if( zSql ){ | |||
5651 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pConfig->db, zSql, -1, &p, 0); | |||
5652 | sqlite3_freesqlite3_api->free(zSql); | |||
5653 | } | |||
5654 | ||||
5655 | assert( rc==SQLITE_OK || p==0 )((void) (0)); | |||
5656 | if( rc==SQLITE_OK0 ){ | |||
5657 | while( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(p) ){ | |||
5658 | const char *zK = (const char*)sqlite3_column_textsqlite3_api->column_text(p, 0); | |||
5659 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(p, 1); | |||
5660 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zK, "version") ){ | |||
5661 | iVersion = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
5662 | }else{ | |||
5663 | int bDummy = 0; | |||
5664 | sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); | |||
5665 | } | |||
5666 | } | |||
5667 | rc = sqlite3_finalizesqlite3_api->finalize(p); | |||
5668 | } | |||
5669 | ||||
5670 | if( rc==SQLITE_OK0 | |||
5671 | && iVersion!=FTS5_CURRENT_VERSION4 | |||
5672 | && iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE5 | |||
5673 | ){ | |||
5674 | rc = SQLITE_ERROR1; | |||
5675 | sqlite3Fts5ConfigErrmsg(pConfig, "invalid fts5 file format " | |||
5676 | "(found %d, expected %d or %d) - run 'rebuild'", | |||
5677 | iVersion, FTS5_CURRENT_VERSION4, FTS5_CURRENT_VERSION_SECUREDELETE5 | |||
5678 | ); | |||
5679 | }else{ | |||
5680 | pConfig->iVersion = iVersion; | |||
5681 | } | |||
5682 | ||||
5683 | if( rc==SQLITE_OK0 ){ | |||
5684 | pConfig->iCookie = iCookie; | |||
5685 | } | |||
5686 | return rc; | |||
5687 | } | |||
5688 | ||||
5689 | /* | |||
5690 | ** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer | |||
5691 | ** containing the error message created using printf() style formatting | |||
5692 | ** string zFmt and its trailing arguments. | |||
5693 | */ | |||
5694 | static void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){ | |||
5695 | va_list ap; /* ... printf arguments */ | |||
5696 | char *zMsg = 0; | |||
5697 | ||||
5698 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
5699 | zMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
5700 | if( pConfig->pzErrmsg ){ | |||
5701 | assert( *pConfig->pzErrmsg==0 )((void) (0)); | |||
5702 | *pConfig->pzErrmsg = zMsg; | |||
5703 | }else{ | |||
5704 | sqlite3_freesqlite3_api->free(zMsg); | |||
5705 | } | |||
5706 | ||||
5707 | va_end(ap)__builtin_va_end(ap); | |||
5708 | } | |||
5709 | ||||
5710 | ||||
5711 | ||||
5712 | #line 1 "fts5_expr.c" | |||
5713 | /* | |||
5714 | ** 2014 May 31 | |||
5715 | ** | |||
5716 | ** The author disclaims copyright to this source code. In place of | |||
5717 | ** a legal notice, here is a blessing: | |||
5718 | ** | |||
5719 | ** May you do good and not evil. | |||
5720 | ** May you find forgiveness for yourself and forgive others. | |||
5721 | ** May you share freely, never taking more than you give. | |||
5722 | ** | |||
5723 | ****************************************************************************** | |||
5724 | ** | |||
5725 | */ | |||
5726 | ||||
5727 | ||||
5728 | ||||
5729 | /* #include "fts5Int.h" */ | |||
5730 | /* #include "fts5parse.h" */ | |||
5731 | ||||
/*
** Default limit associated with expression-tree depth. May be overridden
** at compile time.
*/
#ifndef SQLITE_FTS5_MAX_EXPR_DEPTH
# define SQLITE_FTS5_MAX_EXPR_DEPTH 256
#endif

/*
** All token types in the generated fts5parse.h file are greater than 0.
*/
#define FTS5_EOF 0

/* Largest positive value representable in a signed 64-bit integer. */
#define FTS5_LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
5742 | ||||
5743 | typedef struct Fts5ExprTerm Fts5ExprTerm; | |||
5744 | ||||
5745 | /* | |||
5746 | ** Functions generated by lemon from fts5parse.y. | |||
5747 | */ | |||
5748 | static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); | |||
5749 | static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); | |||
5750 | static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); | |||
5751 | #ifndef NDEBUG1 | |||
5752 | #include <stdio.h> | |||
5753 | static void sqlite3Fts5ParserTrace(FILE*, char*); | |||
5754 | #endif | |||
5755 | static int sqlite3Fts5ParserFallback(int); | |||
5756 | ||||
5757 | ||||
5758 | struct Fts5Expr { | |||
5759 | Fts5Index *pIndex; | |||
5760 | Fts5Config *pConfig; | |||
5761 | Fts5ExprNode *pRoot; | |||
5762 | int bDesc; /* Iterate in descending rowid order */ | |||
5763 | int nPhrase; /* Number of phrases in expression */ | |||
5764 | Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ | |||
5765 | }; | |||
5766 | ||||
5767 | /* | |||
5768 | ** eType: | |||
5769 | ** Expression node type. Usually one of: | |||
5770 | ** | |||
5771 | ** FTS5_AND (nChild, apChild valid) | |||
5772 | ** FTS5_OR (nChild, apChild valid) | |||
5773 | ** FTS5_NOT (nChild, apChild valid) | |||
5774 | ** FTS5_STRING (pNear valid) | |||
5775 | ** FTS5_TERM (pNear valid) | |||
5776 | ** | |||
5777 | ** An expression node with eType==0 may also exist. It always matches zero | |||
5778 | ** rows. This is created when a phrase containing no tokens is parsed. | |||
5779 | ** e.g. "". | |||
5780 | ** | |||
5781 | ** iHeight: | |||
5782 | ** Distance from this node to furthest leaf. This is always 0 for nodes | |||
5783 | ** of type FTS5_STRING and FTS5_TERM. For all other nodes it is one | |||
5784 | ** greater than the largest child value. | |||
5785 | */ | |||
5786 | struct Fts5ExprNode { | |||
5787 | int eType; /* Node type */ | |||
5788 | int bEof; /* True at EOF */ | |||
5789 | int bNomatch; /* True if entry is not a match */ | |||
5790 | int iHeight; /* Distance to tree leaf nodes */ | |||
5791 | ||||
5792 | /* Next method for this node. */ | |||
5793 | int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64); | |||
5794 | ||||
5795 | i64 iRowid; /* Current rowid */ | |||
5796 | Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ | |||
5797 | ||||
5798 | /* Child nodes. For a NOT node, this array always contains 2 entries. For | |||
5799 | ** AND or OR nodes, it contains 2 or more entries. */ | |||
5800 | int nChild; /* Number of child nodes */ | |||
5801 | Fts5ExprNode *apChild[FLEXARRAY]; /* Array of child nodes */ | |||
5802 | }; | |||
5803 | ||||
5804 | /* Size (in bytes) of an Fts5ExprNode object that holds up to N children */ | |||
5805 | #define SZ_FTS5EXPRNODE(N)(__builtin_offsetof(Fts5ExprNode, apChild) + (N)*sizeof(Fts5ExprNode *)) \ | |||
5806 | (offsetof(Fts5ExprNode,apChild)__builtin_offsetof(Fts5ExprNode, apChild) + (N)*sizeof(Fts5ExprNode*)) | |||
5807 | ||||
/* True if node p is a leaf that carries a nearset (FTS5_TERM or FTS5_STRING) */
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)

/*
** Invoke the xNext method of an Fts5ExprNode object. This macro should be
** used as if it has the same signature as the xNext() methods themselves.
*/
#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
5815 | ||||
5816 | /* | |||
5817 | ** An instance of the following structure represents a single search term | |||
5818 | ** or term prefix. | |||
5819 | */ | |||
5820 | struct Fts5ExprTerm { | |||
5821 | u8 bPrefix; /* True for a prefix term */ | |||
5822 | u8 bFirst; /* True if token must be first in column */ | |||
5823 | char *pTerm; /* Term data */ | |||
5824 | int nQueryTerm; /* Effective size of term in bytes */ | |||
5825 | int nFullTerm; /* Size of term in bytes incl. tokendata */ | |||
5826 | Fts5IndexIter *pIter; /* Iterator for this term */ | |||
5827 | Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ | |||
5828 | }; | |||
5829 | ||||
5830 | /* | |||
5831 | ** A phrase. One or more terms that must appear in a contiguous sequence | |||
5832 | ** within a document for it to match. | |||
5833 | */ | |||
5834 | struct Fts5ExprPhrase { | |||
5835 | Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */ | |||
5836 | Fts5Buffer poslist; /* Current position list */ | |||
5837 | int nTerm; /* Number of entries in aTerm[] */ | |||
5838 | Fts5ExprTerm aTerm[FLEXARRAY]; /* Terms that make up this phrase */ | |||
5839 | }; | |||
5840 | ||||
5841 | /* Size (in bytes) of an Fts5ExprPhrase object that holds up to N terms */ | |||
5842 | #define SZ_FTS5EXPRPHRASE(N)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (N)*sizeof(Fts5ExprTerm )) \ | |||
5843 | (offsetof(Fts5ExprPhrase,aTerm)__builtin_offsetof(Fts5ExprPhrase, aTerm) + (N)*sizeof(Fts5ExprTerm)) | |||
5844 | ||||
5845 | /* | |||
5846 | ** One or more phrases that must appear within a certain token distance of | |||
5847 | ** each other within each matching document. | |||
5848 | */ | |||
5849 | struct Fts5ExprNearset { | |||
5850 | int nNear; /* NEAR parameter */ | |||
5851 | Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */ | |||
5852 | int nPhrase; /* Number of entries in aPhrase[] array */ | |||
5853 | Fts5ExprPhrase *apPhrase[FLEXARRAY]; /* Array of phrase pointers */ | |||
5854 | }; | |||
5855 | ||||
5856 | /* Size (in bytes) of an Fts5ExprNearset object covering up to N phrases */ | |||
5857 | #define SZ_FTS5EXPRNEARSET(N)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(N)*sizeof(Fts5ExprPhrase *)) \ | |||
5858 | (offsetof(Fts5ExprNearset,apPhrase)__builtin_offsetof(Fts5ExprNearset, apPhrase)+(N)*sizeof(Fts5ExprPhrase*)) | |||
5859 | ||||
5860 | /* | |||
5861 | ** Parse context. | |||
5862 | */ | |||
5863 | struct Fts5Parse { | |||
5864 | Fts5Config *pConfig; | |||
5865 | char *zErr; | |||
5866 | int rc; | |||
5867 | int nPhrase; /* Size of apPhrase array */ | |||
5868 | Fts5ExprPhrase **apPhrase; /* Array of all phrases */ | |||
5869 | Fts5ExprNode *pExpr; /* Result of a successful parse */ | |||
5870 | int bPhraseToAnd; /* Convert "a+b" to "a AND b" */ | |||
5871 | }; | |||
5872 | ||||
5873 | /* | |||
5874 | ** Check that the Fts5ExprNode.iHeight variables are set correctly in | |||
5875 | ** the expression tree passed as the only argument. | |||
5876 | */ | |||
5877 | #ifndef NDEBUG1 | |||
5878 | static void assert_expr_depth_ok(int rc, Fts5ExprNode *p){ | |||
5879 | if( rc==SQLITE_OK0 ){ | |||
5880 | if( p->eType==FTS5_TERM4 || p->eType==FTS5_STRING9 || p->eType==0 ){ | |||
5881 | assert( p->iHeight==0 )((void) (0)); | |||
5882 | }else{ | |||
5883 | int ii; | |||
5884 | int iMaxChild = 0; | |||
5885 | for(ii=0; ii<p->nChild; ii++){ | |||
5886 | Fts5ExprNode *pChild = p->apChild[ii]; | |||
5887 | iMaxChild = MAX(iMaxChild, pChild->iHeight)(((iMaxChild) > (pChild->iHeight)) ? (iMaxChild) : (pChild ->iHeight)); | |||
5888 | assert_expr_depth_ok(SQLITE_OK, pChild); | |||
5889 | } | |||
5890 | assert( p->iHeight==iMaxChild+1 )((void) (0)); | |||
5891 | } | |||
5892 | } | |||
5893 | } | |||
5894 | #else | |||
5895 | # define assert_expr_depth_ok(rc, p) | |||
5896 | #endif | |||
5897 | ||||
5898 | static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ | |||
5899 | va_list ap; | |||
5900 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
5901 | if( pParse->rc==SQLITE_OK0 ){ | |||
5902 | assert( pParse->zErr==0 )((void) (0)); | |||
5903 | pParse->zErr = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
5904 | pParse->rc = SQLITE_ERROR1; | |||
5905 | } | |||
5906 | va_end(ap)__builtin_va_end(ap); | |||
5907 | } | |||
5908 | ||||
/*
** Return true (1) if character t is treated as whitespace by the FTS5
** expression tokenizer: space, tab, newline or carriage return. Return 0
** for every other character, including the nul terminator.
*/
static int fts5ExprIsspace(char t){
  return t==' ' || t=='\t' || t=='\n' || t=='\r';
}
5912 | ||||
5913 | /* | |||
5914 | ** Read the first token from the nul-terminated string at *pz. | |||
5915 | */ | |||
5916 | static int fts5ExprGetToken( | |||
5917 | Fts5Parse *pParse, | |||
5918 | const char **pz, /* IN/OUT: Pointer into buffer */ | |||
5919 | Fts5Token *pToken | |||
5920 | ){ | |||
5921 | const char *z = *pz; | |||
5922 | int tok; | |||
5923 | ||||
5924 | /* Skip past any whitespace */ | |||
5925 | while( fts5ExprIsspace(*z) ) z++; | |||
5926 | ||||
5927 | pToken->p = z; | |||
5928 | pToken->n = 1; | |||
5929 | switch( *z ){ | |||
5930 | case '(': tok = FTS5_LP10; break; | |||
5931 | case ')': tok = FTS5_RP11; break; | |||
5932 | case '{': tok = FTS5_LCP7; break; | |||
5933 | case '}': tok = FTS5_RCP8; break; | |||
5934 | case ':': tok = FTS5_COLON5; break; | |||
5935 | case ',': tok = FTS5_COMMA13; break; | |||
5936 | case '+': tok = FTS5_PLUS14; break; | |||
5937 | case '*': tok = FTS5_STAR15; break; | |||
5938 | case '-': tok = FTS5_MINUS6; break; | |||
5939 | case '^': tok = FTS5_CARET12; break; | |||
5940 | case '\0': tok = FTS5_EOF0; break; | |||
5941 | ||||
5942 | case '"': { | |||
5943 | const char *z2; | |||
5944 | tok = FTS5_STRING9; | |||
5945 | ||||
5946 | for(z2=&z[1]; 1; z2++){ | |||
5947 | if( z2[0]=='"' ){ | |||
5948 | z2++; | |||
5949 | if( z2[0]!='"' ) break; | |||
5950 | } | |||
5951 | if( z2[0]=='\0' ){ | |||
5952 | sqlite3Fts5ParseError(pParse, "unterminated string"); | |||
5953 | return FTS5_EOF0; | |||
5954 | } | |||
5955 | } | |||
5956 | pToken->n = (z2 - z); | |||
5957 | break; | |||
5958 | } | |||
5959 | ||||
5960 | default: { | |||
5961 | const char *z2; | |||
5962 | if( sqlite3Fts5IsBareword(z[0])==0 ){ | |||
5963 | sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); | |||
5964 | return FTS5_EOF0; | |||
5965 | } | |||
5966 | tok = FTS5_STRING9; | |||
5967 | for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); | |||
5968 | pToken->n = (z2 - z); | |||
5969 | if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR1; | |||
5970 | if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT3; | |||
5971 | if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND2; | |||
5972 | break; | |||
5973 | } | |||
5974 | } | |||
5975 | ||||
5976 | *pz = &pToken->p[pToken->n]; | |||
5977 | return tok; | |||
5978 | } | |||
5979 | ||||
5980 | static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)t);} | |||
5981 | static void fts5ParseFree(void *p){ sqlite3_freesqlite3_api->free(p); } | |||
5982 | ||||
5983 | static int sqlite3Fts5ExprNew( | |||
5984 | Fts5Config *pConfig, /* FTS5 Configuration */ | |||
5985 | int bPhraseToAnd, | |||
5986 | int iCol, | |||
5987 | const char *zExpr, /* Expression text */ | |||
5988 | Fts5Expr **ppNew, | |||
5989 | char **pzErr | |||
5990 | ){ | |||
5991 | Fts5Parse sParse; | |||
5992 | Fts5Token token; | |||
5993 | const char *z = zExpr; | |||
5994 | int t; /* Next token type */ | |||
5995 | void *pEngine; | |||
5996 | Fts5Expr *pNew; | |||
5997 | ||||
5998 | *ppNew = 0; | |||
5999 | *pzErr = 0; | |||
6000 | memset(&sParse, 0, sizeof(sParse)); | |||
6001 | sParse.bPhraseToAnd = bPhraseToAnd; | |||
6002 | pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); | |||
6003 | if( pEngine==0 ){ return SQLITE_NOMEM7; } | |||
6004 | sParse.pConfig = pConfig; | |||
6005 | ||||
6006 | do { | |||
6007 | t = fts5ExprGetToken(&sParse, &z, &token); | |||
6008 | sqlite3Fts5Parser(pEngine, t, token, &sParse); | |||
6009 | }while( sParse.rc==SQLITE_OK0 && t!=FTS5_EOF0 ); | |||
6010 | sqlite3Fts5ParserFree(pEngine, fts5ParseFree); | |||
6011 | ||||
6012 | assert( sParse.pExpr || sParse.rc!=SQLITE_OK )((void) (0)); | |||
6013 | assert_expr_depth_ok(sParse.rc, sParse.pExpr); | |||
6014 | ||||
6015 | /* If the LHS of the MATCH expression was a user column, apply the | |||
6016 | ** implicit column-filter. */ | |||
6017 | if( sParse.rc==SQLITE_OK0 && iCol<pConfig->nCol ){ | |||
6018 | int n = SZ_FTS5COLSET(1)(sizeof(i64)*((1 +2)/2)); | |||
6019 | Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n); | |||
6020 | if( pColset ){ | |||
6021 | pColset->nCol = 1; | |||
6022 | pColset->aiCol[0] = iCol; | |||
6023 | sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset); | |||
6024 | } | |||
6025 | } | |||
6026 | ||||
6027 | assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 )((void) (0)); | |||
6028 | if( sParse.rc==SQLITE_OK0 ){ | |||
6029 | *ppNew = pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Expr)); | |||
6030 | if( pNew==0 ){ | |||
6031 | sParse.rc = SQLITE_NOMEM7; | |||
6032 | sqlite3Fts5ParseNodeFree(sParse.pExpr); | |||
6033 | }else{ | |||
6034 | pNew->pRoot = sParse.pExpr; | |||
6035 | pNew->pIndex = 0; | |||
6036 | pNew->pConfig = pConfig; | |||
6037 | pNew->apExprPhrase = sParse.apPhrase; | |||
6038 | pNew->nPhrase = sParse.nPhrase; | |||
6039 | pNew->bDesc = 0; | |||
6040 | sParse.apPhrase = 0; | |||
6041 | } | |||
6042 | }else{ | |||
6043 | sqlite3Fts5ParseNodeFree(sParse.pExpr); | |||
6044 | } | |||
6045 | ||||
6046 | sqlite3_freesqlite3_api->free(sParse.apPhrase); | |||
6047 | if( 0==*pzErr ){ | |||
6048 | *pzErr = sParse.zErr; | |||
6049 | }else{ | |||
6050 | sqlite3_freesqlite3_api->free(sParse.zErr); | |||
6051 | } | |||
6052 | return sParse.rc; | |||
6053 | } | |||
6054 | ||||
/*
** Assuming that buffer z is at least nByte bytes in size and contains a
** valid utf-8 string, return the number of characters in the string.
**
** Characters are counted by counting every byte that is not a utf-8
** continuation byte (a byte of the form 10xxxxxx).
*/
static int fts5ExprCountChar(const char *z, int nByte){
  int nRet = 0;
  int ii;
  for(ii=0; ii<nByte; ii++){
    if( (z[ii] & 0xC0)!=0x80 ) nRet++;
  }
  return nRet;
}
6067 | ||||
6068 | /* | |||
6069 | ** This function is only called when using the special 'trigram' tokenizer. | |||
6070 | ** Argument zText contains the text of a LIKE or GLOB pattern matched | |||
6071 | ** against column iCol. This function creates and compiles an FTS5 MATCH | |||
6072 | ** expression that will match a superset of the rows matched by the LIKE or | |||
6073 | ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error | |||
6074 | ** code. | |||
6075 | */ | |||
6076 | static int sqlite3Fts5ExprPattern( | |||
6077 | Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp | |||
6078 | ){ | |||
6079 | i64 nText = strlen(zText); | |||
6080 | char *zExpr = (char*)sqlite3_malloc64sqlite3_api->malloc64(nText*4 + 1); | |||
6081 | int rc = SQLITE_OK0; | |||
6082 | ||||
6083 | if( zExpr==0 ){ | |||
6084 | rc = SQLITE_NOMEM7; | |||
6085 | }else{ | |||
6086 | char aSpec[3]; | |||
6087 | int iOut = 0; | |||
6088 | int i = 0; | |||
6089 | int iFirst = 0; | |||
6090 | ||||
6091 | if( bGlob==0 ){ | |||
6092 | aSpec[0] = '_'; | |||
6093 | aSpec[1] = '%'; | |||
6094 | aSpec[2] = 0; | |||
6095 | }else{ | |||
6096 | aSpec[0] = '*'; | |||
6097 | aSpec[1] = '?'; | |||
6098 | aSpec[2] = '['; | |||
6099 | } | |||
6100 | ||||
6101 | while( i<=nText ){ | |||
6102 | if( i==nText | |||
6103 | || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] | |||
6104 | ){ | |||
6105 | ||||
6106 | if( fts5ExprCountChar(&zText[iFirst], i-iFirst)>=3 ){ | |||
6107 | int jj; | |||
6108 | zExpr[iOut++] = '"'; | |||
6109 | for(jj=iFirst; jj<i; jj++){ | |||
6110 | zExpr[iOut++] = zText[jj]; | |||
6111 | if( zText[jj]=='"' ) zExpr[iOut++] = '"'; | |||
6112 | } | |||
6113 | zExpr[iOut++] = '"'; | |||
6114 | zExpr[iOut++] = ' '; | |||
6115 | } | |||
6116 | if( zText[i]==aSpec[2] ){ | |||
6117 | i += 2; | |||
6118 | if( zText[i-1]=='^' ) i++; | |||
6119 | while( i<nText && zText[i]!=']' ) i++; | |||
6120 | } | |||
6121 | iFirst = i+1; | |||
6122 | } | |||
6123 | i++; | |||
6124 | } | |||
6125 | if( iOut>0 ){ | |||
6126 | int bAnd = 0; | |||
6127 | if( pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | |||
6128 | bAnd = 1; | |||
6129 | if( pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
6130 | iCol = pConfig->nCol; | |||
6131 | } | |||
6132 | } | |||
6133 | zExpr[iOut] = '\0'; | |||
6134 | rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg); | |||
6135 | }else{ | |||
6136 | *pp = 0; | |||
6137 | } | |||
6138 | sqlite3_freesqlite3_api->free(zExpr); | |||
6139 | } | |||
6140 | ||||
6141 | return rc; | |||
6142 | } | |||
6143 | ||||
6144 | /* | |||
6145 | ** Free the expression node object passed as the only argument. | |||
6146 | */ | |||
6147 | static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ | |||
6148 | if( p ){ | |||
6149 | int i; | |||
6150 | for(i=0; i<p->nChild; i++){ | |||
6151 | sqlite3Fts5ParseNodeFree(p->apChild[i]); | |||
6152 | } | |||
6153 | sqlite3Fts5ParseNearsetFree(p->pNear); | |||
6154 | sqlite3_freesqlite3_api->free(p); | |||
6155 | } | |||
6156 | } | |||
6157 | ||||
6158 | /* | |||
6159 | ** Free the expression object passed as the only argument. | |||
6160 | */ | |||
6161 | static void sqlite3Fts5ExprFree(Fts5Expr *p){ | |||
6162 | if( p ){ | |||
6163 | sqlite3Fts5ParseNodeFree(p->pRoot); | |||
6164 | sqlite3_freesqlite3_api->free(p->apExprPhrase); | |||
6165 | sqlite3_freesqlite3_api->free(p); | |||
6166 | } | |||
6167 | } | |||
6168 | ||||
6169 | static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){ | |||
6170 | Fts5Parse sParse; | |||
6171 | memset(&sParse, 0, sizeof(sParse)); | |||
6172 | ||||
6173 | if( *pp1 && p2 ){ | |||
6174 | Fts5Expr *p1 = *pp1; | |||
6175 | int nPhrase = p1->nPhrase + p2->nPhrase; | |||
6176 | ||||
6177 | p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND2, p1->pRoot, p2->pRoot,0); | |||
6178 | p2->pRoot = 0; | |||
6179 | ||||
6180 | if( sParse.rc==SQLITE_OK0 ){ | |||
6181 | Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_reallocsqlite3_api->realloc( | |||
6182 | p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*) | |||
6183 | ); | |||
6184 | if( ap==0 ){ | |||
6185 | sParse.rc = SQLITE_NOMEM7; | |||
6186 | }else{ | |||
6187 | int i; | |||
6188 | memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*)); | |||
6189 | for(i=0; i<p2->nPhrase; i++){ | |||
6190 | ap[i] = p2->apExprPhrase[i]; | |||
6191 | } | |||
6192 | p1->nPhrase = nPhrase; | |||
6193 | p1->apExprPhrase = ap; | |||
6194 | } | |||
6195 | } | |||
6196 | sqlite3_freesqlite3_api->free(p2->apExprPhrase); | |||
6197 | sqlite3_freesqlite3_api->free(p2); | |||
6198 | }else if( p2 ){ | |||
6199 | *pp1 = p2; | |||
6200 | } | |||
6201 | ||||
6202 | return sParse.rc; | |||
6203 | } | |||
6204 | ||||
6205 | /* | |||
6206 | ** Argument pTerm must be a synonym iterator. Return the current rowid | |||
6207 | ** that it points to. | |||
6208 | */ | |||
6209 | static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ | |||
6210 | i64 iRet = 0; | |||
6211 | int bRetValid = 0; | |||
6212 | Fts5ExprTerm *p; | |||
6213 | ||||
6214 | assert( pTerm )((void) (0)); | |||
6215 | assert( pTerm->pSynonym )((void) (0)); | |||
6216 | assert( bDesc==0 || bDesc==1 )((void) (0)); | |||
6217 | for(p=pTerm; p; p=p->pSynonym){ | |||
6218 | if( 0==sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof) ){ | |||
6219 | i64 iRowid = p->pIter->iRowid; | |||
6220 | if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ | |||
6221 | iRet = iRowid; | |||
6222 | bRetValid = 1; | |||
6223 | } | |||
6224 | } | |||
6225 | } | |||
6226 | ||||
6227 | if( pbEof && bRetValid==0 ) *pbEof = 1; | |||
6228 | return iRet; | |||
6229 | } | |||
6230 | ||||
6231 | /* | |||
6232 | ** Argument pTerm must be a synonym iterator. | |||
6233 | */ | |||
6234 | static int fts5ExprSynonymList( | |||
6235 | Fts5ExprTerm *pTerm, | |||
6236 | i64 iRowid, | |||
6237 | Fts5Buffer *pBuf, /* Use this buffer for space if required */ | |||
6238 | u8 **pa, int *pn | |||
6239 | ){ | |||
6240 | Fts5PoslistReader aStatic[4]; | |||
6241 | Fts5PoslistReader *aIter = aStatic; | |||
6242 | int nIter = 0; | |||
6243 | int nAlloc = 4; | |||
6244 | int rc = SQLITE_OK0; | |||
6245 | Fts5ExprTerm *p; | |||
6246 | ||||
6247 | assert( pTerm->pSynonym )((void) (0)); | |||
6248 | for(p=pTerm; p; p=p->pSynonym){ | |||
6249 | Fts5IndexIter *pIter = p->pIter; | |||
6250 | if( sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 && pIter->iRowid==iRowid ){ | |||
6251 | if( pIter->nData==0 ) continue; | |||
6252 | if( nIter==nAlloc ){ | |||
6253 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; | |||
6254 | Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
6255 | if( aNew==0 ){ | |||
6256 | rc = SQLITE_NOMEM7; | |||
6257 | goto synonym_poslist_out; | |||
6258 | } | |||
6259 | memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); | |||
6260 | nAlloc = nAlloc*2; | |||
6261 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | |||
6262 | aIter = aNew; | |||
6263 | } | |||
6264 | sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); | |||
6265 | assert( aIter[nIter].bEof==0 )((void) (0)); | |||
6266 | nIter++; | |||
6267 | } | |||
6268 | } | |||
6269 | ||||
6270 | if( nIter==1 ){ | |||
6271 | *pa = (u8*)aIter[0].a; | |||
6272 | *pn = aIter[0].n; | |||
6273 | }else{ | |||
6274 | Fts5PoslistWriter writer = {0}; | |||
6275 | i64 iPrev = -1; | |||
6276 | fts5BufferZero(pBuf)sqlite3Fts5BufferZero(pBuf); | |||
6277 | while( 1 ){ | |||
6278 | int i; | |||
6279 | i64 iMin = FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | |||
6280 | for(i=0; i<nIter; i++){ | |||
6281 | if( aIter[i].bEof==0 ){ | |||
6282 | if( aIter[i].iPos==iPrev ){ | |||
6283 | if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; | |||
6284 | } | |||
6285 | if( aIter[i].iPos<iMin ){ | |||
6286 | iMin = aIter[i].iPos; | |||
6287 | } | |||
6288 | } | |||
6289 | } | |||
6290 | if( iMin==FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) || rc!=SQLITE_OK0 ) break; | |||
6291 | rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin); | |||
6292 | iPrev = iMin; | |||
6293 | } | |||
6294 | if( rc==SQLITE_OK0 ){ | |||
6295 | *pa = pBuf->p; | |||
6296 | *pn = pBuf->n; | |||
6297 | } | |||
6298 | } | |||
6299 | ||||
6300 | synonym_poslist_out: | |||
6301 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | |||
6302 | return rc; | |||
6303 | } | |||
6304 | ||||
6305 | ||||
6306 | /* | |||
6307 | ** All individual term iterators in pPhrase are guaranteed to be valid and | |||
6308 | ** pointing to the same rowid when this function is called. This function | |||
6309 | ** checks if the current rowid really is a match, and if so populates | |||
6310 | ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch | |||
6311 | ** is set to true if this is really a match, or false otherwise. | |||
6312 | ** | |||
6313 | ** SQLITE_OK is returned if an error occurs, or an SQLite error code | |||
6314 | ** otherwise. It is not considered an error code if the current rowid is | |||
6315 | ** not a match. | |||
6316 | */ | |||
6317 | static int fts5ExprPhraseIsMatch( | |||
6318 | Fts5ExprNode *pNode, /* Node pPhrase belongs to */ | |||
6319 | Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ | |||
6320 | int *pbMatch /* OUT: Set to true if really a match */ | |||
6321 | ){ | |||
6322 | Fts5PoslistWriter writer = {0}; | |||
6323 | Fts5PoslistReader aStatic[4]; | |||
6324 | Fts5PoslistReader *aIter = aStatic; | |||
6325 | int i; | |||
6326 | int rc = SQLITE_OK0; | |||
6327 | int bFirst = pPhrase->aTerm[0].bFirst; | |||
6328 | ||||
6329 | fts5BufferZero(&pPhrase->poslist)sqlite3Fts5BufferZero(&pPhrase->poslist); | |||
6330 | ||||
6331 | /* If the aStatic[] array is not large enough, allocate a large array | |||
6332 | ** using sqlite3_malloc(). This approach could be improved upon. */ | |||
6333 | if( pPhrase->nTerm>ArraySize(aStatic)((int)(sizeof(aStatic) / sizeof(aStatic[0]))) ){ | |||
6334 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; | |||
6335 | aIter = (Fts5PoslistReader*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
6336 | if( !aIter ) return SQLITE_NOMEM7; | |||
6337 | } | |||
6338 | memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); | |||
6339 | ||||
6340 | /* Initialize a term iterator for each term in the phrase */ | |||
6341 | for(i=0; i<pPhrase->nTerm; i++){ | |||
6342 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; | |||
6343 | int n = 0; | |||
6344 | int bFlag = 0; | |||
6345 | u8 *a = 0; | |||
6346 | if( pTerm->pSynonym ){ | |||
6347 | Fts5Buffer buf = {0, 0, 0}; | |||
6348 | rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n); | |||
6349 | if( rc ){ | |||
6350 | sqlite3_freesqlite3_api->free(a); | |||
6351 | goto ismatch_out; | |||
6352 | } | |||
6353 | if( a==buf.p ) bFlag = 1; | |||
6354 | }else{ | |||
6355 | a = (u8*)pTerm->pIter->pData; | |||
6356 | n = pTerm->pIter->nData; | |||
6357 | } | |||
6358 | sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); | |||
6359 | aIter[i].bFlag = (u8)bFlag; | |||
6360 | if( aIter[i].bEof ) goto ismatch_out; | |||
6361 | } | |||
6362 | ||||
6363 | while( 1 ){ | |||
6364 | int bMatch; | |||
6365 | i64 iPos = aIter[0].iPos; | |||
6366 | do { | |||
6367 | bMatch = 1; | |||
6368 | for(i=0; i<pPhrase->nTerm; i++){ | |||
6369 | Fts5PoslistReader *pPos = &aIter[i]; | |||
6370 | i64 iAdj = iPos + i; | |||
6371 | if( pPos->iPos!=iAdj ){ | |||
6372 | bMatch = 0; | |||
6373 | while( pPos->iPos<iAdj ){ | |||
6374 | if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out; | |||
6375 | } | |||
6376 | if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; | |||
6377 | } | |||
6378 | } | |||
6379 | }while( bMatch==0 ); | |||
6380 | ||||
6381 | /* Append position iPos to the output */ | |||
6382 | if( bFirst==0 || FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF)==0 ){ | |||
6383 | rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); | |||
6384 | if( rc!=SQLITE_OK0 ) goto ismatch_out; | |||
6385 | } | |||
6386 | ||||
6387 | for(i=0; i<pPhrase->nTerm; i++){ | |||
6388 | if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; | |||
6389 | } | |||
6390 | } | |||
6391 | ||||
6392 | ismatch_out: | |||
6393 | *pbMatch = (pPhrase->poslist.n>0); | |||
6394 | for(i=0; i<pPhrase->nTerm; i++){ | |||
6395 | if( aIter[i].bFlag ) sqlite3_freesqlite3_api->free((u8*)aIter[i].a); | |||
6396 | } | |||
6397 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | |||
6398 | return rc; | |||
6399 | } | |||
6400 | ||||
6401 | typedef struct Fts5LookaheadReader Fts5LookaheadReader; | |||
6402 | struct Fts5LookaheadReader { | |||
6403 | const u8 *a; /* Buffer containing position list */ | |||
6404 | int n; /* Size of buffer a[] in bytes */ | |||
6405 | int i; /* Current offset in position list */ | |||
6406 | i64 iPos; /* Current position */ | |||
6407 | i64 iLookahead; /* Next position */ | |||
6408 | }; | |||
6409 | ||||
6410 | #define FTS5_LOOKAHEAD_EOF(((i64)1) << 62) (((i64)1) << 62) | |||
6411 | ||||
6412 | static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ | |||
6413 | p->iPos = p->iLookahead; | |||
6414 | if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ | |||
6415 | p->iLookahead = FTS5_LOOKAHEAD_EOF(((i64)1) << 62); | |||
6416 | } | |||
6417 | return (p->iPos==FTS5_LOOKAHEAD_EOF(((i64)1) << 62)); | |||
6418 | } | |||
6419 | ||||
6420 | static int fts5LookaheadReaderInit( | |||
6421 | const u8 *a, int n, /* Buffer to read position list from */ | |||
6422 | Fts5LookaheadReader *p /* Iterator object to initialize */ | |||
6423 | ){ | |||
6424 | memset(p, 0, sizeof(Fts5LookaheadReader)); | |||
6425 | p->a = a; | |||
6426 | p->n = n; | |||
6427 | fts5LookaheadReaderNext(p); | |||
6428 | return fts5LookaheadReaderNext(p); | |||
6429 | } | |||
6430 | ||||
6431 | typedef struct Fts5NearTrimmer Fts5NearTrimmer; | |||
6432 | struct Fts5NearTrimmer { | |||
6433 | Fts5LookaheadReader reader; /* Input iterator */ | |||
6434 | Fts5PoslistWriter writer; /* Writer context */ | |||
6435 | Fts5Buffer *pOut; /* Output poslist */ | |||
6436 | }; | |||
6437 | ||||
6438 | /* | |||
6439 | ** The near-set object passed as the first argument contains more than | |||
6440 | ** one phrase. All phrases currently point to the same row. The | |||
6441 | ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function | |||
6442 | ** tests if the current row contains instances of each phrase sufficiently | |||
6443 | ** close together to meet the NEAR constraint. Non-zero is returned if it | |||
6444 | ** does, or zero otherwise. | |||
6445 | ** | |||
6446 | ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this | |||
6447 | ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) | |||
6448 | ** occurs within this function (*pRc) is set accordingly before returning. | |||
6449 | ** The return value is undefined in both these cases. | |||
6450 | ** | |||
6451 | ** If no error occurs and non-zero (a match) is returned, the position-list | |||
6452 | ** of each phrase object is edited to contain only those entries that | |||
6453 | ** meet the constraint before returning. | |||
6454 | */ | |||
6455 | static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ | |||
6456 | Fts5NearTrimmer aStatic[4]; | |||
6457 | Fts5NearTrimmer *a = aStatic; | |||
6458 | Fts5ExprPhrase **apPhrase = pNear->apPhrase; | |||
6459 | ||||
6460 | int i; | |||
6461 | int rc = *pRc; | |||
6462 | int bMatch; | |||
6463 | ||||
6464 | assert( pNear->nPhrase>1 )((void) (0)); | |||
6465 | ||||
6466 | /* If the aStatic[] array is not large enough, allocate a large array | |||
6467 | ** using sqlite3_malloc(). This approach could be improved upon. */ | |||
6468 | if( pNear->nPhrase>ArraySize(aStatic)((int)(sizeof(aStatic) / sizeof(aStatic[0]))) ){ | |||
6469 | sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; | |||
6470 | a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); | |||
6471 | }else{ | |||
6472 | memset(aStatic, 0, sizeof(aStatic)); | |||
6473 | } | |||
6474 | if( rc!=SQLITE_OK0 ){ | |||
6475 | *pRc = rc; | |||
6476 | return 0; | |||
6477 | } | |||
6478 | ||||
6479 | /* Initialize a lookahead iterator for each phrase. After passing the | |||
6480 | ** buffer and buffer size to the lookaside-reader init function, zero | |||
6481 | ** the phrase poslist buffer. The new poslist for the phrase (containing | |||
6482 | ** the same entries as the original with some entries removed on account | |||
6483 | ** of the NEAR constraint) is written over the original even as it is | |||
6484 | ** being read. This is safe as the entries for the new poslist are a | |||
6485 | ** subset of the old, so it is not possible for data yet to be read to | |||
6486 | ** be overwritten. */ | |||
6487 | for(i=0; i<pNear->nPhrase; i++){ | |||
6488 | Fts5Buffer *pPoslist = &apPhrase[i]->poslist; | |||
6489 | fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader); | |||
6490 | pPoslist->n = 0; | |||
6491 | a[i].pOut = pPoslist; | |||
6492 | } | |||
6493 | ||||
6494 | while( 1 ){ | |||
6495 | int iAdv; | |||
6496 | i64 iMin; | |||
6497 | i64 iMax; | |||
6498 | ||||
6499 | /* This block advances the phrase iterators until they point to a set of | |||
6500 | ** entries that together comprise a match. */ | |||
6501 | iMax = a[0].reader.iPos; | |||
6502 | do { | |||
6503 | bMatch = 1; | |||
6504 | for(i=0; i<pNear->nPhrase; i++){ | |||
6505 | Fts5LookaheadReader *pPos = &a[i].reader; | |||
6506 | iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; | |||
6507 | if( pPos->iPos<iMin || pPos->iPos>iMax ){ | |||
6508 | bMatch = 0; | |||
6509 | while( pPos->iPos<iMin ){ | |||
6510 | if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out; | |||
6511 | } | |||
6512 | if( pPos->iPos>iMax ) iMax = pPos->iPos; | |||
6513 | } | |||
6514 | } | |||
6515 | }while( bMatch==0 ); | |||
6516 | ||||
6517 | /* Add an entry to each output position list */ | |||
6518 | for(i=0; i<pNear->nPhrase; i++){ | |||
6519 | i64 iPos = a[i].reader.iPos; | |||
6520 | Fts5PoslistWriter *pWriter = &a[i].writer; | |||
6521 | if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ | |||
6522 | sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos); | |||
6523 | } | |||
6524 | } | |||
6525 | ||||
6526 | iAdv = 0; | |||
6527 | iMin = a[0].reader.iLookahead; | |||
6528 | for(i=0; i<pNear->nPhrase; i++){ | |||
6529 | if( a[i].reader.iLookahead < iMin ){ | |||
6530 | iMin = a[i].reader.iLookahead; | |||
6531 | iAdv = i; | |||
6532 | } | |||
6533 | } | |||
6534 | if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; | |||
6535 | } | |||
6536 | ||||
6537 | ismatch_out: { | |||
6538 | int bRet = a[0].pOut->n>0; | |||
6539 | *pRc = rc; | |||
6540 | if( a!=aStatic ) sqlite3_freesqlite3_api->free(a); | |||
6541 | return bRet; | |||
6542 | } | |||
6543 | } | |||
6544 | ||||
6545 | /* | |||
6546 | ** Advance iterator pIter until it points to a value equal to or laster | |||
6547 | ** than the initial value of *piLast. If this means the iterator points | |||
6548 | ** to a value laster than *piLast, update *piLast to the new lastest value. | |||
6549 | ** | |||
6550 | ** If the iterator reaches EOF, set *pbEof to true before returning. If | |||
6551 | ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc | |||
6552 | ** are set, return a non-zero value. Otherwise, return zero. | |||
6553 | */ | |||
6554 | static int fts5ExprAdvanceto( | |||
6555 | Fts5IndexIter *pIter, /* Iterator to advance */ | |||
6556 | int bDesc, /* True if iterator is "rowid DESC" */ | |||
6557 | i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ | |||
6558 | int *pRc, /* OUT: Error code */ | |||
6559 | int *pbEof /* OUT: Set to true if EOF */ | |||
6560 | ){ | |||
6561 | i64 iLast = *piLast; | |||
6562 | i64 iRowid; | |||
6563 | ||||
6564 | iRowid = pIter->iRowid; | |||
6565 | if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ | |||
6566 | int rc = sqlite3Fts5IterNextFrom(pIter, iLast); | |||
6567 | if( rc || sqlite3Fts5IterEof(pIter)((pIter)->bEof) ){ | |||
6568 | *pRc = rc; | |||
6569 | *pbEof = 1; | |||
6570 | return 1; | |||
6571 | } | |||
6572 | iRowid = pIter->iRowid; | |||
6573 | assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) )((void) (0)); | |||
6574 | } | |||
6575 | *piLast = iRowid; | |||
6576 | ||||
6577 | return 0; | |||
6578 | } | |||
6579 | ||||
6580 | static int fts5ExprSynonymAdvanceto( | |||
6581 | Fts5ExprTerm *pTerm, /* Term iterator to advance */ | |||
6582 | int bDesc, /* True if iterator is "rowid DESC" */ | |||
6583 | i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ | |||
6584 | int *pRc /* OUT: Error code */ | |||
6585 | ){ | |||
6586 | int rc = SQLITE_OK0; | |||
6587 | i64 iLast = *piLast; | |||
6588 | Fts5ExprTerm *p; | |||
6589 | int bEof = 0; | |||
6590 | ||||
6591 | for(p=pTerm; rc==SQLITE_OK0 && p; p=p->pSynonym){ | |||
6592 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | |||
6593 | i64 iRowid = p->pIter->iRowid; | |||
6594 | if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ | |||
6595 | rc = sqlite3Fts5IterNextFrom(p->pIter, iLast); | |||
6596 | } | |||
6597 | } | |||
6598 | } | |||
6599 | ||||
6600 | if( rc!=SQLITE_OK0 ){ | |||
6601 | *pRc = rc; | |||
6602 | bEof = 1; | |||
6603 | }else{ | |||
6604 | *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); | |||
6605 | } | |||
6606 | return bEof; | |||
6607 | } | |||
6608 | ||||
6609 | ||||
6610 | static int fts5ExprNearTest( | |||
6611 | int *pRc, | |||
6612 | Fts5Expr *pExpr, /* Expression that pNear is a part of */ | |||
6613 | Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ | |||
6614 | ){ | |||
6615 | Fts5ExprNearset *pNear = pNode->pNear; | |||
6616 | int rc = *pRc; | |||
6617 | ||||
6618 | if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | |||
6619 | Fts5ExprTerm *pTerm; | |||
6620 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; | |||
6621 | pPhrase->poslist.n = 0; | |||
6622 | for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ | |||
6623 | Fts5IndexIter *pIter = pTerm->pIter; | |||
6624 | if( sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 ){ | |||
6625 | if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ | |||
6626 | pPhrase->poslist.n = 1; | |||
6627 | } | |||
6628 | } | |||
6629 | } | |||
6630 | return pPhrase->poslist.n; | |||
6631 | }else{ | |||
6632 | int i; | |||
6633 | ||||
6634 | /* Check that each phrase in the nearset matches the current row. | |||
6635 | ** Populate the pPhrase->poslist buffers at the same time. If any | |||
6636 | ** phrase is not a match, break out of the loop early. */ | |||
6637 | for(i=0; rc==SQLITE_OK0 && i<pNear->nPhrase; i++){ | |||
6638 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
6639 | if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym | |||
6640 | || pNear->pColset || pPhrase->aTerm[0].bFirst | |||
6641 | ){ | |||
6642 | int bMatch = 0; | |||
6643 | rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); | |||
6644 | if( bMatch==0 ) break; | |||
6645 | }else{ | |||
6646 | Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; | |||
6647 | fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData)sqlite3Fts5BufferSet(&rc,&pPhrase->poslist,pIter-> nData,pIter->pData); | |||
6648 | } | |||
6649 | } | |||
6650 | ||||
6651 | *pRc = rc; | |||
6652 | if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ | |||
6653 | return 1; | |||
6654 | } | |||
6655 | return 0; | |||
6656 | } | |||
6657 | } | |||
6658 | ||||
6659 | ||||
6660 | /* | |||
6661 | ** Initialize all term iterators in the pNear object. If any term is found | |||
6662 | ** to match no documents at all, return immediately without initializing any | |||
6663 | ** further iterators. | |||
6664 | ** | |||
6665 | ** If an error occurs, return an SQLite error code. Otherwise, return | |||
6666 | ** SQLITE_OK. It is not considered an error if some term matches zero | |||
6667 | ** documents. | |||
6668 | */ | |||
6669 | static int fts5ExprNearInitAll( | |||
6670 | Fts5Expr *pExpr, | |||
6671 | Fts5ExprNode *pNode | |||
6672 | ){ | |||
6673 | Fts5ExprNearset *pNear = pNode->pNear; | |||
6674 | int i; | |||
6675 | ||||
6676 | assert( pNode->bNomatch==0 )((void) (0)); | |||
6677 | for(i=0; i<pNear->nPhrase; i++){ | |||
6678 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
6679 | if( pPhrase->nTerm==0 ){ | |||
6680 | pNode->bEof = 1; | |||
6681 | return SQLITE_OK0; | |||
6682 | }else{ | |||
6683 | int j; | |||
6684 | for(j=0; j<pPhrase->nTerm; j++){ | |||
6685 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; | |||
6686 | Fts5ExprTerm *p; | |||
6687 | int bHit = 0; | |||
6688 | ||||
6689 | for(p=pTerm; p; p=p->pSynonym){ | |||
6690 | int rc; | |||
6691 | if( p->pIter ){ | |||
6692 | sqlite3Fts5IterClose(p->pIter); | |||
6693 | p->pIter = 0; | |||
6694 | } | |||
6695 | rc = sqlite3Fts5IndexQuery( | |||
6696 | pExpr->pIndex, p->pTerm, p->nQueryTerm, | |||
6697 | (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX0x0001 : 0) | | |||
6698 | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC0x0002 : 0), | |||
6699 | pNear->pColset, | |||
6700 | &p->pIter | |||
6701 | ); | |||
6702 | assert( (rc==SQLITE_OK)==(p->pIter!=0) )((void) (0)); | |||
6703 | if( rc!=SQLITE_OK0 ) return rc; | |||
6704 | if( 0==sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof) ){ | |||
6705 | bHit = 1; | |||
6706 | } | |||
6707 | } | |||
6708 | ||||
6709 | if( bHit==0 ){ | |||
6710 | pNode->bEof = 1; | |||
6711 | return SQLITE_OK0; | |||
6712 | } | |||
6713 | } | |||
6714 | } | |||
6715 | } | |||
6716 | ||||
6717 | pNode->bEof = 0; | |||
6718 | return SQLITE_OK0; | |||
6719 | } | |||
6720 | ||||
6721 | /* | |||
6722 | ** If pExpr is an ASC iterator, this function returns a value with the | |||
6723 | ** same sign as: | |||
6724 | ** | |||
6725 | ** (iLhs - iRhs) | |||
6726 | ** | |||
6727 | ** Otherwise, if this is a DESC iterator, the opposite is returned: | |||
6728 | ** | |||
6729 | ** (iRhs - iLhs) | |||
6730 | */ | |||
6731 | static int fts5RowidCmp( | |||
6732 | Fts5Expr *pExpr, | |||
6733 | i64 iLhs, | |||
6734 | i64 iRhs | |||
6735 | ){ | |||
6736 | assert( pExpr->bDesc==0 || pExpr->bDesc==1 )((void) (0)); | |||
6737 | if( pExpr->bDesc==0 ){ | |||
6738 | if( iLhs<iRhs ) return -1; | |||
6739 | return (iLhs > iRhs); | |||
6740 | }else{ | |||
6741 | if( iLhs>iRhs ) return -1; | |||
6742 | return (iLhs < iRhs); | |||
6743 | } | |||
6744 | } | |||
6745 | ||||
6746 | static void fts5ExprSetEof(Fts5ExprNode *pNode){ | |||
6747 | int i; | |||
6748 | pNode->bEof = 1; | |||
6749 | pNode->bNomatch = 0; | |||
6750 | for(i=0; i<pNode->nChild; i++){ | |||
6751 | fts5ExprSetEof(pNode->apChild[i]); | |||
6752 | } | |||
6753 | } | |||
6754 | ||||
6755 | static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ | |||
6756 | if( pNode->eType==FTS5_STRING9 || pNode->eType==FTS5_TERM4 ){ | |||
6757 | Fts5ExprNearset *pNear = pNode->pNear; | |||
6758 | int i; | |||
6759 | for(i=0; i<pNear->nPhrase; i++){ | |||
6760 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
6761 | pPhrase->poslist.n = 0; | |||
6762 | } | |||
6763 | }else{ | |||
6764 | int i; | |||
6765 | for(i=0; i<pNode->nChild; i++){ | |||
6766 | fts5ExprNodeZeroPoslist(pNode->apChild[i]); | |||
6767 | } | |||
6768 | } | |||
6769 | } | |||
6770 | ||||
6771 | ||||
6772 | ||||
6773 | /* | |||
6774 | ** Compare the values currently indicated by the two nodes as follows: | |||
6775 | ** | |||
6776 | ** res = (*p1) - (*p2) | |||
6777 | ** | |||
6778 | ** Nodes that point to values that come later in the iteration order are | |||
6779 | ** considered to be larger. Nodes at EOF are the largest of all. | |||
6780 | ** | |||
6781 | ** This means that if the iteration order is ASC, then numerically larger | |||
6782 | ** rowids are considered larger. Or if it is the default DESC, numerically | |||
6783 | ** smaller rowids are larger. | |||
6784 | */ | |||
6785 | static int fts5NodeCompare( | |||
6786 | Fts5Expr *pExpr, | |||
6787 | Fts5ExprNode *p1, | |||
6788 | Fts5ExprNode *p2 | |||
6789 | ){ | |||
6790 | if( p2->bEof ) return -1; | |||
6791 | if( p1->bEof ) return +1; | |||
6792 | return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); | |||
6793 | } | |||
6794 | ||||
6795 | /* | |||
6796 | ** All individual term iterators in pNear are guaranteed to be valid when | |||
6797 | ** this function is called. This function checks if all term iterators | |||
6798 | ** point to the same rowid, and if not, advances them until they do. | |||
6799 | ** If an EOF is reached before this happens, *pbEof is set to true before | |||
6800 | ** returning. | |||
6801 | ** | |||
6802 | ** SQLITE_OK is returned if an error occurs, or an SQLite error code | |||
6803 | ** otherwise. It is not considered an error code if an iterator reaches | |||
6804 | ** EOF. | |||
6805 | */ | |||
6806 | static int fts5ExprNodeTest_STRING( | |||
6807 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | |||
6808 | Fts5ExprNode *pNode | |||
6809 | ){ | |||
6810 | Fts5ExprNearset *pNear = pNode->pNear; | |||
6811 | Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; | |||
6812 | int rc = SQLITE_OK0; | |||
6813 | i64 iLast; /* Lastest rowid any iterator points to */ | |||
6814 | int i, j; /* Phrase and token index, respectively */ | |||
6815 | int bMatch; /* True if all terms are at the same rowid */ | |||
6816 | const int bDesc = pExpr->bDesc; | |||
6817 | ||||
6818 | /* Check that this node should not be FTS5_TERM */ | |||
6819 | assert( pNear->nPhrase>1((void) (0)) | |||
6820 | || pNear->apPhrase[0]->nTerm>1((void) (0)) | |||
6821 | || pNear->apPhrase[0]->aTerm[0].pSynonym((void) (0)) | |||
6822 | || pNear->apPhrase[0]->aTerm[0].bFirst((void) (0)) | |||
6823 | )((void) (0)); | |||
6824 | ||||
6825 | /* Initialize iLast, the "lastest" rowid any iterator points to. If the | |||
6826 | ** iterator skips through rowids in the default ascending order, this means | |||
6827 | ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it | |||
6828 | ** means the minimum rowid. */ | |||
6829 | if( pLeft->aTerm[0].pSynonym ){ | |||
6830 | iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); | |||
6831 | }else{ | |||
6832 | iLast = pLeft->aTerm[0].pIter->iRowid; | |||
6833 | } | |||
6834 | ||||
6835 | do { | |||
6836 | bMatch = 1; | |||
6837 | for(i=0; i<pNear->nPhrase; i++){ | |||
6838 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
6839 | for(j=0; j<pPhrase->nTerm; j++){ | |||
6840 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; | |||
6841 | if( pTerm->pSynonym ){ | |||
6842 | i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); | |||
6843 | if( iRowid==iLast ) continue; | |||
6844 | bMatch = 0; | |||
6845 | if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ | |||
6846 | pNode->bNomatch = 0; | |||
6847 | pNode->bEof = 1; | |||
6848 | return rc; | |||
6849 | } | |||
6850 | }else{ | |||
6851 | Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; | |||
6852 | if( pIter->iRowid==iLast ) continue; | |||
6853 | bMatch = 0; | |||
6854 | if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ | |||
6855 | return rc; | |||
6856 | } | |||
6857 | } | |||
6858 | } | |||
6859 | } | |||
6860 | }while( bMatch==0 ); | |||
6861 | ||||
6862 | pNode->iRowid = iLast; | |||
6863 | pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK0); | |||
6864 | assert( pNode->bEof==0 || pNode->bNomatch==0 )((void) (0)); | |||
6865 | ||||
6866 | return rc; | |||
6867 | } | |||
6868 | ||||
6869 | /* | |||
6870 | ** Advance the first term iterator in the first phrase of pNear. Set output | |||
6871 | ** variable *pbEof to true if it reaches EOF or if an error occurs. | |||
6872 | ** | |||
6873 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
6874 | ** occurs. | |||
6875 | */ | |||
6876 | static int fts5ExprNodeNext_STRING( | |||
6877 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | |||
6878 | Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ | |||
6879 | int bFromValid, | |||
6880 | i64 iFrom | |||
6881 | ){ | |||
6882 | Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; | |||
6883 | int rc = SQLITE_OK0; | |||
6884 | ||||
6885 | pNode->bNomatch = 0; | |||
6886 | if( pTerm->pSynonym ){ | |||
6887 | int bEof = 1; | |||
6888 | Fts5ExprTerm *p; | |||
6889 | ||||
6890 | /* Find the firstest rowid any synonym points to. */ | |||
6891 | i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); | |||
6892 | ||||
6893 | /* Advance each iterator that currently points to iRowid. Or, if iFrom | |||
6894 | ** is valid - each iterator that points to a rowid before iFrom. */ | |||
6895 | for(p=pTerm; p; p=p->pSynonym){ | |||
6896 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | |||
6897 | i64 ii = p->pIter->iRowid; | |||
6898 | if( ii==iRowid | |||
6899 | || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) | |||
6900 | ){ | |||
6901 | if( bFromValid ){ | |||
6902 | rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); | |||
6903 | }else{ | |||
6904 | rc = sqlite3Fts5IterNext(p->pIter); | |||
6905 | } | |||
6906 | if( rc!=SQLITE_OK0 ) break; | |||
6907 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | |||
6908 | bEof = 0; | |||
6909 | } | |||
6910 | }else{ | |||
6911 | bEof = 0; | |||
6912 | } | |||
6913 | } | |||
6914 | } | |||
6915 | ||||
6916 | /* Set the EOF flag if either all synonym iterators are at EOF or an | |||
6917 | ** error has occurred. */ | |||
6918 | pNode->bEof = (rc || bEof); | |||
6919 | }else{ | |||
6920 | Fts5IndexIter *pIter = pTerm->pIter; | |||
6921 | ||||
6922 | assert( Fts5NodeIsString(pNode) )((void) (0)); | |||
6923 | if( bFromValid ){ | |||
6924 | rc = sqlite3Fts5IterNextFrom(pIter, iFrom); | |||
6925 | }else{ | |||
6926 | rc = sqlite3Fts5IterNext(pIter); | |||
6927 | } | |||
6928 | ||||
6929 | pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)((pIter)->bEof)); | |||
6930 | } | |||
6931 | ||||
6932 | if( pNode->bEof==0 ){ | |||
6933 | assert( rc==SQLITE_OK )((void) (0)); | |||
6934 | rc = fts5ExprNodeTest_STRING(pExpr, pNode); | |||
6935 | } | |||
6936 | ||||
6937 | return rc; | |||
6938 | } | |||
6939 | ||||
6940 | ||||
6941 | static int fts5ExprNodeTest_TERM( | |||
6942 | Fts5Expr *pExpr, /* Expression that pNear is a part of */ | |||
6943 | Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ | |||
6944 | ){ | |||
6945 | /* As this "NEAR" object is actually a single phrase that consists | |||
6946 | ** of a single term only, grab pointers into the poslist managed by the | |||
6947 | ** fts5_index.c iterator object. This is much faster than synthesizing | |||
6948 | ** a new poslist the way we have to for more complicated phrase or NEAR | |||
6949 | ** expressions. */ | |||
6950 | Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; | |||
6951 | Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; | |||
6952 | ||||
6953 | assert( pNode->eType==FTS5_TERM )((void) (0)); | |||
6954 | assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 )((void) (0)); | |||
6955 | assert( pPhrase->aTerm[0].pSynonym==0 )((void) (0)); | |||
6956 | ||||
6957 | pPhrase->poslist.n = pIter->nData; | |||
6958 | if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | |||
6959 | pPhrase->poslist.p = (u8*)pIter->pData; | |||
6960 | } | |||
6961 | pNode->iRowid = pIter->iRowid; | |||
6962 | pNode->bNomatch = (pPhrase->poslist.n==0); | |||
6963 | return SQLITE_OK0; | |||
6964 | } | |||
6965 | ||||
6966 | /* | |||
6967 | ** xNext() method for a node of type FTS5_TERM. | |||
6968 | */ | |||
6969 | static int fts5ExprNodeNext_TERM( | |||
6970 | Fts5Expr *pExpr, | |||
6971 | Fts5ExprNode *pNode, | |||
6972 | int bFromValid, | |||
6973 | i64 iFrom | |||
6974 | ){ | |||
6975 | int rc; | |||
6976 | Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; | |||
6977 | ||||
6978 | assert( pNode->bEof==0 )((void) (0)); | |||
6979 | if( bFromValid ){ | |||
6980 | rc = sqlite3Fts5IterNextFrom(pIter, iFrom); | |||
6981 | }else{ | |||
6982 | rc = sqlite3Fts5IterNext(pIter); | |||
6983 | } | |||
6984 | if( rc==SQLITE_OK0 && sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 ){ | |||
6985 | rc = fts5ExprNodeTest_TERM(pExpr, pNode); | |||
6986 | }else{ | |||
6987 | pNode->bEof = 1; | |||
6988 | pNode->bNomatch = 0; | |||
6989 | } | |||
6990 | return rc; | |||
6991 | } | |||
6992 | ||||
6993 | static void fts5ExprNodeTest_OR( | |||
6994 | Fts5Expr *pExpr, /* Expression of which pNode is a part */ | |||
6995 | Fts5ExprNode *pNode /* Expression node to test */ | |||
6996 | ){ | |||
6997 | Fts5ExprNode *pNext = pNode->apChild[0]; | |||
6998 | int i; | |||
6999 | ||||
7000 | for(i=1; i<pNode->nChild; i++){ | |||
7001 | Fts5ExprNode *pChild = pNode->apChild[i]; | |||
7002 | int cmp = fts5NodeCompare(pExpr, pNext, pChild); | |||
7003 | if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ | |||
7004 | pNext = pChild; | |||
7005 | } | |||
7006 | } | |||
7007 | pNode->iRowid = pNext->iRowid; | |||
7008 | pNode->bEof = pNext->bEof; | |||
7009 | pNode->bNomatch = pNext->bNomatch; | |||
7010 | } | |||
7011 | ||||
7012 | static int fts5ExprNodeNext_OR( | |||
7013 | Fts5Expr *pExpr, | |||
7014 | Fts5ExprNode *pNode, | |||
7015 | int bFromValid, | |||
7016 | i64 iFrom | |||
7017 | ){ | |||
7018 | int i; | |||
7019 | i64 iLast = pNode->iRowid; | |||
7020 | ||||
7021 | for(i=0; i<pNode->nChild; i++){ | |||
7022 | Fts5ExprNode *p1 = pNode->apChild[i]; | |||
7023 | assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 )((void) (0)); | |||
7024 | if( p1->bEof==0 ){ | |||
7025 | if( (p1->iRowid==iLast) | |||
7026 | || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) | |||
7027 | ){ | |||
7028 | int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom)(p1)->xNext((pExpr), (p1), (bFromValid), (iFrom)); | |||
7029 | if( rc!=SQLITE_OK0 ){ | |||
7030 | pNode->bNomatch = 0; | |||
7031 | return rc; | |||
7032 | } | |||
7033 | } | |||
7034 | } | |||
7035 | } | |||
7036 | ||||
7037 | fts5ExprNodeTest_OR(pExpr, pNode); | |||
7038 | return SQLITE_OK0; | |||
7039 | } | |||
7040 | ||||
7041 | /* | |||
7042 | ** Argument pNode is an FTS5_AND node. | |||
7043 | */ | |||
7044 | static int fts5ExprNodeTest_AND( | |||
7045 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | |||
7046 | Fts5ExprNode *pAnd /* FTS5_AND node to advance */ | |||
7047 | ){ | |||
7048 | int iChild; | |||
7049 | i64 iLast = pAnd->iRowid; | |||
7050 | int rc = SQLITE_OK0; | |||
7051 | int bMatch; | |||
7052 | ||||
7053 | assert( pAnd->bEof==0 )((void) (0)); | |||
7054 | do { | |||
7055 | pAnd->bNomatch = 0; | |||
7056 | bMatch = 1; | |||
7057 | for(iChild=0; iChild<pAnd->nChild; iChild++){ | |||
7058 | Fts5ExprNode *pChild = pAnd->apChild[iChild]; | |||
7059 | int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); | |||
7060 | if( cmp>0 ){ | |||
7061 | /* Advance pChild until it points to iLast or laster */ | |||
7062 | rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast)(pChild)->xNext((pExpr), (pChild), (1), (iLast)); | |||
7063 | if( rc!=SQLITE_OK0 ){ | |||
7064 | pAnd->bNomatch = 0; | |||
7065 | return rc; | |||
7066 | } | |||
7067 | } | |||
7068 | ||||
7069 | /* If the child node is now at EOF, so is the parent AND node. Otherwise, | |||
7070 | ** the child node is guaranteed to have advanced at least as far as | |||
7071 | ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the | |||
7072 | ** new lastest rowid seen so far. */ | |||
7073 | assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 )((void) (0)); | |||
7074 | if( pChild->bEof ){ | |||
7075 | fts5ExprSetEof(pAnd); | |||
7076 | bMatch = 1; | |||
7077 | break; | |||
7078 | }else if( iLast!=pChild->iRowid ){ | |||
7079 | bMatch = 0; | |||
7080 | iLast = pChild->iRowid; | |||
7081 | } | |||
7082 | ||||
7083 | if( pChild->bNomatch ){ | |||
7084 | pAnd->bNomatch = 1; | |||
7085 | } | |||
7086 | } | |||
7087 | }while( bMatch==0 ); | |||
7088 | ||||
7089 | if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ | |||
7090 | fts5ExprNodeZeroPoslist(pAnd); | |||
7091 | } | |||
7092 | pAnd->iRowid = iLast; | |||
7093 | return SQLITE_OK0; | |||
7094 | } | |||
7095 | ||||
7096 | static int fts5ExprNodeNext_AND( | |||
7097 | Fts5Expr *pExpr, | |||
7098 | Fts5ExprNode *pNode, | |||
7099 | int bFromValid, | |||
7100 | i64 iFrom | |||
7101 | ){ | |||
7102 | int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom)(pNode->apChild[0])->xNext((pExpr), (pNode->apChild[ 0]), (bFromValid), (iFrom)); | |||
7103 | if( rc==SQLITE_OK0 ){ | |||
7104 | rc = fts5ExprNodeTest_AND(pExpr, pNode); | |||
7105 | }else{ | |||
7106 | pNode->bNomatch = 0; | |||
7107 | } | |||
7108 | return rc; | |||
7109 | } | |||
7110 | ||||
7111 | static int fts5ExprNodeTest_NOT( | |||
7112 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | |||
7113 | Fts5ExprNode *pNode /* FTS5_NOT node to advance */ | |||
7114 | ){ | |||
7115 | int rc = SQLITE_OK0; | |||
7116 | Fts5ExprNode *p1 = pNode->apChild[0]; | |||
7117 | Fts5ExprNode *p2 = pNode->apChild[1]; | |||
7118 | assert( pNode->nChild==2 )((void) (0)); | |||
7119 | ||||
7120 | while( rc==SQLITE_OK0 && p1->bEof==0 ){ | |||
7121 | int cmp = fts5NodeCompare(pExpr, p1, p2); | |||
7122 | if( cmp>0 ){ | |||
7123 | rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid)(p2)->xNext((pExpr), (p2), (1), (p1->iRowid)); | |||
7124 | cmp = fts5NodeCompare(pExpr, p1, p2); | |||
7125 | } | |||
7126 | assert( rc!=SQLITE_OK || cmp<=0 )((void) (0)); | |||
7127 | if( cmp || p2->bNomatch ) break; | |||
7128 | rc = fts5ExprNodeNext(pExpr, p1, 0, 0)(p1)->xNext((pExpr), (p1), (0), (0)); | |||
7129 | } | |||
7130 | pNode->bEof = p1->bEof; | |||
7131 | pNode->bNomatch = p1->bNomatch; | |||
7132 | pNode->iRowid = p1->iRowid; | |||
7133 | if( p1->bEof ){ | |||
7134 | fts5ExprNodeZeroPoslist(p2); | |||
7135 | } | |||
7136 | return rc; | |||
7137 | } | |||
7138 | ||||
7139 | static int fts5ExprNodeNext_NOT( | |||
7140 | Fts5Expr *pExpr, | |||
7141 | Fts5ExprNode *pNode, | |||
7142 | int bFromValid, | |||
7143 | i64 iFrom | |||
7144 | ){ | |||
7145 | int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom)(pNode->apChild[0])->xNext((pExpr), (pNode->apChild[ 0]), (bFromValid), (iFrom)); | |||
7146 | if( rc==SQLITE_OK0 ){ | |||
7147 | rc = fts5ExprNodeTest_NOT(pExpr, pNode); | |||
7148 | } | |||
7149 | if( rc!=SQLITE_OK0 ){ | |||
7150 | pNode->bNomatch = 0; | |||
7151 | } | |||
7152 | return rc; | |||
7153 | } | |||
7154 | ||||
7155 | /* | |||
7156 | ** If pNode currently points to a match, this function returns SQLITE_OK | |||
7157 | ** without modifying it. Otherwise, pNode is advanced until it does point | |||
7158 | ** to a match or EOF is reached. | |||
7159 | */ | |||
7160 | static int fts5ExprNodeTest( | |||
7161 | Fts5Expr *pExpr, /* Expression of which pNode is a part */ | |||
7162 | Fts5ExprNode *pNode /* Expression node to test */ | |||
7163 | ){ | |||
7164 | int rc = SQLITE_OK0; | |||
7165 | if( pNode->bEof==0 ){ | |||
7166 | switch( pNode->eType ){ | |||
7167 | ||||
7168 | case FTS5_STRING9: { | |||
7169 | rc = fts5ExprNodeTest_STRING(pExpr, pNode); | |||
7170 | break; | |||
7171 | } | |||
7172 | ||||
7173 | case FTS5_TERM4: { | |||
7174 | rc = fts5ExprNodeTest_TERM(pExpr, pNode); | |||
7175 | break; | |||
7176 | } | |||
7177 | ||||
7178 | case FTS5_AND2: { | |||
7179 | rc = fts5ExprNodeTest_AND(pExpr, pNode); | |||
7180 | break; | |||
7181 | } | |||
7182 | ||||
7183 | case FTS5_OR1: { | |||
7184 | fts5ExprNodeTest_OR(pExpr, pNode); | |||
7185 | break; | |||
7186 | } | |||
7187 | ||||
7188 | default: assert( pNode->eType==FTS5_NOT )((void) (0)); { | |||
7189 | rc = fts5ExprNodeTest_NOT(pExpr, pNode); | |||
7190 | break; | |||
7191 | } | |||
7192 | } | |||
7193 | } | |||
7194 | return rc; | |||
7195 | } | |||
7196 | ||||
7197 | ||||
7198 | /* | |||
7199 | ** Set node pNode, which is part of expression pExpr, to point to the first | |||
7200 | ** match. If there are no matches, set the Node.bEof flag to indicate EOF. | |||
7201 | ** | |||
7202 | ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. | |||
7203 | ** It is not an error if there are no matches. | |||
7204 | */ | |||
7205 | static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ | |||
7206 | int rc = SQLITE_OK0; | |||
7207 | pNode->bEof = 0; | |||
7208 | pNode->bNomatch = 0; | |||
7209 | ||||
7210 | if( Fts5NodeIsString(pNode)((pNode)->eType==4 || (pNode)->eType==9) ){ | |||
7211 | /* Initialize all term iterators in the NEAR object. */ | |||
7212 | rc = fts5ExprNearInitAll(pExpr, pNode); | |||
7213 | }else if( pNode->xNext==0 ){ | |||
7214 | pNode->bEof = 1; | |||
7215 | }else{ | |||
7216 | int i; | |||
7217 | int nEof = 0; | |||
7218 | for(i=0; i<pNode->nChild && rc==SQLITE_OK0; i++){ | |||
7219 | Fts5ExprNode *pChild = pNode->apChild[i]; | |||
7220 | rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); | |||
7221 | assert( pChild->bEof==0 || pChild->bEof==1 )((void) (0)); | |||
7222 | nEof += pChild->bEof; | |||
7223 | } | |||
7224 | pNode->iRowid = pNode->apChild[0]->iRowid; | |||
7225 | ||||
7226 | switch( pNode->eType ){ | |||
7227 | case FTS5_AND2: | |||
7228 | if( nEof>0 ) fts5ExprSetEof(pNode); | |||
7229 | break; | |||
7230 | ||||
7231 | case FTS5_OR1: | |||
7232 | if( pNode->nChild==nEof ) fts5ExprSetEof(pNode); | |||
7233 | break; | |||
7234 | ||||
7235 | default: | |||
7236 | assert( pNode->eType==FTS5_NOT )((void) (0)); | |||
7237 | pNode->bEof = pNode->apChild[0]->bEof; | |||
7238 | break; | |||
7239 | } | |||
7240 | } | |||
7241 | ||||
7242 | if( rc==SQLITE_OK0 ){ | |||
7243 | rc = fts5ExprNodeTest(pExpr, pNode); | |||
7244 | } | |||
7245 | return rc; | |||
7246 | } | |||
7247 | ||||
7248 | ||||
7249 | /* | |||
7250 | ** Begin iterating through the set of documents in index pIdx matched by | |||
7251 | ** the MATCH expression passed as the first argument. If the "bDesc" | |||
7252 | ** parameter is passed a non-zero value, iteration is in descending rowid | |||
7253 | ** order. Or, if it is zero, in ascending order. | |||
7254 | ** | |||
7255 | ** If iterating in ascending rowid order (bDesc==0), the first document | |||
7256 | ** visited is that with the smallest rowid that is larger than or equal | |||
7257 | ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1), | |||
7258 | ** then the first document visited must have a rowid smaller than or | |||
7259 | ** equal to iFirst. | |||
7260 | ** | |||
7261 | ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It | |||
7262 | ** is not considered an error if the query does not match any documents. | |||
7263 | */ | |||
7264 | static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ | |||
7265 | Fts5ExprNode *pRoot = p->pRoot; | |||
7266 | int rc; /* Return code */ | |||
7267 | ||||
7268 | p->pIndex = pIdx; | |||
7269 | p->bDesc = bDesc; | |||
7270 | rc = fts5ExprNodeFirst(p, pRoot); | |||
7271 | ||||
7272 | /* If not at EOF but the current rowid occurs earlier than iFirst in | |||
7273 | ** the iteration order, move to document iFirst or later. */ | |||
7274 | if( rc==SQLITE_OK0 | |||
7275 | && 0==pRoot->bEof | |||
7276 | && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 | |||
7277 | ){ | |||
7278 | rc = fts5ExprNodeNext(p, pRoot, 1, iFirst)(pRoot)->xNext((p), (pRoot), (1), (iFirst)); | |||
7279 | } | |||
7280 | ||||
7281 | /* If the iterator is not at a real match, skip forward until it is. */ | |||
7282 | while( pRoot->bNomatch && rc==SQLITE_OK0 ){ | |||
7283 | assert( pRoot->bEof==0 )((void) (0)); | |||
7284 | rc = fts5ExprNodeNext(p, pRoot, 0, 0)(pRoot)->xNext((p), (pRoot), (0), (0)); | |||
7285 | } | |||
7286 | return rc; | |||
7287 | } | |||
7288 | ||||
7289 | /* | |||
7290 | ** Move to the next document | |||
7291 | ** | |||
7292 | ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It | |||
7293 | ** is not considered an error if the query does not match any documents. | |||
7294 | */ | |||
7295 | static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ | |||
7296 | int rc; | |||
7297 | Fts5ExprNode *pRoot = p->pRoot; | |||
7298 | assert( pRoot->bEof==0 && pRoot->bNomatch==0 )((void) (0)); | |||
7299 | do { | |||
7300 | rc = fts5ExprNodeNext(p, pRoot, 0, 0)(pRoot)->xNext((p), (pRoot), (0), (0)); | |||
7301 | assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) )((void) (0)); | |||
7302 | }while( pRoot->bNomatch ); | |||
7303 | if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ | |||
7304 | pRoot->bEof = 1; | |||
7305 | } | |||
7306 | return rc; | |||
7307 | } | |||
7308 | ||||
7309 | static int sqlite3Fts5ExprEof(Fts5Expr *p){ | |||
7310 | return p->pRoot->bEof; | |||
7311 | } | |||
7312 | ||||
7313 | static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ | |||
7314 | return p->pRoot->iRowid; | |||
7315 | } | |||
7316 | ||||
7317 | static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ | |||
7318 | int rc = SQLITE_OK0; | |||
7319 | *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); | |||
7320 | return rc; | |||
7321 | } | |||
7322 | ||||
7323 | /* | |||
7324 | ** Free the phrase object passed as the only argument. | |||
7325 | */ | |||
7326 | static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ | |||
7327 | if( pPhrase ){ | |||
7328 | int i; | |||
7329 | for(i=0; i<pPhrase->nTerm; i++){ | |||
7330 | Fts5ExprTerm *pSyn; | |||
7331 | Fts5ExprTerm *pNext; | |||
7332 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; | |||
7333 | sqlite3_freesqlite3_api->free(pTerm->pTerm); | |||
7334 | sqlite3Fts5IterClose(pTerm->pIter); | |||
7335 | for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ | |||
7336 | pNext = pSyn->pSynonym; | |||
7337 | sqlite3Fts5IterClose(pSyn->pIter); | |||
7338 | fts5BufferFree((Fts5Buffer*)&pSyn[1])sqlite3Fts5BufferFree((Fts5Buffer*)&pSyn[1]); | |||
7339 | sqlite3_freesqlite3_api->free(pSyn); | |||
7340 | } | |||
7341 | } | |||
7342 | if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist)sqlite3Fts5BufferFree(&pPhrase->poslist); | |||
7343 | sqlite3_freesqlite3_api->free(pPhrase); | |||
7344 | } | |||
7345 | } | |||
7346 | ||||
7347 | /* | |||
7348 | ** Set the "bFirst" flag on the first token of the phrase passed as the | |||
7349 | ** only argument. | |||
7350 | */ | |||
7351 | static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){ | |||
7352 | if( pPhrase && pPhrase->nTerm ){ | |||
7353 | pPhrase->aTerm[0].bFirst = 1; | |||
7354 | } | |||
7355 | } | |||
7356 | ||||
7357 | /* | |||
7358 | ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated | |||
7359 | ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is | |||
7360 | ** appended to it and the results returned. | |||
7361 | ** | |||
7362 | ** If an OOM error occurs, both the pNear and pPhrase objects are freed and | |||
7363 | ** NULL returned. | |||
7364 | */ | |||
7365 | static Fts5ExprNearset *sqlite3Fts5ParseNearset( | |||
7366 | Fts5Parse *pParse, /* Parse context */ | |||
7367 | Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ | |||
7368 | Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ | |||
7369 | ){ | |||
7370 | const int SZALLOC = 8; | |||
7371 | Fts5ExprNearset *pRet = 0; | |||
7372 | ||||
7373 | if( pParse->rc==SQLITE_OK0 ){ | |||
7374 | if( pNear==0 ){ | |||
7375 | sqlite3_int64 nByte; | |||
7376 | nByte = SZ_FTS5EXPRNEARSET(SZALLOC+1)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(SZALLOC+1)*sizeof (Fts5ExprPhrase*)); | |||
7377 | pRet = sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
7378 | if( pRet==0 ){ | |||
7379 | pParse->rc = SQLITE_NOMEM7; | |||
7380 | }else{ | |||
7381 | memset(pRet, 0, (size_t)nByte); | |||
7382 | } | |||
7383 | }else if( (pNear->nPhrase % SZALLOC)==0 ){ | |||
7384 | int nNew = pNear->nPhrase + SZALLOC; | |||
7385 | sqlite3_int64 nByte; | |||
7386 | ||||
7387 | nByte = SZ_FTS5EXPRNEARSET(nNew+1)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(nNew+1)*sizeof (Fts5ExprPhrase*)); | |||
7388 | pRet = (Fts5ExprNearset*)sqlite3_realloc64sqlite3_api->realloc64(pNear, nByte); | |||
7389 | if( pRet==0 ){ | |||
7390 | pParse->rc = SQLITE_NOMEM7; | |||
7391 | } | |||
7392 | }else{ | |||
7393 | pRet = pNear; | |||
7394 | } | |||
7395 | } | |||
7396 | ||||
7397 | if( pRet==0 ){ | |||
7398 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | |||
7399 | sqlite3Fts5ParseNearsetFree(pNear); | |||
7400 | sqlite3Fts5ParsePhraseFree(pPhrase); | |||
7401 | }else{ | |||
7402 | if( pRet->nPhrase>0 ){ | |||
7403 | Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1]; | |||
7404 | assert( pParse!=0 )((void) (0)); | |||
7405 | assert( pParse->apPhrase!=0 )((void) (0)); | |||
7406 | assert( pParse->nPhrase>=2 )((void) (0)); | |||
7407 | assert( pLast==pParse->apPhrase[pParse->nPhrase-2] )((void) (0)); | |||
7408 | if( pPhrase->nTerm==0 ){ | |||
7409 | fts5ExprPhraseFree(pPhrase); | |||
7410 | pRet->nPhrase--; | |||
7411 | pParse->nPhrase--; | |||
7412 | pPhrase = pLast; | |||
7413 | }else if( pLast->nTerm==0 ){ | |||
7414 | fts5ExprPhraseFree(pLast); | |||
7415 | pParse->apPhrase[pParse->nPhrase-2] = pPhrase; | |||
7416 | pParse->nPhrase--; | |||
7417 | pRet->nPhrase--; | |||
7418 | } | |||
7419 | } | |||
7420 | pRet->apPhrase[pRet->nPhrase++] = pPhrase; | |||
7421 | } | |||
7422 | return pRet; | |||
7423 | } | |||
7424 | ||||
7425 | typedef struct TokenCtx TokenCtx; | |||
7426 | struct TokenCtx { | |||
7427 | Fts5ExprPhrase *pPhrase; | |||
7428 | Fts5Config *pConfig; | |||
7429 | int rc; | |||
7430 | }; | |||
7431 | ||||
7432 | /* | |||
7433 | ** Callback for tokenizing terms used by ParseTerm(). | |||
7434 | */ | |||
7435 | static int fts5ParseTokenize( | |||
7436 | void *pContext, /* Pointer to Fts5InsertCtx object */ | |||
7437 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
7438 | const char *pToken, /* Buffer containing token */ | |||
7439 | int nToken, /* Size of token in bytes */ | |||
7440 | int iUnused1, /* Start offset of token */ | |||
7441 | int iUnused2 /* End offset of token */ | |||
7442 | ){ | |||
7443 | int rc = SQLITE_OK0; | |||
7444 | const int SZALLOC = 8; | |||
7445 | TokenCtx *pCtx = (TokenCtx*)pContext; | |||
7446 | Fts5ExprPhrase *pPhrase = pCtx->pPhrase; | |||
7447 | ||||
7448 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
7449 | ||||
7450 | /* If an error has already occurred, this is a no-op */ | |||
7451 | if( pCtx->rc!=SQLITE_OK0 ) return pCtx->rc; | |||
7452 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | |||
7453 | ||||
7454 | if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED0x0001) ){ | |||
7455 | Fts5ExprTerm *pSyn; | |||
7456 | sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; | |||
7457 | pSyn = (Fts5ExprTerm*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
7458 | if( pSyn==0 ){ | |||
7459 | rc = SQLITE_NOMEM7; | |||
7460 | }else{ | |||
7461 | memset(pSyn, 0, (size_t)nByte); | |||
7462 | pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); | |||
7463 | pSyn->nFullTerm = pSyn->nQueryTerm = nToken; | |||
7464 | if( pCtx->pConfig->bTokendata ){ | |||
7465 | pSyn->nQueryTerm = (int)strlen(pSyn->pTerm); | |||
7466 | } | |||
7467 | memcpy(pSyn->pTerm, pToken, nToken); | |||
7468 | pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; | |||
7469 | pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; | |||
7470 | } | |||
7471 | }else{ | |||
7472 | Fts5ExprTerm *pTerm; | |||
7473 | if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ | |||
7474 | Fts5ExprPhrase *pNew; | |||
7475 | int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); | |||
7476 | ||||
7477 | pNew = (Fts5ExprPhrase*)sqlite3_realloc64sqlite3_api->realloc64(pPhrase, | |||
7478 | SZ_FTS5EXPRPHRASE(nNew+1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (nNew+1)*sizeof( Fts5ExprTerm)) | |||
7479 | ); | |||
7480 | if( pNew==0 ){ | |||
7481 | rc = SQLITE_NOMEM7; | |||
7482 | }else{ | |||
7483 | if( pPhrase==0 ) memset(pNew, 0, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | |||
7484 | pCtx->pPhrase = pPhrase = pNew; | |||
7485 | pNew->nTerm = nNew - SZALLOC; | |||
7486 | } | |||
7487 | } | |||
7488 | ||||
7489 | if( rc==SQLITE_OK0 ){ | |||
7490 | pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; | |||
7491 | memset(pTerm, 0, sizeof(Fts5ExprTerm)); | |||
7492 | pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); | |||
7493 | pTerm->nFullTerm = pTerm->nQueryTerm = nToken; | |||
7494 | if( pCtx->pConfig->bTokendata && rc==SQLITE_OK0 ){ | |||
7495 | pTerm->nQueryTerm = (int)strlen(pTerm->pTerm); | |||
7496 | } | |||
7497 | } | |||
7498 | } | |||
7499 | ||||
7500 | pCtx->rc = rc; | |||
7501 | return rc; | |||
7502 | } | |||
7503 | ||||
7504 | ||||
7505 | /* | |||
7506 | ** Free the phrase object passed as the only argument. | |||
7507 | */ | |||
7508 | static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ | |||
7509 | fts5ExprPhraseFree(pPhrase); | |||
7510 | } | |||
7511 | ||||
7512 | /* | |||
7513 | ** Free the phrase object passed as the second argument. | |||
7514 | */ | |||
7515 | static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ | |||
7516 | if( pNear ){ | |||
7517 | int i; | |||
7518 | for(i=0; i<pNear->nPhrase; i++){ | |||
7519 | fts5ExprPhraseFree(pNear->apPhrase[i]); | |||
7520 | } | |||
7521 | sqlite3_freesqlite3_api->free(pNear->pColset); | |||
7522 | sqlite3_freesqlite3_api->free(pNear); | |||
7523 | } | |||
7524 | } | |||
7525 | ||||
7526 | static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ | |||
7527 | assert( pParse->pExpr==0 )((void) (0)); | |||
7528 | pParse->pExpr = p; | |||
7529 | } | |||
7530 | ||||
7531 | static int parseGrowPhraseArray(Fts5Parse *pParse){ | |||
7532 | if( (pParse->nPhrase % 8)==0 ){ | |||
7533 | sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); | |||
7534 | Fts5ExprPhrase **apNew; | |||
7535 | apNew = (Fts5ExprPhrase**)sqlite3_realloc64sqlite3_api->realloc64(pParse->apPhrase, nByte); | |||
7536 | if( apNew==0 ){ | |||
7537 | pParse->rc = SQLITE_NOMEM7; | |||
7538 | return SQLITE_NOMEM7; | |||
7539 | } | |||
7540 | pParse->apPhrase = apNew; | |||
7541 | } | |||
7542 | return SQLITE_OK0; | |||
7543 | } | |||
7544 | ||||
7545 | /* | |||
7546 | ** This function is called by the parser to process a string token. The | |||
7547 | ** string may or may not be quoted. In any case it is tokenized and a | |||
7548 | ** phrase object consisting of all tokens returned. | |||
7549 | */ | |||
7550 | static Fts5ExprPhrase *sqlite3Fts5ParseTerm( | |||
7551 | Fts5Parse *pParse, /* Parse context */ | |||
7552 | Fts5ExprPhrase *pAppend, /* Phrase to append to */ | |||
7553 | Fts5Token *pToken, /* String to tokenize */ | |||
7554 | int bPrefix /* True if there is a trailing "*" */ | |||
7555 | ){ | |||
7556 | Fts5Config *pConfig = pParse->pConfig; | |||
7557 | TokenCtx sCtx; /* Context object passed to callback */ | |||
7558 | int rc; /* Tokenize return code */ | |||
7559 | char *z = 0; | |||
7560 | ||||
7561 | memset(&sCtx, 0, sizeof(TokenCtx)); | |||
7562 | sCtx.pPhrase = pAppend; | |||
7563 | sCtx.pConfig = pConfig; | |||
7564 | ||||
7565 | rc = fts5ParseStringFromToken(pToken, &z); | |||
7566 | if( rc==SQLITE_OK0 ){ | |||
7567 | int flags = FTS5_TOKENIZE_QUERY0x0001 | (bPrefix ? FTS5_TOKENIZE_PREFIX0x0002 : 0); | |||
7568 | int n; | |||
7569 | sqlite3Fts5Dequote(z); | |||
7570 | n = (int)strlen(z); | |||
7571 | rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); | |||
7572 | } | |||
7573 | sqlite3_freesqlite3_api->free(z); | |||
7574 | if( rc || (rc = sCtx.rc) ){ | |||
7575 | pParse->rc = rc; | |||
7576 | fts5ExprPhraseFree(sCtx.pPhrase); | |||
7577 | sCtx.pPhrase = 0; | |||
7578 | }else{ | |||
7579 | ||||
7580 | if( pAppend==0 ){ | |||
7581 | if( parseGrowPhraseArray(pParse) ){ | |||
7582 | fts5ExprPhraseFree(sCtx.pPhrase); | |||
7583 | return 0; | |||
7584 | } | |||
7585 | pParse->nPhrase++; | |||
7586 | } | |||
7587 | ||||
7588 | if( sCtx.pPhrase==0 ){ | |||
7589 | /* This happens when parsing a token or quoted phrase that contains | |||
7590 | ** no token characters at all. (e.g ... MATCH '""'). */ | |||
7591 | sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | |||
7592 | }else if( sCtx.pPhrase->nTerm ){ | |||
7593 | sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix; | |||
7594 | } | |||
7595 | assert( pParse->apPhrase!=0 )((void) (0)); | |||
7596 | pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; | |||
7597 | } | |||
7598 | ||||
7599 | return sCtx.pPhrase; | |||
7600 | } | |||
7601 | ||||
7602 | /* | |||
7603 | ** Create a new FTS5 expression by cloning phrase iPhrase of the | |||
7604 | ** expression passed as the second argument. | |||
7605 | */ | |||
7606 | static int sqlite3Fts5ExprClonePhrase( | |||
7607 | Fts5Expr *pExpr, | |||
7608 | int iPhrase, | |||
7609 | Fts5Expr **ppNew | |||
7610 | ){ | |||
7611 | int rc = SQLITE_OK0; /* Return code */ | |||
7612 | Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */ | |||
7613 | Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ | |||
7614 | TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */ | |||
7615 | if( !pExpr || iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | |||
7616 | rc = SQLITE_RANGE25; | |||
7617 | }else{ | |||
7618 | pOrig = pExpr->apExprPhrase[iPhrase]; | |||
7619 | pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); | |||
7620 | } | |||
7621 | if( rc==SQLITE_OK0 ){ | |||
7622 | pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, | |||
7623 | sizeof(Fts5ExprPhrase*)); | |||
7624 | } | |||
7625 | if( rc==SQLITE_OK0 ){ | |||
7626 | pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, SZ_FTS5EXPRNODE(1)(__builtin_offsetof(Fts5ExprNode, apChild) + (1)*sizeof(Fts5ExprNode *))); | |||
7627 | } | |||
7628 | if( rc==SQLITE_OK0 ){ | |||
7629 | pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, | |||
7630 | SZ_FTS5EXPRNEARSET(2)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(2)*sizeof(Fts5ExprPhrase *))); | |||
7631 | } | |||
7632 | if( rc==SQLITE_OK0 && ALWAYS(pOrig!=0)(pOrig!=0) ){ | |||
7633 | Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset; | |||
7634 | if( pColsetOrig ){ | |||
7635 | sqlite3_int64 nByte; | |||
7636 | Fts5Colset *pColset; | |||
7637 | nByte = SZ_FTS5COLSET(pColsetOrig->nCol)(sizeof(i64)*((pColsetOrig->nCol+2)/2)); | |||
7638 | pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte); | |||
7639 | if( pColset ){ | |||
7640 | memcpy(pColset, pColsetOrig, (size_t)nByte); | |||
7641 | } | |||
7642 | pNew->pRoot->pNear->pColset = pColset; | |||
7643 | } | |||
7644 | } | |||
7645 | ||||
7646 | if( rc==SQLITE_OK0 ){ | |||
7647 | if( pOrig->nTerm ){ | |||
7648 | int i; /* Used to iterate through phrase terms */ | |||
7649 | sCtx.pConfig = pExpr->pConfig; | |||
7650 | for(i=0; rc==SQLITE_OK0 && i<pOrig->nTerm; i++){ | |||
7651 | int tflags = 0; | |||
7652 | Fts5ExprTerm *p; | |||
7653 | for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK0; p=p->pSynonym){ | |||
7654 | rc = fts5ParseTokenize((void*)&sCtx,tflags,p->pTerm,p->nFullTerm,0,0); | |||
7655 | tflags = FTS5_TOKEN_COLOCATED0x0001; | |||
7656 | } | |||
7657 | if( rc==SQLITE_OK0 ){ | |||
7658 | sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; | |||
7659 | sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst; | |||
7660 | } | |||
7661 | } | |||
7662 | }else{ | |||
7663 | /* This happens when parsing a token or quoted phrase that contains | |||
7664 | ** no token characters at all. (e.g ... MATCH '""'). */ | |||
7665 | sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | |||
7666 | } | |||
7667 | } | |||
7668 | ||||
7669 | if( rc==SQLITE_OK0 && ALWAYS(sCtx.pPhrase)(sCtx.pPhrase) ){ | |||
7670 | /* All the allocations succeeded. Put the expression object together. */ | |||
7671 | pNew->pIndex = pExpr->pIndex; | |||
7672 | pNew->pConfig = pExpr->pConfig; | |||
7673 | pNew->nPhrase = 1; | |||
7674 | pNew->apExprPhrase[0] = sCtx.pPhrase; | |||
7675 | pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; | |||
7676 | pNew->pRoot->pNear->nPhrase = 1; | |||
7677 | sCtx.pPhrase->pNode = pNew->pRoot; | |||
7678 | ||||
7679 | if( pOrig->nTerm==1 | |||
7680 | && pOrig->aTerm[0].pSynonym==0 | |||
7681 | && pOrig->aTerm[0].bFirst==0 | |||
7682 | ){ | |||
7683 | pNew->pRoot->eType = FTS5_TERM4; | |||
7684 | pNew->pRoot->xNext = fts5ExprNodeNext_TERM; | |||
7685 | }else{ | |||
7686 | pNew->pRoot->eType = FTS5_STRING9; | |||
7687 | pNew->pRoot->xNext = fts5ExprNodeNext_STRING; | |||
7688 | } | |||
7689 | }else{ | |||
7690 | sqlite3Fts5ExprFree(pNew); | |||
7691 | fts5ExprPhraseFree(sCtx.pPhrase); | |||
7692 | pNew = 0; | |||
7693 | } | |||
7694 | ||||
7695 | *ppNew = pNew; | |||
7696 | return rc; | |||
7697 | } | |||
7698 | ||||
7699 | ||||
7700 | /* | |||
7701 | ** Token pTok has appeared in a MATCH expression where the NEAR operator | |||
7702 | ** is expected. If token pTok does not contain "NEAR", store an error | |||
7703 | ** in the pParse object. | |||
7704 | */ | |||
7705 | static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ | |||
7706 | if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ | |||
7707 | sqlite3Fts5ParseError( | |||
7708 | pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p | |||
7709 | ); | |||
7710 | } | |||
7711 | } | |||
7712 | ||||
7713 | static void sqlite3Fts5ParseSetDistance( | |||
7714 | Fts5Parse *pParse, | |||
7715 | Fts5ExprNearset *pNear, | |||
7716 | Fts5Token *p | |||
7717 | ){ | |||
7718 | if( pNear ){ | |||
7719 | int nNear = 0; | |||
7720 | int i; | |||
7721 | if( p->n ){ | |||
7722 | for(i=0; i<p->n; i++){ | |||
7723 | char c = (char)p->p[i]; | |||
7724 | if( c<'0' || c>'9' ){ | |||
7725 | sqlite3Fts5ParseError( | |||
7726 | pParse, "expected integer, got \"%.*s\"", p->n, p->p | |||
7727 | ); | |||
7728 | return; | |||
7729 | } | |||
7730 | if( nNear<214748363 ) nNear = nNear * 10 + (p->p[i] - '0'); | |||
7731 | /* ^^^^^^^^^^^^^^^--- Prevent integer overflow */ | |||
7732 | } | |||
7733 | }else{ | |||
7734 | nNear = FTS5_DEFAULT_NEARDIST10; | |||
7735 | } | |||
7736 | pNear->nNear = nNear; | |||
7737 | } | |||
7738 | } | |||
7739 | ||||
7740 | /* | |||
7741 | ** The second argument passed to this function may be NULL, or it may be | |||
7742 | ** an existing Fts5Colset object. This function returns a pointer to | |||
7743 | ** a new colset object containing the contents of (p) with new value column | |||
7744 | ** number iCol appended. | |||
7745 | ** | |||
7746 | ** If an OOM error occurs, store an error code in pParse and return NULL. | |||
7747 | ** The old colset object (if any) is not freed in this case. | |||
7748 | */ | |||
7749 | static Fts5Colset *fts5ParseColset( | |||
7750 | Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ | |||
7751 | Fts5Colset *p, /* Existing colset object */ | |||
7752 | int iCol /* New column to add to colset object */ | |||
7753 | ){ | |||
7754 | int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ | |||
7755 | Fts5Colset *pNew; /* New colset object to return */ | |||
7756 | ||||
7757 | assert( pParse->rc==SQLITE_OK )((void) (0)); | |||
7758 | assert( iCol>=0 && iCol<pParse->pConfig->nCol )((void) (0)); | |||
7759 | ||||
7760 | pNew = sqlite3_realloc64sqlite3_api->realloc64(p, SZ_FTS5COLSET(nCol+1)(sizeof(i64)*((nCol+1 +2)/2))); | |||
7761 | if( pNew==0 ){ | |||
7762 | pParse->rc = SQLITE_NOMEM7; | |||
7763 | }else{ | |||
7764 | int *aiCol = pNew->aiCol; | |||
7765 | int i, j; | |||
7766 | for(i=0; i<nCol; i++){ | |||
7767 | if( aiCol[i]==iCol ) return pNew; | |||
7768 | if( aiCol[i]>iCol ) break; | |||
7769 | } | |||
7770 | for(j=nCol; j>i; j--){ | |||
7771 | aiCol[j] = aiCol[j-1]; | |||
7772 | } | |||
7773 | aiCol[i] = iCol; | |||
7774 | pNew->nCol = nCol+1; | |||
7775 | ||||
7776 | #ifndef NDEBUG1 | |||
7777 | /* Check that the array is in order and contains no duplicate entries. */ | |||
7778 | for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] )((void) (0)); | |||
7779 | #endif | |||
7780 | } | |||
7781 | ||||
7782 | return pNew; | |||
7783 | } | |||
7784 | ||||
7785 | /* | |||
7786 | ** Allocate and return an Fts5Colset object specifying the inverse of | |||
7787 | ** the colset passed as the second argument. Free the colset passed | |||
7788 | ** as the second argument before returning. | |||
7789 | */ | |||
7790 | static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){ | |||
7791 | Fts5Colset *pRet; | |||
7792 | int nCol = pParse->pConfig->nCol; | |||
7793 | ||||
7794 | pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc, | |||
7795 | SZ_FTS5COLSET(nCol+1)(sizeof(i64)*((nCol+1 +2)/2)) | |||
7796 | ); | |||
7797 | if( pRet ){ | |||
7798 | int i; | |||
7799 | int iOld = 0; | |||
7800 | for(i=0; i<nCol; i++){ | |||
7801 | if( iOld>=p->nCol || p->aiCol[iOld]!=i ){ | |||
7802 | pRet->aiCol[pRet->nCol++] = i; | |||
7803 | }else{ | |||
7804 | iOld++; | |||
7805 | } | |||
7806 | } | |||
7807 | } | |||
7808 | ||||
7809 | sqlite3_freesqlite3_api->free(p); | |||
7810 | return pRet; | |||
7811 | } | |||
7812 | ||||
7813 | static Fts5Colset *sqlite3Fts5ParseColset( | |||
7814 | Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ | |||
7815 | Fts5Colset *pColset, /* Existing colset object */ | |||
7816 | Fts5Token *p | |||
7817 | ){ | |||
7818 | Fts5Colset *pRet = 0; | |||
7819 | int iCol; | |||
7820 | char *z; /* Dequoted copy of token p */ | |||
7821 | ||||
7822 | z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); | |||
7823 | if( pParse->rc==SQLITE_OK0 ){ | |||
7824 | Fts5Config *pConfig = pParse->pConfig; | |||
7825 | sqlite3Fts5Dequote(z); | |||
7826 | for(iCol=0; iCol<pConfig->nCol; iCol++){ | |||
7827 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(pConfig->azCol[iCol], z) ) break; | |||
7828 | } | |||
7829 | if( iCol==pConfig->nCol ){ | |||
7830 | sqlite3Fts5ParseError(pParse, "no such column: %s", z); | |||
7831 | }else{ | |||
7832 | pRet = fts5ParseColset(pParse, pColset, iCol); | |||
7833 | } | |||
7834 | sqlite3_freesqlite3_api->free(z); | |||
7835 | } | |||
7836 | ||||
7837 | if( pRet==0 ){ | |||
7838 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | |||
7839 | sqlite3_freesqlite3_api->free(pColset); | |||
7840 | } | |||
7841 | ||||
7842 | return pRet; | |||
7843 | } | |||
7844 | ||||
7845 | /* | |||
7846 | ** If argument pOrig is NULL, or if (*pRc) is set to anything other than | |||
7847 | ** SQLITE_OK when this function is called, NULL is returned. | |||
7848 | ** | |||
7849 | ** Otherwise, a copy of (*pOrig) is made into memory obtained from | |||
7850 | ** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation | |||
7851 | ** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned. | |||
7852 | */ | |||
7853 | static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){ | |||
7854 | Fts5Colset *pRet; | |||
7855 | if( pOrig ){ | |||
7856 | sqlite3_int64 nByte = SZ_FTS5COLSET(pOrig->nCol)(sizeof(i64)*((pOrig->nCol+2)/2)); | |||
7857 | pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte); | |||
7858 | if( pRet ){ | |||
7859 | memcpy(pRet, pOrig, (size_t)nByte); | |||
7860 | } | |||
7861 | }else{ | |||
7862 | pRet = 0; | |||
7863 | } | |||
7864 | return pRet; | |||
7865 | } | |||
7866 | ||||
7867 | /* | |||
7868 | ** Remove from colset pColset any columns that are not also in colset pMerge. | |||
7869 | */ | |||
7870 | static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){ | |||
7871 | int iIn = 0; /* Next input in pColset */ | |||
7872 | int iMerge = 0; /* Next input in pMerge */ | |||
7873 | int iOut = 0; /* Next output slot in pColset */ | |||
7874 | ||||
7875 | while( iIn<pColset->nCol && iMerge<pMerge->nCol ){ | |||
7876 | int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge]; | |||
7877 | if( iDiff==0 ){ | |||
7878 | pColset->aiCol[iOut++] = pMerge->aiCol[iMerge]; | |||
7879 | iMerge++; | |||
7880 | iIn++; | |||
7881 | }else if( iDiff>0 ){ | |||
7882 | iMerge++; | |||
7883 | }else{ | |||
7884 | iIn++; | |||
7885 | } | |||
7886 | } | |||
7887 | pColset->nCol = iOut; | |||
7888 | } | |||
7889 | ||||
7890 | /* | |||
7891 | ** Recursively apply colset pColset to expression node pNode and all of | |||
7892 | ** its decendents. If (*ppFree) is not NULL, it contains a spare copy | |||
7893 | ** of pColset. This function may use the spare copy and set (*ppFree) to | |||
7894 | ** zero, or it may create copies of pColset using fts5CloneColset(). | |||
7895 | */ | |||
7896 | static void fts5ParseSetColset( | |||
7897 | Fts5Parse *pParse, | |||
7898 | Fts5ExprNode *pNode, | |||
7899 | Fts5Colset *pColset, | |||
7900 | Fts5Colset **ppFree | |||
7901 | ){ | |||
7902 | if( pParse->rc==SQLITE_OK0 ){ | |||
7903 | assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING((void) (0)) | |||
7904 | || pNode->eType==FTS5_AND || pNode->eType==FTS5_OR((void) (0)) | |||
7905 | || pNode->eType==FTS5_NOT || pNode->eType==FTS5_EOF((void) (0)) | |||
7906 | )((void) (0)); | |||
7907 | if( pNode->eType==FTS5_STRING9 || pNode->eType==FTS5_TERM4 ){ | |||
7908 | Fts5ExprNearset *pNear = pNode->pNear; | |||
7909 | if( pNear->pColset ){ | |||
7910 | fts5MergeColset(pNear->pColset, pColset); | |||
7911 | if( pNear->pColset->nCol==0 ){ | |||
7912 | pNode->eType = FTS5_EOF0; | |||
7913 | pNode->xNext = 0; | |||
7914 | } | |||
7915 | }else if( *ppFree ){ | |||
7916 | pNear->pColset = pColset; | |||
7917 | *ppFree = 0; | |||
7918 | }else{ | |||
7919 | pNear->pColset = fts5CloneColset(&pParse->rc, pColset); | |||
7920 | } | |||
7921 | }else{ | |||
7922 | int i; | |||
7923 | assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 )((void) (0)); | |||
7924 | for(i=0; i<pNode->nChild; i++){ | |||
7925 | fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree); | |||
7926 | } | |||
7927 | } | |||
7928 | } | |||
7929 | } | |||
7930 | ||||
7931 | /* | |||
7932 | ** Apply colset pColset to expression node pExpr and all of its descendents. | |||
7933 | */ | |||
7934 | static void sqlite3Fts5ParseSetColset( | |||
7935 | Fts5Parse *pParse, | |||
7936 | Fts5ExprNode *pExpr, | |||
7937 | Fts5Colset *pColset | |||
7938 | ){ | |||
7939 | Fts5Colset *pFree = pColset; | |||
7940 | if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
7941 | sqlite3Fts5ParseError(pParse, | |||
7942 | "fts5: column queries are not supported (detail=none)" | |||
7943 | ); | |||
7944 | }else{ | |||
7945 | fts5ParseSetColset(pParse, pExpr, pColset, &pFree); | |||
7946 | } | |||
7947 | sqlite3_freesqlite3_api->free(pFree); | |||
7948 | } | |||
7949 | ||||
7950 | static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ | |||
7951 | switch( pNode->eType ){ | |||
7952 | case FTS5_STRING9: { | |||
7953 | Fts5ExprNearset *pNear = pNode->pNear; | |||
7954 | if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 | |||
7955 | && pNear->apPhrase[0]->aTerm[0].pSynonym==0 | |||
7956 | && pNear->apPhrase[0]->aTerm[0].bFirst==0 | |||
7957 | ){ | |||
7958 | pNode->eType = FTS5_TERM4; | |||
7959 | pNode->xNext = fts5ExprNodeNext_TERM; | |||
7960 | }else{ | |||
7961 | pNode->xNext = fts5ExprNodeNext_STRING; | |||
7962 | } | |||
7963 | break; | |||
7964 | }; | |||
7965 | ||||
7966 | case FTS5_OR1: { | |||
7967 | pNode->xNext = fts5ExprNodeNext_OR; | |||
7968 | break; | |||
7969 | }; | |||
7970 | ||||
7971 | case FTS5_AND2: { | |||
7972 | pNode->xNext = fts5ExprNodeNext_AND; | |||
7973 | break; | |||
7974 | }; | |||
7975 | ||||
7976 | default: assert( pNode->eType==FTS5_NOT )((void) (0)); { | |||
7977 | pNode->xNext = fts5ExprNodeNext_NOT; | |||
7978 | break; | |||
7979 | }; | |||
7980 | } | |||
7981 | } | |||
7982 | ||||
7983 | /* | |||
7984 | ** Add pSub as a child of p. | |||
7985 | */ | |||
7986 | static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ | |||
7987 | int ii = p->nChild; | |||
7988 | if( p->eType!=FTS5_NOT3 && pSub->eType==p->eType ){ | |||
7989 | int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; | |||
7990 | memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); | |||
7991 | p->nChild += pSub->nChild; | |||
7992 | sqlite3_freesqlite3_api->free(pSub); | |||
7993 | }else{ | |||
7994 | p->apChild[p->nChild++] = pSub; | |||
7995 | } | |||
7996 | for( ; ii<p->nChild; ii++){ | |||
7997 | p->iHeight = MAX(p->iHeight, p->apChild[ii]->iHeight + 1)(((p->iHeight) > (p->apChild[ii]->iHeight + 1)) ? (p->iHeight) : (p->apChild[ii]->iHeight + 1)); | |||
7998 | } | |||
7999 | } | |||
8000 | ||||
8001 | /* | |||
8002 | ** This function is used when parsing LIKE or GLOB patterns against | |||
8003 | ** trigram indexes that specify either detail=column or detail=none. | |||
8004 | ** It converts a phrase: | |||
8005 | ** | |||
8006 | ** abc + def + ghi | |||
8007 | ** | |||
8008 | ** into an AND tree: | |||
8009 | ** | |||
8010 | ** abc AND def AND ghi | |||
8011 | */ | |||
8012 | static Fts5ExprNode *fts5ParsePhraseToAnd( | |||
8013 | Fts5Parse *pParse, | |||
8014 | Fts5ExprNearset *pNear | |||
8015 | ){ | |||
8016 | int nTerm = pNear->apPhrase[0]->nTerm; | |||
8017 | int ii; | |||
8018 | int nByte; | |||
8019 | Fts5ExprNode *pRet; | |||
8020 | ||||
8021 | assert( pNear->nPhrase==1 )((void) (0)); | |||
8022 | assert( pParse->bPhraseToAnd )((void) (0)); | |||
8023 | ||||
8024 | nByte = SZ_FTS5EXPRNODE(nTerm+1)(__builtin_offsetof(Fts5ExprNode, apChild) + (nTerm+1)*sizeof (Fts5ExprNode*)); | |||
8025 | pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); | |||
8026 | if( pRet ){ | |||
8027 | pRet->eType = FTS5_AND2; | |||
8028 | pRet->nChild = nTerm; | |||
8029 | pRet->iHeight = 1; | |||
8030 | fts5ExprAssignXNext(pRet); | |||
8031 | pParse->nPhrase--; | |||
8032 | for(ii=0; ii<nTerm; ii++){ | |||
8033 | Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero( | |||
8034 | &pParse->rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm )) | |||
8035 | ); | |||
8036 | if( pPhrase ){ | |||
8037 | if( parseGrowPhraseArray(pParse) ){ | |||
8038 | fts5ExprPhraseFree(pPhrase); | |||
8039 | }else{ | |||
8040 | Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii]; | |||
8041 | Fts5ExprTerm *pTo = &pPhrase->aTerm[0]; | |||
8042 | pParse->apPhrase[pParse->nPhrase++] = pPhrase; | |||
8043 | pPhrase->nTerm = 1; | |||
8044 | pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm); | |||
8045 | pTo->nQueryTerm = p->nQueryTerm; | |||
8046 | pTo->nFullTerm = p->nFullTerm; | |||
8047 | pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, | |||
8048 | 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) | |||
8049 | ); | |||
8050 | } | |||
8051 | } | |||
8052 | } | |||
8053 | ||||
8054 | if( pParse->rc ){ | |||
8055 | sqlite3Fts5ParseNodeFree(pRet); | |||
8056 | pRet = 0; | |||
8057 | }else{ | |||
8058 | sqlite3Fts5ParseNearsetFree(pNear); | |||
8059 | } | |||
8060 | } | |||
8061 | ||||
8062 | return pRet; | |||
8063 | } | |||
8064 | ||||
8065 | /* | |||
8066 | ** Allocate and return a new expression object. If anything goes wrong (i.e. | |||
8067 | ** OOM error), leave an error code in pParse and return NULL. | |||
8068 | */ | |||
8069 | static Fts5ExprNode *sqlite3Fts5ParseNode( | |||
8070 | Fts5Parse *pParse, /* Parse context */ | |||
8071 | int eType, /* FTS5_STRING, AND, OR or NOT */ | |||
8072 | Fts5ExprNode *pLeft, /* Left hand child expression */ | |||
8073 | Fts5ExprNode *pRight, /* Right hand child expression */ | |||
8074 | Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ | |||
8075 | ){ | |||
8076 | Fts5ExprNode *pRet = 0; | |||
8077 | ||||
8078 | if( pParse->rc==SQLITE_OK0 ){ | |||
8079 | int nChild = 0; /* Number of children of returned node */ | |||
8080 | sqlite3_int64 nByte; /* Bytes of space to allocate for this node */ | |||
8081 | ||||
8082 | assert( (eType!=FTS5_STRING && !pNear)((void) (0)) | |||
8083 | || (eType==FTS5_STRING && !pLeft && !pRight)((void) (0)) | |||
8084 | )((void) (0)); | |||
8085 | if( eType==FTS5_STRING9 && pNear==0 ) return 0; | |||
8086 | if( eType!=FTS5_STRING9 && pLeft==0 ) return pRight; | |||
8087 | if( eType!=FTS5_STRING9 && pRight==0 ) return pLeft; | |||
8088 | ||||
8089 | if( eType==FTS5_STRING9 | |||
8090 | && pParse->bPhraseToAnd | |||
8091 | && pNear->apPhrase[0]->nTerm>1 | |||
8092 | ){ | |||
8093 | pRet = fts5ParsePhraseToAnd(pParse, pNear); | |||
8094 | }else{ | |||
8095 | if( eType==FTS5_NOT3 ){ | |||
8096 | nChild = 2; | |||
8097 | }else if( eType==FTS5_AND2 || eType==FTS5_OR1 ){ | |||
8098 | nChild = 2; | |||
8099 | if( pLeft->eType==eType ) nChild += pLeft->nChild-1; | |||
8100 | if( pRight->eType==eType ) nChild += pRight->nChild-1; | |||
8101 | } | |||
8102 | ||||
8103 | nByte = SZ_FTS5EXPRNODE(nChild)(__builtin_offsetof(Fts5ExprNode, apChild) + (nChild)*sizeof( Fts5ExprNode*)); | |||
8104 | pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); | |||
8105 | ||||
8106 | if( pRet ){ | |||
8107 | pRet->eType = eType; | |||
8108 | pRet->pNear = pNear; | |||
8109 | fts5ExprAssignXNext(pRet); | |||
8110 | if( eType==FTS5_STRING9 ){ | |||
8111 | int iPhrase; | |||
8112 | for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ | |||
8113 | pNear->apPhrase[iPhrase]->pNode = pRet; | |||
8114 | if( pNear->apPhrase[iPhrase]->nTerm==0 ){ | |||
8115 | pRet->xNext = 0; | |||
8116 | pRet->eType = FTS5_EOF0; | |||
8117 | } | |||
8118 | } | |||
8119 | ||||
8120 | if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | |||
8121 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; | |||
8122 | if( pNear->nPhrase!=1 | |||
8123 | || pPhrase->nTerm>1 | |||
8124 | || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst) | |||
8125 | ){ | |||
8126 | sqlite3Fts5ParseError(pParse, | |||
8127 | "fts5: %s queries are not supported (detail!=full)", | |||
8128 | pNear->nPhrase==1 ? "phrase": "NEAR" | |||
8129 | ); | |||
8130 | sqlite3Fts5ParseNodeFree(pRet); | |||
8131 | pRet = 0; | |||
8132 | pNear = 0; | |||
8133 | assert( pLeft==0 && pRight==0 )((void) (0)); | |||
8134 | } | |||
8135 | } | |||
8136 | }else{ | |||
8137 | assert( pNear==0 )((void) (0)); | |||
8138 | fts5ExprAddChildren(pRet, pLeft); | |||
8139 | fts5ExprAddChildren(pRet, pRight); | |||
8140 | pLeft = pRight = 0; | |||
8141 | if( pRet->iHeight>SQLITE_FTS5_MAX_EXPR_DEPTH256 ){ | |||
8142 | sqlite3Fts5ParseError(pParse, | |||
8143 | "fts5 expression tree is too large (maximum depth %d)", | |||
8144 | SQLITE_FTS5_MAX_EXPR_DEPTH256 | |||
8145 | ); | |||
8146 | sqlite3Fts5ParseNodeFree(pRet); | |||
8147 | pRet = 0; | |||
8148 | } | |||
8149 | } | |||
8150 | } | |||
8151 | } | |||
8152 | } | |||
8153 | ||||
8154 | if( pRet==0 ){ | |||
8155 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | |||
8156 | sqlite3Fts5ParseNodeFree(pLeft); | |||
8157 | sqlite3Fts5ParseNodeFree(pRight); | |||
8158 | sqlite3Fts5ParseNearsetFree(pNear); | |||
8159 | } | |||
8160 | return pRet; | |||
8161 | } | |||
8162 | ||||
8163 | static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( | |||
8164 | Fts5Parse *pParse, /* Parse context */ | |||
8165 | Fts5ExprNode *pLeft, /* Left hand child expression */ | |||
8166 | Fts5ExprNode *pRight /* Right hand child expression */ | |||
8167 | ){ | |||
8168 | Fts5ExprNode *pRet = 0; | |||
8169 | Fts5ExprNode *pPrev; | |||
8170 | ||||
8171 | if( pParse->rc ){ | |||
8172 | sqlite3Fts5ParseNodeFree(pLeft); | |||
8173 | sqlite3Fts5ParseNodeFree(pRight); | |||
8174 | }else{ | |||
8175 | ||||
8176 | assert( pLeft->eType==FTS5_STRING((void) (0)) | |||
8177 | || pLeft->eType==FTS5_TERM((void) (0)) | |||
8178 | || pLeft->eType==FTS5_EOF((void) (0)) | |||
8179 | || pLeft->eType==FTS5_AND((void) (0)) | |||
8180 | )((void) (0)); | |||
8181 | assert( pRight->eType==FTS5_STRING((void) (0)) | |||
8182 | || pRight->eType==FTS5_TERM((void) (0)) | |||
8183 | || pRight->eType==FTS5_EOF((void) (0)) | |||
8184 | || (pRight->eType==FTS5_AND && pParse->bPhraseToAnd)((void) (0)) | |||
8185 | )((void) (0)); | |||
8186 | ||||
8187 | if( pLeft->eType==FTS5_AND2 ){ | |||
8188 | pPrev = pLeft->apChild[pLeft->nChild-1]; | |||
8189 | }else{ | |||
8190 | pPrev = pLeft; | |||
8191 | } | |||
8192 | assert( pPrev->eType==FTS5_STRING((void) (0)) | |||
8193 | || pPrev->eType==FTS5_TERM((void) (0)) | |||
8194 | || pPrev->eType==FTS5_EOF((void) (0)) | |||
8195 | )((void) (0)); | |||
8196 | ||||
8197 | if( pRight->eType==FTS5_EOF0 ){ | |||
8198 | assert( pParse->apPhrase!=0 )((void) (0)); | |||
8199 | assert( pParse->nPhrase>0 )((void) (0)); | |||
8200 | assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] )((void) (0)); | |||
8201 | sqlite3Fts5ParseNodeFree(pRight); | |||
8202 | pRet = pLeft; | |||
8203 | pParse->nPhrase--; | |||
8204 | } | |||
8205 | else if( pPrev->eType==FTS5_EOF0 ){ | |||
8206 | Fts5ExprPhrase **ap; | |||
8207 | ||||
8208 | if( pPrev==pLeft ){ | |||
8209 | pRet = pRight; | |||
8210 | }else{ | |||
8211 | pLeft->apChild[pLeft->nChild-1] = pRight; | |||
8212 | pRet = pLeft; | |||
8213 | } | |||
8214 | ||||
8215 | ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase]; | |||
8216 | assert( ap[0]==pPrev->pNear->apPhrase[0] )((void) (0)); | |||
8217 | memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase); | |||
8218 | pParse->nPhrase--; | |||
8219 | ||||
8220 | sqlite3Fts5ParseNodeFree(pPrev); | |||
8221 | } | |||
8222 | else{ | |||
8223 | pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND2, pLeft, pRight, 0); | |||
8224 | } | |||
8225 | } | |||
8226 | ||||
8227 | return pRet; | |||
8228 | } | |||
8229 | ||||
8230 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
8231 | static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ | |||
8232 | sqlite3_int64 nByte = 0; | |||
8233 | Fts5ExprTerm *p; | |||
8234 | char *zQuoted; | |||
8235 | ||||
8236 | /* Determine the maximum amount of space required. */ | |||
8237 | for(p=pTerm; p; p=p->pSynonym){ | |||
8238 | nByte += pTerm->nQueryTerm * 2 + 3 + 2; | |||
8239 | } | |||
8240 | zQuoted = sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
8241 | ||||
8242 | if( zQuoted ){ | |||
8243 | int i = 0; | |||
8244 | for(p=pTerm; p; p=p->pSynonym){ | |||
8245 | char *zIn = p->pTerm; | |||
8246 | char *zEnd = &zIn[p->nQueryTerm]; | |||
8247 | zQuoted[i++] = '"'; | |||
8248 | while( zIn<zEnd ){ | |||
8249 | if( *zIn=='"' ) zQuoted[i++] = '"'; | |||
8250 | zQuoted[i++] = *zIn++; | |||
8251 | } | |||
8252 | zQuoted[i++] = '"'; | |||
8253 | if( p->pSynonym ) zQuoted[i++] = '|'; | |||
8254 | } | |||
8255 | if( pTerm->bPrefix ){ | |||
8256 | zQuoted[i++] = ' '; | |||
8257 | zQuoted[i++] = '*'; | |||
8258 | } | |||
8259 | zQuoted[i++] = '\0'; | |||
8260 | } | |||
8261 | return zQuoted; | |||
8262 | } | |||
8263 | ||||
/*
** Format (zFmt, ...) using sqlite3_vmprintf() and append the result to
** string zApp, returning the combined string in a new buffer.
**
** zApp is always freed by this function. If zApp is NULL, the formatted
** text alone is returned. NULL is returned if either the formatting or
** the concatenation fails to allocate memory. The returned buffer must
** eventually be passed to sqlite3_free().
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  va_list ap;
  char *zTail;                    /* Newly formatted text */
  char *zOut;                     /* Value to return */

  va_start(ap, zFmt);
  zTail = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  zOut = zTail;
  if( zApp!=0 && zTail!=0 ){
    zOut = sqlite3_mprintf("%s%s", zApp, zTail);
    sqlite3_free(zTail);
  }
  sqlite3_free(zApp);
  return zOut;
}
8278 | ||||
8279 | /* | |||
8280 | ** Compose a tcl-readable representation of expression pExpr. Return a | |||
8281 | ** pointer to a buffer containing that representation. It is the | |||
8282 | ** responsibility of the caller to at some point free the buffer using | |||
8283 | ** sqlite3_free(). | |||
8284 | */ | |||
8285 | static char *fts5ExprPrintTcl( | |||
8286 | Fts5Config *pConfig, | |||
8287 | const char *zNearsetCmd, | |||
8288 | Fts5ExprNode *pExpr | |||
8289 | ){ | |||
8290 | char *zRet = 0; | |||
8291 | if( pExpr->eType==FTS5_STRING9 || pExpr->eType==FTS5_TERM4 ){ | |||
8292 | Fts5ExprNearset *pNear = pExpr->pNear; | |||
8293 | int i; | |||
8294 | int iTerm; | |||
8295 | ||||
8296 | zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd); | |||
8297 | if( zRet==0 ) return 0; | |||
8298 | if( pNear->pColset ){ | |||
8299 | int *aiCol = pNear->pColset->aiCol; | |||
8300 | int nCol = pNear->pColset->nCol; | |||
8301 | if( nCol==1 ){ | |||
8302 | zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); | |||
8303 | }else{ | |||
8304 | zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); | |||
8305 | for(i=1; i<pNear->pColset->nCol; i++){ | |||
8306 | zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); | |||
8307 | } | |||
8308 | zRet = fts5PrintfAppend(zRet, "} "); | |||
8309 | } | |||
8310 | if( zRet==0 ) return 0; | |||
8311 | } | |||
8312 | ||||
8313 | if( pNear->nPhrase>1 ){ | |||
8314 | zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); | |||
8315 | if( zRet==0 ) return 0; | |||
8316 | } | |||
8317 | ||||
8318 | zRet = fts5PrintfAppend(zRet, "--"); | |||
8319 | if( zRet==0 ) return 0; | |||
8320 | ||||
8321 | for(i=0; i<pNear->nPhrase; i++){ | |||
8322 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
8323 | ||||
8324 | zRet = fts5PrintfAppend(zRet, " {"); | |||
8325 | for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ | |||
8326 | Fts5ExprTerm *p = &pPhrase->aTerm[iTerm]; | |||
8327 | zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ", | |||
8328 | p->nQueryTerm, p->pTerm | |||
8329 | ); | |||
8330 | if( pPhrase->aTerm[iTerm].bPrefix ){ | |||
8331 | zRet = fts5PrintfAppend(zRet, "*"); | |||
8332 | } | |||
8333 | } | |||
8334 | ||||
8335 | if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); | |||
8336 | if( zRet==0 ) return 0; | |||
8337 | } | |||
8338 | ||||
8339 | }else if( pExpr->eType==0 ){ | |||
8340 | zRet = sqlite3_mprintfsqlite3_api->mprintf("{}"); | |||
8341 | }else{ | |||
8342 | char const *zOp = 0; | |||
8343 | int i; | |||
8344 | switch( pExpr->eType ){ | |||
8345 | case FTS5_AND2: zOp = "AND"; break; | |||
8346 | case FTS5_NOT3: zOp = "NOT"; break; | |||
8347 | default: | |||
8348 | assert( pExpr->eType==FTS5_OR )((void) (0)); | |||
8349 | zOp = "OR"; | |||
8350 | break; | |||
8351 | } | |||
8352 | ||||
8353 | zRet = sqlite3_mprintfsqlite3_api->mprintf("%s", zOp); | |||
8354 | for(i=0; zRet && i<pExpr->nChild; i++){ | |||
8355 | char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]); | |||
8356 | if( !z ){ | |||
8357 | sqlite3_freesqlite3_api->free(zRet); | |||
8358 | zRet = 0; | |||
8359 | }else{ | |||
8360 | zRet = fts5PrintfAppend(zRet, " [%z]", z); | |||
8361 | } | |||
8362 | } | |||
8363 | } | |||
8364 | ||||
8365 | return zRet; | |||
8366 | } | |||
8367 | ||||
8368 | static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ | |||
8369 | char *zRet = 0; | |||
8370 | if( pExpr->eType==0 ){ | |||
8371 | return sqlite3_mprintfsqlite3_api->mprintf("\"\""); | |||
8372 | }else | |||
8373 | if( pExpr->eType==FTS5_STRING9 || pExpr->eType==FTS5_TERM4 ){ | |||
8374 | Fts5ExprNearset *pNear = pExpr->pNear; | |||
8375 | int i; | |||
8376 | int iTerm; | |||
8377 | ||||
8378 | if( pNear->pColset ){ | |||
8379 | int ii; | |||
8380 | Fts5Colset *pColset = pNear->pColset; | |||
8381 | if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{"); | |||
8382 | for(ii=0; ii<pColset->nCol; ii++){ | |||
8383 | zRet = fts5PrintfAppend(zRet, "%s%s", | |||
8384 | pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " " | |||
8385 | ); | |||
8386 | } | |||
8387 | if( zRet ){ | |||
8388 | zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : ""); | |||
8389 | } | |||
8390 | if( zRet==0 ) return 0; | |||
8391 | } | |||
8392 | ||||
8393 | if( pNear->nPhrase>1 ){ | |||
8394 | zRet = fts5PrintfAppend(zRet, "NEAR("); | |||
8395 | if( zRet==0 ) return 0; | |||
8396 | } | |||
8397 | ||||
8398 | for(i=0; i<pNear->nPhrase; i++){ | |||
8399 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
8400 | if( i!=0 ){ | |||
8401 | zRet = fts5PrintfAppend(zRet, " "); | |||
8402 | if( zRet==0 ) return 0; | |||
8403 | } | |||
8404 | for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ | |||
8405 | char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); | |||
8406 | if( zTerm ){ | |||
8407 | zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); | |||
8408 | sqlite3_freesqlite3_api->free(zTerm); | |||
8409 | } | |||
8410 | if( zTerm==0 || zRet==0 ){ | |||
8411 | sqlite3_freesqlite3_api->free(zRet); | |||
8412 | return 0; | |||
8413 | } | |||
8414 | } | |||
8415 | } | |||
8416 | ||||
8417 | if( pNear->nPhrase>1 ){ | |||
8418 | zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); | |||
8419 | if( zRet==0 ) return 0; | |||
8420 | } | |||
8421 | ||||
8422 | }else{ | |||
8423 | char const *zOp = 0; | |||
8424 | int i; | |||
8425 | ||||
8426 | switch( pExpr->eType ){ | |||
8427 | case FTS5_AND2: zOp = " AND "; break; | |||
8428 | case FTS5_NOT3: zOp = " NOT "; break; | |||
8429 | default: | |||
8430 | assert( pExpr->eType==FTS5_OR )((void) (0)); | |||
8431 | zOp = " OR "; | |||
8432 | break; | |||
8433 | } | |||
8434 | ||||
8435 | for(i=0; i<pExpr->nChild; i++){ | |||
8436 | char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]); | |||
8437 | if( z==0 ){ | |||
8438 | sqlite3_freesqlite3_api->free(zRet); | |||
8439 | zRet = 0; | |||
8440 | }else{ | |||
8441 | int e = pExpr->apChild[i]->eType; | |||
8442 | int b = (e!=FTS5_STRING9 && e!=FTS5_TERM4 && e!=FTS5_EOF0); | |||
8443 | zRet = fts5PrintfAppend(zRet, "%s%s%z%s", | |||
8444 | (i==0 ? "" : zOp), | |||
8445 | (b?"(":""), z, (b?")":"") | |||
8446 | ); | |||
8447 | } | |||
8448 | if( zRet==0 ) break; | |||
8449 | } | |||
8450 | } | |||
8451 | ||||
8452 | return zRet; | |||
8453 | } | |||
8454 | ||||
8455 | /* | |||
8456 | ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) | |||
8457 | ** and fts5_expr_tcl() (bTcl!=0). | |||
8458 | */ | |||
8459 | static void fts5ExprFunction( | |||
8460 | sqlite3_context *pCtx, /* Function call context */ | |||
8461 | int nArg, /* Number of args */ | |||
8462 | sqlite3_value **apVal, /* Function arguments */ | |||
8463 | int bTcl | |||
8464 | ){ | |||
8465 | Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | |||
8466 | sqlite3 *db = sqlite3_context_db_handlesqlite3_api->context_db_handle(pCtx); | |||
8467 | const char *zExpr = 0; | |||
8468 | char *zErr = 0; | |||
8469 | Fts5Expr *pExpr = 0; | |||
8470 | int rc; | |||
8471 | int i; | |||
8472 | ||||
8473 | const char **azConfig; /* Array of arguments for Fts5Config */ | |||
8474 | const char *zNearsetCmd = "nearset"; | |||
8475 | int nConfig; /* Size of azConfig[] */ | |||
8476 | Fts5Config *pConfig = 0; | |||
8477 | int iArg = 1; | |||
8478 | ||||
8479 | if( nArg<1 ){ | |||
8480 | zErr = sqlite3_mprintfsqlite3_api->mprintf("wrong number of arguments to function %s", | |||
8481 | bTcl ? "fts5_expr_tcl" : "fts5_expr" | |||
8482 | ); | |||
8483 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
8484 | sqlite3_freesqlite3_api->free(zErr); | |||
8485 | return; | |||
8486 | } | |||
8487 | ||||
8488 | if( bTcl && nArg>1 ){ | |||
8489 | zNearsetCmd = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[1]); | |||
8490 | iArg = 2; | |||
8491 | } | |||
8492 | ||||
8493 | nConfig = 3 + (nArg-iArg); | |||
8494 | azConfig = (const char**)sqlite3_malloc64sqlite3_api->malloc64(sizeof(char*) * nConfig); | |||
8495 | if( azConfig==0 ){ | |||
8496 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(pCtx); | |||
8497 | return; | |||
8498 | } | |||
8499 | azConfig[0] = 0; | |||
8500 | azConfig[1] = "main"; | |||
8501 | azConfig[2] = "tbl"; | |||
8502 | for(i=3; iArg<nArg; iArg++){ | |||
8503 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[iArg]); | |||
8504 | azConfig[i++] = (z ? z : ""); | |||
8505 | } | |||
8506 | ||||
8507 | zExpr = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[0]); | |||
8508 | if( zExpr==0 ) zExpr = ""; | |||
8509 | ||||
8510 | rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); | |||
8511 | if( rc==SQLITE_OK0 ){ | |||
8512 | rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr); | |||
8513 | } | |||
8514 | if( rc==SQLITE_OK0 ){ | |||
8515 | char *zText; | |||
8516 | if( pExpr->pRoot->xNext==0 ){ | |||
8517 | zText = sqlite3_mprintfsqlite3_api->mprintf(""); | |||
8518 | }else if( bTcl ){ | |||
8519 | zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); | |||
8520 | }else{ | |||
8521 | zText = fts5ExprPrint(pConfig, pExpr->pRoot); | |||
8522 | } | |||
8523 | if( zText==0 ){ | |||
8524 | rc = SQLITE_NOMEM7; | |||
8525 | }else{ | |||
8526 | sqlite3_result_textsqlite3_api->result_text(pCtx, zText, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
8527 | sqlite3_freesqlite3_api->free(zText); | |||
8528 | } | |||
8529 | } | |||
8530 | ||||
8531 | if( rc!=SQLITE_OK0 ){ | |||
8532 | if( zErr ){ | |||
8533 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
8534 | sqlite3_freesqlite3_api->free(zErr); | |||
8535 | }else{ | |||
8536 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
8537 | } | |||
8538 | } | |||
8539 | sqlite3_freesqlite3_api->free((void *)azConfig); | |||
8540 | sqlite3Fts5ConfigFree(pConfig); | |||
8541 | sqlite3Fts5ExprFree(pExpr); | |||
8542 | } | |||
8543 | ||||
8544 | static void fts5ExprFunctionHr( | |||
8545 | sqlite3_context *pCtx, /* Function call context */ | |||
8546 | int nArg, /* Number of args */ | |||
8547 | sqlite3_value **apVal /* Function arguments */ | |||
8548 | ){ | |||
8549 | fts5ExprFunction(pCtx, nArg, apVal, 0); | |||
8550 | } | |||
8551 | static void fts5ExprFunctionTcl( | |||
8552 | sqlite3_context *pCtx, /* Function call context */ | |||
8553 | int nArg, /* Number of args */ | |||
8554 | sqlite3_value **apVal /* Function arguments */ | |||
8555 | ){ | |||
8556 | fts5ExprFunction(pCtx, nArg, apVal, 1); | |||
8557 | } | |||
8558 | ||||
8559 | /* | |||
8560 | ** The implementation of an SQLite user-defined-function that accepts a | |||
8561 | ** single integer as an argument. If the integer is an alpha-numeric | |||
8562 | ** unicode code point, 1 is returned. Otherwise 0. | |||
8563 | */ | |||
8564 | static void fts5ExprIsAlnum( | |||
8565 | sqlite3_context *pCtx, /* Function call context */ | |||
8566 | int nArg, /* Number of args */ | |||
8567 | sqlite3_value **apVal /* Function arguments */ | |||
8568 | ){ | |||
8569 | int iCode; | |||
8570 | u8 aArr[32]; | |||
8571 | if( nArg!=1 ){ | |||
8572 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | |||
8573 | "wrong number of arguments to function fts5_isalnum", -1 | |||
8574 | ); | |||
8575 | return; | |||
8576 | } | |||
8577 | memset(aArr, 0, sizeof(aArr)); | |||
8578 | sqlite3Fts5UnicodeCatParse("L*", aArr); | |||
8579 | sqlite3Fts5UnicodeCatParse("N*", aArr); | |||
8580 | sqlite3Fts5UnicodeCatParse("Co", aArr); | |||
8581 | iCode = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
8582 | sqlite3_result_intsqlite3_api->result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]); | |||
8583 | } | |||
8584 | ||||
8585 | static void fts5ExprFold( | |||
8586 | sqlite3_context *pCtx, /* Function call context */ | |||
8587 | int nArg, /* Number of args */ | |||
8588 | sqlite3_value **apVal /* Function arguments */ | |||
8589 | ){ | |||
8590 | if( nArg!=1 && nArg!=2 ){ | |||
8591 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | |||
8592 | "wrong number of arguments to function fts5_fold", -1 | |||
8593 | ); | |||
8594 | }else{ | |||
8595 | int iCode; | |||
8596 | int bRemoveDiacritics = 0; | |||
8597 | iCode = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
8598 | if( nArg==2 ) bRemoveDiacritics = sqlite3_value_intsqlite3_api->value_int(apVal[1]); | |||
8599 | sqlite3_result_intsqlite3_api->result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); | |||
8600 | } | |||
8601 | } | |||
8602 | #endif /* if SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
8603 | ||||
8604 | /* | |||
8605 | ** This is called during initialization to register the fts5_expr() scalar | |||
8606 | ** UDF with the SQLite handle passed as the only argument. | |||
8607 | */ | |||
8608 | static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ | |||
8609 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
8610 | struct Fts5ExprFunc { | |||
8611 | const char *z; | |||
8612 | void (*x)(sqlite3_context*,int,sqlite3_value**); | |||
8613 | } aFunc[] = { | |||
8614 | { "fts5_expr", fts5ExprFunctionHr }, | |||
8615 | { "fts5_expr_tcl", fts5ExprFunctionTcl }, | |||
8616 | { "fts5_isalnum", fts5ExprIsAlnum }, | |||
8617 | { "fts5_fold", fts5ExprFold }, | |||
8618 | }; | |||
8619 | int i; | |||
8620 | int rc = SQLITE_OK0; | |||
8621 | void *pCtx = (void*)pGlobal; | |||
8622 | ||||
8623 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aFunc)((int)(sizeof(aFunc) / sizeof(aFunc[0]))); i++){ | |||
8624 | struct Fts5ExprFunc *p = &aFunc[i]; | |||
8625 | rc = sqlite3_create_functionsqlite3_api->create_function(db, p->z, -1, SQLITE_UTF81, pCtx, p->x, 0, 0); | |||
8626 | } | |||
8627 | #else | |||
8628 | int rc = SQLITE_OK0; | |||
8629 | UNUSED_PARAM2(pGlobal,db)(void)(pGlobal), (void)(db); | |||
8630 | #endif | |||
8631 | ||||
8632 | /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and | |||
8633 | ** sqlite3Fts5ParserFallback() are unused */ | |||
8634 | #ifndef NDEBUG1 | |||
8635 | (void)sqlite3Fts5ParserTrace; | |||
8636 | #endif | |||
8637 | (void)sqlite3Fts5ParserFallback; | |||
8638 | ||||
8639 | return rc; | |||
8640 | } | |||
8641 | ||||
8642 | /* | |||
8643 | ** Return the number of phrases in expression pExpr. | |||
8644 | */ | |||
8645 | static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ | |||
8646 | return (pExpr ? pExpr->nPhrase : 0); | |||
8647 | } | |||
8648 | ||||
8649 | /* | |||
8650 | ** Return the number of terms in the iPhrase'th phrase in pExpr. | |||
8651 | */ | |||
8652 | static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ | |||
8653 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; | |||
8654 | return pExpr->apExprPhrase[iPhrase]->nTerm; | |||
8655 | } | |||
8656 | ||||
8657 | /* | |||
8658 | ** This function is used to access the current position list for phrase | |||
8659 | ** iPhrase. | |||
8660 | */ | |||
8661 | static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ | |||
8662 | int nRet; | |||
8663 | Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; | |||
8664 | Fts5ExprNode *pNode = pPhrase->pNode; | |||
8665 | if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ | |||
8666 | *pa = pPhrase->poslist.p; | |||
8667 | nRet = pPhrase->poslist.n; | |||
8668 | }else{ | |||
8669 | *pa = 0; | |||
8670 | nRet = 0; | |||
8671 | } | |||
8672 | return nRet; | |||
8673 | } | |||
8674 | ||||
/*
** One of these exists for each phrase of an expression while its position
** lists are being rebuilt by tokenizing document text:
**
**   writer   State handed to sqlite3Fts5PoslistWriterAppend() when a
**            position is appended to the phrase's position list.
**   bOk      Set by sqlite3Fts5ExprPopulatePoslists() for each column;
**            the tokenizer callback skips phrases for which it is 0.
**   bMiss    Set by sqlite3Fts5ExprClearPoslists(); when set, bOk is
**            always cleared for the phrase.
*/
8675 | struct Fts5PoslistPopulator { | |||
8676 | Fts5PoslistWriter writer; | |||
8677 | int bOk; /* True if ok to populate */ | |||
8678 | int bMiss; | |||
8679 | }; | |||
8680 | ||||
8681 | /* | |||
8682 | ** Clear the position lists associated with all phrases in the expression | |||
8683 | ** passed as the first argument. Argument bLive is true if the expression | |||
8684 | ** might be pointing to a real entry, otherwise it has just been reset. | |||
8685 | ** | |||
8686 | ** At present this function is only used for detail=col and detail=none | |||
8687 | ** fts5 tables. This implies that all phrases must be at most 1 token | |||
8688 | ** in size, as phrase matches are not supported without detail=full. | |||
8689 | */ | |||
8690 | static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){ | |||
8691 | Fts5PoslistPopulator *pRet; | |||
8692 | pRet = sqlite3_malloc64sqlite3_api->malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); | |||
8693 | if( pRet ){ | |||
8694 | int i; | |||
8695 | memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); | |||
8696 | for(i=0; i<pExpr->nPhrase; i++){ | |||
8697 | Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist; | |||
8698 | Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; | |||
8699 | assert( pExpr->apExprPhrase[i]->nTerm<=1 )((void) (0)); | |||
8700 | if( bLive && | |||
8701 | (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof) | |||
8702 | ){ | |||
8703 | pRet[i].bMiss = 1; | |||
8704 | }else{ | |||
8705 | pBuf->n = 0; | |||
8706 | } | |||
8707 | } | |||
8708 | } | |||
8709 | return pRet; | |||
8710 | } | |||
8711 | ||||
/*
** Context passed through the tokenizer to fts5ExprPopulatePoslistsCb():
**
**   pExpr       Expression whose phrase position lists are being filled.
**   aPopulator  Array with one entry for each phrase in pExpr.
**   iOff        Position of the current token. It is initialised to
**               (iCol<<32)-1 and incremented for each token that is not
**               flagged FTS5_TOKEN_COLOCATED; the callback reads the
**               column as (iOff>>32) and the token offset as
**               (iOff & 0x7FFFFFFF).
*/
8712 | struct Fts5ExprCtx { | |||
8713 | Fts5Expr *pExpr; | |||
8714 | Fts5PoslistPopulator *aPopulator; | |||
8715 | i64 iOff; | |||
8716 | }; | |||
8717 | typedef struct Fts5ExprCtx Fts5ExprCtx; | |||
8718 | ||||
8719 | /* | |||
8720 | ** TODO: Make this more efficient! | |||
8721 | */ | |||
8722 | static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ | |||
8723 | int i; | |||
8724 | for(i=0; i<pColset->nCol; i++){ | |||
8725 | if( pColset->aiCol[i]==iCol ) return 1; | |||
8726 | } | |||
8727 | return 0; | |||
8728 | } | |||
8729 | ||||
/*
** pToken is a buffer nToken bytes in size that may or may not contain
** an embedded 0x00 byte. Return the number of bytes that precede the
** first 0x00 byte, or nToken if the buffer contains no 0x00 byte.
*/
static int fts5QueryTerm(const char *pToken, int nToken){
  int nPrefix = 0;
  while( nPrefix<nToken && pToken[nPrefix]!='\0' ){
    nPrefix++;
  }
  return nPrefix;
}
8740 | ||||
8741 | static int fts5ExprPopulatePoslistsCb( | |||
8742 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | |||
8743 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
8744 | const char *pToken, /* Pointer to buffer containing token */ | |||
8745 | int nToken, /* Size of token in bytes */ | |||
8746 | int iUnused1, /* Byte offset of token within input text */ | |||
8747 | int iUnused2 /* Byte offset of end of token within input text */ | |||
8748 | ){ | |||
8749 | Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; | |||
8750 | Fts5Expr *pExpr = p->pExpr; | |||
8751 | int i; | |||
8752 | int nQuery = nToken; | |||
8753 | i64 iRowid = pExpr->pRoot->iRowid; | |||
8754 | ||||
8755 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
8756 | ||||
8757 | if( nQuery>FTS5_MAX_TOKEN_SIZE32768 ) nQuery = FTS5_MAX_TOKEN_SIZE32768; | |||
8758 | if( pExpr->pConfig->bTokendata ){ | |||
8759 | nQuery = fts5QueryTerm(pToken, nQuery); | |||
8760 | } | |||
8761 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ) p->iOff++; | |||
8762 | for(i=0; i<pExpr->nPhrase; i++){ | |||
8763 | Fts5ExprTerm *pT; | |||
8764 | if( p->aPopulator[i].bOk==0 ) continue; | |||
8765 | for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ | |||
8766 | if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix)) | |||
8767 | && memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0 | |||
8768 | ){ | |||
8769 | int rc = sqlite3Fts5PoslistWriterAppend( | |||
8770 | &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff | |||
8771 | ); | |||
8772 | if( rc==SQLITE_OK0 && (pExpr->pConfig->bTokendata || pT->bPrefix) ){ | |||
8773 | int iCol = p->iOff>>32; | |||
8774 | int iTokOff = p->iOff & 0x7FFFFFFF; | |||
8775 | rc = sqlite3Fts5IndexIterWriteTokendata( | |||
8776 | pT->pIter, pToken, nToken, iRowid, iCol, iTokOff | |||
8777 | ); | |||
8778 | } | |||
8779 | if( rc ) return rc; | |||
8780 | break; | |||
8781 | } | |||
8782 | } | |||
8783 | } | |||
8784 | return SQLITE_OK0; | |||
8785 | } | |||
8786 | ||||
8787 | static int sqlite3Fts5ExprPopulatePoslists( | |||
8788 | Fts5Config *pConfig, | |||
8789 | Fts5Expr *pExpr, | |||
8790 | Fts5PoslistPopulator *aPopulator, | |||
8791 | int iCol, | |||
8792 | const char *z, int n | |||
8793 | ){ | |||
8794 | int i; | |||
8795 | Fts5ExprCtx sCtx; | |||
8796 | sCtx.pExpr = pExpr; | |||
8797 | sCtx.aPopulator = aPopulator; | |||
8798 | sCtx.iOff = (((i64)iCol) << 32) - 1; | |||
8799 | ||||
8800 | for(i=0; i<pExpr->nPhrase; i++){ | |||
8801 | Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; | |||
8802 | Fts5Colset *pColset = pNode->pNear->pColset; | |||
8803 | if( (pColset && 0==fts5ExprColsetTest(pColset, iCol)) | |||
8804 | || aPopulator[i].bMiss | |||
8805 | ){ | |||
8806 | aPopulator[i].bOk = 0; | |||
8807 | }else{ | |||
8808 | aPopulator[i].bOk = 1; | |||
8809 | } | |||
8810 | } | |||
8811 | ||||
8812 | return sqlite3Fts5Tokenize(pConfig, | |||
8813 | FTS5_TOKENIZE_DOCUMENT0x0004, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb | |||
8814 | ); | |||
8815 | } | |||
8816 | ||||
8817 | static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ | |||
8818 | if( pNode->eType==FTS5_TERM4 || pNode->eType==FTS5_STRING9 ){ | |||
8819 | pNode->pNear->apPhrase[0]->poslist.n = 0; | |||
8820 | }else{ | |||
8821 | int i; | |||
8822 | for(i=0; i<pNode->nChild; i++){ | |||
8823 | fts5ExprClearPoslists(pNode->apChild[i]); | |||
8824 | } | |||
8825 | } | |||
8826 | } | |||
8827 | ||||
8828 | static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ | |||
8829 | pNode->iRowid = iRowid; | |||
8830 | pNode->bEof = 0; | |||
8831 | switch( pNode->eType ){ | |||
8832 | case 0: | |||
8833 | case FTS5_TERM4: | |||
8834 | case FTS5_STRING9: | |||
8835 | return (pNode->pNear->apPhrase[0]->poslist.n>0); | |||
8836 | ||||
8837 | case FTS5_AND2: { | |||
8838 | int i; | |||
8839 | for(i=0; i<pNode->nChild; i++){ | |||
8840 | if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){ | |||
8841 | fts5ExprClearPoslists(pNode); | |||
8842 | return 0; | |||
8843 | } | |||
8844 | } | |||
8845 | break; | |||
8846 | } | |||
8847 | ||||
8848 | case FTS5_OR1: { | |||
8849 | int i; | |||
8850 | int bRet = 0; | |||
8851 | for(i=0; i<pNode->nChild; i++){ | |||
8852 | if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){ | |||
8853 | bRet = 1; | |||
8854 | } | |||
8855 | } | |||
8856 | return bRet; | |||
8857 | } | |||
8858 | ||||
8859 | default: { | |||
8860 | assert( pNode->eType==FTS5_NOT )((void) (0)); | |||
8861 | if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid) | |||
8862 | || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid) | |||
8863 | ){ | |||
8864 | fts5ExprClearPoslists(pNode); | |||
8865 | return 0; | |||
8866 | } | |||
8867 | break; | |||
8868 | } | |||
8869 | } | |||
8870 | return 1; | |||
8871 | } | |||
8872 | ||||
8873 | static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ | |||
8874 | fts5ExprCheckPoslists(pExpr->pRoot, iRowid); | |||
8875 | } | |||
8876 | ||||
8877 | /* | |||
8878 | ** This function is only called for detail=columns tables. | |||
8879 | */ | |||
8880 | static int sqlite3Fts5ExprPhraseCollist( | |||
8881 | Fts5Expr *pExpr, | |||
8882 | int iPhrase, | |||
8883 | const u8 **ppCollist, | |||
8884 | int *pnCollist | |||
8885 | ){ | |||
8886 | Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; | |||
8887 | Fts5ExprNode *pNode = pPhrase->pNode; | |||
8888 | int rc = SQLITE_OK0; | |||
8889 | ||||
8890 | assert( iPhrase>=0 && iPhrase<pExpr->nPhrase )((void) (0)); | |||
8891 | assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | |||
8892 | ||||
8893 | if( pNode->bEof==0 | |||
8894 | && pNode->iRowid==pExpr->pRoot->iRowid | |||
8895 | && pPhrase->poslist.n>0 | |||
8896 | ){ | |||
8897 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; | |||
8898 | if( pTerm->pSynonym ){ | |||
8899 | Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; | |||
8900 | rc = fts5ExprSynonymList( | |||
8901 | pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist | |||
8902 | ); | |||
8903 | }else{ | |||
8904 | *ppCollist = pPhrase->aTerm[0].pIter->pData; | |||
8905 | *pnCollist = pPhrase->aTerm[0].pIter->nData; | |||
8906 | } | |||
8907 | }else{ | |||
8908 | *ppCollist = 0; | |||
8909 | *pnCollist = 0; | |||
8910 | } | |||
8911 | ||||
8912 | return rc; | |||
8913 | } | |||
8914 | ||||
8915 | /* | |||
8916 | ** Does the work of the fts5_api.xQueryToken() API method. | |||
8917 | */ | |||
8918 | static int sqlite3Fts5ExprQueryToken( | |||
8919 | Fts5Expr *pExpr, | |||
8920 | int iPhrase, | |||
8921 | int iToken, | |||
8922 | const char **ppOut, | |||
8923 | int *pnOut | |||
8924 | ){ | |||
8925 | Fts5ExprPhrase *pPhrase = 0; | |||
8926 | ||||
8927 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | |||
8928 | return SQLITE_RANGE25; | |||
8929 | } | |||
8930 | pPhrase = pExpr->apExprPhrase[iPhrase]; | |||
8931 | if( iToken<0 || iToken>=pPhrase->nTerm ){ | |||
8932 | return SQLITE_RANGE25; | |||
8933 | } | |||
8934 | ||||
8935 | *ppOut = pPhrase->aTerm[iToken].pTerm; | |||
8936 | *pnOut = pPhrase->aTerm[iToken].nFullTerm; | |||
8937 | return SQLITE_OK0; | |||
8938 | } | |||
8939 | ||||
8940 | /* | |||
8941 | ** Does the work of the fts5_api.xInstToken() API method. | |||
8942 | */ | |||
8943 | static int sqlite3Fts5ExprInstToken( | |||
8944 | Fts5Expr *pExpr, | |||
8945 | i64 iRowid, | |||
8946 | int iPhrase, | |||
8947 | int iCol, | |||
8948 | int iOff, | |||
8949 | int iToken, | |||
8950 | const char **ppOut, | |||
8951 | int *pnOut | |||
8952 | ){ | |||
8953 | Fts5ExprPhrase *pPhrase = 0; | |||
8954 | Fts5ExprTerm *pTerm = 0; | |||
8955 | int rc = SQLITE_OK0; | |||
8956 | ||||
8957 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | |||
8958 | return SQLITE_RANGE25; | |||
8959 | } | |||
8960 | pPhrase = pExpr->apExprPhrase[iPhrase]; | |||
8961 | if( iToken<0 || iToken>=pPhrase->nTerm ){ | |||
8962 | return SQLITE_RANGE25; | |||
8963 | } | |||
8964 | pTerm = &pPhrase->aTerm[iToken]; | |||
8965 | if( pExpr->pConfig->bTokendata || pTerm->bPrefix ){ | |||
8966 | rc = sqlite3Fts5IterToken( | |||
8967 | pTerm->pIter, pTerm->pTerm, pTerm->nQueryTerm, | |||
8968 | iRowid, iCol, iOff+iToken, ppOut, pnOut | |||
8969 | ); | |||
8970 | }else{ | |||
8971 | *ppOut = pTerm->pTerm; | |||
8972 | *pnOut = pTerm->nFullTerm; | |||
8973 | } | |||
8974 | return rc; | |||
8975 | } | |||
8976 | ||||
8977 | /* | |||
8978 | ** Clear the token mappings for all Fts5IndexIter objects managed by | |||
8979 | ** the expression passed as the only argument. | |||
8980 | */ | |||
8981 | static void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){ | |||
8982 | int ii; | |||
8983 | for(ii=0; ii<pExpr->nPhrase; ii++){ | |||
8984 | Fts5ExprTerm *pT; | |||
8985 | for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){ | |||
8986 | sqlite3Fts5IndexIterClearTokendata(pT->pIter); | |||
8987 | } | |||
8988 | } | |||
8989 | } | |||
8990 | ||||
8991 | #line 1 "fts5_hash.c" | |||
8992 | /* | |||
8993 | ** 2014 August 11 | |||
8994 | ** | |||
8995 | ** The author disclaims copyright to this source code. In place of | |||
8996 | ** a legal notice, here is a blessing: | |||
8997 | ** | |||
8998 | ** May you do good and not evil. | |||
8999 | ** May you find forgiveness for yourself and forgive others. | |||
9000 | ** May you share freely, never taking more than you give. | |||
9001 | ** | |||
9002 | ****************************************************************************** | |||
9003 | ** | |||
9004 | */ | |||
9005 | ||||
9006 | ||||
9007 | ||||
9008 | /* #include "fts5Int.h" */ | |||
9009 | ||||
typedef struct Fts5HashEntry Fts5HashEntry;

/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
*/

/*
** The hash table object itself. Entries are kept in aSlot[], an array of
** nSlot singly-linked lists (linked by Fts5HashEntry.pHashNext). The array
** starts with 1024 slots and is doubled by fts5HashResize() whenever
** nEntry*2 reaches nSlot.
**
** pnByte points to a counter owned by the caller; sqlite3Fts5HashWrite()
** adds the number of bytes of entry data it appends to (*pnByte).
**
** pScan is the cursor of the ordered scan started by
** sqlite3Fts5HashScanInit(); it is NULL once the scan is at EOF.
*/
struct Fts5Hash {
  int eDetail;                    /* Copy of Fts5Config.eDetail */
  int *pnByte;                    /* Pointer to bytes counter */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Size of aSlot[] array */
  Fts5HashEntry *pScan;           /* Current ordered scan item */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};
9027 | ||||
9028 | /* | |||
9029 | ** Each entry in the hash table is represented by an object of the | |||
9030 | ** following type. Each object, its key, and its current data are stored | |||
9031 | ** in a single memory allocation. The key immediately follows the object | |||
9032 | ** in memory. The position list data immediately follows the key data | |||
9033 | ** in memory. | |||
9034 | ** | |||
9035 | ** The key is Fts5HashEntry.nKey bytes in size. It consists of a single | |||
9036 | ** byte identifying the index (either the main term index or a prefix-index), | |||
9037 | ** followed by the term data. For example: "0token". There is no | |||
9038 | ** nul-terminator - in this case nKey=6. | |||
9039 | ** | |||
9040 | ** The data that follows the key is in a similar, but not identical format | |||
9041 | ** to the doclist data stored in the database. It is: | |||
9042 | ** | |||
9043 | ** * Rowid, as a varint | |||
9044 | ** * Position list, without 0x00 terminator. | |||
9045 | ** * Size of previous position list and rowid, as a 4 byte | |||
9046 | ** big-endian integer. | |||
9047 | ** | |||
9048 | ** iRowidOff: | |||
9049 | ** Offset of last rowid written to data area. Relative to first byte of | |||
9050 | ** structure. | |||
9051 | ** | |||
9052 | ** nData: | |||
9053 | ** Bytes of data written since iRowidOff. | |||
9054 | */ | |||
9055 | struct Fts5HashEntry { | |||
9056 | Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ | |||
9057 | Fts5HashEntry *pScanNext; /* Next entry in sorted order */ | |||
9058 | ||||
9059 | int nAlloc; /* Total size of allocation */ | |||
9060 | int iSzPoslist; /* Offset of space for 4-byte poslist size */ | |||
9061 | int nData; /* Total bytes of data (incl. structure) */ | |||
9062 | int nKey; /* Length of key in bytes */ | |||
9063 | u8 bDel; /* Set delete-flag @ iSzPoslist */ | |||
9064 | u8 bContent; /* Set content-flag (detail=none mode) */ | |||
9065 | i16 iCol; /* Column of last value written */ | |||
9066 | int iPos; /* Position of last value written */ | |||
9067 | i64 iRowid; /* Rowid of last value written */ | |||
9068 | }; | |||
9069 | ||||
/*
** Return a pointer to the key of hash entry p. The key is stored in the
** bytes immediately following the Fts5HashEntry structure. Equivalent to:
**
**   char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
*/
#define fts5EntryKey(p) ( (char *)((p) + 1) )
9076 | ||||
9077 | ||||
9078 | /* | |||
9079 | ** Allocate a new hash table. | |||
9080 | */ | |||
9081 | static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ | |||
9082 | int rc = SQLITE_OK0; | |||
9083 | Fts5Hash *pNew; | |||
9084 | ||||
9085 | *ppNew = pNew = (Fts5Hash*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Hash)); | |||
9086 | if( pNew==0 ){ | |||
9087 | rc = SQLITE_NOMEM7; | |||
9088 | }else{ | |||
9089 | sqlite3_int64 nByte; | |||
9090 | memset(pNew, 0, sizeof(Fts5Hash)); | |||
9091 | pNew->pnByte = pnByte; | |||
9092 | pNew->eDetail = pConfig->eDetail; | |||
9093 | ||||
9094 | pNew->nSlot = 1024; | |||
9095 | nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; | |||
9096 | pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
9097 | if( pNew->aSlot==0 ){ | |||
9098 | sqlite3_freesqlite3_api->free(pNew); | |||
9099 | *ppNew = 0; | |||
9100 | rc = SQLITE_NOMEM7; | |||
9101 | }else{ | |||
9102 | memset(pNew->aSlot, 0, (size_t)nByte); | |||
9103 | } | |||
9104 | } | |||
9105 | return rc; | |||
9106 | } | |||
9107 | ||||
9108 | /* | |||
9109 | ** Free a hash table object. | |||
9110 | */ | |||
9111 | static void sqlite3Fts5HashFree(Fts5Hash *pHash){ | |||
9112 | if( pHash ){ | |||
9113 | sqlite3Fts5HashClear(pHash); | |||
9114 | sqlite3_freesqlite3_api->free(pHash->aSlot); | |||
9115 | sqlite3_freesqlite3_api->free(pHash); | |||
9116 | } | |||
9117 | } | |||
9118 | ||||
9119 | /* | |||
9120 | ** Empty (but do not delete) a hash table. | |||
9121 | */ | |||
9122 | static void sqlite3Fts5HashClear(Fts5Hash *pHash){ | |||
9123 | int i; | |||
9124 | for(i=0; i<pHash->nSlot; i++){ | |||
9125 | Fts5HashEntry *pNext; | |||
9126 | Fts5HashEntry *pSlot; | |||
9127 | for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ | |||
9128 | pNext = pSlot->pHashNext; | |||
9129 | sqlite3_freesqlite3_api->free(pSlot); | |||
9130 | } | |||
9131 | } | |||
9132 | memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); | |||
9133 | pHash->nEntry = 0; | |||
9134 | } | |||
9135 | ||||
9136 | static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){ | |||
9137 | int i; | |||
9138 | unsigned int h = 13; | |||
9139 | for(i=n-1; i>=0; i--){ | |||
9140 | h = (h << 3) ^ h ^ p[i]; | |||
9141 | } | |||
9142 | return (h % nSlot); | |||
9143 | } | |||
9144 | ||||
9145 | static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){ | |||
9146 | int i; | |||
9147 | unsigned int h = 13; | |||
9148 | for(i=n-1; i>=0; i--){ | |||
9149 | h = (h << 3) ^ h ^ p[i]; | |||
9150 | } | |||
9151 | h = (h << 3) ^ h ^ b; | |||
9152 | return (h % nSlot); | |||
9153 | } | |||
9154 | ||||
9155 | /* | |||
9156 | ** Resize the hash table by doubling the number of slots. | |||
9157 | */ | |||
9158 | static int fts5HashResize(Fts5Hash *pHash){ | |||
9159 | int nNew = pHash->nSlot*2; | |||
9160 | int i; | |||
9161 | Fts5HashEntry **apNew; | |||
9162 | Fts5HashEntry **apOld = pHash->aSlot; | |||
9163 | ||||
9164 | apNew = (Fts5HashEntry**)sqlite3_malloc64sqlite3_api->malloc64(nNew*sizeof(Fts5HashEntry*)); | |||
9165 | if( !apNew ) return SQLITE_NOMEM7; | |||
9166 | memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); | |||
9167 | ||||
9168 | for(i=0; i<pHash->nSlot; i++){ | |||
9169 | while( apOld[i] ){ | |||
9170 | unsigned int iHash; | |||
9171 | Fts5HashEntry *p = apOld[i]; | |||
9172 | apOld[i] = p->pHashNext; | |||
9173 | iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p)( ((char *)(&(p)[1])) ), p->nKey); | |||
9174 | p->pHashNext = apNew[iHash]; | |||
9175 | apNew[iHash] = p; | |||
9176 | } | |||
9177 | } | |||
9178 | ||||
9179 | sqlite3_freesqlite3_api->free(apOld); | |||
9180 | pHash->nSlot = nNew; | |||
9181 | pHash->aSlot = apNew; | |||
9182 | return SQLITE_OK0; | |||
9183 | } | |||
9184 | ||||
9185 | static int fts5HashAddPoslistSize( | |||
9186 | Fts5Hash *pHash, | |||
9187 | Fts5HashEntry *p, | |||
9188 | Fts5HashEntry *p2 | |||
9189 | ){ | |||
9190 | int nRet = 0; | |||
9191 | if( p->iSzPoslist ){ | |||
9192 | u8 *pPtr = p2 ? (u8*)p2 : (u8*)p; | |||
9193 | int nData = p->nData; | |||
9194 | if( pHash->eDetail==FTS5_DETAIL_NONE1 ){ | |||
9195 | assert( nData==p->iSzPoslist )((void) (0)); | |||
9196 | if( p->bDel ){ | |||
9197 | pPtr[nData++] = 0x00; | |||
9198 | if( p->bContent ){ | |||
9199 | pPtr[nData++] = 0x00; | |||
9200 | } | |||
9201 | } | |||
9202 | }else{ | |||
9203 | int nSz = (nData - p->iSzPoslist - 1); /* Size in bytes */ | |||
9204 | int nPos = nSz*2 + p->bDel; /* Value of nPos field */ | |||
9205 | ||||
9206 | assert( p->bDel==0 || p->bDel==1 )((void) (0)); | |||
9207 | if( nPos<=127 ){ | |||
9208 | pPtr[p->iSzPoslist] = (u8)nPos; | |||
9209 | }else{ | |||
9210 | int nByte = sqlite3Fts5GetVarintLen((u32)nPos); | |||
9211 | memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); | |||
9212 | sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); | |||
9213 | nData += (nByte-1); | |||
9214 | } | |||
9215 | } | |||
9216 | ||||
9217 | nRet = nData - p->nData; | |||
9218 | if( p2==0 ){ | |||
9219 | p->iSzPoslist = 0; | |||
9220 | p->bDel = 0; | |||
9221 | p->bContent = 0; | |||
9222 | p->nData = nData; | |||
9223 | } | |||
9224 | } | |||
9225 | return nRet; | |||
9226 | } | |||
9227 | ||||
9228 | /* | |||
9229 | ** Add an entry to the in-memory hash table. The key is the concatenation | |||
9230 | ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). | |||
9231 | ** | |||
9232 | ** (bByte || pToken) -> (iRowid,iCol,iPos) | |||
9233 | ** | |||
9234 | ** Or, if iCol is negative, then the value is a delete marker. | |||
9235 | */ | |||
9236 | static int sqlite3Fts5HashWrite( | |||
9237 | Fts5Hash *pHash, | |||
9238 | i64 iRowid, /* Rowid for this entry */ | |||
9239 | int iCol, /* Column token appears in (-ve -> delete) */ | |||
9240 | int iPos, /* Position of token within column */ | |||
9241 | char bByte, /* First byte of token */ | |||
9242 | const char *pToken, int nToken /* Token to add or remove to or from index */ | |||
9243 | ){ | |||
9244 | unsigned int iHash; | |||
9245 | Fts5HashEntry *p; | |||
9246 | u8 *pPtr; | |||
9247 | int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ | |||
9248 | int bNew; /* If non-delete entry should be written */ | |||
9249 | ||||
9250 | bNew = (pHash->eDetail==FTS5_DETAIL_FULL0); | |||
9251 | ||||
9252 | /* Attempt to locate an existing hash entry */ | |||
9253 | iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); | |||
9254 | for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ | |||
9255 | char *zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | |||
9256 | if( zKey[0]==bByte | |||
9257 | && p->nKey==nToken+1 | |||
9258 | && memcmp(&zKey[1], pToken, nToken)==0 | |||
9259 | ){ | |||
9260 | break; | |||
9261 | } | |||
9262 | } | |||
9263 | ||||
9264 | /* If an existing hash entry cannot be found, create a new one. */ | |||
9265 | if( p==0 ){ | |||
9266 | /* Figure out how much space to allocate */ | |||
9267 | char *zKey; | |||
9268 | sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64; | |||
9269 | if( nByte<128 ) nByte = 128; | |||
9270 | ||||
9271 | /* Grow the Fts5Hash.aSlot[] array if necessary. */ | |||
9272 | if( (pHash->nEntry*2)>=pHash->nSlot ){ | |||
9273 | int rc = fts5HashResize(pHash); | |||
9274 | if( rc!=SQLITE_OK0 ) return rc; | |||
9275 | iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); | |||
9276 | } | |||
9277 | ||||
9278 | /* Allocate new Fts5HashEntry and add it to the hash table. */ | |||
9279 | p = (Fts5HashEntry*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
9280 | if( !p ) return SQLITE_NOMEM7; | |||
9281 | memset(p, 0, sizeof(Fts5HashEntry)); | |||
9282 | p->nAlloc = (int)nByte; | |||
9283 | zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | |||
9284 | zKey[0] = bByte; | |||
9285 | memcpy(&zKey[1], pToken, nToken); | |||
9286 | assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) )((void) (0)); | |||
9287 | p->nKey = nToken+1; | |||
9288 | zKey[nToken+1] = '\0'; | |||
9289 | p->nData = nToken+1 + sizeof(Fts5HashEntry); | |||
9290 | p->pHashNext = pHash->aSlot[iHash]; | |||
9291 | pHash->aSlot[iHash] = p; | |||
9292 | pHash->nEntry++; | |||
9293 | ||||
9294 | /* Add the first rowid field to the hash-entry */ | |||
9295 | p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); | |||
9296 | p->iRowid = iRowid; | |||
9297 | ||||
9298 | p->iSzPoslist = p->nData; | |||
9299 | if( pHash->eDetail!=FTS5_DETAIL_NONE1 ){ | |||
9300 | p->nData += 1; | |||
9301 | p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL0 ? 0 : -1); | |||
9302 | } | |||
9303 | ||||
9304 | }else{ | |||
9305 | ||||
9306 | /* Appending to an existing hash-entry. Check that there is enough | |||
9307 | ** space to append the largest possible new entry. Worst case scenario | |||
9308 | ** is: | |||
9309 | ** | |||
9310 | ** + 9 bytes for a new rowid, | |||
9311 | ** + 4 byte reserved for the "poslist size" varint. | |||
9312 | ** + 1 byte for a "new column" byte, | |||
9313 | ** + 3 bytes for a new column number (16-bit max) as a varint, | |||
9314 | ** + 5 bytes for the new position offset (32-bit max). | |||
9315 | */ | |||
9316 | if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ | |||
9317 | sqlite3_int64 nNew = p->nAlloc * 2; | |||
9318 | Fts5HashEntry *pNew; | |||
9319 | Fts5HashEntry **pp; | |||
9320 | pNew = (Fts5HashEntry*)sqlite3_realloc64sqlite3_api->realloc64(p, nNew); | |||
9321 | if( pNew==0 ) return SQLITE_NOMEM7; | |||
9322 | pNew->nAlloc = (int)nNew; | |||
9323 | for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); | |||
9324 | *pp = pNew; | |||
9325 | p = pNew; | |||
9326 | } | |||
9327 | nIncr -= p->nData; | |||
9328 | } | |||
9329 | assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) )((void) (0)); | |||
9330 | ||||
9331 | pPtr = (u8*)p; | |||
9332 | ||||
9333 | /* If this is a new rowid, append the 4-byte size field for the previous | |||
9334 | ** entry, and the new rowid for this entry. */ | |||
9335 | if( iRowid!=p->iRowid ){ | |||
9336 | u64 iDiff = (u64)iRowid - (u64)p->iRowid; | |||
9337 | fts5HashAddPoslistSize(pHash, p, 0); | |||
9338 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff); | |||
9339 | p->iRowid = iRowid; | |||
9340 | bNew = 1; | |||
9341 | p->iSzPoslist = p->nData; | |||
9342 | if( pHash->eDetail!=FTS5_DETAIL_NONE1 ){ | |||
9343 | p->nData += 1; | |||
9344 | p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL0 ? 0 : -1); | |||
9345 | p->iPos = 0; | |||
9346 | } | |||
9347 | } | |||
9348 | ||||
9349 | if( iCol>=0 ){ | |||
9350 | if( pHash->eDetail==FTS5_DETAIL_NONE1 ){ | |||
9351 | p->bContent = 1; | |||
9352 | }else{ | |||
9353 | /* Append a new column value, if necessary */ | |||
9354 | assert_nc( iCol>=p->iCol )((void) (0)); | |||
9355 | if( iCol!=p->iCol ){ | |||
9356 | if( pHash->eDetail==FTS5_DETAIL_FULL0 ){ | |||
9357 | pPtr[p->nData++] = 0x01; | |||
9358 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); | |||
9359 | p->iCol = (i16)iCol; | |||
9360 | p->iPos = 0; | |||
9361 | }else{ | |||
9362 | bNew = 1; | |||
9363 | p->iCol = (i16)(iPos = iCol); | |||
9364 | } | |||
9365 | } | |||
9366 | ||||
9367 | /* Append the new position offset, if necessary */ | |||
9368 | if( bNew ){ | |||
9369 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); | |||
9370 | p->iPos = iPos; | |||
9371 | } | |||
9372 | } | |||
9373 | }else{ | |||
9374 | /* This is a delete. Set the delete flag. */ | |||
9375 | p->bDel = 1; | |||
9376 | } | |||
9377 | ||||
9378 | nIncr += p->nData; | |||
9379 | *pHash->pnByte += nIncr; | |||
9380 | return SQLITE_OK0; | |||
9381 | } | |||
9382 | ||||
9383 | ||||
9384 | /* | |||
9385 | ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, | |||
9386 | ** each sorted in key order. This function merges the two lists into a | |||
9387 | ** single list and returns a pointer to its first element. | |||
9388 | */ | |||
9389 | static Fts5HashEntry *fts5HashEntryMerge( | |||
9390 | Fts5HashEntry *pLeft, | |||
9391 | Fts5HashEntry *pRight | |||
9392 | ){ | |||
9393 | Fts5HashEntry *p1 = pLeft; | |||
9394 | Fts5HashEntry *p2 = pRight; | |||
9395 | Fts5HashEntry *pRet = 0; | |||
9396 | Fts5HashEntry **ppOut = &pRet; | |||
9397 | ||||
9398 | while( p1 || p2 ){ | |||
9399 | if( p1==0 ){ | |||
9400 | *ppOut = p2; | |||
9401 | p2 = 0; | |||
9402 | }else if( p2==0 ){ | |||
9403 | *ppOut = p1; | |||
9404 | p1 = 0; | |||
9405 | }else{ | |||
9406 | char *zKey1 = fts5EntryKey(p1)( ((char *)(&(p1)[1])) ); | |||
9407 | char *zKey2 = fts5EntryKey(p2)( ((char *)(&(p2)[1])) ); | |||
9408 | int nMin = MIN(p1->nKey, p2->nKey)(((p1->nKey) < (p2->nKey)) ? (p1->nKey) : (p2-> nKey)); | |||
9409 | ||||
9410 | int cmp = memcmp(zKey1, zKey2, nMin); | |||
9411 | if( cmp==0 ){ | |||
9412 | cmp = p1->nKey - p2->nKey; | |||
9413 | } | |||
9414 | assert( cmp!=0 )((void) (0)); | |||
9415 | ||||
9416 | if( cmp>0 ){ | |||
9417 | /* p2 is smaller */ | |||
9418 | *ppOut = p2; | |||
9419 | ppOut = &p2->pScanNext; | |||
9420 | p2 = p2->pScanNext; | |||
9421 | }else{ | |||
9422 | /* p1 is smaller */ | |||
9423 | *ppOut = p1; | |||
9424 | ppOut = &p1->pScanNext; | |||
9425 | p1 = p1->pScanNext; | |||
9426 | } | |||
9427 | *ppOut = 0; | |||
9428 | } | |||
9429 | } | |||
9430 | ||||
9431 | return pRet; | |||
9432 | } | |||
9433 | ||||
9434 | /* | |||
9435 | ** Link all tokens from hash table iHash into a list in sorted order. The | |||
9436 | ** tokens are not removed from the hash table. | |||
9437 | */ | |||
9438 | static int fts5HashEntrySort( | |||
9439 | Fts5Hash *pHash, | |||
9440 | const char *pTerm, int nTerm, /* Query prefix, if any */ | |||
9441 | Fts5HashEntry **ppSorted | |||
9442 | ){ | |||
9443 | const int nMergeSlot = 32; | |||
9444 | Fts5HashEntry **ap; | |||
9445 | Fts5HashEntry *pList; | |||
9446 | int iSlot; | |||
9447 | int i; | |||
9448 | ||||
9449 | *ppSorted = 0; | |||
9450 | ap = sqlite3_malloc64sqlite3_api->malloc64(sizeof(Fts5HashEntry*) * nMergeSlot); | |||
9451 | if( !ap ) return SQLITE_NOMEM7; | |||
9452 | memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); | |||
9453 | ||||
9454 | for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ | |||
9455 | Fts5HashEntry *pIter; | |||
9456 | for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ | |||
9457 | if( pTerm==0 | |||
9458 | || (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter)( ((char *)(&(pIter)[1])) ), pTerm, nTerm)) | |||
9459 | ){ | |||
9460 | Fts5HashEntry *pEntry = pIter; | |||
9461 | pEntry->pScanNext = 0; | |||
9462 | for(i=0; ap[i]; i++){ | |||
9463 | pEntry = fts5HashEntryMerge(pEntry, ap[i]); | |||
9464 | ap[i] = 0; | |||
9465 | } | |||
9466 | ap[i] = pEntry; | |||
9467 | } | |||
9468 | } | |||
9469 | } | |||
9470 | ||||
9471 | pList = 0; | |||
9472 | for(i=0; i<nMergeSlot; i++){ | |||
9473 | pList = fts5HashEntryMerge(pList, ap[i]); | |||
9474 | } | |||
9475 | ||||
9476 | sqlite3_freesqlite3_api->free(ap); | |||
9477 | *ppSorted = pList; | |||
9478 | return SQLITE_OK0; | |||
9479 | } | |||
9480 | ||||
9481 | /* | |||
9482 | ** Query the hash table for a doclist associated with term pTerm/nTerm. | |||
9483 | */ | |||
9484 | static int sqlite3Fts5HashQuery( | |||
9485 | Fts5Hash *pHash, /* Hash table to query */ | |||
9486 | int nPre, | |||
9487 | const char *pTerm, int nTerm, /* Query term */ | |||
9488 | void **ppOut, /* OUT: Pointer to new object */ | |||
9489 | int *pnDoclist /* OUT: Size of doclist in bytes */ | |||
9490 | ){ | |||
9491 | unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); | |||
9492 | char *zKey = 0; | |||
9493 | Fts5HashEntry *p; | |||
9494 | ||||
9495 | for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ | |||
9496 | zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | |||
9497 | if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break; | |||
9498 | } | |||
9499 | ||||
9500 | if( p ){ | |||
9501 | int nHashPre = sizeof(Fts5HashEntry) + nTerm; | |||
9502 | int nList = p->nData - nHashPre; | |||
9503 | u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64sqlite3_api->malloc64(nPre + nList + 10)); | |||
9504 | if( pRet ){ | |||
9505 | Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre]; | |||
9506 | memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList); | |||
9507 | nList += fts5HashAddPoslistSize(pHash, p, pFaux); | |||
9508 | *pnDoclist = nList; | |||
9509 | }else{ | |||
9510 | *pnDoclist = 0; | |||
9511 | return SQLITE_NOMEM7; | |||
9512 | } | |||
9513 | }else{ | |||
9514 | *ppOut = 0; | |||
9515 | *pnDoclist = 0; | |||
9516 | } | |||
9517 | ||||
9518 | return SQLITE_OK0; | |||
9519 | } | |||
9520 | ||||
9521 | static int sqlite3Fts5HashScanInit( | |||
9522 | Fts5Hash *p, /* Hash table to query */ | |||
9523 | const char *pTerm, int nTerm /* Query prefix */ | |||
9524 | ){ | |||
9525 | return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); | |||
9526 | } | |||
9527 | ||||
#ifdef SQLITE_DEBUG
/*
** Count the entries in the hash table by walking every slot list. Only
** compiled in SQLITE_DEBUG builds, where it is used by an assert() to
** check the Fts5Hash.nEntry counter.
*/
static int fts5HashCount(Fts5Hash *pHash){
  int nFound = 0;
  int iSlot = 0;
  while( iSlot<pHash->nSlot ){
    Fts5HashEntry *pEntry = pHash->aSlot[iSlot];
    while( pEntry ){
      nFound++;
      pEntry = pEntry->pHashNext;
    }
    iSlot++;
  }
  return nFound;
}
#endif
9541 | ||||
9542 | /* | |||
9543 | ** Return true if the hash table is empty, false otherwise. | |||
9544 | */ | |||
9545 | static int sqlite3Fts5HashIsEmpty(Fts5Hash *pHash){ | |||
9546 | assert( pHash->nEntry==fts5HashCount(pHash) )((void) (0)); | |||
9547 | return pHash->nEntry==0; | |||
9548 | } | |||
9549 | ||||
9550 | static void sqlite3Fts5HashScanNext(Fts5Hash *p){ | |||
9551 | assert( !sqlite3Fts5HashScanEof(p) )((void) (0)); | |||
9552 | p->pScan = p->pScan->pScanNext; | |||
9553 | } | |||
9554 | ||||
9555 | static int sqlite3Fts5HashScanEof(Fts5Hash *p){ | |||
9556 | return (p->pScan==0); | |||
9557 | } | |||
9558 | ||||
9559 | static void sqlite3Fts5HashScanEntry( | |||
9560 | Fts5Hash *pHash, | |||
9561 | const char **pzTerm, /* OUT: term (nul-terminated) */ | |||
9562 | int *pnTerm, /* OUT: Size of term in bytes */ | |||
9563 | const u8 **ppDoclist, /* OUT: pointer to doclist */ | |||
9564 | int *pnDoclist /* OUT: size of doclist in bytes */ | |||
9565 | ){ | |||
9566 | Fts5HashEntry *p; | |||
9567 | if( (p = pHash->pScan) ){ | |||
9568 | char *zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | |||
9569 | int nTerm = p->nKey; | |||
9570 | fts5HashAddPoslistSize(pHash, p, 0); | |||
9571 | *pzTerm = zKey; | |||
9572 | *pnTerm = nTerm; | |||
9573 | *ppDoclist = (const u8*)&zKey[nTerm]; | |||
9574 | *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm); | |||
9575 | }else{ | |||
9576 | *pzTerm = 0; | |||
9577 | *pnTerm = 0; | |||
9578 | *ppDoclist = 0; | |||
9579 | *pnDoclist = 0; | |||
9580 | } | |||
9581 | } | |||
9582 | ||||
9583 | #line 1 "fts5_index.c" | |||
9584 | /* | |||
9585 | ** 2014 May 31 | |||
9586 | ** | |||
9587 | ** The author disclaims copyright to this source code. In place of | |||
9588 | ** a legal notice, here is a blessing: | |||
9589 | ** | |||
9590 | ** May you do good and not evil. | |||
9591 | ** May you find forgiveness for yourself and forgive others. | |||
9592 | ** May you share freely, never taking more than you give. | |||
9593 | ** | |||
9594 | ****************************************************************************** | |||
9595 | ** | |||
9596 | ** Low level access to the FTS index stored in the database file. The | |||
9597 | ** routines in this file implement all read and write access to the | |||
9598 | ** %_data table. Other parts of the system access this functionality via | |||
9599 | ** the interface defined in fts5Int.h. | |||
9600 | */ | |||
9601 | ||||
9602 | ||||
9603 | /* #include "fts5Int.h" */ | |||
9604 | ||||
9605 | /* | |||
9606 | ** Overview: | |||
9607 | ** | |||
9608 | ** The %_data table contains all the FTS indexes for an FTS5 virtual table. | |||
9609 | ** As well as the main term index, there may be up to 31 prefix indexes. | |||
9610 | ** The format is similar to FTS3/4, except that: | |||
9611 | ** | |||
9612 | ** * all segment b-tree leaf data is stored in fixed size page records | |||
9613 | ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is | |||
9614 | ** taken to ensure it is possible to iterate in either direction through | |||
9615 | ** the entries in a doclist, or to seek to a specific entry within a | |||
9616 | ** doclist, without loading it into memory. | |||
9617 | ** | |||
9618 | ** * large doclists that span many pages have associated "doclist index" | |||
9619 | ** records that contain a copy of the first rowid on each page spanned by | |||
9620 | ** the doclist. This is used to speed up seek operations, and merges of | |||
9621 | ** large doclists with very small doclists. | |||
9622 | ** | |||
9623 | ** * extra fields in the "structure record" record the state of ongoing | |||
9624 | ** incremental merge operations. | |||
9625 | ** | |||
9626 | */ | |||
9627 | ||||
9628 | ||||
/*
** Compile-time constants used by the index code.
*/
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/* Presumably the index-identifier byte that begins each key belonging to
** the main term index (e.g. "0token") - TODO confirm against its users. */
#define FTS5_MAIN_PREFIX '0'

#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

#define FTS5_MAX_LEVEL 64
9641 | ||||
/*
** There are two versions of the format used for the structure record:
**
**   1. the legacy format, that may be read by all fts5 versions, and
**
**   2. the V2 format, which is used by contentless_delete=1 databases.
**
** Both begin with a 4-byte "configuration cookie" value. In a legacy
** format structure record, the cookie is followed by a varint - the
** number of levels in the structure. In a V2 structure record it is
** followed instead by the constant 4 bytes [0xff 0x00 0x00 0x01]. This
** is unambiguous, as the value of a varint has to be at least 16256 to
** begin with "0xFF", and the default maximum number of levels is 64.
**
** See below for more on structure record formats.
*/
#define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01"
9659 | ||||
9660 | /* | |||
9661 | ** Details: | |||
9662 | ** | |||
9663 | ** The %_data table managed by this module, | |||
9664 | ** | |||
9665 | ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); | |||
9666 | ** | |||
9667 | ** , contains the following 6 types of records. See the comments surrounding | |||
9668 | ** the FTS5_*_ROWID macros below for a description of how %_data rowids are | |||
9669 | ** assigned to each of them. | |||
9670 | ** | |||
9671 | ** 1. Structure Records: | |||
9672 | ** | |||
9673 | ** The set of segments that make up an index - the index structure - are | |||
9674 | ** recorded in a single record within the %_data table. The record consists | |||
9675 | ** of a single 32-bit configuration cookie value followed by a list of | |||
9676 | ** SQLite varints. | |||
9677 | ** | |||
9678 | ** If the structure record is a V2 record, the configuration cookie is | |||
9679 | ** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01]. | |||
9680 | ** | |||
9681 | ** Next, the record continues with three varints: | |||
9682 | ** | |||
9683 | ** + number of levels, | |||
9684 | ** + total number of segments on all levels, | |||
9685 | ** + value of write counter. | |||
9686 | ** | |||
9687 | ** Then, for each level from 0 to nMax: | |||
9688 | ** | |||
9689 | ** + number of input segments in ongoing merge. | |||
9690 | ** + total number of segments in level. | |||
9691 | ** + for each segment from oldest to newest: | |||
9692 | ** + segment id (always > 0) | |||
9693 | ** + first leaf page number (often 1, always greater than 0) | |||
9694 | ** + final leaf page number | |||
9695 | ** | |||
9696 | ** Then, for V2 structures only: | |||
9697 | ** | |||
9698 | ** + lower origin counter value, | |||
9699 | ** + upper origin counter value, | |||
9700 | ** + the number of tombstone hash pages. | |||
9701 | ** | |||
9702 | ** 2. The Averages Record: | |||
9703 | ** | |||
9704 | ** A single record within the %_data table. The data is a list of varints. | |||
9705 | ** The first value is the number of rows in the index. Then, for each column | |||
9706 | ** from left to right, the total number of tokens in the column for all | |||
9707 | ** rows of the table. | |||
9708 | ** | |||
9709 | ** 3. Segment leaves: | |||
9710 | ** | |||
9711 | ** TERM/DOCLIST FORMAT: | |||
9712 | ** | |||
9713 | ** Most of each segment leaf is taken up by term/doclist data. The | |||
9714 | ** general format of term/doclist, starting with the first term | |||
9715 | ** on the leaf page, is: | |||
9716 | ** | |||
9717 | ** varint : size of first term | |||
9718 | ** blob: first term data | |||
9719 | ** doclist: first doclist | |||
9720 | ** zero-or-more { | |||
9721 | ** varint: number of bytes in common with previous term | |||
9722 | ** varint: number of bytes of new term data (nNew) | |||
9723 | ** blob: nNew bytes of new term data | |||
9724 | ** doclist: next doclist | |||
9725 | ** } | |||
9726 | ** | |||
9727 | ** doclist format: | |||
9728 | ** | |||
9729 | ** varint: first rowid | |||
9730 | ** poslist: first poslist | |||
9731 | ** zero-or-more { | |||
9732 | ** varint: rowid delta (always > 0) | |||
9733 | ** poslist: next poslist | |||
9734 | ** } | |||
9735 | ** | |||
9736 | ** poslist format: | |||
9737 | ** | |||
9738 | ** varint: size of poslist in bytes multiplied by 2, not including | |||
9739 | ** this field. Plus 1 if this entry carries the "delete" flag. | |||
9740 | ** collist: collist for column 0 | |||
9741 | ** zero-or-more { | |||
9742 | ** 0x01 byte | |||
9743 | ** varint: column number (I) | |||
9744 | ** collist: collist for column I | |||
9745 | ** } | |||
9746 | ** | |||
9747 | ** collist format: | |||
9748 | ** | |||
9749 | ** varint: first offset + 2 | |||
9750 | ** zero-or-more { | |||
9751 | ** varint: offset delta + 2 | |||
9752 | ** } | |||
9753 | ** | |||
9754 | ** PAGE FORMAT | |||
9755 | ** | |||
9756 | ** Each leaf page begins with a 4-byte header containing 2 16-bit | |||
9757 | ** unsigned integer fields in big-endian format. They are: | |||
9758 | ** | |||
9759 | ** * The byte offset of the first rowid on the page, if it exists | |||
9760 | ** and occurs before the first term (otherwise 0). | |||
9761 | ** | |||
9762 | ** * The byte offset of the start of the page footer. If the page | |||
9763 | ** footer is 0 bytes in size, then this field is the same as the | |||
9764 | ** size of the leaf page in bytes. | |||
9765 | ** | |||
9766 | ** The page footer consists of a single varint for each term located | |||
9767 | ** on the page. Each varint is the byte offset of the current term | |||
9768 | ** within the page, delta-compressed against the previous value. In | |||
9769 | ** other words, the first varint in the footer is the byte offset of | |||
9770 | ** the first term, the second is the byte offset of the second less that | |||
9771 | ** of the first, and so on. | |||
9772 | ** | |||
9773 | ** The term/doclist format described above is accurate if the entire | |||
9774 | ** term/doclist data fits on a single leaf page. If this is not the case, | |||
9775 | ** the format is changed in two ways: | |||
9776 | ** | |||
9777 | ** + if the first rowid on a page occurs before the first term, it | |||
9778 | ** is stored as a literal value: | |||
9779 | ** | |||
9780 | ** varint: first rowid | |||
9781 | ** | |||
9782 | ** + the first term on each page is stored in the same way as the | |||
9783 | ** very first term of the segment: | |||
9784 | ** | |||
9785 | ** varint : size of first term | |||
9786 | ** blob: first term data | |||
9787 | ** | |||
9788 | ** 5. Segment doclist indexes: | |||
9789 | ** | |||
9790 | ** Doclist indexes are themselves b-trees, however they usually consist of | |||
9791 | ** a single leaf record only. The format of each doclist index leaf page | |||
9792 | ** is: | |||
9793 | ** | |||
9794 | ** * Flags byte. Bits are: | |||
9795 | ** 0x01: Clear if leaf is also the root page, otherwise set. | |||
9796 | ** | |||
9797 | ** * Page number of fts index leaf page. As a varint. | |||
9798 | ** | |||
9799 | ** * First rowid on page indicated by previous field. As a varint. | |||
9800 | ** | |||
9801 | ** * A list of varints, one for each subsequent termless page. A | |||
9802 | ** positive delta if the termless page contains at least one rowid, | |||
9803 | ** or a 0x00 byte otherwise. | |||
9804 | ** | |||
9805 | ** Internal doclist index nodes are: | |||
9806 | ** | |||
9807 | ** * Flags byte. Bits are: | |||
9808 | ** 0x01: Clear for root page, otherwise set. | |||
9809 | ** | |||
9810 | ** * Page number of first child page. As a varint. | |||
9811 | ** | |||
9812 | ** * Copy of first rowid on page indicated by previous field. As a varint. | |||
9813 | ** | |||
9814 | ** * A list of delta-encoded varints - the first rowid on each subsequent | |||
9815 | ** child page. | |||
9816 | ** | |||
9817 | ** 6. Tombstone Hash Page | |||
9818 | ** | |||
9819 | ** These records are only ever present in contentless_delete=1 tables. | |||
9820 | ** There are zero or more of these associated with each segment. They | |||
9821 | ** are used to store the tombstone rowids for rows contained in the | |||
9822 | ** associated segments. | |||
9823 | ** | |||
9824 | ** The set of nHashPg tombstone hash pages associated with a single | |||
9825 | ** segment together form a single hash table containing tombstone rowids. | |||
9826 | ** To find the page of the hash on which a key might be stored: | |||
9827 | ** | |||
9828 | ** iPg = (rowid % nHashPg) | |||
9829 | ** | |||
9830 | ** Then, within page iPg, which has nSlot slots: | |||
9831 | ** | |||
9832 | ** iSlot = (rowid / nHashPg) % nSlot | |||
9833 | ** | |||
9834 | ** Each tombstone hash page begins with an 8 byte header: | |||
9835 | ** | |||
9836 | ** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8. | |||
9837 | ** 1-byte: rowid-0-tombstone flag. This flag is only valid on the | |||
9838 | ** first tombstone hash page for each segment (iPg=0). If set, | |||
9839 | ** the hash table contains rowid 0. If clear, it does not. | |||
9840 | ** Rowid 0 is handled specially. | |||
9841 | ** 2-bytes: unused. | |||
9842 | ** 4-bytes: Big-endian integer containing number of entries on page. | |||
9843 | ** | |||
9844 | ** Following this are nSlot 4 or 8 byte slots (depending on the key-size | |||
9845 | ** in the first byte of the page header). The number of slots may be | |||
9846 | ** determined based on the size of the page record and the key-size: | |||
9847 | ** | |||
9848 | ** nSlot = (nByte - 8) / key-size | |||
9849 | */ | |||
9850 | ||||
9851 | /* | |||
9852 | ** Fixed rowids of the averages and structure records in the %_data table. | |||
9853 | */ | |||
9854 | #define FTS5_AVERAGES_ROWID1 1 /* Rowid used for the averages record */ | |||
9855 | #define FTS5_STRUCTURE_ROWID10 10 /* Rowid used for the structure record */ | |||
9856 | ||||
9857 | /* | |||
9858 | ** Macros determining the rowids used by segment leaves and dlidx leaves | |||
9859 | ** and nodes. All nodes and leaves are stored in the %_data table with large | |||
9860 | ** positive rowids. | |||
9861 | ** | |||
9862 | ** Each segment has a unique non-zero 16-bit id. | |||
9863 | ** | |||
9864 | ** The rowid for each segment leaf is found by passing the segment id and | |||
9865 | ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered | |||
9866 | ** sequentially starting from 1. | |||
9867 | */ | |||
9868 | #define FTS5_DATA_ID_B16 16 /* 16 bits: max seg id number 65535 */ | |||
9869 | #define FTS5_DATA_DLI_B1 1 /* 1 bit: doclist-index flag */ | |||
9870 | #define FTS5_DATA_HEIGHT_B5 5 /* 5 bits: dlidx tree height field, values 0..31 */ | |||
9871 | #define FTS5_DATA_PAGE_B31 31 /* 31 bits: max page number of 2147483647 */ | |||
9872 | /* Pack (segid, dlidx, height, pgno) into a single i64 rowid: pgno occupies the low bits, then height, then the dlidx flag, then segid. */ | |||
9873 | #define fts5_dri(segid, dlidx, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(dlidx) << (31 + 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) ) ( \ | |||
9874 | ((i64)(segid) << (FTS5_DATA_PAGE_B31+FTS5_DATA_HEIGHT_B5+FTS5_DATA_DLI_B1)) + \ | |||
9875 | ((i64)(dlidx) << (FTS5_DATA_PAGE_B31 + FTS5_DATA_HEIGHT_B5)) + \ | |||
9876 | ((i64)(height) << (FTS5_DATA_PAGE_B31)) + \ | |||
9877 | ((i64)(pgno)) \ | |||
9878 | ) | |||
9879 | /* Rowids of segment leaves (dlidx=0, height=0), doclist-index pages (dlidx=1) and tombstone hash pages (leaf layout with 1<<16 added to the segid). */ | |||
9880 | #define FTS5_SEGMENT_ROWID(segid, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ) fts5_dri(segid, 0, 0, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ) | |||
9881 | #define FTS5_DLIDX_ROWID(segid, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) ) fts5_dri(segid, 1, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) ) | |||
9882 | #define FTS5_TOMBSTONE_ROWID(segid,ipg)( ((i64)(segid+(1<<16)) << (31 +5 +1)) + ((i64)(0 ) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(ipg) ) ) fts5_dri(segid+(1<<16), 0, 0, ipg)( ((i64)(segid+(1<<16)) << (31 +5 +1)) + ((i64)(0 ) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(ipg) ) ) | |||
9883 | ||||
#ifdef SQLITE_DEBUG
/*
** Debug builds only: return the SQLITE_CORRUPT_VTAB error code.
**
** The parameter list is spelled (void) so that the definition is a proper
** prototype; an empty "()" list is an obsolescent, unprototyped form in C.
*/
static int sqlite3Fts5Corrupt(void){
  return SQLITE_CORRUPT_VTAB;
}
#endif
9887 | ||||
9888 | ||||
9889 | /* | |||
9890 | ** Each time a blob is read from the %_data table, it is padded with this | |||
9891 | ** many zero bytes. This makes it easier to decode the various record formats | |||
9892 | ** without overreading if the records are corrupt. | |||
9893 | */ | |||
9894 | #define FTS5_DATA_ZERO_PADDING8 8 /* NOTE(review): presumably the zero bytes decoders may rely on after a record - confirm */ | |||
9895 | #define FTS5_DATA_PADDING20 20 /* Extra bytes fts5DataRead() allocates beyond the end of each record */ | |||
9896 | /* Forward typedefs for the structure types of this module; several of the structures are defined immediately below. */ | |||
9897 | typedef struct Fts5Data Fts5Data; | |||
9898 | typedef struct Fts5DlidxIter Fts5DlidxIter; | |||
9899 | typedef struct Fts5DlidxLvl Fts5DlidxLvl; | |||
9900 | typedef struct Fts5DlidxWriter Fts5DlidxWriter; | |||
9901 | typedef struct Fts5Iter Fts5Iter; | |||
9902 | typedef struct Fts5PageWriter Fts5PageWriter; | |||
9903 | typedef struct Fts5SegIter Fts5SegIter; | |||
9904 | typedef struct Fts5DoclistIter Fts5DoclistIter; | |||
9905 | typedef struct Fts5SegWriter Fts5SegWriter; | |||
9906 | typedef struct Fts5Structure Fts5Structure; | |||
9907 | typedef struct Fts5StructureLevel Fts5StructureLevel; | |||
9908 | typedef struct Fts5StructureSegment Fts5StructureSegment; | |||
9909 | typedef struct Fts5TokenDataIter Fts5TokenDataIter; | |||
9910 | typedef struct Fts5TokenDataMap Fts5TokenDataMap; | |||
9911 | typedef struct Fts5TombstoneArray Fts5TombstoneArray; | |||
9912 | ||||
9913 | struct Fts5Data { /* A record read from the %_data table by fts5DataRead() */ | |||
9914 | u8 *p; /* Pointer to buffer containing record */ | |||
9915 | int nn; /* Size of record in bytes (excludes trailing padding) */ | |||
9916 | int szLeaf; /* Size of leaf without page-index; fts5DataRead() sets it from the u16 at p[2] */ | |||
9917 | }; | |||
9918 | ||||
9919 | /* | |||
9920 | ** One object per %_data table. | |||
9921 | ** | |||
9922 | ** nContentlessDelete: | |||
9923 | ** The number of contentless delete operations since the most recent | |||
9924 | ** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked | |||
9925 | ** so that extra auto-merge work can be done by fts5IndexFlush() to | |||
9926 | ** account for the delete operations. | |||
9927 | */ | |||
9928 | struct Fts5Index { | |||
9929 | Fts5Config *pConfig; /* Virtual table configuration */ | |||
9930 | char *zDataTbl; /* Name of %_data table */ | |||
9931 | int nWorkUnit; /* Leaf pages in a "unit" of work */ | |||
9932 | | |||
9933 | /* | |||
9934 | ** Variables related to the accumulation of tokens and doclists within the | |||
9935 | ** in-memory hash tables before they are flushed to disk. | |||
9936 | */ | |||
9937 | Fts5Hash *pHash; /* Hash table for in-memory data */ | |||
9938 | int nPendingData; /* Current bytes of pending data */ | |||
9939 | i64 iWriteRowid; /* Rowid for current doc being written */ | |||
9940 | int bDelete; /* Current write is a delete */ | |||
9941 | int nContentlessDelete; /* Number of contentless delete ops */ | |||
9942 | int nPendingRow; /* Number of INSERT in hash table */ | |||
9943 | | |||
9944 | /* Error state. */ | |||
9945 | int rc; /* Current error code; SQLITE_OK when no error is pending */ | |||
9946 | int flushRc; /* NOTE(review): presumably an error code saved by the flush path - confirm */ | |||
9947 | | |||
9948 | /* State used by the fts5DataXXX() functions. */ | |||
9949 | sqlite3_blob *pReader; /* RO incr-blob open on %_data table; reused across fts5DataRead() calls */ | |||
9950 | sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ | |||
9951 | sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ | |||
9952 | sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ | |||
9953 | sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */ | |||
9954 | sqlite3_stmt *pIdxSelect; /* NOTE(review): presumably a SELECT on %_idx - confirm */ | |||
9955 | sqlite3_stmt *pIdxNextSelect; /* NOTE(review): presumably a SELECT on %_idx - confirm */ | |||
9956 | int nRead; /* Total number of blocks read (incremented by fts5DataRead()) */ | |||
9957 | | |||
9958 | sqlite3_stmt *pDeleteFromIdx; /* NOTE(review): presumably a DELETE on %_idx - confirm */ | |||
9959 | | |||
9960 | sqlite3_stmt *pDataVersion; /* NOTE(review): presumably reads the data_version stored in iStructVersion - confirm */ | |||
9961 | i64 iStructVersion; /* data_version when pStruct read */ | |||
9962 | Fts5Structure *pStruct; /* Current db structure (or NULL) */ | |||
9963 | }; | |||
9964 | ||||
9965 | struct Fts5DoclistIter { /* Iterator state for walking a doclist held in memory */ | |||
9966 | u8 *aEof; /* Pointer to 1 byte past end of doclist */ | |||
9967 | | |||
9968 | /* Output variables. aPoslist==0 at EOF */ | |||
9969 | i64 iRowid; /* NOTE(review): presumably rowid of the current entry - confirm */ | |||
9970 | u8 *aPoslist; /* NOTE(review): presumably start of the current position list; 0 at EOF */ | |||
9971 | int nPoslist; /* NOTE(review): presumably size in bytes of aPoslist - confirm */ | |||
9972 | int nSize; /* NOTE(review): presumably bytes used by the poslist size field - confirm */ | |||
9973 | }; | |||
9974 | ||||
9975 | /* | |||
9976 | ** The contents of the "structure" record for each index are represented | |||
9977 | ** using an Fts5Structure record in memory. Which uses instances of the | |||
9978 | ** other Fts5StructureXXX types as components. | |||
9979 | ** | |||
9980 | ** nOriginCntr: | |||
9981 | ** This value is set to non-zero for structure records created for | |||
9982 | ** contentless_delete=1 tables only. In that case it represents the | |||
9983 | ** origin value to apply to the next top-level segment created. | |||
9984 | */ | |||
9985 | struct Fts5StructureSegment { | |||
9986 | int iSegid; /* Segment id */ | |||
9987 | int pgnoFirst; /* First leaf page number in segment */ | |||
9988 | int pgnoLast; /* Last leaf page number in segment */ | |||
9989 | | |||
9990 | /* contentless_delete=1 tables only: */ | |||
9991 | u64 iOrigin1; /* NOTE(review): presumably lower bound of the origin range covered - confirm */ | |||
9992 | u64 iOrigin2; /* NOTE(review): presumably upper bound of the origin range covered - confirm */ | |||
9993 | int nPgTombstone; /* Number of tombstone hash table pages */ | |||
9994 | u64 nEntryTombstone; /* Number of tombstone entries that "count" */ | |||
9995 | u64 nEntry; /* Number of rows in this segment */ | |||
9996 | }; | |||
9997 | struct Fts5StructureLevel { /* One level of an Fts5Structure: an array of nSeg segments */ | |||
9998 | int nMerge; /* Number of segments in incr-merge */ | |||
9999 | int nSeg; /* Total number of segments on level (entries in aSeg[]) */ | |||
10000 | Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ | |||
10001 | }; | |||
10002 | struct Fts5Structure { /* In-memory form of the "structure" record */ | |||
10003 | int nRef; /* Object reference count */ | |||
10004 | u64 nWriteCounter; /* Total leaves written to level 0 */ | |||
10005 | u64 nOriginCntr; /* Origin value for next top-level segment */ | |||
10006 | int nSegment; /* Total segments in this structure */ | |||
10007 | int nLevel; /* Number of levels in this index */ | |||
10008 | Fts5StructureLevel aLevel[FLEXARRAY]; /* Array of nLevel level objects */ | |||
10009 | }; | |||
10010 | | |||
10011 | /* Size (in bytes) of an Fts5Structure object holding up to N levels: the fixed fields before aLevel[] plus N Fts5StructureLevel entries */ | |||
10012 | #define SZ_FTS5STRUCTURE(N)(__builtin_offsetof(Fts5Structure, aLevel) + (N)*sizeof(Fts5StructureLevel )) \ | |||
10013 | (offsetof(Fts5Structure,aLevel)__builtin_offsetof(Fts5Structure, aLevel) + (N)*sizeof(Fts5StructureLevel)) | |||
10014 | ||||
10015 | /* | |||
10016 | ** An object of type Fts5SegWriter is used to write to segments. It embeds one Fts5PageWriter and owns an array of Fts5DlidxWriter objects. | |||
10017 | */ | |||
10018 | struct Fts5PageWriter { /* State for one leaf page under construction */ | |||
10019 | int pgno; /* Page number for this page */ | |||
10020 | int iPrevPgidx; /* Previous value written into pgidx */ | |||
10021 | Fts5Buffer buf; /* Buffer containing leaf data */ | |||
10022 | Fts5Buffer pgidx; /* Buffer containing page-index */ | |||
10023 | Fts5Buffer term; /* Buffer containing previous term on page */ | |||
10024 | }; | |||
10025 | struct Fts5DlidxWriter { /* State for one doclist-index page under construction */ | |||
10026 | int pgno; /* Page number for this page */ | |||
10027 | int bPrevValid; /* True if iPrev is valid */ | |||
10028 | i64 iPrev; /* Previous rowid value written to page */ | |||
10029 | Fts5Buffer buf; /* Buffer containing page data */ | |||
10030 | }; | |||
10031 | struct Fts5SegWriter { /* Top-level writer state for one segment */ | |||
10032 | int iSegid; /* Segid to write to */ | |||
10033 | Fts5PageWriter writer; /* PageWriter object */ | |||
10034 | i64 iPrevRowid; /* Previous rowid written to current leaf */ | |||
10035 | u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ | |||
10036 | u8 bFirstRowidInPage; /* True if next rowid is first in page */ | |||
10037 | /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ | |||
10038 | u8 bFirstTermInPage; /* True if next term will be first in leaf */ | |||
10039 | int nLeafWritten; /* Number of leaf pages written */ | |||
10040 | int nEmpty; /* Number of contiguous term-less nodes */ | |||
10041 | | |||
10042 | int nDlidx; /* Allocated size of aDlidx[] array */ | |||
10043 | Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ | |||
10044 | | |||
10045 | /* Values to insert into the %_idx table */ | |||
10046 | Fts5Buffer btterm; /* Next term to insert into %_idx table */ | |||
10047 | int iBtPage; /* Page number corresponding to btterm */ | |||
10048 | }; | |||
10049 | ||||
10050 | typedef struct Fts5CResult Fts5CResult; | |||
10051 | struct Fts5CResult { /* Result of one comparison in the Fts5Iter.aFirst[] merge state */ | |||
10052 | u16 iFirst; /* aSeg[] index of the iterator that sorts first */ | |||
10053 | u8 bTermEq; /* True if the terms are equal */ | |||
10054 | }; | |||
10055 | ||||
10056 | /* | |||
10057 | ** Object for iterating through a single segment, visiting each term/rowid | |||
10058 | ** pair in the segment. | |||
10059 | ** | |||
10060 | ** pSeg: | |||
10061 | ** The segment to iterate through. | |||
10062 | ** | |||
10063 | ** iLeafPgno: | |||
10064 | ** Current leaf page number within segment. | |||
10065 | ** | |||
10066 | ** iLeafOffset: | |||
10067 | ** Byte offset within the current leaf that is the first byte of the | |||
10068 | ** position list data (one byte past the position-list size field). | |||
10069 | ** | |||
10070 | ** pLeaf: | |||
10071 | ** Buffer containing current leaf page data. Set to NULL at EOF. | |||
10072 | ** | |||
10073 | ** iTermLeafPgno, iTermLeafOffset: | |||
10074 | ** Leaf page number containing the last term read from the segment. And | |||
10075 | ** the offset immediately following the term data. | |||
10076 | ** | |||
10077 | ** flags: | |||
10078 | ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: | |||
10079 | ** | |||
10080 | ** FTS5_SEGITER_ONETERM: | |||
10081 | ** If set, set the iterator to point to EOF after the current doclist | |||
10082 | ** has been exhausted. Do not proceed to the next term in the segment. | |||
10083 | ** | |||
10084 | ** FTS5_SEGITER_REVERSE: | |||
10085 | ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If | |||
10086 | ** it is set, iterate through rowid in descending order instead of the | |||
10087 | ** default ascending order. | |||
10088 | ** | |||
10089 | ** iRowidOffset/nRowidOffset/aRowidOffset: | |||
10090 | ** These are used if the FTS5_SEGITER_REVERSE flag is set. | |||
10091 | ** | |||
10092 | ** For each rowid on the page corresponding to the current term, the | |||
10093 | ** corresponding aRowidOffset[] entry is set to the byte offset of the | |||
10094 | ** start of the "position-list-size" field within the page. | |||
10095 | ** | |||
10096 | ** iTermIdx: | |||
10097 | ** Index of current term on iTermLeafPgno. | |||
10098 | ** | |||
10099 | ** pTombArray (apTombstone[]/nTombstone): | |||
10100 | ** These are used for contentless_delete=1 tables only. When the cursor | |||
10101 | ** is first allocated, the apTombstone[] array is allocated so that it | |||
10102 | ** is large enough for all tombstone hash pages associated with the | |||
10103 | ** segment. The pages themselves are loaded lazily from the database as | |||
10104 | ** they are required. | |||
10105 | */ | |||
10106 | struct Fts5SegIter { | |||
10107 | Fts5StructureSegment *pSeg; /* Segment to iterate through */ | |||
10108 | int flags; /* Mask of configuration flags */ | |||
10109 | int iLeafPgno; /* Current leaf page number */ | |||
10110 | Fts5Data *pLeaf; /* Current leaf data */ | |||
10111 | Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ | |||
10112 | i64 iLeafOffset; /* Byte offset within current leaf */ | |||
10113 | Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */ | |||
10114 | | |||
10115 | /* Next method */ | |||
10116 | void (*xNext)(Fts5Index*, Fts5SegIter*, int*); | |||
10117 | | |||
10118 | /* The page and offset from which the current term was read. The offset | |||
10119 | ** is the offset of the first rowid in the current doclist. */ | |||
10120 | int iTermLeafPgno; | |||
10121 | int iTermLeafOffset; | |||
10122 | | |||
10123 | int iPgidxOff; /* Next offset in pgidx */ | |||
10124 | int iEndofDoclist; /* NOTE(review): presumably offset within the leaf where the current doclist ends - confirm */ | |||
10125 | | |||
10126 | /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ | |||
10127 | int iRowidOffset; /* Current entry in aRowidOffset[] */ | |||
10128 | int nRowidOffset; /* Allocated size of aRowidOffset[] array */ | |||
10129 | int *aRowidOffset; /* Array of offset to rowid fields */ | |||
10130 | | |||
10131 | Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ | |||
10132 | | |||
10133 | /* Variables populated based on current entry. */ | |||
10134 | Fts5Buffer term; /* Current term */ | |||
10135 | i64 iRowid; /* Current rowid */ | |||
10136 | int nPos; /* Number of bytes in current position list */ | |||
10137 | u8 bDel; /* True if the delete flag is set */ | |||
10138 | }; | |||
10139 | ||||
10140 | /* | |||
10141 | ** Array of tombstone pages. Reference counted. | |||
10142 | */ | |||
10143 | struct Fts5TombstoneArray { | |||
10144 | int nRef; /* Number of pointers to this object */ | |||
10145 | int nTombstone; /* NOTE(review): presumably the number of entries in apTombstone[] - confirm */ | |||
10146 | Fts5Data *apTombstone[FLEXARRAY]; /* Array of tombstone pages */ | |||
10147 | }; | |||
10148 | | |||
10149 | /* Size (in bytes) of an Fts5TombstoneArray holding up to N tombstones: the fixed fields before apTombstone[] plus N page pointers */ | |||
10150 | #define SZ_FTS5TOMBSTONEARRAY(N)(__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(N)*sizeof (Fts5Data*)) \ | |||
10151 | (offsetof(Fts5TombstoneArray,apTombstone)__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(N)*sizeof(Fts5Data*)) | |||
10152 | ||||
10153 | /* | |||
10154 | ** Argument is a pointer to an Fts5Data structure that contains a | |||
10155 | ** leaf page. Asserts that szLeaf equals either nn or the 16-bit value stored at p[2]. | |||
10156 | */ | |||
10157 | #define ASSERT_SZLEAF_OK(x)((void) (0)) assert( \((void) (0)) | |||
10158 | (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \((void) (0)) | |||
10159 | )((void) (0)) | |||
10160 | ||||
10161 | #define FTS5_SEGITER_ONETERM0x01 0x01 /* Fts5SegIter.flags: go to EOF once the current doclist is exhausted */ | |||
10162 | #define FTS5_SEGITER_REVERSE0x02 0x02 /* Fts5SegIter.flags: visit rowids in descending order */ | |||
10163 | ||||
10164 | /* | |||
10165 | ** Argument is a pointer to an Fts5Data structure that contains a leaf | |||
10166 | ** page. This macro evaluates to true if the leaf contains no terms, or | |||
10167 | ** false if it contains at least one term. | |||
10168 | */ | |||
10169 | #define fts5LeafIsTermless(x)((x)->szLeaf >= (x)->nn) ((x)->szLeaf >= (x)->nn) | |||
10170 | /* Read the 16-bit big-endian value stored at byte offset szLeaf + i*2 of leaf x. */ | |||
10171 | #define fts5LeafTermOff(x, i)(fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) | |||
10172 | /* Read the first 16-bit header field of leaf x: the byte offset of the first rowid on the page. */ | |||
10173 | #define fts5LeafFirstRowidOff(x)(fts5GetU16((x)->p)) (fts5GetU16((x)->p)) | |||
10174 | ||||
10175 | /* | |||
10176 | ** Object for iterating through the merged results of one or more segments, | |||
10177 | ** visiting each term/rowid pair in the merged data. | |||
10178 | ** | |||
10179 | ** nSeg is always a power of two greater than or equal to the number of | |||
10180 | ** segments that this object is merging data from. Both the aSeg[] and | |||
10181 | ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded | |||
10182 | ** with zeroed objects - these are handled as if they were iterators opened | |||
10183 | ** on empty segments. | |||
10184 | ** | |||
10185 | ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an | |||
10186 | ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the | |||
10187 | ** comparison in this context is the index of the iterator that currently | |||
10188 | ** points to the smaller term/rowid combination. Iterators at EOF are | |||
10189 | ** considered to be greater than all other iterators. | |||
10190 | ** | |||
10191 | ** aFirst[1] contains the index in aSeg[] of the iterator that points to | |||
10192 | ** the smallest key overall. aFirst[0] is unused. | |||
10193 | ** | |||
10194 | ** poslist: | |||
10195 | ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. | |||
10196 | ** There is no way to tell if this is populated or not. | |||
10197 | ** | |||
10198 | ** pColset: | |||
10199 | ** If not NULL, points to an object containing a set of column indices. | |||
10200 | ** Only matches that occur in one of these columns will be returned. | |||
10201 | ** The Fts5Iter does not own the Fts5Colset object, and so it is not | |||
10202 | ** freed when the iterator is closed - it is owned by the upper layer. | |||
10203 | */ | |||
10204 | struct Fts5Iter { | |||
10205 | Fts5IndexIter base; /* Base class containing output vars */ | |||
10206 | Fts5TokenDataIter *pTokenDataIter; /* NOTE(review): purpose not visible here; presumably state for tokendata queries - confirm */ | |||
10207 | | |||
10208 | Fts5Index *pIndex; /* Index that owns this iterator */ | |||
10209 | Fts5Buffer poslist; /* Buffer containing current poslist */ | |||
10210 | Fts5Colset *pColset; /* Restrict matches to these columns */ | |||
10211 | | |||
10212 | /* Invoked to set output variables. */ | |||
10213 | void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); | |||
10214 | | |||
10215 | int nSeg; /* Size of aSeg[] array (always a power of two) */ | |||
10216 | int bRev; /* True to iterate in reverse order */ | |||
10217 | u8 bSkipEmpty; /* True to skip deleted entries */ | |||
10218 | | |||
10219 | i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ | |||
10220 | Fts5CResult *aFirst; /* Current merge state (see above) */ | |||
10221 | Fts5SegIter aSeg[FLEXARRAY]; /* Array of segment iterators */ | |||
10222 | }; | |||
10223 | | |||
10224 | /* Size (in bytes) of an Fts5Iter object holding up to N segment iterators: the fixed fields before aSeg[] plus N Fts5SegIter entries */ | |||
10225 | #define SZ_FTS5ITER(N)(__builtin_offsetof(Fts5Iter, aSeg)+(N)*sizeof(Fts5SegIter)) (offsetof(Fts5Iter,aSeg)__builtin_offsetof(Fts5Iter, aSeg)+(N)*sizeof(Fts5SegIter)) | |||
10226 | ||||
10227 | /* | |||
10228 | ** An instance of the following type is used to iterate through the contents | |||
10229 | ** of a doclist-index record. | |||
10230 | ** | |||
10231 | ** pData: | |||
10232 | ** Record containing the doclist-index data. | |||
10233 | ** | |||
10234 | ** bEof: | |||
10235 | ** Set to true once iterator has reached EOF. | |||
10236 | ** | |||
10237 | ** iOff: | |||
10238 | ** Set to the current offset within record pData. | |||
10239 | */ | |||
10240 | struct Fts5DlidxLvl { | |||
10241 | Fts5Data *pData; /* Data for current page of this level */ | |||
10242 | int iOff; /* Current offset into pData */ | |||
10243 | int bEof; /* At EOF already */ | |||
10244 | int iFirstOff; /* Used by reverse iterators */ | |||
10245 | | |||
10246 | /* Output variables */ | |||
10247 | int iLeafPgno; /* Page number of current leaf page */ | |||
10248 | i64 iRowid; /* First rowid on leaf iLeafPgno */ | |||
10249 | }; | |||
10250 | struct Fts5DlidxIter { /* Iterator over a doclist index; holds one Fts5DlidxLvl per level */ | |||
10251 | int nLvl; /* NOTE(review): presumably the number of entries in aLvl[] - confirm */ | |||
10252 | int iSegid; /* NOTE(review): presumably id of the segment the doclist index belongs to - confirm */ | |||
10253 | Fts5DlidxLvl aLvl[FLEXARRAY]; /* Per-level iteration state */ | |||
10254 | }; | |||
10255 | | |||
10256 | /* Size (in bytes) of an Fts5DlidxIter object with up to N levels: the fixed fields before aLvl[] plus N Fts5DlidxLvl entries */ | |||
10257 | #define SZ_FTS5DLIDXITER(N)(__builtin_offsetof(Fts5DlidxIter, aLvl)+(N)*sizeof(Fts5DlidxLvl )) \ | |||
10258 | (offsetof(Fts5DlidxIter,aLvl)__builtin_offsetof(Fts5DlidxIter, aLvl)+(N)*sizeof(Fts5DlidxLvl)) | |||
10259 | ||||
10260 | static void fts5PutU16(u8 *aOut, u16 iVal){ | |||
10261 | aOut[0] = (iVal>>8); | |||
10262 | aOut[1] = (iVal&0xFF); | |||
10263 | } | |||
10264 | ||||
10265 | static u16 fts5GetU16(const u8 *aIn){ | |||
10266 | return ((u16)aIn[0] << 8) + aIn[1]; | |||
10267 | } | |||
10268 | ||||
10269 | /* | |||
10270 | ** The only argument points to a buffer at least 8 bytes in size. This | |||
10271 | ** function interprets the first 8 bytes of the buffer as a 64-bit big-endian | |||
10272 | ** unsigned integer and returns the result. | |||
10273 | */ | |||
10274 | static u64 fts5GetU64(u8 *a){ | |||
10275 | return ((u64)a[0] << 56) | |||
10276 | + ((u64)a[1] << 48) | |||
10277 | + ((u64)a[2] << 40) | |||
10278 | + ((u64)a[3] << 32) | |||
10279 | + ((u64)a[4] << 24) | |||
10280 | + ((u64)a[5] << 16) | |||
10281 | + ((u64)a[6] << 8) | |||
10282 | + ((u64)a[7] << 0); | |||
10283 | } | |||
10284 | ||||
10285 | /* | |||
10286 | ** The only argument points to a buffer at least 4 bytes in size. This | |||
10287 | ** function interprets the first 4 bytes of the buffer as a 32-bit big-endian | |||
10288 | ** unsigned integer and returns the result. | |||
10289 | */ | |||
10290 | static u32 fts5GetU32(const u8 *a){ | |||
10291 | return ((u32)a[0] << 24) | |||
10292 | + ((u32)a[1] << 16) | |||
10293 | + ((u32)a[2] << 8) | |||
10294 | + ((u32)a[3] << 0); | |||
10295 | } | |||
10296 | ||||
10297 | /* | |||
10298 | ** Write iVal, formated as a 64-bit big-endian unsigned integer, to the | |||
10299 | ** buffer indicated by the first argument. | |||
10300 | */ | |||
10301 | static void fts5PutU64(u8 *a, u64 iVal){ | |||
10302 | a[0] = ((iVal >> 56) & 0xFF); | |||
10303 | a[1] = ((iVal >> 48) & 0xFF); | |||
10304 | a[2] = ((iVal >> 40) & 0xFF); | |||
10305 | a[3] = ((iVal >> 32) & 0xFF); | |||
10306 | a[4] = ((iVal >> 24) & 0xFF); | |||
10307 | a[5] = ((iVal >> 16) & 0xFF); | |||
10308 | a[6] = ((iVal >> 8) & 0xFF); | |||
10309 | a[7] = ((iVal >> 0) & 0xFF); | |||
10310 | } | |||
10311 | ||||
10312 | /* | |||
10313 | ** Write iVal, formated as a 32-bit big-endian unsigned integer, to the | |||
10314 | ** buffer indicated by the first argument. | |||
10315 | */ | |||
10316 | static void fts5PutU32(u8 *a, u32 iVal){ | |||
10317 | a[0] = ((iVal >> 24) & 0xFF); | |||
10318 | a[1] = ((iVal >> 16) & 0xFF); | |||
10319 | a[2] = ((iVal >> 8) & 0xFF); | |||
10320 | a[3] = ((iVal >> 0) & 0xFF); | |||
10321 | } | |||
10322 | ||||
10323 | /* | |||
10324 | ** Allocate and return a buffer at least nByte bytes in size. | |||
10325 | ** | |||
10326 | ** If an OOM error is encountered, return NULL and set the error code in | |||
10327 | ** the Fts5Index handle passed as the first argument. | |||
10328 | */ | |||
10329 | static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){ | |||
10330 | return sqlite3Fts5MallocZero(&p->rc, nByte); | |||
10331 | } | |||
10332 | ||||
/*
** Compare the contents of buffer pLeft with the nRight byte blob pRight.
** Only compiled in SQLITE_DEBUG builds.
**
** The common prefix is compared with memcmp(); if it is identical the
** shorter operand is the lesser. The result is negative if pLeft is
** smaller, zero if the two are equal, and positive if pRight is smaller:
**
**     res = *pLeft - *pRight
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nShared = MIN(pLeft->n, nRight);
  int diff = memcmp(pLeft->p, pRight, nShared);
  if( diff!=0 ){
    return diff;
  }
  return pLeft->n - nRight;
}
#endif
10351 | ||||
10352 | /* | |||
10353 | ** Compare the contents of the two buffers using memcmp(). If one buffer | |||
10354 | ** is a prefix of the other, it is considered the lesser. | |||
10355 | ** | |||
10356 | ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or | |||
10357 | ** +ve if pRight is smaller than pLeft. In other words: | |||
10358 | ** | |||
10359 | ** res = *pLeft - *pRight | |||
10360 | */ | |||
10361 | static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ | |||
10362 | int nCmp, res; | |||
10363 | nCmp = MIN(pLeft->n, pRight->n)(((pLeft->n) < (pRight->n)) ? (pLeft->n) : (pRight ->n)); | |||
10364 | assert( nCmp<=0 || pLeft->p!=0 )((void) (0)); | |||
10365 | assert( nCmp<=0 || pRight->p!=0 )((void) (0)); | |||
10366 | res = fts5Memcmp(pLeft->p, pRight->p, nCmp)((nCmp)<=0 ? 0 : memcmp((pLeft->p), (pRight->p), (nCmp ))); | |||
10367 | return (res==0 ? (pLeft->n - pRight->n) : res); | |||
10368 | } | |||
10369 | ||||
10370 | static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ | |||
10371 | int ret; | |||
10372 | fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret)sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(ret)); | |||
10373 | return ret; | |||
10374 | } | |||
10375 | ||||
10376 | /* | |||
10377 | ** Close the read-only blob handle, if it is open. | |||
10378 | */ | |||
10379 | static void fts5IndexCloseReader(Fts5Index *p){ | |||
10380 | if( p->pReader ){ | |||
10381 | int rc; | |||
10382 | sqlite3_blob *pReader = p->pReader; | |||
10383 | p->pReader = 0; | |||
10384 | rc = sqlite3_blob_closesqlite3_api->blob_close(pReader); | |||
10385 | if( p->rc==SQLITE_OK0 ) p->rc = rc; | |||
10386 | } | |||
10387 | } | |||
10388 | ||||
10389 | /* | |||
10390 | ** Retrieve a record from the %_data table. | |||
10391 | ** | |||
10392 | ** If an error occurs, NULL is returned and an error left in the | |||
10393 | ** Fts5Index object. | |||
10394 | */ | |||
10395 | static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ | |||
10396 | Fts5Data *pRet = 0; | |||
10397 | if( p->rc==SQLITE_OK0 ){ | |||
10398 | int rc = SQLITE_OK0; | |||
10399 | ||||
10400 | if( p->pReader ){ | |||
10401 | /* This call may return SQLITE_ABORT if there has been a savepoint | |||
10402 | ** rollback since it was last used. In this case a new blob handle | |||
10403 | ** is required. */ | |||
10404 | sqlite3_blob *pBlob = p->pReader; | |||
10405 | p->pReader = 0; | |||
10406 | rc = sqlite3_blob_reopensqlite3_api->blob_reopen(pBlob, iRowid); | |||
10407 | assert( p->pReader==0 )((void) (0)); | |||
10408 | p->pReader = pBlob; | |||
10409 | if( rc!=SQLITE_OK0 ){ | |||
10410 | fts5IndexCloseReader(p); | |||
10411 | } | |||
10412 | if( rc==SQLITE_ABORT4 ) rc = SQLITE_OK0; | |||
10413 | } | |||
10414 | ||||
10415 | /* If the blob handle is not open at this point, open it and seek | |||
10416 | ** to the requested entry. */ | |||
10417 | if( p->pReader==0 && rc==SQLITE_OK0 ){ | |||
10418 | Fts5Config *pConfig = p->pConfig; | |||
10419 | rc = sqlite3_blob_opensqlite3_api->blob_open(pConfig->db, | |||
10420 | pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader | |||
10421 | ); | |||
10422 | } | |||
10423 | ||||
10424 | /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls | |||
10425 | ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. | |||
10426 | ** All the reasons those functions might return SQLITE_ERROR - missing | |||
10427 | ** table, missing row, non-blob/text in block column - indicate | |||
10428 | ** backing store corruption. */ | |||
10429 | if( rc==SQLITE_ERROR1 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10430 | ||||
10431 | if( rc==SQLITE_OK0 ){ | |||
10432 | u8 *aOut = 0; /* Read blob data into this buffer */ | |||
10433 | int nByte = sqlite3_blob_bytessqlite3_api->blob_bytes(p->pReader); | |||
10434 | int szData = (sizeof(Fts5Data) + 7) & ~7; | |||
10435 | sqlite3_int64 nAlloc = szData + nByte + FTS5_DATA_PADDING20; | |||
10436 | pRet = (Fts5Data*)sqlite3_malloc64sqlite3_api->malloc64(nAlloc); | |||
10437 | if( pRet ){ | |||
10438 | pRet->nn = nByte; | |||
10439 | aOut = pRet->p = (u8*)pRet + szData; | |||
10440 | }else{ | |||
10441 | rc = SQLITE_NOMEM7; | |||
10442 | } | |||
10443 | ||||
10444 | if( rc==SQLITE_OK0 ){ | |||
10445 | rc = sqlite3_blob_readsqlite3_api->blob_read(p->pReader, aOut, nByte, 0); | |||
10446 | } | |||
10447 | if( rc!=SQLITE_OK0 ){ | |||
10448 | sqlite3_freesqlite3_api->free(pRet); | |||
10449 | pRet = 0; | |||
10450 | }else{ | |||
10451 | /* TODO1: Fix this */ | |||
10452 | pRet->p[nByte] = 0x00; | |||
10453 | pRet->p[nByte+1] = 0x00; | |||
10454 | pRet->szLeaf = fts5GetU16(&pRet->p[2]); | |||
10455 | } | |||
10456 | } | |||
10457 | p->rc = rc; | |||
10458 | p->nRead++; | |||
10459 | } | |||
10460 | ||||
10461 | assert( (pRet==0)==(p->rc!=SQLITE_OK) )((void) (0)); | |||
10462 | assert( pRet==0 || EIGHT_BYTE_ALIGNMENT( pRet->p ) )((void) (0)); | |||
10463 | return pRet; | |||
10464 | } | |||
10465 | ||||
10466 | ||||
10467 | /* | |||
10468 | ** Release a reference to data record returned by an earlier call to | |||
10469 | ** fts5DataRead(). | |||
10470 | */ | |||
10471 | static void fts5DataRelease(Fts5Data *pData){ | |||
10472 | sqlite3_freesqlite3_api->free(pData); | |||
10473 | } | |||
10474 | ||||
10475 | static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){ | |||
10476 | Fts5Data *pRet = fts5DataRead(p, iRowid); | |||
10477 | if( pRet ){ | |||
10478 | if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){ | |||
10479 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10480 | fts5DataRelease(pRet); | |||
10481 | pRet = 0; | |||
10482 | } | |||
10483 | } | |||
10484 | return pRet; | |||
10485 | } | |||
10486 | ||||
10487 | static int fts5IndexPrepareStmt( | |||
10488 | Fts5Index *p, | |||
10489 | sqlite3_stmt **ppStmt, | |||
10490 | char *zSql | |||
10491 | ){ | |||
10492 | if( p->rc==SQLITE_OK0 ){ | |||
10493 | if( zSql ){ | |||
10494 | int rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(p->pConfig->db, zSql, -1, | |||
10495 | SQLITE_PREPARE_PERSISTENT0x01|SQLITE_PREPARE_NO_VTAB0x04, | |||
10496 | ppStmt, 0); | |||
10497 | /* If this prepare() call fails with SQLITE_ERROR, then one of the | |||
10498 | ** %_idx or %_data tables has been removed or modified. Call this | |||
10499 | ** corruption. */ | |||
10500 | p->rc = (rc==SQLITE_ERROR1 ? SQLITE_CORRUPT11 : rc); | |||
10501 | }else{ | |||
10502 | p->rc = SQLITE_NOMEM7; | |||
10503 | } | |||
10504 | } | |||
10505 | sqlite3_freesqlite3_api->free(zSql); | |||
10506 | return p->rc; | |||
10507 | } | |||
10508 | ||||
10509 | ||||
10510 | /* | |||
10511 | ** INSERT OR REPLACE a record into the %_data table. | |||
10512 | */ | |||
10513 | static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ | |||
10514 | if( p->rc!=SQLITE_OK0 ) return; | |||
10515 | ||||
10516 | if( p->pWriter==0 ){ | |||
10517 | Fts5Config *pConfig = p->pConfig; | |||
10518 | fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintfsqlite3_api->mprintf( | |||
10519 | "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", | |||
10520 | pConfig->zDb, pConfig->zName | |||
10521 | )); | |||
10522 | if( p->rc ) return; | |||
10523 | } | |||
10524 | ||||
10525 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pWriter, 1, iRowid); | |||
10526 | sqlite3_bind_blobsqlite3_api->bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
10527 | sqlite3_stepsqlite3_api->step(p->pWriter); | |||
10528 | p->rc = sqlite3_resetsqlite3_api->reset(p->pWriter); | |||
10529 | sqlite3_bind_nullsqlite3_api->bind_null(p->pWriter, 2); | |||
10530 | } | |||
10531 | ||||
10532 | /* | |||
10533 | ** Execute the following SQL: | |||
10534 | ** | |||
10535 | ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast | |||
10536 | */ | |||
10537 | static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ | |||
10538 | if( p->rc!=SQLITE_OK0 ) return; | |||
10539 | ||||
10540 | if( p->pDeleter==0 ){ | |||
10541 | Fts5Config *pConfig = p->pConfig; | |||
10542 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf( | |||
10543 | "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?", | |||
10544 | pConfig->zDb, pConfig->zName | |||
10545 | ); | |||
10546 | if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return; | |||
10547 | } | |||
10548 | ||||
10549 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pDeleter, 1, iFirst); | |||
10550 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pDeleter, 2, iLast); | |||
10551 | sqlite3_stepsqlite3_api->step(p->pDeleter); | |||
10552 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDeleter); | |||
10553 | } | |||
10554 | ||||
10555 | /* | |||
10556 | ** Remove all records associated with segment iSegid. | |||
10557 | */ | |||
10558 | static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){ | |||
10559 | int iSegid = pSeg->iSegid; | |||
10560 | i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) ); | |||
10561 | i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)( ((i64)(iSegid+1) << (31 +5 +1)) + ((i64)(0) << ( 31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) )-1; | |||
10562 | fts5DataDelete(p, iFirst, iLast); | |||
10563 | ||||
10564 | if( pSeg->nPgTombstone ){ | |||
10565 | i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0)( ((i64)(iSegid+(1<<16)) << (31 +5 +1)) + ((i64)( 0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) ); | |||
10566 | i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1)( ((i64)(iSegid+(1<<16)) << (31 +5 +1)) + ((i64)( 0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg ->nPgTombstone-1)) ); | |||
10567 | fts5DataDelete(p, iTomb1, iTomb2); | |||
10568 | } | |||
10569 | if( p->pIdxDeleter==0 ){ | |||
10570 | Fts5Config *pConfig = p->pConfig; | |||
10571 | fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintfsqlite3_api->mprintf( | |||
10572 | "DELETE FROM '%q'.'%q_idx' WHERE segid=?", | |||
10573 | pConfig->zDb, pConfig->zName | |||
10574 | )); | |||
10575 | } | |||
10576 | if( p->rc==SQLITE_OK0 ){ | |||
10577 | sqlite3_bind_intsqlite3_api->bind_int(p->pIdxDeleter, 1, iSegid); | |||
10578 | sqlite3_stepsqlite3_api->step(p->pIdxDeleter); | |||
10579 | p->rc = sqlite3_resetsqlite3_api->reset(p->pIdxDeleter); | |||
10580 | } | |||
10581 | } | |||
10582 | ||||
10583 | /* | |||
10584 | ** Release a reference to an Fts5Structure object returned by an earlier | |||
10585 | ** call to fts5StructureRead() or fts5StructureDecode(). | |||
10586 | */ | |||
10587 | static void fts5StructureRelease(Fts5Structure *pStruct){ | |||
10588 | if( pStruct && 0>=(--pStruct->nRef) ){ | |||
10589 | int i; | |||
10590 | assert( pStruct->nRef==0 )((void) (0)); | |||
10591 | for(i=0; i<pStruct->nLevel; i++){ | |||
10592 | sqlite3_freesqlite3_api->free(pStruct->aLevel[i].aSeg); | |||
10593 | } | |||
10594 | sqlite3_freesqlite3_api->free(pStruct); | |||
10595 | } | |||
10596 | } | |||
10597 | ||||
10598 | static void fts5StructureRef(Fts5Structure *pStruct){ | |||
10599 | pStruct->nRef++; | |||
10600 | } | |||
10601 | ||||
10602 | static void *sqlite3Fts5StructureRef(Fts5Index *p){ | |||
10603 | fts5StructureRef(p->pStruct); | |||
10604 | return (void*)p->pStruct; | |||
10605 | } | |||
10606 | static void sqlite3Fts5StructureRelease(void *p){ | |||
10607 | if( p ){ | |||
10608 | fts5StructureRelease((Fts5Structure*)p); | |||
10609 | } | |||
10610 | } | |||
10611 | static int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){ | |||
10612 | if( p->pStruct!=(Fts5Structure*)pStruct ){ | |||
10613 | return SQLITE_ABORT4; | |||
10614 | } | |||
10615 | return SQLITE_OK0; | |||
10616 | } | |||
10617 | ||||
10618 | /* | |||
10619 | ** Ensure that structure object (*pp) is writable. | |||
10620 | ** | |||
10621 | ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If | |||
10622 | ** an error occurs, (*pRc) is set to an SQLite error code before returning. | |||
10623 | */ | |||
10624 | static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){ | |||
10625 | Fts5Structure *p = *pp; | |||
10626 | if( *pRc==SQLITE_OK0 && p->nRef>1 ){ | |||
10627 | i64 nByte = SZ_FTS5STRUCTURE(p->nLevel)(__builtin_offsetof(Fts5Structure, aLevel) + (p->nLevel)*sizeof (Fts5StructureLevel)); | |||
10628 | Fts5Structure *pNew; | |||
10629 | pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte); | |||
10630 | if( pNew ){ | |||
10631 | int i; | |||
10632 | memcpy(pNew, p, nByte); | |||
10633 | for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0; | |||
10634 | for(i=0; i<p->nLevel; i++){ | |||
10635 | Fts5StructureLevel *pLvl = &pNew->aLevel[i]; | |||
10636 | nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg; | |||
10637 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte); | |||
10638 | if( pLvl->aSeg==0 ){ | |||
10639 | for(i=0; i<p->nLevel; i++){ | |||
10640 | sqlite3_freesqlite3_api->free(pNew->aLevel[i].aSeg); | |||
10641 | } | |||
10642 | sqlite3_freesqlite3_api->free(pNew); | |||
10643 | return; | |||
10644 | } | |||
10645 | memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte); | |||
10646 | } | |||
10647 | p->nRef--; | |||
10648 | pNew->nRef = 1; | |||
10649 | } | |||
10650 | *pp = pNew; | |||
10651 | } | |||
10652 | } | |||
10653 | ||||
10654 | /* | |||
10655 | ** Deserialize and return the structure record currently stored in serialized | |||
10656 | ** form within buffer pData/nData. | |||
10657 | ** | |||
10658 | ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array | |||
10659 | ** are over-allocated by one slot. This allows the structure contents | |||
10660 | ** to be more easily edited. | |||
10661 | ** | |||
10662 | ** If an error occurs, *ppOut is set to NULL and an SQLite error code | |||
10663 | ** returned. Otherwise, *ppOut is set to point to the new object and | |||
10664 | ** SQLITE_OK returned. | |||
10665 | */ | |||
10666 | static int fts5StructureDecode( | |||
10667 | const u8 *pData, /* Buffer containing serialized structure */ | |||
10668 | int nData, /* Size of buffer pData in bytes */ | |||
10669 | int *piCookie, /* Configuration cookie value */ | |||
10670 | Fts5Structure **ppOut /* OUT: Deserialized object */ | |||
10671 | ){ | |||
10672 | int rc = SQLITE_OK0; | |||
10673 | int i = 0; | |||
10674 | int iLvl; | |||
10675 | int nLevel = 0; | |||
10676 | int nSegment = 0; | |||
10677 | sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */ | |||
10678 | Fts5Structure *pRet = 0; /* Structure object to return */ | |||
10679 | int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */ | |||
10680 | u64 nOriginCntr = 0; /* Largest origin value seen so far */ | |||
10681 | ||||
10682 | /* Grab the cookie value */ | |||
10683 | if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); | |||
10684 | i = 4; | |||
10685 | ||||
10686 | /* Check if this is a V2 structure record. Set bStructureV2 if it is. */ | |||
10687 | if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2"\xFF\x00\x00\x01", 4) ){ | |||
10688 | i += 4; | |||
10689 | bStructureV2 = 1; | |||
10690 | } | |||
10691 | ||||
10692 | /* Read the total number of levels and segments from the start of the | |||
10693 | ** structure record. */ | |||
10694 | i += fts5GetVarint32(&pData[i], nLevel)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nLevel)); | |||
10695 | i += fts5GetVarint32(&pData[i], nSegment)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nSegment)); | |||
10696 | if( nLevel>FTS5_MAX_SEGMENT2000 || nLevel<0 | |||
10697 | || nSegment>FTS5_MAX_SEGMENT2000 || nSegment<0 | |||
10698 | ){ | |||
10699 | return FTS5_CORRUPT(11 | (1<<8)); | |||
10700 | } | |||
10701 | nByte = SZ_FTS5STRUCTURE(nLevel)(__builtin_offsetof(Fts5Structure, aLevel) + (nLevel)*sizeof( Fts5StructureLevel)); | |||
10702 | pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); | |||
10703 | ||||
10704 | if( pRet ){ | |||
10705 | pRet->nRef = 1; | |||
10706 | pRet->nLevel = nLevel; | |||
10707 | pRet->nSegment = nSegment; | |||
10708 | i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); | |||
10709 | ||||
10710 | for(iLvl=0; rc==SQLITE_OK0 && iLvl<nLevel; iLvl++){ | |||
10711 | Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; | |||
10712 | int nTotal = 0; | |||
10713 | int iSeg; | |||
10714 | ||||
10715 | if( i>=nData ){ | |||
10716 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10717 | }else{ | |||
10718 | i += fts5GetVarint32(&pData[i], pLvl->nMerge)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pLvl->nMerge )); | |||
10719 | i += fts5GetVarint32(&pData[i], nTotal)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nTotal)); | |||
10720 | if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10721 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, | |||
10722 | nTotal * sizeof(Fts5StructureSegment) | |||
10723 | ); | |||
10724 | nSegment -= nTotal; | |||
10725 | } | |||
10726 | ||||
10727 | if( rc==SQLITE_OK0 ){ | |||
10728 | pLvl->nSeg = nTotal; | |||
10729 | for(iSeg=0; iSeg<nTotal; iSeg++){ | |||
10730 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | |||
10731 | if( i>=nData ){ | |||
10732 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10733 | break; | |||
10734 | } | |||
10735 | assert( pSeg!=0 )((void) (0)); | |||
10736 | i += fts5GetVarint32(&pData[i], pSeg->iSegid)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->iSegid )); | |||
10737 | i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->pgnoFirst )); | |||
10738 | i += fts5GetVarint32(&pData[i], pSeg->pgnoLast)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->pgnoLast )); | |||
10739 | if( bStructureV2 ){ | |||
10740 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->iOrigin1); | |||
10741 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->iOrigin2); | |||
10742 | i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->nPgTombstone )); | |||
10743 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->nEntryTombstone); | |||
10744 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->nEntry); | |||
10745 | nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2)(((nOriginCntr) > (pSeg->iOrigin2)) ? (nOriginCntr) : ( pSeg->iOrigin2)); | |||
10746 | } | |||
10747 | if( pSeg->pgnoLast<pSeg->pgnoFirst ){ | |||
10748 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10749 | break; | |||
10750 | } | |||
10751 | } | |||
10752 | if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10753 | if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10754 | } | |||
10755 | } | |||
10756 | if( nSegment!=0 && rc==SQLITE_OK0 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
10757 | if( bStructureV2 ){ | |||
10758 | pRet->nOriginCntr = nOriginCntr+1; | |||
10759 | } | |||
10760 | ||||
10761 | if( rc!=SQLITE_OK0 ){ | |||
10762 | fts5StructureRelease(pRet); | |||
10763 | pRet = 0; | |||
10764 | } | |||
10765 | } | |||
10766 | ||||
10767 | *ppOut = pRet; | |||
10768 | return rc; | |||
10769 | } | |||
10770 | ||||
10771 | /* | |||
10772 | ** Add a level to the Fts5Structure.aLevel[] array of structure object | |||
10773 | ** (*ppStruct). | |||
10774 | */ | |||
10775 | static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ | |||
10776 | fts5StructureMakeWritable(pRc, ppStruct); | |||
10777 | assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK )((void) (0)); | |||
10778 | if( *pRc==SQLITE_OK0 ){ | |||
10779 | Fts5Structure *pStruct = *ppStruct; | |||
10780 | int nLevel = pStruct->nLevel; | |||
10781 | sqlite3_int64 nByte = SZ_FTS5STRUCTURE(nLevel+2)(__builtin_offsetof(Fts5Structure, aLevel) + (nLevel+2)*sizeof (Fts5StructureLevel)); | |||
10782 | ||||
10783 | pStruct = sqlite3_realloc64sqlite3_api->realloc64(pStruct, nByte); | |||
10784 | if( pStruct ){ | |||
10785 | memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); | |||
10786 | pStruct->nLevel++; | |||
10787 | *ppStruct = pStruct; | |||
10788 | }else{ | |||
10789 | *pRc = SQLITE_NOMEM7; | |||
10790 | } | |||
10791 | } | |||
10792 | } | |||
10793 | ||||
10794 | /* | |||
10795 | ** Extend level iLvl so that there is room for at least nExtra more | |||
10796 | ** segments. | |||
10797 | */ | |||
10798 | static void fts5StructureExtendLevel( | |||
10799 | int *pRc, | |||
10800 | Fts5Structure *pStruct, | |||
10801 | int iLvl, | |||
10802 | int nExtra, | |||
10803 | int bInsert | |||
10804 | ){ | |||
10805 | if( *pRc==SQLITE_OK0 ){ | |||
10806 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | |||
10807 | Fts5StructureSegment *aNew; | |||
10808 | sqlite3_int64 nByte; | |||
10809 | ||||
10810 | nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); | |||
10811 | aNew = sqlite3_realloc64sqlite3_api->realloc64(pLvl->aSeg, nByte); | |||
10812 | if( aNew ){ | |||
10813 | if( bInsert==0 ){ | |||
10814 | memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); | |||
10815 | }else{ | |||
10816 | int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); | |||
10817 | memmove(&aNew[nExtra], aNew, nMove); | |||
10818 | memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); | |||
10819 | } | |||
10820 | pLvl->aSeg = aNew; | |||
10821 | }else{ | |||
10822 | *pRc = SQLITE_NOMEM7; | |||
10823 | } | |||
10824 | } | |||
10825 | } | |||
10826 | ||||
10827 | static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){ | |||
10828 | Fts5Structure *pRet = 0; | |||
10829 | Fts5Config *pConfig = p->pConfig; | |||
10830 | int iCookie; /* Configuration cookie */ | |||
10831 | Fts5Data *pData; | |||
10832 | ||||
10833 | pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID10); | |||
10834 | if( p->rc==SQLITE_OK0 ){ | |||
10835 | /* TODO: Do we need this if the leaf-index is appended? Probably... */ | |||
10836 | memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING20); | |||
10837 | p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); | |||
10838 | if( p->rc==SQLITE_OK0 && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){ | |||
10839 | p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); | |||
10840 | } | |||
10841 | fts5DataRelease(pData); | |||
10842 | if( p->rc!=SQLITE_OK0 ){ | |||
10843 | fts5StructureRelease(pRet); | |||
10844 | pRet = 0; | |||
10845 | } | |||
10846 | } | |||
10847 | ||||
10848 | return pRet; | |||
10849 | } | |||
10850 | ||||
10851 | static i64 fts5IndexDataVersion(Fts5Index *p){ | |||
10852 | i64 iVersion = 0; | |||
10853 | ||||
10854 | if( p->rc==SQLITE_OK0 ){ | |||
10855 | if( p->pDataVersion==0 ){ | |||
10856 | p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, | |||
10857 | sqlite3_mprintfsqlite3_api->mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) | |||
10858 | ); | |||
10859 | if( p->rc ) return 0; | |||
10860 | } | |||
10861 | ||||
10862 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(p->pDataVersion) ){ | |||
10863 | iVersion = sqlite3_column_int64sqlite3_api->column_int64(p->pDataVersion, 0); | |||
10864 | } | |||
10865 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDataVersion); | |||
10866 | } | |||
10867 | ||||
10868 | return iVersion; | |||
10869 | } | |||
10870 | ||||
10871 | /* | |||
10872 | ** Read, deserialize and return the structure record. | |||
10873 | ** | |||
10874 | ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array | |||
10875 | ** are over-allocated as described for function fts5StructureDecode() | |||
10876 | ** above. | |||
10877 | ** | |||
10878 | ** If an error occurs, NULL is returned and an error code left in the | |||
10879 | ** Fts5Index handle. If an error has already occurred when this function | |||
10880 | ** is called, it is a no-op. | |||
10881 | */ | |||
10882 | static Fts5Structure *fts5StructureRead(Fts5Index *p){ | |||
10883 | ||||
10884 | if( p->pStruct==0 ){ | |||
10885 | p->iStructVersion = fts5IndexDataVersion(p); | |||
10886 | if( p->rc==SQLITE_OK0 ){ | |||
10887 | p->pStruct = fts5StructureReadUncached(p); | |||
10888 | } | |||
10889 | } | |||
10890 | ||||
10891 | #if 0 | |||
10892 | else{ | |||
10893 | Fts5Structure *pTest = fts5StructureReadUncached(p); | |||
10894 | if( pTest ){ | |||
10895 | int i, j; | |||
10896 | assert_nc( p->pStruct->nSegment==pTest->nSegment )((void) (0)); | |||
10897 | assert_nc( p->pStruct->nLevel==pTest->nLevel )((void) (0)); | |||
10898 | for(i=0; i<pTest->nLevel; i++){ | |||
10899 | assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge )((void) (0)); | |||
10900 | assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg )((void) (0)); | |||
10901 | for(j=0; j<pTest->aLevel[i].nSeg; j++){ | |||
10902 | Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j]; | |||
10903 | Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j]; | |||
10904 | assert_nc( p1->iSegid==p2->iSegid )((void) (0)); | |||
10905 | assert_nc( p1->pgnoFirst==p2->pgnoFirst )((void) (0)); | |||
10906 | assert_nc( p1->pgnoLast==p2->pgnoLast )((void) (0)); | |||
10907 | } | |||
10908 | } | |||
10909 | fts5StructureRelease(pTest); | |||
10910 | } | |||
10911 | } | |||
10912 | #endif | |||
10913 | ||||
10914 | if( p->rc!=SQLITE_OK0 ) return 0; | |||
10915 | assert( p->iStructVersion!=0 )((void) (0)); | |||
10916 | assert( p->pStruct!=0 )((void) (0)); | |||
10917 | fts5StructureRef(p->pStruct); | |||
10918 | return p->pStruct; | |||
10919 | } | |||
10920 | ||||
10921 | static void fts5StructureInvalidate(Fts5Index *p){ | |||
10922 | if( p->pStruct ){ | |||
10923 | fts5StructureRelease(p->pStruct); | |||
10924 | p->pStruct = 0; | |||
10925 | } | |||
10926 | } | |||
10927 | ||||
/*
** Return the sum of Fts5StructureLevel.nSeg over all levels of pStruct,
** or 0 if pStruct is NULL. This function is only ever used as part of
** assert() conditions, so it is compiled for SQLITE_DEBUG builds only.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running total of segments */
  int i;                          /* Level index */

  if( pStruct==0 ) return 0;
  for(i=0; i<pStruct->nLevel; i++){
    nTotal += pStruct->aLevel[i].nSeg;
  }
  return nTotal;
}
#endif
10945 | ||||
/*
** Append nBlob bytes from pBlob to buffer pBuf without growing it. The
** caller must already have reserved enough space - this is only checked
** by an assert(). The macro arguments are evaluated more than once.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) );             \
  memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob);             \
  (pBuf)->n += nBlob;                                      \
}
10951 | ||||
/*
** Append iVal, encoded as a varint, to buffer pBuf without growing it.
** The caller must already have reserved enough space - the assert() only
** runs after the bytes have been written. pBuf is evaluated more than once.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                    \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
  assert( (pBuf)->nSpace>=(pBuf)->n );                              \
}
10956 | ||||
10957 | ||||
10958 | /* | |||
10959 | ** Serialize and store the "structure" record. | |||
10960 | ** | |||
10961 | ** If an error occurs, leave an error code in the Fts5Index object. If an | |||
10962 | ** error has already occurred, this function is a no-op. | |||
10963 | */ | |||
10964 | static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ | |||
10965 | if( p->rc==SQLITE_OK0 ){ | |||
10966 | Fts5Buffer buf; /* Buffer to serialize record into */ | |||
10967 | int iLvl; /* Used to iterate through levels */ | |||
10968 | int iCookie; /* Cookie value to store */ | |||
10969 | int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9)); | |||
10970 | ||||
10971 | assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) )((void) (0)); | |||
10972 | memset(&buf, 0, sizeof(Fts5Buffer)); | |||
10973 | ||||
10974 | /* Append the current configuration cookie */ | |||
10975 | iCookie = p->pConfig->iCookie; | |||
10976 | if( iCookie<0 ) iCookie = 0; | |||
10977 | ||||
10978 | if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){ | |||
10979 | sqlite3Fts5Put32(buf.p, iCookie); | |||
10980 | buf.n = 4; | |||
10981 | if( pStruct->nOriginCntr>0 ){ | |||
10982 | fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4){ ((void) (0)); memcpy(&(&buf)->p[(&buf)->n ], "\xFF\x00\x00\x01", 4); (&buf)->n += 4; }; | |||
10983 | } | |||
10984 | fts5BufferSafeAppendVarint(&buf, pStruct->nLevel){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], (pStruct->nLevel)); ((void) (0)); }; | |||
10985 | fts5BufferSafeAppendVarint(&buf, pStruct->nSegment){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], (pStruct->nSegment)); ((void) (0)); }; | |||
10986 | fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], ((i64)pStruct->nWriteCounter)); ((void ) (0)); }; | |||
10987 | } | |||
10988 | ||||
10989 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
10990 | int iSeg; /* Used to iterate through segments */ | |||
10991 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | |||
10992 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pLvl ->nMerge); | |||
10993 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pLvl ->nSeg); | |||
10994 | assert( pLvl->nMerge<=pLvl->nSeg )((void) (0)); | |||
10995 | ||||
10996 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | |||
10997 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | |||
10998 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iSegid); | |||
10999 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->pgnoFirst); | |||
11000 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->pgnoLast); | |||
11001 | if( pStruct->nOriginCntr>0 ){ | |||
11002 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iOrigin1); | |||
11003 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iOrigin2); | |||
11004 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nPgTombstone); | |||
11005 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nEntryTombstone); | |||
11006 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nEntry); | |||
11007 | } | |||
11008 | } | |||
11009 | } | |||
11010 | ||||
11011 | fts5DataWrite(p, FTS5_STRUCTURE_ROWID10, buf.p, buf.n); | |||
11012 | fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf); | |||
11013 | } | |||
11014 | } | |||
11015 | ||||
/*
** Debugging aid. The real implementation, which prints a caption and a
** textual rendering of pStruct to stdout, is compiled out. In normal
** builds fts5PrintStructure() expands to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
11030 | ||||
11031 | static int fts5SegmentSize(Fts5StructureSegment *pSeg){ | |||
11032 | return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; | |||
11033 | } | |||
11034 | ||||
11035 | /* | |||
11036 | ** Return a copy of index structure pStruct. Except, promote as many | |||
11037 | ** segments as possible to level iPromote. If an OOM occurs, NULL is | |||
11038 | ** returned. | |||
11039 | */ | |||
11040 | static void fts5StructurePromoteTo( | |||
11041 | Fts5Index *p, | |||
11042 | int iPromote, | |||
11043 | int szPromote, | |||
11044 | Fts5Structure *pStruct | |||
11045 | ){ | |||
11046 | int il, is; | |||
11047 | Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; | |||
11048 | ||||
11049 | if( pOut->nMerge==0 ){ | |||
11050 | for(il=iPromote+1; il<pStruct->nLevel; il++){ | |||
11051 | Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; | |||
11052 | if( pLvl->nMerge ) return; | |||
11053 | for(is=pLvl->nSeg-1; is>=0; is--){ | |||
11054 | int sz = fts5SegmentSize(&pLvl->aSeg[is]); | |||
11055 | if( sz>szPromote ) return; | |||
11056 | fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); | |||
11057 | if( p->rc ) return; | |||
11058 | memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); | |||
11059 | pOut->nSeg++; | |||
11060 | pLvl->nSeg--; | |||
11061 | } | |||
11062 | } | |||
11063 | } | |||
11064 | } | |||
11065 | ||||
11066 | /* | |||
11067 | ** A new segment has just been written to level iLvl of index structure | |||
11068 | ** pStruct. This function determines if any segments should be promoted | |||
11069 | ** as a result. Segments are promoted in two scenarios: | |||
11070 | ** | |||
11071 | ** a) If the segment just written is smaller than one or more segments | |||
11072 | ** within the previous populated level, it is promoted to the previous | |||
11073 | ** populated level. | |||
11074 | ** | |||
11075 | ** b) If the segment just written is larger than the newest segment on | |||
11076 | ** the next populated level, then that segment, and any other adjacent | |||
11077 | ** segments that are also smaller than the one just written, are | |||
11078 | ** promoted. | |||
11079 | ** | |||
11080 | ** If one or more segments are promoted, the structure object is updated | |||
11081 | ** to reflect this. | |||
11082 | */ | |||
11083 | static void fts5StructurePromote( | |||
11084 | Fts5Index *p, /* FTS5 backend object */ | |||
11085 | int iLvl, /* Index level just updated */ | |||
11086 | Fts5Structure *pStruct /* Index structure */ | |||
11087 | ){ | |||
11088 | if( p->rc==SQLITE_OK0 ){ /* Do nothing if p->rc already holds an error */ | |||
11089 | int iTst; /* Level examined when testing condition (a) */ | |||
11090 | int iPromote = -1; /* Level to promote to; -1 until one is chosen */ | |||
11091 | int szPromote = 0; /* Promote anything this size or smaller */ | |||
11092 | Fts5StructureSegment *pSeg; /* Segment just written */ | |||
11093 | int szSeg; /* Size of segment just written */ | |||
11094 | int nSeg = pStruct->aLevel[iLvl].nSeg; | |||
11095 | ||||
11096 | if( nSeg==0 ) return; /* Level iLvl holds no segment, so there is nothing to compare */ | |||
11097 | pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; /* Last entry of aSeg[] on level iLvl */ | |||
11098 | szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); /* Page count, pgnoFirst..pgnoLast inclusive */ | |||
11099 | ||||
11100 | /* Check for condition (a) */ | |||
11101 | for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); /* Empty body: walk down from iLvl-1 to the first level with nSeg!=0 */ | |||
11102 | if( iTst>=0 ){ | |||
11103 | int i; | |||
11104 | int szMax = 0; /* Page count of the largest segment on level iTst */ | |||
11105 | Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; | |||
11106 | assert( pTst->nMerge==0 )((void) (0)); | |||
11107 | for(i=0; i<pTst->nSeg; i++){ | |||
11108 | int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; | |||
11109 | if( sz>szMax ) szMax = sz; | |||
11110 | } | |||
11111 | if( szMax>=szSeg ){ | |||
11112 | /* Condition (a) is true. Promote the newest segment on level | |||
11113 | ** iLvl to level iTst. */ | |||
11114 | iPromote = iTst; | |||
11115 | szPromote = szMax; | |||
11116 | } | |||
11117 | } | |||
11118 | ||||
11119 | /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() | |||
11120 | ** is a no-op if it is not. */ | |||
11121 | if( iPromote<0 ){ | |||
11122 | iPromote = iLvl; | |||
11123 | szPromote = szSeg; | |||
11124 | } | |||
11125 | fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); | |||
11126 | } | |||
11127 | } | |||
11128 | ||||
11129 | ||||
11130 | /* | |||
11131 | ** Advance the doclist-index level iterator passed as the only argument to | |||
11132 | ** its next entry. If the end of the doclist-index page is reached, set pLvl->bEof and return non-zero. | |||
11133 | */ | |||
11134 | static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ | |||
11135 | Fts5Data *pData = pLvl->pData; | |||
11136 | ||||
11137 | if( pLvl->iOff==0 ){ /* iOff==0: not yet positioned, so decode the first entry */ | |||
11138 | assert( pLvl->bEof==0 )((void) (0)); | |||
11139 | pLvl->iOff = 1; /* The first varint is read from byte offset 1 */ | |||
11140 | pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno)sqlite3Fts5GetVarint32(&pData->p[1],(u32*)&(pLvl-> iLeafPgno)); | |||
11141 | pLvl->iOff += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); | |||
11142 | pLvl->iFirstOff = pLvl->iOff; /* Offset just past the first entry; fts5DlidxLvlPrev() compares against it */ | |||
11143 | }else{ | |||
11144 | int iOff; | |||
11145 | for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ /* Skip any run of 0x00 bytes */ | |||
11146 | if( pData->p[iOff] ) break; | |||
11147 | } | |||
11148 | ||||
11149 | if( iOff<pData->nn ){ | |||
11150 | u64 iVal; | |||
11151 | pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; /* One page for this entry plus one per zero byte skipped */ | |||
11152 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[iOff], &iVal); | |||
11153 | pLvl->iRowid += iVal; /* The varint is added to the running rowid, i.e. it is a delta */ | |||
11154 | pLvl->iOff = iOff; | |||
11155 | }else{ | |||
11156 | pLvl->bEof = 1; /* Only zero bytes (or nothing) remained before offset nn */ | |||
11157 | } | |||
11158 | } | |||
11159 | ||||
11160 | return pLvl->bEof; | |||
11161 | } | |||
11162 | ||||
11163 | /* | |||
11164 | ** Advance level iLvl of doclist-index iterator pIter to its next entry. Returns the EOF flag of level 0. | |||
11165 | */ | |||
11166 | static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ | |||
11167 | Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; | |||
11168 | ||||
11169 | assert( iLvl<pIter->nLvl )((void) (0)); | |||
11170 | if( fts5DlidxLvlNext(pLvl) ){ /* Non-zero: this level's page is exhausted */ | |||
11171 | if( (iLvl+1) < pIter->nLvl ){ | |||
11172 | fts5DlidxIterNextR(p, pIter, iLvl+1); /* Recurse: advance the parent level */ | |||
11173 | if( pLvl[1].bEof==0 ){ /* Parent has another entry: replace this level's page */ | |||
11174 | fts5DataRelease(pLvl->pData); | |||
11175 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); /* iOff becomes 0, so the next fts5DlidxLvlNext() decodes the first entry */ | |||
11176 | pLvl->pData = fts5DataRead(p, | |||
11177 | FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(iLvl) << (31)) + ((i64)(pLvl[1].iLeafPgno )) ) | |||
11178 | ); | |||
11179 | if( pLvl->pData ) fts5DlidxLvlNext(pLvl); /* Skipped when the read returned NULL */ | |||
11180 | } | |||
11181 | } | |||
11182 | } | |||
11183 | ||||
11184 | return pIter->aLvl[0].bEof; | |||
11185 | } | |||
11186 | static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ /* Advance pIter by one entry, starting the recursion at level 0 */ | |||
11187 | return fts5DlidxIterNextR(p, pIter, 0); | |||
11188 | } | |||
11189 | ||||
11190 | /* | |||
11191 | ** The iterator passed as the only argument has the field listed below | |||
11192 | ** already set for each level. This function sets up the rest of the iterator so that it | |||
11193 | ** points to the first rowid in the doclist-index. | |||
11194 | ** | |||
11195 | ** pData: | |||
11196 | ** pointer to doclist-index record, | |||
11197 | ** | |||
11198 | ** When this function is called pIter->iLeafPgno is the page number the | |||
11199 | ** doclist is associated with (the one featuring the term). | |||
11200 | */ | |||
11201 | static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ | |||
11202 | int i; | |||
11203 | for(i=0; i<pIter->nLvl; i++){ | |||
11204 | fts5DlidxLvlNext(&pIter->aLvl[i]); /* Position every level on its first entry */ | |||
11205 | } | |||
11206 | return pIter->aLvl[0].bEof; /* EOF flag of level 0 */ | |||
11207 | } | |||
11208 | ||||
11209 | ||||
11210 | static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ /* True if p->rc holds an error or level 0 of pIter is at EOF */ | |||
11211 | return p->rc!=SQLITE_OK0 || pIter->aLvl[0].bEof; | |||
11212 | } | |||
11213 | ||||
11214 | static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ /* Position pIter on the last entry, working from the top level down to level 0 */ | |||
11215 | int i; | |||
11216 | ||||
11217 | /* Advance each level to the last entry on the last page */ | |||
11218 | for(i=pIter->nLvl-1; p->rc==SQLITE_OK0 && i>=0; i--){ | |||
11219 | Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; | |||
11220 | while( fts5DlidxLvlNext(pLvl)==0 ); /* Empty body: step until EOF is reported */ | |||
11221 | pLvl->bEof = 0; /* Clear the flag: the level is left on its last entry */ | |||
11222 | ||||
11223 | if( i>0 ){ | |||
11224 | Fts5DlidxLvl *pChild = &pLvl[-1]; /* Level i-1 */ | |||
11225 | fts5DataRelease(pChild->pData); | |||
11226 | memset(pChild, 0, sizeof(Fts5DlidxLvl)); | |||
11227 | pChild->pData = fts5DataRead(p, | |||
11228 | FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i-1) << (31)) + ((i64)(pLvl->iLeafPgno )) ) | |||
11229 | ); /* NOTE(review): presumably a failed read sets p->rc, which ends the loop before pChild->pData is used - confirm */ | |||
11230 | } | |||
11231 | } | |||
11232 | } | |||
11233 | ||||
11234 | /* | |||
11235 | ** Move the level iterator passed as the only argument to the previous entry. Returns pLvl->bEof, which is set if the iterator was already on the first entry. | |||
11236 | */ | |||
11237 | static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ | |||
11238 | int iOff = pLvl->iOff; /* Position on entry; the rescan below stops before reaching it */ | |||
11239 | ||||
11240 | assert( pLvl->bEof==0 )((void) (0)); | |||
11241 | if( iOff<=pLvl->iFirstOff ){ /* Already on the first entry: nothing precedes it */ | |||
11242 | pLvl->bEof = 1; | |||
11243 | }else{ | |||
11244 | u8 *a = pLvl->pData->p; | |||
11245 | ||||
11246 | pLvl->iOff = 0; /* Rewind so that fts5DlidxLvlNext() re-decodes the first entry */ | |||
11247 | fts5DlidxLvlNext(pLvl); | |||
11248 | while( 1 ){ /* Walk forward again, stopping one entry short of the original position */ | |||
11249 | int nZero = 0; | |||
11250 | int ii = pLvl->iOff; | |||
11251 | u64 delta = 0; | |||
11252 | ||||
11253 | while( a[ii]==0 ){ | |||
11254 | nZero++; | |||
11255 | ii++; | |||
11256 | } | |||
11257 | ii += sqlite3Fts5GetVarint(&a[ii], &delta); | |||
11258 | ||||
11259 | if( ii>=iOff ) break; /* The entry just decoded is the original one, so the current state is its predecessor */ | |||
11260 | pLvl->iLeafPgno += nZero+1; | |||
11261 | pLvl->iRowid += delta; | |||
11262 | pLvl->iOff = ii; | |||
11263 | } | |||
11264 | } | |||
11265 | ||||
11266 | return pLvl->bEof; | |||
11267 | } | |||
11268 | ||||
11269 | static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ /* Step level iLvl of pIter back one entry; returns the EOF flag of level 0 */ | |||
11270 | Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; | |||
11271 | ||||
11272 | assert( iLvl<pIter->nLvl )((void) (0)); | |||
11273 | if( fts5DlidxLvlPrev(pLvl) ){ /* Non-zero: this level was already on its first entry */ | |||
11274 | if( (iLvl+1) < pIter->nLvl ){ | |||
11275 | fts5DlidxIterPrevR(p, pIter, iLvl+1); /* Recurse: step the parent level back */ | |||
11276 | if( pLvl[1].bEof==0 ){ | |||
11277 | fts5DataRelease(pLvl->pData); | |||
11278 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); | |||
11279 | pLvl->pData = fts5DataRead(p, | |||
11280 | FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(iLvl) << (31)) + ((i64)(pLvl[1].iLeafPgno )) ) | |||
11281 | ); | |||
11282 | if( pLvl->pData ){ | |||
11283 | while( fts5DlidxLvlNext(pLvl)==0 ); /* Empty body: run to the end of the newly loaded page */ | |||
11284 | pLvl->bEof = 0; /* Leave the level on its last entry rather than at EOF */ | |||
11285 | } | |||
11286 | } | |||
11287 | } | |||
11288 | } | |||
11289 | ||||
11290 | return pIter->aLvl[0].bEof; | |||
11291 | } | |||
11292 | static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ /* Step pIter back by one entry, starting the recursion at level 0 */ | |||
11293 | return fts5DlidxIterPrevR(p, pIter, 0); | |||
11294 | } | |||
11295 | ||||
11296 | /* | |||
11297 | ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). Passing a NULL pointer is a harmless no-op. | |||
11298 | */ | |||
11299 | static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ | |||
11300 | if( pIter ){ | |||
11301 | int i; | |||
11302 | for(i=0; i<pIter->nLvl; i++){ | |||
11303 | fts5DataRelease(pIter->aLvl[i].pData); /* Release the record held by each level */ | |||
11304 | } | |||
11305 | sqlite3_freesqlite3_api->free(pIter); | |||
11306 | } | |||
11307 | } | |||
11308 | ||||
11309 | static Fts5DlidxIter *fts5DlidxIterInit( | |||
11310 | Fts5Index *p, /* Fts5 Backend to iterate within */ | |||
11311 | int bRev, /* Non-zero to start on the last entry, zero to start on the first */ | |||
11312 | int iSegid, /* Segment id */ | |||
11313 | int iLeafPg /* Leaf page number to load dlidx for */ | |||
11314 | ){ | |||
11315 | Fts5DlidxIter *pIter = 0; /* Return value; stays NULL if p->rc is already an error on entry */ | |||
11316 | int i; | |||
11317 | int bDone = 0; | |||
11318 | ||||
11319 | for(i=0; p->rc==SQLITE_OK0 && bDone==0; i++){ /* One iteration per level: grow the object, then load level i */ | |||
11320 | sqlite3_int64 nByte = SZ_FTS5DLIDXITER(i+1)(__builtin_offsetof(Fts5DlidxIter, aLvl)+(i+1)*sizeof(Fts5DlidxLvl )); | |||
11321 | Fts5DlidxIter *pNew; | |||
11322 | ||||
11323 | pNew = (Fts5DlidxIter*)sqlite3_realloc64sqlite3_api->realloc64(pIter, nByte); | |||
11324 | if( pNew==0 ){ | |||
11325 | p->rc = SQLITE_NOMEM7; /* pIter is left untouched and is freed by the error path below */ | |||
11326 | }else{ | |||
11327 | i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(iLeafPg)) ); | |||
11328 | Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; | |||
11329 | pIter = pNew; | |||
11330 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); | |||
11331 | pLvl->pData = fts5DataRead(p, iRowid); | |||
11332 | if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ /* Low bit of byte 0 clear: stop loading levels (presumably means "no parent level" - confirm) */ | |||
11333 | bDone = 1; | |||
11334 | } | |||
11335 | pIter->nLvl = i+1; | |||
11336 | } | |||
11337 | } | |||
11338 | ||||
11339 | if( p->rc==SQLITE_OK0 ){ | |||
11340 | pIter->iSegid = iSegid; | |||
11341 | if( bRev==0 ){ | |||
11342 | fts5DlidxIterFirst(pIter); | |||
11343 | }else{ | |||
11344 | fts5DlidxIterLast(p, pIter); | |||
11345 | } | |||
11346 | } | |||
11347 | ||||
11348 | if( p->rc!=SQLITE_OK0 ){ /* Any error, including one raised while positioning: free and return NULL */ | |||
11349 | fts5DlidxIterFree(pIter); | |||
11350 | pIter = 0; | |||
11351 | } | |||
11352 | ||||
11353 | return pIter; | |||
11354 | } | |||
11355 | ||||
11356 | static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ /* Rowid of the entry that level 0 currently points to */ | |||
11357 | return pIter->aLvl[0].iRowid; | |||
11358 | } | |||
11359 | static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ /* Leaf page number of the entry that level 0 currently points to */ | |||
11360 | return pIter->aLvl[0].iLeafPgno; | |||
11361 | } | |||
11362 | ||||
11363 | /* | |||
11364 | ** Load the next leaf page into the segment iterator. pIter->pLeaf is left NULL if there is no page after the current one (or if the read returns NULL). | |||
11365 | */ | |||
11366 | static void fts5SegIterNextPage( | |||
11367 | Fts5Index *p, /* FTS5 backend object */ | |||
11368 | Fts5SegIter *pIter /* Iterator to advance to next page */ | |||
11369 | ){ | |||
11370 | Fts5Data *pLeaf; | |||
11371 | Fts5StructureSegment *pSeg = pIter->pSeg; | |||
11372 | fts5DataRelease(pIter->pLeaf); | |||
11373 | pIter->iLeafPgno++; | |||
11374 | if( pIter->pNextLeaf ){ /* A page is already held in pNextLeaf: take it instead of reading */ | |||
11375 | pIter->pLeaf = pIter->pNextLeaf; | |||
11376 | pIter->pNextLeaf = 0; | |||
11377 | }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ | |||
11378 | pIter->pLeaf = fts5LeafRead(p, | |||
11379 | FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pIter->iLeafPgno )) ) | |||
11380 | ); | |||
11381 | }else{ | |||
11382 | pIter->pLeaf = 0; /* Past pgnoLast: no more pages in this segment */ | |||
11383 | } | |||
11384 | pLeaf = pIter->pLeaf; | |||
11385 | ||||
11386 | if( pLeaf ){ | |||
11387 | pIter->iPgidxOff = pLeaf->szLeaf; /* Start reading at offset szLeaf */ | |||
11388 | if( fts5LeafIsTermless(pLeaf)((pLeaf)->szLeaf >= (pLeaf)->nn) ){ | |||
11389 | pIter->iEndofDoclist = pLeaf->nn+1; /* Nothing stored at or after szLeaf: place the end-of-doclist mark beyond the page */ | |||
11390 | }else{ | |||
11391 | pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],sqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)) | |||
11392 | pIter->iEndofDoclistsqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)) | |||
11393 | )sqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)); | |||
11394 | } | |||
11395 | } | |||
11396 | } | |||
11397 | ||||
11398 | /* | |||
11399 | ** Argument p points to a buffer containing a varint to be interpreted as a | |||
11400 | ** position list size field. Read the varint and return the number of bytes | |||
11401 | ** read. Before returning, set *pnSz to the number of bytes in the position | |||
11402 | ** list, and *pbDel to true if the delete flag is set, or false otherwise. | |||
11403 | */ | |||
11404 | static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ | |||
11405 | int nSz; /* Raw field value: (size<<1) | delete-flag */ | |||
11406 | int n = 0; /* Number of bytes consumed from p[] */ | |||
11407 | fts5FastGetVarint32(p, n, nSz){ nSz = (p)[n++]; if( nSz & 0x80 ){ n--; n += sqlite3Fts5GetVarint32 (&(p)[n],(u32*)&(nSz)); } }; | |||
11408 | assert_nc( nSz>=0 )((void) (0)); | |||
11409 | *pnSz = nSz/2; /* Upper bits hold the size in bytes */ | |||
11410 | *pbDel = nSz & 0x0001; /* Lowest bit holds the delete flag */ | |||
11411 | return n; | |||
11412 | } | |||
11413 | ||||
11414 | /* | |||
11415 | ** Fts5SegIter.iLeafOffset currently points to the first byte of a | |||
11416 | ** position-list size field. Read the value of the field and store it | |||
11417 | ** in the following variables: | |||
11418 | ** | |||
11419 | ** Fts5SegIter.nPos | |||
11420 | ** Fts5SegIter.bDel | |||
11421 | ** | |||
11422 | ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the | |||
11423 | ** position list content (if any). This function is a no-op if p->rc is already set to an error. | |||
11424 | */ | |||
11425 | static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ | |||
11426 | if( p->rc==SQLITE_OK0 ){ | |||
11427 | int iOff = pIter->iLeafOffset; /* Offset to read at */ | |||
11428 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | |||
11429 | if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ /* detail=none: no size varint; up to two 0x00 bytes encode bDel/nPos instead */ | |||
11430 | int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf)(((pIter->iEndofDoclist) < (pIter->pLeaf->szLeaf) ) ? (pIter->iEndofDoclist) : (pIter->pLeaf->szLeaf)); | |||
11431 | pIter->bDel = 0; /* Defaults when no 0x00 byte is present */ | |||
11432 | pIter->nPos = 1; | |||
11433 | if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ /* First 0x00 byte: delete flag */ | |||
11434 | pIter->bDel = 1; | |||
11435 | iOff++; | |||
11436 | if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ /* Second 0x00 byte: nPos stays 1 */ | |||
11437 | pIter->nPos = 1; | |||
11438 | iOff++; | |||
11439 | }else{ | |||
11440 | pIter->nPos = 0; | |||
11441 | } | |||
11442 | } | |||
11443 | }else{ | |||
11444 | int nSz; | |||
11445 | fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz){ nSz = (pIter->pLeaf->p)[iOff++]; if( nSz & 0x80 ) { iOff--; iOff += sqlite3Fts5GetVarint32(&(pIter->pLeaf ->p)[iOff],(u32*)&(nSz)); } }; | |||
11446 | pIter->bDel = (nSz & 0x0001); /* Lowest bit: delete flag */ | |||
11447 | pIter->nPos = nSz>>1; /* Remaining bits: position-list size */ | |||
11448 | assert_nc( pIter->nPos>=0 )((void) (0)); | |||
11449 | } | |||
11450 | pIter->iLeafOffset = iOff; | |||
11451 | } | |||
11452 | } | |||
11453 | ||||
11454 | static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ /* Read the varint at iLeafOffset into pIter->iRowid, moving to later pages first if the offset is at or beyond szLeaf */ | |||
11455 | u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ | |||
11456 | i64 iOff = pIter->iLeafOffset; | |||
11457 | ||||
11458 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | |||
11459 | while( iOff>=pIter->pLeaf->szLeaf ){ | |||
11460 | fts5SegIterNextPage(p, pIter); | |||
11461 | if( pIter->pLeaf==0 ){ /* Ran out of pages before finding the rowid */ | |||
11462 | if( p->rc==SQLITE_OK0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); /* Do not overwrite an earlier error code */ | |||
11463 | return; | |||
11464 | } | |||
11465 | iOff = 4; /* Reading resumes at offset 4 on the new page (presumably just past a 4-byte header - confirm) */ | |||
11466 | a = pIter->pLeaf->p; | |||
11467 | } | |||
11468 | iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); | |||
11469 | pIter->iLeafOffset = iOff; | |||
11470 | } | |||
11471 | ||||
11472 | /* | |||
11473 | ** Fts5SegIter.iLeafOffset currently points to the first byte of the | |||
11474 | ** "nSuffix" field of a term. Function parameter nKeep contains the value | |||
11475 | ** of the "nPrefix" field (if there was one - it is passed 0 if this is | |||
11476 | ** the first term in the segment). | |||
11477 | ** | |||
11478 | ** This function populates: | |||
11479 | ** | |||
11480 | ** Fts5SegIter.term | |||
11481 | ** Fts5SegIter.iRowid | |||
11482 | ** | |||
11483 | ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of | |||
11484 | ** the first position list, i.e. the position list belonging to document | |||
11485 | ** (Fts5SegIter.iRowid). On a malformed term, p->rc is set to FTS5_CORRUPT. | |||
11486 | */ | |||
11487 | static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ | |||
11488 | u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ | |||
11489 | i64 iOff = pIter->iLeafOffset; /* Offset to read at */ | |||
11490 | int nNew; /* Bytes of new data */ | |||
11491 | ||||
11492 | iOff += fts5GetVarint32(&a[iOff], nNew)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nNew)); | |||
11493 | if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){ /* Suffix overruns the page, prefix longer than the previous term, or empty suffix */ | |||
11494 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
11495 | return; | |||
11496 | } | |||
11497 | pIter->term.n = nKeep; /* Keep the first nKeep bytes of the previous term, then append the suffix */ | |||
11498 | fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&pIter->term ,nNew,&a[iOff]); | |||
11499 | assert( pIter->term.n<=pIter->term.nSpace )((void) (0)); | |||
11500 | iOff += nNew; | |||
11501 | pIter->iTermLeafOffset = iOff; /* Record where (offset and page) the term's doclist begins */ | |||
11502 | pIter->iTermLeafPgno = pIter->iLeafPgno; | |||
11503 | pIter->iLeafOffset = iOff; | |||
11504 | ||||
11505 | if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ /* No further entry at iPgidxOff: the doclist runs past the end of the page */ | |||
11506 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | |||
11507 | }else{ | |||
11508 | int nExtra; | |||
11509 | pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra)sqlite3Fts5GetVarint32(&a[pIter->iPgidxOff],(u32*)& (nExtra)); | |||
11510 | pIter->iEndofDoclist += nExtra; /* The value read is added to the previous end-of-doclist offset */ | |||
11511 | } | |||
11512 | ||||
11513 | fts5SegIterLoadRowid(p, pIter); | |||
11514 | } | |||
11515 | ||||
11516 | static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); /* Forward declarations of the three xNext implementations selected below */ | |||
11517 | static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); | |||
11518 | static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); | |||
11519 | ||||
11520 | static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ /* Choose pIter->xNext from the iterator flags and the configured detail mode */ | |||
11521 | if( pIter->flags & FTS5_SEGITER_REVERSE0x02 ){ /* The reverse flag takes precedence over the detail mode */ | |||
11522 | pIter->xNext = fts5SegIterNext_Reverse; | |||
11523 | }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
11524 | pIter->xNext = fts5SegIterNext_None; | |||
11525 | }else{ | |||
11526 | pIter->xNext = fts5SegIterNext; | |||
11527 | } | |||
11528 | } | |||
11529 | ||||
11530 | /* | |||
11531 | ** Allocate a tombstone hash page array object (pIter->pTombArray) for | |||
11532 | ** the iterator passed as the second argument. If an OOM error occurs, | |||
11533 | ** leave an error in the Fts5Index object. Nothing is allocated if the segment has no tombstone pages. | |||
11534 | */ | |||
11535 | static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){ | |||
11536 | const int nTomb = pIter->pSeg->nPgTombstone; | |||
11537 | if( nTomb>0 ){ | |||
11538 | int nByte = SZ_FTS5TOMBSTONEARRAY(nTomb+1)(__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(nTomb+1 )*sizeof(Fts5Data*)); | |||
11539 | Fts5TombstoneArray *pNew; | |||
11540 | pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte); | |||
11541 | if( pNew ){ | |||
11542 | pNew->nTombstone = nTomb; | |||
11543 | pNew->nRef = 1; /* The iterator holds the initial reference */ | |||
11544 | pIter->pTombArray = pNew; | |||
11545 | } | |||
11546 | } | |||
11547 | } | |||
11548 | ||||
11549 | /* | |||
11550 | ** Initialize the iterator object pIter to iterate through the entries in | |||
11551 | ** segment pSeg. The iterator is left pointing to the first entry when | |||
11552 | ** this function returns. | |||
11553 | ** | |||
11554 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | |||
11555 | ** an error has already occurred when this function is called, it is a no-op. | |||
11556 | */ | |||
11557 | static void fts5SegIterInit( | |||
11558 | Fts5Index *p, /* FTS index object */ | |||
11559 | Fts5StructureSegment *pSeg, /* Description of segment */ | |||
11560 | Fts5SegIter *pIter /* Object to populate */ | |||
11561 | ){ | |||
11562 | if( pSeg->pgnoFirst==0 ){ | |||
11563 | /* This happens if the segment is being used as an input to an incremental | |||
11564 | ** merge and all data has already been "trimmed". See function | |||
11565 | ** fts5TrimSegments() for details. In this case leave the iterator empty. | |||
11566 | ** The caller will see the (pIter->pLeaf==0) and assume the iterator is | |||
11567 | ** at EOF already. */ | |||
11568 | assert( pIter->pLeaf==0 )((void) (0)); | |||
11569 | return; | |||
11570 | } | |||
11571 | ||||
11572 | if( p->rc==SQLITE_OK0 ){ | |||
11573 | memset(pIter, 0, sizeof(*pIter)); | |||
11574 | fts5SegIterSetNext(p, pIter); | |||
11575 | pIter->pSeg = pSeg; | |||
11576 | pIter->iLeafPgno = pSeg->pgnoFirst-1; /* fts5SegIterNextPage() increments before loading, so start one page early */ | |||
11577 | do { | |||
11578 | fts5SegIterNextPage(p, pIter); | |||
11579 | }while( p->rc==SQLITE_OK0 && pIter->pLeaf && pIter->pLeaf->nn==4 ); /* Skip leading pages whose total size is 4 bytes */ | |||
11580 | } | |||
11581 | ||||
11582 | if( p->rc==SQLITE_OK0 && pIter->pLeaf ){ | |||
11583 | pIter->iLeafOffset = 4; /* The first term is read from offset 4 (see the assert_nc below) */ | |||
11584 | assert( pIter->pLeaf!=0 )((void) (0)); | |||
11585 | assert_nc( pIter->pLeaf->nn>4 )((void) (0)); | |||
11586 | assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 )((void) (0)); | |||
11587 | pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; | |||
11588 | fts5SegIterLoadTerm(p, pIter, 0); /* nKeep==0: first term, so no prefix is shared */ | |||
11589 | fts5SegIterLoadNPos(p, pIter); | |||
11590 | fts5SegIterAllocTombstone(p, pIter); | |||
11591 | } | |||
11592 | } | |||
11593 | ||||
11594 | /* | |||
11595 | ** This function is only ever called on iterators created by calls to | |||
11596 | ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. | |||
11597 | ** | |||
11598 | ** The iterator is in an unusual state when this function is called: the | |||
11599 | ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of | |||
11600 | ** the position-list size field for the first relevant rowid on the page. | |||
11601 | ** Fts5SegIter.iRowid is set, but nPos and bDel are not. | |||
11602 | ** | |||
11603 | ** This function advances the iterator so that it points to the last | |||
11604 | ** relevant rowid on the page and, if necessary, initializes the | |||
11605 | ** aRowidOffset[] and iRowidOffset variables. At this point the iterator | |||
11606 | ** is in its regular state - Fts5SegIter.iLeafOffset points to the first | |||
11607 | ** byte of the position list content associated with said rowid. | |||
11608 | */ | |||
11609 | static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ | |||
11610 | int eDetail = p->pConfig->eDetail; | |||
11611 | int n = pIter->pLeaf->szLeaf; /* Scan limit: the smaller of szLeaf and iEndofDoclist */ | |||
11612 | int i = pIter->iLeafOffset; /* Current scan offset within a[] */ | |||
11613 | u8 *a = pIter->pLeaf->p; | |||
11614 | int iRowidOffset = 0; /* Number of aRowidOffset[] slots filled so far */ | |||
11615 | ||||
11616 | if( n>pIter->iEndofDoclist ){ | |||
11617 | n = pIter->iEndofDoclist; | |||
11618 | } | |||
11619 | ||||
11620 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | |||
11621 | while( 1 ){ | |||
11622 | u64 iDelta = 0; | |||
11623 | ||||
11624 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
11625 | /* todo: detail=none has no size field here; skip up to two 0x00 bytes instead */ | |||
11626 | if( i<n && a[i]==0 ){ | |||
11627 | i++; | |||
11628 | if( i<n && a[i]==0 ) i++; | |||
11629 | } | |||
11630 | }else{ | |||
11631 | int nPos; | |||
11632 | int bDummy; | |||
11633 | i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); | |||
11634 | i += nPos; /* Skip over the position-list content */ | |||
11635 | } | |||
11636 | if( i>=n ) break; /* No further rowid before the limit: the current entry is the last */ | |||
11637 | i += fts5GetVarintsqlite3Fts5GetVarint(&a[i], &iDelta); | |||
11638 | pIter->iRowid += iDelta; | |||
11639 | ||||
11640 | /* If necessary, grow the pIter->aRowidOffset[] array. */ | |||
11641 | if( iRowidOffset>=pIter->nRowidOffset ){ | |||
11642 | int nNew = pIter->nRowidOffset + 8; | |||
11643 | int *aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(pIter->aRowidOffset,nNew*sizeof(int)); | |||
11644 | if( aNew==0 ){ | |||
11645 | p->rc = SQLITE_NOMEM7; | |||
11646 | break; | |||
11647 | } | |||
11648 | pIter->aRowidOffset = aNew; | |||
11649 | pIter->nRowidOffset = nNew; | |||
11650 | } | |||
11651 | ||||
11652 | pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; /* Save the previous entry's offset so fts5SegIterNext_Reverse() can step back to it */ | |||
11653 | pIter->iLeafOffset = i; | |||
11654 | } | |||
11655 | pIter->iRowidOffset = iRowidOffset; | |||
11656 | fts5SegIterLoadNPos(p, pIter); | |||
11657 | } | |||
11658 | ||||
11659 | /* | |||
11660 | ** Move reverse iterator pIter back to an earlier leaf page, going no further than the page on which the current term appears (iTermLeafPgno). If no suitable page is found, pIter->pLeaf is left NULL. | |||
11661 | */ | |||
11662 | static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ | |||
11663 | assert( pIter->flags & FTS5_SEGITER_REVERSE )((void) (0)); | |||
11664 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | |||
11665 | ||||
11666 | fts5DataRelease(pIter->pLeaf); | |||
11667 | pIter->pLeaf = 0; | |||
11668 | while( p->rc==SQLITE_OK0 && pIter->iLeafPgno>pIter->iTermLeafPgno ){ | |||
11669 | Fts5Data *pNew; | |||
11670 | pIter->iLeafPgno--; | |||
11671 | pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) ) | |||
11672 | pIter->pSeg->iSegid, pIter->iLeafPgno( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) ) | |||
11673 | )( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) )); | |||
11674 | if( pNew ){ | |||
11675 | /* iTermLeafOffset may be equal to szLeaf if the term is the last | |||
11676 | ** thing on the page - i.e. the first rowid is on the following page. | |||
11677 | ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */ | |||
11678 | if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ | |||
11679 | assert( pIter->pLeaf==0 )((void) (0)); | |||
11680 | if( pIter->iTermLeafOffset<pNew->szLeaf ){ | |||
11681 | pIter->pLeaf = pNew; | |||
11682 | pIter->iLeafOffset = pIter->iTermLeafOffset; | |||
11683 | } | |||
11684 | }else{ | |||
11685 | int iRowidOff; | |||
11686 | iRowidOff = fts5LeafFirstRowidOff(pNew)(fts5GetU16((pNew)->p)); | |||
11687 | if( iRowidOff ){ /* Zero leaves pLeaf NULL, so this page is released and the loop continues */ | |||
11688 | if( iRowidOff>=pNew->szLeaf ){ | |||
11689 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
11690 | }else{ | |||
11691 | pIter->pLeaf = pNew; | |||
11692 | pIter->iLeafOffset = iRowidOff; | |||
11693 | } | |||
11694 | } | |||
11695 | } | |||
11696 | ||||
11697 | if( pIter->pLeaf ){ /* Page accepted: read its first rowid and stop searching */ | |||
11698 | u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; | |||
11699 | pIter->iLeafOffset += fts5GetVarintsqlite3Fts5GetVarint(a, (u64*)&pIter->iRowid); | |||
11700 | break; | |||
11701 | }else{ | |||
11702 | fts5DataRelease(pNew); /* Page not usable: release it and try the one before */ | |||
11703 | } | |||
11704 | } | |||
11705 | } | |||
11706 | ||||
11707 | if( pIter->pLeaf ){ | |||
11708 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | |||
11709 | fts5SegIterReverseInitPage(p, pIter); | |||
11710 | } | |||
11711 | } | |||
11712 | ||||
11713 | /* | |||
11714 | ** Return true if the iterator passed as the second argument currently | |||
11715 | ** points to a delete marker. A delete marker is an entry with a 0 byte | |||
11716 | ** position-list. Always returns false if p->rc is set or the current segment iterator has no leaf loaded. | |||
11717 | */ | |||
11718 | static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ | |||
11719 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; /* Segment iterator selected by aFirst[1].iFirst */ | |||
11720 | return (p->rc==SQLITE_OK0 && pSeg->pLeaf && pSeg->nPos==0); | |||
11721 | } | |||
11722 | ||||
11723 | /* | |||
11724 | ** Advance iterator pIter to the next entry. | |||
11725 | ** | |||
11726 | ** This version of fts5SegIterNext() is only used by reverse iterators. It steps back through the offsets saved in aRowidOffset[], and loads an earlier page once those are used up. | |||
11727 | */ | |||
11728 | static void fts5SegIterNext_Reverse( | |||
11729 | Fts5Index *p, /* FTS5 backend object */ | |||
11730 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
11731 | int *pbUnused /* Unused */ | |||
11732 | ){ | |||
11733 | assert( pIter->flags & FTS5_SEGITER_REVERSE )((void) (0)); | |||
11734 | assert( pIter->pNextLeaf==0 )((void) (0)); | |||
11735 | UNUSED_PARAM(pbUnused)(void)(pbUnused); | |||
11736 | ||||
11737 | if( pIter->iRowidOffset>0 ){ /* Saved offsets remain for the current page */ | |||
11738 | u8 *a = pIter->pLeaf->p; | |||
11739 | int iOff; | |||
11740 | u64 iDelta; | |||
11741 | ||||
11742 | pIter->iRowidOffset--; | |||
11743 | pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; | |||
11744 | fts5SegIterLoadNPos(p, pIter); | |||
11745 | iOff = pIter->iLeafOffset; | |||
11746 | if( p->pConfig->eDetail!=FTS5_DETAIL_NONE1 ){ | |||
11747 | iOff += pIter->nPos; /* Skip the position list to reach the following rowid delta */ | |||
11748 | } | |||
11749 | fts5GetVarintsqlite3Fts5GetVarint(&a[iOff], &iDelta); | |||
11750 | pIter->iRowid -= iDelta; /* Undo the delta that led from this entry to the one just left */ | |||
11751 | }else{ | |||
11752 | fts5SegIterReverseNewPage(p, pIter); | |||
11753 | } | |||
11754 | } | |||
11755 | ||||
11756 | /* | |||
11757 | ** Advance iterator pIter to the next entry. | |||
11758 | ** | |||
11759 | ** This version of fts5SegIterNext() is only used if detail=none and the | |||
11760 | ** iterator is not a reverse direction iterator. At EOF the leaf is released and pIter->pLeaf is set to NULL. | |||
11761 | */ | |||
11762 | static void fts5SegIterNext_None( | |||
11763 | Fts5Index *p, /* FTS5 backend object */ | |||
11764 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
11765 | int *pbNewTerm /* OUT: Set for new term */ | |||
11766 | ){ | |||
11767 | int iOff; | |||
11768 | ||||
11769 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
11770 | assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 )((void) (0)); | |||
11771 | assert( p->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | |||
11772 | ||||
11773 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | |||
11774 | iOff = pIter->iLeafOffset; | |||
11775 | ||||
11776 | /* Next entry is on the next page */ | |||
11777 | while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ /* Only when pSeg is set; a NULL pSeg is handled via p->pHash further down */ | |||
11778 | fts5SegIterNextPage(p, pIter); | |||
11779 | if( p->rc || pIter->pLeaf==0 ) return; | |||
11780 | pIter->iRowid = 0; /* Rowid is reset on the new page before the next varint is added to it */ | |||
11781 | iOff = 4; | |||
11782 | } | |||
11783 | ||||
11784 | if( iOff<pIter->iEndofDoclist ){ | |||
11785 | /* Next entry is on the current page */ | |||
11786 | u64 iDelta; | |||
11787 | iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta); | |||
11788 | pIter->iLeafOffset = iOff; | |||
11789 | pIter->iRowid += iDelta; | |||
11790 | }else if( (pIter->flags & FTS5_SEGITER_ONETERM0x01)==0 ){ /* End of this doclist, and the iterator may move on to the next term */ | |||
11791 | if( pIter->pSeg ){ | |||
11792 | int nKeep = 0; | |||
11793 | if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ /* Not the page's first term: a prefix-length varint precedes the suffix */ | |||
11794 | iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep)sqlite3Fts5GetVarint32(&pIter->pLeaf->p[iOff],(u32* )&(nKeep)); | |||
11795 | } | |||
11796 | pIter->iLeafOffset = iOff; | |||
11797 | fts5SegIterLoadTerm(p, pIter, nKeep); | |||
11798 | }else{ /* No segment: fetch the next term and doclist from the p->pHash scan */ | |||
11799 | const u8 *pList = 0; | |||
11800 | const char *zTerm = 0; | |||
11801 | int nTerm = 0; | |||
11802 | int nList; | |||
11803 | sqlite3Fts5HashScanNext(p->pHash); | |||
11804 | sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); | |||
11805 | if( pList==0 ) goto next_none_eof; | |||
11806 | pIter->pLeaf->p = (u8*)pList; /* Point the existing leaf object at the doclist returned by the scan */ | |||
11807 | pIter->pLeaf->nn = nList; | |||
11808 | pIter->pLeaf->szLeaf = nList; | |||
11809 | pIter->iEndofDoclist = nList; | |||
11810 | sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm); | |||
11811 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pList, (u64*)&pIter->iRowid); | |||
11812 | } | |||
11813 | ||||
11814 | if( pbNewTerm ) *pbNewTerm = 1; | |||
11815 | }else{ | |||
11816 | goto next_none_eof; /* Single-term iterator has reached the end of its doclist */ | |||
11817 | } | |||
11818 | ||||
11819 | fts5SegIterLoadNPos(p, pIter); | |||
11820 | ||||
11821 | return; | |||
11822 | next_none_eof: | |||
11823 | fts5DataRelease(pIter->pLeaf); | |||
11824 | pIter->pLeaf = 0; | |||
11825 | } | |||
11826 | ||||
11827 | ||||
11828 | /* | |||
11829 | ** Advance iterator pIter to the next entry. | |||
11830 | ** | |||
11831 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It | |||
11832 | ** is not considered an error if the iterator reaches EOF. If an error has | |||
11833 | ** already occurred when this function is called, it is a no-op. | |||
11834 | */ | |||
11835 | static void fts5SegIterNext( | |||
11836 | Fts5Index *p, /* FTS5 backend object */ | |||
11837 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
11838 | int *pbNewTerm /* OUT: Set for new term */ | |||
11839 | ){ | |||
11840 | Fts5Data *pLeaf = pIter->pLeaf; | |||
11841 | int iOff; | |||
11842 | int bNewTerm = 0; | |||
11843 | int nKeep = 0; | |||
11844 | u8 *a; | |||
11845 | int n; | |||
11846 | ||||
/* pbNewTerm may be NULL; when supplied, the caller must have zeroed it.
** This variant is never used with detail=none configurations. */
11847 | assert( pbNewTerm==0 || *pbNewTerm==0 )((void) (0)); | |||
11848 | assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | |||
11849 | ||||
11850 | /* Search for the end of the position list within the current page. */ | |||
11851 | a = pLeaf->p; | |||
11852 | n = pLeaf->szLeaf; | |||
11853 | ||||
11854 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | |||
/* iOff: offset of the first byte following the current entry's position
** list (current offset plus the position-list size nPos). */
11855 | iOff = pIter->iLeafOffset + pIter->nPos; | |||
11856 | ||||
11857 | if( iOff<n ){ | |||
11858 | /* The next entry is on the current page. */ | |||
11859 | assert_nc( iOff<=pIter->iEndofDoclist )((void) (0)); | |||
11860 | if( iOff>=pIter->iEndofDoclist ){ | |||
/* At or beyond the end of the current doclist, so the next entry
** begins a new term. Unless iOff is the page's first-term offset, a
** varint is read into nKeep; nKeep is later handed to
** fts5SegIterLoadTerm(). */
11861 | bNewTerm = 1; | |||
11862 | if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ | |||
11863 | iOff += fts5GetVarint32(&a[iOff], nKeep)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nKeep)); | |||
11864 | } | |||
11865 | }else{ | |||
/* Still inside the same doclist: read a varint delta and add it to
** the current rowid. */
11866 | u64 iDelta; | |||
11867 | iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); | |||
11868 | pIter->iRowid += iDelta; | |||
11869 | assert_nc( iDelta>0 )((void) (0)); | |||
11870 | } | |||
11871 | pIter->iLeafOffset = iOff; | |||
11872 | ||||
11873 | }else if( pIter->pSeg==0 ){ | |||
/* No segment is attached to this iterator: its data is supplied by the
** hash table p->pHash. Unless the iterator is limited to a single term
** (FTS5_SEGITER_ONETERM), step the hash scan and fetch the next
** term/doclist pair. With ONETERM set pList stays NULL, which is
** handled as EOF below. */
11874 | const u8 *pList = 0; | |||
11875 | const char *zTerm = 0; | |||
11876 | int nTerm = 0; | |||
11877 | int nList = 0; | |||
11878 | assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm )((void) (0)); | |||
11879 | if( 0==(pIter->flags & FTS5_SEGITER_ONETERM0x01) ){ | |||
11880 | sqlite3Fts5HashScanNext(p->pHash); | |||
11881 | sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); | |||
11882 | } | |||
11883 | if( pList==0 ){ | |||
/* Nothing further to visit: release the leaf and leave the iterator
** at EOF (pLeaf==0). */
11884 | fts5DataRelease(pIter->pLeaf); | |||
11885 | pIter->pLeaf = 0; | |||
11886 | }else{ | |||
/* Re-point the existing Fts5Data object at the new doclist, copy the
** new term, and read the first rowid. pbNewTerm is dereferenced
** without a NULL test here; the assert() above requires it to be
** non-NULL whenever ONETERM is clear, which is the only way pList
** can be non-NULL. */
11887 | pIter->pLeaf->p = (u8*)pList; | |||
11888 | pIter->pLeaf->nn = nList; | |||
11889 | pIter->pLeaf->szLeaf = nList; | |||
11890 | pIter->iEndofDoclist = nList+1; | |||
11891 | sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm); | |||
11892 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pList, (u64*)&pIter->iRowid); | |||
11893 | *pbNewTerm = 1; | |||
11894 | } | |||
11895 | }else{ | |||
/* Segment-backed iterator whose current page is exhausted. Load
** following pages until one produces a non-zero offset in iOff, or
** until fts5SegIterNextPage() leaves pLeaf set to NULL. */
11896 | iOff = 0; | |||
11897 | /* Next entry is not on the current page */ | |||
11898 | while( iOff==0 ){ | |||
11899 | fts5SegIterNextPage(p, pIter); | |||
11900 | pLeaf = pIter->pLeaf; | |||
11901 | if( pLeaf==0 ) break; | |||
11902 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | |||
11903 | if( (iOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p))) && iOff<pLeaf->szLeaf ){ | |||
/* The page has a non-zero first-rowid offset that lies inside the
** leaf content: read the rowid stored there. */
11904 | iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); | |||
11905 | pIter->iLeafOffset = iOff; | |||
11906 | ||||
/* If bytes follow the leaf content (nn>szLeaf), the first varint
** stored there is loaded into iEndofDoclist and iPgidxOff is set
** to the offset just past that varint. */
11907 | if( pLeaf->nn>pLeaf->szLeaf ){ | |||
11908 | pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)) | |||
11909 | &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclistsqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)) | |||
11910 | )sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)); | |||
11911 | } | |||
11912 | } | |||
11913 | else if( pLeaf->nn>pLeaf->szLeaf ){ | |||
/* No usable first-rowid offset, but bytes follow the leaf content:
** the first varint there is read into iOff and that offset is
** treated as the start of a new term. */
11914 | pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)) | |||
11915 | &pLeaf->p[pLeaf->szLeaf], iOffsqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)) | |||
11916 | )sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)); | |||
11917 | pIter->iLeafOffset = iOff; | |||
11918 | pIter->iEndofDoclist = iOff; | |||
11919 | bNewTerm = 1; | |||
11920 | } | |||
/* An offset beyond the leaf content means the record is corrupt:
** record FTS5_CORRUPT in p->rc and stop. */
11921 | assert_nc( iOff<pLeaf->szLeaf )((void) (0)); | |||
11922 | if( iOff>pLeaf->szLeaf ){ | |||
11923 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
11924 | return; | |||
11925 | } | |||
11926 | } | |||
11927 | } | |||
11928 | ||||
11929 | /* Check if the iterator is now at EOF. If so, return early. */ | |||
11930 | if( pIter->pLeaf ){ | |||
11931 | if( bNewTerm ){ | |||
/* A new term was reached. A single-term iterator is finished at this
** point (set to EOF); otherwise load the term and the position-list
** size, and report the term change through *pbNewTerm if supplied. */
11932 | if( pIter->flags & FTS5_SEGITER_ONETERM0x01 ){ | |||
11933 | fts5DataRelease(pIter->pLeaf); | |||
11934 | pIter->pLeaf = 0; | |||
11935 | }else{ | |||
11936 | fts5SegIterLoadTerm(p, pIter, nKeep); | |||
11937 | fts5SegIterLoadNPos(p, pIter); | |||
11938 | if( pbNewTerm ) *pbNewTerm = 1; | |||
11939 | } | |||
11940 | }else{ | |||
11941 | /* The following could be done by calling fts5SegIterLoadNPos(). But | |||
11942 | ** this block is particularly performance critical, so equivalent | |||
11943 | ** code is inlined. */ | |||
/* The varint at iLeafOffset packs two values: bit 0 becomes bDel and
** the remaining bits (nSz>>1) become the position-list size nPos. */
11944 | int nSz; | |||
11945 | assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn )((void) (0)); | |||
11946 | fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz){ nSz = (pIter->pLeaf->p)[pIter->iLeafOffset++]; if( nSz & 0x80 ){ pIter->iLeafOffset--; pIter->iLeafOffset += sqlite3Fts5GetVarint32(&(pIter->pLeaf->p)[pIter ->iLeafOffset],(u32*)&(nSz)); } }; | |||
11947 | pIter->bDel = (nSz & 0x0001); | |||
11948 | pIter->nPos = nSz>>1; | |||
11949 | assert_nc( pIter->nPos>=0 )((void) (0)); | |||
11950 | } | |||
11951 | } | |||
11952 | } | |||
11953 | ||||
/*
** Exchange the values of the two lvalues a and b, both of type T, through
** a block-scoped temporary named "tmp".
**
** Each argument is evaluated more than once, so arguments must be free of
** side effects and must not themselves be named "tmp". The expansion is a
** brace-enclosed block; a trailing semicolon at the point of use is
** harmless.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
11955 | ||||
/*
** Advance the integer lvalue iOff past one varint stored at a[iOff].
**
** Bytes are consumed while their high bit (0x80) is set. Scanning stops
** after the first byte whose high bit is clear, or once nine bytes have
** been consumed, whichever comes first. No bounds check against the end of
** the buffer is made here; the caller is responsible for that.
**
** iOff is evaluated several times and modified, so it must be a plain
** lvalue without side effects.
*/
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = iOff+9;                              \
  while( (a[iOff++] & 0x80) && iOff<iEnd );       \
}
11960 | ||||
11961 | /* | |||
11962 | ** Iterator pIter currently points to the first rowid in a doclist. This | |||
11963 | ** function sets the iterator up so that it iterates in reverse order | |||
11964 | ** through the doclist. | |||
11965 | */ | |||
11966 | static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ | |||
11967 | Fts5DlidxIter *pDlidx = pIter->pDlidx; | |||
/* pLast/pgnoLast: the leaf (and its page number) found to hold the final
** rowid of the doclist. pLast stays NULL if that is the current page. */
11968 | Fts5Data *pLast = 0; | |||
11969 | int pgnoLast = 0; | |||
11970 | ||||
11971 | if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION4 ){ | |||
/* A doclist-index iterator is available (and the on-disk format is the
** current version): take the page number it reports and read that leaf
** directly rather than scanning forward. */
11972 | int iSegid = pIter->pSeg->iSegid; | |||
11973 | pgnoLast = fts5DlidxIterPgno(pDlidx); | |||
11974 | pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgnoLast)) )); | |||
11975 | }else{ | |||
11976 | Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ | |||
11977 | ||||
11978 | /* Currently, Fts5SegIter.iLeafOffset points to the first byte of | |||
11979 | ** position-list content for the current rowid. Back it up so that it | |||
11980 | ** points to the start of the position-list size field. */ | |||
/* The starting point is the offset just after the term when the term
** is on this page, otherwise the fixed offset 4. NOTE(review): 4 is
** presumably the size of the leaf page header - confirm. The rowid
** varint found there is then skipped. */
11981 | int iPoslist; | |||
11982 | if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ | |||
11983 | iPoslist = pIter->iTermLeafOffset; | |||
11984 | }else{ | |||
11985 | iPoslist = 4; | |||
11986 | } | |||
11987 | fts5IndexSkipVarint(pLeaf->p, iPoslist){ int iEnd = iPoslist+9; while( (pLeaf->p[iPoslist++] & 0x80) && iPoslist<iEnd ); }; | |||
11988 | pIter->iLeafOffset = iPoslist; | |||
11989 | ||||
11990 | /* If this condition is true then the largest rowid for the current | |||
11991 | ** term may not be stored on the current page. So search forward to | |||
11992 | ** see where said rowid really is. */ | |||
11993 | if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ | |||
11994 | int pgno; | |||
11995 | Fts5StructureSegment *pSeg = pIter->pSeg; | |||
11996 | ||||
11997 | /* The last rowid in the doclist may not be on the current page. Search | |||
11998 | ** forward to find the page containing the last rowid. */ | |||
/* The loop also stops as soon as p->rc becomes non-zero. */
11999 | for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ | |||
12000 | i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | |||
12001 | Fts5Data *pNew = fts5LeafRead(p, iAbs); | |||
12002 | if( pNew ){ | |||
12003 | int iRowid, bTermless; | |||
12004 | iRowid = fts5LeafFirstRowidOff(pNew)(fts5GetU16((pNew)->p)); | |||
12005 | bTermless = fts5LeafIsTermless(pNew)((pNew)->szLeaf >= (pNew)->nn); | |||
/* A page with a non-zero first-rowid offset becomes the new
** candidate. SWAPVAL leaves the previous candidate (possibly NULL)
** in pNew, so the fts5DataRelease() below always frees whichever
** page is no longer needed. */
12006 | if( iRowid ){ | |||
12007 | SWAPVAL(Fts5Data*, pNew, pLast){ Fts5Data* tmp; tmp=pNew; pNew=pLast; pLast=tmp; }; | |||
12008 | pgnoLast = pgno; | |||
12009 | } | |||
12010 | fts5DataRelease(pNew); | |||
/* A page that is not termless ends the forward scan. */
12011 | if( bTermless==0 ) break; | |||
12012 | } | |||
12013 | } | |||
12014 | } | |||
12015 | } | |||
12016 | ||||
12017 | /* If pLast is NULL at this point, then the last rowid for this doclist | |||
12018 | ** lies on the page currently indicated by the iterator. In this case | |||
12019 | ** pIter->iLeafOffset is already set to point to the position-list size | |||
12020 | ** field associated with the first relevant rowid on the page. | |||
12021 | ** | |||
12022 | ** Or, if pLast is non-NULL, then it is the page that contains the last | |||
12023 | ** rowid. In this case configure the iterator so that it points to the | |||
12024 | ** first rowid on this page. | |||
12025 | */ | |||
12026 | if( pLast ){ | |||
12027 | int iOff; | |||
/* The iterator takes over the reference held in pLast; the previously
** held leaf is released first. */
12028 | fts5DataRelease(pIter->pLeaf); | |||
12029 | pIter->pLeaf = pLast; | |||
12030 | pIter->iLeafPgno = pgnoLast; | |||
12031 | iOff = fts5LeafFirstRowidOff(pLast)(fts5GetU16((pLast)->p)); | |||
/* A first-rowid offset beyond the leaf content indicates corruption.
** pIter->pLeaf already owns pLast at this point, so nothing is leaked
** by returning here. */
12032 | if( iOff>pLast->szLeaf ){ | |||
12033 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
12034 | return; | |||
12035 | } | |||
12036 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); | |||
12037 | pIter->iLeafOffset = iOff; | |||
12038 | ||||
/* On a termless page iEndofDoclist is set to nn+1; otherwise it is
** the offset of the first term on the page. */
12039 | if( fts5LeafIsTermless(pLast)((pLast)->szLeaf >= (pLast)->nn) ){ | |||
12040 | pIter->iEndofDoclist = pLast->nn+1; | |||
12041 | }else{ | |||
12042 | pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); | |||
12043 | } | |||
12044 | } | |||
12045 | ||||
/* Finally set up reverse iteration over whichever page is now current. */
12046 | fts5SegIterReverseInitPage(p, pIter); | |||
12047 | } | |||
12048 | ||||
12049 | /* | |||
12050 | ** Iterator pIter currently points to the first rowid of a doclist. | |||
12051 | ** There is a doclist-index associated with the final term on the current | |||
12052 | ** page. If the current term is the last term on the page, load the | |||
12053 | ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). | |||
12054 | */ | |||
12055 | static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ | |||
12056 | int iSeg = pIter->pSeg->iSegid; | |||
12057 | int bRev = (pIter->flags & FTS5_SEGITER_REVERSE0x02); | |||
12058 | Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ | |||
12059 | ||||
12060 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | |||
12061 | assert( pIter->pDlidx==0 )((void) (0)); | |||
12062 | ||||
12063 | /* Check if the current doclist ends on this page. If it does, return | |||
12064 | ** early without loading the doclist-index (as it belongs to a different | |||
12065 | ** term. */ | |||
12066 | if( pIter->iTermLeafPgno==pIter->iLeafPgno | |||
12067 | && pIter->iEndofDoclist<pLeaf->szLeaf | |||
12068 | ){ | |||
12069 | return; | |||
12070 | } | |||
12071 | ||||
12072 | pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); | |||
12073 | } | |||
12074 | ||||
12075 | /* | |||
12076 | ** The iterator object passed as the second argument currently contains | |||
12077 | ** no valid values except for the Fts5SegIter.pLeaf member variable. This | |||
12078 | ** function searches the leaf page for a term matching (pTerm/nTerm). | |||
12079 | ** | |||
12080 | ** If the specified term is found on the page, then the iterator is left | |||
12081 | ** pointing to it. If argument bGe is zero and the term is not found, | |||
12082 | ** the iterator is left pointing at EOF. | |||
12083 | ** | |||
12084 | ** If bGe is non-zero and the specified term is not found, then the | |||
12085 | ** iterator is left pointing to the smallest term in the segment that | |||
12086 | ** is larger than the specified term, even if this term is not on the | |||
12087 | ** current page. | |||
12088 | */ | |||
12089 | static void fts5LeafSeek( | |||
12090 | Fts5Index *p, /* Leave any error code here */ | |||
12091 | int bGe, /* True for a >= search */ | |||
12092 | Fts5SegIter *pIter, /* Iterator to seek */ | |||
12093 | const u8 *pTerm, int nTerm /* Term to search for */ | |||
12094 | ){ | |||
12095 | u32 iOff; | |||
12096 | const u8 *a = pIter->pLeaf->p; | |||
12097 | u32 n = (u32)pIter->pLeaf->nn; | |||
12098 | ||||
/* nMatch: number of leading bytes of pTerm matched so far.
** nKeep/nNew: byte counts read for the term under examination; on success
** the iterator's term is rebuilt as nKeep bytes of pTerm followed by the
** nNew bytes stored at a[iOff].
** iTermOff: offset of the term under examination; iPgidx: read position
** in the bytes that follow the leaf content. */
12099 | u32 nMatch = 0; | |||
12100 | u32 nKeep = 0; | |||
12101 | u32 nNew = 0; | |||
12102 | u32 iTermOff; | |||
12103 | u32 iPgidx; /* Current offset in pgidx */ | |||
12104 | int bEndOfPage = 0; | |||
12105 | ||||
12106 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
12107 | ||||
/* The first varint stored at offset szLeaf is read into iTermOff, the
** offset of the first term examined. An offset past the end of the page
** buffer (nn) is reported as corruption. */
12108 | iPgidx = (u32)pIter->pLeaf->szLeaf; | |||
12109 | iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(iTermOff)); | |||
12110 | iOff = iTermOff; | |||
12111 | if( iOff>n ){ | |||
12112 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
12113 | return; | |||
12114 | } | |||
12115 | ||||
/* Each pass of this loop examines one term on the page. */
12116 | while( 1 ){ | |||
12117 | ||||
12118 | /* Figure out how many new bytes are in this term */ | |||
12119 | fts5FastGetVarint32(a, iOff, nNew){ nNew = (a)[iOff++]; if( nNew & 0x80 ){ iOff--; iOff += sqlite3Fts5GetVarint32 (&(a)[iOff],(u32*)&(nNew)); } }; | |||
/* If this term keeps fewer bytes than have already been matched against
** pTerm, the search ends here without an exact match. */
12120 | if( nKeep<nMatch ){ | |||
12121 | goto search_failed; | |||
12122 | } | |||
12123 | ||||
12124 | assert( nKeep>=nMatch )((void) (0)); | |||
12125 | if( nKeep==nMatch ){ | |||
/* Compare the new bytes of this term against the unmatched remainder
** of pTerm, extending nMatch by the number of equal bytes. */
12126 | u32 nCmp; | |||
12127 | u32 i; | |||
12128 | nCmp = (u32)MIN(nNew, nTerm-nMatch)(((nNew) < (nTerm-nMatch)) ? (nNew) : (nTerm-nMatch)); | |||
12129 | for(i=0; i<nCmp; i++){ | |||
12130 | if( a[iOff+i]!=pTerm[nMatch+i] ) break; | |||
12131 | } | |||
12132 | nMatch += i; | |||
12133 | ||||
/* All of pTerm matched: exact hit only if the page term has no bytes
** left over (i==nNew). Otherwise, if the first differing byte of the
** page term is larger than pTerm's, the search has gone past the
** point where pTerm could occur. */
12134 | if( (u32)nTerm==nMatch ){ | |||
12135 | if( i==nNew ){ | |||
12136 | goto search_success; | |||
12137 | }else{ | |||
12138 | goto search_failed; | |||
12139 | } | |||
12140 | }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ | |||
12141 | goto search_failed; | |||
12142 | } | |||
12143 | } | |||
12144 | ||||
/* No more entries after the leaf content: every term on this page has
** been examined. */
12145 | if( iPgidx>=n ){ | |||
12146 | bEndOfPage = 1; | |||
12147 | break; | |||
12148 | } | |||
12149 | ||||
/* The next varint is a delta added to iTermOff to locate the following
** term (nKeep is used as scratch storage for it). */
12150 | iPgidx += fts5GetVarint32(&a[iPgidx], nKeep)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(nKeep)); | |||
12151 | iTermOff += nKeep; | |||
12152 | iOff = iTermOff; | |||
12153 | ||||
12154 | if( iOff>=n ){ | |||
12155 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
12156 | return; | |||
12157 | } | |||
12158 | ||||
12159 | /* Read the nKeep field of the next term. */ | |||
12160 | fts5FastGetVarint32(a, iOff, nKeep){ nKeep = (a)[iOff++]; if( nKeep & 0x80 ){ iOff--; iOff += sqlite3Fts5GetVarint32(&(a)[iOff],(u32*)&(nKeep)); } }; | |||
12161 | } | |||
12162 | ||||
/* Reached by goto, or by falling out of the loop at end of page. */
12163 | search_failed: | |||
12164 | if( bGe==0 ){ | |||
/* Exact-match search: release the leaf and leave the iterator at EOF. */
12165 | fts5DataRelease(pIter->pLeaf); | |||
12166 | pIter->pLeaf = 0; | |||
12167 | return; | |||
12168 | }else if( bEndOfPage ){ | |||
/* >= search that ran off the end of the page: step through following
** pages until one that is not termless is found, then position on its
** first term. The first-term offset must be at least 4 and lie inside
** the leaf content, otherwise the page is reported as corrupt. */
12169 | do { | |||
12170 | fts5SegIterNextPage(p, pIter); | |||
12171 | if( pIter->pLeaf==0 ) return; | |||
12172 | a = pIter->pLeaf->p; | |||
12173 | if( fts5LeafIsTermless(pIter->pLeaf)((pIter->pLeaf)->szLeaf >= (pIter->pLeaf)->nn)==0 ){ | |||
12174 | iPgidx = (u32)pIter->pLeaf->szLeaf; | |||
12175 | iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff)sqlite3Fts5GetVarint32(&pIter->pLeaf->p[iPgidx],(u32 *)&(iOff)); | |||
12176 | if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){ | |||
12177 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
12178 | return; | |||
12179 | }else{ | |||
12180 | nKeep = 0; | |||
12181 | iTermOff = iOff; | |||
12182 | n = (u32)pIter->pLeaf->nn; | |||
12183 | iOff += fts5GetVarint32(&a[iOff], nNew)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nNew)); | |||
12184 | break; | |||
12185 | } | |||
12186 | } | |||
12187 | }while( 1 ); | |||
12188 | } | |||
12189 | ||||
/* Reached on an exact match, and also by falling through from
** search_failed when bGe is non-zero. iOff addresses the nNew suffix
** bytes of the term the iterator will point at. A suffix that is empty or
** runs past the end of the page buffer is reported as corruption. */
12190 | search_success: | |||
12191 | if( (i64)iOff+nNew>n || nNew<1 ){ | |||
12192 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
12193 | return; | |||
12194 | } | |||
12195 | pIter->iLeafOffset = iOff + nNew; | |||
12196 | pIter->iTermLeafOffset = pIter->iLeafOffset; | |||
12197 | pIter->iTermLeafPgno = pIter->iLeafPgno; | |||
12198 | ||||
/* Rebuild the iterator's term: nKeep bytes taken from pTerm followed by
** the nNew bytes stored on the page. */
12199 | fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm)sqlite3Fts5BufferSet(&p->rc,&pIter->term,nKeep, pTerm); | |||
12200 | fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&pIter->term ,nNew,&a[iOff]); | |||
12201 | ||||
/* If there is no later term on this page iEndofDoclist is set to nn+1.
** Otherwise the next varint gives the distance from this term to the
** next one, which is where this doclist ends. */
12202 | if( iPgidx>=n ){ | |||
12203 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | |||
12204 | }else{ | |||
12205 | int nExtra; | |||
12206 | iPgidx += fts5GetVarint32(&a[iPgidx], nExtra)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(nExtra)); | |||
12207 | pIter->iEndofDoclist = iTermOff + nExtra; | |||
12208 | } | |||
12209 | pIter->iPgidxOff = iPgidx; | |||
12210 | ||||
/* Load the first rowid and its position-list size for the new term. */
12211 | fts5SegIterLoadRowid(p, pIter); | |||
12212 | fts5SegIterLoadNPos(p, pIter); | |||
12213 | } | |||
12214 | ||||
12215 | static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ | |||
12216 | if( p->pIdxSelect==0 ){ | |||
12217 | Fts5Config *pConfig = p->pConfig; | |||
12218 | fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintfsqlite3_api->mprintf( | |||
12219 | "SELECT pgno FROM '%q'.'%q_idx' WHERE " | |||
12220 | "segid=? AND term<=? ORDER BY term DESC LIMIT 1", | |||
12221 | pConfig->zDb, pConfig->zName | |||
12222 | )); | |||
12223 | } | |||
12224 | return p->pIdxSelect; | |||
12225 | } | |||
12226 | ||||
12227 | /* | |||
12228 | ** Initialize the object pIter to point to term pTerm/nTerm within segment | |||
12229 | ** pSeg. If there is no such term in the index, the iterator is set to EOF. | |||
12230 | ** | |||
12231 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | |||
12232 | ** an error has already occurred when this function is called, it is a no-op. | |||
12233 | */ | |||
12234 | static void fts5SegIterSeekInit( | |||
12235 | Fts5Index *p, /* FTS5 backend */ | |||
12236 | const u8 *pTerm, int nTerm, /* Term to seek to */ | |||
12237 | int flags, /* Mask of FTS5INDEX_XXX flags */ | |||
12238 | Fts5StructureSegment *pSeg, /* Description of segment */ | |||
12239 | Fts5SegIter *pIter /* Object to populate */ | |||
12240 | ){ | |||
12241 | int iPg = 1; | |||
12242 | int bGe = (flags & FTS5INDEX_QUERY_SCAN0x0008); | |||
12243 | int bDlidx = 0; /* True if there is a doclist-index */ | |||
12244 | sqlite3_stmt *pIdxSelect = 0; | |||
12245 | ||||
12246 | assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 )((void) (0)); | |||
12247 | assert( pTerm && nTerm )((void) (0)); | |||
12248 | memset(pIter, 0, sizeof(*pIter)); | |||
12249 | pIter->pSeg = pSeg; | |||
12250 | ||||
12251 | /* This block sets stack variable iPg to the leaf page number that may | |||
12252 | ** contain term (pTerm/nTerm), if it is present in the segment. */ | |||
12253 | pIdxSelect = fts5IdxSelectStmt(p); | |||
12254 | if( p->rc ) return; | |||
12255 | sqlite3_bind_intsqlite3_api->bind_int(pIdxSelect, 1, pSeg->iSegid); | |||
12256 | sqlite3_bind_blobsqlite3_api->bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
12257 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pIdxSelect) ){ | |||
12258 | i64 val = sqlite3_column_intsqlite3_api->column_int(pIdxSelect, 0); | |||
12259 | iPg = (int)(val>>1); | |||
12260 | bDlidx = (val & 0x0001); | |||
12261 | } | |||
12262 | p->rc = sqlite3_resetsqlite3_api->reset(pIdxSelect); | |||
12263 | sqlite3_bind_nullsqlite3_api->bind_null(pIdxSelect, 2); | |||
12264 | ||||
12265 | if( iPg<pSeg->pgnoFirst ){ | |||
12266 | iPg = pSeg->pgnoFirst; | |||
12267 | bDlidx = 0; | |||
12268 | } | |||
12269 | ||||
12270 | pIter->iLeafPgno = iPg - 1; | |||
12271 | fts5SegIterNextPage(p, pIter); | |||
12272 | ||||
12273 | if( pIter->pLeaf ){ | |||
12274 | fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); | |||
12275 | } | |||
12276 | ||||
12277 | if( p->rc==SQLITE_OK0 && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM0x0100)) ){ | |||
12278 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | |||
12279 | if( pIter->pLeaf ){ | |||
12280 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | |||
12281 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | |||
12282 | } | |||
12283 | if( bDlidx ){ | |||
12284 | fts5SegIterLoadDlidx(p, pIter); | |||
12285 | } | |||
12286 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | |||
12287 | fts5SegIterReverse(p, pIter); | |||
12288 | } | |||
12289 | } | |||
12290 | } | |||
12291 | ||||
12292 | fts5SegIterSetNext(p, pIter); | |||
12293 | if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM0x0100) ){ | |||
12294 | fts5SegIterAllocTombstone(p, pIter); | |||
12295 | } | |||
12296 | ||||
12297 | /* Either: | |||
12298 | ** | |||
12299 | ** 1) an error has occurred, or | |||
12300 | ** 2) the iterator points to EOF, or | |||
12301 | ** 3) the iterator points to an entry with term (pTerm/nTerm), or | |||
12302 | ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points | |||
12303 | ** to an entry with a term greater than or equal to (pTerm/nTerm). | |||
12304 | */ | |||
12305 | assert_nc( p->rc!=SQLITE_OK /* 1 */((void) (0)) | |||
12306 | || pIter->pLeaf==0 /* 2 */((void) (0)) | |||
12307 | || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */((void) (0)) | |||
12308 | || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */((void) (0)) | |||
12309 | )((void) (0)); | |||
12310 | } | |||
12311 | ||||
12312 | ||||
12313 | /* | |||
12314 | ** SQL used by fts5SegIterNextInit() to find the page to open. | |||
12315 | */ | |||
12316 | static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){ | |||
12317 | if( p->pIdxNextSelect==0 ){ | |||
12318 | Fts5Config *pConfig = p->pConfig; | |||
12319 | fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintfsqlite3_api->mprintf( | |||
12320 | "SELECT pgno FROM '%q'.'%q_idx' WHERE " | |||
12321 | "segid=? AND term>? ORDER BY term ASC LIMIT 1", | |||
12322 | pConfig->zDb, pConfig->zName | |||
12323 | )); | |||
12324 | ||||
12325 | } | |||
12326 | return p->pIdxNextSelect; | |||
12327 | } | |||
12328 | ||||
12329 | /* | |||
12330 | ** This is similar to fts5SegIterSeekInit(), except that it initializes | |||
12331 | ** the segment iterator to point to the first term following the page | |||
12332 | ** with pToken/nToken on it. | |||
12333 | */ | |||
12334 | static void fts5SegIterNextInit( | |||
12335 | Fts5Index *p, | |||
12336 | const char *pTerm, int nTerm, | |||
12337 | Fts5StructureSegment *pSeg, /* Description of segment */ | |||
12338 | Fts5SegIter *pIter /* Object to populate */ | |||
12339 | ){ | |||
12340 | int iPg = -1; /* Page of segment to open */ | |||
12341 | int bDlidx = 0; | |||
12342 | sqlite3_stmt *pSel = 0; /* SELECT to find iPg */ | |||
12343 | ||||
12344 | pSel = fts5IdxNextStmt(p); | |||
12345 | if( pSel ){ | |||
12346 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
12347 | sqlite3_bind_intsqlite3_api->bind_int(pSel, 1, pSeg->iSegid); | |||
12348 | sqlite3_bind_blobsqlite3_api->bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
12349 | ||||
12350 | if( sqlite3_stepsqlite3_api->step(pSel)==SQLITE_ROW100 ){ | |||
12351 | i64 val = sqlite3_column_int64sqlite3_api->column_int64(pSel, 0); | |||
12352 | iPg = (int)(val>>1); | |||
12353 | bDlidx = (val & 0x0001); | |||
12354 | } | |||
12355 | p->rc = sqlite3_resetsqlite3_api->reset(pSel); | |||
12356 | sqlite3_bind_nullsqlite3_api->bind_null(pSel, 2); | |||
12357 | if( p->rc ) return; | |||
12358 | } | |||
12359 | ||||
12360 | memset(pIter, 0, sizeof(*pIter)); | |||
12361 | pIter->pSeg = pSeg; | |||
12362 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | |||
12363 | if( iPg>=0 ){ | |||
12364 | pIter->iLeafPgno = iPg - 1; | |||
12365 | fts5SegIterNextPage(p, pIter); | |||
12366 | fts5SegIterSetNext(p, pIter); | |||
12367 | } | |||
12368 | if( pIter->pLeaf ){ | |||
12369 | const u8 *a = pIter->pLeaf->p; | |||
12370 | int iTermOff = 0; | |||
12371 | ||||
12372 | pIter->iPgidxOff = pIter->pLeaf->szLeaf; | |||
12373 | pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[pIter->iPgidxOff],(u32*)& (iTermOff)); | |||
12374 | pIter->iLeafOffset = iTermOff; | |||
12375 | fts5SegIterLoadTerm(p, pIter, 0); | |||
12376 | fts5SegIterLoadNPos(p, pIter); | |||
12377 | if( bDlidx ) fts5SegIterLoadDlidx(p, pIter); | |||
12378 | ||||
12379 | assert( p->rc!=SQLITE_OK ||((void) (0)) | |||
12380 | fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0((void) (0)) | |||
12381 | )((void) (0)); | |||
12382 | } | |||
12383 | } | |||
12384 | ||||
12385 | /* | |||
12386 | ** Initialize the object pIter to point to term pTerm/nTerm within the | |||
12387 | ** in-memory hash table. If there is no such term in the hash-table, the | |||
12388 | ** iterator is set to EOF. | |||
12389 | ** | |||
12390 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | |||
12391 | ** an error has already occurred when this function is called, it is a no-op. | |||
12392 | */ | |||
12393 | static void fts5SegIterHashInit( | |||
12394 | Fts5Index *p, /* FTS5 backend */ | |||
12395 | const u8 *pTerm, int nTerm, /* Term to seek to */ | |||
12396 | int flags, /* Mask of FTS5INDEX_XXX flags */ | |||
12397 | Fts5SegIter *pIter /* Object to populate */ | |||
12398 | ){ | |||
12399 | int nList = 0; | |||
12400 | const u8 *z = 0; | |||
12401 | int n = 0; | |||
12402 | Fts5Data *pLeaf = 0; | |||
12403 | ||||
12404 | assert( p->pHash )((void) (0)); | |||
12405 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
12406 | ||||
12407 | if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN0x0008) ){ | |||
12408 | const u8 *pList = 0; | |||
12409 | ||||
12410 | p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); | |||
12411 | sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList); | |||
12412 | if( pList ){ | |||
12413 | pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); | |||
12414 | if( pLeaf ){ | |||
12415 | pLeaf->p = (u8*)pList; | |||
12416 | } | |||
12417 | } | |||
12418 | ||||
12419 | /* The call to sqlite3Fts5HashScanInit() causes the hash table to | |||
12420 | ** fill the size field of all existing position lists. This means they | |||
12421 | ** can no longer be appended to. Since the only scenario in which they | |||
12422 | ** can be appended to is if the previous operation on this table was | |||
12423 | ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this | |||
12424 | ** possibility altogether. */ | |||
12425 | p->bDelete = 0; | |||
12426 | }else{ | |||
12427 | p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data), | |||
12428 | (const char*)pTerm, nTerm, (void**)&pLeaf, &nList | |||
12429 | ); | |||
12430 | if( pLeaf ){ | |||
12431 | pLeaf->p = (u8*)&pLeaf[1]; | |||
12432 | } | |||
12433 | z = pTerm; | |||
12434 | n = nTerm; | |||
12435 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | |||
12436 | } | |||
12437 | ||||
12438 | if( pLeaf ){ | |||
12439 | sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); | |||
12440 | pLeaf->nn = pLeaf->szLeaf = nList; | |||
12441 | pIter->pLeaf = pLeaf; | |||
12442 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); | |||
12443 | pIter->iEndofDoclist = pLeaf->nn; | |||
12444 | ||||
12445 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | |||
12446 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | |||
12447 | fts5SegIterReverseInitPage(p, pIter); | |||
12448 | }else{ | |||
12449 | fts5SegIterLoadNPos(p, pIter); | |||
12450 | } | |||
12451 | } | |||
12452 | ||||
12453 | fts5SegIterSetNext(p, pIter); | |||
12454 | } | |||
12455 | ||||
12456 | /* | |||
12457 | ** Array ap[] contains n elements. Release each of these elements using | |||
12458 | ** fts5DataRelease(). Then free the array itself using sqlite3_free(). | |||
12459 | */ | |||
12460 | static void fts5IndexFreeArray(Fts5Data **ap, int n){ | |||
12461 | if( ap ){ | |||
12462 | int ii; | |||
12463 | for(ii=0; ii<n; ii++){ | |||
12464 | fts5DataRelease(ap[ii]); | |||
12465 | } | |||
12466 | sqlite3_freesqlite3_api->free(ap); | |||
12467 | } | |||
12468 | } | |||
12469 | ||||
12470 | /* | |||
12471 | ** Decrement the ref-count of the object passed as the only argument. If it | |||
12472 | ** reaches 0, free it and its contents. | |||
12473 | */ | |||
12474 | static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){ | |||
12475 | if( p ){ | |||
12476 | p->nRef--; | |||
12477 | if( p->nRef<=0 ){ | |||
12478 | int ii; | |||
12479 | for(ii=0; ii<p->nTombstone; ii++){ | |||
12480 | fts5DataRelease(p->apTombstone[ii]); | |||
12481 | } | |||
12482 | sqlite3_freesqlite3_api->free(p); | |||
12483 | } | |||
12484 | } | |||
12485 | } | |||
12486 | ||||
12487 | /* | |||
12488 | ** Zero the iterator passed as the only argument. | |||
12489 | */ | |||
12490 | static void fts5SegIterClear(Fts5SegIter *pIter){ | |||
12491 | fts5BufferFree(&pIter->term)sqlite3Fts5BufferFree(&pIter->term); | |||
12492 | fts5DataRelease(pIter->pLeaf); | |||
12493 | fts5DataRelease(pIter->pNextLeaf); | |||
12494 | fts5TombstoneArrayDelete(pIter->pTombArray); | |||
12495 | fts5DlidxIterFree(pIter->pDlidx); | |||
12496 | sqlite3_freesqlite3_api->free(pIter->aRowidOffset); | |||
12497 | memset(pIter, 0, sizeof(Fts5SegIter)); | |||
12498 | } | |||
12499 | ||||
12500 | #ifdef SQLITE_DEBUG | |||
12501 | ||||
12502 | /* | |||
12503 | ** This function is used as part of the big assert() procedure implemented by | |||
12504 | ** fts5AssertMultiIterSetup(). It ensures that the result currently stored | |||
12505 | ** in *pRes is the correct result of comparing the current positions of the | |||
12506 | ** two iterators. | |||
12507 | */ | |||
12508 | static void fts5AssertComparisonResult( | |||
12509 | Fts5Iter *pIter, | |||
12510 | Fts5SegIter *p1, | |||
12511 | Fts5SegIter *p2, | |||
12512 | Fts5CResult *pRes | |||
12513 | ){ | |||
12514 | int i1 = p1 - pIter->aSeg; | |||
12515 | int i2 = p2 - pIter->aSeg; | |||
12516 | ||||
12517 | if( p1->pLeaf || p2->pLeaf ){ | |||
12518 | if( p1->pLeaf==0 ){ | |||
12519 | assert( pRes->iFirst==i2 )((void) (0)); | |||
12520 | }else if( p2->pLeaf==0 ){ | |||
12521 | assert( pRes->iFirst==i1 )((void) (0)); | |||
12522 | }else{ | |||
12523 | int nMin = MIN(p1->term.n, p2->term.n)(((p1->term.n) < (p2->term.n)) ? (p1->term.n) : ( p2->term.n)); | |||
12524 | int res = fts5Memcmp(p1->term.p, p2->term.p, nMin)((nMin)<=0 ? 0 : memcmp((p1->term.p), (p2->term.p), ( nMin))); | |||
12525 | if( res==0 ) res = p1->term.n - p2->term.n; | |||
12526 | ||||
12527 | if( res==0 ){ | |||
12528 | assert( pRes->bTermEq==1 )((void) (0)); | |||
12529 | assert( p1->iRowid!=p2->iRowid )((void) (0)); | |||
12530 | res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; | |||
12531 | }else{ | |||
12532 | assert( pRes->bTermEq==0 )((void) (0)); | |||
12533 | } | |||
12534 | ||||
12535 | if( res<0 ){ | |||
12536 | assert( pRes->iFirst==i1 )((void) (0)); | |||
12537 | }else{ | |||
12538 | assert( pRes->iFirst==i2 )((void) (0)); | |||
12539 | } | |||
12540 | } | |||
12541 | } | |||
12542 | } | |||
12543 | ||||
12544 | /* | |||
12545 | ** This function is a no-op unless SQLITE_DEBUG is defined when this module | |||
12546 | ** is compiled. In that case, this function is essentially an assert() | |||
12547 | ** statement used to verify that the contents of the pIter->aFirst[] array | |||
12548 | ** are correct. | |||
12549 | */ | |||
12550 | static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ | |||
12551 | if( p->rc==SQLITE_OK0 ){ | |||
12552 | Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
12553 | int i; | |||
12554 | ||||
12555 | assert( (pFirst->pLeaf==0)==pIter->base.bEof )((void) (0)); | |||
12556 | ||||
12557 | /* Check that pIter->iSwitchRowid is set correctly. */ | |||
12558 | for(i=0; i<pIter->nSeg; i++){ | |||
12559 | Fts5SegIter *p1 = &pIter->aSeg[i]; | |||
12560 | assert( p1==pFirst((void) (0)) | |||
12561 | || p1->pLeaf==0((void) (0)) | |||
12562 | || fts5BufferCompare(&pFirst->term, &p1->term)((void) (0)) | |||
12563 | || p1->iRowid==pIter->iSwitchRowid((void) (0)) | |||
12564 | || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev((void) (0)) | |||
12565 | )((void) (0)); | |||
12566 | } | |||
12567 | ||||
12568 | for(i=0; i<pIter->nSeg; i+=2){ | |||
12569 | Fts5SegIter *p1 = &pIter->aSeg[i]; | |||
12570 | Fts5SegIter *p2 = &pIter->aSeg[i+1]; | |||
12571 | Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; | |||
12572 | fts5AssertComparisonResult(pIter, p1, p2, pRes); | |||
12573 | } | |||
12574 | ||||
12575 | for(i=1; i<(pIter->nSeg / 2); i+=2){ | |||
12576 | Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; | |||
12577 | Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; | |||
12578 | Fts5CResult *pRes = &pIter->aFirst[i]; | |||
12579 | fts5AssertComparisonResult(pIter, p1, p2, pRes); | |||
12580 | } | |||
12581 | } | |||
12582 | } | |||
12583 | #else | |||
12584 | # define fts5AssertMultiIterSetup(x,y) | |||
12585 | #endif | |||
12586 | ||||
12587 | /* | |||
12588 | ** Do the comparison necessary to populate pIter->aFirst[iOut]. | |||
12589 | ** | |||
12590 | ** If the returned value is non-zero, then it is the index of an entry | |||
12591 | ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing | |||
12592 | ** to a key that is a duplicate of another, higher priority, | |||
12593 | ** segment-iterator in the pSeg->aSeg[] array. | |||
12594 | */ | |||
12595 | static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ | |||
12596 | int i1; /* Index of left-hand Fts5SegIter */ | |||
12597 | int i2; /* Index of right-hand Fts5SegIter */ | |||
12598 | int iRes; | |||
12599 | Fts5SegIter *p1; /* Left-hand Fts5SegIter */ | |||
12600 | Fts5SegIter *p2; /* Right-hand Fts5SegIter */ | |||
12601 | Fts5CResult *pRes = &pIter->aFirst[iOut]; | |||
12602 | ||||
12603 | assert( iOut<pIter->nSeg && iOut>0 )((void) (0)); | |||
12604 | assert( pIter->bRev==0 || pIter->bRev==1 )((void) (0)); | |||
12605 | ||||
12606 | if( iOut>=(pIter->nSeg/2) ){ | |||
12607 | i1 = (iOut - pIter->nSeg/2) * 2; | |||
12608 | i2 = i1 + 1; | |||
12609 | }else{ | |||
12610 | i1 = pIter->aFirst[iOut*2].iFirst; | |||
12611 | i2 = pIter->aFirst[iOut*2+1].iFirst; | |||
12612 | } | |||
12613 | p1 = &pIter->aSeg[i1]; | |||
12614 | p2 = &pIter->aSeg[i2]; | |||
12615 | ||||
12616 | pRes->bTermEq = 0; | |||
12617 | if( p1->pLeaf==0 ){ /* If p1 is at EOF */ | |||
12618 | iRes = i2; | |||
12619 | }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ | |||
12620 | iRes = i1; | |||
12621 | }else{ | |||
12622 | int res = fts5BufferCompare(&p1->term, &p2->term); | |||
12623 | if( res==0 ){ | |||
12624 | assert_nc( i2>i1 )((void) (0)); | |||
12625 | assert_nc( i2!=0 )((void) (0)); | |||
12626 | pRes->bTermEq = 1; | |||
12627 | if( p1->iRowid==p2->iRowid ){ | |||
12628 | return i2; | |||
12629 | } | |||
12630 | res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; | |||
12631 | } | |||
12632 | assert( res!=0 )((void) (0)); | |||
12633 | if( res<0 ){ | |||
12634 | iRes = i1; | |||
12635 | }else{ | |||
12636 | iRes = i2; | |||
12637 | } | |||
12638 | } | |||
12639 | ||||
12640 | pRes->iFirst = (u16)iRes; | |||
12641 | return 0; | |||
12642 | } | |||
12643 | ||||
12644 | /* | |||
12645 | ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. | |||
12646 | ** It is an error if leaf iLeafPgno does not exist. Unless the db is | |||
12647 | ** a 'secure-delete' db, if it contains no rowids then this is also an error. | |||
12648 | */ | |||
12649 | static void fts5SegIterGotoPage( | |||
12650 | Fts5Index *p, /* FTS5 backend object */ | |||
12651 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
12652 | int iLeafPgno | |||
12653 | ){ | |||
12654 | assert( iLeafPgno>pIter->iLeafPgno )((void) (0)); | |||
12655 | ||||
12656 | if( iLeafPgno>pIter->pSeg->pgnoLast ){ | |||
12657 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
12658 | }else{ | |||
12659 | fts5DataRelease(pIter->pNextLeaf); | |||
12660 | pIter->pNextLeaf = 0; | |||
12661 | pIter->iLeafPgno = iLeafPgno-1; | |||
12662 | ||||
12663 | while( p->rc==SQLITE_OK0 ){ | |||
12664 | int iOff; | |||
12665 | fts5SegIterNextPage(p, pIter); | |||
12666 | if( pIter->pLeaf==0 ) break; | |||
12667 | iOff = fts5LeafFirstRowidOff(pIter->pLeaf)(fts5GetU16((pIter->pLeaf)->p)); | |||
12668 | if( iOff>0 ){ | |||
12669 | u8 *a = pIter->pLeaf->p; | |||
12670 | int n = pIter->pLeaf->szLeaf; | |||
12671 | if( iOff<4 || iOff>=n ){ | |||
12672 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
12673 | }else{ | |||
12674 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); | |||
12675 | pIter->iLeafOffset = iOff; | |||
12676 | fts5SegIterLoadNPos(p, pIter); | |||
12677 | } | |||
12678 | break; | |||
12679 | } | |||
12680 | } | |||
12681 | } | |||
12682 | } | |||
12683 | ||||
12684 | /* | |||
12685 | ** Advance the iterator passed as the second argument until it is at or | |||
12686 | ** past rowid iFrom. Regardless of the value of iFrom, the iterator is | |||
12687 | ** always advanced at least once. | |||
12688 | */ | |||
12689 | static void fts5SegIterNextFrom( | |||
12690 | Fts5Index *p, /* FTS5 backend object */ | |||
12691 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
12692 | i64 iMatch /* Advance iterator at least this far */ | |||
12693 | ){ | |||
12694 | int bRev = (pIter->flags & FTS5_SEGITER_REVERSE0x02); | |||
12695 | Fts5DlidxIter *pDlidx = pIter->pDlidx; | |||
12696 | int iLeafPgno = pIter->iLeafPgno; | |||
12697 | int bMove = 1; | |||
12698 | ||||
12699 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | |||
12700 | assert( pIter->pDlidx )((void) (0)); | |||
12701 | assert( pIter->pLeaf )((void) (0)); | |||
12702 | ||||
12703 | if( bRev==0 ){ | |||
12704 | while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ | |||
12705 | iLeafPgno = fts5DlidxIterPgno(pDlidx); | |||
12706 | fts5DlidxIterNext(p, pDlidx); | |||
12707 | } | |||
12708 | assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc )((void) (0)); | |||
12709 | if( iLeafPgno>pIter->iLeafPgno ){ | |||
12710 | fts5SegIterGotoPage(p, pIter, iLeafPgno); | |||
12711 | bMove = 0; | |||
12712 | } | |||
12713 | }else{ | |||
12714 | assert( pIter->pNextLeaf==0 )((void) (0)); | |||
12715 | assert( iMatch<pIter->iRowid )((void) (0)); | |||
12716 | while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ | |||
12717 | fts5DlidxIterPrev(p, pDlidx); | |||
12718 | } | |||
12719 | iLeafPgno = fts5DlidxIterPgno(pDlidx); | |||
12720 | ||||
12721 | assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno )((void) (0)); | |||
12722 | ||||
12723 | if( iLeafPgno<pIter->iLeafPgno ){ | |||
12724 | pIter->iLeafPgno = iLeafPgno+1; | |||
12725 | fts5SegIterReverseNewPage(p, pIter); | |||
12726 | bMove = 0; | |||
12727 | } | |||
12728 | } | |||
12729 | ||||
12730 | do{ | |||
12731 | if( bMove && p->rc==SQLITE_OK0 ) pIter->xNext(p, pIter, 0); | |||
12732 | if( pIter->pLeaf==0 ) break; | |||
12733 | if( bRev==0 && pIter->iRowid>=iMatch ) break; | |||
12734 | if( bRev!=0 && pIter->iRowid<=iMatch ) break; | |||
12735 | bMove = 1; | |||
12736 | }while( p->rc==SQLITE_OK0 ); | |||
12737 | } | |||
12738 | ||||
12739 | /* | |||
12740 | ** Free the iterator object passed as the second argument. | |||
12741 | */ | |||
12742 | static void fts5MultiIterFree(Fts5Iter *pIter){ | |||
12743 | if( pIter ){ | |||
12744 | int i; | |||
12745 | for(i=0; i<pIter->nSeg; i++){ | |||
12746 | fts5SegIterClear(&pIter->aSeg[i]); | |||
12747 | } | |||
12748 | fts5BufferFree(&pIter->poslist)sqlite3Fts5BufferFree(&pIter->poslist); | |||
12749 | sqlite3_freesqlite3_api->free(pIter); | |||
12750 | } | |||
12751 | } | |||
12752 | ||||
12753 | static void fts5MultiIterAdvanced( | |||
12754 | Fts5Index *p, /* FTS5 backend to iterate within */ | |||
12755 | Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ | |||
12756 | int iChanged, /* Index of sub-iterator just advanced */ | |||
12757 | int iMinset /* Minimum entry in aFirst[] to set */ | |||
12758 | ){ | |||
12759 | int i; | |||
12760 | for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK0; i=i/2){ | |||
12761 | int iEq; | |||
12762 | if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ | |||
12763 | Fts5SegIter *pSeg = &pIter->aSeg[iEq]; | |||
12764 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
12765 | pSeg->xNext(p, pSeg, 0); | |||
12766 | i = pIter->nSeg + iEq; | |||
12767 | } | |||
12768 | } | |||
12769 | } | |||
12770 | ||||
12771 | /* | |||
12772 | ** Sub-iterator iChanged of iterator pIter has just been advanced. It still | |||
12773 | ** points to the same term though - just a different rowid. This function | |||
12774 | ** attempts to update the contents of the pIter->aFirst[] accordingly. | |||
12775 | ** If it does so successfully, 0 is returned. Otherwise 1. | |||
12776 | ** | |||
12777 | ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() | |||
12778 | ** on the iterator instead. That function does the same as this one, except | |||
12779 | ** that it deals with more complicated cases as well. | |||
12780 | */ | |||
12781 | static int fts5MultiIterAdvanceRowid( | |||
12782 | Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ | |||
12783 | int iChanged, /* Index of sub-iterator just advanced */ | |||
12784 | Fts5SegIter **ppFirst | |||
12785 | ){ | |||
12786 | Fts5SegIter *pNew = &pIter->aSeg[iChanged]; | |||
12787 | ||||
12788 | if( pNew->iRowid==pIter->iSwitchRowid | |||
12789 | || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev | |||
12790 | ){ | |||
12791 | int i; | |||
12792 | Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; | |||
12793 | pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))) : LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | |||
12794 | for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ | |||
12795 | Fts5CResult *pRes = &pIter->aFirst[i]; | |||
12796 | ||||
12797 | assert( pNew->pLeaf )((void) (0)); | |||
12798 | assert( pRes->bTermEq==0 || pOther->pLeaf )((void) (0)); | |||
12799 | ||||
12800 | if( pRes->bTermEq ){ | |||
12801 | if( pNew->iRowid==pOther->iRowid ){ | |||
12802 | return 1; | |||
12803 | }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ | |||
12804 | pIter->iSwitchRowid = pOther->iRowid; | |||
12805 | pNew = pOther; | |||
12806 | }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){ | |||
12807 | pIter->iSwitchRowid = pOther->iRowid; | |||
12808 | } | |||
12809 | } | |||
12810 | pRes->iFirst = (u16)(pNew - pIter->aSeg); | |||
12811 | if( i==1 ) break; | |||
12812 | ||||
12813 | pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; | |||
12814 | } | |||
12815 | } | |||
12816 | ||||
12817 | *ppFirst = pNew; | |||
12818 | return 0; | |||
12819 | } | |||
12820 | ||||
12821 | /* | |||
12822 | ** Set the pIter->bEof variable based on the state of the sub-iterators. | |||
12823 | */ | |||
12824 | static void fts5MultiIterSetEof(Fts5Iter *pIter){ | |||
12825 | Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
12826 | pIter->base.bEof = pSeg->pLeaf==0; | |||
12827 | pIter->iSwitchRowid = pSeg->iRowid; | |||
12828 | } | |||
12829 | ||||
/*
** The argument to these macros must be a pointer to an Fts5Data structure
** containing a tombstone hash page.
**
** TOMBSTONE_KEYSIZE() returns the key-size of the hash-page: 4 if the
** first byte of the page is 4, or 8 otherwise.
**
** TOMBSTONE_NSLOT() returns the number of key slots on the page: the
** page size less 8 bytes, divided by the key-size, for pages larger
** than 16 bytes, or 1 otherwise.
**
** The argument is parenthesized in the expansions so that any pointer
** expression (for example "&sPage") may be passed, not just a plain
** identifier. The argument is evaluated more than once, so it must not
** have side effects.
*/
#define TOMBSTONE_KEYSIZE(pPg) ((pPg)->p[0]==4 ? 4 : 8)

#define TOMBSTONE_NSLOT(pPg)   \
  (((pPg)->nn > 16) ? (((pPg)->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1)
12838 | ||||
12839 | /* | |||
12840 | ** Query a single tombstone hash table for rowid iRowid. Return true if | |||
12841 | ** it is found or false otherwise. The tombstone hash table is one of | |||
12842 | ** nHashTable tables. | |||
12843 | */ | |||
12844 | static int fts5IndexTombstoneQuery( | |||
12845 | Fts5Data *pHash, /* Hash table page to query */ | |||
12846 | int nHashTable, /* Number of pages attached to segment */ | |||
12847 | u64 iRowid /* Rowid to query hash for */ | |||
12848 | ){ | |||
12849 | const int szKey = TOMBSTONE_KEYSIZE(pHash)(pHash->p[0]==4 ? 4 : 8); | |||
12850 | const int nSlot = TOMBSTONE_NSLOT(pHash)((pHash->nn > 16) ? ((pHash->nn-8) / (pHash->p[0] ==4 ? 4 : 8)) : 1); | |||
12851 | int iSlot = (iRowid / nHashTable) % nSlot; | |||
12852 | int nCollide = nSlot; | |||
12853 | ||||
12854 | if( iRowid==0 ){ | |||
12855 | return pHash->p[1]; | |||
12856 | }else if( szKey==4 ){ | |||
12857 | u32 *aSlot = (u32*)&pHash->p[8]; | |||
12858 | while( aSlot[iSlot] ){ | |||
12859 | if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1; | |||
12860 | if( nCollide--==0 ) break; | |||
12861 | iSlot = (iSlot+1)%nSlot; | |||
12862 | } | |||
12863 | }else{ | |||
12864 | u64 *aSlot = (u64*)&pHash->p[8]; | |||
12865 | while( aSlot[iSlot] ){ | |||
12866 | if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1; | |||
12867 | if( nCollide--==0 ) break; | |||
12868 | iSlot = (iSlot+1)%nSlot; | |||
12869 | } | |||
12870 | } | |||
12871 | ||||
12872 | return 0; | |||
12873 | } | |||
12874 | ||||
12875 | /* | |||
12876 | ** Return true if the iterator passed as the only argument points | |||
12877 | ** to an segment entry for which there is a tombstone. Return false | |||
12878 | ** if there is no tombstone or if the iterator is already at EOF. | |||
12879 | */ | |||
12880 | static int fts5MultiIterIsDeleted(Fts5Iter *pIter){ | |||
12881 | int iFirst = pIter->aFirst[1].iFirst; | |||
12882 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | |||
12883 | Fts5TombstoneArray *pArray = pSeg->pTombArray; | |||
12884 | ||||
12885 | if( pSeg->pLeaf && pArray ){ | |||
12886 | /* Figure out which page the rowid might be present on. */ | |||
12887 | int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone; | |||
12888 | assert( iPg>=0 )((void) (0)); | |||
12889 | ||||
12890 | /* If tombstone hash page iPg has not yet been loaded from the | |||
12891 | ** database, load it now. */ | |||
12892 | if( pArray->apTombstone[iPg]==0 ){ | |||
12893 | pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex, | |||
12894 | FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)( ((i64)(pSeg->pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << ( 31)) + ((i64)(iPg)) ) | |||
12895 | ); | |||
12896 | if( pArray->apTombstone[iPg]==0 ) return 0; | |||
12897 | } | |||
12898 | ||||
12899 | return fts5IndexTombstoneQuery( | |||
12900 | pArray->apTombstone[iPg], | |||
12901 | pArray->nTombstone, | |||
12902 | pSeg->iRowid | |||
12903 | ); | |||
12904 | } | |||
12905 | ||||
12906 | return 0; | |||
12907 | } | |||
12908 | ||||
12909 | /* | |||
12910 | ** Move the iterator to the next entry. | |||
12911 | ** | |||
12912 | ** If an error occurs, an error code is left in Fts5Index.rc. It is not | |||
12913 | ** considered an error if the iterator reaches EOF, or if it is already at | |||
12914 | ** EOF when this function is called. | |||
12915 | */ | |||
12916 | static void fts5MultiIterNext( | |||
12917 | Fts5Index *p, | |||
12918 | Fts5Iter *pIter, | |||
12919 | int bFrom, /* True if argument iFrom is valid */ | |||
12920 | i64 iFrom /* Advance at least as far as this */ | |||
12921 | ){ | |||
12922 | int bUseFrom = bFrom; | |||
12923 | assert( pIter->base.bEof==0 )((void) (0)); | |||
12924 | while( p->rc==SQLITE_OK0 ){ | |||
12925 | int iFirst = pIter->aFirst[1].iFirst; | |||
12926 | int bNewTerm = 0; | |||
12927 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | |||
12928 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
12929 | if( bUseFrom && pSeg->pDlidx ){ | |||
12930 | fts5SegIterNextFrom(p, pSeg, iFrom); | |||
12931 | }else{ | |||
12932 | pSeg->xNext(p, pSeg, &bNewTerm); | |||
12933 | } | |||
12934 | ||||
12935 | if( pSeg->pLeaf==0 || bNewTerm | |||
12936 | || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) | |||
12937 | ){ | |||
12938 | fts5MultiIterAdvanced(p, pIter, iFirst, 1); | |||
12939 | fts5MultiIterSetEof(pIter); | |||
12940 | pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | |||
12941 | if( pSeg->pLeaf==0 ) return; | |||
12942 | } | |||
12943 | ||||
12944 | fts5AssertMultiIterSetup(p, pIter); | |||
12945 | assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf )((void) (0)); | |||
12946 | if( (pIter->bSkipEmpty==0 || pSeg->nPos) | |||
12947 | && 0==fts5MultiIterIsDeleted(pIter) | |||
12948 | ){ | |||
12949 | pIter->xSetOutputs(pIter, pSeg); | |||
12950 | return; | |||
12951 | } | |||
12952 | bUseFrom = 0; | |||
12953 | } | |||
12954 | } | |||
12955 | ||||
12956 | static void fts5MultiIterNext2( | |||
12957 | Fts5Index *p, | |||
12958 | Fts5Iter *pIter, | |||
12959 | int *pbNewTerm /* OUT: True if *might* be new term */ | |||
12960 | ){ | |||
12961 | assert( pIter->bSkipEmpty )((void) (0)); | |||
12962 | if( p->rc==SQLITE_OK0 ){ | |||
12963 | *pbNewTerm = 0; | |||
12964 | do{ | |||
12965 | int iFirst = pIter->aFirst[1].iFirst; | |||
12966 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | |||
12967 | int bNewTerm = 0; | |||
12968 | ||||
12969 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
12970 | pSeg->xNext(p, pSeg, &bNewTerm); | |||
12971 | if( pSeg->pLeaf==0 || bNewTerm | |||
12972 | || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) | |||
12973 | ){ | |||
12974 | fts5MultiIterAdvanced(p, pIter, iFirst, 1); | |||
12975 | fts5MultiIterSetEof(pIter); | |||
12976 | *pbNewTerm = 1; | |||
12977 | } | |||
12978 | fts5AssertMultiIterSetup(p, pIter); | |||
12979 | ||||
12980 | }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter)) | |||
12981 | && (p->rc==SQLITE_OK0) | |||
12982 | ); | |||
12983 | } | |||
12984 | } | |||
12985 | ||||
12986 | static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){ | |||
12987 | UNUSED_PARAM2(pUnused1, pUnused2)(void)(pUnused1), (void)(pUnused2); | |||
12988 | } | |||
12989 | ||||
12990 | static Fts5Iter *fts5MultiIterAlloc( | |||
12991 | Fts5Index *p, /* FTS5 backend to iterate within */ | |||
12992 | int nSeg | |||
12993 | ){ | |||
12994 | Fts5Iter *pNew; | |||
12995 | i64 nSlot; /* Power of two >= nSeg */ | |||
12996 | ||||
12997 | for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); | |||
12998 | pNew = fts5IdxMalloc(p, | |||
12999 | SZ_FTS5ITER(nSlot)(__builtin_offsetof(Fts5Iter, aSeg)+(nSlot)*sizeof(Fts5SegIter )) + /* pNew + pNew->aSeg[] */ | |||
13000 | sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ | |||
13001 | ); | |||
13002 | if( pNew ){ | |||
13003 | pNew->nSeg = nSlot; | |||
13004 | pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; | |||
13005 | pNew->pIndex = p; | |||
13006 | pNew->xSetOutputs = fts5IterSetOutputs_Noop; | |||
13007 | } | |||
13008 | return pNew; | |||
13009 | } | |||
13010 | ||||
13011 | static void fts5PoslistCallback( | |||
13012 | Fts5Index *pUnused, | |||
13013 | void *pContext, | |||
13014 | const u8 *pChunk, int nChunk | |||
13015 | ){ | |||
13016 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
13017 | assert_nc( nChunk>=0 )((void) (0)); | |||
13018 | if( nChunk>0 ){ | |||
13019 | fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk){ ((void) (0)); memcpy(&((Fts5Buffer*)pContext)->p[((Fts5Buffer *)pContext)->n], pChunk, nChunk); ((Fts5Buffer*)pContext)-> n += nChunk; }; | |||
13020 | } | |||
13021 | } | |||
13022 | ||||
13023 | typedef struct PoslistCallbackCtx PoslistCallbackCtx; | |||
13024 | struct PoslistCallbackCtx { | |||
13025 | Fts5Buffer *pBuf; /* Append to this buffer */ | |||
13026 | Fts5Colset *pColset; /* Restrict matches to this column */ | |||
13027 | int eState; /* See above */ | |||
13028 | }; | |||
13029 | ||||
13030 | typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; | |||
13031 | struct PoslistOffsetsCtx { | |||
13032 | Fts5Buffer *pBuf; /* Append to this buffer */ | |||
13033 | Fts5Colset *pColset; /* Restrict matches to this column */ | |||
13034 | int iRead; | |||
13035 | int iWrite; | |||
13036 | }; | |||
13037 | ||||
13038 | /* | |||
13039 | ** TODO: Make this more efficient! | |||
13040 | */ | |||
13041 | static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ | |||
13042 | int i; | |||
13043 | for(i=0; i<pColset->nCol; i++){ | |||
13044 | if( pColset->aiCol[i]==iCol ) return 1; | |||
13045 | } | |||
13046 | return 0; | |||
13047 | } | |||
13048 | ||||
13049 | static void fts5PoslistOffsetsCallback( | |||
13050 | Fts5Index *pUnused, | |||
13051 | void *pContext, | |||
13052 | const u8 *pChunk, int nChunk | |||
13053 | ){ | |||
13054 | PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; | |||
13055 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
13056 | assert_nc( nChunk>=0 )((void) (0)); | |||
13057 | if( nChunk>0 ){ | |||
13058 | int i = 0; | |||
13059 | while( i<nChunk ){ | |||
13060 | int iVal; | |||
13061 | i += fts5GetVarint32(&pChunk[i], iVal)sqlite3Fts5GetVarint32(&pChunk[i],(u32*)&(iVal)); | |||
13062 | iVal += pCtx->iRead - 2; | |||
13063 | pCtx->iRead = iVal; | |||
13064 | if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ | |||
13065 | fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite){ (pCtx->pBuf)->n += sqlite3Fts5PutVarint(&(pCtx-> pBuf)->p[(pCtx->pBuf)->n], (iVal + 2 - pCtx->iWrite )); ((void) (0)); }; | |||
13066 | pCtx->iWrite = iVal; | |||
13067 | } | |||
13068 | } | |||
13069 | } | |||
13070 | } | |||
13071 | ||||
13072 | static void fts5PoslistFilterCallback( | |||
13073 | Fts5Index *pUnused, | |||
13074 | void *pContext, | |||
13075 | const u8 *pChunk, int nChunk | |||
13076 | ){ | |||
13077 | PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; | |||
13078 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
13079 | assert_nc( nChunk>=0 )((void) (0)); | |||
13080 | if( nChunk>0 ){ | |||
13081 | /* Search through to find the first varint with value 1. This is the | |||
13082 | ** start of the next columns hits. */ | |||
13083 | int i = 0; | |||
13084 | int iStart = 0; | |||
13085 | ||||
13086 | if( pCtx->eState==2 ){ | |||
13087 | int iCol; | |||
13088 | fts5FastGetVarint32(pChunk, i, iCol){ iCol = (pChunk)[i++]; if( iCol & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(pChunk)[i],(u32*)&(iCol)); } }; | |||
13089 | if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ | |||
13090 | pCtx->eState = 1; | |||
13091 | fts5BufferSafeAppendVarint(pCtx->pBuf, 1){ (pCtx->pBuf)->n += sqlite3Fts5PutVarint(&(pCtx-> pBuf)->p[(pCtx->pBuf)->n], (1)); ((void) (0)); }; | |||
13092 | }else{ | |||
13093 | pCtx->eState = 0; | |||
13094 | } | |||
13095 | } | |||
13096 | ||||
13097 | do { | |||
13098 | while( i<nChunk && pChunk[i]!=0x01 ){ | |||
13099 | while( pChunk[i] & 0x80 ) i++; | |||
13100 | i++; | |||
13101 | } | |||
13102 | if( pCtx->eState ){ | |||
13103 | fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart){ ((void) (0)); memcpy(&(pCtx->pBuf)->p[(pCtx->pBuf )->n], &pChunk[iStart], i-iStart); (pCtx->pBuf)-> n += i-iStart; }; | |||
13104 | } | |||
13105 | if( i<nChunk ){ | |||
13106 | int iCol; | |||
13107 | iStart = i; | |||
13108 | i++; | |||
13109 | if( i>=nChunk ){ | |||
13110 | pCtx->eState = 2; | |||
13111 | }else{ | |||
13112 | fts5FastGetVarint32(pChunk, i, iCol){ iCol = (pChunk)[i++]; if( iCol & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(pChunk)[i],(u32*)&(iCol)); } }; | |||
13113 | pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); | |||
13114 | if( pCtx->eState ){ | |||
13115 | fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart){ ((void) (0)); memcpy(&(pCtx->pBuf)->p[(pCtx->pBuf )->n], &pChunk[iStart], i-iStart); (pCtx->pBuf)-> n += i-iStart; }; | |||
13116 | iStart = i; | |||
13117 | } | |||
13118 | } | |||
13119 | } | |||
13120 | }while( i<nChunk ); | |||
13121 | } | |||
13122 | } | |||
13123 | ||||
13124 | static void fts5ChunkIterate( | |||
13125 | Fts5Index *p, /* Index object */ | |||
13126 | Fts5SegIter *pSeg, /* Poslist of this iterator */ | |||
13127 | void *pCtx, /* Context pointer for xChunk callback */ | |||
13128 | void (*xChunk)(Fts5Index*, void*, const u8*, int) | |||
13129 | ){ | |||
13130 | int nRem = pSeg->nPos; /* Number of bytes still to come */ | |||
13131 | Fts5Data *pData = 0; | |||
13132 | u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | |||
13133 | int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset)(((nRem) < (pSeg->pLeaf->szLeaf - pSeg->iLeafOffset )) ? (nRem) : (pSeg->pLeaf->szLeaf - pSeg->iLeafOffset )); | |||
13134 | int pgno = pSeg->iLeafPgno; | |||
13135 | int pgnoSave = 0; | |||
13136 | ||||
13137 | /* This function does not work with detail=none databases. */ | |||
13138 | assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | |||
13139 | ||||
13140 | if( (pSeg->flags & FTS5_SEGITER_REVERSE0x02)==0 ){ | |||
13141 | pgnoSave = pgno+1; | |||
13142 | } | |||
13143 | ||||
13144 | while( 1 ){ | |||
13145 | xChunk(p, pCtx, pChunk, nChunk); | |||
13146 | nRem -= nChunk; | |||
13147 | fts5DataRelease(pData); | |||
13148 | if( nRem<=0 ){ | |||
13149 | break; | |||
13150 | }else if( pSeg->pSeg==0 ){ | |||
13151 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
13152 | return; | |||
13153 | }else{ | |||
13154 | pgno++; | |||
13155 | pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)( ((i64)(pSeg->pSeg->iSegid) << (31 +5 +1)) + ((i64 )(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno )) )); | |||
13156 | if( pData==0 ) break; | |||
13157 | pChunk = &pData->p[4]; | |||
13158 | nChunk = MIN(nRem, pData->szLeaf - 4)(((nRem) < (pData->szLeaf - 4)) ? (nRem) : (pData->szLeaf - 4)); | |||
13159 | if( pgno==pgnoSave ){ | |||
13160 | assert( pSeg->pNextLeaf==0 )((void) (0)); | |||
13161 | pSeg->pNextLeaf = pData; | |||
13162 | pData = 0; | |||
13163 | } | |||
13164 | } | |||
13165 | } | |||
13166 | } | |||
13167 | ||||
13168 | /* | |||
13169 | ** Iterator pIter currently points to a valid entry (not EOF). This | |||
13170 | ** function appends the position list data for the current entry to | |||
13171 | ** buffer pBuf. It does not make a copy of the position-list size | |||
13172 | ** field. | |||
13173 | */ | |||
13174 | static void fts5SegiterPoslist( | |||
13175 | Fts5Index *p, | |||
13176 | Fts5SegIter *pSeg, | |||
13177 | Fts5Colset *pColset, | |||
13178 | Fts5Buffer *pBuf | |||
13179 | ){ | |||
13180 | assert( pBuf!=0 )((void) (0)); | |||
13181 | assert( pSeg!=0 )((void) (0)); | |||
13182 | if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING)( (u32)((pBuf)->n) + (u32)(pSeg->nPos+8) <= (u32)((pBuf )->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->rc),(pBuf ),(pSeg->nPos+8)+(pBuf)->n) ) ){ | |||
13183 | assert( pBuf->p!=0 )((void) (0)); | |||
13184 | assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING )((void) (0)); | |||
13185 | memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING8); | |||
13186 | if( pColset==0 ){ | |||
13187 | fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); | |||
13188 | }else{ | |||
13189 | if( p->pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | |||
13190 | PoslistCallbackCtx sCtx; | |||
13191 | sCtx.pBuf = pBuf; | |||
13192 | sCtx.pColset = pColset; | |||
13193 | sCtx.eState = fts5IndexColsetTest(pColset, 0); | |||
13194 | assert( sCtx.eState==0 || sCtx.eState==1 )((void) (0)); | |||
13195 | fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); | |||
13196 | }else{ | |||
13197 | PoslistOffsetsCtx sCtx; | |||
13198 | memset(&sCtx, 0, sizeof(sCtx)); | |||
13199 | sCtx.pBuf = pBuf; | |||
13200 | sCtx.pColset = pColset; | |||
13201 | fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); | |||
13202 | } | |||
13203 | } | |||
13204 | } | |||
13205 | } | |||
13206 | ||||
13207 | /* | |||
13208 | ** Parameter pPos points to a buffer containing a position list, size nPos. | |||
13209 | ** This function filters it according to pColset (which must be non-NULL) | |||
13210 | ** and sets pIter->base.pData/nData to point to the new position list. | |||
13211 | ** If memory is required for the new position list, use buffer pIter->poslist. | |||
13212 | ** Or, if the new position list is a contiguous subset of the input, set | |||
13213 | ** pIter->base.pData/nData to point directly to it. | |||
13214 | ** | |||
13215 | ** This function is a no-op if *pRc is other than SQLITE_OK when it is | |||
13216 | ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM | |||
13217 | ** before returning. | |||
13218 | */ | |||
13219 | static void fts5IndexExtractColset( | |||
13220 | int *pRc, | |||
13221 | Fts5Colset *pColset, /* Colset to filter on */ | |||
13222 | const u8 *pPos, int nPos, /* Position list */ | |||
13223 | Fts5Iter *pIter | |||
13224 | ){ | |||
13225 | if( *pRc==SQLITE_OK0 ){ | |||
13226 | const u8 *p = pPos; | |||
13227 | const u8 *aCopy = p; | |||
13228 | const u8 *pEnd = &p[nPos]; /* One byte past end of position list */ | |||
13229 | int i = 0; | |||
13230 | int iCurrent = 0; | |||
13231 | ||||
13232 | if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){ | |||
13233 | return; | |||
13234 | } | |||
13235 | ||||
13236 | while( 1 ){ | |||
13237 | while( pColset->aiCol[i]<iCurrent ){ | |||
13238 | i++; | |||
13239 | if( i==pColset->nCol ){ | |||
13240 | pIter->base.pData = pIter->poslist.p; | |||
13241 | pIter->base.nData = pIter->poslist.n; | |||
13242 | return; | |||
13243 | } | |||
13244 | } | |||
13245 | ||||
13246 | /* Advance pointer p until it points to pEnd or an 0x01 byte that is | |||
13247 | ** not part of a varint */ | |||
13248 | while( p<pEnd && *p!=0x01 ){ | |||
13249 | while( *p++ & 0x80 ); | |||
13250 | } | |||
13251 | ||||
13252 | if( pColset->aiCol[i]==iCurrent ){ | |||
13253 | if( pColset->nCol==1 ){ | |||
13254 | pIter->base.pData = aCopy; | |||
13255 | pIter->base.nData = p-aCopy; | |||
13256 | return; | |||
13257 | } | |||
13258 | fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy){ ((void) (0)); memcpy(&(&pIter->poslist)->p[(& pIter->poslist)->n], aCopy, p-aCopy); (&pIter->poslist )->n += p-aCopy; }; | |||
13259 | } | |||
13260 | if( p>=pEnd ){ | |||
13261 | pIter->base.pData = pIter->poslist.p; | |||
13262 | pIter->base.nData = pIter->poslist.n; | |||
13263 | return; | |||
13264 | } | |||
13265 | aCopy = p++; | |||
13266 | iCurrent = *p++; | |||
13267 | if( iCurrent & 0x80 ){ | |||
13268 | p--; | |||
13269 | p += fts5GetVarint32(p, iCurrent)sqlite3Fts5GetVarint32(p,(u32*)&(iCurrent)); | |||
13270 | } | |||
13271 | } | |||
13272 | } | |||
13273 | ||||
13274 | } | |||
13275 | ||||
13276 | /* | |||
13277 | ** xSetOutputs callback used by detail=none tables. | |||
13278 | */ | |||
13279 | static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
13280 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | |||
13281 | pIter->base.iRowid = pSeg->iRowid; | |||
13282 | pIter->base.nData = pSeg->nPos; | |||
13283 | } | |||
13284 | ||||
13285 | /* | |||
13286 | ** xSetOutputs callback used by detail=full and detail=col tables when no | |||
13287 | ** column filters are specified. | |||
13288 | */ | |||
13289 | static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
13290 | pIter->base.iRowid = pSeg->iRowid; | |||
13291 | pIter->base.nData = pSeg->nPos; | |||
13292 | ||||
13293 | assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | |||
13294 | assert( pIter->pColset==0 )((void) (0)); | |||
13295 | ||||
13296 | if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ | |||
13297 | /* All data is stored on the current page. Populate the output | |||
13298 | ** variables to point into the body of the page object. */ | |||
13299 | pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | |||
13300 | }else{ | |||
13301 | /* The data is distributed over two or more pages. Copy it into the | |||
13302 | ** Fts5Iter.poslist buffer and then set the output pointer to point | |||
13303 | ** to this buffer. */ | |||
13304 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | |||
13305 | fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); | |||
13306 | pIter->base.pData = pIter->poslist.p; | |||
13307 | } | |||
13308 | } | |||
13309 | ||||
13310 | /* | |||
13311 | ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match | |||
13312 | ** against no columns at all). | |||
13313 | */ | |||
13314 | static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
13315 | UNUSED_PARAM(pSeg)(void)(pSeg); | |||
13316 | pIter->base.nData = 0; | |||
13317 | } | |||
13318 | ||||
13319 | /* | |||
13320 | ** xSetOutputs callback used by detail=col when there is a column filter | |||
13321 | ** and there are 100 or more columns. Also called as a fallback from | |||
13322 | ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. | |||
13323 | */ | |||
13324 | static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
13325 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | |||
13326 | fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); | |||
13327 | pIter->base.iRowid = pSeg->iRowid; | |||
13328 | pIter->base.pData = pIter->poslist.p; | |||
13329 | pIter->base.nData = pIter->poslist.n; | |||
13330 | } | |||
13331 | ||||
13332 | /* | |||
13333 | ** xSetOutputs callback used when: | |||
13334 | ** | |||
13335 | ** * detail=col, | |||
13336 | ** * there is a column filter, and | |||
13337 | ** * the table contains 100 or fewer columns. | |||
13338 | ** | |||
13339 | ** The last point is to ensure all column numbers are stored as | |||
13340 | ** single-byte varints. | |||
13341 | */ | |||
13342 | static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
13343 | ||||
13344 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | |||
13345 | assert( pIter->pColset )((void) (0)); | |||
13346 | ||||
        /* If the nPos bytes starting at iLeafOffset run past szLeaf, the data
        ** is not all on this leaf: use the general-purpose callback. */
13347 | if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ | |||
13348 | fts5IterSetOutputs_Col(pIter, pSeg); | |||
13349 | }else{ | |||
        /* a..pEnd is the input column list on the leaf. aiCol..aiColEnd is
        ** the column filter. */
13350 | u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; | |||
13351 | u8 *pEnd = (u8*)&a[pSeg->nPos]; | |||
13352 | int iPrev = 0; | |||
13353 | int *aiCol = pIter->pColset->aiCol; | |||
13354 | int *aiColEnd = &aiCol[pIter->pColset->nCol]; | |||
13355 | ||||
        /* Output is written straight into pIter->poslist.p with no growth
        ** check here; fts5IterSetOutputCb() sizes that buffer to
        ** pConfig->nCol bytes when it installs this callback. */
13356 | u8 *aOut = pIter->poslist.p; | |||
13357 | int iPrevOut = 0; | |||
13358 | ||||
13359 | pIter->base.iRowid = pSeg->iRowid; | |||
13360 | ||||
13361 | while( a<pEnd ){ | |||
        /* Each input byte is (delta from the previous column) + 2. */
13362 | iPrev += (int)a++[0] - 2; | |||
        /* Skip filter entries below the current column; stop entirely once
        ** the filter is exhausted. */
13363 | while( *aiCol<iPrev ){ | |||
13364 | aiCol++; | |||
13365 | if( aiCol==aiColEnd ) goto setoutputs_col_out; | |||
13366 | } | |||
        /* Column is in the filter: emit it, re-encoded as a delta against
        ** the previous emitted column. */
13367 | if( *aiCol==iPrev ){ | |||
13368 | *aOut++ = (u8)((iPrev - iPrevOut) + 2); | |||
13369 | iPrevOut = iPrev; | |||
13370 | } | |||
13371 | } | |||
13372 | ||||
13373 | setoutputs_col_out: | |||
13374 | pIter->base.pData = pIter->poslist.p; | |||
13375 | pIter->base.nData = aOut - pIter->poslist.p; | |||
13376 | } | |||
13377 | } | |||
13378 | ||||
13379 | /* | |||
13380 | ** xSetOutputs callback used by detail=full when there is a column filter. | |||
13381 | */ | |||
13382 | static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
13383 | Fts5Colset *pColset = pIter->pColset; | |||
13384 | pIter->base.iRowid = pSeg->iRowid; | |||
13385 | ||||
13386 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL )((void) (0)); | |||
13387 | assert( pColset )((void) (0)); | |||
13388 | ||||
13389 | if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ | |||
13390 | /* All data is stored on the current page. Populate the output | |||
13391 | ** variables to point into the body of the page object. */ | |||
13392 | const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | |||
13393 | int *pRc = &pIter->pIndex->rc; | |||
13394 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | |||
        /* NOTE(review): base.pData/base.nData are not assigned in this
        ** branch; presumably fts5IndexExtractColset() sets them through
        ** pIter - confirm against that function. */
13395 | fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter); | |||
13396 | }else{ | |||
13397 | /* The data is distributed over two or more pages. Copy it into the | |||
13398 | ** Fts5Iter.poslist buffer and then set the output pointer to point | |||
13399 | ** to this buffer. */ | |||
13400 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | |||
13401 | fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); | |||
13402 | pIter->base.pData = pIter->poslist.p; | |||
13403 | pIter->base.nData = pIter->poslist.n; | |||
13404 | } | |||
13405 | } | |||
13406 | ||||
        /*
        ** Select the xSetOutputs callback for iterator pIter from the table's
        ** detail mode and the iterator's column filter. This is a no-op if
        ** *pRc is already set to something other than SQLITE_OK.
        */
13407 | static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ | |||
13408 | assert( pIter!=0 || (*pRc)!=SQLITE_OK )((void) (0)); | |||
13409 | if( *pRc==SQLITE_OK0 ){ | |||
13410 | Fts5Config *pConfig = pIter->pIndex->pConfig; | |||
        /* The tests below are ordered: detail=none first, then "no colset",
        ** then "empty colset", and only then the per-detail-mode filters. */
13411 | if( pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
13412 | pIter->xSetOutputs = fts5IterSetOutputs_None; | |||
13413 | } | |||
13414 | ||||
13415 | else if( pIter->pColset==0 ){ | |||
13416 | pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; | |||
13417 | } | |||
13418 | ||||
13419 | else if( pIter->pColset->nCol==0 ){ | |||
13420 | pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset; | |||
13421 | } | |||
13422 | ||||
13423 | else if( pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | |||
13424 | pIter->xSetOutputs = fts5IterSetOutputs_Full; | |||
13425 | } | |||
13426 | ||||
13427 | else{ | |||
13428 | assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | |||
13429 | if( pConfig->nCol<=100 ){ | |||
        /* fts5IterSetOutputs_Col100 writes into poslist.p without growing
        ** it, so request nCol bytes up front. Any failure is reported
        ** through *pRc. */
13430 | pIter->xSetOutputs = fts5IterSetOutputs_Col100; | |||
13431 | sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); | |||
13432 | }else{ | |||
13433 | pIter->xSetOutputs = fts5IterSetOutputs_Col; | |||
13434 | } | |||
13435 | } | |||
13436 | } | |||
13437 | } | |||
13438 | ||||
13439 | /* | |||
13440 | ** All the component segment-iterators of pIter have been set up. This | |||
13441 | ** function finishes setup for iterator pIter itself. | |||
13442 | */ | |||
13443 | static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){ | |||
13444 | int iIter; | |||
        /* Run the comparison for every slot from nSeg-1 down to 1. A non-zero
        ** result iEq names a segment iterator that is stepped once (only if
        ** no error is pending) before fts5MultiIterAdvanced() is called. */
13445 | for(iIter=pIter->nSeg-1; iIter>0; iIter--){ | |||
13446 | int iEq; | |||
13447 | if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){ | |||
13448 | Fts5SegIter *pSeg = &pIter->aSeg[iEq]; | |||
13449 | if( p->rc==SQLITE_OK0 ) pSeg->xNext(p, pSeg, 0); | |||
13450 | fts5MultiIterAdvanced(p, pIter, iEq, iIter); | |||
13451 | } | |||
13452 | } | |||
13453 | fts5MultiIterSetEof(pIter); | |||
13454 | fts5AssertMultiIterSetup(p, pIter); | |||
13455 | ||||
        /* If the first entry is to be skipped (empty with bSkipEmpty set, or
        ** deleted), advance. Otherwise, unless at EOF, populate the outputs
        ** from the segment iterator indexed by aFirst[1].iFirst. */
13456 | if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter)) | |||
13457 | || fts5MultiIterIsDeleted(pIter) | |||
13458 | ){ | |||
13459 | fts5MultiIterNext(p, pIter, 0, 0); | |||
13460 | }else if( pIter->base.bEof==0 ){ | |||
13461 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | |||
13462 | pIter->xSetOutputs(pIter, pSeg); | |||
13463 | } | |||
13464 | } | |||
13465 | ||||
13466 | /* | |||
13467 | ** Allocate a new Fts5Iter object. | |||
13468 | ** | |||
13469 | ** The new object will be used to iterate through data in structure pStruct. | |||
13470 | ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel | |||
13471 | ** is zero or greater, data from the first nSegment segments on level iLevel | |||
13472 | ** is merged. | |||
13473 | ** | |||
13474 | ** The iterator initially points to the first term/rowid entry in the | |||
13475 | ** iterated data. | |||
13476 | */ | |||
13477 | static void fts5MultiIterNew( | |||
13478 | Fts5Index *p, /* FTS5 backend to iterate within */ | |||
13479 | Fts5Structure *pStruct, /* Structure of specific index */ | |||
13480 | int flags, /* FTS5INDEX_QUERY_XXX flags */ | |||
13481 | Fts5Colset *pColset, /* Colset to filter on (or NULL) */ | |||
13482 | const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ | |||
13483 | int iLevel, /* Level to iterate (-1 for all) */ | |||
13484 | int nSegment, /* Number of segments to merge (iLevel>=0) */ | |||
13485 | Fts5Iter **ppOut /* New object */ | |||
13486 | ){ | |||
13487 | int nSeg = 0; /* Number of segment-iters in use */ | |||
13488 | int iIter = 0; /* */ | |||
13489 | int iSeg; /* Used to iterate through segments */ | |||
13490 | Fts5StructureLevel *pLvl; | |||
13491 | Fts5Iter *pNew; | |||
13492 | ||||
        /* A seek term may only be supplied when all levels are iterated. */
13493 | assert( (pTerm==0 && nTerm==0) || iLevel<0 )((void) (0)); | |||
13494 | ||||
13495 | /* Allocate space for the new multi-seg-iterator. */ | |||
13496 | if( p->rc==SQLITE_OK0 ){ | |||
13497 | if( iLevel<0 ){ | |||
13498 | assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) )((void) (0)); | |||
13499 | nSeg = pStruct->nSegment; | |||
        /* One extra slot when a hash table exists and SKIPHASH is not set. */
13500 | nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH0x0040)); | |||
13501 | }else{ | |||
13502 | nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment)(((pStruct->aLevel[iLevel].nSeg) < (nSegment)) ? (pStruct ->aLevel[iLevel].nSeg) : (nSegment)); | |||
13503 | } | |||
13504 | } | |||
13505 | *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); | |||
13506 | if( pNew==0 ){ | |||
13507 | assert( p->rc!=SQLITE_OK )((void) (0)); | |||
13508 | goto fts5MultiIterNew_post_check; | |||
13509 | } | |||
13510 | pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC0x0002)); | |||
13511 | pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY0x0010)); | |||
13512 | pNew->pColset = pColset; | |||
        /* The xSetOutputs callback is only chosen when NOOUTPUT is clear. */
13513 | if( (flags & FTS5INDEX_QUERY_NOOUTPUT0x0020)==0 ){ | |||
13514 | fts5IterSetOutputCb(&p->rc, pNew); | |||
13515 | } | |||
13516 | ||||
13517 | /* Initialize each of the component segment iterators. */ | |||
13518 | if( p->rc==SQLITE_OK0 ){ | |||
13519 | if( iLevel<0 ){ | |||
13520 | Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; | |||
13521 | if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH0x0040) ){ | |||
13522 | /* Add a segment iterator for the current contents of the hash table. */ | |||
13523 | Fts5SegIter *pIter = &pNew->aSeg[iIter++]; | |||
13524 | fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); | |||
13525 | } | |||
        /* Levels are visited in ascending order; within each level iSeg
        ** counts down, so segments are taken from last to first. */
13526 | for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){ | |||
13527 | for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ | |||
13528 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | |||
13529 | Fts5SegIter *pIter = &pNew->aSeg[iIter++]; | |||
13530 | if( pTerm==0 ){ | |||
13531 | fts5SegIterInit(p, pSeg, pIter); | |||
13532 | }else{ | |||
13533 | fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter); | |||
13534 | } | |||
13535 | } | |||
13536 | } | |||
13537 | }else{ | |||
        /* Single level: only the first nSeg segments, again last to first. */
13538 | pLvl = &pStruct->aLevel[iLevel]; | |||
13539 | for(iSeg=nSeg-1; iSeg>=0; iSeg--){ | |||
13540 | fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); | |||
13541 | } | |||
13542 | } | |||
13543 | assert( iIter==nSeg )((void) (0)); | |||
13544 | } | |||
13545 | ||||
13546 | /* If the above was successful, each component iterator now points | |||
13547 | ** to the first entry in its segment. In this case initialize the | |||
13548 | ** aFirst[] array. Or, if an error has occurred, free the iterator | |||
13549 | ** object and set the output variable to NULL. */ | |||
13550 | if( p->rc==SQLITE_OK0 ){ | |||
13551 | fts5MultiIterFinishSetup(p, pNew); | |||
13552 | }else{ | |||
13553 | fts5MultiIterFree(pNew); | |||
13554 | *ppOut = 0; | |||
13555 | } | |||
13556 | ||||
13557 | fts5MultiIterNew_post_check: | |||
13558 | assert( (*ppOut)!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
13559 | return; | |||
13560 | } | |||
13561 | ||||
13562 | /* | |||
13563 | ** Create an Fts5Iter that iterates through the doclist provided | |||
13564 | ** as the second argument. | |||
13565 | */ | |||
13566 | static void fts5MultiIterNew2( | |||
13567 | Fts5Index *p, /* FTS5 backend to iterate within */ | |||
13568 | Fts5Data *pData, /* Doclist to iterate through */ | |||
13569 | int bDesc, /* True for descending rowid order */ | |||
13570 | Fts5Iter **ppOut /* New object */ | |||
13571 | ){ | |||
13572 | Fts5Iter *pNew; | |||
13573 | pNew = fts5MultiIterAlloc(p, 2); | |||
        /* *ppOut is written only if the allocation succeeds; on failure it is
        ** left exactly as the caller passed it in. */
13574 | if( pNew ){ | |||
        /* Only slot aSeg[1] is used; aFirst[1].iFirst is pointed at it. */
13575 | Fts5SegIter *pIter = &pNew->aSeg[1]; | |||
13576 | pIter->flags = FTS5_SEGITER_ONETERM0x01; | |||
13577 | if( pData->szLeaf>0 ){ | |||
13578 | pIter->pLeaf = pData; | |||
        /* The doclist starts with a varint rowid; iLeafOffset is set to the
        ** number of bytes that varint occupies. */
13579 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pData->p, (u64*)&pIter->iRowid); | |||
13580 | pIter->iEndofDoclist = pData->nn; | |||
13581 | pNew->aFirst[1].iFirst = 1; | |||
13582 | if( bDesc ){ | |||
13583 | pNew->bRev = 1; | |||
13584 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | |||
13585 | fts5SegIterReverseInitPage(p, pIter); | |||
13586 | }else{ | |||
13587 | fts5SegIterLoadNPos(p, pIter); | |||
13588 | } | |||
        /* pIter->pLeaf now holds the buffer, so clear the local pointer;
        ** the fts5DataRelease() call at the end then receives NULL. */
13589 | pData = 0; | |||
13590 | }else{ | |||
13591 | pNew->base.bEof = 1; | |||
13592 | } | |||
13593 | fts5SegIterSetNext(p, pIter); | |||
13594 | ||||
13595 | *ppOut = pNew; | |||
13596 | } | |||
13597 | ||||
        /* Releases pData when it was not handed to the iterator above
        ** (allocation failed, or szLeaf was not positive). */
13598 | fts5DataRelease(pData); | |||
13599 | } | |||
13600 | ||||
13601 | /* | |||
13602 | ** Return true if the iterator is at EOF or if an error has occurred. | |||
13603 | ** False otherwise. | |||
13604 | */ | |||
13605 | static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ | |||
13606 | assert( pIter!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
        /* When no error is pending, bEof must agree with "the first segment
        ** iterator has no leaf". */
13607 | assert( p->rc!=SQLITE_OK((void) (0)) | |||
13608 | || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof((void) (0)) | |||
13609 | )((void) (0)); | |||
        /* p->rc is tested first, so pIter is not dereferenced when an error
        ** code is set. */
13610 | return (p->rc || pIter->base.bEof); | |||
13611 | } | |||
13612 | ||||
13613 | /* | |||
13614 | ** Return the rowid of the entry that the iterator currently points | |||
13615 | ** to. If the iterator points to EOF when this function is called the | |||
13616 | ** results are undefined. | |||
13617 | */ | |||
13618 | static i64 fts5MultiIterRowid(Fts5Iter *pIter){ | |||
        /* aFirst[1].iFirst indexes the segment iterator holding the current
        ** entry; it must have a leaf loaded. */
13619 | assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf )((void) (0)); | |||
13620 | return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; | |||
13621 | } | |||
13622 | ||||
13623 | /* | |||
13624 | ** Move the iterator to the next entry at or following iMatch. | |||
13625 | */ | |||
13626 | static void fts5MultiIterNextFrom( | |||
13627 | Fts5Index *p, | |||
13628 | Fts5Iter *pIter, | |||
13629 | i64 iMatch | |||
13630 | ){ | |||
        /* Keep stepping (always at least once) until EOF/error, or until the
        ** rowid has reached iMatch in the iterator's direction: >= iMatch
        ** when bRev is 0, <= iMatch when bRev is set. */
13631 | while( 1 ){ | |||
13632 | i64 iRowid; | |||
13633 | fts5MultiIterNext(p, pIter, 1, iMatch); | |||
13634 | if( fts5MultiIterEof(p, pIter) ) break; | |||
13635 | iRowid = fts5MultiIterRowid(pIter); | |||
13636 | if( pIter->bRev==0 && iRowid>=iMatch ) break; | |||
13637 | if( pIter->bRev!=0 && iRowid<=iMatch ) break; | |||
13638 | } | |||
13639 | } | |||
13640 | ||||
13641 | /* | |||
13642 | ** Return a pointer to a buffer containing the term associated with the | |||
13643 | ** entry that the iterator currently points to. | |||
13644 | */ | |||
13645 | static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ | |||
        /* The term is read from the segment iterator at aFirst[1].iFirst.
        ** Its length goes to *pn; the returned pointer is the iterator's own
        ** term buffer, not a copy. */
13646 | Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
13647 | *pn = p->term.n; | |||
13648 | return p->term.p; | |||
13649 | } | |||
13650 | ||||
13651 | /* | |||
13652 | ** Allocate a new segment-id for the structure pStruct. The new segment | |||
13653 | ** id must be between 1 and FTS5_MAX_SEGMENT inclusive, and must not be | |||
13654 | ** used by any currently existing segment. If the structure already holds | |||
13655 | ** FTS5_MAX_SEGMENT segments, Fts5Index.rc is set to SQLITE_FULL and 0 returned. | |||
13656 | ** | |||
13657 | ** If an error has already occurred, this function is a no-op. 0 is | |||
13658 | ** returned in this case. | |||
13659 | */ | |||
13660 | static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ | |||
13661 | int iSegid = 0; | |||
13662 | ||||
13663 | if( p->rc==SQLITE_OK0 ){ | |||
13664 | if( pStruct->nSegment>=FTS5_MAX_SEGMENT2000 ){ | |||
        /* The error is reported through p->rc; the return value stays 0. */
13665 | p->rc = SQLITE_FULL13; | |||
13666 | }else{ | |||
13667 | /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following | |||
13668 | ** array is 63 elements, or 252 bytes, in size. */ | |||
13669 | u32 aUsed[(FTS5_MAX_SEGMENT2000+31) / 32]; | |||
13670 | int iLvl, iSeg; | |||
13671 | int i; | |||
13672 | u32 mask; | |||
13673 | memset(aUsed, 0, sizeof(aUsed)); | |||
        /* Build a bitmap of ids in use: id N maps to bit (N-1)%32 of word
        ** (N-1)/32. Ids outside 1..FTS5_MAX_SEGMENT are ignored. */
13674 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
13675 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | |||
13676 | int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; | |||
13677 | if( iId<=FTS5_MAX_SEGMENT2000 && iId>0 ){ | |||
13678 | aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32); | |||
13679 | } | |||
13680 | } | |||
13681 | } | |||
13682 | ||||
        /* Find the first word that is not all ones, then the lowest clear
        ** bit in it, and convert (word i, bit k) back to id i*32+k+1. The
        ** word scan has no explicit bound on i. */
13683 | for(i=0; aUsed[i]==0xFFFFFFFF; i++); | |||
13684 | mask = aUsed[i]; | |||
13685 | for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++); | |||
13686 | iSegid += 1 + i*32; | |||
13687 | ||||
        /* Everything inside this #ifdef is a debug-build consistency check
        ** of the id chosen above. */
13688 | #ifdef SQLITE_DEBUG | |||
13689 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
13690 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | |||
13691 | assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid )((void) (0)); | |||
13692 | } | |||
13693 | } | |||
13694 | assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT )((void) (0)); | |||
13695 | ||||
13696 | { | |||
13697 | sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); | |||
13698 | if( p->rc==SQLITE_OK0 ){ | |||
13699 | u8 aBlob[2] = {0xff, 0xff}; | |||
13700 | sqlite3_bind_intsqlite3_api->bind_int(pIdxSelect, 1, iSegid); | |||
13701 | sqlite3_bind_blobsqlite3_api->bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
13702 | assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW )((void) (0)); | |||
13703 | p->rc = sqlite3_resetsqlite3_api->reset(pIdxSelect); | |||
        /* aBlob is a stack array bound with SQLITE_STATIC, so the binding
        ** is cleared before aBlob goes out of scope. */
13704 | sqlite3_bind_nullsqlite3_api->bind_null(pIdxSelect, 2); | |||
13705 | } | |||
13706 | } | |||
13707 | #endif | |||
13708 | } | |||
13709 | } | |||
13710 | ||||
13711 | return iSegid; | |||
13712 | } | |||
13713 | ||||
13714 | /* | |||
13715 | ** Discard all data currently cached in the hash-tables. | |||
13716 | */ | |||
13717 | static void fts5IndexDiscardData(Fts5Index *p){ | |||
13718 | assert( p->pHash || p->nPendingData==0 )((void) (0)); | |||
        /* The pending-data counters and flushRc are reset only when a hash
        ** table exists. */
13719 | if( p->pHash ){ | |||
13720 | sqlite3Fts5HashClear(p->pHash); | |||
13721 | p->nPendingData = 0; | |||
13722 | p->nPendingRow = 0; | |||
13723 | p->flushRc = SQLITE_OK0; | |||
13724 | } | |||
        /* This counter is reset unconditionally. */
13725 | p->nContentlessDelete = 0; | |||
13726 | } | |||
13727 | ||||
13728 | /* | |||
13729 | ** Return the size of the prefix, in bytes, that buffer | |||
13730 | ** (pNew/<length-unknown>) shares with buffer (pOld/nOld). | |||
13731 | ** | |||
13732 | ** Buffer (pNew/<length-unknown>) is guaranteed to be greater | |||
13733 | ** than buffer (pOld/nOld). | |||
13734 | */ | |||
13735 | static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){ | |||
13736 | int i; | |||
        /* No length is passed for pNew: pNew[i] is read only while i<nOld
        ** and every earlier byte matched. The result is in the range
        ** 0..nOld. */
13737 | for(i=0; i<nOld; i++){ | |||
13738 | if( pOld[i]!=pNew[i] ) break; | |||
13739 | } | |||
13740 | return i; | |||
13741 | } | |||
13742 | ||||
        /*
        ** Reset the doclist-index buffers in pWriter->aDlidx[], starting at
        ** level 0 and stopping at the first level whose buffer is empty. If
        ** bFlush is true, each non-empty buffer is written out with
        ** fts5DataWrite() before it is cleared.
        */
13743 | static void fts5WriteDlidxClear( | |||
13744 | Fts5Index *p, | |||
13745 | Fts5SegWriter *pWriter, | |||
13746 | int bFlush /* If true, write dlidx to disk */ | |||
13747 | ){ | |||
13748 | int i; | |||
13749 | assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) )((void) (0)); | |||
13750 | for(i=0; i<pWriter->nDlidx; i++){ | |||
13751 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; | |||
        /* An empty level ends the walk; higher levels are not examined. */
13752 | if( pDlidx->buf.n==0 ) break; | |||
13753 | if( bFlush ){ | |||
13754 | assert( pDlidx->pgno!=0 )((void) (0)); | |||
        /* The rowid is built from the segment id, level i and page number. */
13755 | fts5DataWrite(p, | |||
13756 | FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(pDlidx ->pgno)) ), | |||
13757 | pDlidx->buf.p, pDlidx->buf.n | |||
13758 | ); | |||
13759 | } | |||
13760 | sqlite3Fts5BufferZero(&pDlidx->buf); | |||
13761 | pDlidx->bPrevValid = 0; | |||
13762 | } | |||
13763 | } | |||
13764 | ||||
13765 | /* | |||
13766 | ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. | |||
13767 | ** Any new array elements are zeroed before returning. | |||
13768 | */ | |||
13769 | static int fts5WriteDlidxGrow( | |||
13770 | Fts5Index *p, | |||
13771 | Fts5SegWriter *pWriter, | |||
13772 | int nLvl | |||
13773 | ){ | |||
        /* The test is >=, so nLvl==nDlidx also takes this path: the array is
        ** reallocated at its current size and the memset covers zero bytes. */
13774 | if( p->rc==SQLITE_OK0 && nLvl>=pWriter->nDlidx ){ | |||
13775 | Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64sqlite3_api->realloc64( | |||
13776 | pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl | |||
13777 | ); | |||
13778 | if( aDlidx==0 ){ | |||
        /* pWriter->aDlidx is left untouched on allocation failure. */
13779 | p->rc = SQLITE_NOMEM7; | |||
13780 | }else{ | |||
        /* Zero only the newly added tail elements. */
13781 | size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); | |||
13782 | memset(&aDlidx[pWriter->nDlidx], 0, nByte); | |||
13783 | pWriter->aDlidx = aDlidx; | |||
13784 | pWriter->nDlidx = nLvl; | |||
13785 | } | |||
13786 | } | |||
        /* Returns the current value of p->rc, whether or not anything grew. */
13787 | return p->rc; | |||
13788 | } | |||
13789 | ||||
13790 | /* | |||
13791 | ** If the current doclist-index accumulating in pWriter->aDlidx[] is large | |||
13792 | ** enough, flush it to disk and return 1. Otherwise discard it and return | |||
13793 | ** zero. | |||
13794 | */ | |||
13795 | static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ | |||
13796 | int bFlag = 0; | |||
13797 | ||||
13798 | /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written | |||
13799 | ** to the database, also write the doclist-index to disk. */ | |||
13800 | if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE4 ){ | |||
13801 | bFlag = 1; | |||
13802 | } | |||
        /* The buffers are cleared either way; bFlag only decides whether they
        ** are written out first. The empty-leaf counter restarts at zero. */
13803 | fts5WriteDlidxClear(p, pWriter, bFlag); | |||
13804 | pWriter->nEmpty = 0; | |||
13805 | return bFlag; | |||
13806 | } | |||
13807 | ||||
13808 | /* | |||
13809 | ** This function is called whenever processing of the doclist for the | |||
13810 | ** last term on leaf page (pWriter->iBtPage) is completed. | |||
13811 | ** | |||
13812 | ** The doclist-index for that term is currently stored in-memory within the | |||
13813 | ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function | |||
13814 | ** writes it out to disk. Or, if it is too small to bother with, discards | |||
13815 | ** it. | |||
13816 | ** | |||
13817 | ** Fts5SegWriter.btterm currently contains the first term on page iBtPage. | |||
13818 | */ | |||
13819 | static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ | |||
13820 | int bFlag; | |||
13821 | ||||
13822 | assert( pWriter->iBtPage || pWriter->nEmpty==0 )((void) (0)); | |||
        /* iBtPage==0 means there is no pending b-tree entry to write. */
13823 | if( pWriter->iBtPage==0 ) return; | |||
13824 | bFlag = fts5WriteFlushDlidx(p, pWriter); | |||
13825 | ||||
13826 | if( p->rc==SQLITE_OK0 ){ | |||
        /* Bind an empty string rather than btterm.p when the term is
        ** zero-length. */
13827 | const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:""); | |||
13828 | /* The following was already done in fts5WriteInit(): */ | |||
13829 | /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ | |||
13830 | sqlite3_bind_blobsqlite3_api->bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
        /* Parameter 3 packs the page number shifted left by one with the
        ** "doclist-index written" flag in bit 0. */
13831 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1)); | |||
        /* The step result is ignored; the status is taken from the reset. */
13832 | sqlite3_stepsqlite3_api->step(p->pIdxWriter); | |||
13833 | p->rc = sqlite3_resetsqlite3_api->reset(p->pIdxWriter); | |||
        /* Drop the SQLITE_STATIC binding to the term buffer. */
13834 | sqlite3_bind_nullsqlite3_api->bind_null(p->pIdxWriter, 2); | |||
13835 | } | |||
        /* Cleared even if an error occurred above. */
13836 | pWriter->iBtPage = 0; | |||
13837 | } | |||
13838 | ||||
13839 | /* | |||
13840 | ** This is called once for each leaf page except the first that contains | |||
13841 | ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that | |||
13842 | ** is larger than all terms written to earlier leaves, and equal to or | |||
13843 | ** smaller than the first term on the new leaf. | |||
13844 | ** | |||
13845 | ** If an error occurs, an error code is left in Fts5Index.rc. If an error | |||
13846 | ** has already occurred when this function is called, it is a no-op. | |||
13847 | */ | |||
13848 | static void fts5WriteBtreeTerm( | |||
13849 | Fts5Index *p, /* FTS5 backend object */ | |||
13850 | Fts5SegWriter *pWriter, /* Writer object */ | |||
13851 | int nTerm, const u8 *pTerm /* First term on new page */ | |||
13852 | ){ | |||
        /* Write out any entry still pending for the previous page, then
        ** record the new split-key and current page number as the pending
        ** entry. */
13853 | fts5WriteFlushBtree(p, pWriter); | |||
13854 | if( p->rc==SQLITE_OK0 ){ | |||
13855 | fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&pWriter->btterm,nTerm ,pTerm); | |||
13856 | pWriter->iBtPage = pWriter->writer.pgno; | |||
13857 | } | |||
13858 | } | |||
13859 | ||||
13860 | /* | |||
13861 | ** This function is called when flushing a leaf page that contains no | |||
13862 | ** terms at all to disk. | |||
13863 | */ | |||
13864 | static void fts5WriteBtreeNoTerm( | |||
13865 | Fts5Index *p, /* FTS5 backend object */ | |||
13866 | Fts5SegWriter *pWriter /* Writer object */ | |||
13867 | ){ | |||
13868 | /* If there were no rowids on the leaf page either and the doclist-index | |||
13869 | ** has already been started, append an 0x00 byte to it. */ | |||
13870 | if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ | |||
13871 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; | |||
13872 | assert( pDlidx->bPrevValid )((void) (0)); | |||
        /* The zero is appended as a varint; errors go to p->rc. */
13873 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); | |||
13874 | } | |||
13875 | ||||
13876 | /* Increment the "number of sequential leaves without a term" counter. */ | |||
13877 | pWriter->nEmpty++; | |||
13878 | } | |||
13879 | ||||
        /*
        ** Decode and return the second varint stored after the first byte of
        ** buffer pBuf. No bounds checks are made against pBuf->n.
        */
13880 | static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ | |||
13881 | i64 iRowid; | |||
13882 | int iOff; | |||
13883 | ||||
        /* Byte 0 is skipped. The first varint is decoded only to learn its
        ** length (its value is overwritten by the next call); the second
        ** varint is the value returned. */
13884 | iOff = 1 + fts5GetVarintsqlite3Fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); | |||
13885 | fts5GetVarintsqlite3Fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); | |||
13886 | return iRowid; | |||
13887 | } | |||
13888 | ||||
13889 | /* | |||
13890 | ** Rowid iRowid has just been appended to the current leaf page. It is the | |||
13891 | ** first on the page. This function appends an appropriate entry to the current | |||
13892 | ** doclist-index. | |||
13893 | */ | |||
13894 | static void fts5WriteDlidxAppend( | |||
13895 | Fts5Index *p, | |||
13896 | Fts5SegWriter *pWriter, | |||
13897 | i64 iRowid | |||
13898 | ){ | |||
13899 | int i; | |||
13900 | int bDone = 0; | |||
13901 | ||||
        /* i walks up the aDlidx[] levels. The loop ends after the first level
        ** whose buffer is still below pgsz (bDone), or when an error is set. */
13902 | for(i=0; p->rc==SQLITE_OK0 && bDone==0; i++){ | |||
13903 | i64 iVal; | |||
13904 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; | |||
13905 | ||||
13906 | if( pDlidx->buf.n>=p->pConfig->pgsz ){ | |||
13907 | /* The current doclist-index page is full. Write it to disk and push | |||
13908 | ** a copy of iRowid (which will become the first rowid on the next | |||
13909 | ** doclist-index leaf page) up into the next level of the b-tree | |||
13910 | ** hierarchy. If the node being flushed is currently the root node, | |||
13911 | ** also push its first rowid upwards. */ | |||
13912 | pDlidx->buf.p[0] = 0x01; /* Not the root node */ | |||
13913 | fts5DataWrite(p, | |||
13914 | FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(pDlidx ->pgno)) ), | |||
13915 | pDlidx->buf.p, pDlidx->buf.n | |||
13916 | ); | |||
        /* Growing may reallocate aDlidx[], so pDlidx is fetched again. */
13917 | fts5WriteDlidxGrow(p, pWriter, i+2); | |||
13918 | pDlidx = &pWriter->aDlidx[i]; | |||
13919 | if( p->rc==SQLITE_OK0 && pDlidx[1].buf.n==0 ){ | |||
13920 | i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); | |||
13921 | ||||
13922 | /* This was the root node. Push its first rowid up to the new root. */ | |||
13923 | pDlidx[1].pgno = pDlidx->pgno; | |||
13924 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); | |||
13925 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); | |||
13926 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); | |||
13927 | pDlidx[1].bPrevValid = 1; | |||
13928 | pDlidx[1].iPrev = iFirst; | |||
13929 | } | |||
13930 | ||||
        /* Start a fresh page at this level. */
13931 | sqlite3Fts5BufferZero(&pDlidx->buf); | |||
13932 | pDlidx->bPrevValid = 0; | |||
13933 | pDlidx->pgno++; | |||
13934 | }else{ | |||
13935 | bDone = 1; | |||
13936 | } | |||
13937 | ||||
13938 | if( pDlidx->bPrevValid ){ | |||
        /* Delta from the previous rowid, computed in unsigned arithmetic. */
13939 | iVal = (u64)iRowid - (u64)pDlidx->iPrev; | |||
13940 | }else{ | |||
        /* First entry on this page: write the header (a flag that is 1 when
        ** this level was just flushed and 0 otherwise, then a page number)
        ** and store the rowid in full. */
13941 | i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno); | |||
13942 | assert( pDlidx->buf.n==0 )((void) (0)); | |||
13943 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); | |||
13944 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); | |||
13945 | iVal = iRowid; | |||
13946 | } | |||
13947 | ||||
13948 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); | |||
13949 | pDlidx->bPrevValid = 1; | |||
13950 | pDlidx->iPrev = iRowid; | |||
13951 | } | |||
13952 | } | |||
13953 | ||||
        /*
        ** Write the leaf page accumulated in pWriter->writer out with
        ** fts5DataWrite(), then reset the page writer so that it holds an
        ** empty page with the next page number. Errors are left in p->rc.
        */
13954 | static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ | |||
13955 | static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; | |||
13956 | Fts5PageWriter *pPage = &pWriter->writer; | |||
13957 | i64 iRowid; | |||
13958 | ||||
13959 | assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) )((void) (0)); | |||
13960 | ||||
13961 | /* Set the szLeaf header field. */ | |||
        /* The field is the two bytes at offset 2. It is filled in before the
        ** pgidx is appended below, so it records the size without the pgidx. */
13962 | assert( 0==fts5GetU16(&pPage->buf.p[2]) )((void) (0)); | |||
13963 | fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); | |||
13964 | ||||
13965 | if( pWriter->bFirstTermInPage ){ | |||
13966 | /* No term was written to this page. */ | |||
13967 | assert( pPage->pgidx.n==0 )((void) (0)); | |||
13968 | fts5WriteBtreeNoTerm(p, pWriter); | |||
13969 | }else{ | |||
13970 | /* Append the pgidx to the page buffer. Set the szLeaf header field. */ | |||
13971 | fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, pPage->pgidx.n,pPage->pgidx.p); | |||
13972 | } | |||
13973 | ||||
13974 | /* Write the page out to disk */ | |||
13975 | iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pPage ->pgno)) ); | |||
13976 | fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); | |||
13977 | ||||
13978 | /* Initialize the next page. */ | |||
        /* The new page starts with a 4-byte all-zero header. */
13979 | fts5BufferZero(&pPage->buf)sqlite3Fts5BufferZero(&pPage->buf); | |||
13980 | fts5BufferZero(&pPage->pgidx)sqlite3Fts5BufferZero(&pPage->pgidx); | |||
13981 | fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, 4,zero); | |||
13982 | pPage->iPrevPgidx = 0; | |||
13983 | pPage->pgno++; | |||
13984 | ||||
13985 | /* Increase the leaves written counter */ | |||
13986 | pWriter->nLeafWritten++; | |||
13987 | ||||
13988 | /* The new leaf holds no terms or rowids */ | |||
13989 | pWriter->bFirstTermInPage = 1; | |||
13990 | pWriter->bFirstRowidInPage = 1; | |||
13991 | } | |||
13992 | ||||
13993 | /* | |||
13994 | ** Append term pTerm/nTerm to the segment being written by the writer passed | |||
13995 | ** as the second argument. | |||
13996 | ** | |||
13997 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | |||
13998 | ** already occurred, this function is a no-op. | |||
13999 | */ | |||
14000 | static void fts5WriteAppendTerm( | |||
14001 | Fts5Index *p, | |||
14002 | Fts5SegWriter *pWriter, | |||
14003 | int nTerm, const u8 *pTerm | |||
14004 | ){ | |||
14005 | int nPrefix; /* Bytes of prefix compression for term */ | |||
14006 | Fts5PageWriter *pPage = &pWriter->writer; | |||
14007 | Fts5Buffer *pPgidx = &pWriter->writer.pgidx; | |||
        /* nMin bounds the prefix comparison to the shorter of the previous
        ** term and the new one. */
14008 | int nMin = MIN(pPage->term.n, nTerm)(((pPage->term.n) < (nTerm)) ? (pPage->term.n) : (nTerm )); | |||
14009 | ||||
14010 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
14011 | assert( pPage->buf.n>=4 )((void) (0)); | |||
14012 | assert( pPage->buf.n>4 || pWriter->bFirstTermInPage )((void) (0)); | |||
14013 | ||||
14014 | /* If the current leaf page is full, flush it to disk. */ | |||
14015 | if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ | |||
        /* A page holding only its 4-byte header is not flushed. */
14016 | if( pPage->buf.n>4 ){ | |||
14017 | fts5WriteFlushLeaf(p, pWriter); | |||
14018 | if( p->rc!=SQLITE_OK0 ) return; | |||
14019 | } | |||
        /* Make room for the term plus FTS5_DATA_PADDING bytes. */
14020 | fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING)( (u32)((&pPage->buf)->n) + (u32)(nTerm+20) <= ( u32)((&pPage->buf)->nSpace) ? 0 : sqlite3Fts5BufferSize ((&p->rc),(&pPage->buf),(nTerm+20)+(&pPage-> buf)->n) ); | |||
14021 | } | |||
14022 | ||||
14023 | /* TODO1: Updating pgidx here. */ | |||
        /* The pgidx entry is the offset of this term on the page, stored as a
        ** varint delta from the previous entry.
        ** NOTE(review): the varint is written at pPgidx->p[pPgidx->n] with no
        ** grow call in this function; presumably the pgidx buffer is sized
        ** elsewhere - confirm. */
14024 | pPgidx->n += sqlite3Fts5PutVarint( | |||
14025 | &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx | |||
14026 | ); | |||
14027 | pPage->iPrevPgidx = pPage->buf.n; | |||
14028 | #if 0 | |||
14029 | fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); | |||
14030 | pPgidx->n += 2; | |||
14031 | #endif | |||
14032 | ||||
14033 | if( pWriter->bFirstTermInPage ){ | |||
        /* The first term on a page is stored whole, with no prefix-length
        ** varint in front of it. */
14034 | nPrefix = 0; | |||
14035 | if( pPage->pgno!=1 ){ | |||
14036 | /* This is the first term on a leaf that is not the leftmost leaf in | |||
14037 | ** the segment b-tree. In this case it is necessary to add a term to | |||
14038 | ** the b-tree hierarchy that is (a) larger than the largest term | |||
14039 | ** already written to the segment and (b) smaller than or equal to | |||
14040 | ** this term. In other words, a prefix of (pTerm/nTerm) that is one | |||
14041 | ** byte longer than the longest prefix (pTerm/nTerm) shares with the | |||
14042 | ** previous term. | |||
14043 | ** | |||
14044 | ** Usually, the previous term is available in pPage->term. The exception | |||
14045 | ** is if this is the first term written in an incremental-merge step. | |||
14046 | ** In this case the previous term is not available, so just write a | |||
14047 | ** copy of (pTerm/nTerm) into the parent node. This is slightly | |||
14048 | ** inefficient, but still correct. */ | |||
14049 | int n = nTerm; | |||
14050 | if( pPage->term.n ){ | |||
14051 | n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm); | |||
14052 | } | |||
14053 | fts5WriteBtreeTerm(p, pWriter, n, pTerm); | |||
14054 | if( p->rc!=SQLITE_OK0 ) return; | |||
14055 | pPage = &pWriter->writer; | |||
14056 | } | |||
14057 | }else{ | |||
        /* Later terms are prefix-compressed against the previous term, with
        ** the shared-prefix length written first. */
14058 | nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm); | |||
14059 | fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)nPrefix); | |||
14060 | } | |||
14061 | ||||
14062 | /* Append the number of bytes of new data, then the term data itself | |||
14063 | ** to the page. */ | |||
14064 | fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)nTerm - nPrefix); | |||
14065 | fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix])sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, nTerm - nPrefix,&pTerm[nPrefix]); | |||
14066 | ||||
14067 | /* Update the Fts5PageWriter.term field. */ | |||
14068 | fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&pPage->term,nTerm, pTerm); | |||
14069 | pWriter->bFirstTermInPage = 0; | |||
14070 | ||||
        /* Reset the per-term rowid flags for the doclist that follows. */
14071 | pWriter->bFirstRowidInPage = 0; | |||
14072 | pWriter->bFirstRowidInDoclist = 1; | |||
14073 | ||||
        /* The level-0 doclist-index must be empty at this point; record the
        ** page on which this term's doclist starts. */
14074 | assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) )((void) (0)); | |||
14075 | pWriter->aDlidx[0].pgno = pPage->pgno; | |||
14076 | } | |||
14077 | ||||
14078 | /* | |||
14079 | ** Append a rowid and position-list size field to the writers output. | |||
14080 | */ | |||
14081 | static void fts5WriteAppendRowid( | |||
14082 | Fts5Index *p, | |||
14083 | Fts5SegWriter *pWriter, | |||
14084 | i64 iRowid | |||
14085 | ){ | |||
14086 | if( p->rc==SQLITE_OK0 ){ | |||
14087 | Fts5PageWriter *pPage = &pWriter->writer; | |||
14088 | ||||
14089 | if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ | |||
14090 | fts5WriteFlushLeaf(p, pWriter); | |||
14091 | } | |||
14092 | ||||
14093 | /* If this is to be the first rowid written to the page, set the | |||
14094 | ** rowid-pointer in the page-header. Also append a value to the dlidx | |||
14095 | ** buffer, in case a doclist-index is required. */ | |||
14096 | if( pWriter->bFirstRowidInPage ){ | |||
14097 | fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); | |||
14098 | fts5WriteDlidxAppend(p, pWriter, iRowid); | |||
14099 | } | |||
14100 | ||||
14101 | /* Write the rowid. */ | |||
14102 | if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ | |||
14103 | fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)iRowid); | |||
14104 | }else{ | |||
14105 | assert_nc( p->rc || iRowid>pWriter->iPrevRowid )((void) (0)); | |||
14106 | fts5BufferAppendVarint(&p->rc, &pPage->buf,sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid) | |||
14107 | (u64)iRowid - (u64)pWriter->iPrevRowidsqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid) | |||
14108 | )sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid); | |||
14109 | } | |||
14110 | pWriter->iPrevRowid = iRowid; | |||
14111 | pWriter->bFirstRowidInDoclist = 0; | |||
14112 | pWriter->bFirstRowidInPage = 0; | |||
14113 | } | |||
14114 | } | |||
14115 | ||||
14116 | static void fts5WriteAppendPoslistData( | |||
14117 | Fts5Index *p, | |||
14118 | Fts5SegWriter *pWriter, | |||
14119 | const u8 *aData, | |||
14120 | int nData | |||
14121 | ){ | |||
14122 | Fts5PageWriter *pPage = &pWriter->writer; | |||
14123 | const u8 *a = aData; | |||
14124 | int n = nData; | |||
14125 | ||||
14126 | assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK )((void) (0)); | |||
14127 | while( p->rc==SQLITE_OK0 | |||
14128 | && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz | |||
14129 | ){ | |||
14130 | int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; | |||
14131 | int nCopy = 0; | |||
14132 | while( nCopy<nReq ){ | |||
14133 | i64 dummy; | |||
14134 | nCopy += fts5GetVarintsqlite3Fts5GetVarint(&a[nCopy], (u64*)&dummy); | |||
14135 | } | |||
14136 | fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, nCopy,a); | |||
14137 | a += nCopy; | |||
14138 | n -= nCopy; | |||
14139 | fts5WriteFlushLeaf(p, pWriter); | |||
14140 | } | |||
14141 | if( n>0 ){ | |||
14142 | fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, n,a); | |||
14143 | } | |||
14144 | } | |||
14145 | ||||
14146 | /* | |||
14147 | ** Flush any data cached by the writer object to the database. Free any | |||
14148 | ** allocations associated with the writer. | |||
14149 | */ | |||
14150 | static void fts5WriteFinish( | |||
14151 | Fts5Index *p, | |||
14152 | Fts5SegWriter *pWriter, /* Writer object */ | |||
14153 | int *pnLeaf /* OUT: Number of leaf pages in b-tree */ | |||
14154 | ){ | |||
14155 | int i; | |||
14156 | Fts5PageWriter *pLeaf = &pWriter->writer; | |||
14157 | if( p->rc==SQLITE_OK0 ){ | |||
14158 | assert( pLeaf->pgno>=1 )((void) (0)); | |||
14159 | if( pLeaf->buf.n>4 ){ | |||
14160 | fts5WriteFlushLeaf(p, pWriter); | |||
14161 | } | |||
14162 | *pnLeaf = pLeaf->pgno-1; | |||
14163 | if( pLeaf->pgno>1 ){ | |||
14164 | fts5WriteFlushBtree(p, pWriter); | |||
14165 | } | |||
14166 | } | |||
14167 | fts5BufferFree(&pLeaf->term)sqlite3Fts5BufferFree(&pLeaf->term); | |||
14168 | fts5BufferFree(&pLeaf->buf)sqlite3Fts5BufferFree(&pLeaf->buf); | |||
14169 | fts5BufferFree(&pLeaf->pgidx)sqlite3Fts5BufferFree(&pLeaf->pgidx); | |||
14170 | fts5BufferFree(&pWriter->btterm)sqlite3Fts5BufferFree(&pWriter->btterm); | |||
14171 | ||||
14172 | for(i=0; i<pWriter->nDlidx; i++){ | |||
14173 | sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); | |||
14174 | } | |||
14175 | sqlite3_freesqlite3_api->free(pWriter->aDlidx); | |||
14176 | } | |||
14177 | ||||
14178 | static void fts5WriteInit( | |||
14179 | Fts5Index *p, | |||
14180 | Fts5SegWriter *pWriter, | |||
14181 | int iSegid | |||
14182 | ){ | |||
14183 | const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING20; | |||
14184 | ||||
14185 | memset(pWriter, 0, sizeof(Fts5SegWriter)); | |||
14186 | pWriter->iSegid = iSegid; | |||
14187 | ||||
14188 | fts5WriteDlidxGrow(p, pWriter, 1); | |||
14189 | pWriter->writer.pgno = 1; | |||
14190 | pWriter->bFirstTermInPage = 1; | |||
14191 | pWriter->iBtPage = 1; | |||
14192 | ||||
14193 | assert( pWriter->writer.buf.n==0 )((void) (0)); | |||
14194 | assert( pWriter->writer.pgidx.n==0 )((void) (0)); | |||
14195 | ||||
14196 | /* Grow the two buffers to pgsz + padding bytes in size. */ | |||
14197 | sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer); | |||
14198 | sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer); | |||
14199 | ||||
14200 | if( p->pIdxWriter==0 ){ | |||
14201 | Fts5Config *pConfig = p->pConfig; | |||
14202 | fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintfsqlite3_api->mprintf( | |||
14203 | "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", | |||
14204 | pConfig->zDb, pConfig->zName | |||
14205 | )); | |||
14206 | } | |||
14207 | ||||
14208 | if( p->rc==SQLITE_OK0 ){ | |||
14209 | /* Initialize the 4-byte leaf-page header to 0x00. */ | |||
14210 | memset(pWriter->writer.buf.p, 0, 4); | |||
14211 | pWriter->writer.buf.n = 4; | |||
14212 | ||||
14213 | /* Bind the current output segment id to the index-writer. This is an | |||
14214 | ** optimization over binding the same value over and over as rows are | |||
14215 | ** inserted into %_idx by the current writer. */ | |||
14216 | sqlite3_bind_intsqlite3_api->bind_int(p->pIdxWriter, 1, pWriter->iSegid); | |||
14217 | } | |||
14218 | } | |||
14219 | ||||
14220 | /* | |||
14221 | ** Iterator pIter was used to iterate through the input segments of on an | |||
14222 | ** incremental merge operation. This function is called if the incremental | |||
14223 | ** merge step has finished but the input has not been completely exhausted. | |||
14224 | */ | |||
14225 | static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ | |||
14226 | int i; | |||
14227 | Fts5Buffer buf; | |||
14228 | memset(&buf, 0, sizeof(Fts5Buffer)); | |||
14229 | for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK0; i++){ | |||
14230 | Fts5SegIter *pSeg = &pIter->aSeg[i]; | |||
14231 | if( pSeg->pSeg==0 ){ | |||
14232 | /* no-op */ | |||
14233 | }else if( pSeg->pLeaf==0 ){ | |||
14234 | /* All keys from this input segment have been transfered to the output. | |||
14235 | ** Set both the first and last page-numbers to 0 to indicate that the | |||
14236 | ** segment is now empty. */ | |||
14237 | pSeg->pSeg->pgnoLast = 0; | |||
14238 | pSeg->pSeg->pgnoFirst = 0; | |||
14239 | }else{ | |||
14240 | int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ | |||
14241 | i64 iLeafRowid; | |||
14242 | Fts5Data *pData; | |||
14243 | int iId = pSeg->pSeg->iSegid; | |||
14244 | u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; | |||
14245 | ||||
14246 | iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno)( ((i64)(iId) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iTermLeafPgno )) ); | |||
14247 | pData = fts5LeafRead(p, iLeafRowid); | |||
14248 | if( pData ){ | |||
14249 | if( iOff>pData->szLeaf ){ | |||
14250 | /* This can occur if the pages that the segments occupy overlap - if | |||
14251 | ** a single page has been assigned to more than one segment. In | |||
14252 | ** this case a prior iteration of this loop may have corrupted the | |||
14253 | ** segment currently being trimmed. */ | |||
14254 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
14255 | }else{ | |||
14256 | fts5BufferZero(&buf)sqlite3Fts5BufferZero(&buf); | |||
14257 | fts5BufferGrow(&p->rc, &buf, pData->nn)( (u32)((&buf)->n) + (u32)(pData->nn) <= (u32)(( &buf)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p-> rc),(&buf),(pData->nn)+(&buf)->n) ); | |||
14258 | fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr)sqlite3Fts5BufferAppendBlob(&p->rc,&buf,sizeof(aHdr ),aHdr); | |||
14259 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->term.n); | |||
14260 | fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p)sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pSeg->term .n,pSeg->term.p); | |||
14261 | fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> szLeaf-iOff,&pData->p[iOff]); | |||
14262 | if( p->rc==SQLITE_OK0 ){ | |||
14263 | /* Set the szLeaf field */ | |||
14264 | fts5PutU16(&buf.p[2], (u16)buf.n); | |||
14265 | } | |||
14266 | ||||
14267 | /* Set up the new page-index array */ | |||
14268 | fts5BufferAppendVarint(&p->rc, &buf, 4)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)4); | |||
14269 | if( pSeg->iLeafPgno==pSeg->iTermLeafPgno | |||
14270 | && pSeg->iEndofDoclist<pData->szLeaf | |||
14271 | && pSeg->iPgidxOff<=pData->nn | |||
14272 | ){ | |||
14273 | int nDiff = pData->szLeaf - pSeg->iEndofDoclist; | |||
14274 | fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)buf .n - 1 - nDiff - 4); | |||
14275 | fts5BufferAppendBlob(&p->rc, &buf,sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]) | |||
14276 | pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]) | |||
14277 | )sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]); | |||
14278 | } | |||
14279 | ||||
14280 | pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; | |||
14281 | fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1)( ((i64)(iId) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(1)) ), iLeafRowid); | |||
14282 | fts5DataWrite(p, iLeafRowid, buf.p, buf.n); | |||
14283 | } | |||
14284 | fts5DataRelease(pData); | |||
14285 | } | |||
14286 | } | |||
14287 | } | |||
14288 | fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf); | |||
14289 | } | |||
14290 | ||||
14291 | static void fts5MergeChunkCallback( | |||
14292 | Fts5Index *p, | |||
14293 | void *pCtx, | |||
14294 | const u8 *pChunk, int nChunk | |||
14295 | ){ | |||
14296 | Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; | |||
14297 | fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); | |||
14298 | } | |||
14299 | ||||
14300 | /* | |||
14301 | ** | |||
14302 | */ | |||
14303 | static void fts5IndexMergeLevel( | |||
14304 | Fts5Index *p, /* FTS5 backend object */ | |||
14305 | Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ | |||
14306 | int iLvl, /* Level to read input from */ | |||
14307 | int *pnRem /* Write up to this many output leaves */ | |||
14308 | ){ | |||
14309 | Fts5Structure *pStruct = *ppStruct; | |||
14310 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | |||
14311 | Fts5StructureLevel *pLvlOut; | |||
14312 | Fts5Iter *pIter = 0; /* Iterator to read input data */ | |||
14313 | int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ | |||
14314 | int nInput; /* Number of input segments */ | |||
14315 | Fts5SegWriter writer; /* Writer object */ | |||
14316 | Fts5StructureSegment *pSeg; /* Output segment */ | |||
14317 | Fts5Buffer term; | |||
14318 | int bOldest; /* True if the output segment is the oldest */ | |||
14319 | int eDetail = p->pConfig->eDetail; | |||
14320 | const int flags = FTS5INDEX_QUERY_NOOUTPUT0x0020; | |||
14321 | int bTermWritten = 0; /* True if current term already output */ | |||
14322 | ||||
14323 | assert( iLvl<pStruct->nLevel )((void) (0)); | |||
14324 | assert( pLvl->nMerge<=pLvl->nSeg )((void) (0)); | |||
14325 | ||||
14326 | memset(&writer, 0, sizeof(Fts5SegWriter)); | |||
14327 | memset(&term, 0, sizeof(Fts5Buffer)); | |||
14328 | if( pLvl->nMerge ){ | |||
14329 | pLvlOut = &pStruct->aLevel[iLvl+1]; | |||
14330 | assert( pLvlOut->nSeg>0 )((void) (0)); | |||
14331 | nInput = pLvl->nMerge; | |||
14332 | pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; | |||
14333 | ||||
14334 | fts5WriteInit(p, &writer, pSeg->iSegid); | |||
14335 | writer.writer.pgno = pSeg->pgnoLast+1; | |||
14336 | writer.iBtPage = 0; | |||
14337 | }else{ | |||
14338 | int iSegid = fts5AllocateSegid(p, pStruct); | |||
14339 | ||||
14340 | /* Extend the Fts5Structure object as required to ensure the output | |||
14341 | ** segment exists. */ | |||
14342 | if( iLvl==pStruct->nLevel-1 ){ | |||
14343 | fts5StructureAddLevel(&p->rc, ppStruct); | |||
14344 | pStruct = *ppStruct; | |||
14345 | } | |||
14346 | fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); | |||
14347 | if( p->rc ) return; | |||
14348 | pLvl = &pStruct->aLevel[iLvl]; | |||
14349 | pLvlOut = &pStruct->aLevel[iLvl+1]; | |||
14350 | ||||
14351 | fts5WriteInit(p, &writer, iSegid); | |||
14352 | ||||
14353 | /* Add the new segment to the output level */ | |||
14354 | pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; | |||
14355 | pLvlOut->nSeg++; | |||
14356 | pSeg->pgnoFirst = 1; | |||
14357 | pSeg->iSegid = iSegid; | |||
14358 | pStruct->nSegment++; | |||
14359 | ||||
14360 | /* Read input from all segments in the input level */ | |||
14361 | nInput = pLvl->nSeg; | |||
14362 | ||||
14363 | /* Set the range of origins that will go into the output segment. */ | |||
14364 | if( pStruct->nOriginCntr>0 ){ | |||
14365 | pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1; | |||
14366 | pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2; | |||
14367 | } | |||
14368 | } | |||
14369 | bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); | |||
14370 | ||||
14371 | assert( iLvl>=0 )((void) (0)); | |||
14372 | for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); | |||
14373 | fts5MultiIterEof(p, pIter)==0; | |||
14374 | fts5MultiIterNext(p, pIter, 0, 0) | |||
14375 | ){ | |||
14376 | Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
14377 | int nPos; /* position-list size field value */ | |||
14378 | int nTerm; | |||
14379 | const u8 *pTerm; | |||
14380 | ||||
14381 | pTerm = fts5MultiIterTerm(pIter, &nTerm); | |||
14382 | if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm)((nTerm)<=0 ? 0 : memcmp((pTerm), (term.p), (nTerm))) ){ | |||
14383 | if( pnRem && writer.nLeafWritten>nRem ){ | |||
14384 | break; | |||
14385 | } | |||
14386 | fts5BufferSet(&p->rc, &term, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&term,nTerm,pTerm); | |||
14387 | bTermWritten =0; | |||
14388 | } | |||
14389 | ||||
14390 | /* Check for key annihilation. */ | |||
14391 | if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue; | |||
14392 | ||||
14393 | if( p->rc==SQLITE_OK0 && bTermWritten==0 ){ | |||
14394 | /* This is a new term. Append a term to the output segment. */ | |||
14395 | fts5WriteAppendTerm(p, &writer, nTerm, pTerm); | |||
14396 | bTermWritten = 1; | |||
14397 | } | |||
14398 | ||||
14399 | /* Append the rowid to the output */ | |||
14400 | /* WRITEPOSLISTSIZE */ | |||
14401 | fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); | |||
14402 | ||||
14403 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
14404 | if( pSegIter->bDel ){ | |||
14405 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)0); | |||
14406 | if( pSegIter->nPos>0 ){ | |||
14407 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)0); | |||
14408 | } | |||
14409 | } | |||
14410 | }else{ | |||
14411 | /* Append the position-list data to the output */ | |||
14412 | nPos = pSegIter->nPos*2 + pSegIter->bDel; | |||
14413 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)nPos); | |||
14414 | fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); | |||
14415 | } | |||
14416 | } | |||
14417 | ||||
14418 | /* Flush the last leaf page to disk. Set the output segment b-tree height | |||
14419 | ** and last leaf page number at the same time. */ | |||
14420 | fts5WriteFinish(p, &writer, &pSeg->pgnoLast); | |||
14421 | ||||
14422 | assert( pIter!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
14423 | if( fts5MultiIterEof(p, pIter) ){ | |||
14424 | int i; | |||
14425 | ||||
14426 | /* Remove the redundant segments from the %_data table */ | |||
14427 | assert( pSeg->nEntry==0 )((void) (0)); | |||
14428 | for(i=0; i<nInput; i++){ | |||
14429 | Fts5StructureSegment *pOld = &pLvl->aSeg[i]; | |||
14430 | pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone); | |||
14431 | fts5DataRemoveSegment(p, pOld); | |||
14432 | } | |||
14433 | ||||
14434 | /* Remove the redundant segments from the input level */ | |||
14435 | if( pLvl->nSeg!=nInput ){ | |||
14436 | int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); | |||
14437 | memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); | |||
14438 | } | |||
14439 | pStruct->nSegment -= nInput; | |||
14440 | pLvl->nSeg -= nInput; | |||
14441 | pLvl->nMerge = 0; | |||
14442 | if( pSeg->pgnoLast==0 ){ | |||
14443 | pLvlOut->nSeg--; | |||
14444 | pStruct->nSegment--; | |||
14445 | } | |||
14446 | }else{ | |||
14447 | assert( pSeg->pgnoLast>0 )((void) (0)); | |||
14448 | fts5TrimSegments(p, pIter); | |||
14449 | pLvl->nMerge = nInput; | |||
14450 | } | |||
14451 | ||||
14452 | fts5MultiIterFree(pIter); | |||
14453 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | |||
14454 | if( pnRem ) *pnRem -= writer.nLeafWritten; | |||
14455 | } | |||
14456 | ||||
14457 | /* | |||
14458 | ** If this is not a contentless_delete=1 table, or if the 'deletemerge' | |||
14459 | ** configuration option is set to 0, then this function always returns -1. | |||
14460 | ** Otherwise, it searches the structure object passed as the second argument | |||
14461 | ** for a level suitable for merging due to having a large number of | |||
14462 | ** tombstones in the tombstone hash. If one is found, its index is returned. | |||
14463 | ** Otherwise, if there is no suitable level, -1. | |||
14464 | */ | |||
14465 | static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){ | |||
14466 | Fts5Config *pConfig = p->pConfig; | |||
14467 | int iRet = -1; | |||
14468 | if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){ | |||
14469 | int ii; | |||
14470 | int nBest = 0; | |||
14471 | ||||
14472 | for(ii=0; ii<pStruct->nLevel; ii++){ | |||
14473 | Fts5StructureLevel *pLvl = &pStruct->aLevel[ii]; | |||
14474 | i64 nEntry = 0; | |||
14475 | i64 nTomb = 0; | |||
14476 | int iSeg; | |||
14477 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | |||
14478 | nEntry += pLvl->aSeg[iSeg].nEntry; | |||
14479 | nTomb += pLvl->aSeg[iSeg].nEntryTombstone; | |||
14480 | } | |||
14481 | assert_nc( nEntry>0 || pLvl->nSeg==0 )((void) (0)); | |||
14482 | if( nEntry>0 ){ | |||
14483 | int nPercent = (nTomb * 100) / nEntry; | |||
14484 | if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){ | |||
14485 | iRet = ii; | |||
14486 | nBest = nPercent; | |||
14487 | } | |||
14488 | } | |||
14489 | ||||
14490 | /* If pLvl is already the input level to an ongoing merge, look no | |||
14491 | ** further for a merge candidate. The caller should be allowed to | |||
14492 | ** continue merging from pLvl first. */ | |||
14493 | if( pLvl->nMerge ) break; | |||
14494 | } | |||
14495 | } | |||
14496 | return iRet; | |||
14497 | } | |||
14498 | ||||
14499 | /* | |||
14500 | ** Do up to nPg pages of automerge work on the index. | |||
14501 | ** | |||
14502 | ** Return true if any changes were actually made, or false otherwise. | |||
14503 | */ | |||
14504 | static int fts5IndexMerge( | |||
14505 | Fts5Index *p, /* FTS5 backend object */ | |||
14506 | Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ | |||
14507 | int nPg, /* Pages of work to do */ | |||
14508 | int nMin /* Minimum number of segments to merge */ | |||
14509 | ){ | |||
14510 | int nRem = nPg; | |||
14511 | int bRet = 0; | |||
14512 | Fts5Structure *pStruct = *ppStruct; | |||
14513 | while( nRem>0 && p->rc==SQLITE_OK0 ){ | |||
14514 | int iLvl; /* To iterate through levels */ | |||
14515 | int iBestLvl = 0; /* Level offering the most input segments */ | |||
14516 | int nBest = 0; /* Number of input segments on best level */ | |||
14517 | ||||
14518 | /* Set iBestLvl to the level to read input segments from. Or to -1 if | |||
14519 | ** there is no level suitable to merge segments from. */ | |||
14520 | assert( pStruct->nLevel>0 )((void) (0)); | |||
14521 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
14522 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | |||
14523 | if( pLvl->nMerge ){ | |||
14524 | if( pLvl->nMerge>nBest ){ | |||
14525 | iBestLvl = iLvl; | |||
14526 | nBest = nMin; | |||
14527 | } | |||
14528 | break; | |||
14529 | } | |||
14530 | if( pLvl->nSeg>nBest ){ | |||
14531 | nBest = pLvl->nSeg; | |||
14532 | iBestLvl = iLvl; | |||
14533 | } | |||
14534 | } | |||
14535 | if( nBest<nMin ){ | |||
14536 | iBestLvl = fts5IndexFindDeleteMerge(p, pStruct); | |||
14537 | } | |||
14538 | ||||
14539 | if( iBestLvl<0 ) break; | |||
14540 | bRet = 1; | |||
14541 | fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); | |||
14542 | if( p->rc==SQLITE_OK0 && pStruct->aLevel[iBestLvl].nMerge==0 ){ | |||
14543 | fts5StructurePromote(p, iBestLvl+1, pStruct); | |||
14544 | } | |||
14545 | ||||
14546 | if( nMin==1 ) nMin = 2; | |||
14547 | } | |||
14548 | *ppStruct = pStruct; | |||
14549 | return bRet; | |||
14550 | } | |||
14551 | ||||
14552 | /* | |||
14553 | ** A total of nLeaf leaf pages of data has just been flushed to a level-0 | |||
14554 | ** segment. This function updates the write-counter accordingly and, if | |||
14555 | ** necessary, performs incremental merge work. | |||
14556 | ** | |||
14557 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | |||
14558 | ** already occurred, this function is a no-op. | |||
14559 | */ | |||
14560 | static void fts5IndexAutomerge( | |||
14561 | Fts5Index *p, /* FTS5 backend object */ | |||
14562 | Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ | |||
14563 | int nLeaf /* Number of output leaves just written */ | |||
14564 | ){ | |||
14565 | if( p->rc==SQLITE_OK0 && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0)((*ppStruct)!=0) ){ | |||
14566 | Fts5Structure *pStruct = *ppStruct; | |||
14567 | u64 nWrite; /* Initial value of write-counter */ | |||
14568 | int nWork; /* Number of work-quanta to perform */ | |||
14569 | int nRem; /* Number of leaf pages left to write */ | |||
14570 | ||||
14571 | /* Update the write-counter. While doing so, set nWork. */ | |||
14572 | nWrite = pStruct->nWriteCounter; | |||
14573 | nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); | |||
14574 | pStruct->nWriteCounter += nLeaf; | |||
14575 | nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); | |||
14576 | ||||
14577 | fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); | |||
14578 | } | |||
14579 | } | |||
14580 | ||||
14581 | static void fts5IndexCrisismerge( | |||
14582 | Fts5Index *p, /* FTS5 backend object */ | |||
14583 | Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ | |||
14584 | ){ | |||
14585 | const int nCrisis = p->pConfig->nCrisisMerge; | |||
14586 | Fts5Structure *pStruct = *ppStruct; | |||
14587 | if( pStruct && pStruct->nLevel>0 ){ | |||
14588 | int iLvl = 0; | |||
14589 | while( p->rc==SQLITE_OK0 && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ | |||
14590 | fts5IndexMergeLevel(p, &pStruct, iLvl, 0); | |||
14591 | assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) )((void) (0)); | |||
14592 | fts5StructurePromote(p, iLvl+1, pStruct); | |||
14593 | iLvl++; | |||
14594 | } | |||
14595 | *ppStruct = pStruct; | |||
14596 | } | |||
14597 | } | |||
14598 | ||||
14599 | static int fts5IndexReturn(Fts5Index *p){ | |||
14600 | int rc = p->rc; | |||
14601 | p->rc = SQLITE_OK0; | |||
14602 | return rc; | |||
14603 | } | |||
14604 | ||||
14605 | /* | |||
14606 | ** Close the read-only blob handle, if it is open. | |||
14607 | */ | |||
14608 | static void sqlite3Fts5IndexCloseReader(Fts5Index *p){ | |||
14609 | fts5IndexCloseReader(p); | |||
14610 | fts5IndexReturn(p); | |||
14611 | } | |||
14612 | ||||
14613 | typedef struct Fts5FlushCtx Fts5FlushCtx; | |||
14614 | struct Fts5FlushCtx { | |||
14615 | Fts5Index *pIdx; | |||
14616 | Fts5SegWriter writer; | |||
14617 | }; | |||
14618 | ||||
14619 | /* | |||
14620 | ** Buffer aBuf[] contains a list of varints, all small enough to fit | |||
14621 | ** in a 32-bit integer. Return the size of the largest prefix of this | |||
14622 | ** list nMax bytes or less in size. | |||
14623 | */ | |||
14624 | static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ | |||
14625 | int ret; | |||
14626 | u32 dummy; | |||
14627 | ret = fts5GetVarint32(aBuf, dummy)sqlite3Fts5GetVarint32(aBuf,(u32*)&(dummy)); | |||
14628 | if( ret<nMax ){ | |||
14629 | while( 1 ){ | |||
14630 | int i = fts5GetVarint32(&aBuf[ret], dummy)sqlite3Fts5GetVarint32(&aBuf[ret],(u32*)&(dummy)); | |||
14631 | if( (ret + i) > nMax ) break; | |||
14632 | ret += i; | |||
14633 | } | |||
14634 | } | |||
14635 | return ret; | |||
14636 | } | |||
14637 | ||||
14638 | /* | |||
14639 | ** Execute the SQL statement: | |||
14640 | ** | |||
14641 | ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno); | |||
14642 | ** | |||
14643 | ** This is used when a secure-delete operation removes the last term | |||
14644 | ** from a segment leaf page. In that case the %_idx entry is removed | |||
14645 | ** too. This is done to ensure that if all instances of a token are | |||
14646 | ** removed from an fts5 database in secure-delete mode, no trace of | |||
14647 | ** the token itself remains in the database. | |||
14648 | */ | |||
14649 | static void fts5SecureDeleteIdxEntry( | |||
14650 | Fts5Index *p, /* FTS5 backend object */ | |||
14651 | int iSegid, /* Id of segment to delete entry for */ | |||
14652 | int iPgno /* Page number within segment */ | |||
14653 | ){ | |||
14654 | if( iPgno!=1 ){ | |||
14655 | assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE )((void) (0)); | |||
14656 | if( p->pDeleteFromIdx==0 ){ | |||
14657 | fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintfsqlite3_api->mprintf( | |||
14658 | "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)", | |||
14659 | p->pConfig->zDb, p->pConfig->zName | |||
14660 | )); | |||
14661 | } | |||
14662 | if( p->rc==SQLITE_OK0 ){ | |||
14663 | sqlite3_bind_intsqlite3_api->bind_int(p->pDeleteFromIdx, 1, iSegid); | |||
14664 | sqlite3_bind_intsqlite3_api->bind_int(p->pDeleteFromIdx, 2, iPgno); | |||
14665 | sqlite3_stepsqlite3_api->step(p->pDeleteFromIdx); | |||
14666 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDeleteFromIdx); | |||
14667 | } | |||
14668 | } | |||
14669 | } | |||
14670 | ||||
14671 | /* | |||
14672 | ** This is called when a secure-delete operation removes a position-list | |||
14673 | ** that overflows onto segment page iPgno of segment pSeg. This function | |||
14674 | ** rewrites node iPgno, and possibly one or more of its right-hand peers, | |||
14675 | ** to remove this portion of the position list. | |||
14676 | ** | |||
14677 | ** Output variable (*pbLastInDoclist) is set to true if the position-list | |||
14678 | ** removed is followed by a new term or the end-of-segment, or false if | |||
14679 | ** it is followed by another rowid/position list. | |||
14680 | */ | |||
14681 | static void fts5SecureDeleteOverflow( | |||
14682 | Fts5Index *p, | |||
14683 | Fts5StructureSegment *pSeg, | |||
14684 | int iPgno, | |||
14685 | int *pbLastInDoclist | |||
14686 | ){ | |||
14687 | const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE1); | |||
14688 | int pgno; | |||
14689 | Fts5Data *pLeaf = 0; | |||
14690 | assert( iPgno!=1 )((void) (0)); | |||
14691 | ||||
14692 | *pbLastInDoclist = 1; | |||
14693 | for(pgno=iPgno; p->rc==SQLITE_OK0 && pgno<=pSeg->pgnoLast; pgno++){ | |||
14694 | i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | |||
14695 | int iNext = 0; | |||
14696 | u8 *aPg = 0; | |||
14697 | ||||
14698 | pLeaf = fts5DataRead(p, iRowid); | |||
14699 | if( pLeaf==0 ) break; | |||
14700 | aPg = pLeaf->p; | |||
14701 | ||||
14702 | iNext = fts5GetU16(&aPg[0]); | |||
14703 | if( iNext!=0 ){ | |||
14704 | *pbLastInDoclist = 0; | |||
14705 | } | |||
14706 | if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){ | |||
14707 | fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext)sqlite3Fts5GetVarint32(&aPg[pLeaf->szLeaf],(u32*)& (iNext)); | |||
14708 | } | |||
14709 | ||||
14710 | if( iNext==0 ){ | |||
14711 | /* The page contains no terms or rowids. Replace it with an empty | |||
14712 | ** page and move on to the right-hand peer. */ | |||
14713 | const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04}; | |||
14714 | assert_nc( bDetailNone==0 || pLeaf->nn==4 )((void) (0)); | |||
14715 | if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty)); | |||
14716 | fts5DataRelease(pLeaf); | |||
14717 | pLeaf = 0; | |||
14718 | }else if( bDetailNone ){ | |||
14719 | break; | |||
14720 | }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){ | |||
14721 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
14722 | break; | |||
14723 | }else{ | |||
14724 | int nShift = iNext - 4; | |||
14725 | int nPg; | |||
14726 | ||||
14727 | int nIdx = 0; | |||
14728 | u8 *aIdx = 0; | |||
14729 | ||||
14730 | /* Unless the current page footer is 0 bytes in size (in which case | |||
14731 | ** the new page footer will be as well), allocate and populate a | |||
14732 | ** buffer containing the new page footer. Set stack variables aIdx | |||
14733 | ** and nIdx accordingly. */ | |||
14734 | if( pLeaf->nn>pLeaf->szLeaf ){ | |||
14735 | int iFirst = 0; | |||
14736 | int i1 = pLeaf->szLeaf; | |||
14737 | int i2 = 0; | |||
14738 | ||||
14739 | i1 += fts5GetVarint32(&aPg[i1], iFirst)sqlite3Fts5GetVarint32(&aPg[i1],(u32*)&(iFirst)); | |||
14740 | if( iFirst<iNext ){ | |||
14741 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
14742 | break; | |||
14743 | } | |||
14744 | aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2); | |||
14745 | if( aIdx==0 ) break; | |||
14746 | i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift); | |||
14747 | if( i1<pLeaf->nn ){ | |||
14748 | memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1); | |||
14749 | i2 += (pLeaf->nn-i1); | |||
14750 | } | |||
14751 | nIdx = i2; | |||
14752 | } | |||
14753 | ||||
14754 | /* Modify the contents of buffer aPg[]. Set nPg to the new size | |||
14755 | ** in bytes. The new page is always smaller than the old. */ | |||
14756 | nPg = pLeaf->szLeaf - nShift; | |||
14757 | memmove(&aPg[4], &aPg[4+nShift], nPg-4); | |||
14758 | fts5PutU16(&aPg[2], nPg); | |||
14759 | if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4); | |||
14760 | if( nIdx>0 ){ | |||
14761 | memcpy(&aPg[nPg], aIdx, nIdx); | |||
14762 | nPg += nIdx; | |||
14763 | } | |||
14764 | sqlite3_freesqlite3_api->free(aIdx); | |||
14765 | ||||
14766 | /* Write the new page to disk and exit the loop */ | |||
14767 | assert( nPg>4 || fts5GetU16(aPg)==0 )((void) (0)); | |||
14768 | fts5DataWrite(p, iRowid, aPg, nPg); | |||
14769 | break; | |||
14770 | } | |||
14771 | } | |||
14772 | fts5DataRelease(pLeaf); | |||
14773 | } | |||
14774 | ||||
14775 | /* | |||
14776 | ** Completely remove the entry that pSeg currently points to from | |||
14777 | ** the database. | |||
14778 | */ | |||
14779 | static void fts5DoSecureDelete( | |||
14780 | Fts5Index *p, | |||
14781 | Fts5SegIter *pSeg | |||
14782 | ){ | |||
14783 | const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE1); | |||
14784 | int iSegid = pSeg->pSeg->iSegid; | |||
14785 | u8 *aPg = pSeg->pLeaf->p; | |||
14786 | int nPg = pSeg->pLeaf->nn; | |||
14787 | int iPgIdx = pSeg->pLeaf->szLeaf; | |||
14788 | ||||
14789 | u64 iDelta = 0; | |||
14790 | int iNextOff = 0; | |||
14791 | int iOff = 0; | |||
14792 | int nIdx = 0; | |||
14793 | u8 *aIdx = 0; | |||
14794 | int bLastInDoclist = 0; | |||
14795 | int iIdx = 0; | |||
14796 | int iStart = 0; | |||
14797 | int iDelKeyOff = 0; /* Offset of deleted key, if any */ | |||
14798 | ||||
14799 | nIdx = nPg-iPgIdx; | |||
14800 | aIdx = sqlite3Fts5MallocZero(&p->rc, ((i64)nIdx)+16); | |||
14801 | if( p->rc ) return; | |||
14802 | memcpy(aIdx, &aPg[iPgIdx], nIdx); | |||
14803 | ||||
14804 | /* At this point segment iterator pSeg points to the entry | |||
14805 | ** this function should remove from the b-tree segment. | |||
14806 | ** | |||
14807 | ** In detail=full or detail=column mode, pSeg->iLeafOffset is the | |||
14808 | ** offset of the first byte in the position-list for the entry to | |||
14809 | ** remove. Immediately before this comes two varints that will also | |||
14810 | ** need to be removed: | |||
14811 | ** | |||
14812 | ** + the rowid or delta rowid value for the entry, and | |||
14813 | ** + the size of the position list in bytes. | |||
14814 | ** | |||
14815 | ** Or, in detail=none mode, there is a single varint prior to | |||
14816 | ** pSeg->iLeafOffset - the rowid or delta rowid value. | |||
14817 | ** | |||
14818 | ** This block sets the following variables: | |||
14819 | ** | |||
14820 | ** iStart: | |||
14821 | ** The offset of the first byte of the rowid or delta-rowid | |||
14822 | ** value for the doclist entry being removed. | |||
14823 | ** | |||
14824 | ** iDelta: | |||
14825 | ** The value of the rowid or delta-rowid value for the doclist | |||
14826 | ** entry being removed. | |||
14827 | ** | |||
14828 | ** iNextOff: | |||
14829 | ** The offset of the next entry following the position list | |||
14830 | ** for the one being removed. If the position list for this | |||
14831 | ** entry overflows onto the next leaf page, this value will be | |||
14832 | ** greater than pLeaf->szLeaf. | |||
14833 | */ | |||
14834 | { | |||
14835 | int iSOP; /* Start-Of-Position-list */ | |||
14836 | if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){ | |||
14837 | iStart = pSeg->iTermLeafOffset; | |||
14838 | }else{ | |||
14839 | iStart = fts5GetU16(&aPg[0]); | |||
14840 | } | |||
14841 | ||||
14842 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | |||
14843 | assert_nc( iSOP<=pSeg->iLeafOffset )((void) (0)); | |||
14844 | ||||
14845 | if( bDetailNone ){ | |||
14846 | while( iSOP<pSeg->iLeafOffset ){ | |||
14847 | if( aPg[iSOP]==0x00 ) iSOP++; | |||
14848 | if( aPg[iSOP]==0x00 ) iSOP++; | |||
14849 | iStart = iSOP; | |||
14850 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | |||
14851 | } | |||
14852 | ||||
14853 | iNextOff = iSOP; | |||
14854 | if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++; | |||
14855 | if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++; | |||
14856 | ||||
14857 | }else{ | |||
14858 | int nPos = 0; | |||
14859 | iSOP += fts5GetVarint32(&aPg[iSOP], nPos)sqlite3Fts5GetVarint32(&aPg[iSOP],(u32*)&(nPos)); | |||
14860 | while( iSOP<pSeg->iLeafOffset ){ | |||
14861 | iStart = iSOP + (nPos/2); | |||
14862 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | |||
14863 | iSOP += fts5GetVarint32(&aPg[iSOP], nPos)sqlite3Fts5GetVarint32(&aPg[iSOP],(u32*)&(nPos)); | |||
14864 | } | |||
14865 | assert_nc( iSOP==pSeg->iLeafOffset )((void) (0)); | |||
14866 | iNextOff = pSeg->iLeafOffset + pSeg->nPos; | |||
14867 | } | |||
14868 | } | |||
14869 | ||||
14870 | iOff = iStart; | |||
14871 | ||||
14872 | /* If the position-list for the entry being removed flows over past | |||
14873 | ** the end of this page, delete the portion of the position-list on the | |||
14874 | ** next page and beyond. | |||
14875 | ** | |||
14876 | ** Set variable bLastInDoclist to true if this entry happens | |||
14877 | ** to be the last rowid in the doclist for its term. */ | |||
14878 | if( iNextOff>=iPgIdx ){ | |||
14879 | int pgno = pSeg->iLeafPgno+1; | |||
14880 | fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist); | |||
14881 | iNextOff = iPgIdx; | |||
14882 | } | |||
14883 | ||||
14884 | if( pSeg->bDel==0 ){ | |||
14885 | if( iNextOff!=iPgIdx ){ | |||
14886 | /* Loop through the page-footer. If iNextOff (offset of the | |||
14887 | ** entry following the one we are removing) is equal to the | |||
14888 | ** offset of a key on this page, then the entry is the last | |||
14889 | ** in its doclist. */ | |||
14890 | int iKeyOff = 0; | |||
14891 | for(iIdx=0; iIdx<nIdx; /* no-op */){ | |||
14892 | u32 iVal = 0; | |||
14893 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | |||
14894 | iKeyOff += iVal; | |||
14895 | if( iKeyOff==iNextOff ){ | |||
14896 | bLastInDoclist = 1; | |||
14897 | } | |||
14898 | } | |||
14899 | } | |||
14900 | ||||
14901 | /* If this is (a) the first rowid on a page and (b) is not followed by | |||
14902 | ** another position list on the same page, set the "first-rowid" field | |||
14903 | ** of the header to 0. */ | |||
14904 | if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist || iNextOff==iPgIdx) ){ | |||
14905 | fts5PutU16(&aPg[0], 0); | |||
14906 | } | |||
14907 | } | |||
14908 | ||||
14909 | if( pSeg->bDel ){ | |||
14910 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta); | |||
14911 | aPg[iOff++] = 0x01; | |||
14912 | }else if( bLastInDoclist==0 ){ | |||
14913 | if( iNextOff!=iPgIdx ){ | |||
14914 | u64 iNextDelta = 0; | |||
14915 | iNextOff += fts5GetVarintsqlite3Fts5GetVarint(&aPg[iNextOff], &iNextDelta); | |||
14916 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta); | |||
14917 | } | |||
14918 | }else if( | |||
14919 | pSeg->iLeafPgno==pSeg->iTermLeafPgno | |||
14920 | && iStart==pSeg->iTermLeafOffset | |||
14921 | ){ | |||
14922 | /* The entry being removed was the only position list in its | |||
14923 | ** doclist. Therefore the term needs to be removed as well. */ | |||
14924 | int iKey = 0; | |||
14925 | int iKeyOff = 0; | |||
14926 | ||||
14927 | /* Set iKeyOff to the offset of the term that will be removed - the | |||
14928 | ** last offset in the footer that is not greater than iStart. */ | |||
14929 | for(iIdx=0; iIdx<nIdx; iKey++){ | |||
14930 | u32 iVal = 0; | |||
14931 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | |||
14932 | if( (iKeyOff+iVal)>(u32)iStart ) break; | |||
14933 | iKeyOff += iVal; | |||
14934 | } | |||
14935 | assert_nc( iKey>=1 )((void) (0)); | |||
14936 | ||||
14937 | /* Set iDelKeyOff to the value of the footer entry to remove from | |||
14938 | ** the page. */ | |||
14939 | iDelKeyOff = iOff = iKeyOff; | |||
14940 | ||||
14941 | if( iNextOff!=iPgIdx ){ | |||
14942 | /* This is the only position-list associated with the term, and there | |||
14943 | ** is another term following it on this page. So the subsequent term | |||
14944 | ** needs to be moved to replace the term associated with the entry | |||
14945 | ** being removed. */ | |||
14946 | int nPrefix = 0; | |||
14947 | int nSuffix = 0; | |||
14948 | int nPrefix2 = 0; | |||
14949 | int nSuffix2 = 0; | |||
14950 | ||||
14951 | iDelKeyOff = iNextOff; | |||
14952 | iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2)sqlite3Fts5GetVarint32(&aPg[iNextOff],(u32*)&(nPrefix2 )); | |||
14953 | iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2)sqlite3Fts5GetVarint32(&aPg[iNextOff],(u32*)&(nSuffix2 )); | |||
14954 | ||||
14955 | if( iKey!=1 ){ | |||
14956 | iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix)sqlite3Fts5GetVarint32(&aPg[iKeyOff],(u32*)&(nPrefix) ); | |||
14957 | } | |||
14958 | iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix)sqlite3Fts5GetVarint32(&aPg[iKeyOff],(u32*)&(nSuffix) ); | |||
14959 | ||||
14960 | nPrefix = MIN(nPrefix, nPrefix2)(((nPrefix) < (nPrefix2)) ? (nPrefix) : (nPrefix2)); | |||
14961 | nSuffix = (nPrefix2 + nSuffix2) - nPrefix; | |||
14962 | ||||
14963 | if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){ | |||
14964 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
14965 | }else{ | |||
14966 | if( iKey!=1 ){ | |||
14967 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix); | |||
14968 | } | |||
14969 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix); | |||
14970 | if( nPrefix2>pSeg->term.n ){ | |||
14971 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
14972 | }else if( nPrefix2>nPrefix ){ | |||
14973 | memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix); | |||
14974 | iOff += (nPrefix2-nPrefix); | |||
14975 | } | |||
14976 | memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2); | |||
14977 | iOff += nSuffix2; | |||
14978 | iNextOff += nSuffix2; | |||
14979 | } | |||
14980 | } | |||
14981 | }else if( iStart==4 ){ | |||
14982 | int iPgno; | |||
14983 | ||||
14984 | assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno )((void) (0)); | |||
14985 | /* The entry being removed may be the only position list in | |||
14986 | ** its doclist. */ | |||
14987 | for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){ | |||
14988 | Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPgno)) )); | |||
14989 | int bEmpty = (pPg && pPg->nn==4); | |||
14990 | fts5DataRelease(pPg); | |||
14991 | if( bEmpty==0 ) break; | |||
14992 | } | |||
14993 | ||||
14994 | if( iPgno==pSeg->iTermLeafPgno ){ | |||
14995 | i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iTermLeafPgno )) ); | |||
14996 | Fts5Data *pTerm = fts5DataRead(p, iId); | |||
14997 | if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){ | |||
14998 | u8 *aTermIdx = &pTerm->p[pTerm->szLeaf]; | |||
14999 | int nTermIdx = pTerm->nn - pTerm->szLeaf; | |||
15000 | int iTermIdx = 0; | |||
15001 | int iTermOff = 0; | |||
15002 | ||||
15003 | while( 1 ){ | |||
15004 | u32 iVal = 0; | |||
15005 | int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal)sqlite3Fts5GetVarint32(&aTermIdx[iTermIdx],(u32*)&(iVal )); | |||
15006 | iTermOff += iVal; | |||
15007 | if( (iTermIdx+nByte)>=nTermIdx ) break; | |||
15008 | iTermIdx += nByte; | |||
15009 | } | |||
15010 | nTermIdx = iTermIdx; | |||
15011 | ||||
15012 | memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx); | |||
15013 | fts5PutU16(&pTerm->p[2], iTermOff); | |||
15014 | ||||
15015 | fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx); | |||
15016 | if( nTermIdx==0 ){ | |||
15017 | fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno); | |||
15018 | } | |||
15019 | } | |||
15020 | fts5DataRelease(pTerm); | |||
15021 | } | |||
15022 | } | |||
15023 | ||||
15024 | /* Assuming no error has occurred, this block does final edits to the | |||
15025 | ** leaf page before writing it back to disk. Input variables are: | |||
15026 | ** | |||
15027 | ** nPg: Total initial size of leaf page. | |||
15028 | ** iPgIdx: Initial offset of page footer. | |||
15029 | ** | |||
15030 | ** iOff: Offset to move data to | |||
15031 | ** iNextOff: Offset to move data from | |||
15032 | */ | |||
15033 | if( p->rc==SQLITE_OK0 ){ | |||
15034 | const int nMove = nPg - iNextOff; /* Number of bytes to move */ | |||
15035 | int nShift = iNextOff - iOff; /* Distance to move them */ | |||
15036 | ||||
15037 | int iPrevKeyOut = 0; | |||
15038 | int iKeyIn = 0; | |||
15039 | ||||
15040 | memmove(&aPg[iOff], &aPg[iNextOff], nMove); | |||
15041 | iPgIdx -= nShift; | |||
15042 | nPg = iPgIdx; | |||
15043 | fts5PutU16(&aPg[2], iPgIdx); | |||
15044 | ||||
15045 | for(iIdx=0; iIdx<nIdx; /* no-op */){ | |||
15046 | u32 iVal = 0; | |||
15047 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | |||
15048 | iKeyIn += iVal; | |||
15049 | if( iKeyIn!=iDelKeyOff ){ | |||
15050 | int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0)); | |||
15051 | nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut); | |||
15052 | iPrevKeyOut = iKeyOut; | |||
15053 | } | |||
15054 | } | |||
15055 | ||||
15056 | if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){ | |||
15057 | fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno); | |||
15058 | } | |||
15059 | ||||
15060 | assert_nc( nPg>4 || fts5GetU16(aPg)==0 )((void) (0)); | |||
15061 | fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iLeafPgno )) ), aPg, nPg); | |||
15062 | } | |||
15063 | sqlite3_freesqlite3_api->free(aIdx); | |||
15064 | } | |||
15065 | ||||
15066 | /* | |||
15067 | ** This is called as part of flushing a delete to disk in 'secure-delete' | |||
15068 | ** mode. It edits the segments within the database described by argument | |||
15069 | ** pStruct to remove the entries for term zTerm, rowid iRowid. | |||
15070 | ** | |||
15071 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
15072 | ** has occurred. Any error code is also stored in the Fts5Index handle. | |||
15073 | */ | |||
15074 | static int fts5FlushSecureDelete( | |||
15075 | Fts5Index *p, | |||
15076 | Fts5Structure *pStruct, | |||
15077 | const char *zTerm, | |||
15078 | int nTerm, | |||
15079 | i64 iRowid | |||
15080 | ){ | |||
15081 | const int f = FTS5INDEX_QUERY_SKIPHASH0x0040; | |||
15082 | Fts5Iter *pIter = 0; /* Used to find term instance */ | |||
15083 | ||||
15084 | /* If the version number has not been set to SECUREDELETE, do so now. */ | |||
15085 | if( p->pConfig->iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE5 ){ | |||
15086 | Fts5Config *pConfig = p->pConfig; | |||
15087 | sqlite3_stmt *pStmt = 0; | |||
15088 | fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintfsqlite3_api->mprintf( | |||
15089 | "REPLACE INTO %Q.'%q_config' VALUES ('version', %d)", | |||
15090 | pConfig->zDb, pConfig->zName, FTS5_CURRENT_VERSION_SECUREDELETE5 | |||
15091 | )); | |||
15092 | if( p->rc==SQLITE_OK0 ){ | |||
15093 | int rc; | |||
15094 | sqlite3_stepsqlite3_api->step(pStmt); | |||
15095 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
15096 | if( p->rc==SQLITE_OK0 ) p->rc = rc; | |||
15097 | pConfig->iCookie++; | |||
15098 | pConfig->iVersion = FTS5_CURRENT_VERSION_SECUREDELETE5; | |||
15099 | } | |||
15100 | } | |||
15101 | ||||
15102 | fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter); | |||
15103 | if( fts5MultiIterEof(p, pIter)==0 ){ | |||
15104 | i64 iThis = fts5MultiIterRowid(pIter); | |||
15105 | if( iThis<iRowid ){ | |||
15106 | fts5MultiIterNextFrom(p, pIter, iRowid); | |||
15107 | } | |||
15108 | ||||
15109 | if( p->rc==SQLITE_OK0 | |||
15110 | && fts5MultiIterEof(p, pIter)==0 | |||
15111 | && iRowid==fts5MultiIterRowid(pIter) | |||
15112 | ){ | |||
15113 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | |||
15114 | fts5DoSecureDelete(p, pSeg); | |||
15115 | } | |||
15116 | } | |||
15117 | ||||
15118 | fts5MultiIterFree(pIter); | |||
15119 | return p->rc; | |||
15120 | } | |||
15121 | ||||
15122 | ||||
15123 | /* | |||
15124 | ** Flush the contents of in-memory hash table iHash to a new level-0 | |||
15125 | ** segment on disk. Also update the corresponding structure record. | |||
15126 | ** | |||
15127 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | |||
15128 | ** already occurred, this function is a no-op. | |||
15129 | */ | |||
15130 | static void fts5FlushOneHash(Fts5Index *p){ | |||
15131 | Fts5Hash *pHash = p->pHash; | |||
15132 | Fts5Structure *pStruct; | |||
15133 | int iSegid; | |||
15134 | int pgnoLast = 0; /* Last leaf page number in segment */ | |||
15135 | ||||
15136 | /* Obtain a reference to the index structure and allocate a new segment-id | |||
15137 | ** for the new level-0 segment. */ | |||
15138 | pStruct = fts5StructureRead(p); | |||
15139 | fts5StructureInvalidate(p); | |||
15140 | ||||
15141 | if( sqlite3Fts5HashIsEmpty(pHash)==0 ){ | |||
15142 | iSegid = fts5AllocateSegid(p, pStruct); | |||
15143 | if( iSegid ){ | |||
15144 | const int pgsz = p->pConfig->pgsz; | |||
15145 | int eDetail = p->pConfig->eDetail; | |||
15146 | int bSecureDelete = p->pConfig->bSecureDelete; | |||
15147 | Fts5StructureSegment *pSeg; /* New segment within pStruct */ | |||
15148 | Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ | |||
15149 | Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ | |||
15150 | ||||
15151 | Fts5SegWriter writer; | |||
15152 | fts5WriteInit(p, &writer, iSegid); | |||
15153 | ||||
15154 | pBuf = &writer.writer.buf; | |||
15155 | pPgidx = &writer.writer.pgidx; | |||
15156 | ||||
15157 | /* fts5WriteInit() should have initialized the buffers to (most likely) | |||
15158 | ** the maximum space required. */ | |||
15159 | assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) )((void) (0)); | |||
15160 | assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) )((void) (0)); | |||
15161 | ||||
15162 | /* Begin scanning through hash table entries. This loop runs once for each | |||
15163 | ** term/doclist currently stored within the hash table. */ | |||
15164 | if( p->rc==SQLITE_OK0 ){ | |||
15165 | p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); | |||
15166 | } | |||
15167 | while( p->rc==SQLITE_OK0 && 0==sqlite3Fts5HashScanEof(pHash) ){ | |||
15168 | const char *zTerm; /* Buffer containing term */ | |||
15169 | int nTerm; /* Size of zTerm in bytes */ | |||
15170 | const u8 *pDoclist; /* Pointer to doclist for this term */ | |||
15171 | int nDoclist; /* Size of doclist in bytes */ | |||
15172 | ||||
15173 | /* Get the term and doclist for this entry. */ | |||
15174 | sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist); | |||
15175 | if( bSecureDelete==0 ){ | |||
15176 | fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); | |||
15177 | if( p->rc!=SQLITE_OK0 ) break; | |||
15178 | assert( writer.bFirstRowidInPage==0 )((void) (0)); | |||
15179 | } | |||
15180 | ||||
15181 | if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ | |||
15182 | /* The entire doclist will fit on the current leaf. */ | |||
15183 | fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pDoclist , nDoclist); (pBuf)->n += nDoclist; }; | |||
15184 | }else{ | |||
15185 | int bTermWritten = !bSecureDelete; | |||
15186 | i64 iRowid = 0; | |||
15187 | i64 iPrev = 0; | |||
15188 | int iOff = 0; | |||
15189 | ||||
15190 | /* The entire doclist will not fit on this leaf. The following | |||
15191 | ** loop iterates through the poslists that make up the current | |||
15192 | ** doclist. */ | |||
15193 | while( p->rc==SQLITE_OK0 && iOff<nDoclist ){ | |||
15194 | u64 iDelta = 0; | |||
15195 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pDoclist[iOff], &iDelta); | |||
15196 | iRowid += iDelta; | |||
15197 | ||||
15198 | /* If in secure delete mode, and if this entry in the poslist is | |||
15199 | ** in fact a delete, then edit the existing segments directly | |||
15200 | ** using fts5FlushSecureDelete(). */ | |||
15201 | if( bSecureDelete ){ | |||
15202 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
15203 | if( iOff<nDoclist && pDoclist[iOff]==0x00 | |||
15204 | && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid) | |||
15205 | ){ | |||
15206 | iOff++; | |||
15207 | if( iOff<nDoclist && pDoclist[iOff]==0x00 ){ | |||
15208 | iOff++; | |||
15209 | nDoclist = 0; | |||
15210 | }else{ | |||
15211 | continue; | |||
15212 | } | |||
15213 | } | |||
15214 | }else if( (pDoclist[iOff] & 0x01) | |||
15215 | && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid) | |||
15216 | ){ | |||
15217 | if( p->rc!=SQLITE_OK0 || pDoclist[iOff]==0x01 ){ | |||
15218 | iOff++; | |||
15219 | continue; | |||
15220 | } | |||
15221 | } | |||
15222 | } | |||
15223 | ||||
15224 | if( p->rc==SQLITE_OK0 && bTermWritten==0 ){ | |||
15225 | fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); | |||
15226 | bTermWritten = 1; | |||
15227 | assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 )((void) (0)); | |||
15228 | } | |||
15229 | ||||
15230 | if( writer.bFirstRowidInPage ){ | |||
15231 | fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ | |||
15232 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); | |||
15233 | writer.bFirstRowidInPage = 0; | |||
15234 | fts5WriteDlidxAppend(p, &writer, iRowid); | |||
15235 | }else{ | |||
15236 | u64 iRowidDelta = (u64)iRowid - (u64)iPrev; | |||
15237 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta); | |||
15238 | } | |||
15239 | if( p->rc!=SQLITE_OK0 ) break; | |||
15240 | assert( pBuf->n<=pBuf->nSpace )((void) (0)); | |||
15241 | iPrev = iRowid; | |||
15242 | ||||
15243 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
15244 | if( iOff<nDoclist && pDoclist[iOff]==0 ){ | |||
15245 | pBuf->p[pBuf->n++] = 0; | |||
15246 | iOff++; | |||
15247 | if( iOff<nDoclist && pDoclist[iOff]==0 ){ | |||
15248 | pBuf->p[pBuf->n++] = 0; | |||
15249 | iOff++; | |||
15250 | } | |||
15251 | } | |||
15252 | if( (pBuf->n + pPgidx->n)>=pgsz ){ | |||
15253 | fts5WriteFlushLeaf(p, &writer); | |||
15254 | } | |||
15255 | }else{ | |||
15256 | int bDel = 0; | |||
15257 | int nPos = 0; | |||
15258 | int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel); | |||
15259 | if( bDel && bSecureDelete ){ | |||
15260 | fts5BufferAppendVarint(&p->rc, pBuf, nPos*2)sqlite3Fts5BufferAppendVarint(&p->rc,pBuf,(i64)nPos*2); | |||
15261 | iOff += nCopy; | |||
15262 | nCopy = nPos; | |||
15263 | }else{ | |||
15264 | nCopy += nPos; | |||
15265 | } | |||
15266 | if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ | |||
15267 | /* The entire poslist will fit on the current leaf. So copy | |||
15268 | ** it in one go. */ | |||
15269 | fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], & pDoclist[iOff], nCopy); (pBuf)->n += nCopy; }; | |||
15270 | }else{ | |||
15271 | /* The entire poslist will not fit on this leaf. So it needs | |||
15272 | ** to be broken into sections. The only qualification being | |||
15273 | ** that each varint must be stored contiguously. */ | |||
15274 | const u8 *pPoslist = &pDoclist[iOff]; | |||
15275 | int iPos = 0; | |||
15276 | while( p->rc==SQLITE_OK0 ){ | |||
15277 | int nSpace = pgsz - pBuf->n - pPgidx->n; | |||
15278 | int n = 0; | |||
15279 | if( (nCopy - iPos)<=nSpace ){ | |||
15280 | n = nCopy - iPos; | |||
15281 | }else{ | |||
15282 | n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); | |||
15283 | } | |||
15284 | assert( n>0 )((void) (0)); | |||
15285 | fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], & pPoslist[iPos], n); (pBuf)->n += n; }; | |||
15286 | iPos += n; | |||
15287 | if( (pBuf->n + pPgidx->n)>=pgsz ){ | |||
15288 | fts5WriteFlushLeaf(p, &writer); | |||
15289 | } | |||
15290 | if( iPos>=nCopy ) break; | |||
15291 | } | |||
15292 | } | |||
15293 | iOff += nCopy; | |||
15294 | } | |||
15295 | } | |||
15296 | } | |||
15297 | ||||
15298 | /* TODO2: Doclist terminator written here. */ | |||
15299 | /* pBuf->p[pBuf->n++] = '\0'; */ | |||
15300 | assert( pBuf->n<=pBuf->nSpace )((void) (0)); | |||
15301 | if( p->rc==SQLITE_OK0 ) sqlite3Fts5HashScanNext(pHash); | |||
15302 | } | |||
15303 | fts5WriteFinish(p, &writer, &pgnoLast); | |||
15304 | ||||
15305 | assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 )((void) (0)); | |||
15306 | if( pgnoLast>0 ){ | |||
15307 | /* Update the Fts5Structure. It is written back to the database by the | |||
15308 | ** fts5StructureRelease() call below. */ | |||
15309 | if( pStruct->nLevel==0 ){ | |||
15310 | fts5StructureAddLevel(&p->rc, &pStruct); | |||
15311 | } | |||
15312 | fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); | |||
15313 | if( p->rc==SQLITE_OK0 ){ | |||
15314 | pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; | |||
15315 | pSeg->iSegid = iSegid; | |||
15316 | pSeg->pgnoFirst = 1; | |||
15317 | pSeg->pgnoLast = pgnoLast; | |||
15318 | if( pStruct->nOriginCntr>0 ){ | |||
15319 | pSeg->iOrigin1 = pStruct->nOriginCntr; | |||
15320 | pSeg->iOrigin2 = pStruct->nOriginCntr; | |||
15321 | pSeg->nEntry = p->nPendingRow; | |||
15322 | pStruct->nOriginCntr++; | |||
15323 | } | |||
15324 | pStruct->nSegment++; | |||
15325 | } | |||
15326 | fts5StructurePromote(p, 0, pStruct); | |||
15327 | } | |||
15328 | } | |||
15329 | } | |||
15330 | ||||
15331 | fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete); | |||
15332 | fts5IndexCrisismerge(p, &pStruct); | |||
15333 | fts5StructureWrite(p, pStruct); | |||
15334 | fts5StructureRelease(pStruct); | |||
15335 | } | |||
15336 | ||||
15337 | /* | |||
15338 | ** Flush any data stored in the in-memory hash tables to the database. | |||
15339 | */ | |||
15340 | static void fts5IndexFlush(Fts5Index *p){ | |||
15341 | /* Unless it is empty, flush the hash table to disk */ | |||
15342 | if( p->flushRc ){ | |||
15343 | p->rc = p->flushRc; | |||
15344 | return; | |||
15345 | } | |||
15346 | if( p->nPendingData || p->nContentlessDelete ){ | |||
15347 | assert( p->pHash )((void) (0)); | |||
15348 | fts5FlushOneHash(p); | |||
15349 | if( p->rc==SQLITE_OK0 ){ | |||
15350 | sqlite3Fts5HashClear(p->pHash); | |||
15351 | p->nPendingData = 0; | |||
15352 | p->nPendingRow = 0; | |||
15353 | p->nContentlessDelete = 0; | |||
15354 | }else if( p->nPendingData || p->nContentlessDelete ){ | |||
15355 | p->flushRc = p->rc; | |||
15356 | } | |||
15357 | } | |||
15358 | } | |||
15359 | ||||
15360 | static Fts5Structure *fts5IndexOptimizeStruct( | |||
15361 | Fts5Index *p, | |||
15362 | Fts5Structure *pStruct | |||
15363 | ){ | |||
15364 | Fts5Structure *pNew = 0; | |||
15365 | sqlite3_int64 nByte = SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel )); | |||
15366 | int nSeg = pStruct->nSegment; | |||
15367 | int i; | |||
15368 | ||||
15369 | /* Figure out if this structure requires optimization. A structure does | |||
15370 | ** not require optimization if either: | |||
15371 | ** | |||
15372 | ** 1. it consists of fewer than two segments, or | |||
15373 | ** 2. all segments are on the same level, or | |||
15374 | ** 3. all segments except one are currently inputs to a merge operation. | |||
15375 | ** | |||
15376 | ** In the first case, if there are no tombstone hash pages, return NULL. In | |||
15377 | ** the second, increment the ref-count on *pStruct and return a copy of the | |||
15378 | ** pointer to it. | |||
15379 | */ | |||
15380 | if( nSeg==0 ) return 0; | |||
15381 | for(i=0; i<pStruct->nLevel; i++){ | |||
15382 | int nThis = pStruct->aLevel[i].nSeg; | |||
15383 | int nMerge = pStruct->aLevel[i].nMerge; | |||
15384 | if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){ | |||
15385 | if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){ | |||
15386 | return 0; | |||
15387 | } | |||
15388 | fts5StructureRef(pStruct); | |||
15389 | return pStruct; | |||
15390 | } | |||
15391 | assert( pStruct->aLevel[i].nMerge<=nThis )((void) (0)); | |||
15392 | } | |||
15393 | ||||
15394 | nByte += (((i64)pStruct->nLevel)+1) * sizeof(Fts5StructureLevel); | |||
15395 | assert( nByte==SZ_FTS5STRUCTURE(pStruct->nLevel+2) )((void) (0)); | |||
15396 | pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); | |||
15397 | ||||
15398 | if( pNew ){ | |||
15399 | Fts5StructureLevel *pLvl; | |||
15400 | nByte = nSeg * sizeof(Fts5StructureSegment); | |||
15401 | pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL)(((pStruct->nLevel+1) < (64)) ? (pStruct->nLevel+1) : (64)); | |||
15402 | pNew->nRef = 1; | |||
15403 | pNew->nWriteCounter = pStruct->nWriteCounter; | |||
15404 | pNew->nOriginCntr = pStruct->nOriginCntr; | |||
15405 | pLvl = &pNew->aLevel[pNew->nLevel-1]; | |||
15406 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); | |||
15407 | if( pLvl->aSeg ){ | |||
15408 | int iLvl, iSeg; | |||
15409 | int iSegOut = 0; | |||
15410 | /* Iterate through all segments, from oldest to newest. Add them to | |||
15411 | ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest | |||
15412 | ** segment in the data structure. */ | |||
15413 | for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ | |||
15414 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | |||
15415 | pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; | |||
15416 | iSegOut++; | |||
15417 | } | |||
15418 | } | |||
15419 | pNew->nSegment = pLvl->nSeg = nSeg; | |||
15420 | }else{ | |||
15421 | sqlite3_freesqlite3_api->free(pNew); | |||
15422 | pNew = 0; | |||
15423 | } | |||
15424 | } | |||
15425 | ||||
15426 | return pNew; | |||
15427 | } | |||
15428 | ||||
15429 | static int sqlite3Fts5IndexOptimize(Fts5Index *p){ | |||
15430 | Fts5Structure *pStruct; | |||
15431 | Fts5Structure *pNew = 0; | |||
15432 | ||||
15433 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
15434 | fts5IndexFlush(p); | |||
15435 | assert( p->rc!=SQLITE_OK || p->nContentlessDelete==0 )((void) (0)); | |||
15436 | pStruct = fts5StructureRead(p); | |||
15437 | assert( p->rc!=SQLITE_OK || pStruct!=0 )((void) (0)); | |||
15438 | fts5StructureInvalidate(p); | |||
15439 | ||||
15440 | if( pStruct ){ | |||
15441 | pNew = fts5IndexOptimizeStruct(p, pStruct); | |||
15442 | } | |||
15443 | fts5StructureRelease(pStruct); | |||
15444 | ||||
15445 | assert( pNew==0 || pNew->nSegment>0 )((void) (0)); | |||
15446 | if( pNew ){ | |||
15447 | int iLvl; | |||
15448 | for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){} | |||
15449 | while( p->rc==SQLITE_OK0 && pNew->aLevel[iLvl].nSeg>0 ){ | |||
15450 | int nRem = FTS5_OPT_WORK_UNIT1000; | |||
15451 | fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); | |||
15452 | } | |||
15453 | ||||
15454 | fts5StructureWrite(p, pNew); | |||
15455 | fts5StructureRelease(pNew); | |||
15456 | } | |||
15457 | ||||
15458 | return fts5IndexReturn(p); | |||
15459 | } | |||
15460 | ||||
15461 | /* | |||
15462 | ** This is called to implement the special "VALUES('merge', $nMerge)" | |||
15463 | ** INSERT command. | |||
15464 | */ | |||
15465 | static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ | |||
15466 | Fts5Structure *pStruct = 0; | |||
15467 | ||||
15468 | fts5IndexFlush(p); | |||
15469 | pStruct = fts5StructureRead(p); | |||
15470 | if( pStruct ){ | |||
15471 | int nMin = p->pConfig->nUsermerge; | |||
15472 | fts5StructureInvalidate(p); | |||
15473 | if( nMerge<0 ){ | |||
15474 | Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); | |||
15475 | fts5StructureRelease(pStruct); | |||
15476 | pStruct = pNew; | |||
15477 | nMin = 1; | |||
15478 | nMerge = nMerge*-1; | |||
15479 | } | |||
15480 | if( pStruct && pStruct->nLevel ){ | |||
15481 | if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){ | |||
15482 | fts5StructureWrite(p, pStruct); | |||
15483 | } | |||
15484 | } | |||
15485 | fts5StructureRelease(pStruct); | |||
15486 | } | |||
15487 | return fts5IndexReturn(p); | |||
15488 | } | |||
15489 | ||||
15490 | static void fts5AppendRowid( | |||
15491 | Fts5Index *p, | |||
15492 | u64 iDelta, | |||
15493 | Fts5Iter *pUnused, | |||
15494 | Fts5Buffer *pBuf | |||
15495 | ){ | |||
15496 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
15497 | fts5BufferAppendVarint(&p->rc, pBuf, iDelta)sqlite3Fts5BufferAppendVarint(&p->rc,pBuf,(i64)iDelta); | |||
15498 | } | |||
15499 | ||||
15500 | static void fts5AppendPoslist( | |||
15501 | Fts5Index *p, | |||
15502 | u64 iDelta, | |||
15503 | Fts5Iter *pMulti, | |||
15504 | Fts5Buffer *pBuf | |||
15505 | ){ | |||
15506 | int nData = pMulti->base.nData; | |||
15507 | int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING8; | |||
15508 | assert( nData>0 )((void) (0)); | |||
15509 | if( p->rc==SQLITE_OK0 && 0==fts5BufferGrow(&p->rc, pBuf, nByte)( (u32)((pBuf)->n) + (u32)(nByte) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((&p->rc),(pBuf),(nByte)+ (pBuf)->n) ) ){ | |||
15510 | fts5BufferSafeAppendVarint(pBuf, iDelta){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf )->n], (iDelta)); ((void) (0)); }; | |||
15511 | fts5BufferSafeAppendVarint(pBuf, nData*2){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf )->n], (nData*2)); ((void) (0)); }; | |||
15512 | fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pMulti ->base.pData, nData); (pBuf)->n += nData; }; | |||
15513 | memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING8); | |||
15514 | } | |||
15515 | } | |||
15516 | ||||
15517 | ||||
15518 | static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ | |||
15519 | u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; | |||
15520 | ||||
15521 | assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) )((void) (0)); | |||
15522 | if( p>=pIter->aEof ){ | |||
15523 | pIter->aPoslist = 0; | |||
15524 | }else{ | |||
15525 | i64 iDelta; | |||
15526 | ||||
15527 | p += fts5GetVarintsqlite3Fts5GetVarint(p, (u64*)&iDelta); | |||
15528 | pIter->iRowid += iDelta; | |||
15529 | ||||
15530 | /* Read position list size */ | |||
15531 | if( p[0] & 0x80 ){ | |||
15532 | int nPos; | |||
15533 | pIter->nSize = fts5GetVarint32(p, nPos)sqlite3Fts5GetVarint32(p,(u32*)&(nPos)); | |||
15534 | pIter->nPoslist = (nPos>>1); | |||
15535 | }else{ | |||
15536 | pIter->nPoslist = ((int)(p[0])) >> 1; | |||
15537 | pIter->nSize = 1; | |||
15538 | } | |||
15539 | ||||
15540 | pIter->aPoslist = p; | |||
15541 | if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){ | |||
15542 | pIter->aPoslist = 0; | |||
15543 | } | |||
15544 | } | |||
15545 | } | |||
15546 | ||||
15547 | static void fts5DoclistIterInit( | |||
15548 | Fts5Buffer *pBuf, | |||
15549 | Fts5DoclistIter *pIter | |||
15550 | ){ | |||
15551 | memset(pIter, 0, sizeof(*pIter)); | |||
15552 | if( pBuf->n>0 ){ | |||
15553 | pIter->aPoslist = pBuf->p; | |||
15554 | pIter->aEof = &pBuf->p[pBuf->n]; | |||
15555 | fts5DoclistIterNext(pIter); | |||
15556 | } | |||
15557 | } | |||
15558 | ||||
#if 0
/*
** Disabled function form of the fts5MergeAppendDocid() macro.
**
** Append the delta between iRowid and *piLastRowid to buffer pBuf as a
** varint, then store iRowid in *piLastRowid.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif
15576 | ||||
/*
** Append the difference between iNew and iPrev (computed as u64 values)
** to buffer pOut as a varint, then assign iNew to iPrev. No attempt is
** made to grow the buffer, so space must already be available. Argument
** iPrev must be an lvalue. All arguments are evaluated more than once.
*/
#define fts5MergeAppendDocid(pOut, iPrev, iNew) {                     \
  assert( (pOut)->n!=0 || (iPrev)==0 );                               \
  fts5BufferSafeAppendVarint((pOut), (u64)(iNew) - (u64)(iPrev));     \
  (iPrev) = (iNew);                                                   \
}
15582 | ||||
15583 | /* | |||
15584 | ** Swap the contents of buffer *p1 with that of *p2. | |||
15585 | */ | |||
15586 | static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ | |||
15587 | Fts5Buffer tmp = *p1; | |||
15588 | *p1 = *p2; | |||
15589 | *p2 = tmp; | |||
15590 | } | |||
15591 | ||||
15592 | static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ | |||
15593 | int i = *piOff; | |||
15594 | if( i>=pBuf->n ){ | |||
15595 | *piOff = -1; | |||
15596 | }else{ | |||
15597 | u64 iVal; | |||
15598 | *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); | |||
15599 | *piRowid += iVal; | |||
15600 | } | |||
15601 | } | |||
15602 | ||||
15603 | /* | |||
15604 | ** This is the equivalent of fts5MergePrefixLists() for detail=none mode. | |||
15605 | ** In this case the buffers consist of a delta-encoded list of rowids only. | |||
15606 | */ | |||
15607 | static void fts5MergeRowidLists( | |||
15608 | Fts5Index *p, /* FTS5 backend object */ | |||
15609 | Fts5Buffer *p1, /* First list to merge */ | |||
15610 | int nBuf, /* Number of entries in apBuf[] */ | |||
15611 | Fts5Buffer *aBuf /* Array of other lists to merge into p1 */ | |||
15612 | ){ | |||
15613 | int i1 = 0; | |||
15614 | int i2 = 0; | |||
15615 | i64 iRowid1 = 0; | |||
15616 | i64 iRowid2 = 0; | |||
15617 | i64 iOut = 0; | |||
15618 | Fts5Buffer *p2 = &aBuf[0]; | |||
15619 | Fts5Buffer out; | |||
15620 | ||||
15621 | (void)nBuf; | |||
15622 | memset(&out, 0, sizeof(out)); | |||
15623 | assert( nBuf==1 )((void) (0)); | |||
15624 | sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); | |||
15625 | if( p->rc ) return; | |||
15626 | ||||
15627 | fts5NextRowid(p1, &i1, &iRowid1); | |||
15628 | fts5NextRowid(p2, &i2, &iRowid2); | |||
15629 | while( i1>=0 || i2>=0 ){ | |||
15630 | if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){ | |||
15631 | assert( iOut==0 || iRowid1>iOut )((void) (0)); | |||
15632 | fts5BufferSafeAppendVarint(&out, iRowid1 - iOut){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], (iRowid1 - iOut)); ((void) (0)); }; | |||
15633 | iOut = iRowid1; | |||
15634 | fts5NextRowid(p1, &i1, &iRowid1); | |||
15635 | }else{ | |||
15636 | assert( iOut==0 || iRowid2>iOut )((void) (0)); | |||
15637 | fts5BufferSafeAppendVarint(&out, iRowid2 - iOut){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], (iRowid2 - iOut)); ((void) (0)); }; | |||
15638 | iOut = iRowid2; | |||
15639 | if( i1>=0 && iRowid1==iRowid2 ){ | |||
15640 | fts5NextRowid(p1, &i1, &iRowid1); | |||
15641 | } | |||
15642 | fts5NextRowid(p2, &i2, &iRowid2); | |||
15643 | } | |||
15644 | } | |||
15645 | ||||
15646 | fts5BufferSwap(&out, p1); | |||
15647 | fts5BufferFree(&out)sqlite3Fts5BufferFree(&out); | |||
15648 | } | |||
15649 | ||||
15650 | typedef struct PrefixMerger PrefixMerger; | |||
15651 | struct PrefixMerger { | |||
15652 | Fts5DoclistIter iter; /* Doclist iterator */ | |||
15653 | i64 iPos; /* For iterating through a position list */ | |||
15654 | int iOff; | |||
15655 | u8 *aPos; | |||
15656 | PrefixMerger *pNext; /* Next in docid/poslist order */ | |||
15657 | }; | |||
15658 | ||||
15659 | static void fts5PrefixMergerInsertByRowid( | |||
15660 | PrefixMerger **ppHead, | |||
15661 | PrefixMerger *p | |||
15662 | ){ | |||
15663 | if( p->iter.aPoslist ){ | |||
15664 | PrefixMerger **pp = ppHead; | |||
15665 | while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){ | |||
15666 | pp = &(*pp)->pNext; | |||
15667 | } | |||
15668 | p->pNext = *pp; | |||
15669 | *pp = p; | |||
15670 | } | |||
15671 | } | |||
15672 | ||||
15673 | static void fts5PrefixMergerInsertByPosition( | |||
15674 | PrefixMerger **ppHead, | |||
15675 | PrefixMerger *p | |||
15676 | ){ | |||
15677 | if( p->iPos>=0 ){ | |||
15678 | PrefixMerger **pp = ppHead; | |||
15679 | while( *pp && p->iPos>(*pp)->iPos ){ | |||
15680 | pp = &(*pp)->pNext; | |||
15681 | } | |||
15682 | p->pNext = *pp; | |||
15683 | *pp = p; | |||
15684 | } | |||
15685 | } | |||
15686 | ||||
15687 | ||||
15688 | /* | |||
15689 | ** Array aBuf[] contains nBuf doclists. These are all merged in with the | |||
15690 | ** doclist in buffer p1. | |||
15691 | */ | |||
15692 | static void fts5MergePrefixLists( | |||
15693 | Fts5Index *p, /* FTS5 backend object */ | |||
15694 | Fts5Buffer *p1, /* First list to merge */ | |||
15695 | int nBuf, /* Number of buffers in array aBuf[] */ | |||
15696 | Fts5Buffer *aBuf /* Other lists to merge in */ | |||
15697 | ){ | |||
15698 | #define fts5PrefixMergerNextPosition(p)sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,& (p)->iOff,&(p)->iPos) \ | |||
15699 | sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos) | |||
15700 | #define FTS5_MERGE_NLIST16 16 | |||
15701 | PrefixMerger aMerger[FTS5_MERGE_NLIST16]; | |||
15702 | PrefixMerger *pHead = 0; | |||
15703 | int i; | |||
15704 | int nOut = 0; | |||
15705 | Fts5Buffer out = {0, 0, 0}; | |||
15706 | Fts5Buffer tmp = {0, 0, 0}; | |||
15707 | i64 iLastRowid = 0; | |||
15708 | ||||
15709 | /* Initialize a doclist-iterator for each input buffer. Arrange them in | |||
15710 | ** a linked-list starting at pHead in ascending order of rowid. Avoid | |||
15711 | ** linking any iterators already at EOF into the linked list at all. */ | |||
15712 | assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) )((void) (0)); | |||
15713 | memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1)); | |||
15714 | pHead = &aMerger[nBuf]; | |||
15715 | fts5DoclistIterInit(p1, &pHead->iter); | |||
15716 | for(i=0; i<nBuf; i++){ | |||
15717 | fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter); | |||
15718 | fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]); | |||
15719 | nOut += aBuf[i].n; | |||
15720 | } | |||
15721 | if( nOut==0 ) return; | |||
15722 | nOut += p1->n + 9 + 10*nBuf; | |||
15723 | ||||
15724 | /* The maximum size of the output is equal to the sum of the | |||
15725 | ** input sizes + 1 varint (9 bytes). The extra varint is because if the | |||
15726 | ** first rowid in one input is a large negative number, and the first in | |||
15727 | ** the other a non-negative number, the delta for the non-negative | |||
15728 | ** number will be larger on disk than the literal integer value | |||
15729 | ** was. | |||
15730 | ** | |||
15731 | ** Or, if the input position-lists are corrupt, then the output might | |||
15732 | ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1 | |||
15733 | ** (the value PoslistNext64() uses for EOF) as a position and appending | |||
15734 | ** it to the output. This can happen at most once for each input | |||
15735 | ** position-list, hence (nBuf+1) 10 byte paddings. */ | |||
15736 | if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return; | |||
15737 | ||||
15738 | while( pHead ){ | |||
15739 | fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid){ ((void) (0)); { ((&out))->n += sqlite3Fts5PutVarint( &((&out))->p[((&out))->n], ((u64)(pHead-> iter.iRowid) - (u64)(iLastRowid))); ((void) (0)); }; (iLastRowid ) = (pHead->iter.iRowid); }; | |||
15740 | ||||
15741 | if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){ | |||
15742 | /* Merge data from two or more poslists */ | |||
15743 | i64 iPrev = 0; | |||
15744 | int nTmp = FTS5_DATA_ZERO_PADDING8; | |||
15745 | int nMerge = 0; | |||
15746 | PrefixMerger *pSave = pHead; | |||
15747 | PrefixMerger *pThis = 0; | |||
15748 | int nTail = 0; | |||
15749 | ||||
15750 | pHead = 0; | |||
15751 | while( pSave && pSave->iter.iRowid==iLastRowid ){ | |||
15752 | PrefixMerger *pNext = pSave->pNext; | |||
15753 | pSave->iOff = 0; | |||
15754 | pSave->iPos = 0; | |||
15755 | pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize]; | |||
15756 | fts5PrefixMergerNextPosition(pSave)sqlite3Fts5PoslistNext64((pSave)->aPos,(pSave)->iter.nPoslist ,&(pSave)->iOff,&(pSave)->iPos); | |||
15757 | nTmp += pSave->iter.nPoslist + 10; | |||
15758 | nMerge++; | |||
15759 | fts5PrefixMergerInsertByPosition(&pHead, pSave); | |||
15760 | pSave = pNext; | |||
15761 | } | |||
15762 | ||||
15763 | if( pHead==0 || pHead->pNext==0 ){ | |||
15764 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
15765 | break; | |||
15766 | } | |||
15767 | ||||
15768 | /* See the earlier comment in this function for an explanation of why | |||
15769 | ** corrupt input position lists might cause the output to consume | |||
15770 | ** at most nMerge*10 bytes of unexpected space. */ | |||
15771 | if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){ | |||
15772 | break; | |||
15773 | } | |||
15774 | fts5BufferZero(&tmp)sqlite3Fts5BufferZero(&tmp); | |||
15775 | ||||
15776 | pThis = pHead; | |||
15777 | pHead = pThis->pNext; | |||
15778 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos); | |||
15779 | fts5PrefixMergerNextPosition(pThis)sqlite3Fts5PoslistNext64((pThis)->aPos,(pThis)->iter.nPoslist ,&(pThis)->iOff,&(pThis)->iPos); | |||
15780 | fts5PrefixMergerInsertByPosition(&pHead, pThis); | |||
15781 | ||||
15782 | while( pHead->pNext ){ | |||
15783 | pThis = pHead; | |||
15784 | if( pThis->iPos!=iPrev ){ | |||
15785 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos); | |||
15786 | } | |||
15787 | fts5PrefixMergerNextPosition(pThis)sqlite3Fts5PoslistNext64((pThis)->aPos,(pThis)->iter.nPoslist ,&(pThis)->iOff,&(pThis)->iPos); | |||
15788 | pHead = pThis->pNext; | |||
15789 | fts5PrefixMergerInsertByPosition(&pHead, pThis); | |||
15790 | } | |||
15791 | ||||
15792 | if( pHead->iPos!=iPrev ){ | |||
15793 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos); | |||
15794 | } | |||
15795 | nTail = pHead->iter.nPoslist - pHead->iOff; | |||
15796 | ||||
15797 | /* WRITEPOSLISTSIZE */ | |||
15798 | assert_nc( tmp.n+nTail<=nTmp )((void) (0)); | |||
15799 | assert( tmp.n+nTail<=nTmp+nMerge*10 )((void) (0)); | |||
15800 | if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING8 ){ | |||
15801 | if( p->rc==SQLITE_OK0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
15802 | break; | |||
15803 | } | |||
15804 | fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], ((tmp.n+nTail) * 2)); ((void) (0)); }; | |||
15805 | fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], tmp.p, tmp.n); (&out)->n += tmp.n; }; | |||
15806 | if( nTail>0 ){ | |||
15807 | fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], &pHead->aPos[pHead->iOff], nTail); (&out)-> n += nTail; }; | |||
15808 | } | |||
15809 | ||||
15810 | pHead = pSave; | |||
15811 | for(i=0; i<nBuf+1; i++){ | |||
15812 | PrefixMerger *pX = &aMerger[i]; | |||
15813 | if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){ | |||
15814 | fts5DoclistIterNext(&pX->iter); | |||
15815 | fts5PrefixMergerInsertByRowid(&pHead, pX); | |||
15816 | } | |||
15817 | } | |||
15818 | ||||
15819 | }else{ | |||
15820 | /* Copy poslist from pHead to output */ | |||
15821 | PrefixMerger *pThis = pHead; | |||
15822 | Fts5DoclistIter *pI = &pThis->iter; | |||
15823 | fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], pI->aPoslist, pI->nPoslist+pI->nSize); (&out) ->n += pI->nPoslist+pI->nSize; }; | |||
15824 | fts5DoclistIterNext(pI); | |||
15825 | pHead = pThis->pNext; | |||
15826 | fts5PrefixMergerInsertByRowid(&pHead, pThis); | |||
15827 | } | |||
15828 | } | |||
15829 | ||||
15830 | fts5BufferFree(p1)sqlite3Fts5BufferFree(p1); | |||
15831 | fts5BufferFree(&tmp)sqlite3Fts5BufferFree(&tmp); | |||
15832 | memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING8); | |||
15833 | *p1 = out; | |||
15834 | } | |||
15835 | ||||
15836 | ||||
15837 | /* | |||
15838 | ** Iterate through a range of entries in the FTS index, invoking the xVisit | |||
15839 | ** callback for each of them. | |||
15840 | ** | |||
15841 | ** Parameter pToken points to an nToken buffer containing an FTS index term | |||
15842 | ** (i.e. a document term with the preceding 1 byte index identifier - | |||
15843 | ** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits | |||
15844 | ** all entries for terms that have pToken/nToken as a prefix. If bPrefix | |||
15845 | ** is false, then only entries with pToken/nToken as the entire key are | |||
15846 | ** visited. | |||
15847 | ** | |||
15848 | ** If the current table is a tokendata=1 table, then if bPrefix is true then | |||
15849 | ** each index term is treated separately. However, if bPrefix is false, then | |||
15850 | ** all index terms corresponding to pToken/nToken are collapsed into a single | |||
15851 | ** term before the callback is invoked. | |||
15852 | ** | |||
15853 | ** The callback invoked for each entry visited is specified by paramter xVisit. | |||
15854 | ** Each time it is invoked, it is passed a pointer to the Fts5Index object, | |||
15855 | ** a copy of the 7th paramter to this function (pCtx) and a pointer to the | |||
15856 | ** iterator that indicates the current entry. If the current entry is the | |||
15857 | ** first with a new term (i.e. different from that of the previous entry, | |||
15858 | ** including the very first term), then the final two parameters are passed | |||
15859 | ** a pointer to the term and its size in bytes, respectively. If the current | |||
15860 | ** entry is not the first associated with its term, these two parameters | |||
15861 | ** are passed 0. | |||
15862 | ** | |||
15863 | ** If parameter pColset is not NULL, then it is used to filter entries before | |||
15864 | ** the callback is invoked. | |||
15865 | */ | |||
15866 | static int fts5VisitEntries( | |||
15867 | Fts5Index *p, /* Fts5 index object */ | |||
15868 | Fts5Colset *pColset, /* Columns filter to apply, or NULL */ | |||
15869 | u8 *pToken, /* Buffer containing token */ | |||
15870 | int nToken, /* Size of buffer pToken in bytes */ | |||
15871 | int bPrefix, /* True for a prefix scan */ | |||
15872 | void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int), | |||
15873 | void *pCtx /* Passed as second argument to xVisit() */ | |||
15874 | ){ | |||
15875 | const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN0x0008 : 0) | |||
15876 | | FTS5INDEX_QUERY_SKIPEMPTY0x0010 | |||
15877 | | FTS5INDEX_QUERY_NOOUTPUT0x0020; | |||
15878 | Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ | |||
15879 | int bNewTerm = 1; | |||
15880 | Fts5Structure *pStruct = fts5StructureRead(p); | |||
15881 | ||||
15882 | fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); | |||
15883 | fts5IterSetOutputCb(&p->rc, p1); | |||
15884 | for( /* no-op */ ; | |||
15885 | fts5MultiIterEof(p, p1)==0; | |||
15886 | fts5MultiIterNext2(p, p1, &bNewTerm) | |||
15887 | ){ | |||
15888 | Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; | |||
15889 | int nNew = 0; | |||
15890 | const u8 *pNew = 0; | |||
15891 | ||||
15892 | p1->xSetOutputs(p1, pSeg); | |||
15893 | if( p->rc ) break; | |||
15894 | ||||
15895 | if( bNewTerm ){ | |||
15896 | nNew = pSeg->term.n; | |||
15897 | pNew = pSeg->term.p; | |||
15898 | if( nNew<nToken || memcmp(pToken, pNew, nToken) ) break; | |||
15899 | } | |||
15900 | ||||
15901 | xVisit(p, pCtx, p1, pNew, nNew); | |||
15902 | } | |||
15903 | fts5MultiIterFree(p1); | |||
15904 | ||||
15905 | fts5StructureRelease(pStruct); | |||
15906 | return p->rc; | |||
15907 | } | |||
15908 | ||||
15909 | ||||
15910 | /* | |||
15911 | ** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an | |||
15912 | ** array of these for each row it visits (so all iRowid fields are the same). | |||
15913 | ** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an | |||
15914 | ** array of these for the entire query (in which case iRowid fields may take | |||
15915 | ** a variety of values). | |||
15916 | ** | |||
15917 | ** Each instance in the array indicates the iterator (and therefore term) | |||
15918 | ** associated with position iPos of rowid iRowid. This is used by the | |||
15919 | ** xInstToken() API. | |||
15920 | ** | |||
15921 | ** iRowid: | |||
15922 | ** Rowid for the current entry. | |||
15923 | ** | |||
15924 | ** iPos: | |||
15925 | ** Position of current entry within row. In the usual ((iCol<<32)+iOff) | |||
15926 | ** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()). | |||
15927 | ** | |||
15928 | ** iIter: | |||
15929 | ** If the Fts5TokenDataIter iterator that the entry is part of is | |||
15930 | ** actually an iterator (i.e. with nIter>0, not just a container for | |||
15931 | ** Fts5TokenDataMap structures), then this variable is an index into | |||
15932 | ** the apIter[] array. The corresponding term is that which the iterator | |||
15933 | ** at apIter[iIter] currently points to. | |||
15934 | ** | |||
15935 | ** Or, if the Fts5TokenDataIter iterator is just a container object | |||
15936 | ** (nIter==0), then iIter is an index into the term.p[] buffer where | |||
15937 | ** the term is stored. | |||
15938 | ** | |||
15939 | ** nByte: | |||
15940 | ** In the case where iIter is an index into term.p[], this variable | |||
15941 | ** is the size of the term in bytes. If iIter is an index into apIter[], | |||
15942 | ** this variable is unused. | |||
15943 | */ | |||
15944 | struct Fts5TokenDataMap { | |||
15945 | i64 iRowid; /* Row this token is located in */ | |||
15946 | i64 iPos; /* Position of token */ | |||
15947 | int iIter; /* Iterator token was read from */ | |||
15948 | int nByte; /* Length of token in bytes (or 0) */ | |||
15949 | }; | |||
15950 | ||||
15951 | /* | |||
15952 | ** An object used to supplement Fts5Iter for tokendata=1 iterators. | |||
15953 | ** | |||
15954 | ** This object serves two purposes. The first is as a container for an array | |||
15955 | ** of Fts5TokenDataMap structures, which are used to find the token required | |||
15956 | ** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and | |||
15957 | ** aMap[] variables. | |||
15958 | */ | |||
15959 | struct Fts5TokenDataIter { | |||
15960 | int nMapAlloc; /* Allocated size of aMap[] in entries */ | |||
15961 | int nMap; /* Number of valid entries in aMap[] */ | |||
15962 | Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */ | |||
15963 | ||||
15964 | /* The following are used for prefix-queries only. */ | |||
15965 | Fts5Buffer terms; | |||
15966 | ||||
15967 | /* The following are used for other full-token tokendata queries only. */ | |||
15968 | int nIter; | |||
15969 | int nIterAlloc; | |||
15970 | Fts5PoslistReader *aPoslistReader; | |||
15971 | int *aPoslistToIter; | |||
15972 | Fts5Iter *apIter[FLEXARRAY]; | |||
15973 | }; | |||
15974 | ||||
/*
** Size in bytes of an Fts5TokenDataIter object holding up to nSlot iterators.
**
** NOTE(review): apIter[] is declared as an array of (Fts5Iter*), but each
** slot is sized here as sizeof(Fts5Iter). That looks like a harmless
** over-allocation - confirm against all users before tightening it.
*/
#define SZ_FTS5TOKENDATAITER(nSlot) \
    (offsetof(Fts5TokenDataIter,apIter) + (nSlot)*sizeof(Fts5Iter))
15978 | ||||
15979 | /* | |||
15980 | ** The two input arrays - a1[] and a2[] - are in sorted order. This function | |||
15981 | ** merges the two arrays together and writes the result to output array | |||
15982 | ** aOut[]. aOut[] is guaranteed to be large enough to hold the result. | |||
15983 | ** | |||
15984 | ** Duplicate entries are copied into the output. So the size of the output | |||
15985 | ** array is always (n1+n2) entries. | |||
15986 | */ | |||
15987 | static void fts5TokendataMerge( | |||
15988 | Fts5TokenDataMap *a1, int n1, /* Input array 1 */ | |||
15989 | Fts5TokenDataMap *a2, int n2, /* Input array 2 */ | |||
15990 | Fts5TokenDataMap *aOut /* Output array */ | |||
15991 | ){ | |||
15992 | int i1 = 0; | |||
15993 | int i2 = 0; | |||
15994 | ||||
15995 | assert( n1>=0 && n2>=0 )((void) (0)); | |||
15996 | while( i1<n1 || i2<n2 ){ | |||
15997 | Fts5TokenDataMap *pOut = &aOut[i1+i2]; | |||
15998 | if( i2>=n2 || (i1<n1 && ( | |||
15999 | a1[i1].iRowid<a2[i2].iRowid | |||
16000 | || (a1[i1].iRowid==a2[i2].iRowid && a1[i1].iPos<=a2[i2].iPos) | |||
16001 | ))){ | |||
16002 | memcpy(pOut, &a1[i1], sizeof(Fts5TokenDataMap)); | |||
16003 | i1++; | |||
16004 | }else{ | |||
16005 | memcpy(pOut, &a2[i2], sizeof(Fts5TokenDataMap)); | |||
16006 | i2++; | |||
16007 | } | |||
16008 | } | |||
16009 | } | |||
16010 | ||||
16011 | ||||
16012 | /* | |||
16013 | ** Append a mapping to the token-map belonging to object pT. | |||
16014 | */ | |||
16015 | static void fts5TokendataIterAppendMap( | |||
16016 | Fts5Index *p, | |||
16017 | Fts5TokenDataIter *pT, | |||
16018 | int iIter, | |||
16019 | int nByte, | |||
16020 | i64 iRowid, | |||
16021 | i64 iPos | |||
16022 | ){ | |||
16023 | if( p->rc==SQLITE_OK0 ){ | |||
16024 | if( pT->nMap==pT->nMapAlloc ){ | |||
16025 | int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; | |||
16026 | int nAlloc = nNew * sizeof(Fts5TokenDataMap); | |||
16027 | Fts5TokenDataMap *aNew; | |||
16028 | ||||
16029 | aNew = (Fts5TokenDataMap*)sqlite3_reallocsqlite3_api->realloc(pT->aMap, nAlloc); | |||
16030 | if( aNew==0 ){ | |||
16031 | p->rc = SQLITE_NOMEM7; | |||
16032 | return; | |||
16033 | } | |||
16034 | ||||
16035 | pT->aMap = aNew; | |||
16036 | pT->nMapAlloc = nNew; | |||
16037 | } | |||
16038 | ||||
16039 | pT->aMap[pT->nMap].iRowid = iRowid; | |||
16040 | pT->aMap[pT->nMap].iPos = iPos; | |||
16041 | pT->aMap[pT->nMap].iIter = iIter; | |||
16042 | pT->aMap[pT->nMap].nByte = nByte; | |||
16043 | pT->nMap++; | |||
16044 | } | |||
16045 | } | |||
16046 | ||||
16047 | /* | |||
16048 | ** Sort the contents of the pT->aMap[] array. | |||
16049 | ** | |||
16050 | ** The sorting algorithm requires a malloc(). If this fails, an error code | |||
16051 | ** is left in Fts5Index.rc before returning. | |||
16052 | */ | |||
16053 | static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ | |||
16054 | Fts5TokenDataMap *aTmp = 0; | |||
16055 | int nByte = pT->nMap * sizeof(Fts5TokenDataMap); | |||
16056 | ||||
16057 | aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte); | |||
16058 | if( aTmp ){ | |||
16059 | Fts5TokenDataMap *a1 = pT->aMap; | |||
16060 | Fts5TokenDataMap *a2 = aTmp; | |||
16061 | i64 nHalf; | |||
16062 | ||||
16063 | for(nHalf=1; nHalf<pT->nMap; nHalf=nHalf*2){ | |||
16064 | int i1; | |||
16065 | for(i1=0; i1<pT->nMap; i1+=(nHalf*2)){ | |||
16066 | int n1 = MIN(nHalf, pT->nMap-i1)(((nHalf) < (pT->nMap-i1)) ? (nHalf) : (pT->nMap-i1) ); | |||
16067 | int n2 = MIN(nHalf, pT->nMap-i1-n1)(((nHalf) < (pT->nMap-i1-n1)) ? (nHalf) : (pT->nMap- i1-n1)); | |||
16068 | fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]); | |||
16069 | } | |||
16070 | SWAPVAL(Fts5TokenDataMap*, a1, a2){ Fts5TokenDataMap* tmp; tmp=a1; a1=a2; a2=tmp; }; | |||
16071 | } | |||
16072 | ||||
16073 | if( a1!=pT->aMap ){ | |||
16074 | memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap)); | |||
16075 | } | |||
16076 | sqlite3_freesqlite3_api->free(aTmp); | |||
16077 | ||||
16078 | #ifdef SQLITE_DEBUG | |||
16079 | { | |||
16080 | int ii; | |||
16081 | for(ii=1; ii<pT->nMap; ii++){ | |||
16082 | Fts5TokenDataMap *p1 = &pT->aMap[ii-1]; | |||
16083 | Fts5TokenDataMap *p2 = &pT->aMap[ii]; | |||
16084 | assert( p1->iRowid<p2->iRowid((void) (0)) | |||
16085 | || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos)((void) (0)) | |||
16086 | )((void) (0)); | |||
16087 | } | |||
16088 | } | |||
16089 | #endif | |||
16090 | } | |||
16091 | } | |||
16092 | ||||
16093 | /* | |||
16094 | ** Delete an Fts5TokenDataIter structure and its contents. | |||
16095 | */ | |||
16096 | static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ | |||
16097 | if( pSet ){ | |||
16098 | int ii; | |||
16099 | for(ii=0; ii<pSet->nIter; ii++){ | |||
16100 | fts5MultiIterFree(pSet->apIter[ii]); | |||
16101 | } | |||
16102 | fts5BufferFree(&pSet->terms)sqlite3Fts5BufferFree(&pSet->terms); | |||
16103 | sqlite3_freesqlite3_api->free(pSet->aPoslistReader); | |||
16104 | sqlite3_freesqlite3_api->free(pSet->aMap); | |||
16105 | sqlite3_freesqlite3_api->free(pSet); | |||
16106 | } | |||
16107 | } | |||
16108 | ||||
16109 | ||||
16110 | /* | |||
16111 | ** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata() | |||
16112 | ** to pass data to prefixIterSetupTokendataCb(). | |||
16113 | */ | |||
16114 | typedef struct TokendataSetupCtx TokendataSetupCtx; | |||
16115 | struct TokendataSetupCtx { | |||
16116 | Fts5TokenDataIter *pT; /* Object being populated with mappings */ | |||
16117 | int iTermOff; /* Offset of current term in terms.p[] */ | |||
16118 | int nTermByte; /* Size of current term in bytes */ | |||
16119 | }; | |||
16120 | ||||
16121 | /* | |||
16122 | ** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This | |||
16123 | ** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each | |||
16124 | ** position in the current position-list. It doesn't matter that some of | |||
16125 | ** these may be out of order - they will be sorted later. | |||
16126 | */ | |||
16127 | static void prefixIterSetupTokendataCb( | |||
16128 | Fts5Index *p, | |||
16129 | void *pCtx, | |||
16130 | Fts5Iter *p1, | |||
16131 | const u8 *pNew, | |||
16132 | int nNew | |||
16133 | ){ | |||
16134 | TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx; | |||
16135 | int iPosOff = 0; | |||
16136 | i64 iPos = 0; | |||
16137 | ||||
16138 | if( pNew ){ | |||
16139 | pSetup->nTermByte = nNew-1; | |||
16140 | pSetup->iTermOff = pSetup->pT->terms.n; | |||
16141 | fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1)sqlite3Fts5BufferAppendBlob(&p->rc,&pSetup->pT-> terms,nNew-1,pNew+1); | |||
16142 | } | |||
16143 | ||||
16144 | while( 0==sqlite3Fts5PoslistNext64( | |||
16145 | p1->base.pData, p1->base.nData, &iPosOff, &iPos | |||
16146 | ) ){ | |||
16147 | fts5TokendataIterAppendMap(p, | |||
16148 | pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos | |||
16149 | ); | |||
16150 | } | |||
16151 | } | |||
16152 | ||||
16153 | ||||
16154 | /* | |||
16155 | ** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries(). | |||
16156 | */ | |||
16157 | typedef struct PrefixSetupCtx PrefixSetupCtx; | |||
16158 | struct PrefixSetupCtx { | |||
16159 | void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); | |||
16160 | void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); | |||
16161 | i64 iLastRowid; | |||
16162 | int nMerge; | |||
16163 | Fts5Buffer *aBuf; | |||
16164 | int nBuf; | |||
16165 | Fts5Buffer doclist; | |||
16166 | TokendataSetupCtx *pTokendata; | |||
16167 | }; | |||
16168 | ||||
16169 | /* | |||
16170 | ** fts5VisitEntries() callback used by fts5SetupPrefixIter() | |||
16171 | */ | |||
16172 | static void prefixIterSetupCb( | |||
16173 | Fts5Index *p, | |||
16174 | void *pCtx, | |||
16175 | Fts5Iter *p1, | |||
16176 | const u8 *pNew, | |||
16177 | int nNew | |||
16178 | ){ | |||
16179 | PrefixSetupCtx *pSetup = (PrefixSetupCtx*)pCtx; | |||
16180 | const int nMerge = pSetup->nMerge; | |||
16181 | ||||
16182 | if( p1->base.nData>0 ){ | |||
16183 | if( p1->base.iRowid<=pSetup->iLastRowid && pSetup->doclist.n>0 ){ | |||
16184 | int i; | |||
16185 | for(i=0; p->rc==SQLITE_OK0 && pSetup->doclist.n; i++){ | |||
16186 | int i1 = i*nMerge; | |||
16187 | int iStore; | |||
16188 | assert( i1+nMerge<=pSetup->nBuf )((void) (0)); | |||
16189 | for(iStore=i1; iStore<i1+nMerge; iStore++){ | |||
16190 | if( pSetup->aBuf[iStore].n==0 ){ | |||
16191 | fts5BufferSwap(&pSetup->doclist, &pSetup->aBuf[iStore]); | |||
16192 | fts5BufferZero(&pSetup->doclist)sqlite3Fts5BufferZero(&pSetup->doclist); | |||
16193 | break; | |||
16194 | } | |||
16195 | } | |||
16196 | if( iStore==i1+nMerge ){ | |||
16197 | pSetup->xMerge(p, &pSetup->doclist, nMerge, &pSetup->aBuf[i1]); | |||
16198 | for(iStore=i1; iStore<i1+nMerge; iStore++){ | |||
16199 | fts5BufferZero(&pSetup->aBuf[iStore])sqlite3Fts5BufferZero(&pSetup->aBuf[iStore]); | |||
16200 | } | |||
16201 | } | |||
16202 | } | |||
16203 | pSetup->iLastRowid = 0; | |||
16204 | } | |||
16205 | ||||
16206 | pSetup->xAppend( | |||
16207 | p, (u64)p1->base.iRowid-(u64)pSetup->iLastRowid, p1, &pSetup->doclist | |||
16208 | ); | |||
16209 | pSetup->iLastRowid = p1->base.iRowid; | |||
16210 | } | |||
16211 | ||||
16212 | if( pSetup->pTokendata ){ | |||
16213 | prefixIterSetupTokendataCb(p, (void*)pSetup->pTokendata, p1, pNew, nNew); | |||
16214 | } | |||
16215 | } | |||
16216 | ||||
16217 | static void fts5SetupPrefixIter( | |||
16218 | Fts5Index *p, /* Index to read from */ | |||
16219 | int bDesc, /* True for "ORDER BY rowid DESC" */ | |||
16220 | int iIdx, /* Index to scan for data */ | |||
16221 | u8 *pToken, /* Buffer containing prefix to match */ | |||
16222 | int nToken, /* Size of buffer pToken in bytes */ | |||
16223 | Fts5Colset *pColset, /* Restrict matches to these columns */ | |||
16224 | Fts5Iter **ppIter /* OUT: New iterator */ | |||
16225 | ){ | |||
16226 | Fts5Structure *pStruct; | |||
16227 | PrefixSetupCtx s; | |||
16228 | TokendataSetupCtx s2; | |||
16229 | ||||
16230 | memset(&s, 0, sizeof(s)); | |||
16231 | memset(&s2, 0, sizeof(s2)); | |||
16232 | ||||
16233 | s.nMerge = 1; | |||
16234 | s.iLastRowid = 0; | |||
16235 | s.nBuf = 32; | |||
16236 | if( iIdx==0 | |||
16237 | && p->pConfig->eDetail==FTS5_DETAIL_FULL0 | |||
16238 | && p->pConfig->bPrefixInsttoken | |||
16239 | ){ | |||
16240 | s.pTokendata = &s2; | |||
16241 | s2.pT = (Fts5TokenDataIter*)fts5IdxMalloc(p, SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | |||
16242 | } | |||
16243 | ||||
16244 | if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
16245 | s.xMerge = fts5MergeRowidLists; | |||
16246 | s.xAppend = fts5AppendRowid; | |||
16247 | }else{ | |||
16248 | s.nMerge = FTS5_MERGE_NLIST16-1; | |||
16249 | s.nBuf = s.nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */ | |||
16250 | s.xMerge = fts5MergePrefixLists; | |||
16251 | s.xAppend = fts5AppendPoslist; | |||
16252 | } | |||
16253 | ||||
16254 | s.aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*s.nBuf); | |||
16255 | pStruct = fts5StructureRead(p); | |||
16256 | assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) )((void) (0)); | |||
16257 | ||||
16258 | if( p->rc==SQLITE_OK0 ){ | |||
16259 | void *pCtx = (void*)&s; | |||
16260 | int i; | |||
16261 | Fts5Data *pData; | |||
16262 | ||||
16263 | /* If iIdx is non-zero, then it is the number of a prefix-index for | |||
16264 | ** prefixes 1 character longer than the prefix being queried for. That | |||
16265 | ** index contains all the doclists required, except for the one | |||
16266 | ** corresponding to the prefix itself. That one is extracted from the | |||
16267 | ** main term index here. */ | |||
16268 | if( iIdx!=0 ){ | |||
16269 | pToken[0] = FTS5_MAIN_PREFIX'0'; | |||
16270 | fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx); | |||
16271 | } | |||
16272 | ||||
16273 | pToken[0] = FTS5_MAIN_PREFIX'0' + iIdx; | |||
16274 | fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx); | |||
16275 | ||||
16276 | assert( (s.nBuf%s.nMerge)==0 )((void) (0)); | |||
16277 | for(i=0; i<s.nBuf; i+=s.nMerge){ | |||
16278 | int iFree; | |||
16279 | if( p->rc==SQLITE_OK0 ){ | |||
16280 | s.xMerge(p, &s.doclist, s.nMerge, &s.aBuf[i]); | |||
16281 | } | |||
16282 | for(iFree=i; iFree<i+s.nMerge; iFree++){ | |||
16283 | fts5BufferFree(&s.aBuf[iFree])sqlite3Fts5BufferFree(&s.aBuf[iFree]); | |||
16284 | } | |||
16285 | } | |||
16286 | ||||
16287 | pData = fts5IdxMalloc(p, sizeof(*pData) | |||
16288 | + ((i64)s.doclist.n)+FTS5_DATA_ZERO_PADDING8); | |||
16289 | assert( pData!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
16290 | if( pData ){ | |||
16291 | pData->p = (u8*)&pData[1]; | |||
16292 | pData->nn = pData->szLeaf = s.doclist.n; | |||
16293 | if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n); | |||
16294 | fts5MultiIterNew2(p, pData, bDesc, ppIter); | |||
16295 | } | |||
16296 | ||||
16297 | assert( (*ppIter)!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
16298 | if( p->rc==SQLITE_OK0 && s.pTokendata ){ | |||
16299 | fts5TokendataIterSortMap(p, s2.pT); | |||
16300 | (*ppIter)->pTokenDataIter = s2.pT; | |||
16301 | s2.pT = 0; | |||
16302 | } | |||
16303 | } | |||
16304 | ||||
16305 | fts5TokendataIterDelete(s2.pT); | |||
16306 | fts5BufferFree(&s.doclist)sqlite3Fts5BufferFree(&s.doclist); | |||
16307 | fts5StructureRelease(pStruct); | |||
16308 | sqlite3_freesqlite3_api->free(s.aBuf); | |||
16309 | } | |||
16310 | ||||
16311 | ||||
16312 | /* | |||
16313 | ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain | |||
16314 | ** to the document with rowid iRowid. | |||
16315 | */ | |||
16316 | static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ | |||
16317 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
16318 | ||||
16319 | /* Allocate the hash table if it has not already been allocated */ | |||
16320 | if( p->pHash==0 ){ | |||
16321 | p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); | |||
16322 | } | |||
16323 | ||||
16324 | /* Flush the hash table to disk if required */ | |||
16325 | if( iRowid<p->iWriteRowid | |||
16326 | || (iRowid==p->iWriteRowid && p->bDelete==0) | |||
16327 | || (p->nPendingData > p->pConfig->nHashSize) | |||
16328 | ){ | |||
16329 | fts5IndexFlush(p); | |||
16330 | } | |||
16331 | ||||
16332 | p->iWriteRowid = iRowid; | |||
16333 | p->bDelete = bDelete; | |||
16334 | if( bDelete==0 ){ | |||
16335 | p->nPendingRow++; | |||
16336 | } | |||
16337 | return fts5IndexReturn(p); | |||
16338 | } | |||
16339 | ||||
16340 | /* | |||
16341 | ** Commit data to disk. | |||
16342 | */ | |||
16343 | static int sqlite3Fts5IndexSync(Fts5Index *p){ | |||
16344 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
16345 | fts5IndexFlush(p); | |||
16346 | fts5IndexCloseReader(p); | |||
16347 | return fts5IndexReturn(p); | |||
16348 | } | |||
16349 | ||||
16350 | /* | |||
16351 | ** Discard any data stored in the in-memory hash tables. Do not write it | |||
16352 | ** to the database. Additionally, assume that the contents of the %_data | |||
16353 | ** table may have changed on disk. So any in-memory caches of %_data | |||
16354 | ** records must be invalidated. | |||
16355 | */ | |||
16356 | static int sqlite3Fts5IndexRollback(Fts5Index *p){ | |||
16357 | fts5IndexCloseReader(p); | |||
16358 | fts5IndexDiscardData(p); | |||
16359 | fts5StructureInvalidate(p); | |||
16360 | return fts5IndexReturn(p); | |||
16361 | } | |||
16362 | ||||
16363 | /* | |||
16364 | ** The %_data table is completely empty when this function is called. This | |||
16365 | ** function populates it with the initial structure objects for each index, | |||
16366 | ** and the initial version of the "averages" record (a zero-byte blob). | |||
16367 | */ | |||
16368 | static int sqlite3Fts5IndexReinit(Fts5Index *p){ | |||
16369 | Fts5Structure *pTmp; | |||
16370 | u8 tmpSpace[SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel ))]; | |||
16371 | fts5StructureInvalidate(p); | |||
16372 | fts5IndexDiscardData(p); | |||
16373 | pTmp = (Fts5Structure*)tmpSpace; | |||
16374 | memset(pTmp, 0, SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel ))); | |||
16375 | if( p->pConfig->bContentlessDelete ){ | |||
16376 | pTmp->nOriginCntr = 1; | |||
16377 | } | |||
16378 | fts5DataWrite(p, FTS5_AVERAGES_ROWID1, (const u8*)"", 0); | |||
16379 | fts5StructureWrite(p, pTmp); | |||
16380 | return fts5IndexReturn(p); | |||
16381 | } | |||
16382 | ||||
16383 | /* | |||
16384 | ** Open a new Fts5Index handle. If the bCreate argument is true, create | |||
16385 | ** and initialize the underlying %_data table. | |||
16386 | ** | |||
16387 | ** If successful, set *pp to point to the new object and return SQLITE_OK. | |||
16388 | ** Otherwise, set *pp to NULL and return an SQLite error code. | |||
16389 | */ | |||
16390 | static int sqlite3Fts5IndexOpen( | |||
16391 | Fts5Config *pConfig, | |||
16392 | int bCreate, | |||
16393 | Fts5Index **pp, | |||
16394 | char **pzErr | |||
16395 | ){ | |||
16396 | int rc = SQLITE_OK0; | |||
16397 | Fts5Index *p; /* New object */ | |||
16398 | ||||
16399 | *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); | |||
16400 | if( rc==SQLITE_OK0 ){ | |||
16401 | p->pConfig = pConfig; | |||
16402 | p->nWorkUnit = FTS5_WORK_UNIT64; | |||
16403 | p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); | |||
16404 | if( p->zDataTbl && bCreate ){ | |||
16405 | rc = sqlite3Fts5CreateTable( | |||
16406 | pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr | |||
16407 | ); | |||
16408 | if( rc==SQLITE_OK0 ){ | |||
16409 | rc = sqlite3Fts5CreateTable(pConfig, "idx", | |||
16410 | "segid, term, pgno, PRIMARY KEY(segid, term)", | |||
16411 | 1, pzErr | |||
16412 | ); | |||
16413 | } | |||
16414 | if( rc==SQLITE_OK0 ){ | |||
16415 | rc = sqlite3Fts5IndexReinit(p); | |||
16416 | } | |||
16417 | } | |||
16418 | } | |||
16419 | ||||
16420 | assert( rc!=SQLITE_OK || p->rc==SQLITE_OK )((void) (0)); | |||
16421 | if( rc ){ | |||
16422 | sqlite3Fts5IndexClose(p); | |||
16423 | *pp = 0; | |||
16424 | } | |||
16425 | return rc; | |||
16426 | } | |||
16427 | ||||
16428 | /* | |||
16429 | ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). | |||
16430 | */ | |||
16431 | static int sqlite3Fts5IndexClose(Fts5Index *p){ | |||
16432 | int rc = SQLITE_OK0; | |||
16433 | if( p ){ | |||
16434 | assert( p->pReader==0 )((void) (0)); | |||
16435 | fts5StructureInvalidate(p); | |||
16436 | sqlite3_finalizesqlite3_api->finalize(p->pWriter); | |||
16437 | sqlite3_finalizesqlite3_api->finalize(p->pDeleter); | |||
16438 | sqlite3_finalizesqlite3_api->finalize(p->pIdxWriter); | |||
16439 | sqlite3_finalizesqlite3_api->finalize(p->pIdxDeleter); | |||
16440 | sqlite3_finalizesqlite3_api->finalize(p->pIdxSelect); | |||
16441 | sqlite3_finalizesqlite3_api->finalize(p->pIdxNextSelect); | |||
16442 | sqlite3_finalizesqlite3_api->finalize(p->pDataVersion); | |||
16443 | sqlite3_finalizesqlite3_api->finalize(p->pDeleteFromIdx); | |||
16444 | sqlite3Fts5HashFree(p->pHash); | |||
16445 | sqlite3_freesqlite3_api->free(p->zDataTbl); | |||
16446 | sqlite3_freesqlite3_api->free(p); | |||
16447 | } | |||
16448 | return rc; | |||
16449 | } | |||
16450 | ||||
/*
** Argument p points to a buffer containing utf-8 text that is n bytes in
** size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
**
** A character is one byte, or a lead byte (>=0xc0) followed by any number
** of continuation bytes (10xxxxxx). A lead byte that is the very last
** byte of the buffer is treated as "not enough input" and yields 0.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int n = 0;
  int i;
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      if( n>=nByte ) return 0;
      /* Skip the continuation bytes of this multi-byte character. */
      while( (p[n] & 0xc0)==0x80 ){
        n++;
        if( n>=nByte ){
          /* Reaching the end of the buffer is fine only if this was the
          ** last character requested. */
          if( i+1==nChar ) break;
          return 0;
        }
      }
    }
  }
  return n;
}
16478 | ||||
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
**
** Each lead byte (>=0xc0) together with the continuation bytes (10xxxxxx)
** that follow it counts as one character; every other byte counts as one
** character on its own.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar = 0;
  int i = 0;
  while( i<nIn ){
    if( (unsigned char)pIn[i++]>=0xc0 ){
      while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
    }
    nChar++;
  }
  return nChar;
}
16494 | ||||
16495 | /* | |||
16496 | ** Insert or remove data to or from the index. Each time a document is | |||
16497 | ** added to or removed from the index, this function is called one or more | |||
16498 | ** times. | |||
16499 | ** | |||
16500 | ** For an insert, it must be called once for each token in the new document. | |||
16501 | ** If the operation is a delete, it must be called (at least) once for each | |||
16502 | ** unique token in the document with an iCol value less than zero. The iPos | |||
16503 | ** argument is ignored for a delete. | |||
16504 | */ | |||
16505 | static int sqlite3Fts5IndexWrite( | |||
16506 | Fts5Index *p, /* Index to write to */ | |||
16507 | int iCol, /* Column token appears in (-ve -> delete) */ | |||
16508 | int iPos, /* Position of token within column */ | |||
16509 | const char *pToken, int nToken /* Token to add or remove to or from index */ | |||
16510 | ){ | |||
16511 | int i; /* Used to iterate through indexes */ | |||
16512 | int rc = SQLITE_OK0; /* Return code */ | |||
16513 | Fts5Config *pConfig = p->pConfig; | |||
16514 | ||||
16515 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
16516 | assert( (iCol<0)==p->bDelete )((void) (0)); | |||
16517 | ||||
16518 | /* Add the entry to the main terms index. */ | |||
16519 | rc = sqlite3Fts5HashWrite( | |||
16520 | p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX'0', pToken, nToken | |||
16521 | ); | |||
16522 | ||||
16523 | for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK0; i++){ | |||
16524 | const int nChar = pConfig->aPrefix[i]; | |||
16525 | int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); | |||
16526 | if( nByte ){ | |||
16527 | rc = sqlite3Fts5HashWrite(p->pHash, | |||
16528 | p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX'0'+i+1), pToken, | |||
16529 | nByte | |||
16530 | ); | |||
16531 | } | |||
16532 | } | |||
16533 | ||||
16534 | return rc; | |||
16535 | } | |||
16536 | ||||
16537 | /* | |||
16538 | ** pToken points to a buffer of size nToken bytes containing a search | |||
16539 | ** term, including the index number at the start, used on a tokendata=1 | |||
16540 | ** table. This function returns true if the term in buffer pBuf matches | |||
16541 | ** token pToken/nToken. | |||
16542 | */ | |||
16543 | static int fts5IsTokendataPrefix( | |||
16544 | Fts5Buffer *pBuf, | |||
16545 | const u8 *pToken, | |||
16546 | int nToken | |||
16547 | ){ | |||
16548 | return ( | |||
16549 | pBuf->n>=nToken | |||
16550 | && 0==memcmp(pBuf->p, pToken, nToken) | |||
16551 | && (pBuf->n==nToken || pBuf->p[nToken]==0x00) | |||
16552 | ); | |||
16553 | } | |||
16554 | ||||
16555 | /* | |||
16556 | ** Ensure the segment-iterator passed as the only argument points to EOF. | |||
16557 | */ | |||
16558 | static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ | |||
16559 | fts5DataRelease(pSeg->pLeaf); | |||
16560 | pSeg->pLeaf = 0; | |||
16561 | } | |||
16562 | ||||
16563 | static void fts5IterClose(Fts5IndexIter *pIndexIter){ | |||
16564 | if( pIndexIter ){ | |||
16565 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
16566 | Fts5Index *pIndex = pIter->pIndex; | |||
16567 | fts5TokendataIterDelete(pIter->pTokenDataIter); | |||
16568 | fts5MultiIterFree(pIter); | |||
16569 | fts5IndexCloseReader(pIndex); | |||
16570 | } | |||
16571 | } | |||
16572 | ||||
16573 | /* | |||
16574 | ** This function appends iterator pAppend to Fts5TokenDataIter pIn and | |||
16575 | ** returns the result. | |||
16576 | */ | |||
16577 | static Fts5TokenDataIter *fts5AppendTokendataIter( | |||
16578 | Fts5Index *p, /* Index object (for error code) */ | |||
16579 | Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */ | |||
16580 | Fts5Iter *pAppend /* Append this iterator */ | |||
16581 | ){ | |||
16582 | Fts5TokenDataIter *pRet = pIn; | |||
16583 | ||||
16584 | if( p->rc==SQLITE_OK0 ){ | |||
16585 | if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){ | |||
16586 | int nAlloc = pIn ? pIn->nIterAlloc*2 : 16; | |||
16587 | int nByte = SZ_FTS5TOKENDATAITER(nAlloc+1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (nAlloc+1)*sizeof (Fts5Iter)); | |||
16588 | Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_reallocsqlite3_api->realloc(pIn, nByte); | |||
16589 | ||||
16590 | if( pNew==0 ){ | |||
16591 | p->rc = SQLITE_NOMEM7; | |||
16592 | }else{ | |||
16593 | if( pIn==0 ) memset(pNew, 0, nByte); | |||
16594 | pRet = pNew; | |||
16595 | pNew->nIterAlloc = nAlloc; | |||
16596 | } | |||
16597 | } | |||
16598 | } | |||
16599 | if( p->rc ){ | |||
16600 | fts5IterClose((Fts5IndexIter*)pAppend); | |||
16601 | }else{ | |||
16602 | pRet->apIter[pRet->nIter++] = pAppend; | |||
16603 | } | |||
16604 | assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc )((void) (0)); | |||
16605 | ||||
16606 | return pRet; | |||
16607 | } | |||
16608 | ||||
16609 | /* | |||
16610 | ** The iterator passed as the only argument must be a tokendata=1 iterator | |||
16611 | ** (pIter->pTokenDataIter!=0). This function sets the iterator output | |||
16612 | ** variables (pIter->base.*) according to the contents of the current | |||
16613 | ** row. | |||
16614 | */ | |||
16615 | static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ | |||
16616 | int ii; | |||
16617 | int nHit = 0; | |||
16618 | i64 iRowid = SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))); | |||
16619 | int iMin = 0; | |||
16620 | ||||
16621 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | |||
16622 | ||||
16623 | pIter->base.nData = 0; | |||
16624 | pIter->base.pData = 0; | |||
16625 | ||||
16626 | for(ii=0; ii<pT->nIter; ii++){ | |||
16627 | Fts5Iter *p = pT->apIter[ii]; | |||
16628 | if( p->base.bEof==0 ){ | |||
16629 | if( nHit==0 || p->base.iRowid<iRowid ){ | |||
16630 | iRowid = p->base.iRowid; | |||
16631 | nHit = 1; | |||
16632 | pIter->base.pData = p->base.pData; | |||
16633 | pIter->base.nData = p->base.nData; | |||
16634 | iMin = ii; | |||
16635 | }else if( p->base.iRowid==iRowid ){ | |||
16636 | nHit++; | |||
16637 | } | |||
16638 | } | |||
16639 | } | |||
16640 | ||||
16641 | if( nHit==0 ){ | |||
16642 | pIter->base.bEof = 1; | |||
16643 | }else{ | |||
16644 | int eDetail = pIter->pIndex->pConfig->eDetail; | |||
16645 | pIter->base.bEof = 0; | |||
16646 | pIter->base.iRowid = iRowid; | |||
16647 | ||||
16648 | if( nHit==1 && eDetail==FTS5_DETAIL_FULL0 ){ | |||
16649 | fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, 0, iRowid, -1); | |||
16650 | }else | |||
16651 | if( nHit>1 && eDetail!=FTS5_DETAIL_NONE1 ){ | |||
16652 | int nReader = 0; | |||
16653 | int nByte = 0; | |||
16654 | i64 iPrev = 0; | |||
16655 | ||||
16656 | /* Allocate array of iterators if they are not already allocated. */ | |||
16657 | if( pT->aPoslistReader==0 ){ | |||
16658 | pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero( | |||
16659 | &pIter->pIndex->rc, | |||
16660 | pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int)) | |||
16661 | ); | |||
16662 | if( pT->aPoslistReader==0 ) return; | |||
16663 | pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter]; | |||
16664 | } | |||
16665 | ||||
16666 | /* Populate an iterator for each poslist that will be merged */ | |||
16667 | for(ii=0; ii<pT->nIter; ii++){ | |||
16668 | Fts5Iter *p = pT->apIter[ii]; | |||
16669 | if( iRowid==p->base.iRowid ){ | |||
16670 | pT->aPoslistToIter[nReader] = ii; | |||
16671 | sqlite3Fts5PoslistReaderInit( | |||
16672 | p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++] | |||
16673 | ); | |||
16674 | nByte += p->base.nData; | |||
16675 | } | |||
16676 | } | |||
16677 | ||||
16678 | /* Ensure the output buffer is large enough */ | |||
16679 | if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10)( (u32)((&pIter->poslist)->n) + (u32)(nByte+nHit*10 ) <= (u32)((&pIter->poslist)->nSpace) ? 0 : sqlite3Fts5BufferSize ((&pIter->pIndex->rc),(&pIter->poslist),(nByte +nHit*10)+(&pIter->poslist)->n) ) ){ | |||
16680 | return; | |||
16681 | } | |||
16682 | ||||
16683 | /* Ensure the token-mapping is large enough */ | |||
16684 | if( eDetail==FTS5_DETAIL_FULL0 && pT->nMapAlloc<(pT->nMap + nByte) ){ | |||
16685 | int nNew = (pT->nMapAlloc + nByte) * 2; | |||
16686 | Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_reallocsqlite3_api->realloc( | |||
16687 | pT->aMap, nNew*sizeof(Fts5TokenDataMap) | |||
16688 | ); | |||
16689 | if( aNew==0 ){ | |||
16690 | pIter->pIndex->rc = SQLITE_NOMEM7; | |||
16691 | return; | |||
16692 | } | |||
16693 | pT->aMap = aNew; | |||
16694 | pT->nMapAlloc = nNew; | |||
16695 | } | |||
16696 | ||||
16697 | pIter->poslist.n = 0; | |||
16698 | ||||
16699 | while( 1 ){ | |||
16700 | i64 iMinPos = LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | |||
16701 | ||||
16702 | /* Find smallest position */ | |||
16703 | iMin = 0; | |||
16704 | for(ii=0; ii<nReader; ii++){ | |||
16705 | Fts5PoslistReader *pReader = &pT->aPoslistReader[ii]; | |||
16706 | if( pReader->bEof==0 ){ | |||
16707 | if( pReader->iPos<iMinPos ){ | |||
16708 | iMinPos = pReader->iPos; | |||
16709 | iMin = ii; | |||
16710 | } | |||
16711 | } | |||
16712 | } | |||
16713 | ||||
16714 | /* If all readers were at EOF, break out of the loop. */ | |||
16715 | if( iMinPos==LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) ) break; | |||
16716 | ||||
16717 | sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos); | |||
16718 | sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]); | |||
16719 | ||||
16720 | if( eDetail==FTS5_DETAIL_FULL0 ){ | |||
16721 | pT->aMap[pT->nMap].iPos = iMinPos; | |||
16722 | pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin]; | |||
16723 | pT->aMap[pT->nMap].iRowid = iRowid; | |||
16724 | pT->nMap++; | |||
16725 | } | |||
16726 | } | |||
16727 | ||||
16728 | pIter->base.pData = pIter->poslist.p; | |||
16729 | pIter->base.nData = pIter->poslist.n; | |||
16730 | } | |||
16731 | } | |||
16732 | } | |||
16733 | ||||
16734 | /* | |||
16735 | ** The iterator passed as the only argument must be a tokendata=1 iterator | |||
16736 | ** (pIter->pTokenDataIter!=0). This function advances the iterator. If | |||
16737 | ** argument bFrom is false, then the iterator is advanced to the next | |||
16738 | ** entry. Or, if bFrom is true, it is advanced to the first entry with | |||
16739 | ** a rowid of iFrom or greater. | |||
16740 | */ | |||
16741 | static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ | |||
16742 | int ii; | |||
16743 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | |||
16744 | Fts5Index *pIndex = pIter->pIndex; | |||
16745 | ||||
16746 | for(ii=0; ii<pT->nIter; ii++){ | |||
16747 | Fts5Iter *p = pT->apIter[ii]; | |||
16748 | if( p->base.bEof==0 | |||
16749 | && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom)) | |||
16750 | ){ | |||
16751 | fts5MultiIterNext(pIndex, p, bFrom, iFrom); | |||
16752 | while( bFrom && p->base.bEof==0 | |||
16753 | && p->base.iRowid<iFrom | |||
16754 | && pIndex->rc==SQLITE_OK0 | |||
16755 | ){ | |||
16756 | fts5MultiIterNext(pIndex, p, 0, 0); | |||
16757 | } | |||
16758 | } | |||
16759 | } | |||
16760 | ||||
16761 | if( pIndex->rc==SQLITE_OK0 ){ | |||
16762 | fts5IterSetOutputsTokendata(pIter); | |||
16763 | } | |||
16764 | } | |||
16765 | ||||
16766 | /* | |||
16767 | ** If the segment-iterator passed as the first argument is at EOF, then | |||
16768 | ** set pIter->term to a copy of buffer pTerm. | |||
16769 | */ | |||
16770 | static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ | |||
16771 | if( pIter && pIter->aSeg[0].pLeaf==0 ){ | |||
16772 | fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p)sqlite3Fts5BufferSet(&pIter->pIndex->rc,&pIter-> aSeg[0].term,pTerm->n,pTerm->p); | |||
16773 | } | |||
16774 | } | |||
16775 | ||||
16776 | /* | |||
16777 | ** This function sets up an iterator to use for a non-prefix query on a | |||
16778 | ** tokendata=1 table. | |||
16779 | */ | |||
16780 | static Fts5Iter *fts5SetupTokendataIter( | |||
16781 | Fts5Index *p, /* FTS index to query */ | |||
16782 | const u8 *pToken, /* Buffer containing query term */ | |||
16783 | int nToken, /* Size of buffer pToken in bytes */ | |||
16784 | Fts5Colset *pColset /* Colset to filter on */ | |||
16785 | ){ | |||
16786 | Fts5Iter *pRet = 0; | |||
16787 | Fts5TokenDataIter *pSet = 0; | |||
16788 | Fts5Structure *pStruct = 0; | |||
16789 | const int flags = FTS5INDEX_QUERY_SCANONETERM0x0100 | FTS5INDEX_QUERY_SCAN0x0008; | |||
16790 | ||||
16791 | Fts5Buffer bSeek = {0, 0, 0}; | |||
16792 | Fts5Buffer *pSmall = 0; | |||
16793 | ||||
16794 | fts5IndexFlush(p); | |||
16795 | pStruct = fts5StructureRead(p); | |||
16796 | ||||
16797 | while( p->rc==SQLITE_OK0 ){ | |||
16798 | Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0; | |||
16799 | Fts5Iter *pNew = 0; | |||
16800 | Fts5SegIter *pNewIter = 0; | |||
16801 | Fts5SegIter *pPrevIter = 0; | |||
16802 | ||||
16803 | int iLvl, iSeg, ii; | |||
16804 | ||||
16805 | pNew = fts5MultiIterAlloc(p, pStruct->nSegment); | |||
16806 | if( pSmall ){ | |||
16807 | fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p)sqlite3Fts5BufferSet(&p->rc,&bSeek,pSmall->n,pSmall ->p); | |||
16808 | fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0")sqlite3Fts5BufferAppendBlob(&p->rc,&bSeek,1,(const u8*)"\0"); | |||
16809 | }else{ | |||
16810 | fts5BufferSet(&p->rc, &bSeek, nToken, pToken)sqlite3Fts5BufferSet(&p->rc,&bSeek,nToken,pToken); | |||
16811 | } | |||
16812 | if( p->rc ){ | |||
16813 | fts5IterClose((Fts5IndexIter*)pNew); | |||
16814 | break; | |||
16815 | } | |||
16816 | ||||
16817 | pNewIter = &pNew->aSeg[0]; | |||
16818 | pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0); | |||
16819 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
16820 | for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ | |||
16821 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | |||
16822 | int bDone = 0; | |||
16823 | ||||
16824 | if( pPrevIter ){ | |||
16825 | if( fts5BufferCompare(pSmall, &pPrevIter->term) ){ | |||
16826 | memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter)); | |||
16827 | memset(pPrevIter, 0, sizeof(Fts5SegIter)); | |||
16828 | bDone = 1; | |||
16829 | }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){ | |||
16830 | fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter); | |||
16831 | bDone = 1; | |||
16832 | } | |||
16833 | } | |||
16834 | ||||
16835 | if( bDone==0 ){ | |||
16836 | fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); | |||
16837 | } | |||
16838 | ||||
16839 | if( pPrevIter ){ | |||
16840 | if( pPrevIter->pTombArray ){ | |||
16841 | pNewIter->pTombArray = pPrevIter->pTombArray; | |||
16842 | pNewIter->pTombArray->nRef++; | |||
16843 | } | |||
16844 | }else{ | |||
16845 | fts5SegIterAllocTombstone(p, pNewIter); | |||
16846 | } | |||
16847 | ||||
16848 | pNewIter++; | |||
16849 | if( pPrevIter ) pPrevIter++; | |||
16850 | if( p->rc ) break; | |||
16851 | } | |||
16852 | } | |||
16853 | fts5TokendataSetTermIfEof(pPrev, pSmall); | |||
16854 | ||||
16855 | pNew->bSkipEmpty = 1; | |||
16856 | pNew->pColset = pColset; | |||
16857 | fts5IterSetOutputCb(&p->rc, pNew); | |||
16858 | ||||
16859 | /* Loop through all segments in the new iterator. Find the smallest | |||
16860 | ** term that any segment-iterator points to. Iterator pNew will be | |||
16861 | ** used for this term. Also, set any iterator that points to a term that | |||
16862 | ** does not match pToken/nToken to point to EOF */ | |||
16863 | pSmall = 0; | |||
16864 | for(ii=0; ii<pNew->nSeg; ii++){ | |||
16865 | Fts5SegIter *pII = &pNew->aSeg[ii]; | |||
16866 | if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){ | |||
16867 | fts5SegIterSetEOF(pII); | |||
16868 | } | |||
16869 | if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){ | |||
16870 | pSmall = &pII->term; | |||
16871 | } | |||
16872 | } | |||
16873 | ||||
16874 | /* If pSmall is still NULL at this point, then the new iterator does | |||
16875 | ** not point to any terms that match the query. So delete it and break | |||
16876 | ** out of the loop - all required iterators have been collected. */ | |||
16877 | if( pSmall==0 ){ | |||
16878 | fts5IterClose((Fts5IndexIter*)pNew); | |||
16879 | break; | |||
16880 | } | |||
16881 | ||||
16882 | /* Append this iterator to the set and continue. */ | |||
16883 | pSet = fts5AppendTokendataIter(p, pSet, pNew); | |||
16884 | } | |||
16885 | ||||
16886 | if( p->rc==SQLITE_OK0 && pSet ){ | |||
16887 | int ii; | |||
16888 | for(ii=0; ii<pSet->nIter; ii++){ | |||
16889 | Fts5Iter *pIter = pSet->apIter[ii]; | |||
16890 | int iSeg; | |||
16891 | for(iSeg=0; iSeg<pIter->nSeg; iSeg++){ | |||
16892 | pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM0x01; | |||
16893 | } | |||
16894 | fts5MultiIterFinishSetup(p, pIter); | |||
16895 | } | |||
16896 | } | |||
16897 | ||||
16898 | if( p->rc==SQLITE_OK0 ){ | |||
16899 | pRet = fts5MultiIterAlloc(p, 0); | |||
16900 | } | |||
16901 | if( pRet ){ | |||
16902 | pRet->nSeg = 0; | |||
16903 | pRet->pTokenDataIter = pSet; | |||
16904 | if( pSet ){ | |||
16905 | fts5IterSetOutputsTokendata(pRet); | |||
16906 | }else{ | |||
16907 | pRet->base.bEof = 1; | |||
16908 | } | |||
16909 | }else{ | |||
16910 | fts5TokendataIterDelete(pSet); | |||
16911 | } | |||
16912 | ||||
16913 | fts5StructureRelease(pStruct); | |||
16914 | fts5BufferFree(&bSeek)sqlite3Fts5BufferFree(&bSeek); | |||
16915 | return pRet; | |||
16916 | } | |||
16917 | ||||
16918 | /* | |||
16919 | ** Open a new iterator to iterate through all rowids that match the | |||
16920 | ** specified token or token prefix. | |||
16921 | */ | |||
16922 | static int sqlite3Fts5IndexQuery( | |||
16923 | Fts5Index *p, /* FTS index to query */ | |||
16924 | const char *pToken, int nToken, /* Token (or prefix) to query for */ | |||
16925 | int flags, /* Mask of FTS5INDEX_QUERY_X flags */ | |||
16926 | Fts5Colset *pColset, /* Match these columns only */ | |||
16927 | Fts5IndexIter **ppIter /* OUT: New iterator object */ | |||
16928 | ){ | |||
16929 | Fts5Config *pConfig = p->pConfig; | |||
16930 | Fts5Iter *pRet = 0; | |||
16931 | Fts5Buffer buf = {0, 0, 0}; | |||
16932 | ||||
16933 | /* If the QUERY_SCAN flag is set, all other flags must be clear. */ | |||
16934 | assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN )((void) (0)); | |||
16935 | ||||
16936 | if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ | |||
16937 | int iIdx = 0; /* Index to search */ | |||
16938 | int iPrefixIdx = 0; /* +1 prefix index */ | |||
16939 | int bTokendata = pConfig->bTokendata; | |||
16940 | assert( buf.p!=0 )((void) (0)); | |||
16941 | if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); | |||
16942 | ||||
16943 | /* The NOTOKENDATA flag is set when each token in a tokendata=1 table | |||
16944 | ** should be treated individually, instead of merging all those with | |||
16945 | ** a common prefix into a single entry. This is used, for example, by | |||
16946 | ** queries performed as part of an integrity-check, or by the fts5vocab | |||
16947 | ** module. */ | |||
16948 | if( flags & (FTS5INDEX_QUERY_NOTOKENDATA0x0080|FTS5INDEX_QUERY_SCAN0x0008) ){ | |||
16949 | bTokendata = 0; | |||
16950 | } | |||
16951 | ||||
16952 | /* Figure out which index to search and set iIdx accordingly. If this | |||
16953 | ** is a prefix query for which there is no prefix index, set iIdx to | |||
16954 | ** greater than pConfig->nPrefix to indicate that the query will be | |||
16955 | ** satisfied by scanning multiple terms in the main index. | |||
16956 | ** | |||
16957 | ** If the QUERY_TEST_NOIDX flag was specified, then this must be a | |||
16958 | ** prefix-query. Instead of using a prefix-index (if one exists), | |||
16959 | ** evaluate the prefix query using the main FTS index. This is used | |||
16960 | ** for internal sanity checking by the integrity-check in debug | |||
16961 | ** mode only. */ | |||
16962 | #ifdef SQLITE_DEBUG | |||
16963 | if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX0x0004) ){ | |||
16964 | assert( flags & FTS5INDEX_QUERY_PREFIX )((void) (0)); | |||
16965 | iIdx = 1+pConfig->nPrefix; | |||
16966 | }else | |||
16967 | #endif | |||
16968 | if( flags & FTS5INDEX_QUERY_PREFIX0x0001 ){ | |||
16969 | int nChar = fts5IndexCharlen(pToken, nToken); | |||
16970 | for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ | |||
16971 | int nIdxChar = pConfig->aPrefix[iIdx-1]; | |||
16972 | if( nIdxChar==nChar ) break; | |||
16973 | if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx; | |||
16974 | } | |||
16975 | } | |||
16976 | ||||
16977 | if( bTokendata && iIdx==0 ){ | |||
16978 | buf.p[0] = FTS5_MAIN_PREFIX'0'; | |||
16979 | pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset); | |||
16980 | }else if( iIdx<=pConfig->nPrefix ){ | |||
16981 | /* Straight index lookup */ | |||
16982 | Fts5Structure *pStruct = fts5StructureRead(p); | |||
16983 | buf.p[0] = (u8)(FTS5_MAIN_PREFIX'0' + iIdx); | |||
16984 | if( pStruct ){ | |||
16985 | fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY0x0010, | |||
16986 | pColset, buf.p, nToken+1, -1, 0, &pRet | |||
16987 | ); | |||
16988 | fts5StructureRelease(pStruct); | |||
16989 | } | |||
16990 | }else{ | |||
16991 | /* Scan multiple terms in the main index for a prefix query. */ | |||
16992 | int bDesc = (flags & FTS5INDEX_QUERY_DESC0x0002)!=0; | |||
16993 | fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); | |||
16994 | if( pRet==0 ){ | |||
16995 | assert( p->rc!=SQLITE_OK )((void) (0)); | |||
16996 | }else{ | |||
16997 | assert( pRet->pColset==0 )((void) (0)); | |||
16998 | fts5IterSetOutputCb(&p->rc, pRet); | |||
16999 | if( p->rc==SQLITE_OK0 ){ | |||
17000 | Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; | |||
17001 | if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); | |||
17002 | } | |||
17003 | } | |||
17004 | } | |||
17005 | ||||
17006 | if( p->rc ){ | |||
17007 | fts5IterClose((Fts5IndexIter*)pRet); | |||
17008 | pRet = 0; | |||
17009 | fts5IndexCloseReader(p); | |||
17010 | } | |||
17011 | ||||
17012 | *ppIter = (Fts5IndexIter*)pRet; | |||
17013 | sqlite3Fts5BufferFree(&buf); | |||
17014 | } | |||
17015 | return fts5IndexReturn(p); | |||
17016 | } | |||
17017 | ||||
17018 | /* | |||
17019 | ** Return true if the iterator passed as the only argument is at EOF. | |||
17020 | */ | |||
17021 | /* | |||
17022 | ** Move to the next matching rowid. | |||
17023 | */ | |||
17024 | static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ | |||
17025 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
17026 | assert( pIter->pIndex->rc==SQLITE_OK )((void) (0)); | |||
17027 | if( pIter->nSeg==0 ){ | |||
17028 | assert( pIter->pTokenDataIter )((void) (0)); | |||
17029 | fts5TokendataIterNext(pIter, 0, 0); | |||
17030 | }else{ | |||
17031 | fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); | |||
17032 | } | |||
17033 | return fts5IndexReturn(pIter->pIndex); | |||
17034 | } | |||
17035 | ||||
17036 | /* | |||
17037 | ** Move to the next matching term/rowid. Used by the fts5vocab module. | |||
17038 | */ | |||
17039 | static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ | |||
17040 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
17041 | Fts5Index *p = pIter->pIndex; | |||
17042 | ||||
17043 | assert( pIter->pIndex->rc==SQLITE_OK )((void) (0)); | |||
17044 | ||||
17045 | fts5MultiIterNext(p, pIter, 0, 0); | |||
17046 | if( p->rc==SQLITE_OK0 ){ | |||
17047 | Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
17048 | if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX'0' ){ | |||
17049 | fts5DataRelease(pSeg->pLeaf); | |||
17050 | pSeg->pLeaf = 0; | |||
17051 | pIter->base.bEof = 1; | |||
17052 | } | |||
17053 | } | |||
17054 | ||||
17055 | return fts5IndexReturn(pIter->pIndex); | |||
17056 | } | |||
17057 | ||||
17058 | /* | |||
17059 | ** Move to the next matching rowid that occurs at or after iMatch. The | |||
17060 | ** definition of "at or after" depends on whether this iterator iterates | |||
17061 | ** in ascending or descending rowid order. | |||
17062 | */ | |||
17063 | static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ | |||
17064 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
17065 | if( pIter->nSeg==0 ){ | |||
17066 | assert( pIter->pTokenDataIter )((void) (0)); | |||
17067 | fts5TokendataIterNext(pIter, 1, iMatch); | |||
17068 | }else{ | |||
17069 | fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); | |||
17070 | } | |||
17071 | return fts5IndexReturn(pIter->pIndex); | |||
17072 | } | |||
17073 | ||||
17074 | /* | |||
17075 | ** Return the current term. | |||
17076 | */ | |||
17077 | static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ | |||
17078 | int n; | |||
17079 | const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); | |||
17080 | assert_nc( z || n<=1 )((void) (0)); | |||
17081 | *pn = n-1; | |||
17082 | return (z ? &z[1] : 0); | |||
17083 | } | |||
17084 | ||||
17085 | /* | |||
17086 | ** pIter is a prefix query. This function populates pIter->pTokenDataIter | |||
17087 | ** with an Fts5TokenDataIter object containing mappings for all rows | |||
17088 | ** matched by the query. | |||
17089 | */ | |||
17090 | static int fts5SetupPrefixIterTokendata( | |||
17091 | Fts5Iter *pIter, | |||
17092 | const char *pToken, /* Token prefix to search for */ | |||
17093 | int nToken /* Size of pToken in bytes */ | |||
17094 | ){ | |||
17095 | Fts5Index *p = pIter->pIndex; | |||
17096 | Fts5Buffer token = {0, 0, 0}; | |||
17097 | TokendataSetupCtx ctx; | |||
17098 | ||||
17099 | memset(&ctx, 0, sizeof(ctx)); | |||
17100 | ||||
17101 | fts5BufferGrow(&p->rc, &token, nToken+1)( (u32)((&token)->n) + (u32)(nToken+1) <= (u32)((& token)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->rc ),(&token),(nToken+1)+(&token)->n) ); | |||
17102 | assert( token.p!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
17103 | ctx.pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, | |||
17104 | SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | |||
17105 | ||||
17106 | if( p->rc
| |||
17107 | ||||
17108 | /* Fill in the token prefix to search for */ | |||
17109 | token.p[0] = FTS5_MAIN_PREFIX'0'; | |||
| ||||
17110 | memcpy(&token.p[1], pToken, nToken); | |||
17111 | token.n = nToken+1; | |||
17112 | ||||
17113 | fts5VisitEntries( | |||
17114 | p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx | |||
17115 | ); | |||
17116 | ||||
17117 | fts5TokendataIterSortMap(p, ctx.pT); | |||
17118 | } | |||
17119 | ||||
17120 | if( p->rc==SQLITE_OK0 ){ | |||
17121 | pIter->pTokenDataIter = ctx.pT; | |||
17122 | }else{ | |||
17123 | fts5TokendataIterDelete(ctx.pT); | |||
17124 | } | |||
17125 | fts5BufferFree(&token)sqlite3Fts5BufferFree(&token); | |||
17126 | ||||
17127 | return fts5IndexReturn(p); | |||
17128 | } | |||
17129 | ||||
17130 | /* | |||
17131 | ** This is used by xInstToken() to access the token at offset iOff, column | |||
17132 | ** iCol of row iRowid. The token is returned via output variables *ppOut | |||
17133 | ** and *pnOut. The iterator passed as the first argument must be a tokendata=1 | |||
17134 | ** iterator (pIter->pTokenDataIter!=0). | |||
17135 | ** | |||
17136 | ** pToken/nToken: | |||
17137 | */ | |||
17138 | static int sqlite3Fts5IterToken( | |||
17139 | Fts5IndexIter *pIndexIter, | |||
17140 | const char *pToken, int nToken, | |||
17141 | i64 iRowid, | |||
17142 | int iCol, | |||
17143 | int iOff, | |||
17144 | const char **ppOut, int *pnOut | |||
17145 | ){ | |||
17146 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
17147 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | |||
17148 | i64 iPos = (((i64)iCol)<<32) + iOff; | |||
17149 | Fts5TokenDataMap *aMap = 0; | |||
17150 | int i1 = 0; | |||
17151 | int i2 = 0; | |||
17152 | int iTest = 0; | |||
17153 | ||||
17154 | assert( pT || (pToken && pIter->nSeg>0) )((void) (0)); | |||
17155 | if( pT==0 ){ | |||
17156 | int rc = fts5SetupPrefixIterTokendata(pIter, pToken, nToken); | |||
17157 | if( rc!=SQLITE_OK0 ) return rc; | |||
17158 | pT = pIter->pTokenDataIter; | |||
17159 | } | |||
17160 | ||||
17161 | i2 = pT->nMap; | |||
17162 | aMap = pT->aMap; | |||
17163 | ||||
17164 | while( i2>i1 ){ | |||
17165 | iTest = (i1 + i2) / 2; | |||
17166 | ||||
17167 | if( aMap[iTest].iRowid<iRowid ){ | |||
17168 | i1 = iTest+1; | |||
17169 | }else if( aMap[iTest].iRowid>iRowid ){ | |||
17170 | i2 = iTest; | |||
17171 | }else{ | |||
17172 | if( aMap[iTest].iPos<iPos ){ | |||
17173 | if( aMap[iTest].iPos<0 ){ | |||
17174 | break; | |||
17175 | } | |||
17176 | i1 = iTest+1; | |||
17177 | }else if( aMap[iTest].iPos>iPos ){ | |||
17178 | i2 = iTest; | |||
17179 | }else{ | |||
17180 | break; | |||
17181 | } | |||
17182 | } | |||
17183 | } | |||
17184 | ||||
17185 | if( i2>i1 ){ | |||
17186 | if( pIter->nSeg==0 ){ | |||
17187 | Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; | |||
17188 | *ppOut = (const char*)pMap->aSeg[0].term.p+1; | |||
17189 | *pnOut = pMap->aSeg[0].term.n-1; | |||
17190 | }else{ | |||
17191 | Fts5TokenDataMap *p = &aMap[iTest]; | |||
17192 | *ppOut = (const char*)&pT->terms.p[p->iIter]; | |||
17193 | *pnOut = aMap[iTest].nByte; | |||
17194 | } | |||
17195 | } | |||
17196 | ||||
17197 | return SQLITE_OK0; | |||
17198 | } | |||
17199 | ||||
17200 | /* | |||
17201 | ** Clear any existing entries from the token-map associated with the | |||
17202 | ** iterator passed as the only argument. | |||
17203 | */ | |||
17204 | static void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ | |||
17205 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
17206 | if( pIter && pIter->pTokenDataIter | |||
17207 | && (pIter->nSeg==0 || pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL0) | |||
17208 | ){ | |||
17209 | pIter->pTokenDataIter->nMap = 0; | |||
17210 | } | |||
17211 | } | |||
17212 | ||||
17213 | /* | |||
17214 | ** Set a token-mapping for the iterator passed as the first argument. This | |||
17215 | ** is used in detail=column or detail=none mode when a token is requested | |||
17216 | ** using the xInstToken() API. In this case the caller tokenizers the | |||
17217 | ** current row and configures the token-mapping via multiple calls to this | |||
17218 | ** function. | |||
17219 | */ | |||
17220 | static int sqlite3Fts5IndexIterWriteTokendata( | |||
17221 | Fts5IndexIter *pIndexIter, | |||
17222 | const char *pToken, int nToken, | |||
17223 | i64 iRowid, int iCol, int iOff | |||
17224 | ){ | |||
17225 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
17226 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | |||
17227 | Fts5Index *p = pIter->pIndex; | |||
17228 | i64 iPos = (((i64)iCol)<<32) + iOff; | |||
17229 | ||||
17230 | assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL )((void) (0)); | |||
17231 | assert( pIter->pTokenDataIter || pIter->nSeg>0 )((void) (0)); | |||
17232 | if( pIter->nSeg>0 ){ | |||
17233 | /* This is a prefix term iterator. */ | |||
17234 | if( pT==0 ){ | |||
17235 | pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, | |||
17236 | SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | |||
17237 | pIter->pTokenDataIter = pT; | |||
17238 | } | |||
17239 | if( pT ){ | |||
17240 | fts5TokendataIterAppendMap(p, pT, pT->terms.n, nToken, iRowid, iPos); | |||
17241 | fts5BufferAppendBlob(&p->rc, &pT->terms, nToken, (const u8*)pToken)sqlite3Fts5BufferAppendBlob(&p->rc,&pT->terms,nToken ,(const u8*)pToken); | |||
17242 | } | |||
17243 | }else{ | |||
17244 | int ii; | |||
17245 | for(ii=0; ii<pT->nIter; ii++){ | |||
17246 | Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; | |||
17247 | if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; | |||
17248 | } | |||
17249 | if( ii<pT->nIter ){ | |||
17250 | fts5TokendataIterAppendMap(p, pT, ii, 0, iRowid, iPos); | |||
17251 | } | |||
17252 | } | |||
17253 | return fts5IndexReturn(p); | |||
17254 | } | |||
17255 | ||||
17256 | /* | |||
17257 | ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). | |||
17258 | */ | |||
17259 | static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ | |||
17260 | if( pIndexIter ){ | |||
17261 | Fts5Index *pIndex = ((Fts5Iter*)pIndexIter)->pIndex; | |||
17262 | fts5IterClose(pIndexIter); | |||
17263 | fts5IndexReturn(pIndex); | |||
17264 | } | |||
17265 | } | |||
17266 | ||||
17267 | /* | |||
17268 | ** Read and decode the "averages" record from the database. | |||
17269 | ** | |||
17270 | ** Parameter anSize must point to an array of size nCol, where nCol is | |||
17271 | ** the number of user defined columns in the FTS table. | |||
17272 | */ | |||
17273 | static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ | |||
17274 | int nCol = p->pConfig->nCol; | |||
17275 | Fts5Data *pData; | |||
17276 | ||||
17277 | *pnRow = 0; | |||
17278 | memset(anSize, 0, sizeof(i64) * nCol); | |||
17279 | pData = fts5DataRead(p, FTS5_AVERAGES_ROWID1); | |||
17280 | if( p->rc==SQLITE_OK0 && pData->nn ){ | |||
17281 | int i = 0; | |||
17282 | int iCol; | |||
17283 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[i], (u64*)pnRow); | |||
17284 | for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ | |||
17285 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); | |||
17286 | } | |||
17287 | } | |||
17288 | ||||
17289 | fts5DataRelease(pData); | |||
17290 | return fts5IndexReturn(p); | |||
17291 | } | |||
17292 | ||||
17293 | /* | |||
17294 | ** Replace the current "averages" record with the contents of the buffer | |||
17295 | ** supplied as the second argument. | |||
17296 | */ | |||
17297 | static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ | |||
17298 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
17299 | fts5DataWrite(p, FTS5_AVERAGES_ROWID1, pData, nData); | |||
17300 | return fts5IndexReturn(p); | |||
17301 | } | |||
17302 | ||||
17303 | /* | |||
17304 | ** Return the total number of blocks this module has read from the %_data | |||
17305 | ** table since it was created. | |||
17306 | */ | |||
17307 | static int sqlite3Fts5IndexReads(Fts5Index *p){ | |||
17308 | return p->nRead; | |||
17309 | } | |||
17310 | ||||
17311 | /* | |||
17312 | ** Set the 32-bit cookie value stored at the start of all structure | |||
17313 | ** records to the value passed as the second argument. | |||
17314 | ** | |||
17315 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
17316 | ** occurs. | |||
17317 | */ | |||
17318 | static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ | |||
17319 | int rc; /* Return code */ | |||
17320 | Fts5Config *pConfig = p->pConfig; /* Configuration object */ | |||
17321 | u8 aCookie[4]; /* Binary representation of iNew */ | |||
17322 | sqlite3_blob *pBlob = 0; | |||
17323 | ||||
17324 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
17325 | sqlite3Fts5Put32(aCookie, iNew); | |||
17326 | ||||
17327 | rc = sqlite3_blob_opensqlite3_api->blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, | |||
17328 | "block", FTS5_STRUCTURE_ROWID10, 1, &pBlob | |||
17329 | ); | |||
17330 | if( rc==SQLITE_OK0 ){ | |||
17331 | sqlite3_blob_writesqlite3_api->blob_write(pBlob, aCookie, 4, 0); | |||
17332 | rc = sqlite3_blob_closesqlite3_api->blob_close(pBlob); | |||
17333 | } | |||
17334 | ||||
17335 | return rc; | |||
17336 | } | |||
17337 | ||||
17338 | static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ | |||
17339 | Fts5Structure *pStruct; | |||
17340 | pStruct = fts5StructureRead(p); | |||
17341 | fts5StructureRelease(pStruct); | |||
17342 | return fts5IndexReturn(p); | |||
17343 | } | |||
17344 | ||||
17345 | /* | |||
17346 | ** Retrieve the origin value that will be used for the segment currently | |||
17347 | ** being accumulated in the in-memory hash table when it is flushed to | |||
17348 | ** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to | |||
17349 | ** the queried value. Or, if an error occurs, an error code is returned | |||
17350 | ** and the final value of (*piOrigin) is undefined. | |||
17351 | */ | |||
17352 | static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){ | |||
17353 | Fts5Structure *pStruct; | |||
17354 | pStruct = fts5StructureRead(p); | |||
17355 | if( pStruct ){ | |||
17356 | *piOrigin = pStruct->nOriginCntr; | |||
17357 | fts5StructureRelease(pStruct); | |||
17358 | } | |||
17359 | return fts5IndexReturn(p); | |||
17360 | } | |||
17361 | ||||
17362 | /* | |||
17363 | ** Buffer pPg contains a page of a tombstone hash table - one of nPg pages | |||
17364 | ** associated with the same segment. This function adds rowid iRowid to | |||
17365 | ** the hash table. The caller is required to guarantee that there is at | |||
17366 | ** least one free slot on the page. | |||
17367 | ** | |||
17368 | ** If parameter bForce is false and the hash table is deemed to be full | |||
17369 | ** (more than half of the slots are occupied), then non-zero is returned | |||
17370 | ** and iRowid not inserted. Or, if bForce is true or if the hash table page | |||
17371 | ** is not full, iRowid is inserted and zero returned. | |||
17372 | */ | |||
17373 | static int fts5IndexTombstoneAddToPage( | |||
17374 | Fts5Data *pPg, | |||
17375 | int bForce, | |||
17376 | int nPg, | |||
17377 | u64 iRowid | |||
17378 | ){ | |||
17379 | const int szKey = TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8); | |||
17380 | const int nSlot = TOMBSTONE_NSLOT(pPg)((pPg->nn > 16) ? ((pPg->nn-8) / (pPg->p[0]==4 ? 4 : 8)) : 1); | |||
17381 | const int nElem = fts5GetU32(&pPg->p[4]); | |||
17382 | int iSlot = (iRowid / nPg) % nSlot; | |||
17383 | int nCollide = nSlot; | |||
17384 | ||||
17385 | if( szKey==4 && iRowid>0xFFFFFFFF ) return 2; | |||
17386 | if( iRowid==0 ){ | |||
17387 | pPg->p[1] = 0x01; | |||
17388 | return 0; | |||
17389 | } | |||
17390 | ||||
17391 | if( bForce==0 && nElem>=(nSlot/2) ){ | |||
17392 | return 1; | |||
17393 | } | |||
17394 | ||||
17395 | fts5PutU32(&pPg->p[4], nElem+1); | |||
17396 | if( szKey==4 ){ | |||
17397 | u32 *aSlot = (u32*)&pPg->p[8]; | |||
17398 | while( aSlot[iSlot] ){ | |||
17399 | iSlot = (iSlot + 1) % nSlot; | |||
17400 | if( nCollide--==0 ) return 0; | |||
17401 | } | |||
17402 | fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid); | |||
17403 | }else{ | |||
17404 | u64 *aSlot = (u64*)&pPg->p[8]; | |||
17405 | while( aSlot[iSlot] ){ | |||
17406 | iSlot = (iSlot + 1) % nSlot; | |||
17407 | if( nCollide--==0 ) return 0; | |||
17408 | } | |||
17409 | fts5PutU64((u8*)&aSlot[iSlot], iRowid); | |||
17410 | } | |||
17411 | ||||
17412 | return 0; | |||
17413 | } | |||
17414 | ||||
17415 | /* | |||
17416 | ** This function attempts to build a new hash containing all the keys | |||
17417 | ** currently in the tombstone hash table for segment pSeg. The new | |||
17418 | ** hash will be stored in the nOut buffers passed in array apOut[]. | |||
17419 | ** All pages of the new hash use key-size szKey (4 or 8). | |||
17420 | ** | |||
17421 | ** Return 0 if the hash is successfully rebuilt into the nOut pages. | |||
17422 | ** Or non-zero if it is not (because one page became overfull). In this | |||
17423 | ** case the caller should retry with a larger nOut parameter. | |||
17424 | ** | |||
17425 | ** Parameter pData1 is page iPg1 of the hash table being rebuilt. | |||
17426 | */ | |||
17427 | static int fts5IndexTombstoneRehash( | |||
17428 | Fts5Index *p, | |||
17429 | Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */ | |||
17430 | Fts5Data *pData1, /* One page of current hash - or NULL */ | |||
17431 | int iPg1, /* Which page of the current hash is pData1 */ | |||
17432 | int szKey, /* 4 or 8, the keysize */ | |||
17433 | int nOut, /* Number of output pages */ | |||
17434 | Fts5Data **apOut /* Array of output hash pages */ | |||
17435 | ){ | |||
17436 | int ii; | |||
17437 | int res = 0; | |||
17438 | ||||
17439 | /* Initialize the headers of all the output pages */ | |||
17440 | for(ii=0; ii<nOut; ii++){ | |||
17441 | apOut[ii]->p[0] = szKey; | |||
17442 | fts5PutU32(&apOut[ii]->p[4], 0); | |||
17443 | } | |||
17444 | ||||
17445 | /* Loop through the current pages of the hash table. */ | |||
17446 | for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){ | |||
17447 | Fts5Data *pData = 0; /* Page ii of the current hash table */ | |||
17448 | Fts5Data *pFree = 0; /* Free this at the end of the loop */ | |||
17449 | ||||
17450 | if( iPg1==ii ){ | |||
17451 | pData = pData1; | |||
17452 | }else{ | |||
17453 | pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(ii)) )); | |||
17454 | } | |||
17455 | ||||
17456 | if( pData ){ | |||
17457 | int szKeyIn = TOMBSTONE_KEYSIZE(pData)(pData->p[0]==4 ? 4 : 8); | |||
17458 | int nSlotIn = (pData->nn - 8) / szKeyIn; | |||
17459 | int iIn; | |||
17460 | for(iIn=0; iIn<nSlotIn; iIn++){ | |||
17461 | u64 iVal = 0; | |||
17462 | ||||
17463 | /* Read the value from slot iIn of the input page into iVal. */ | |||
17464 | if( szKeyIn==4 ){ | |||
17465 | u32 *aSlot = (u32*)&pData->p[8]; | |||
17466 | if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]); | |||
17467 | }else{ | |||
17468 | u64 *aSlot = (u64*)&pData->p[8]; | |||
17469 | if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]); | |||
17470 | } | |||
17471 | ||||
17472 | /* If iVal is not 0 at this point, insert it into the new hash table */ | |||
17473 | if( iVal ){ | |||
17474 | Fts5Data *pPg = apOut[(iVal % nOut)]; | |||
17475 | res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal); | |||
17476 | if( res ) break; | |||
17477 | } | |||
17478 | } | |||
17479 | ||||
17480 | /* If this is page 0 of the old hash, copy the rowid-0-flag from the | |||
17481 | ** old hash to the new. */ | |||
17482 | if( ii==0 ){ | |||
17483 | apOut[0]->p[1] = pData->p[1]; | |||
17484 | } | |||
17485 | } | |||
17486 | fts5DataRelease(pFree); | |||
17487 | } | |||
17488 | ||||
17489 | return res; | |||
17490 | } | |||
17491 | ||||
17492 | /* | |||
17493 | ** This is called to rebuild the hash table belonging to segment pSeg. | |||
17494 | ** If parameter pData1 is not NULL, then one page of the existing hash table | |||
17495 | ** has already been loaded - pData1, which is page iPg1. The key-size for | |||
17496 | ** the new hash table is szKey (4 or 8). | |||
17497 | ** | |||
17498 | ** If successful, the new hash table is not written to disk. Instead, | |||
17499 | ** output parameter (*pnOut) is set to the number of pages in the new | |||
17500 | ** hash table, and (*papOut) to point to an array of buffers containing | |||
17501 | ** the new page data. | |||
17502 | ** | |||
17503 | ** If an error occurs, an error code is left in the Fts5Index object and | |||
17504 | ** both output parameters set to 0 before returning. | |||
17505 | */ | |||
17506 | static void fts5IndexTombstoneRebuild( | |||
17507 | Fts5Index *p, | |||
17508 | Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */ | |||
17509 | Fts5Data *pData1, /* One page of current hash - or NULL */ | |||
17510 | int iPg1, /* Which page of the current hash is pData1 */ | |||
17511 | int szKey, /* 4 or 8, the keysize */ | |||
17512 | int *pnOut, /* OUT: Number of output pages */ | |||
17513 | Fts5Data ***papOut /* OUT: Output hash pages */ | |||
17514 | ){ | |||
17515 | const int MINSLOT = 32; | |||
17516 | int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey)(((MINSLOT) > ((p->pConfig->pgsz - 8) / szKey)) ? (MINSLOT ) : ((p->pConfig->pgsz - 8) / szKey)); | |||
17517 | int nSlot = 0; /* Number of slots in each output page */ | |||
17518 | int nOut = 0; | |||
17519 | ||||
17520 | /* Figure out how many output pages (nOut) and how many slots per | |||
17521 | ** page (nSlot). There are three possibilities: | |||
17522 | ** | |||
17523 | ** 1. The hash table does not yet exist. In this case the new hash | |||
17524 | ** table will consist of a single page with MINSLOT slots. | |||
17525 | ** | |||
17526 | ** 2. The hash table exists but is currently a single page. In this | |||
17527 | ** case an attempt is made to grow the page to accommodate the new | |||
17528 | ** entry. The page is allowed to grow up to nSlotPerPage (see above) | |||
17529 | ** slots. | |||
17530 | ** | |||
17531 | ** 3. The hash table already consists of more than one page, or of | |||
17532 | ** a single page already so large that it cannot be grown. In this | |||
17533 | ** case the new hash consists of (nPg*2+1) pages of nSlotPerPage | |||
17534 | ** slots each, where nPg is the current number of pages in the | |||
17535 | ** hash table. | |||
17536 | */ | |||
17537 | if( pSeg->nPgTombstone==0 ){ | |||
17538 | /* Case 1. */ | |||
17539 | nOut = 1; | |||
17540 | nSlot = MINSLOT; | |||
17541 | }else if( pSeg->nPgTombstone==1 ){ | |||
17542 | /* Case 2. */ | |||
17543 | int nElem = (int)fts5GetU32(&pData1->p[4]); | |||
17544 | assert( pData1 && iPg1==0 )((void) (0)); | |||
17545 | nOut = 1; | |||
17546 | nSlot = MAX(nElem*4, MINSLOT)(((nElem*4) > (MINSLOT)) ? (nElem*4) : (MINSLOT)); | |||
17547 | if( nSlot>nSlotPerPage ) nOut = 0; | |||
17548 | } | |||
17549 | if( nOut==0 ){ | |||
17550 | /* Case 3. */ | |||
17551 | nOut = (pSeg->nPgTombstone * 2 + 1); | |||
17552 | nSlot = nSlotPerPage; | |||
17553 | } | |||
17554 | ||||
17555 | /* Allocate the required array and output pages */ | |||
17556 | while( 1 ){ | |||
17557 | int res = 0; | |||
17558 | int ii = 0; | |||
17559 | int szPage = 0; | |||
17560 | Fts5Data **apOut = 0; | |||
17561 | ||||
17562 | /* Allocate space for the new hash table */ | |||
17563 | assert( nSlot>=MINSLOT )((void) (0)); | |||
17564 | apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut); | |||
17565 | szPage = 8 + nSlot*szKey; | |||
17566 | for(ii=0; ii<nOut; ii++){ | |||
17567 | Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc, | |||
17568 | sizeof(Fts5Data)+szPage | |||
17569 | ); | |||
17570 | if( pNew ){ | |||
17571 | pNew->nn = szPage; | |||
17572 | pNew->p = (u8*)&pNew[1]; | |||
17573 | apOut[ii] = pNew; | |||
17574 | } | |||
17575 | } | |||
17576 | ||||
17577 | /* Rebuild the hash table. */ | |||
17578 | if( p->rc==SQLITE_OK0 ){ | |||
17579 | res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut); | |||
17580 | } | |||
17581 | if( res==0 ){ | |||
17582 | if( p->rc ){ | |||
17583 | fts5IndexFreeArray(apOut, nOut); | |||
17584 | apOut = 0; | |||
17585 | nOut = 0; | |||
17586 | } | |||
17587 | *pnOut = nOut; | |||
17588 | *papOut = apOut; | |||
17589 | break; | |||
17590 | } | |||
17591 | ||||
17592 | /* If control flows to here, it was not possible to rebuild the hash | |||
17593 | ** table. Free all buffers and then try again with more pages. */ | |||
17594 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
17595 | fts5IndexFreeArray(apOut, nOut); | |||
17596 | nSlot = nSlotPerPage; | |||
17597 | nOut = nOut*2 + 1; | |||
17598 | } | |||
17599 | } | |||
17600 | ||||
17601 | ||||
17602 | /* | |||
17603 | ** Add a tombstone for rowid iRowid to segment pSeg. | |||
17604 | */ | |||
17605 | static void fts5IndexTombstoneAdd( | |||
17606 | Fts5Index *p, | |||
17607 | Fts5StructureSegment *pSeg, | |||
17608 | u64 iRowid | |||
17609 | ){ | |||
17610 | Fts5Data *pPg = 0; | |||
17611 | int iPg = -1; | |||
17612 | int szKey = 0; | |||
17613 | int nHash = 0; | |||
17614 | Fts5Data **apHash = 0; | |||
17615 | ||||
17616 | p->nContentlessDelete++; | |||
17617 | ||||
17618 | if( pSeg->nPgTombstone>0 ){ | |||
17619 | iPg = iRowid % pSeg->nPgTombstone; | |||
17620 | pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(iPg)) )); | |||
17621 | if( pPg==0 ){ | |||
17622 | assert( p->rc!=SQLITE_OK )((void) (0)); | |||
17623 | return; | |||
17624 | } | |||
17625 | ||||
17626 | if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){ | |||
17627 | fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(iPg)) ), pPg->p, pPg->nn); | |||
17628 | fts5DataRelease(pPg); | |||
17629 | return; | |||
17630 | } | |||
17631 | } | |||
17632 | ||||
17633 | /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */ | |||
17634 | szKey = pPg ? TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8) : 4; | |||
17635 | if( iRowid>0xFFFFFFFF ) szKey = 8; | |||
17636 | ||||
17637 | /* Rebuild the hash table */ | |||
17638 | fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash); | |||
17639 | assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) )((void) (0)); | |||
17640 | ||||
17641 | /* If all has succeeded, write the new rowid into one of the new hash | |||
17642 | ** table pages, then write them all out to disk. */ | |||
17643 | if( nHash ){ | |||
17644 | int ii = 0; | |||
17645 | fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid); | |||
17646 | for(ii=0; ii<nHash; ii++){ | |||
17647 | i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(ii)) ); | |||
17648 | fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn); | |||
17649 | } | |||
17650 | pSeg->nPgTombstone = nHash; | |||
17651 | fts5StructureWrite(p, p->pStruct); | |||
17652 | } | |||
17653 | ||||
17654 | fts5DataRelease(pPg); | |||
17655 | fts5IndexFreeArray(apHash, nHash); | |||
17656 | } | |||
17657 | ||||
17658 | /* | |||
17659 | ** Add iRowid to the tombstone list of the segment or segments that contain | |||
17660 | ** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite | |||
17661 | ** error code otherwise. | |||
17662 | */ | |||
17663 | static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){ | |||
17664 | Fts5Structure *pStruct; | |||
17665 | pStruct = fts5StructureRead(p); | |||
17666 | if( pStruct ){ | |||
17667 | int bFound = 0; /* True after pSeg->nEntryTombstone incr. */ | |||
17668 | int iLvl; | |||
17669 | for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ | |||
17670 | int iSeg; | |||
17671 | for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ | |||
17672 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | |||
17673 | if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){ | |||
17674 | if( bFound==0 ){ | |||
17675 | pSeg->nEntryTombstone++; | |||
17676 | bFound = 1; | |||
17677 | } | |||
17678 | fts5IndexTombstoneAdd(p, pSeg, iRowid); | |||
17679 | } | |||
17680 | } | |||
17681 | } | |||
17682 | fts5StructureRelease(pStruct); | |||
17683 | } | |||
17684 | return fts5IndexReturn(p); | |||
17685 | } | |||
17686 | ||||
17687 | /************************************************************************* | |||
17688 | ************************************************************************** | |||
17689 | ** Below this point is the implementation of the integrity-check | |||
17690 | ** functionality. | |||
17691 | */ | |||
17692 | ||||
17693 | /* | |||
17694 | ** Return a simple checksum value based on the arguments. | |||
17695 | */ | |||
17696 | static u64 sqlite3Fts5IndexEntryCksum( | |||
17697 | i64 iRowid, | |||
17698 | int iCol, | |||
17699 | int iPos, | |||
17700 | int iIdx, | |||
17701 | const char *pTerm, | |||
17702 | int nTerm | |||
17703 | ){ | |||
17704 | int i; | |||
17705 | u64 ret = iRowid; | |||
17706 | ret += (ret<<3) + iCol; | |||
17707 | ret += (ret<<3) + iPos; | |||
17708 | if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX'0' + iIdx); | |||
17709 | for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; | |||
17710 | return ret; | |||
17711 | } | |||
17712 | ||||
17713 | #ifdef SQLITE_DEBUG | |||
17714 | /* | |||
17715 | ** This function is purely an internal test. It does not contribute to | |||
17716 | ** FTS functionality, or even the integrity-check, in any way. | |||
17717 | ** | |||
17718 | ** Instead, it tests that the same set of pgno/rowid combinations are | |||
17719 | ** visited regardless of whether the doclist-index identified by parameters | |||
17720 | ** iSegid/iLeaf is iterated in forwards or reverse order. | |||
17721 | */ | |||
17722 | static void fts5TestDlidxReverse( | |||
17723 | Fts5Index *p, | |||
17724 | int iSegid, /* Segment id to load from */ | |||
17725 | int iLeaf /* Load doclist-index for this leaf */ | |||
17726 | ){ | |||
17727 | Fts5DlidxIter *pDlidx = 0; | |||
17728 | u64 cksum1 = 13; | |||
17729 | u64 cksum2 = 13; | |||
17730 | ||||
17731 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); | |||
17732 | fts5DlidxIterEof(p, pDlidx)==0; | |||
17733 | fts5DlidxIterNext(p, pDlidx) | |||
17734 | ){ | |||
17735 | i64 iRowid = fts5DlidxIterRowid(pDlidx); | |||
17736 | int pgno = fts5DlidxIterPgno(pDlidx); | |||
17737 | assert( pgno>iLeaf )((void) (0)); | |||
17738 | cksum1 += iRowid + ((i64)pgno<<32); | |||
17739 | } | |||
17740 | fts5DlidxIterFree(pDlidx); | |||
17741 | pDlidx = 0; | |||
17742 | ||||
17743 | for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); | |||
17744 | fts5DlidxIterEof(p, pDlidx)==0; | |||
17745 | fts5DlidxIterPrev(p, pDlidx) | |||
17746 | ){ | |||
17747 | i64 iRowid = fts5DlidxIterRowid(pDlidx); | |||
17748 | int pgno = fts5DlidxIterPgno(pDlidx); | |||
17749 | assert( fts5DlidxIterPgno(pDlidx)>iLeaf )((void) (0)); | |||
17750 | cksum2 += iRowid + ((i64)pgno<<32); | |||
17751 | } | |||
17752 | fts5DlidxIterFree(pDlidx); | |||
17753 | pDlidx = 0; | |||
17754 | ||||
17755 | if( p->rc==SQLITE_OK0 && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17756 | } | |||
17757 | ||||
17758 | static int fts5QueryCksum( | |||
17759 | Fts5Index *p, /* Fts5 index object */ | |||
17760 | int iIdx, | |||
17761 | const char *z, /* Index key to query for */ | |||
17762 | int n, /* Size of index key in bytes */ | |||
17763 | int flags, /* Flags for Fts5IndexQuery */ | |||
17764 | u64 *pCksum /* IN/OUT: Checksum value */ | |||
17765 | ){ | |||
17766 | int eDetail = p->pConfig->eDetail; | |||
17767 | u64 cksum = *pCksum; | |||
17768 | Fts5IndexIter *pIter = 0; | |||
17769 | int rc = sqlite3Fts5IndexQuery( | |||
17770 | p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA0x0080), 0, &pIter | |||
17771 | ); | |||
17772 | ||||
17773 | while( rc==SQLITE_OK0 && ALWAYS(pIter!=0)(pIter!=0) && 0==sqlite3Fts5IterEof(pIter)((pIter)->bEof) ){ | |||
17774 | i64 rowid = pIter->iRowid; | |||
17775 | ||||
17776 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
17777 | cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); | |||
17778 | }else{ | |||
17779 | Fts5PoslistReader sReader; | |||
17780 | for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); | |||
17781 | sReader.bEof==0; | |||
17782 | sqlite3Fts5PoslistReaderNext(&sReader) | |||
17783 | ){ | |||
17784 | int iCol = FTS5_POS2COLUMN(sReader.iPos)(int)((sReader.iPos >> 32) & 0x7FFFFFFF); | |||
17785 | int iOff = FTS5_POS2OFFSET(sReader.iPos)(int)(sReader.iPos & 0x7FFFFFFF); | |||
17786 | cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); | |||
17787 | } | |||
17788 | } | |||
17789 | if( rc==SQLITE_OK0 ){ | |||
17790 | rc = sqlite3Fts5IterNext(pIter); | |||
17791 | } | |||
17792 | } | |||
17793 | fts5IterClose(pIter); | |||
17794 | ||||
17795 | *pCksum = cksum; | |||
17796 | return rc; | |||
17797 | } | |||
17798 | ||||
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the structure of each sequence is verified: the lead byte must
** announce a 1, 2, 3 or 4 byte sequence, the whole sequence must fit
** within the buffer, and every trailing byte must have the form
** 10xxxxxx. Overlong encodings and out-of-range codepoints are not
** rejected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    int nSeq;                     /* Total bytes in the current sequence */
    int j;

    if( (z[i] & 0x80)==0x00 ){
      nSeq = 1;
    }else if( (z[i] & 0xE0)==0xC0 ){
      nSeq = 2;
    }else if( (z[i] & 0xF0)==0xE0 ){
      nSeq = 3;
    }else if( (z[i] & 0xF8)==0xF0 ){
      nSeq = 4;
    }else{
      /* Stray continuation byte or invalid lead byte */
      return 1;
    }

    /* The sequence must not run off the end of the buffer */
    if( i+nSeq>n ) return 1;

    /* Every byte after the lead byte must be a continuation byte. For
    ** 4-byte sequences this includes z[i+3], and the cursor is advanced
    ** by the full sequence length so that the final continuation byte is
    ** not mistaken for the start of the next character. */
    for(j=1; j<nSeq; j++){
      if( (z[i+j] & 0xC0)!=0x80 ) return 1;
    }
    i += nSeq;
  }

  return 0;
}
17830 | ||||
17831 | /* | |||
17832 | ** This function is also purely an internal test. It does not contribute to | |||
17833 | ** FTS functionality, or even the integrity-check, in any way. | |||
17834 | */ | |||
17835 | static void fts5TestTerm( | |||
17836 | Fts5Index *p, | |||
17837 | Fts5Buffer *pPrev, /* Previous term */ | |||
17838 | const char *z, int n, /* Possibly new term to test */ | |||
17839 | u64 expected, | |||
17840 | u64 *pCksum | |||
17841 | ){ | |||
17842 | int rc = p->rc; | |||
17843 | if( pPrev->n==0 ){ | |||
17844 | fts5BufferSet(&rc, pPrev, n, (const u8*)z)sqlite3Fts5BufferSet(&rc,pPrev,n,(const u8*)z); | |||
17845 | }else | |||
17846 | if( rc==SQLITE_OK0 && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ | |||
17847 | u64 cksum3 = *pCksum; | |||
17848 | const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ | |||
17849 | int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ | |||
17850 | int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX'0'); | |||
17851 | int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX0x0001); | |||
17852 | u64 ck1 = 0; | |||
17853 | u64 ck2 = 0; | |||
17854 | ||||
17855 | /* Check that the results returned for ASC and DESC queries are | |||
17856 | ** the same. If not, call this corruption. */ | |||
17857 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); | |||
17858 | if( rc==SQLITE_OK0 ){ | |||
17859 | int f = flags|FTS5INDEX_QUERY_DESC0x0002; | |||
17860 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | |||
17861 | } | |||
17862 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17863 | ||||
17864 | /* If this is a prefix query, check that the results returned if the | |||
17865 | ** the index is disabled are the same. In both ASC and DESC order. | |||
17866 | ** | |||
17867 | ** This check may only be performed if the hash table is empty. This | |||
17868 | ** is because the hash table only supports a single scan query at | |||
17869 | ** a time, and the multi-iter loop from which this function is called | |||
17870 | ** is already performing such a scan. | |||
17871 | ** | |||
17872 | ** Also only do this if buffer zTerm contains nTerm bytes of valid | |||
17873 | ** utf-8. Otherwise, the last part of the buffer contents might contain | |||
17874 | ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8 | |||
17875 | ** character stored in the main fts index, which will cause the | |||
17876 | ** test to fail. */ | |||
17877 | if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){ | |||
17878 | if( iIdx>0 && rc==SQLITE_OK0 ){ | |||
17879 | int f = flags|FTS5INDEX_QUERY_TEST_NOIDX0x0004; | |||
17880 | ck2 = 0; | |||
17881 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | |||
17882 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17883 | } | |||
17884 | if( iIdx>0 && rc==SQLITE_OK0 ){ | |||
17885 | int f = flags|FTS5INDEX_QUERY_TEST_NOIDX0x0004|FTS5INDEX_QUERY_DESC0x0002; | |||
17886 | ck2 = 0; | |||
17887 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | |||
17888 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17889 | } | |||
17890 | } | |||
17891 | ||||
17892 | cksum3 ^= ck1; | |||
17893 | fts5BufferSet(&rc, pPrev, n, (const u8*)z)sqlite3Fts5BufferSet(&rc,pPrev,n,(const u8*)z); | |||
17894 | ||||
17895 | if( rc==SQLITE_OK0 && cksum3!=expected ){ | |||
17896 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17897 | } | |||
17898 | *pCksum = cksum3; | |||
17899 | } | |||
17900 | p->rc = rc; | |||
17901 | } | |||
17902 | ||||
17903 | #else | |||
17904 | # define fts5TestDlidxReverse(x,y,z) | |||
17905 | # define fts5TestTerm(u,v,w,x,y,z) | |||
17906 | #endif | |||
17907 | ||||
17908 | /* | |||
17909 | ** Check that: | |||
17910 | ** | |||
17911 | ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and | |||
17912 | ** contain zero terms. | |||
17913 | ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and | |||
17914 | ** contain zero rowids. | |||
17915 | */ | |||
17916 | static void fts5IndexIntegrityCheckEmpty( | |||
17917 | Fts5Index *p, | |||
17918 | Fts5StructureSegment *pSeg, /* Segment to check internal consistency */ | |||
17919 | int iFirst, | |||
17920 | int iNoRowid, | |||
17921 | int iLast | |||
17922 | ){ | |||
17923 | int i; | |||
17924 | ||||
17925 | /* Now check that the iter.nEmpty leaves following the current leaf | |||
17926 | ** (a) exist and (b) contain no terms. */ | |||
17927 | for(i=iFirst; p->rc==SQLITE_OK0 && i<=iLast; i++){ | |||
17928 | Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(i)) )); | |||
17929 | if( pLeaf ){ | |||
17930 | if( !fts5LeafIsTermless(pLeaf)((pLeaf)->szLeaf >= (pLeaf)->nn) ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17931 | if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)) ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17932 | } | |||
17933 | fts5DataRelease(pLeaf); | |||
17934 | } | |||
17935 | } | |||
17936 | ||||
17937 | static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ | |||
17938 | i64 iTermOff = 0; | |||
17939 | int ii; | |||
17940 | ||||
17941 | Fts5Buffer buf1 = {0,0,0}; | |||
17942 | Fts5Buffer buf2 = {0,0,0}; | |||
17943 | ||||
17944 | ii = pLeaf->szLeaf; | |||
17945 | while( ii<pLeaf->nn && p->rc==SQLITE_OK0 ){ | |||
17946 | int res; | |||
17947 | i64 iOff; | |||
17948 | int nIncr; | |||
17949 | ||||
17950 | ii += fts5GetVarint32(&pLeaf->p[ii], nIncr)sqlite3Fts5GetVarint32(&pLeaf->p[ii],(u32*)&(nIncr )); | |||
17951 | iTermOff += nIncr; | |||
17952 | iOff = iTermOff; | |||
17953 | ||||
17954 | if( iOff>=pLeaf->szLeaf ){ | |||
17955 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17956 | }else if( iTermOff==nIncr ){ | |||
17957 | int nByte; | |||
17958 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nByte )); | |||
17959 | if( (iOff+nByte)>pLeaf->szLeaf ){ | |||
17960 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17961 | }else{ | |||
17962 | fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff])sqlite3Fts5BufferSet(&p->rc,&buf1,nByte,&pLeaf ->p[iOff]); | |||
17963 | } | |||
17964 | }else{ | |||
17965 | int nKeep, nByte; | |||
17966 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nKeep )); | |||
17967 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nByte )); | |||
17968 | if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ | |||
17969 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17970 | }else{ | |||
17971 | buf1.n = nKeep; | |||
17972 | fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&buf1,nByte,& pLeaf->p[iOff]); | |||
17973 | } | |||
17974 | ||||
17975 | if( p->rc==SQLITE_OK0 ){ | |||
17976 | res = fts5BufferCompare(&buf1, &buf2); | |||
17977 | if( res<=0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
17978 | } | |||
17979 | } | |||
17980 | fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p)sqlite3Fts5BufferSet(&p->rc,&buf2,buf1.n,buf1.p); | |||
17981 | } | |||
17982 | ||||
17983 | fts5BufferFree(&buf1)sqlite3Fts5BufferFree(&buf1); | |||
17984 | fts5BufferFree(&buf2)sqlite3Fts5BufferFree(&buf2); | |||
17985 | } | |||
17986 | ||||
17987 | static void fts5IndexIntegrityCheckSegment( | |||
17988 | Fts5Index *p, /* FTS5 backend object */ | |||
17989 | Fts5StructureSegment *pSeg /* Segment to check internal consistency */ | |||
17990 | ){ | |||
17991 | Fts5Config *pConfig = p->pConfig; | |||
17992 | int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE5); | |||
17993 | sqlite3_stmt *pStmt = 0; | |||
17994 | int rc2; | |||
17995 | int iIdxPrevLeaf = pSeg->pgnoFirst-1; | |||
17996 | int iDlidxPrevLeaf = pSeg->pgnoLast; | |||
17997 | ||||
17998 | if( pSeg->pgnoFirst==0 ) return; | |||
17999 | ||||
18000 | fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintfsqlite3_api->mprintf( | |||
18001 | "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d " | |||
18002 | "ORDER BY 1, 2", | |||
18003 | pConfig->zDb, pConfig->zName, pSeg->iSegid | |||
18004 | )); | |||
18005 | ||||
18006 | /* Iterate through the b-tree hierarchy. */ | |||
18007 | while( p->rc==SQLITE_OK0 && SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pStmt) ){ | |||
18008 | i64 iRow; /* Rowid for this leaf */ | |||
18009 | Fts5Data *pLeaf; /* Data for this leaf */ | |||
18010 | ||||
18011 | const char *zIdxTerm = (const char*)sqlite3_column_blobsqlite3_api->column_blob(pStmt, 1); | |||
18012 | int nIdxTerm = sqlite3_column_bytessqlite3_api->column_bytes(pStmt, 1); | |||
18013 | int iIdxLeaf = sqlite3_column_intsqlite3_api->column_int(pStmt, 2); | |||
18014 | int bIdxDlidx = sqlite3_column_intsqlite3_api->column_int(pStmt, 3); | |||
18015 | ||||
18016 | /* If the leaf in question has already been trimmed from the segment, | |||
18017 | ** ignore this b-tree entry. Otherwise, load it into memory. */ | |||
18018 | if( iIdxLeaf<pSeg->pgnoFirst ) continue; | |||
18019 | iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iIdxLeaf)) ); | |||
18020 | pLeaf = fts5LeafRead(p, iRow); | |||
18021 | if( pLeaf==0 ) break; | |||
18022 | ||||
18023 | /* Check that the leaf contains at least one term, and that it is equal | |||
18024 | ** to or larger than the split-key in zIdxTerm. Also check that if there | |||
18025 | ** is also a rowid pointer within the leaf page header, it points to a | |||
18026 | ** location before the term. */ | |||
18027 | if( pLeaf->nn<=pLeaf->szLeaf ){ | |||
18028 | ||||
18029 | if( nIdxTerm==0 | |||
18030 | && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE5 | |||
18031 | && pLeaf->nn==pLeaf->szLeaf | |||
18032 | && pLeaf->nn==4 | |||
18033 | ){ | |||
18034 | /* special case - the very first page in a segment keeps its %_idx | |||
18035 | ** entry even if all the terms are removed from it by secure-delete | |||
18036 | ** operations. */ | |||
18037 | }else{ | |||
18038 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18039 | } | |||
18040 | ||||
18041 | }else{ | |||
18042 | int iOff; /* Offset of first term on leaf */ | |||
18043 | int iRowidOff; /* Offset of first rowid on leaf */ | |||
18044 | int nTerm; /* Size of term on leaf in bytes */ | |||
18045 | int res; /* Comparison of term and split-key */ | |||
18046 | ||||
18047 | iOff = fts5LeafFirstTermOff(pLeaf); | |||
18048 | iRowidOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)); | |||
18049 | if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){ | |||
18050 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18051 | }else{ | |||
18052 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nTerm )); | |||
18053 | res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm))(((((nTerm) < (nIdxTerm)) ? (nTerm) : (nIdxTerm)))<=0 ? 0 : memcmp((&pLeaf->p[iOff]), (zIdxTerm), ((((nTerm) < (nIdxTerm)) ? (nTerm) : (nIdxTerm))))); | |||
18054 | if( res==0 ) res = nTerm - nIdxTerm; | |||
18055 | if( res<0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18056 | } | |||
18057 | ||||
18058 | fts5IntegrityCheckPgidx(p, pLeaf); | |||
18059 | } | |||
18060 | fts5DataRelease(pLeaf); | |||
18061 | if( p->rc ) break; | |||
18062 | ||||
18063 | /* Now check that the iter.nEmpty leaves following the current leaf | |||
18064 | ** (a) exist and (b) contain no terms. */ | |||
18065 | fts5IndexIntegrityCheckEmpty( | |||
18066 | p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1 | |||
18067 | ); | |||
18068 | if( p->rc ) break; | |||
18069 | ||||
18070 | /* If there is a doclist-index, check that it looks right. */ | |||
18071 | if( bIdxDlidx ){ | |||
18072 | Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ | |||
18073 | int iPrevLeaf = iIdxLeaf; | |||
18074 | int iSegid = pSeg->iSegid; | |||
18075 | int iPg = 0; | |||
18076 | i64 iKey; | |||
18077 | ||||
18078 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); | |||
18079 | fts5DlidxIterEof(p, pDlidx)==0; | |||
18080 | fts5DlidxIterNext(p, pDlidx) | |||
18081 | ){ | |||
18082 | ||||
18083 | /* Check any rowid-less pages that occur before the current leaf. */ | |||
18084 | for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ | |||
18085 | iKey = FTS5_SEGMENT_ROWID(iSegid, iPg)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPg)) ); | |||
18086 | pLeaf = fts5DataRead(p, iKey); | |||
18087 | if( pLeaf ){ | |||
18088 | if( fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p))!=0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18089 | fts5DataRelease(pLeaf); | |||
18090 | } | |||
18091 | } | |||
18092 | iPrevLeaf = fts5DlidxIterPgno(pDlidx); | |||
18093 | ||||
18094 | /* Check that the leaf page indicated by the iterator really does | |||
18095 | ** contain the rowid suggested by the same. */ | |||
18096 | iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPrevLeaf)) ); | |||
18097 | pLeaf = fts5DataRead(p, iKey); | |||
18098 | if( pLeaf ){ | |||
18099 | i64 iRowid; | |||
18100 | int iRowidOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)); | |||
18101 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | |||
18102 | if( iRowidOff>=pLeaf->szLeaf ){ | |||
18103 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18104 | }else if( bSecureDelete==0 || iRowidOff>0 ){ | |||
18105 | i64 iDlRowid = fts5DlidxIterRowid(pDlidx); | |||
18106 | fts5GetVarintsqlite3Fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); | |||
18107 | if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){ | |||
18108 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18109 | } | |||
18110 | } | |||
18111 | fts5DataRelease(pLeaf); | |||
18112 | } | |||
18113 | } | |||
18114 | ||||
18115 | iDlidxPrevLeaf = iPg; | |||
18116 | fts5DlidxIterFree(pDlidx); | |||
18117 | fts5TestDlidxReverse(p, iSegid, iIdxLeaf); | |||
18118 | }else{ | |||
18119 | iDlidxPrevLeaf = pSeg->pgnoLast; | |||
18120 | /* TODO: Check there is no doclist index */ | |||
18121 | } | |||
18122 | ||||
18123 | iIdxPrevLeaf = iIdxLeaf; | |||
18124 | } | |||
18125 | ||||
18126 | rc2 = sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
18127 | if( p->rc==SQLITE_OK0 ) p->rc = rc2; | |||
18128 | ||||
18129 | /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ | |||
18130 | #if 0 | |||
18131 | if( p->rc==SQLITE_OK0 && iter.iLeaf!=pSeg->pgnoLast ){ | |||
18132 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18133 | } | |||
18134 | #endif | |||
18135 | } | |||
18136 | ||||
18137 | ||||
18138 | /* | |||
18139 | ** Run internal checks to ensure that the FTS index (a) is internally | |||
18140 | ** consistent and (b) contains entries for which the XOR of the checksums | |||
18141 | ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. | |||
18142 | ** | |||
18143 | ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the | |||
18144 | ** checksum does not match. Return SQLITE_OK if all checks pass without | |||
18145 | ** error, or some other SQLite error code if another error (e.g. OOM) | |||
18146 | ** occurs. | |||
18147 | */ | |||
18148 | static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){ | |||
18149 | int eDetail = p->pConfig->eDetail; | |||
18150 | u64 cksum2 = 0; /* Checksum based on contents of indexes */ | |||
18151 | Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ | |||
18152 | Fts5Iter *pIter; /* Used to iterate through entire index */ | |||
18153 | Fts5Structure *pStruct; /* Index structure */ | |||
18154 | int iLvl, iSeg; | |||
18155 | ||||
18156 | #ifdef SQLITE_DEBUG | |||
18157 | /* Used by extra internal tests only run if NDEBUG is not defined */ | |||
18158 | u64 cksum3 = 0; /* Checksum based on contents of indexes */ | |||
18159 | Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ | |||
18160 | #endif | |||
18161 | const int flags = FTS5INDEX_QUERY_NOOUTPUT0x0020; | |||
18162 | ||||
18163 | /* Load the FTS index structure */ | |||
18164 | pStruct = fts5StructureRead(p); | |||
18165 | if( pStruct==0 ){ | |||
18166 | assert( p->rc!=SQLITE_OK )((void) (0)); | |||
18167 | return fts5IndexReturn(p); | |||
18168 | } | |||
18169 | ||||
18170 | /* Check that the internal nodes of each segment match the leaves */ | |||
18171 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
18172 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | |||
18173 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | |||
18174 | fts5IndexIntegrityCheckSegment(p, pSeg); | |||
18175 | } | |||
18176 | } | |||
18177 | ||||
18178 | /* The cksum argument passed to this function is a checksum calculated | |||
18179 | ** based on all expected entries in the FTS index (including prefix index | |||
18180 | ** entries). This block checks that a checksum calculated based on the | |||
18181 | ** actual contents of FTS index is identical. | |||
18182 | ** | |||
18183 | ** Two versions of the same checksum are calculated. The first (stack | |||
18184 | ** variable cksum2) based on entries extracted from the full-text index | |||
18185 | ** while doing a linear scan of each individual index in turn. | |||
18186 | ** | |||
18187 | ** As each term visited by the linear scans, a separate query for the | |||
18188 | ** same term is performed. cksum3 is calculated based on the entries | |||
18189 | ** extracted by these queries. | |||
18190 | */ | |||
18191 | for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter); | |||
18192 | fts5MultiIterEof(p, pIter)==0; | |||
18193 | fts5MultiIterNext(p, pIter, 0, 0) | |||
18194 | ){ | |||
18195 | int n; /* Size of term in bytes */ | |||
18196 | i64 iPos = 0; /* Position read from poslist */ | |||
18197 | int iOff = 0; /* Offset within poslist */ | |||
18198 | i64 iRowid = fts5MultiIterRowid(pIter); | |||
18199 | char *z = (char*)fts5MultiIterTerm(pIter, &n); | |||
18200 | ||||
18201 | /* If this is a new term, query for it. Update cksum3 with the results. */ | |||
18202 | fts5TestTerm(p, &term, z, n, cksum2, &cksum3); | |||
18203 | if( p->rc ) break; | |||
18204 | ||||
18205 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
18206 | if( 0==fts5MultiIterIsEmpty(p, pIter) ){ | |||
18207 | cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); | |||
18208 | } | |||
18209 | }else{ | |||
18210 | poslist.n = 0; | |||
18211 | fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist); | |||
18212 | fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0")sqlite3Fts5BufferAppendBlob(&p->rc,&poslist,4,(const u8*)"\0\0\0\0"); | |||
18213 | while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ | |||
18214 | int iCol = FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF); | |||
18215 | int iTokOff = FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF); | |||
18216 | cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); | |||
18217 | } | |||
18218 | } | |||
18219 | } | |||
18220 | fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); | |||
18221 | ||||
18222 | fts5MultiIterFree(pIter); | |||
18223 | if( p->rc==SQLITE_OK0 && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18224 | ||||
18225 | fts5StructureRelease(pStruct); | |||
18226 | #ifdef SQLITE_DEBUG | |||
18227 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | |||
18228 | #endif | |||
18229 | fts5BufferFree(&poslist)sqlite3Fts5BufferFree(&poslist); | |||
18230 | return fts5IndexReturn(p); | |||
18231 | } | |||
18232 | ||||
18233 | /************************************************************************* | |||
18234 | ************************************************************************** | |||
18235 | ** Below this point is the implementation of the fts5_decode() scalar | |||
18236 | ** function only. | |||
18237 | */ | |||
18238 | ||||
18239 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18240 | /* | |||
18241 | ** Decode a segment-data rowid from the %_data table. This function is | |||
18242 | ** the opposite of macro FTS5_SEGMENT_ROWID(). | |||
18243 | */ | |||
18244 | static void fts5DecodeRowid( | |||
18245 | i64 iRowid, /* Rowid from %_data table */ | |||
18246 | int *pbTombstone, /* OUT: Tombstone hash flag */ | |||
18247 | int *piSegid, /* OUT: Segment id */ | |||
18248 | int *pbDlidx, /* OUT: Dlidx flag */ | |||
18249 | int *piHeight, /* OUT: Height */ | |||
18250 | int *piPgno /* OUT: Page number */ | |||
18251 | ){ | |||
18252 | *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B31) - 1)); | |||
18253 | iRowid >>= FTS5_DATA_PAGE_B31; | |||
18254 | ||||
18255 | *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B5) - 1)); | |||
18256 | iRowid >>= FTS5_DATA_HEIGHT_B5; | |||
18257 | ||||
18258 | *pbDlidx = (int)(iRowid & 0x0001); | |||
18259 | iRowid >>= FTS5_DATA_DLI_B1; | |||
18260 | ||||
18261 | *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B16) - 1)); | |||
18262 | iRowid >>= FTS5_DATA_ID_B16; | |||
18263 | ||||
18264 | *pbTombstone = (int)(iRowid & 0x0001); | |||
18265 | } | |||
18266 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18267 | ||||
18268 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18269 | static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ | |||
18270 | int iSegid, iHeight, iPgno, bDlidx, bTomb; /* Rowid components */ | |||
18271 | fts5DecodeRowid(iKey, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno); | |||
18272 | ||||
18273 | if( iSegid==0 ){ | |||
18274 | if( iKey==FTS5_AVERAGES_ROWID1 ){ | |||
18275 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} "); | |||
18276 | }else{ | |||
18277 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}"); | |||
18278 | } | |||
18279 | } | |||
18280 | else{ | |||
18281 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}", | |||
18282 | bDlidx ? "dlidx " : "", | |||
18283 | bTomb ? "tombstone " : "", | |||
18284 | iSegid, iHeight, iPgno | |||
18285 | ); | |||
18286 | } | |||
18287 | } | |||
18288 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18289 | ||||
18290 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18291 | static void fts5DebugStructure( | |||
18292 | int *pRc, /* IN/OUT: error code */ | |||
18293 | Fts5Buffer *pBuf, | |||
18294 | Fts5Structure *p | |||
18295 | ){ | |||
18296 | int iLvl, iSeg; /* Iterate through levels, segments */ | |||
18297 | ||||
18298 | for(iLvl=0; iLvl<p->nLevel; iLvl++){ | |||
18299 | Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; | |||
18300 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, | |||
18301 | " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg | |||
18302 | ); | |||
18303 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | |||
18304 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | |||
18305 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d", | |||
18306 | pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast | |||
18307 | ); | |||
18308 | if( pSeg->iOrigin1>0 ){ | |||
18309 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld", | |||
18310 | pSeg->iOrigin1, pSeg->iOrigin2 | |||
18311 | ); | |||
18312 | } | |||
18313 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); | |||
18314 | } | |||
18315 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); | |||
18316 | } | |||
18317 | } | |||
18318 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18319 | ||||
18320 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18321 | /* | |||
18322 | ** This is part of the fts5_decode() debugging aid. | |||
18323 | ** | |||
18324 | ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This | |||
18325 | ** function appends a human-readable representation of the same object | |||
18326 | ** to the buffer passed as the second argument. | |||
18327 | */ | |||
18328 | static void fts5DecodeStructure( | |||
18329 | int *pRc, /* IN/OUT: error code */ | |||
18330 | Fts5Buffer *pBuf, | |||
18331 | const u8 *pBlob, int nBlob | |||
18332 | ){ | |||
18333 | int rc; /* Return code */ | |||
18334 | Fts5Structure *p = 0; /* Decoded structure object */ | |||
18335 | ||||
18336 | rc = fts5StructureDecode(pBlob, nBlob, 0, &p); | |||
18337 | if( rc!=SQLITE_OK0 ){ | |||
18338 | *pRc = rc; | |||
18339 | return; | |||
18340 | } | |||
18341 | ||||
18342 | fts5DebugStructure(pRc, pBuf, p); | |||
18343 | fts5StructureRelease(p); | |||
18344 | } | |||
18345 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18346 | ||||
18347 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18348 | /* | |||
18349 | ** This is part of the fts5_decode() debugging aid. | |||
18350 | ** | |||
18351 | ** Arguments pBlob/nBlob contain an "averages" record. This function | |||
18352 | ** appends a human-readable representation of record to the buffer passed | |||
18353 | ** as the second argument. | |||
18354 | */ | |||
18355 | static void fts5DecodeAverages( | |||
18356 | int *pRc, /* IN/OUT: error code */ | |||
18357 | Fts5Buffer *pBuf, | |||
18358 | const u8 *pBlob, int nBlob | |||
18359 | ){ | |||
18360 | int i = 0; | |||
18361 | const char *zSpace = ""; | |||
18362 | ||||
18363 | while( i<nBlob ){ | |||
18364 | u64 iVal; | |||
18365 | i += sqlite3Fts5GetVarint(&pBlob[i], &iVal); | |||
18366 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal); | |||
18367 | zSpace = " "; | |||
18368 | } | |||
18369 | } | |||
18370 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18371 | ||||
18372 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18373 | /* | |||
18374 | ** Buffer (a/n) is assumed to contain a list of serialized varints. Read | |||
18375 | ** each varint and append its string representation to buffer pBuf. Return | |||
18376 | ** after either the input buffer is exhausted or a 0 value is read. | |||
18377 | ** | |||
18378 | ** The return value is the number of bytes read from the input buffer. | |||
18379 | */ | |||
18380 | static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ | |||
18381 | int iOff = 0; | |||
18382 | while( iOff<n ){ | |||
18383 | int iVal; | |||
18384 | iOff += fts5GetVarint32(&a[iOff], iVal)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(iVal)); | |||
18385 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); | |||
18386 | } | |||
18387 | return iOff; | |||
18388 | } | |||
18389 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18390 | ||||
18391 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18392 | /* | |||
18393 | ** The start of buffer (a/n) contains the start of a doclist. The doclist | |||
18394 | ** may or may not finish within the buffer. This function appends a text | |||
18395 | ** representation of the part of the doclist that is present to buffer | |||
18396 | ** pBuf. | |||
18397 | ** | |||
18398 | ** The return value is the number of bytes read from the input buffer. | |||
18399 | */ | |||
18400 | static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ | |||
18401 | i64 iDocid = 0; | |||
18402 | int iOff = 0; | |||
18403 | ||||
18404 | if( n>0 ){ | |||
18405 | iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); | |||
18406 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); | |||
18407 | } | |||
18408 | while( iOff<n ){ | |||
18409 | int nPos; | |||
18410 | int bDel; | |||
18411 | iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel); | |||
18412 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":""); | |||
18413 | iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)(((n-iOff) < (nPos)) ? (n-iOff) : (nPos))); | |||
18414 | if( iOff<n ){ | |||
18415 | i64 iDelta; | |||
18416 | iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); | |||
18417 | iDocid += iDelta; | |||
18418 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); | |||
18419 | } | |||
18420 | } | |||
18421 | ||||
18422 | return iOff; | |||
18423 | } | |||
18424 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18425 | ||||
18426 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18427 | /* | |||
18428 | ** This function is part of the fts5_decode() debugging function. It is | |||
18429 | ** only ever used with detail=none tables. | |||
18430 | ** | |||
18431 | ** Buffer (pData/nData) contains a doclist in the format used by detail=none | |||
18432 | ** tables. This function appends a human-readable version of that list to | |||
18433 | ** buffer pBuf. | |||
18434 | ** | |||
18435 | ** If *pRc is other than SQLITE_OK when this function is called, it is a | |||
18436 | ** no-op. If an OOM or other error occurs within this function, *pRc is | |||
18437 | ** set to an SQLite error code before returning. The final state of buffer | |||
18438 | ** pBuf is undefined in this case. | |||
18439 | */ | |||
18440 | static void fts5DecodeRowidList( | |||
18441 | int *pRc, /* IN/OUT: Error code */ | |||
18442 | Fts5Buffer *pBuf, /* Buffer to append text to */ | |||
18443 | const u8 *pData, int nData /* Data to decode list-of-rowids from */ | |||
18444 | ){ | |||
18445 | int i = 0; | |||
18446 | i64 iRowid = 0; | |||
18447 | ||||
18448 | while( i<nData ){ | |||
18449 | const char *zApp = ""; | |||
18450 | u64 iVal; | |||
18451 | i += sqlite3Fts5GetVarint(&pData[i], &iVal); | |||
18452 | iRowid += iVal; | |||
18453 | ||||
18454 | if( i<nData && pData[i]==0x00 ){ | |||
18455 | i++; | |||
18456 | if( i<nData && pData[i]==0x00 ){ | |||
18457 | i++; | |||
18458 | zApp = "+"; | |||
18459 | }else{ | |||
18460 | zApp = "*"; | |||
18461 | } | |||
18462 | } | |||
18463 | ||||
18464 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp); | |||
18465 | } | |||
18466 | } | |||
18467 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18468 | ||||
18469 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18470 | static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){ | |||
18471 | int ii; | |||
18472 | fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1)( (u32)((pBuf)->n) + (u32)(pTerm->n*2 + 1) <= (u32)( (pBuf)->nSpace) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),( pTerm->n*2 + 1)+(pBuf)->n) ); | |||
18473 | if( *pRc==SQLITE_OK0 ){ | |||
18474 | for(ii=0; ii<pTerm->n; ii++){ | |||
18475 | if( pTerm->p[ii]==0x00 ){ | |||
18476 | pBuf->p[pBuf->n++] = '\\'; | |||
18477 | pBuf->p[pBuf->n++] = '0'; | |||
18478 | }else{ | |||
18479 | pBuf->p[pBuf->n++] = pTerm->p[ii]; | |||
18480 | } | |||
18481 | } | |||
18482 | pBuf->p[pBuf->n] = 0x00; | |||
18483 | } | |||
18484 | } | |||
18485 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18486 | ||||
18487 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18488 | /* | |||
18489 | ** The implementation of user-defined scalar function fts5_decode(). | |||
18490 | */ | |||
18491 | static void fts5DecodeFunction( | |||
18492 | sqlite3_context *pCtx, /* Function call context */ | |||
18493 | int nArg, /* Number of args (always 2) */ | |||
18494 | sqlite3_value **apVal /* Function arguments */ | |||
18495 | ){ | |||
18496 | i64 iRowid; /* Rowid for record being decoded */ | |||
18497 | int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ | |||
18498 | int bTomb; | |||
18499 | const u8 *aBlob; int n; /* Record to decode */ | |||
18500 | u8 *a = 0; | |||
18501 | Fts5Buffer s; /* Build up text to return here */ | |||
18502 | int rc = SQLITE_OK0; /* Return code */ | |||
18503 | sqlite3_int64 nSpace = 0; | |||
18504 | int eDetailNone = (sqlite3_user_datasqlite3_api->user_data(pCtx)!=0); | |||
18505 | ||||
18506 | assert( nArg==2 )((void) (0)); | |||
18507 | UNUSED_PARAM(nArg)(void)(nArg); | |||
18508 | memset(&s, 0, sizeof(Fts5Buffer)); | |||
18509 | iRowid = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); | |||
18510 | ||||
18511 | /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] | |||
18512 | ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents | |||
18513 | ** buffer overreads even if the record is corrupt. */ | |||
18514 | n = sqlite3_value_bytessqlite3_api->value_bytes(apVal[1]); | |||
18515 | aBlob = sqlite3_value_blobsqlite3_api->value_blob(apVal[1]); | |||
18516 | nSpace = ((i64)n) + FTS5_DATA_ZERO_PADDING8; | |||
18517 | a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); | |||
18518 | if( a==0 ) goto decode_out; | |||
18519 | if( n>0 ) memcpy(a, aBlob, n); | |||
18520 | ||||
18521 | fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno); | |||
18522 | ||||
18523 | fts5DebugRowid(&rc, &s, iRowid); | |||
18524 | if( bDlidx ){ | |||
18525 | Fts5Data dlidx; | |||
18526 | Fts5DlidxLvl lvl; | |||
18527 | ||||
18528 | dlidx.p = a; | |||
18529 | dlidx.nn = n; | |||
18530 | ||||
18531 | memset(&lvl, 0, sizeof(Fts5DlidxLvl)); | |||
18532 | lvl.pData = &dlidx; | |||
18533 | lvl.iLeafPgno = iPgno; | |||
18534 | ||||
18535 | for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ | |||
18536 | sqlite3Fts5BufferAppendPrintf(&rc, &s, | |||
18537 | " %d(%lld)", lvl.iLeafPgno, lvl.iRowid | |||
18538 | ); | |||
18539 | } | |||
18540 | }else if( bTomb ){ | |||
18541 | u32 nElem = fts5GetU32(&a[4]); | |||
18542 | int szKey = (aBlob[0]==4 || aBlob[0]==8) ? aBlob[0] : 8; | |||
18543 | int nSlot = (n - 8) / szKey; | |||
18544 | int ii; | |||
18545 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem); | |||
18546 | if( aBlob[1] ){ | |||
18547 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0"); | |||
18548 | } | |||
18549 | for(ii=0; ii<nSlot; ii++){ | |||
18550 | u64 iVal = 0; | |||
18551 | if( szKey==4 ){ | |||
18552 | u32 *aSlot = (u32*)&aBlob[8]; | |||
18553 | if( aSlot[ii] ) iVal = fts5GetU32((u8*)&aSlot[ii]); | |||
18554 | }else{ | |||
18555 | u64 *aSlot = (u64*)&aBlob[8]; | |||
18556 | if( aSlot[ii] ) iVal = fts5GetU64((u8*)&aSlot[ii]); | |||
18557 | } | |||
18558 | if( iVal!=0 ){ | |||
18559 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal); | |||
18560 | } | |||
18561 | } | |||
18562 | }else if( iSegid==0 ){ | |||
18563 | if( iRowid==FTS5_AVERAGES_ROWID1 ){ | |||
18564 | fts5DecodeAverages(&rc, &s, a, n); | |||
18565 | }else{ | |||
18566 | fts5DecodeStructure(&rc, &s, a, n); | |||
18567 | } | |||
18568 | }else if( eDetailNone ){ | |||
18569 | Fts5Buffer term; /* Current term read from page */ | |||
18570 | int szLeaf; | |||
18571 | int iPgidxOff = szLeaf = fts5GetU16(&a[2]); | |||
18572 | int iTermOff; | |||
18573 | int nKeep = 0; | |||
18574 | int iOff; | |||
18575 | ||||
18576 | memset(&term, 0, sizeof(Fts5Buffer)); | |||
18577 | ||||
18578 | /* Decode any entries that occur before the first term. */ | |||
18579 | if( szLeaf<n ){ | |||
18580 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(iTermOff )); | |||
18581 | }else{ | |||
18582 | iTermOff = szLeaf; | |||
18583 | } | |||
18584 | fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4); | |||
18585 | ||||
18586 | iOff = iTermOff; | |||
18587 | while( iOff<szLeaf && rc==SQLITE_OK0 ){ | |||
18588 | int nAppend; | |||
18589 | ||||
18590 | /* Read the term data for the next term*/ | |||
18591 | iOff += fts5GetVarint32(&a[iOff], nAppend)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nAppend)); | |||
18592 | term.n = nKeep; | |||
18593 | fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff])sqlite3Fts5BufferAppendBlob(&rc,&term,nAppend,&a[ iOff]); | |||
18594 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); | |||
18595 | fts5BufferAppendTerm(&rc, &s, &term); | |||
18596 | iOff += nAppend; | |||
18597 | ||||
18598 | /* Figure out where the doclist for this term ends */ | |||
18599 | if( iPgidxOff<n ){ | |||
18600 | int nIncr; | |||
18601 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nIncr)); | |||
18602 | iTermOff += nIncr; | |||
18603 | }else{ | |||
18604 | iTermOff = szLeaf; | |||
18605 | } | |||
18606 | if( iTermOff>szLeaf ){ | |||
18607 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18608 | }else{ | |||
18609 | fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff); | |||
18610 | } | |||
18611 | iOff = iTermOff; | |||
18612 | if( iOff<szLeaf ){ | |||
18613 | iOff += fts5GetVarint32(&a[iOff], nKeep)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nKeep)); | |||
18614 | } | |||
18615 | } | |||
18616 | ||||
18617 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | |||
18618 | }else{ | |||
18619 | Fts5Buffer term; /* Current term read from page */ | |||
18620 | int szLeaf; /* Offset of pgidx in a[] */ | |||
18621 | int iPgidxOff; | |||
18622 | int iPgidxPrev = 0; /* Previous value read from pgidx */ | |||
18623 | int iTermOff = 0; | |||
18624 | int iRowidOff = 0; | |||
18625 | int iOff; | |||
18626 | int nDoclist; | |||
18627 | ||||
18628 | memset(&term, 0, sizeof(Fts5Buffer)); | |||
18629 | ||||
18630 | if( n<4 ){ | |||
18631 | sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt"); | |||
18632 | goto decode_out; | |||
18633 | }else{ | |||
18634 | iRowidOff = fts5GetU16(&a[0]); | |||
18635 | iPgidxOff = szLeaf = fts5GetU16(&a[2]); | |||
18636 | if( iPgidxOff<n ){ | |||
18637 | fts5GetVarint32(&a[iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(iTermOff )); | |||
18638 | }else if( iPgidxOff>n ){ | |||
18639 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18640 | goto decode_out; | |||
18641 | } | |||
18642 | } | |||
18643 | ||||
18644 | /* Decode the position list tail at the start of the page */ | |||
18645 | if( iRowidOff!=0 ){ | |||
18646 | iOff = iRowidOff; | |||
18647 | }else if( iTermOff!=0 ){ | |||
18648 | iOff = iTermOff; | |||
18649 | }else{ | |||
18650 | iOff = szLeaf; | |||
18651 | } | |||
18652 | if( iOff>n ){ | |||
18653 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18654 | goto decode_out; | |||
18655 | } | |||
18656 | fts5DecodePoslist(&rc, &s, &a[4], iOff-4); | |||
18657 | ||||
18658 | /* Decode any more doclist data that appears on the page before the | |||
18659 | ** first term. */ | |||
18660 | nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff; | |||
18661 | if( nDoclist+iOff>n ){ | |||
18662 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18663 | goto decode_out; | |||
18664 | } | |||
18665 | fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); | |||
18666 | ||||
18667 | while( iPgidxOff<n && rc==SQLITE_OK0 ){ | |||
18668 | int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ | |||
18669 | int nByte; /* Bytes of data */ | |||
18670 | int iEnd; | |||
18671 | ||||
18672 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nByte)); | |||
18673 | iPgidxPrev += nByte; | |||
18674 | iOff = iPgidxPrev; | |||
18675 | ||||
18676 | if( iPgidxOff<n ){ | |||
18677 | fts5GetVarint32(&a[iPgidxOff], nByte)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nByte)); | |||
18678 | iEnd = iPgidxPrev + nByte; | |||
18679 | }else{ | |||
18680 | iEnd = szLeaf; | |||
18681 | } | |||
18682 | if( iEnd>szLeaf ){ | |||
18683 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18684 | break; | |||
18685 | } | |||
18686 | ||||
18687 | if( bFirst==0 ){ | |||
18688 | iOff += fts5GetVarint32(&a[iOff], nByte)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nByte)); | |||
18689 | if( nByte>term.n ){ | |||
18690 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18691 | break; | |||
18692 | } | |||
18693 | term.n = nByte; | |||
18694 | } | |||
18695 | iOff += fts5GetVarint32(&a[iOff], nByte)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nByte)); | |||
18696 | if( iOff+nByte>n ){ | |||
18697 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
18698 | break; | |||
18699 | } | |||
18700 | fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff])sqlite3Fts5BufferAppendBlob(&rc,&term,nByte,&a[iOff ]); | |||
18701 | iOff += nByte; | |||
18702 | ||||
18703 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); | |||
18704 | fts5BufferAppendTerm(&rc, &s, &term); | |||
18705 | iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); | |||
18706 | } | |||
18707 | ||||
18708 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | |||
18709 | } | |||
18710 | ||||
18711 | decode_out: | |||
18712 | sqlite3_freesqlite3_api->free(a); | |||
18713 | if( rc==SQLITE_OK0 ){ | |||
18714 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
18715 | }else{ | |||
18716 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
18717 | } | |||
18718 | fts5BufferFree(&s)sqlite3Fts5BufferFree(&s); | |||
18719 | } | |||
18720 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18721 | ||||
18722 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18723 | /* | |||
18724 | ** The implementation of user-defined scalar function fts5_rowid(). | |||
18725 | */ | |||
18726 | static void fts5RowidFunction( | |||
18727 | sqlite3_context *pCtx, /* Function call context */ | |||
18728 | int nArg, /* Number of args (always 2) */ | |||
18729 | sqlite3_value **apVal /* Function arguments */ | |||
18730 | ){ | |||
18731 | const char *zArg; | |||
18732 | if( nArg==0 ){ | |||
18733 | sqlite3_result_errorsqlite3_api->result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); | |||
18734 | }else{ | |||
18735 | zArg = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[0]); | |||
18736 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zArg, "segment") ){ | |||
18737 | i64 iRowid; | |||
18738 | int segid, pgno; | |||
18739 | if( nArg!=3 ){ | |||
18740 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | |||
18741 | "should be: fts5_rowid('segment', segid, pgno))", -1 | |||
18742 | ); | |||
18743 | }else{ | |||
18744 | segid = sqlite3_value_intsqlite3_api->value_int(apVal[1]); | |||
18745 | pgno = sqlite3_value_intsqlite3_api->value_int(apVal[2]); | |||
18746 | iRowid = FTS5_SEGMENT_ROWID(segid, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | |||
18747 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, iRowid); | |||
18748 | } | |||
18749 | }else{ | |||
18750 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | |||
18751 | "first arg to fts5_rowid() must be 'segment'" , -1 | |||
18752 | ); | |||
18753 | } | |||
18754 | } | |||
18755 | } | |||
18756 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18757 | ||||
18758 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18759 | ||||
18760 | typedef struct Fts5StructVtab Fts5StructVtab; | |||
18761 | struct Fts5StructVtab { | |||
18762 | sqlite3_vtab base; | |||
18763 | }; | |||
18764 | ||||
18765 | typedef struct Fts5StructVcsr Fts5StructVcsr; | |||
18766 | struct Fts5StructVcsr { | |||
18767 | sqlite3_vtab_cursor base; | |||
18768 | Fts5Structure *pStruct; | |||
18769 | int iLevel; | |||
18770 | int iSeg; | |||
18771 | int iRowid; | |||
18772 | }; | |||
18773 | ||||
18774 | /* | |||
18775 | ** Create a new fts5_structure() table-valued function. | |||
18776 | */ | |||
18777 | static int fts5structConnectMethod( | |||
18778 | sqlite3 *db, | |||
18779 | void *pAux, | |||
18780 | int argc, const char *const*argv, | |||
18781 | sqlite3_vtab **ppVtab, | |||
18782 | char **pzErr | |||
18783 | ){ | |||
18784 | Fts5StructVtab *pNew = 0; | |||
18785 | int rc = SQLITE_OK0; | |||
18786 | ||||
18787 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, | |||
18788 | "CREATE TABLE xyz(" | |||
18789 | "level, segment, merge, segid, leaf1, leaf2, loc1, loc2, " | |||
18790 | "npgtombstone, nentrytombstone, nentry, struct HIDDEN);" | |||
18791 | ); | |||
18792 | if( rc==SQLITE_OK0 ){ | |||
18793 | pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | |||
18794 | } | |||
18795 | ||||
18796 | *ppVtab = (sqlite3_vtab*)pNew; | |||
18797 | return rc; | |||
18798 | } | |||
18799 | ||||
18800 | /* | |||
18801 | ** We must have a single struct=? constraint that will be passed through | |||
18802 | ** into the xFilter method. If there is no valid struct=? constraint, | |||
18803 | ** then return an SQLITE_CONSTRAINT error. | |||
18804 | */ | |||
18805 | static int fts5structBestIndexMethod( | |||
18806 | sqlite3_vtab *tab, | |||
18807 | sqlite3_index_info *pIdxInfo | |||
18808 | ){ | |||
18809 | int i; | |||
18810 | int rc = SQLITE_CONSTRAINT19; | |||
18811 | struct sqlite3_index_constraint *p; | |||
18812 | pIdxInfo->estimatedCost = (double)100; | |||
18813 | pIdxInfo->estimatedRows = 100; | |||
18814 | pIdxInfo->idxNum = 0; | |||
18815 | for(i=0, p=pIdxInfo->aConstraint; i<pIdxInfo->nConstraint; i++, p++){ | |||
18816 | if( p->usable==0 ) continue; | |||
18817 | if( p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && p->iColumn==11 ){ | |||
18818 | rc = SQLITE_OK0; | |||
18819 | pIdxInfo->aConstraintUsage[i].omit = 1; | |||
18820 | pIdxInfo->aConstraintUsage[i].argvIndex = 1; | |||
18821 | break; | |||
18822 | } | |||
18823 | } | |||
18824 | return rc; | |||
18825 | } | |||
18826 | ||||
18827 | /* | |||
18828 | ** This method is the destructor for bytecodevtab objects. | |||
18829 | */ | |||
18830 | static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){ | |||
18831 | Fts5StructVtab *p = (Fts5StructVtab*)pVtab; | |||
18832 | sqlite3_freesqlite3_api->free(p); | |||
18833 | return SQLITE_OK0; | |||
18834 | } | |||
18835 | ||||
18836 | /* | |||
18837 | ** Constructor for a new bytecodevtab_cursor object. | |||
18838 | */ | |||
18839 | static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){ | |||
18840 | int rc = SQLITE_OK0; | |||
18841 | Fts5StructVcsr *pNew = 0; | |||
18842 | ||||
18843 | pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | |||
18844 | *ppCsr = (sqlite3_vtab_cursor*)pNew; | |||
18845 | ||||
18846 | return SQLITE_OK0; | |||
18847 | } | |||
18848 | ||||
18849 | /* | |||
18850 | ** Destructor for a bytecodevtab_cursor. | |||
18851 | */ | |||
18852 | static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){ | |||
18853 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
18854 | fts5StructureRelease(pCsr->pStruct); | |||
18855 | sqlite3_freesqlite3_api->free(pCsr); | |||
18856 | return SQLITE_OK0; | |||
18857 | } | |||
18858 | ||||
18859 | ||||
18860 | /* | |||
18861 | ** Advance a bytecodevtab_cursor to its next row of output. | |||
18862 | */ | |||
18863 | static int fts5structNextMethod(sqlite3_vtab_cursor *cur){ | |||
18864 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
18865 | Fts5Structure *p = pCsr->pStruct; | |||
18866 | ||||
18867 | assert( pCsr->pStruct )((void) (0)); | |||
18868 | pCsr->iSeg++; | |||
18869 | pCsr->iRowid++; | |||
18870 | while( pCsr->iLevel<p->nLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){ | |||
18871 | pCsr->iLevel++; | |||
18872 | pCsr->iSeg = 0; | |||
18873 | } | |||
18874 | if( pCsr->iLevel>=p->nLevel ){ | |||
18875 | fts5StructureRelease(pCsr->pStruct); | |||
18876 | pCsr->pStruct = 0; | |||
18877 | } | |||
18878 | return SQLITE_OK0; | |||
18879 | } | |||
18880 | ||||
18881 | /* | |||
18882 | ** Return TRUE if the cursor has been moved off of the last | |||
18883 | ** row of output. | |||
18884 | */ | |||
18885 | static int fts5structEofMethod(sqlite3_vtab_cursor *cur){ | |||
18886 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
18887 | return pCsr->pStruct==0; | |||
18888 | } | |||
18889 | ||||
18890 | static int fts5structRowidMethod( | |||
18891 | sqlite3_vtab_cursor *cur, | |||
18892 | sqlite_int64 *piRowid | |||
18893 | ){ | |||
18894 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
18895 | *piRowid = pCsr->iRowid; | |||
18896 | return SQLITE_OK0; | |||
18897 | } | |||
18898 | ||||
18899 | /* | |||
18900 | ** Return values of columns for the row at which the bytecodevtab_cursor | |||
18901 | ** is currently pointing. | |||
18902 | */ | |||
18903 | static int fts5structColumnMethod( | |||
18904 | sqlite3_vtab_cursor *cur, /* The cursor */ | |||
18905 | sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ | |||
18906 | int i /* Which column to return */ | |||
18907 | ){ | |||
18908 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
18909 | Fts5Structure *p = pCsr->pStruct; | |||
18910 | Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg]; | |||
18911 | ||||
18912 | switch( i ){ | |||
18913 | case 0: /* level */ | |||
18914 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iLevel); | |||
18915 | break; | |||
18916 | case 1: /* segment */ | |||
18917 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iSeg); | |||
18918 | break; | |||
18919 | case 2: /* merge */ | |||
18920 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge); | |||
18921 | break; | |||
18922 | case 3: /* segid */ | |||
18923 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->iSegid); | |||
18924 | break; | |||
18925 | case 4: /* leaf1 */ | |||
18926 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->pgnoFirst); | |||
18927 | break; | |||
18928 | case 5: /* leaf2 */ | |||
18929 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->pgnoLast); | |||
18930 | break; | |||
18931 | case 6: /* origin1 */ | |||
18932 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->iOrigin1); | |||
18933 | break; | |||
18934 | case 7: /* origin2 */ | |||
18935 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->iOrigin2); | |||
18936 | break; | |||
18937 | case 8: /* npgtombstone */ | |||
18938 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->nPgTombstone); | |||
18939 | break; | |||
18940 | case 9: /* nentrytombstone */ | |||
18941 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->nEntryTombstone); | |||
18942 | break; | |||
18943 | case 10: /* nentry */ | |||
18944 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->nEntry); | |||
18945 | break; | |||
18946 | } | |||
18947 | return SQLITE_OK0; | |||
18948 | } | |||
18949 | ||||
18950 | /* | |||
18951 | ** Initialize a cursor. | |||
18952 | ** | |||
18953 | ** idxNum==0 means show all subprograms | |||
18954 | ** idxNum==1 means show only the main bytecode and omit subprograms. | |||
18955 | */ | |||
18956 | static int fts5structFilterMethod( | |||
18957 | sqlite3_vtab_cursor *pVtabCursor, | |||
18958 | int idxNum, const char *idxStr, | |||
18959 | int argc, sqlite3_value **argv | |||
18960 | ){ | |||
18961 | Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor; | |||
18962 | int rc = SQLITE_OK0; | |||
18963 | ||||
18964 | const u8 *aBlob = 0; | |||
18965 | int nBlob = 0; | |||
18966 | ||||
18967 | assert( argc==1 )((void) (0)); | |||
18968 | fts5StructureRelease(pCsr->pStruct); | |||
18969 | pCsr->pStruct = 0; | |||
18970 | ||||
18971 | nBlob = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]); | |||
18972 | aBlob = (const u8*)sqlite3_value_blobsqlite3_api->value_blob(argv[0]); | |||
18973 | rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct); | |||
18974 | if( rc==SQLITE_OK0 ){ | |||
18975 | pCsr->iLevel = 0; | |||
18976 | pCsr->iRowid = 0; | |||
18977 | pCsr->iSeg = -1; | |||
18978 | rc = fts5structNextMethod(pVtabCursor); | |||
18979 | } | |||
18980 | ||||
18981 | return rc; | |||
18982 | } | |||
18983 | ||||
18984 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
18985 | ||||
18986 | /* | |||
18987 | ** This is called as part of registering the FTS5 module with database | |||
18988 | ** connection db. It registers several user-defined scalar functions useful | |||
18989 | ** with FTS5. | |||
18990 | ** | |||
18991 | ** If successful, SQLITE_OK is returned. If an error occurs, some other | |||
18992 | ** SQLite error code is returned instead. | |||
18993 | */ | |||
18994 | static int sqlite3Fts5IndexInit(sqlite3 *db){ | |||
18995 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
18996 | int rc = sqlite3_create_functionsqlite3_api->create_function( | |||
18997 | db, "fts5_decode", 2, SQLITE_UTF81, 0, fts5DecodeFunction, 0, 0 | |||
18998 | ); | |||
18999 | ||||
19000 | if( rc==SQLITE_OK0 ){ | |||
19001 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
19002 | db, "fts5_decode_none", 2, | |||
19003 | SQLITE_UTF81, (void*)db, fts5DecodeFunction, 0, 0 | |||
19004 | ); | |||
19005 | } | |||
19006 | ||||
19007 | if( rc==SQLITE_OK0 ){ | |||
19008 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
19009 | db, "fts5_rowid", -1, SQLITE_UTF81, 0, fts5RowidFunction, 0, 0 | |||
19010 | ); | |||
19011 | } | |||
19012 | ||||
19013 | if( rc==SQLITE_OK0 ){ | |||
19014 | static const sqlite3_module fts5structure_module = { | |||
19015 | 0, /* iVersion */ | |||
19016 | 0, /* xCreate */ | |||
19017 | fts5structConnectMethod, /* xConnect */ | |||
19018 | fts5structBestIndexMethod, /* xBestIndex */ | |||
19019 | fts5structDisconnectMethod, /* xDisconnect */ | |||
19020 | 0, /* xDestroy */ | |||
19021 | fts5structOpenMethod, /* xOpen */ | |||
19022 | fts5structCloseMethod, /* xClose */ | |||
19023 | fts5structFilterMethod, /* xFilter */ | |||
19024 | fts5structNextMethod, /* xNext */ | |||
19025 | fts5structEofMethod, /* xEof */ | |||
19026 | fts5structColumnMethod, /* xColumn */ | |||
19027 | fts5structRowidMethod, /* xRowid */ | |||
19028 | 0, /* xUpdate */ | |||
19029 | 0, /* xBegin */ | |||
19030 | 0, /* xSync */ | |||
19031 | 0, /* xCommit */ | |||
19032 | 0, /* xRollback */ | |||
19033 | 0, /* xFindFunction */ | |||
19034 | 0, /* xRename */ | |||
19035 | 0, /* xSavepoint */ | |||
19036 | 0, /* xRelease */ | |||
19037 | 0, /* xRollbackTo */ | |||
19038 | 0, /* xShadowName */ | |||
19039 | 0 /* xIntegrity */ | |||
19040 | }; | |||
19041 | rc = sqlite3_create_modulesqlite3_api->create_module(db, "fts5_structure", &fts5structure_module, 0); | |||
19042 | } | |||
19043 | return rc; | |||
19044 | #else | |||
19045 | return SQLITE_OK0; | |||
19046 | UNUSED_PARAM(db)(void)(db); | |||
19047 | #endif | |||
19048 | } | |||
19049 | ||||
19050 | ||||
19051 | static int sqlite3Fts5IndexReset(Fts5Index *p){ | |||
19052 | assert( p->pStruct==0 || p->iStructVersion!=0 )((void) (0)); | |||
19053 | if( fts5IndexDataVersion(p)!=p->iStructVersion ){ | |||
19054 | fts5StructureInvalidate(p); | |||
19055 | } | |||
19056 | return fts5IndexReturn(p); | |||
19057 | } | |||
19058 | ||||
19059 | #line 1 "fts5_main.c" | |||
19060 | /* | |||
19061 | ** 2014 Jun 09 | |||
19062 | ** | |||
19063 | ** The author disclaims copyright to this source code. In place of | |||
19064 | ** a legal notice, here is a blessing: | |||
19065 | ** | |||
19066 | ** May you do good and not evil. | |||
19067 | ** May you find forgiveness for yourself and forgive others. | |||
19068 | ** May you share freely, never taking more than you give. | |||
19069 | ** | |||
19070 | ****************************************************************************** | |||
19071 | ** | |||
19072 | ** This is an SQLite module implementing full-text search. | |||
19073 | */ | |||
19074 | ||||
19075 | ||||
19076 | /* #include "fts5Int.h" */ | |||
19077 | ||||
19078 | /* | |||
19079 | ** This variable is set to false when running tests for which the on disk | |||
19080 | ** structures should not be corrupt. Otherwise, true. If it is false, extra | |||
19081 | ** assert() conditions in the fts5 code are activated - conditions that are | |||
19082 | ** only true if it is guaranteed that the fts5 database is not corrupt. | |||
19083 | */ | |||
/* Defined only in SQLITE_DEBUG builds. The initial value is 1 (true). */
19084 | #ifdef SQLITE_DEBUG | |||
19085 | int sqlite3_fts5_may_be_corrupt = 1; | |||
19086 | #endif | |||
19087 | ||||
19088 | ||||
/* Forward typedefs for struct types used by the code that follows (for
** example Fts5Auxiliary, whose definition appears further down). */
19089 | typedef struct Fts5Auxdata Fts5Auxdata; | |||
19090 | typedef struct Fts5Auxiliary Fts5Auxiliary; | |||
19091 | typedef struct Fts5Cursor Fts5Cursor; | |||
19092 | typedef struct Fts5FullTable Fts5FullTable; | |||
19093 | typedef struct Fts5Sorter Fts5Sorter; | |||
19094 | typedef struct Fts5TokenizerModule Fts5TokenizerModule; | |||
19095 | ||||
19096 | /* | |||
19097 | ** NOTES ON TRANSACTIONS: | |||
19098 | ** | |||
19099 | ** SQLite invokes the following virtual table methods as transactions are | |||
19100 | ** opened and closed by the user: | |||
19101 | ** | |||
19102 | ** xBegin(): Start of a new transaction. | |||
19103 | ** xSync(): Initial part of two-phase commit. | |||
19104 | ** xCommit(): Final part of two-phase commit. | |||
19105 | ** xRollback(): Rollback the transaction. | |||
19106 | ** | |||
19107 | ** Anything that is required as part of a commit that may fail is performed | |||
19108 | ** in the xSync() callback. Current versions of SQLite ignore any errors | |||
19109 | ** returned by xCommit(). | |||
19110 | ** | |||
19111 | ** And as sub-transactions are opened/closed: | |||
19112 | ** | |||
19113 | ** xSavepoint(int S): Open savepoint S. | |||
19114 | ** xRelease(int S): Commit and close savepoint S. | |||
19115 | ** xRollbackTo(int S): Rollback to start of savepoint S. | |||
19116 | ** | |||
19117 | ** During a write-transaction the fts5_index.c module may cache some data | |||
19118 | ** in-memory. It is flushed to disk whenever xSync(), xRelease() or | |||
19119 | ** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo() | |||
19120 | ** is called. | |||
19121 | ** | |||
19122 | ** Additionally, if SQLITE_DEBUG is defined, an instance of the following | |||
19123 | ** structure is used to record the current transaction state. This information | |||
19124 | ** is not required, but it is used in the assert() statements executed by | |||
19125 | ** function fts5CheckTransactionState() (see below). | |||
19126 | */ | |||
/*
** Transaction bookkeeping used only by the assert() statements in
** fts5CheckTransactionState().
*/
struct Fts5TransactionState {
  int eState;                     /* 0==closed, 1==open, 2==synced */
  int iSavepoint;                 /* Number of open savepoints (0 -> none) */
};
19131 | ||||
19132 | /* | |||
19133 | ** A single object of this type is allocated when the FTS5 module is | |||
19134 | ** registered with a database handle. It is used to store pointers to | |||
19135 | ** all registered FTS5 extensions - tokenizers and auxiliary functions. | |||
19136 | */ | |||
19137 | struct Fts5Global { | |||
19138 | fts5_api api; /* User visible part of object (see fts5.h) */ | |||
19139 | sqlite3 *db; /* Associated database connection */ | |||
19140 | i64 iNextId; /* Used to allocate unique cursor ids */ | |||
19141 | Fts5Auxiliary *pAux; /* First in list of all aux. functions */ | |||
19142 | Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ | |||
19143 | Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ | |||
19144 | Fts5Cursor *pCsr; /* First in list of all open cursors */ | |||
19145 | u32 aLocaleHdr[4]; | |||
19146 | }; | |||
19147 | ||||
19148 | /* | |||
19149 | ** Size of header on fts5_locale() values. And macro to access a buffer | |||
19150 | ** containing a copy of the header from an Fts5Config pointer. | |||
19151 | */ | |||
/* Size in bytes of the Fts5Global.aLocaleHdr[] array. */
#define FTS5_LOCALE_HDR_SIZE ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))
/* Byte pointer to the aLocaleHdr[] array reached via pConfig->pGlobal. */
#define FTS5_LOCALE_HDR(pConfig) ((const u8*)((pConfig)->pGlobal->aLocaleHdr))

#define FTS5_INSTTOKEN_SUBTYPE 73
19156 | ||||
19157 | /* | |||
19158 | ** Each auxiliary function registered with the FTS5 module is represented | |||
19159 | ** by an object of the following type. All such objects are stored as part | |||
19160 | ** of the Fts5Global.pAux list. | |||
19161 | */ | |||
19162 | struct Fts5Auxiliary { | |||
19163 | Fts5Global *pGlobal; /* Global context for this function */ | |||
19164 | char *zFunc; /* Function name (nul-terminated) */ | |||
19165 | void *pUserData; /* User-data pointer */ | |||
19166 | fts5_extension_function xFunc; /* Callback function */ | |||
19167 | void (*xDestroy)(void*); /* Destructor function */ | |||
19168 | Fts5Auxiliary *pNext; /* Next registered auxiliary function */ | |||
19169 | }; | |||
19170 | ||||
19171 | /* | |||
19172 | ** Each tokenizer module registered with the FTS5 module is represented | |||
19173 | ** by an object of the following type. All such objects are stored as part | |||
19174 | ** of the Fts5Global.pTok list. | |||
19175 | ** | |||
19176 | ** bV2Native: | |||
19177 | ** True if the tokenizer was registered using xCreateTokenizer_v2(), false | |||
19178 | ** for xCreateTokenizer(). If this variable is true, then x2 is populated | |||
19179 | ** with the routines as supplied by the caller and x1 contains synthesized | |||
19180 | ** wrapper routines. In this case the user-data pointer passed to | |||
19181 | ** x1.xCreate should be a pointer to the Fts5TokenizerModule structure, | |||
19182 | ** not a copy of pUserData. | |||
19183 | ** | |||
19184 | ** Of course, if bV2Native is false, then x1 contains the real routines and | |||
19185 | ** x2 the synthesized ones. In this case a pointer to the Fts5TokenizerModule | |||
19186 | ** object should be passed to x2.xCreate. | |||
19187 | ** | |||
19188 | ** The synthesized wrapper routines are necessary for xFindTokenizer(_v2) | |||
19189 | ** calls. | |||
19190 | */ | |||
19191 | struct Fts5TokenizerModule { | |||
19192 | char *zName; /* Name of tokenizer */ | |||
19193 | void *pUserData; /* User pointer passed to xCreate() */ | |||
19194 | int bV2Native; /* True if v2 native tokenizer */ | |||
19195 | fts5_tokenizer x1; /* Tokenizer functions */ | |||
19196 | fts5_tokenizer_v2 x2; /* V2 tokenizer functions */ | |||
19197 | void (*xDestroy)(void*); /* Destructor function */ | |||
19198 | Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ | |||
19199 | }; | |||
19200 | ||||
19201 | struct Fts5FullTable { | |||
19202 | Fts5Table p; /* Public class members from fts5Int.h */ | |||
19203 | Fts5Storage *pStorage; /* Document store */ | |||
19204 | Fts5Global *pGlobal; /* Global (connection wide) data */ | |||
19205 | Fts5Cursor *pSortCsr; /* Sort data from this cursor */ | |||
19206 | int iSavepoint; /* Successful xSavepoint()+1 */ | |||
19207 | ||||
19208 | #ifdef SQLITE_DEBUG | |||
19209 | struct Fts5TransactionState ts; | |||
19210 | #endif | |||
19211 | }; | |||
19212 | ||||
19213 | struct Fts5MatchPhrase { | |||
19214 | Fts5Buffer *pPoslist; /* Pointer to current poslist */ | |||
19215 | int nTerm; /* Size of phrase in terms */ | |||
19216 | }; | |||
19217 | ||||
19218 | /* | |||
19219 | ** pStmt: | |||
19220 | ** SELECT rowid, <fts> FROM <fts> ORDER BY +rank; | |||
19221 | ** | |||
19222 | ** aIdx[]: | |||
19223 | ** There is one entry in the aIdx[] array for each phrase in the query, | |||
19224 | ** the value of which is the offset within aPoslist[] following the last | |||
19225 | ** byte of the position list for the corresponding phrase. | |||
19226 | */ | |||
19227 | struct Fts5Sorter { | |||
19228 | sqlite3_stmt *pStmt; | |||
19229 | i64 iRowid; /* Current rowid */ | |||
19230 | const u8 *aPoslist; /* Position lists for current row */ | |||
19231 | int nIdx; /* Number of entries in aIdx[] */ | |||
19232 | int aIdx[FLEXARRAY]; /* Offsets into aPoslist for current row */ | |||
19233 | }; | |||
19234 | ||||
19235 | /* Size (in bytes) of an Fts5Sorter object with N indexes */ | |||
/* Allocation size for an Fts5Sorter: the bytes before nIdx, plus
** ((N+2)/2) i64-sized units for nIdx and the aIdx[] entries. */
#define SZ_FTS5SORTER(N) (offsetof(Fts5Sorter,nIdx)+(((N)+2)/2)*sizeof(i64))
19237 | ||||
19238 | /* | |||
19239 | ** Virtual-table cursor object. | |||
19240 | ** | |||
19241 | ** iSpecial: | |||
19242 | ** If this is a 'special' query (refer to function fts5SpecialMatch()), | |||
19243 | ** then this variable contains the result of the query. | |||
19244 | ** | |||
19245 | ** iFirstRowid, iLastRowid: | |||
19246 | ** These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the | |||
19247 | ** cursor iterates in ascending order of rowids, iFirstRowid is the lower | |||
19248 | ** limit of rowids to return, and iLastRowid the upper. In other words, the | |||
19249 | ** WHERE clause in the user's query might have been: | |||
19250 | ** | |||
19251 | ** <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid | |||
19252 | ** | |||
19253 | ** If the cursor iterates in descending order of rowid, iFirstRowid | |||
19254 | ** is the upper limit (i.e. the "first" rowid visited) and iLastRowid | |||
19255 | ** the lower. | |||
19256 | */ | |||
19257 | struct Fts5Cursor { | |||
19258 | sqlite3_vtab_cursor base; /* Base class used by SQLite core */ | |||
19259 | Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ | |||
19260 | int *aColumnSize; /* Values for xColumnSize() */ | |||
19261 | i64 iCsrId; /* Cursor id */ | |||
19262 | ||||
19263 | /* Zero from this point onwards on cursor reset */ | |||
19264 | int ePlan; /* FTS5_PLAN_XXX value */ | |||
19265 | int bDesc; /* True for "ORDER BY rowid DESC" queries */ | |||
19266 | i64 iFirstRowid; /* Return no rowids earlier than this */ | |||
19267 | i64 iLastRowid; /* Return no rowids later than this */ | |||
19268 | sqlite3_stmt *pStmt; /* Statement used to read %_content */ | |||
19269 | Fts5Expr *pExpr; /* Expression for MATCH queries */ | |||
19270 | Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ | |||
19271 | int csrflags; /* Mask of cursor flags (see below) */ | |||
19272 | i64 iSpecial; /* Result of special query */ | |||
19273 | ||||
19274 | /* "rank" function. Populated on demand from vtab.xColumn(). */ | |||
19275 | char *zRank; /* Custom rank function */ | |||
19276 | char *zRankArgs; /* Custom rank function args */ | |||
19277 | Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ | |||
19278 | int nRankArg; /* Number of trailing arguments for rank() */ | |||
19279 | sqlite3_value **apRankArg; /* Array of trailing arguments */ | |||
19280 | sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */ | |||
19281 | ||||
19282 | /* Auxiliary data storage */ | |||
19283 | Fts5Auxiliary *pAux; /* Currently executing extension function */ | |||
19284 | Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ | |||
19285 | ||||
19286 | /* Cache used by auxiliary API functions xInst() and xInstCount() */ | |||
19287 | Fts5PoslistReader *aInstIter; /* One for each phrase */ | |||
19288 | int nInstAlloc; /* Size of aInst[] array (entries / 3) */ | |||
19289 | int nInstCount; /* Number of phrase instances */ | |||
19290 | int *aInst; /* 3 integers per phrase instance */ | |||
19291 | }; | |||
19292 | ||||
19293 | /* | |||
19294 | ** Bits that make up the "idxNum" parameter passed indirectly by | |||
19295 | ** xBestIndex() to xFilter(). | |||
19296 | */ | |||
#define FTS5_BI_MATCH        0x0001         /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002         /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004         /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008         /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010         /* rowid >= ? */

/* ORDER BY encoding: sort key (rank or rowid) and direction */
#define FTS5_BI_ORDER_RANK   0x0020
#define FTS5_BI_ORDER_ROWID  0x0040
#define FTS5_BI_ORDER_DESC   0x0080
19306 | ||||
19307 | /* | |||
19308 | ** Values for Fts5Cursor.csrflags | |||
19309 | */ | |||
#define FTS5CSR_EOF               0x01
#define FTS5CSR_REQUIRE_CONTENT   0x02
#define FTS5CSR_REQUIRE_DOCSIZE   0x04
#define FTS5CSR_REQUIRE_INST      0x08
#define FTS5CSR_FREE_ZRANK        0x10
#define FTS5CSR_REQUIRE_RESEEK    0x20
#define FTS5CSR_REQUIRE_POSLIST   0x40

/* True if every bit of y is set in x. */
#define BitFlagAllTest(x,y) (((x) & (y))==(y))
/* True if at least one bit of y is set in x. */
#define BitFlagTest(x,y)    (((x) & (y))!=0)
19320 | ||||
19321 | ||||
19322 | /* | |||
19323 | ** Macros to Set(), Clear() and Test() cursor flags. | |||
19324 | */ | |||
/* Each macro operates on (pCsr)->csrflags. CsrFlagTest() yields the masked
** bits themselves (non-zero if any bit of flag is set), not 0/1. */
#define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))
19328 | ||||
19329 | struct Fts5Auxdata { | |||
19330 | Fts5Auxiliary *pAux; /* Extension to which this belongs */ | |||
19331 | void *pPtr; /* Pointer value */ | |||
19332 | void(*xDelete)(void*); /* Destructor */ | |||
19333 | Fts5Auxdata *pNext; /* Next object in linked list */ | |||
19334 | }; | |||
19335 | ||||
#ifdef SQLITE_DEBUG
#define FTS5_BEGIN      1
#define FTS5_SYNC       2
#define FTS5_COMMIT     3
#define FTS5_ROLLBACK   4
#define FTS5_SAVEPOINT  5
#define FTS5_RELEASE    6
#define FTS5_ROLLBACKTO 7
/*
** Debug-only check of the order of transaction callbacks. For operation
** op (one of the FTS5_XXX values above), assert() that the state recorded
** in p->ts permits it, then update p->ts to reflect it. Parameter
** iSavepoint is consulted for the three savepoint operations only.
** When SQLITE_DEBUG is not defined this is a no-op macro.
*/
static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){
  switch( op ){
    case FTS5_BEGIN:
      assert( p->ts.eState==0 );
      p->ts.eState = 1;
      p->ts.iSavepoint = -1;
      break;

    case FTS5_SYNC:
      assert( p->ts.eState==1 || p->ts.eState==2 );
      p->ts.eState = 2;
      break;

    case FTS5_COMMIT:
      assert( p->ts.eState==2 );
      p->ts.eState = 0;
      break;

    case FTS5_ROLLBACK:
      assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 );
      p->ts.eState = 0;
      break;

    case FTS5_SAVEPOINT:
      assert( p->ts.eState>=1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint>=p->ts.iSavepoint );
      p->ts.iSavepoint = iSavepoint;
      break;

    case FTS5_RELEASE:
      assert( p->ts.eState>=1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint<=p->ts.iSavepoint );
      p->ts.iSavepoint = iSavepoint-1;
      break;

    case FTS5_ROLLBACKTO:
      assert( p->ts.eState>=1 );
      assert( iSavepoint>=-1 );
      /* The following assert() can fail if another vtab strikes an error
      ** within an xSavepoint() call then SQLite calls xRollbackTo() - without
      ** having called xSavepoint() on this vtab.  */
      /* assert( iSavepoint<=p->ts.iSavepoint ); */
      p->ts.iSavepoint = iSavepoint;
      break;
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif
19395 | ||||
19396 | /* | |||
19397 | ** Return true if pTab is a contentless table. If parameter bIncludeUnindexed | |||
19398 | ** is true, this includes contentless tables that store UNINDEXED columns | |||
19399 | ** only. | |||
19400 | */ | |||
19401 | static int fts5IsContentless(Fts5FullTable *pTab, int bIncludeUnindexed){ | |||
19402 | int eContent = pTab->p.pConfig->eContent; | |||
19403 | return ( | |||
19404 | eContent==FTS5_CONTENT_NONE1 | |||
19405 | || (bIncludeUnindexed && eContent==FTS5_CONTENT_UNINDEXED3) | |||
19406 | ); | |||
19407 | } | |||
19408 | ||||
19409 | /* | |||
19410 | ** Delete a virtual table handle allocated by fts5InitVtab(). | |||
19411 | */ | |||
19412 | static void fts5FreeVtab(Fts5FullTable *pTab){ | |||
19413 | if( pTab ){ | |||
19414 | sqlite3Fts5IndexClose(pTab->p.pIndex); | |||
19415 | sqlite3Fts5StorageClose(pTab->pStorage); | |||
19416 | sqlite3Fts5ConfigFree(pTab->p.pConfig); | |||
19417 | sqlite3_freesqlite3_api->free(pTab); | |||
19418 | } | |||
19419 | } | |||
19420 | ||||
19421 | /* | |||
19422 | ** The xDisconnect() virtual table method. | |||
19423 | */ | |||
19424 | static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ | |||
19425 | fts5FreeVtab((Fts5FullTable*)pVtab); | |||
19426 | return SQLITE_OK0; | |||
19427 | } | |||
19428 | ||||
19429 | /* | |||
19430 | ** The xDestroy() virtual table method. | |||
19431 | */ | |||
19432 | static int fts5DestroyMethod(sqlite3_vtab *pVtab){ | |||
19433 | Fts5Table *pTab = (Fts5Table*)pVtab; | |||
19434 | int rc = sqlite3Fts5DropAll(pTab->pConfig); | |||
19435 | if( rc==SQLITE_OK0 ){ | |||
19436 | fts5FreeVtab((Fts5FullTable*)pVtab); | |||
19437 | } | |||
19438 | return rc; | |||
19439 | } | |||
19440 | ||||
19441 | /* | |||
19442 | ** This function is the implementation of both the xConnect and xCreate | |||
19443 | ** methods of the FTS5 virtual table. | |||
19444 | ** | |||
19445 | ** The argv[] array contains the following: | |||
19446 | ** | |||
19447 | ** argv[0] -> module name ("fts5") | |||
19448 | ** argv[1] -> database name | |||
19449 | ** argv[2] -> table name | |||
19450 | ** argv[...] -> "column name" and other module argument fields. | |||
19451 | */ | |||
19452 | static int fts5InitVtab( | |||
19453 | int bCreate, /* True for xCreate, false for xConnect */ | |||
19454 | sqlite3 *db, /* The SQLite database connection */ | |||
19455 | void *pAux, /* Hash table containing tokenizers */ | |||
19456 | int argc, /* Number of elements in argv array */ | |||
19457 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
19458 | sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ | |||
19459 | char **pzErr /* Write any error message here */ | |||
19460 | ){ | |||
19461 | Fts5Global *pGlobal = (Fts5Global*)pAux; | |||
19462 | const char **azConfig = (const char**)argv; | |||
19463 | int rc = SQLITE_OK0; /* Return code */ | |||
19464 | Fts5Config *pConfig = 0; /* Results of parsing argc/argv */ | |||
19465 | Fts5FullTable *pTab = 0; /* New virtual table object */ | |||
19466 | ||||
19467 | /* Allocate the new vtab object and parse the configuration */ | |||
19468 | pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable)); | |||
19469 | if( rc==SQLITE_OK0 ){ | |||
19470 | rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); | |||
19471 | assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 )((void) (0)); | |||
19472 | } | |||
19473 | if( rc==SQLITE_OK0 ){ | |||
19474 | pConfig->pzErrmsg = pzErr; | |||
19475 | pTab->p.pConfig = pConfig; | |||
19476 | pTab->pGlobal = pGlobal; | |||
19477 | if( bCreate || sqlite3Fts5TokenizerPreload(&pConfig->t) ){ | |||
19478 | rc = sqlite3Fts5LoadTokenizer(pConfig); | |||
19479 | } | |||
19480 | } | |||
19481 | ||||
19482 | /* Open the index sub-system */ | |||
19483 | if( rc==SQLITE_OK0 ){ | |||
19484 | rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr); | |||
19485 | } | |||
19486 | ||||
19487 | /* Open the storage sub-system */ | |||
19488 | if( rc==SQLITE_OK0 ){ | |||
19489 | rc = sqlite3Fts5StorageOpen( | |||
19490 | pConfig, pTab->p.pIndex, bCreate, &pTab->pStorage, pzErr | |||
19491 | ); | |||
19492 | } | |||
19493 | ||||
19494 | /* Call sqlite3_declare_vtab() */ | |||
19495 | if( rc==SQLITE_OK0 ){ | |||
19496 | rc = sqlite3Fts5ConfigDeclareVtab(pConfig); | |||
19497 | } | |||
19498 | ||||
19499 | /* Load the initial configuration */ | |||
19500 | if( rc==SQLITE_OK0 ){ | |||
19501 | rc = sqlite3Fts5ConfigLoad(pTab->p.pConfig, pTab->p.pConfig->iCookie-1); | |||
19502 | } | |||
19503 | ||||
19504 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
19505 | rc = sqlite3_vtab_configsqlite3_api->vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT1, (int)1); | |||
19506 | } | |||
19507 | if( rc==SQLITE_OK0 ){ | |||
19508 | rc = sqlite3_vtab_configsqlite3_api->vtab_config(db, SQLITE_VTAB_INNOCUOUS2); | |||
19509 | } | |||
19510 | ||||
19511 | if( pConfig ) pConfig->pzErrmsg = 0; | |||
19512 | if( rc!=SQLITE_OK0 ){ | |||
19513 | fts5FreeVtab(pTab); | |||
19514 | pTab = 0; | |||
19515 | }else if( bCreate ){ | |||
19516 | fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); | |||
19517 | } | |||
19518 | *ppVTab = (sqlite3_vtab*)pTab; | |||
19519 | return rc; | |||
19520 | } | |||
19521 | ||||
19522 | /* | |||
19523 | ** The xConnect() and xCreate() methods for the virtual table. All the | |||
19524 | ** work is done in function fts5InitVtab(). | |||
19525 | */ | |||
19526 | static int fts5ConnectMethod( | |||
19527 | sqlite3 *db, /* Database connection */ | |||
19528 | void *pAux, /* Pointer to tokenizer hash table */ | |||
19529 | int argc, /* Number of elements in argv array */ | |||
19530 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
19531 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | |||
19532 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | |||
19533 | ){ | |||
19534 | return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); | |||
19535 | } | |||
19536 | static int fts5CreateMethod( | |||
19537 | sqlite3 *db, /* Database connection */ | |||
19538 | void *pAux, /* Pointer to tokenizer hash table */ | |||
19539 | int argc, /* Number of elements in argv array */ | |||
19540 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
19541 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | |||
19542 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | |||
19543 | ){ | |||
19544 | return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); | |||
19545 | } | |||
19546 | ||||
19547 | /* | |||
19548 | ** The different query plans. | |||
19549 | */ | |||
#define FTS5_PLAN_MATCH          1       /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SOURCE         2       /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        3       /* An internal query */
#define FTS5_PLAN_SORTED_MATCH   4       /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_SCAN           5       /* No usable constraint */
#define FTS5_PLAN_ROWID          6       /* (rowid = ?) */
19556 | ||||
19557 | /* | |||
19558 | ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this | |||
19559 | ** extension is currently being used by a version of SQLite too old to | |||
19560 | ** support index-info flags. In that case this function is a no-op. | |||
19561 | */ | |||
19562 | static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){ | |||
19563 | #if SQLITE_VERSION_NUMBER3050001>=3008012 | |||
19564 | #ifndef SQLITE_CORE | |||
19565 | if( sqlite3_libversion_numbersqlite3_api->libversion_number()>=3008012 ) | |||
19566 | #endif | |||
19567 | { | |||
19568 | pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE0x00000001; | |||
19569 | } | |||
19570 | #endif | |||
19571 | } | |||
19572 | ||||
19573 | static int fts5UsePatternMatch( | |||
19574 | Fts5Config *pConfig, | |||
19575 | struct sqlite3_index_constraint *p | |||
19576 | ){ | |||
19577 | assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB )((void) (0)); | |||
19578 | assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE )((void) (0)); | |||
19579 | if( pConfig->t.ePattern==FTS5_PATTERN_GLOB66 && p->op==FTS5_PATTERN_GLOB66 ){ | |||
19580 | return 1; | |||
19581 | } | |||
19582 | if( pConfig->t.ePattern==FTS5_PATTERN_LIKE65 | |||
19583 | && (p->op==FTS5_PATTERN_LIKE65 || p->op==FTS5_PATTERN_GLOB66) | |||
19584 | ){ | |||
19585 | return 1; | |||
19586 | } | |||
19587 | return 0; | |||
19588 | } | |||
19589 | ||||
19590 | /* | |||
19591 | ** Implementation of the xBestIndex method for FTS5 tables. Within the | |||
19592 | ** WHERE constraint, it searches for the following: | |||
19593 | ** | |||
19594 | ** 1. A MATCH constraint against the table column. | |||
19595 | ** 2. A MATCH constraint against the "rank" column. | |||
19596 | ** 3. A MATCH constraint against some other column. | |||
19597 | ** 4. An == constraint against the rowid column. | |||
19598 | ** 5. A < or <= constraint against the rowid column. | |||
19599 | ** 6. A > or >= constraint against the rowid column. | |||
19600 | ** | |||
19601 | ** Within the ORDER BY, the following are supported: | |||
19602 | ** | |||
19603 | ** 5. ORDER BY rank [ASC|DESC] | |||
19604 | ** 6. ORDER BY rowid [ASC|DESC] | |||
19605 | ** | |||
19606 | ** Information for the xFilter call is passed via both the idxNum and | |||
19607 | ** idxStr variables. Specifically, idxNum is a bitmask of the following | |||
19608 | ** flags used to encode the ORDER BY clause: | |||
19609 | ** | |||
19610 | ** FTS5_BI_ORDER_RANK | |||
19611 | ** FTS5_BI_ORDER_ROWID | |||
19612 | ** FTS5_BI_ORDER_DESC | |||
19613 | ** | |||
19614 | ** idxStr is used to encode data from the WHERE clause. For each argument | |||
19615 | ** passed to the xFilter method, the following is appended to idxStr: | |||
19616 | ** | |||
19617 | ** Match against table column: "m" | |||
19618 | ** Match against rank column: "r" | |||
19619 | ** Match against other column: "M<column-number>" | |||
19620 | ** LIKE against other column: "L<column-number>" | |||
19621 | ** GLOB against other column: "G<column-number>" | |||
19622 | ** Equality constraint against the rowid: "=" | |||
19623 | ** A < or <= against the rowid: "<" | |||
19624 | ** A > or >= against the rowid: ">" | |||
19625 | ** | |||
19626 | ** This function ensures that there is at most one "r" or "=". And that if | |||
19627 | ** there exists an "=" then there is no "<" or ">". | |||
19628 | ** | |||
19629 | ** If an unusable MATCH operator is present in the WHERE clause, then | |||
19630 | ** SQLITE_CONSTRAINT is returned. | |||
19631 | ** | |||
19632 | ** Costs are assigned as follows: | |||
19633 | ** | |||
19634 | ** a) If a MATCH operator is present, the cost depends on the other | |||
19635 | ** constraints also present. As follows: | |||
19636 | ** | |||
19637 | ** * No other constraints: cost=10000.0 | |||
19638 | ** * One rowid range constraint: cost=7500.0 | |||
19639 | ** * Both rowid range constraints: cost=5000.0 | |||
19640 | ** * An == rowid constraint: cost=1000.0 | |||
19641 | ** | |||
19642 | ** b) Otherwise, if there is no MATCH: | |||
19643 | ** | |||
19644 | ** * No other constraints: cost=1000000.0 | |||
19645 | ** * One rowid range constraint: cost=750000.0 | |||
19646 | ** * Both rowid range constraints: cost=250000.0 | |||
19647 | ** * An == rowid constraint: cost=10.0 | |||
19648 | ** | |||
19649 | ** Costs are not modified by the ORDER BY clause. | |||
19650 | */ | |||
19651 | static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ | |||
19652 | Fts5Table *pTab = (Fts5Table*)pVTab; | |||
19653 | Fts5Config *pConfig = pTab->pConfig; | |||
19654 | const int nCol = pConfig->nCol; | |||
19655 | int idxFlags = 0; /* Parameter passed through to xFilter() */ | |||
19656 | int i; | |||
19657 | ||||
19658 | char *idxStr; | |||
19659 | int iIdxStr = 0; | |||
19660 | int iCons = 0; | |||
19661 | ||||
19662 | int bSeenEq = 0; | |||
19663 | int bSeenGt = 0; | |||
19664 | int bSeenLt = 0; | |||
19665 | int nSeenMatch = 0; | |||
19666 | int bSeenRank = 0; | |||
19667 | ||||
19668 | ||||
19669 | assert( SQLITE_INDEX_CONSTRAINT_EQ<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
19670 | assert( SQLITE_INDEX_CONSTRAINT_GT<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
19671 | assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
19672 | assert( SQLITE_INDEX_CONSTRAINT_GE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
19673 | assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
19674 | ||||
19675 | if( pConfig->bLock ){ | |||
19676 | pTab->base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
19677 | "recursively defined fts5 content table" | |||
19678 | ); | |||
19679 | return SQLITE_ERROR1; | |||
19680 | } | |||
19681 | ||||
19682 | idxStr = (char*)sqlite3_mallocsqlite3_api->malloc(pInfo->nConstraint * 8 + 1); | |||
19683 | if( idxStr==0 ) return SQLITE_NOMEM7; | |||
19684 | pInfo->idxStr = idxStr; | |||
19685 | pInfo->needToFreeIdxStr = 1; | |||
19686 | ||||
19687 | for(i=0; i<pInfo->nConstraint; i++){ | |||
19688 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | |||
19689 | int iCol = p->iColumn; | |||
19690 | if( p->op==SQLITE_INDEX_CONSTRAINT_MATCH64 | |||
19691 | || (p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && iCol>=nCol) | |||
19692 | ){ | |||
19693 | /* A MATCH operator or equivalent */ | |||
19694 | if( p->usable==0 || iCol<0 ){ | |||
19695 | /* As there exists an unusable MATCH constraint this is an | |||
19696 | ** unusable plan. Return SQLITE_CONSTRAINT. */ | |||
19697 | idxStr[iIdxStr] = 0; | |||
19698 | return SQLITE_CONSTRAINT19; | |||
19699 | }else{ | |||
19700 | if( iCol==nCol+1 ){ | |||
19701 | if( bSeenRank ) continue; | |||
19702 | idxStr[iIdxStr++] = 'r'; | |||
19703 | bSeenRank = 1; | |||
19704 | }else{ | |||
19705 | nSeenMatch++; | |||
19706 | idxStr[iIdxStr++] = 'M'; | |||
19707 | sqlite3_snprintfsqlite3_api->xsnprintf(6, &idxStr[iIdxStr], "%d", iCol); | |||
19708 | idxStr += strlen(&idxStr[iIdxStr]); | |||
19709 | assert( idxStr[iIdxStr]=='\0' )((void) (0)); | |||
19710 | } | |||
19711 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
19712 | pInfo->aConstraintUsage[i].omit = 1; | |||
19713 | } | |||
19714 | }else if( p->usable ){ | |||
19715 | if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){ | |||
19716 | assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB )((void) (0)); | |||
19717 | idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE65 ? 'L' : 'G'; | |||
19718 | sqlite3_snprintfsqlite3_api->xsnprintf(6, &idxStr[iIdxStr], "%d", iCol); | |||
19719 | idxStr += strlen(&idxStr[iIdxStr]); | |||
19720 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
19721 | assert( idxStr[iIdxStr]=='\0' )((void) (0)); | |||
19722 | nSeenMatch++; | |||
19723 | }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && iCol<0 ){ | |||
19724 | idxStr[iIdxStr++] = '='; | |||
19725 | bSeenEq = 1; | |||
19726 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
19727 | } | |||
19728 | } | |||
19729 | } | |||
19730 | ||||
19731 | if( bSeenEq==0 ){ | |||
19732 | for(i=0; i<pInfo->nConstraint; i++){ | |||
19733 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | |||
19734 | if( p->iColumn<0 && p->usable ){ | |||
19735 | int op = p->op; | |||
19736 | if( op==SQLITE_INDEX_CONSTRAINT_LT16 || op==SQLITE_INDEX_CONSTRAINT_LE8 ){ | |||
19737 | if( bSeenLt ) continue; | |||
19738 | idxStr[iIdxStr++] = '<'; | |||
19739 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
19740 | bSeenLt = 1; | |||
19741 | }else | |||
19742 | if( op==SQLITE_INDEX_CONSTRAINT_GT4 || op==SQLITE_INDEX_CONSTRAINT_GE32 ){ | |||
19743 | if( bSeenGt ) continue; | |||
19744 | idxStr[iIdxStr++] = '>'; | |||
19745 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
19746 | bSeenGt = 1; | |||
19747 | } | |||
19748 | } | |||
19749 | } | |||
19750 | } | |||
19751 | idxStr[iIdxStr] = '\0'; | |||
19752 | ||||
19753 | /* Set idxFlags flags for the ORDER BY clause | |||
19754 | ** | |||
19755 | ** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC". | |||
19756 | */ | |||
19757 | if( pInfo->nOrderBy==1 ){ | |||
19758 | int iSort = pInfo->aOrderBy[0].iColumn; | |||
19759 | if( iSort==(pConfig->nCol+1) && nSeenMatch>0 ){ | |||
19760 | idxFlags |= FTS5_BI_ORDER_RANK0x0020; | |||
19761 | }else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){ | |||
19762 | idxFlags |= FTS5_BI_ORDER_ROWID0x0040; | |||
19763 | } | |||
19764 | if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID)(((idxFlags) & (0x0020|0x0040))!=0) ){ | |||
19765 | pInfo->orderByConsumed = 1; | |||
19766 | if( pInfo->aOrderBy[0].desc ){ | |||
19767 | idxFlags |= FTS5_BI_ORDER_DESC0x0080; | |||
19768 | } | |||
19769 | } | |||
19770 | } | |||
19771 | ||||
19772 | /* Calculate the estimated cost based on the flags set in idxFlags. */ | |||
19773 | if( bSeenEq ){ | |||
19774 | pInfo->estimatedCost = nSeenMatch ? 1000.0 : 10.0; | |||
19775 | if( nSeenMatch==0 ) fts5SetUniqueFlag(pInfo); | |||
19776 | }else if( bSeenLt && bSeenGt ){ | |||
19777 | pInfo->estimatedCost = nSeenMatch ? 5000.0 : 250000.0; | |||
19778 | }else if( bSeenLt || bSeenGt ){ | |||
19779 | pInfo->estimatedCost = nSeenMatch ? 7500.0 : 750000.0; | |||
19780 | }else{ | |||
19781 | pInfo->estimatedCost = nSeenMatch ? 10000.0 : 1000000.0; | |||
19782 | } | |||
19783 | for(i=1; i<nSeenMatch; i++){ | |||
19784 | pInfo->estimatedCost *= 0.4; | |||
19785 | } | |||
19786 | ||||
19787 | pInfo->idxNum = idxFlags; | |||
19788 | return SQLITE_OK0; | |||
19789 | } | |||
19790 | ||||
19791 | static int fts5NewTransaction(Fts5FullTable *pTab){ | |||
19792 | Fts5Cursor *pCsr; | |||
19793 | for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | |||
19794 | if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK0; | |||
19795 | } | |||
19796 | return sqlite3Fts5StorageReset(pTab->pStorage); | |||
19797 | } | |||
19798 | ||||
19799 | /* | |||
19800 | ** Implementation of xOpen method. | |||
19801 | */ | |||
19802 | static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ | |||
19803 | Fts5FullTable *pTab = (Fts5FullTable*)pVTab; | |||
19804 | Fts5Config *pConfig = pTab->p.pConfig; | |||
19805 | Fts5Cursor *pCsr = 0; /* New cursor object */ | |||
19806 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
19807 | int rc; /* Return code */ | |||
19808 | ||||
19809 | rc = fts5NewTransaction(pTab); | |||
19810 | if( rc==SQLITE_OK0 ){ | |||
19811 | nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int); | |||
19812 | pCsr = (Fts5Cursor*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
19813 | if( pCsr ){ | |||
19814 | Fts5Global *pGlobal = pTab->pGlobal; | |||
19815 | memset(pCsr, 0, (size_t)nByte); | |||
19816 | pCsr->aColumnSize = (int*)&pCsr[1]; | |||
19817 | pCsr->pNext = pGlobal->pCsr; | |||
19818 | pGlobal->pCsr = pCsr; | |||
19819 | pCsr->iCsrId = ++pGlobal->iNextId; | |||
19820 | }else{ | |||
19821 | rc = SQLITE_NOMEM7; | |||
19822 | } | |||
19823 | } | |||
19824 | *ppCsr = (sqlite3_vtab_cursor*)pCsr; | |||
19825 | return rc; | |||
19826 | } | |||
19827 | ||||
19828 | static int fts5StmtType(Fts5Cursor *pCsr){ | |||
19829 | if( pCsr->ePlan==FTS5_PLAN_SCAN5 ){ | |||
19830 | return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC1 : FTS5_STMT_SCAN_ASC0; | |||
19831 | } | |||
19832 | return FTS5_STMT_LOOKUP2; | |||
19833 | } | |||
19834 | ||||
19835 | /* | |||
19836 | ** This function is called after the cursor passed as the only argument | |||
19837 | ** is moved to point at a different row. It clears all cached data | |||
19838 | ** specific to the previous row stored by the cursor object. | |||
19839 | */ | |||
19840 | static void fts5CsrNewrow(Fts5Cursor *pCsr){ | |||
19841 | CsrFlagSet(pCsr,((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
19842 | FTS5CSR_REQUIRE_CONTENT((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
19843 | | FTS5CSR_REQUIRE_DOCSIZE((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
19844 | | FTS5CSR_REQUIRE_INST((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
19845 | | FTS5CSR_REQUIRE_POSLIST((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
19846 | )((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)); | |||
19847 | } | |||
19848 | ||||
19849 | static void fts5FreeCursorComponents(Fts5Cursor *pCsr){ | |||
19850 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
19851 | Fts5Auxdata *pData; | |||
19852 | Fts5Auxdata *pNext; | |||
19853 | ||||
19854 | sqlite3_freesqlite3_api->free(pCsr->aInstIter); | |||
19855 | sqlite3_freesqlite3_api->free(pCsr->aInst); | |||
19856 | if( pCsr->pStmt ){ | |||
19857 | int eStmt = fts5StmtType(pCsr); | |||
19858 | sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); | |||
19859 | } | |||
19860 | if( pCsr->pSorter ){ | |||
19861 | Fts5Sorter *pSorter = pCsr->pSorter; | |||
19862 | sqlite3_finalizesqlite3_api->finalize(pSorter->pStmt); | |||
19863 | sqlite3_freesqlite3_api->free(pSorter); | |||
19864 | } | |||
19865 | ||||
19866 | if( pCsr->ePlan!=FTS5_PLAN_SOURCE2 ){ | |||
19867 | sqlite3Fts5ExprFree(pCsr->pExpr); | |||
19868 | } | |||
19869 | ||||
19870 | for(pData=pCsr->pAuxdata; pData; pData=pNext){ | |||
19871 | pNext = pData->pNext; | |||
19872 | if( pData->xDelete ) pData->xDelete(pData->pPtr); | |||
19873 | sqlite3_freesqlite3_api->free(pData); | |||
19874 | } | |||
19875 | ||||
19876 | sqlite3_finalizesqlite3_api->finalize(pCsr->pRankArgStmt); | |||
19877 | sqlite3_freesqlite3_api->free(pCsr->apRankArg); | |||
19878 | ||||
19879 | if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK)((pCsr)->csrflags & (0x10)) ){ | |||
19880 | sqlite3_freesqlite3_api->free(pCsr->zRank); | |||
19881 | sqlite3_freesqlite3_api->free(pCsr->zRankArgs); | |||
19882 | } | |||
19883 | ||||
19884 | sqlite3Fts5IndexCloseReader(pTab->p.pIndex); | |||
19885 | memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr)); | |||
19886 | } | |||
19887 | ||||
19888 | ||||
19889 | /* | |||
19890 | ** Close the cursor. For additional information see the documentation | |||
19891 | ** on the xClose method of the virtual table interface. | |||
19892 | */ | |||
19893 | static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ | |||
19894 | if( pCursor ){ | |||
19895 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | |||
19896 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
19897 | Fts5Cursor **pp; | |||
19898 | ||||
19899 | fts5FreeCursorComponents(pCsr); | |||
19900 | /* Remove the cursor from the Fts5Global.pCsr list */ | |||
19901 | for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); | |||
19902 | *pp = pCsr->pNext; | |||
19903 | ||||
19904 | sqlite3_freesqlite3_api->free(pCsr); | |||
19905 | } | |||
19906 | return SQLITE_OK0; | |||
19907 | } | |||
19908 | ||||
19909 | static int fts5SorterNext(Fts5Cursor *pCsr){ | |||
19910 | Fts5Sorter *pSorter = pCsr->pSorter; | |||
19911 | int rc; | |||
19912 | ||||
19913 | rc = sqlite3_stepsqlite3_api->step(pSorter->pStmt); | |||
19914 | if( rc==SQLITE_DONE101 ){ | |||
19915 | rc = SQLITE_OK0; | |||
19916 | CsrFlagSet(pCsr, FTS5CSR_EOF|FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags |= (0x01|0x02)); | |||
19917 | }else if( rc==SQLITE_ROW100 ){ | |||
19918 | const u8 *a; | |||
19919 | const u8 *aBlob; | |||
19920 | int nBlob; | |||
19921 | int i; | |||
19922 | int iOff = 0; | |||
19923 | rc = SQLITE_OK0; | |||
19924 | ||||
19925 | pSorter->iRowid = sqlite3_column_int64sqlite3_api->column_int64(pSorter->pStmt, 0); | |||
19926 | nBlob = sqlite3_column_bytessqlite3_api->column_bytes(pSorter->pStmt, 1); | |||
19927 | aBlob = a = sqlite3_column_blobsqlite3_api->column_blob(pSorter->pStmt, 1); | |||
19928 | ||||
19929 | /* nBlob==0 in detail=none mode. */ | |||
19930 | if( nBlob>0 ){ | |||
19931 | for(i=0; i<(pSorter->nIdx-1); i++){ | |||
19932 | int iVal; | |||
19933 | a += fts5GetVarint32(a, iVal)sqlite3Fts5GetVarint32(a,(u32*)&(iVal)); | |||
19934 | iOff += iVal; | |||
19935 | pSorter->aIdx[i] = iOff; | |||
19936 | } | |||
19937 | pSorter->aIdx[i] = &aBlob[nBlob] - a; | |||
19938 | pSorter->aPoslist = a; | |||
19939 | } | |||
19940 | ||||
19941 | fts5CsrNewrow(pCsr); | |||
19942 | } | |||
19943 | ||||
19944 | return rc; | |||
19945 | } | |||
19946 | ||||
19947 | ||||
19948 | /* | |||
19949 | ** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors | |||
19950 | ** open on table pTab. | |||
19951 | */ | |||
19952 | static void fts5TripCursors(Fts5FullTable *pTab){ | |||
19953 | Fts5Cursor *pCsr; | |||
19954 | for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | |||
19955 | if( pCsr->ePlan==FTS5_PLAN_MATCH1 | |||
19956 | && pCsr->base.pVtab==(sqlite3_vtab*)pTab | |||
19957 | ){ | |||
19958 | CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags |= (0x20)); | |||
19959 | } | |||
19960 | } | |||
19961 | } | |||
19962 | ||||
19963 | /* | |||
19964 | ** If the REQUIRE_RESEEK flag is set on the cursor passed as the first | |||
19965 | ** argument, close and reopen all Fts5IndexIter iterators that the cursor | |||
19966 | ** is using. Then attempt to move the cursor to a rowid equal to or laster | |||
19967 | ** (in the cursors sort order - ASC or DESC) than the current rowid. | |||
19968 | ** | |||
19969 | ** If the new rowid is not equal to the old, set output parameter *pbSkip | |||
19970 | ** to 1 before returning. Otherwise, leave it unchanged. | |||
19971 | ** | |||
19972 | ** Return SQLITE_OK if successful or if no reseek was required, or an | |||
19973 | ** error code if an error occurred. | |||
19974 | */ | |||
19975 | static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ | |||
19976 | int rc = SQLITE_OK0; | |||
19977 | assert( *pbSkip==0 )((void) (0)); | |||
19978 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags & (0x20)) ){ | |||
19979 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
19980 | int bDesc = pCsr->bDesc; | |||
19981 | i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); | |||
19982 | ||||
19983 | rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->p.pIndex, iRowid, bDesc); | |||
19984 | if( rc==SQLITE_OK0 && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){ | |||
19985 | *pbSkip = 1; | |||
19986 | } | |||
19987 | ||||
19988 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags &= ~(0x20)); | |||
19989 | fts5CsrNewrow(pCsr); | |||
19990 | if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ | |||
19991 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | |||
19992 | *pbSkip = 1; | |||
19993 | } | |||
19994 | } | |||
19995 | return rc; | |||
19996 | } | |||
19997 | ||||
19998 | ||||
19999 | /* | |||
20000 | ** Advance the cursor to the next row in the table that matches the | |||
20001 | ** search criteria. | |||
20002 | ** | |||
20003 | ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned | |||
20004 | ** even if we reach end-of-file. The fts5EofMethod() will be called | |||
20005 | ** subsequently to determine whether or not an EOF was hit. | |||
20006 | */ | |||
20007 | static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ | |||
20008 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
20009 | int rc; | |||
20010 | ||||
20011 | assert( (pCsr->ePlan<3)==((void) (0)) | |||
20012 | (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE)((void) (0)) | |||
20013 | )((void) (0)); | |||
20014 | assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) )((void) (0)); | |||
20015 | ||||
20016 | /* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table, | |||
20017 | ** clear any token mappings accumulated at the fts5_index.c level. In | |||
20018 | ** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH, | |||
20019 | ** we need to retain the mappings for the entire query. */ | |||
20020 | if( pCsr->ePlan==FTS5_PLAN_MATCH1 | |||
20021 | && ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata | |||
20022 | ){ | |||
20023 | sqlite3Fts5ExprClearTokens(pCsr->pExpr); | |||
20024 | } | |||
20025 | ||||
20026 | if( pCsr->ePlan<3 ){ | |||
20027 | int bSkip = 0; | |||
20028 | if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; | |||
20029 | rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid); | |||
20030 | CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr))((pCsr)->csrflags |= (sqlite3Fts5ExprEof(pCsr->pExpr))); | |||
20031 | fts5CsrNewrow(pCsr); | |||
20032 | }else{ | |||
20033 | switch( pCsr->ePlan ){ | |||
20034 | case FTS5_PLAN_SPECIAL3: { | |||
20035 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | |||
20036 | rc = SQLITE_OK0; | |||
20037 | break; | |||
20038 | } | |||
20039 | ||||
20040 | case FTS5_PLAN_SORTED_MATCH4: { | |||
20041 | rc = fts5SorterNext(pCsr); | |||
20042 | break; | |||
20043 | } | |||
20044 | ||||
20045 | default: { | |||
20046 | Fts5Config *pConfig = ((Fts5Table*)pCursor->pVtab)->pConfig; | |||
20047 | pConfig->bLock++; | |||
20048 | rc = sqlite3_stepsqlite3_api->step(pCsr->pStmt); | |||
20049 | pConfig->bLock--; | |||
20050 | if( rc!=SQLITE_ROW100 ){ | |||
20051 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | |||
20052 | rc = sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | |||
20053 | if( rc!=SQLITE_OK0 ){ | |||
20054 | pCursor->pVtab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
20055 | "%s", sqlite3_errmsgsqlite3_api->errmsg(pConfig->db) | |||
20056 | ); | |||
20057 | } | |||
20058 | }else{ | |||
20059 | rc = SQLITE_OK0; | |||
20060 | CsrFlagSet(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags |= (0x04)); | |||
20061 | } | |||
20062 | break; | |||
20063 | } | |||
20064 | } | |||
20065 | } | |||
20066 | ||||
20067 | return rc; | |||
20068 | } | |||
20069 | ||||
20070 | ||||
20071 | static int fts5PrepareStatement( | |||
20072 | sqlite3_stmt **ppStmt, | |||
20073 | Fts5Config *pConfig, | |||
20074 | const char *zFmt, | |||
20075 | ... | |||
20076 | ){ | |||
20077 | sqlite3_stmt *pRet = 0; | |||
20078 | int rc; | |||
20079 | char *zSql; | |||
20080 | va_list ap; | |||
20081 | ||||
20082 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
20083 | zSql = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
20084 | if( zSql==0 ){ | |||
20085 | rc = SQLITE_NOMEM7; | |||
20086 | }else{ | |||
20087 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pConfig->db, zSql, -1, | |||
20088 | SQLITE_PREPARE_PERSISTENT0x01, &pRet, 0); | |||
20089 | if( rc!=SQLITE_OK0 ){ | |||
20090 | sqlite3Fts5ConfigErrmsg(pConfig, "%s", sqlite3_errmsgsqlite3_api->errmsg(pConfig->db)); | |||
20091 | } | |||
20092 | sqlite3_freesqlite3_api->free(zSql); | |||
20093 | } | |||
20094 | ||||
20095 | va_end(ap)__builtin_va_end(ap); | |||
20096 | *ppStmt = pRet; | |||
20097 | return rc; | |||
20098 | } | |||
20099 | ||||
20100 | static int fts5CursorFirstSorted( | |||
20101 | Fts5FullTable *pTab, | |||
20102 | Fts5Cursor *pCsr, | |||
20103 | int bDesc | |||
20104 | ){ | |||
20105 | Fts5Config *pConfig = pTab->p.pConfig; | |||
20106 | Fts5Sorter *pSorter; | |||
20107 | int nPhrase; | |||
20108 | sqlite3_int64 nByte; | |||
20109 | int rc; | |||
20110 | const char *zRank = pCsr->zRank; | |||
20111 | const char *zRankArgs = pCsr->zRankArgs; | |||
20112 | ||||
20113 | nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | |||
20114 | nByte = SZ_FTS5SORTER(nPhrase)(__builtin_offsetof(Fts5Sorter, nIdx)+((nPhrase+2)/2)*sizeof( i64)); | |||
20115 | pSorter = (Fts5Sorter*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
20116 | if( pSorter==0 ) return SQLITE_NOMEM7; | |||
20117 | memset(pSorter, 0, (size_t)nByte); | |||
20118 | pSorter->nIdx = nPhrase; | |||
20119 | ||||
20120 | /* TODO: It would be better to have some system for reusing statement | |||
20121 | ** handles here, rather than preparing a new one for each query. But that | |||
20122 | ** is not possible as SQLite reference counts the virtual table objects. | |||
20123 | ** And since the statement required here reads from this very virtual | |||
20124 | ** table, saving it creates a circular reference. | |||
20125 | ** | |||
20126 | ** If SQLite a built-in statement cache, this wouldn't be a problem. */ | |||
20127 | rc = fts5PrepareStatement(&pSorter->pStmt, pConfig, | |||
20128 | "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(\"%w\"%s%s) %s", | |||
20129 | pConfig->zDb, pConfig->zName, zRank, pConfig->zName, | |||
20130 | (zRankArgs ? ", " : ""), | |||
20131 | (zRankArgs ? zRankArgs : ""), | |||
20132 | bDesc ? "DESC" : "ASC" | |||
20133 | ); | |||
20134 | ||||
20135 | pCsr->pSorter = pSorter; | |||
20136 | if( rc==SQLITE_OK0 ){ | |||
20137 | assert( pTab->pSortCsr==0 )((void) (0)); | |||
20138 | pTab->pSortCsr = pCsr; | |||
20139 | rc = fts5SorterNext(pCsr); | |||
20140 | pTab->pSortCsr = 0; | |||
20141 | } | |||
20142 | ||||
20143 | if( rc!=SQLITE_OK0 ){ | |||
20144 | sqlite3_finalizesqlite3_api->finalize(pSorter->pStmt); | |||
20145 | sqlite3_freesqlite3_api->free(pSorter); | |||
20146 | pCsr->pSorter = 0; | |||
20147 | } | |||
20148 | ||||
20149 | return rc; | |||
20150 | } | |||
20151 | ||||
20152 | static int fts5CursorFirst(Fts5FullTable *pTab, Fts5Cursor *pCsr, int bDesc){ | |||
20153 | int rc; | |||
20154 | Fts5Expr *pExpr = pCsr->pExpr; | |||
20155 | rc = sqlite3Fts5ExprFirst(pExpr, pTab->p.pIndex, pCsr->iFirstRowid, bDesc); | |||
20156 | if( sqlite3Fts5ExprEof(pExpr) ){ | |||
20157 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | |||
20158 | } | |||
20159 | fts5CsrNewrow(pCsr); | |||
20160 | return rc; | |||
20161 | } | |||
20162 | ||||
20163 | /* | |||
20164 | ** Process a "special" query. A special query is identified as one with a | |||
20165 | ** MATCH expression that begins with a '*' character. The remainder of | |||
20166 | ** the text passed to the MATCH operator are used as the special query | |||
20167 | ** parameters. | |||
20168 | */ | |||
20169 | static int fts5SpecialMatch( | |||
20170 | Fts5FullTable *pTab, | |||
20171 | Fts5Cursor *pCsr, | |||
20172 | const char *zQuery | |||
20173 | ){ | |||
20174 | int rc = SQLITE_OK0; /* Return code */ | |||
20175 | const char *z = zQuery; /* Special query text */ | |||
20176 | int n; /* Number of bytes in text at z */ | |||
20177 | ||||
20178 | while( z[0]==' ' ) z++; | |||
20179 | for(n=0; z[n] && z[n]!=' '; n++); | |||
20180 | ||||
20181 | assert( pTab->p.base.zErrMsg==0 )((void) (0)); | |||
20182 | pCsr->ePlan = FTS5_PLAN_SPECIAL3; | |||
20183 | ||||
20184 | if( n==5 && 0==sqlite3_strnicmpsqlite3_api->strnicmp("reads", z, n) ){ | |||
20185 | pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->p.pIndex); | |||
20186 | } | |||
20187 | else if( n==2 && 0==sqlite3_strnicmpsqlite3_api->strnicmp("id", z, n) ){ | |||
20188 | pCsr->iSpecial = pCsr->iCsrId; | |||
20189 | } | |||
20190 | else{ | |||
20191 | /* An unrecognized directive. Return an error message. */ | |||
20192 | pTab->p.base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("unknown special query: %.*s", n, z); | |||
20193 | rc = SQLITE_ERROR1; | |||
20194 | } | |||
20195 | ||||
20196 | return rc; | |||
20197 | } | |||
20198 | ||||
20199 | /* | |||
20200 | ** Search for an auxiliary function named zName that can be used with table | |||
20201 | ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary | |||
20202 | ** structure. Otherwise, if no such function exists, return NULL. | |||
20203 | */ | |||
20204 | static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){ | |||
20205 | Fts5Auxiliary *pAux; | |||
20206 | ||||
20207 | for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ | |||
20208 | if( sqlite3_stricmpsqlite3_api->stricmp(zName, pAux->zFunc)==0 ) return pAux; | |||
20209 | } | |||
20210 | ||||
20211 | /* No function of the specified name was found. Return 0. */ | |||
20212 | return 0; | |||
20213 | } | |||
20214 | ||||
20215 | ||||
20216 | static int fts5FindRankFunction(Fts5Cursor *pCsr){ | |||
20217 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
20218 | Fts5Config *pConfig = pTab->p.pConfig; | |||
20219 | int rc = SQLITE_OK0; | |||
20220 | Fts5Auxiliary *pAux = 0; | |||
20221 | const char *zRank = pCsr->zRank; | |||
20222 | const char *zRankArgs = pCsr->zRankArgs; | |||
20223 | ||||
20224 | if( zRankArgs ){ | |||
20225 | char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs); | |||
20226 | if( zSql ){ | |||
20227 | sqlite3_stmt *pStmt = 0; | |||
20228 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pConfig->db, zSql, -1, | |||
20229 | SQLITE_PREPARE_PERSISTENT0x01, &pStmt, 0); | |||
20230 | sqlite3_freesqlite3_api->free(zSql); | |||
20231 | assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 )((void) (0)); | |||
20232 | if( rc==SQLITE_OK0 ){ | |||
20233 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pStmt) ){ | |||
20234 | sqlite3_int64 nByte; | |||
20235 | pCsr->nRankArg = sqlite3_column_countsqlite3_api->column_count(pStmt); | |||
20236 | nByte = sizeof(sqlite3_value*)*pCsr->nRankArg; | |||
20237 | pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte); | |||
20238 | if( rc==SQLITE_OK0 ){ | |||
20239 | int i; | |||
20240 | for(i=0; i<pCsr->nRankArg; i++){ | |||
20241 | pCsr->apRankArg[i] = sqlite3_column_valuesqlite3_api->column_value(pStmt, i); | |||
20242 | } | |||
20243 | } | |||
20244 | pCsr->pRankArgStmt = pStmt; | |||
20245 | }else{ | |||
20246 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
20247 | assert( rc!=SQLITE_OK )((void) (0)); | |||
20248 | } | |||
20249 | } | |||
20250 | } | |||
20251 | } | |||
20252 | ||||
20253 | if( rc==SQLITE_OK0 ){ | |||
20254 | pAux = fts5FindAuxiliary(pTab, zRank); | |||
20255 | if( pAux==0 ){ | |||
20256 | assert( pTab->p.base.zErrMsg==0 )((void) (0)); | |||
20257 | pTab->p.base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("no such function: %s", zRank); | |||
20258 | rc = SQLITE_ERROR1; | |||
20259 | } | |||
20260 | } | |||
20261 | ||||
20262 | pCsr->pRank = pAux; | |||
20263 | return rc; | |||
20264 | } | |||
20265 | ||||
20266 | ||||
20267 | static int fts5CursorParseRank( | |||
20268 | Fts5Config *pConfig, | |||
20269 | Fts5Cursor *pCsr, | |||
20270 | sqlite3_value *pRank | |||
20271 | ){ | |||
20272 | int rc = SQLITE_OK0; | |||
20273 | if( pRank ){ | |||
20274 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(pRank); | |||
20275 | char *zRank = 0; | |||
20276 | char *zRankArgs = 0; | |||
20277 | ||||
20278 | if( z==0 ){ | |||
20279 | if( sqlite3_value_typesqlite3_api->value_type(pRank)==SQLITE_NULL5 ) rc = SQLITE_ERROR1; | |||
20280 | }else{ | |||
20281 | rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); | |||
20282 | } | |||
20283 | if( rc==SQLITE_OK0 ){ | |||
20284 | pCsr->zRank = zRank; | |||
20285 | pCsr->zRankArgs = zRankArgs; | |||
20286 | CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK)((pCsr)->csrflags |= (0x10)); | |||
20287 | }else if( rc==SQLITE_ERROR1 ){ | |||
20288 | pCsr->base.pVtab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
20289 | "parse error in rank function: %s", z | |||
20290 | ); | |||
20291 | } | |||
20292 | }else{ | |||
20293 | if( pConfig->zRank ){ | |||
20294 | pCsr->zRank = (char*)pConfig->zRank; | |||
20295 | pCsr->zRankArgs = (char*)pConfig->zRankArgs; | |||
20296 | }else{ | |||
20297 | pCsr->zRank = (char*)FTS5_DEFAULT_RANK"bm25"; | |||
20298 | pCsr->zRankArgs = 0; | |||
20299 | } | |||
20300 | } | |||
20301 | return rc; | |||
20302 | } | |||
20303 | ||||
20304 | static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){ | |||
20305 | if( pVal ){ | |||
20306 | int eType = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal); | |||
20307 | if( eType==SQLITE_INTEGER1 ){ | |||
20308 | return sqlite3_value_int64sqlite3_api->value_int64(pVal); | |||
20309 | } | |||
20310 | } | |||
20311 | return iDefault; | |||
20312 | } | |||
20313 | ||||
20314 | /* | |||
20315 | ** Set the error message on the virtual table passed as the first argument. | |||
20316 | */ | |||
20317 | static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){ | |||
20318 | va_list ap; /* ... printf arguments */ | |||
20319 | va_start(ap, zFormat)__builtin_va_start(ap, zFormat); | |||
20320 | sqlite3_freesqlite3_api->free(p->p.base.zErrMsg); | |||
20321 | p->p.base.zErrMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, ap); | |||
20322 | va_end(ap)__builtin_va_end(ap); | |||
20323 | } | |||
20324 | ||||
20325 | /* | |||
20326 | ** Arrange for subsequent calls to sqlite3Fts5Tokenize() to use the locale | |||
20327 | ** specified by pLocale/nLocale. The buffer indicated by pLocale must remain | |||
20328 | ** valid until after the final call to sqlite3Fts5Tokenize() that will use | |||
20329 | ** the locale. | |||
20330 | */ | |||
20331 | static void sqlite3Fts5SetLocale( | |||
20332 | Fts5Config *pConfig, | |||
20333 | const char *zLocale, | |||
20334 | int nLocale | |||
20335 | ){ | |||
20336 | Fts5TokenizerConfig *pT = &pConfig->t; | |||
20337 | pT->pLocale = zLocale; | |||
20338 | pT->nLocale = nLocale; | |||
20339 | } | |||
20340 | ||||
20341 | /* | |||
20342 | ** Clear any locale configured by an earlier call to sqlite3Fts5SetLocale(). | |||
20343 | */ | |||
20344 | static void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ | |||
20345 | sqlite3Fts5SetLocale(pConfig, 0, 0); | |||
20346 | } | |||
20347 | ||||
20348 | /* | |||
20349 | ** Return true if the value passed as the only argument is an | |||
20350 | ** fts5_locale() value. | |||
20351 | */ | |||
20352 | static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal){ | |||
20353 | int ret = 0; | |||
20354 | if( sqlite3_value_typesqlite3_api->value_type(pVal)==SQLITE_BLOB4 ){ | |||
20355 | /* Call sqlite3_value_bytes() after sqlite3_value_blob() in this case. | |||
20356 | ** If the blob was created using zeroblob(), then sqlite3_value_blob() | |||
20357 | ** may call malloc(). If this malloc() fails, then the values returned | |||
20358 | ** by both value_blob() and value_bytes() will be 0. If value_bytes() were | |||
20359 | ** called first, then the NULL pointer returned by value_blob() might | |||
20360 | ** be dereferenced. */ | |||
20361 | const u8 *pBlob = sqlite3_value_blobsqlite3_api->value_blob(pVal); | |||
20362 | int nBlob = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
20363 | if( nBlob>FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) | |||
20364 | && 0==memcmp(pBlob, FTS5_LOCALE_HDR(pConfig)((const u8*)(pConfig->pGlobal->aLocaleHdr)), FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))) | |||
20365 | ){ | |||
20366 | ret = 1; | |||
20367 | } | |||
20368 | } | |||
20369 | return ret; | |||
20370 | } | |||
20371 | ||||
20372 | /* | |||
20373 | ** Value pVal is guaranteed to be an fts5_locale() value, according to | |||
20374 | ** sqlite3Fts5IsLocaleValue(). This function extracts the text and locale | |||
20375 | ** from the value and returns them separately. | |||
20376 | ** | |||
20377 | ** If successful, SQLITE_OK is returned and (*ppText) and (*ppLoc) set | |||
20378 | ** to point to buffers containing the text and locale, as utf-8, | |||
20379 | ** respectively. In this case output parameters (*pnText) and (*pnLoc) are | |||
20380 | ** set to the sizes in bytes of these two buffers. | |||
20381 | ** | |||
20382 | ** Or, if an error occurs, then an SQLite error code is returned. The final | |||
20383 | ** value of the four output parameters is undefined in this case. | |||
20384 | */ | |||
20385 | static int sqlite3Fts5DecodeLocaleValue( | |||
20386 | sqlite3_value *pVal, | |||
20387 | const char **ppText, | |||
20388 | int *pnText, | |||
20389 | const char **ppLoc, | |||
20390 | int *pnLoc | |||
20391 | ){ | |||
20392 | const char *p = sqlite3_value_blobsqlite3_api->value_blob(pVal); | |||
20393 | int n = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
20394 | int nLoc = 0; | |||
20395 | ||||
20396 | assert( sqlite3_value_type(pVal)==SQLITE_BLOB )((void) (0)); | |||
20397 | assert( n>FTS5_LOCALE_HDR_SIZE )((void) (0)); | |||
20398 | ||||
20399 | for(nLoc=FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); p[nLoc]; nLoc++){ | |||
20400 | if( nLoc==(n-1) ){ | |||
20401 | return SQLITE_MISMATCH20; | |||
20402 | } | |||
20403 | } | |||
20404 | *ppLoc = &p[FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))]; | |||
20405 | *pnLoc = nLoc - FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); | |||
20406 | ||||
20407 | *ppText = &p[nLoc+1]; | |||
20408 | *pnText = n - nLoc - 1; | |||
20409 | return SQLITE_OK0; | |||
20410 | } | |||
20411 | ||||
20412 | /* | |||
20413 | ** Argument pVal is the text of a full-text search expression. It may or | |||
20414 | ** may not have been wrapped by fts5_locale(). This function extracts | |||
20415 | ** the text of the expression, and sets output variable (*pzText) to | |||
20416 | ** point to a nul-terminated buffer containing the expression. | |||
20417 | ** | |||
20418 | ** If pVal was an fts5_locale() value, then sqlite3Fts5SetLocale() is called | |||
20419 | ** to set the tokenizer to use the specified locale. | |||
20420 | ** | |||
20421 | ** If output variable (*pbFreeAndReset) is set to true, then the caller | |||
20422 | ** is required to (a) call sqlite3Fts5ClearLocale() to reset the tokenizer | |||
20423 | ** locale, and (b) call sqlite3_free() to free (*pzText). | |||
20424 | */ | |||
20425 | static int fts5ExtractExprText( | |||
20426 | Fts5Config *pConfig, /* Fts5 configuration */ | |||
20427 | sqlite3_value *pVal, /* Value to extract expression text from */ | |||
20428 | char **pzText, /* OUT: nul-terminated buffer of text */ | |||
20429 | int *pbFreeAndReset /* OUT: Free (*pzText) and clear locale */ | |||
20430 | ){ | |||
20431 | int rc = SQLITE_OK0; | |||
20432 | ||||
20433 | if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
20434 | const char *pText = 0; | |||
20435 | int nText = 0; | |||
20436 | const char *pLoc = 0; | |||
20437 | int nLoc = 0; | |||
20438 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
20439 | *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, pText); | |||
20440 | if( rc==SQLITE_OK0 ){ | |||
20441 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
20442 | } | |||
20443 | *pbFreeAndReset = 1; | |||
20444 | }else{ | |||
20445 | *pzText = (char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
20446 | *pbFreeAndReset = 0; | |||
20447 | } | |||
20448 | ||||
20449 | return rc; | |||
20450 | } | |||
20451 | ||||
20452 | ||||
20453 | /* | |||
20454 | ** This is the xFilter interface for the virtual table. See | |||
20455 | ** the virtual table xFilter method documentation for additional | |||
20456 | ** information. | |||
20457 | ** | |||
20458 | ** There are three possible query strategies: | |||
20459 | ** | |||
20460 | ** 1. Full-text search using a MATCH operator. | |||
20461 | ** 2. A by-rowid lookup. | |||
20462 | ** 3. A full-table scan. | |||
20463 | */ | |||
20464 | static int fts5FilterMethod( | |||
20465 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ | |||
20466 | int idxNum, /* Strategy index */ | |||
20467 | const char *idxStr, /* Unused */ | |||
20468 | int nVal, /* Number of elements in apVal */ | |||
20469 | sqlite3_value **apVal /* Arguments for the indexing scheme */ | |||
20470 | ){ | |||
20471 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | |||
20472 | Fts5Config *pConfig = pTab->p.pConfig; | |||
20473 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
20474 | int rc = SQLITE_OK0; /* Error code */ | |||
20475 | int bDesc; /* True if ORDER BY [rank|rowid] DESC */ | |||
20476 | int bOrderByRank; /* True if ORDER BY rank */ | |||
20477 | sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */ | |||
20478 | sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */ | |||
20479 | sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */ | |||
20480 | sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ | |||
20481 | int iCol; /* Column on LHS of MATCH operator */ | |||
20482 | char **pzErrmsg = pConfig->pzErrmsg; | |||
20483 | int bPrefixInsttoken = pConfig->bPrefixInsttoken; | |||
20484 | int i; | |||
20485 | int iIdxStr = 0; | |||
20486 | Fts5Expr *pExpr = 0; | |||
20487 | ||||
20488 | assert( pConfig->bLock==0 )((void) (0)); | |||
20489 | if( pCsr->ePlan ){ | |||
20490 | fts5FreeCursorComponents(pCsr); | |||
20491 | memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr)); | |||
20492 | } | |||
20493 | ||||
20494 | assert( pCsr->pStmt==0 )((void) (0)); | |||
20495 | assert( pCsr->pExpr==0 )((void) (0)); | |||
20496 | assert( pCsr->csrflags==0 )((void) (0)); | |||
20497 | assert( pCsr->pRank==0 )((void) (0)); | |||
20498 | assert( pCsr->zRank==0 )((void) (0)); | |||
20499 | assert( pCsr->zRankArgs==0 )((void) (0)); | |||
20500 | assert( pTab->pSortCsr==0 || nVal==0 )((void) (0)); | |||
20501 | ||||
20502 | assert( pzErrmsg==0 || pzErrmsg==&pTab->p.base.zErrMsg )((void) (0)); | |||
20503 | pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | |||
20504 | ||||
20505 | /* Decode the arguments passed through to this function. */ | |||
20506 | for(i=0; i<nVal; i++){ | |||
20507 | switch( idxStr[iIdxStr++] ){ | |||
20508 | case 'r': | |||
20509 | pRank = apVal[i]; | |||
20510 | break; | |||
20511 | case 'M': { | |||
20512 | char *zText = 0; | |||
20513 | int bFreeAndReset = 0; | |||
20514 | int bInternal = 0; | |||
20515 | ||||
20516 | rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset); | |||
20517 | if( rc!=SQLITE_OK0 ) goto filter_out; | |||
20518 | if( zText==0 ) zText = ""; | |||
20519 | if( sqlite3_value_subtypesqlite3_api->value_subtype(apVal[i])==FTS5_INSTTOKEN_SUBTYPE73 ){ | |||
20520 | pConfig->bPrefixInsttoken = 1; | |||
20521 | } | |||
20522 | ||||
20523 | iCol = 0; | |||
20524 | do{ | |||
20525 | iCol = iCol*10 + (idxStr[iIdxStr]-'0'); | |||
20526 | iIdxStr++; | |||
20527 | }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' ); | |||
20528 | ||||
20529 | if( zText[0]=='*' ){ | |||
20530 | /* The user has issued a query of the form "MATCH '*...'". This | |||
20531 | ** indicates that the MATCH expression is not a full text query, | |||
20532 | ** but a request for an internal parameter. */ | |||
20533 | rc = fts5SpecialMatch(pTab, pCsr, &zText[1]); | |||
20534 | bInternal = 1; | |||
20535 | }else{ | |||
20536 | char **pzErr = &pTab->p.base.zErrMsg; | |||
20537 | rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr); | |||
20538 | if( rc==SQLITE_OK0 ){ | |||
20539 | rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); | |||
20540 | pExpr = 0; | |||
20541 | } | |||
20542 | } | |||
20543 | ||||
20544 | if( bFreeAndReset ){ | |||
20545 | sqlite3_freesqlite3_api->free(zText); | |||
20546 | sqlite3Fts5ClearLocale(pConfig); | |||
20547 | } | |||
20548 | ||||
20549 | if( bInternal || rc!=SQLITE_OK0 ) goto filter_out; | |||
20550 | ||||
20551 | break; | |||
20552 | } | |||
20553 | case 'L': | |||
20554 | case 'G': { | |||
20555 | int bGlob = (idxStr[iIdxStr-1]=='G'); | |||
20556 | const char *zText = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[i]); | |||
20557 | iCol = 0; | |||
20558 | do{ | |||
20559 | iCol = iCol*10 + (idxStr[iIdxStr]-'0'); | |||
20560 | iIdxStr++; | |||
20561 | }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' ); | |||
20562 | if( zText ){ | |||
20563 | rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr); | |||
20564 | } | |||
20565 | if( rc==SQLITE_OK0 ){ | |||
20566 | rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); | |||
20567 | pExpr = 0; | |||
20568 | } | |||
20569 | if( rc!=SQLITE_OK0 ) goto filter_out; | |||
20570 | break; | |||
20571 | } | |||
20572 | case '=': | |||
20573 | pRowidEq = apVal[i]; | |||
20574 | break; | |||
20575 | case '<': | |||
20576 | pRowidLe = apVal[i]; | |||
20577 | break; | |||
20578 | default: assert( idxStr[iIdxStr-1]=='>' )((void) (0)); | |||
20579 | pRowidGe = apVal[i]; | |||
20580 | break; | |||
20581 | } | |||
20582 | } | |||
20583 | bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK0x0020) ? 1 : 0); | |||
20584 | pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC0x0080) ? 1 : 0); | |||
20585 | ||||
20586 | /* Set the cursor upper and lower rowid limits. Only some strategies | |||
20587 | ** actually use them. This is ok, as the xBestIndex() method leaves the | |||
20588 | ** sqlite3_index_constraint.omit flag clear for range constraints | |||
20589 | ** on the rowid field. */ | |||
20590 | if( pRowidEq ){ | |||
20591 | pRowidLe = pRowidGe = pRowidEq; | |||
20592 | } | |||
20593 | if( bDesc ){ | |||
20594 | pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))); | |||
20595 | pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32)))); | |||
20596 | }else{ | |||
20597 | pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))); | |||
20598 | pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32)))); | |||
20599 | } | |||
20600 | ||||
20601 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | |||
20602 | if( rc!=SQLITE_OK0 ) goto filter_out; | |||
20603 | ||||
20604 | if( pTab->pSortCsr ){ | |||
20605 | /* If pSortCsr is non-NULL, then this call is being made as part of | |||
20606 | ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is | |||
20607 | ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will | |||
20608 | ** return results to the user for this query. The current cursor | |||
20609 | ** (pCursor) is used to execute the query issued by function | |||
20610 | ** fts5CursorFirstSorted() above. */ | |||
20611 | assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 )((void) (0)); | |||
20612 | assert( nVal==0 && bOrderByRank==0 && bDesc==0 )((void) (0)); | |||
20613 | assert( pCsr->iLastRowid==LARGEST_INT64 )((void) (0)); | |||
20614 | assert( pCsr->iFirstRowid==SMALLEST_INT64 )((void) (0)); | |||
20615 | if( pTab->pSortCsr->bDesc ){ | |||
20616 | pCsr->iLastRowid = pTab->pSortCsr->iFirstRowid; | |||
20617 | pCsr->iFirstRowid = pTab->pSortCsr->iLastRowid; | |||
20618 | }else{ | |||
20619 | pCsr->iLastRowid = pTab->pSortCsr->iLastRowid; | |||
20620 | pCsr->iFirstRowid = pTab->pSortCsr->iFirstRowid; | |||
20621 | } | |||
20622 | pCsr->ePlan = FTS5_PLAN_SOURCE2; | |||
20623 | pCsr->pExpr = pTab->pSortCsr->pExpr; | |||
20624 | rc = fts5CursorFirst(pTab, pCsr, bDesc); | |||
20625 | }else if( pCsr->pExpr ){ | |||
20626 | assert( rc==SQLITE_OK )((void) (0)); | |||
20627 | rc = fts5CursorParseRank(pConfig, pCsr, pRank); | |||
20628 | if( rc==SQLITE_OK0 ){ | |||
20629 | if( bOrderByRank ){ | |||
20630 | pCsr->ePlan = FTS5_PLAN_SORTED_MATCH4; | |||
20631 | rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); | |||
20632 | }else{ | |||
20633 | pCsr->ePlan = FTS5_PLAN_MATCH1; | |||
20634 | rc = fts5CursorFirst(pTab, pCsr, bDesc); | |||
20635 | } | |||
20636 | } | |||
20637 | }else if( pConfig->zContent==0 ){ | |||
20638 | fts5SetVtabError(pTab,"%s: table does not support scanning",pConfig->zName); | |||
20639 | rc = SQLITE_ERROR1; | |||
20640 | }else{ | |||
20641 | /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup | |||
20642 | ** by rowid (ePlan==FTS5_PLAN_ROWID). */ | |||
20643 | pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID6 : FTS5_PLAN_SCAN5); | |||
20644 | rc = sqlite3Fts5StorageStmt( | |||
20645 | pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->p.base.zErrMsg | |||
20646 | ); | |||
20647 | if( rc==SQLITE_OK0 ){ | |||
20648 | if( pRowidEq!=0 ){ | |||
20649 | assert( pCsr->ePlan==FTS5_PLAN_ROWID )((void) (0)); | |||
20650 | sqlite3_bind_valuesqlite3_api->bind_value(pCsr->pStmt, 1, pRowidEq); | |||
20651 | }else{ | |||
20652 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid); | |||
20653 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid); | |||
20654 | } | |||
20655 | rc = fts5NextMethod(pCursor); | |||
20656 | } | |||
20657 | } | |||
20658 | ||||
20659 | filter_out: | |||
20660 | sqlite3Fts5ExprFree(pExpr); | |||
20661 | pConfig->pzErrmsg = pzErrmsg; | |||
20662 | pConfig->bPrefixInsttoken = bPrefixInsttoken; | |||
20663 | return rc; | |||
20664 | } | |||
20665 | ||||
20666 | /* | |||
20667 | ** This is the xEof method of the virtual table. SQLite calls this | |||
20668 | ** routine to find out if it has reached the end of a result set. | |||
20669 | */ | |||
20670 | static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ | |||
20671 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
20672 | return (CsrFlagTest(pCsr, FTS5CSR_EOF)((pCsr)->csrflags & (0x01)) ? 1 : 0); | |||
20673 | } | |||
20674 | ||||
20675 | /* | |||
20676 | ** Return the rowid that the cursor currently points to. | |||
20677 | */ | |||
20678 | static i64 fts5CursorRowid(Fts5Cursor *pCsr){ | |||
20679 | assert( pCsr->ePlan==FTS5_PLAN_MATCH((void) (0)) | |||
20680 | || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH((void) (0)) | |||
20681 | || pCsr->ePlan==FTS5_PLAN_SOURCE((void) (0)) | |||
20682 | || pCsr->ePlan==FTS5_PLAN_SCAN((void) (0)) | |||
20683 | || pCsr->ePlan==FTS5_PLAN_ROWID((void) (0)) | |||
20684 | )((void) (0)); | |||
20685 | if( pCsr->pSorter ){ | |||
20686 | return pCsr->pSorter->iRowid; | |||
20687 | }else if( pCsr->ePlan>=FTS5_PLAN_SCAN5 ){ | |||
20688 | return sqlite3_column_int64sqlite3_api->column_int64(pCsr->pStmt, 0); | |||
20689 | }else{ | |||
20690 | return sqlite3Fts5ExprRowid(pCsr->pExpr); | |||
20691 | } | |||
20692 | } | |||
20693 | ||||
20694 | /* | |||
20695 | ** This is the xRowid method. The SQLite core calls this routine to | |||
20696 | ** retrieve the rowid for the current row of the result set. fts5 | |||
20697 | ** exposes %_content.rowid as the rowid for the virtual table. The | |||
20698 | ** rowid should be written to *pRowid. | |||
20699 | */ | |||
20700 | static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ | |||
20701 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
20702 | int ePlan = pCsr->ePlan; | |||
20703 | ||||
20704 | assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 )((void) (0)); | |||
20705 | if( ePlan==FTS5_PLAN_SPECIAL3 ){ | |||
20706 | *pRowid = 0; | |||
20707 | }else{ | |||
20708 | *pRowid = fts5CursorRowid(pCsr); | |||
20709 | } | |||
20710 | ||||
20711 | return SQLITE_OK0; | |||
20712 | } | |||
20713 | ||||
20714 | ||||
20715 | /* | |||
20716 | ** If the cursor requires seeking (bSeekRequired flag is set), seek it. | |||
20717 | ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. | |||
20718 | ** | |||
20719 | ** If argument bErrormsg is true and an error occurs, an error message may | |||
20720 | ** be left in sqlite3_vtab.zErrMsg. | |||
20721 | */ | |||
20722 | static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){ | |||
20723 | int rc = SQLITE_OK0; | |||
20724 | ||||
20725 | /* If the cursor does not yet have a statement handle, obtain one now. */ | |||
20726 | if( pCsr->pStmt==0 ){ | |||
20727 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
20728 | int eStmt = fts5StmtType(pCsr); | |||
20729 | rc = sqlite3Fts5StorageStmt( | |||
20730 | pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->p.base.zErrMsg:0) | |||
20731 | ); | |||
20732 | assert( rc!=SQLITE_OK || pTab->p.base.zErrMsg==0 )((void) (0)); | |||
20733 | assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) )((void) (0)); | |||
20734 | } | |||
20735 | ||||
20736 | if( rc==SQLITE_OK0 && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags & (0x02)) ){ | |||
20737 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | |||
20738 | assert( pCsr->pExpr )((void) (0)); | |||
20739 | sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | |||
20740 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); | |||
20741 | pTab->pConfig->bLock++; | |||
20742 | rc = sqlite3_stepsqlite3_api->step(pCsr->pStmt); | |||
20743 | pTab->pConfig->bLock--; | |||
20744 | if( rc==SQLITE_ROW100 ){ | |||
20745 | rc = SQLITE_OK0; | |||
20746 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags &= ~(0x02)); | |||
20747 | }else{ | |||
20748 | rc = sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | |||
20749 | if( rc==SQLITE_OK0 ){ | |||
20750 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
20751 | fts5SetVtabError((Fts5FullTable*)pTab, | |||
20752 | "fts5: missing row %lld from content table %s", | |||
20753 | fts5CursorRowid(pCsr), | |||
20754 | pTab->pConfig->zContent | |||
20755 | ); | |||
20756 | }else if( pTab->pConfig->pzErrmsg ){ | |||
20757 | fts5SetVtabError((Fts5FullTable*)pTab, | |||
20758 | "%s", sqlite3_errmsgsqlite3_api->errmsg(pTab->pConfig->db) | |||
20759 | ); | |||
20760 | } | |||
20761 | } | |||
20762 | } | |||
20763 | return rc; | |||
20764 | } | |||
20765 | ||||
20766 | /* | |||
20767 | ** This function is called to handle an FTS INSERT command. In other words, | |||
20768 | ** an INSERT statement of the form: | |||
20769 | ** | |||
20770 | ** INSERT INTO fts(fts) VALUES($pCmd) | |||
20771 | ** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal) | |||
20772 | ** | |||
20773 | ** Argument pVal is the value assigned to column "fts" by the INSERT | |||
20774 | ** statement. This function returns SQLITE_OK if successful, or an SQLite | |||
20775 | ** error code if an error occurs. | |||
20776 | ** | |||
20777 | ** The commands implemented by this function are documented in the "Special | |||
20778 | ** INSERT Directives" section of the documentation. It should be updated if | |||
20779 | ** more commands are added to this function. | |||
20780 | */ | |||
20781 | static int fts5SpecialInsert( | |||
20782 | Fts5FullTable *pTab, /* Fts5 table object */ | |||
20783 | const char *zCmd, /* Text inserted into table-name column */ | |||
20784 | sqlite3_value *pVal /* Value inserted into rank column */ | |||
20785 | ){ | |||
20786 | Fts5Config *pConfig = pTab->p.pConfig; | |||
20787 | int rc = SQLITE_OK0; | |||
20788 | int bError = 0; | |||
20789 | int bLoadConfig = 0; | |||
20790 | ||||
20791 | if( 0==sqlite3_stricmpsqlite3_api->stricmp("delete-all", zCmd) ){ | |||
20792 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
20793 | fts5SetVtabError(pTab, | |||
20794 | "'delete-all' may only be used with a " | |||
20795 | "contentless or external content fts5 table" | |||
20796 | ); | |||
20797 | rc = SQLITE_ERROR1; | |||
20798 | }else{ | |||
20799 | rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); | |||
20800 | } | |||
20801 | bLoadConfig = 1; | |||
20802 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("rebuild", zCmd) ){ | |||
20803 | if( fts5IsContentless(pTab, 1) ){ | |||
20804 | fts5SetVtabError(pTab, | |||
20805 | "'rebuild' may not be used with a contentless fts5 table" | |||
20806 | ); | |||
20807 | rc = SQLITE_ERROR1; | |||
20808 | }else{ | |||
20809 | rc = sqlite3Fts5StorageRebuild(pTab->pStorage); | |||
20810 | } | |||
20811 | bLoadConfig = 1; | |||
20812 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("optimize", zCmd) ){ | |||
20813 | rc = sqlite3Fts5StorageOptimize(pTab->pStorage); | |||
20814 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("merge", zCmd) ){ | |||
20815 | int nMerge = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
20816 | rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); | |||
20817 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("integrity-check", zCmd) ){ | |||
20818 | int iArg = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
20819 | rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, iArg); | |||
20820 | #ifdef SQLITE_DEBUG | |||
20821 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("prefix-index", zCmd) ){ | |||
20822 | pConfig->bPrefixIndex = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
20823 | #endif | |||
20824 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("flush", zCmd) ){ | |||
20825 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | |||
20826 | }else{ | |||
20827 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | |||
20828 | if( rc==SQLITE_OK0 ){ | |||
20829 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | |||
20830 | } | |||
20831 | if( rc==SQLITE_OK0 ){ | |||
20832 | rc = sqlite3Fts5ConfigSetValue(pTab->p.pConfig, zCmd, pVal, &bError); | |||
20833 | } | |||
20834 | if( rc==SQLITE_OK0 ){ | |||
20835 | if( bError ){ | |||
20836 | rc = SQLITE_ERROR1; | |||
20837 | }else{ | |||
20838 | rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0); | |||
20839 | } | |||
20840 | } | |||
20841 | } | |||
20842 | ||||
20843 | if( rc==SQLITE_OK0 && bLoadConfig ){ | |||
20844 | pTab->p.pConfig->iCookie--; | |||
20845 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | |||
20846 | } | |||
20847 | ||||
20848 | return rc; | |||
20849 | } | |||
20850 | ||||
20851 | static int fts5SpecialDelete( | |||
20852 | Fts5FullTable *pTab, | |||
20853 | sqlite3_value **apVal | |||
20854 | ){ | |||
20855 | int rc = SQLITE_OK0; | |||
20856 | int eType1 = sqlite3_value_typesqlite3_api->value_type(apVal[1]); | |||
20857 | if( eType1==SQLITE_INTEGER1 ){ | |||
20858 | sqlite3_int64 iDel = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); | |||
20859 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2], 0); | |||
20860 | } | |||
20861 | return rc; | |||
20862 | } | |||
20863 | ||||
20864 | static void fts5StorageInsert( | |||
20865 | int *pRc, | |||
20866 | Fts5FullTable *pTab, | |||
20867 | sqlite3_value **apVal, | |||
20868 | i64 *piRowid | |||
20869 | ){ | |||
20870 | int rc = *pRc; | |||
20871 | if( rc==SQLITE_OK0 ){ | |||
20872 | rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, 0, apVal, piRowid); | |||
20873 | } | |||
20874 | if( rc==SQLITE_OK0 ){ | |||
20875 | rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid); | |||
20876 | } | |||
20877 | *pRc = rc; | |||
20878 | } | |||
20879 | ||||
20880 | /* | |||
20881 | ** | |||
20882 | ** This function is called when the user attempts an UPDATE on a contentless | |||
20883 | ** table. Parameter bRowidModified is true if the UPDATE statement modifies | |||
20884 | ** the rowid value. Parameter apVal[] contains the new values for each user | |||
20885 | ** defined column of the fts5 table. pConfig is the configuration object of the | |||
20886 | ** table being updated (guaranteed to be contentless). The contentless_delete=1 | |||
20887 | ** and contentless_unindexed=1 options may or may not be set. | |||
20888 | ** | |||
20889 | ** This function returns SQLITE_OK if the UPDATE can go ahead, or an SQLite | |||
20890 | ** error code if it cannot. In this case an error message is also loaded into | |||
20891 | ** pConfig. Output parameter (*pbContent) is set to true if the caller should | |||
20892 | ** update the %_content table only - not the FTS index or any other shadow | |||
20893 | ** table. This occurs when an UPDATE modifies only UNINDEXED columns of the | |||
20894 | ** table. | |||
20895 | ** | |||
20896 | ** An UPDATE may proceed if: | |||
20897 | ** | |||
20898 | ** * The only columns modified are UNINDEXED columns, or | |||
20899 | ** | |||
20900 | ** * The contentless_delete=1 option was specified and all of the indexed | |||
20901 | ** columns (not a subset) have been modified. | |||
20902 | */ | |||
20903 | static int fts5ContentlessUpdate( | |||
20904 | Fts5Config *pConfig, | |||
20905 | sqlite3_value **apVal, | |||
20906 | int bRowidModified, | |||
20907 | int *pbContent | |||
20908 | ){ | |||
20909 | int ii; | |||
20910 | int bSeenIndex = 0; /* Have seen modified indexed column */ | |||
20911 | int bSeenIndexNC = 0; /* Have seen unmodified indexed column */ | |||
20912 | int rc = SQLITE_OK0; | |||
20913 | ||||
20914 | for(ii=0; ii<pConfig->nCol; ii++){ | |||
20915 | if( pConfig->abUnindexed[ii]==0 ){ | |||
20916 | if( sqlite3_value_nochangesqlite3_api->value_nochange(apVal[ii]) ){ | |||
20917 | bSeenIndexNC++; | |||
20918 | }else{ | |||
20919 | bSeenIndex++; | |||
20920 | } | |||
20921 | } | |||
20922 | } | |||
20923 | ||||
20924 | if( bSeenIndex==0 && bRowidModified==0 ){ | |||
20925 | *pbContent = 1; | |||
20926 | }else{ | |||
20927 | if( bSeenIndexNC || pConfig->bContentlessDelete==0 ){ | |||
20928 | rc = SQLITE_ERROR1; | |||
20929 | sqlite3Fts5ConfigErrmsg(pConfig, | |||
20930 | (pConfig->bContentlessDelete ? | |||
20931 | "%s a subset of columns on fts5 contentless-delete table: %s" : | |||
20932 | "%s contentless fts5 table: %s") | |||
20933 | , "cannot UPDATE", pConfig->zName | |||
20934 | ); | |||
20935 | } | |||
20936 | } | |||
20937 | ||||
20938 | return rc; | |||
20939 | } | |||
20940 | ||||
20941 | /* | |||
20942 | ** This function is the implementation of the xUpdate callback used by | |||
20943 | ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be | |||
20944 | ** inserted, updated or deleted. | |||
20945 | ** | |||
20946 | ** A delete specifies a single argument - the rowid of the row to remove. | |||
20947 | ** | |||
20948 | ** Update and insert operations pass: | |||
20949 | ** | |||
20950 | ** 1. The "old" rowid, or NULL. | |||
20951 | ** 2. The "new" rowid. | |||
20952 | ** 3. Values for each of the nCol matchable columns. | |||
20953 | ** 4. Values for the two hidden columns (<tablename> and "rank"). | |||
20954 | */ | |||
20955 | static int fts5UpdateMethod( | |||
20956 | sqlite3_vtab *pVtab, /* Virtual table handle */ | |||
20957 | int nArg, /* Size of argument array */ | |||
20958 | sqlite3_value **apVal, /* Array of arguments */ | |||
20959 | sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ | |||
20960 | ){ | |||
20961 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
20962 | Fts5Config *pConfig = pTab->p.pConfig; | |||
20963 | int eType0; /* value_type() of apVal[0] */ | |||
20964 | int rc = SQLITE_OK0; /* Return code */ | |||
20965 | ||||
20966 | /* A transaction must be open when this is called. */ | |||
20967 | assert( pTab->ts.eState==1 || pTab->ts.eState==2 )((void) (0)); | |||
20968 | ||||
20969 | assert( pVtab->zErrMsg==0 )((void) (0)); | |||
20970 | assert( nArg==1 || nArg==(2+pConfig->nCol+2) )((void) (0)); | |||
20971 | assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER((void) (0)) | |||
20972 | || sqlite3_value_type(apVal[0])==SQLITE_NULL((void) (0)) | |||
20973 | )((void) (0)); | |||
20974 | assert( pTab->p.pConfig->pzErrmsg==0 )((void) (0)); | |||
20975 | if( pConfig->pgsz==0 ){ | |||
20976 | rc = sqlite3Fts5ConfigLoad(pTab->p.pConfig, pTab->p.pConfig->iCookie); | |||
20977 | if( rc!=SQLITE_OK0 ) return rc; | |||
20978 | } | |||
20979 | ||||
20980 | pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | |||
20981 | ||||
20982 | /* Put any active cursors into REQUIRE_SEEK state. */ | |||
20983 | fts5TripCursors(pTab); | |||
20984 | ||||
20985 | eType0 = sqlite3_value_typesqlite3_api->value_type(apVal[0]); | |||
20986 | if( eType0==SQLITE_NULL5 | |||
20987 | && sqlite3_value_typesqlite3_api->value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL5 | |||
20988 | ){ | |||
20989 | /* A "special" INSERT op. These are handled separately. */ | |||
20990 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[2+pConfig->nCol]); | |||
20991 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 | |||
20992 | && 0==sqlite3_stricmpsqlite3_api->stricmp("delete", z) | |||
20993 | ){ | |||
20994 | if( pConfig->bContentlessDelete ){ | |||
20995 | fts5SetVtabError(pTab, | |||
20996 | "'delete' may not be used with a contentless_delete=1 table" | |||
20997 | ); | |||
20998 | rc = SQLITE_ERROR1; | |||
20999 | }else{ | |||
21000 | rc = fts5SpecialDelete(pTab, apVal); | |||
21001 | } | |||
21002 | }else{ | |||
21003 | rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]); | |||
21004 | } | |||
21005 | }else{ | |||
21006 | /* A regular INSERT, UPDATE or DELETE statement. The trick here is that | |||
21007 | ** any conflict on the rowid value must be detected before any | |||
21008 | ** modifications are made to the database file. There are 4 cases: | |||
21009 | ** | |||
21010 | ** 1) DELETE | |||
21011 | ** 2) UPDATE (rowid not modified) | |||
21012 | ** 3) UPDATE (rowid modified) | |||
21013 | ** 4) INSERT | |||
21014 | ** | |||
21015 | ** Cases 3 and 4 may violate the rowid constraint. | |||
21016 | */ | |||
21017 | int eConflict = SQLITE_ABORT4; | |||
21018 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 || pConfig->bContentlessDelete ){ | |||
21019 | eConflict = sqlite3_vtab_on_conflictsqlite3_api->vtab_on_conflict(pConfig->db); | |||
21020 | } | |||
21021 | ||||
21022 | assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL )((void) (0)); | |||
21023 | assert( nArg!=1 || eType0==SQLITE_INTEGER )((void) (0)); | |||
21024 | ||||
21025 | /* DELETE */ | |||
21026 | if( nArg==1 ){ | |||
21027 | /* It is only possible to DELETE from a contentless table if the | |||
21028 | ** contentless_delete=1 flag is set. */ | |||
21029 | if( fts5IsContentless(pTab, 1) && pConfig->bContentlessDelete==0 ){ | |||
21030 | fts5SetVtabError(pTab, | |||
21031 | "cannot DELETE from contentless fts5 table: %s", pConfig->zName | |||
21032 | ); | |||
21033 | rc = SQLITE_ERROR1; | |||
21034 | }else{ | |||
21035 | i64 iDel = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); /* Rowid to delete */ | |||
21036 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0); | |||
21037 | } | |||
21038 | } | |||
21039 | ||||
21040 | /* INSERT or UPDATE */ | |||
21041 | else{ | |||
21042 | int eType1 = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(apVal[1]); | |||
21043 | ||||
21044 | /* It is an error to write an fts5_locale() value to a table without | |||
21045 | ** the locale=1 option. */ | |||
21046 | if( pConfig->bLocale==0 ){ | |||
21047 | int ii; | |||
21048 | for(ii=0; ii<pConfig->nCol; ii++){ | |||
21049 | sqlite3_value *pVal = apVal[ii+2]; | |||
21050 | if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
21051 | fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); | |||
21052 | rc = SQLITE_MISMATCH20; | |||
21053 | goto update_out; | |||
21054 | } | |||
21055 | } | |||
21056 | } | |||
21057 | ||||
21058 | if( eType0!=SQLITE_INTEGER1 ){ | |||
21059 | /* An INSERT statement. If the conflict-mode is REPLACE, first remove | |||
21060 | ** the current entry (if any). */ | |||
21061 | if( eConflict==SQLITE_REPLACE5 && eType1==SQLITE_INTEGER1 ){ | |||
21062 | i64 iNew = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); /* Rowid to delete */ | |||
21063 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0); | |||
21064 | } | |||
21065 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | |||
21066 | } | |||
21067 | ||||
21068 | /* UPDATE */ | |||
21069 | else{ | |||
21070 | Fts5Storage *pStorage = pTab->pStorage; | |||
21071 | i64 iOld = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); /* Old rowid */ | |||
21072 | i64 iNew = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); /* New rowid */ | |||
21073 | int bContent = 0; /* Content only update */ | |||
21074 | ||||
21075 | /* If this is a contentless table (including contentless_unindexed=1 | |||
21076 | ** tables), check if the UPDATE may proceed. */ | |||
21077 | if( fts5IsContentless(pTab, 1) ){ | |||
21078 | rc = fts5ContentlessUpdate(pConfig, &apVal[2], iOld!=iNew, &bContent); | |||
21079 | if( rc!=SQLITE_OK0 ) goto update_out; | |||
21080 | } | |||
21081 | ||||
21082 | if( eType1!=SQLITE_INTEGER1 ){ | |||
21083 | rc = SQLITE_MISMATCH20; | |||
21084 | }else if( iOld!=iNew ){ | |||
21085 | assert( bContent==0 )((void) (0)); | |||
21086 | if( eConflict==SQLITE_REPLACE5 ){ | |||
21087 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); | |||
21088 | if( rc==SQLITE_OK0 ){ | |||
21089 | rc = sqlite3Fts5StorageDelete(pStorage, iNew, 0, 0); | |||
21090 | } | |||
21091 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | |||
21092 | }else{ | |||
21093 | rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); | |||
21094 | if( rc==SQLITE_OK0 ){ | |||
21095 | rc = sqlite3Fts5StorageContentInsert(pStorage, 0, apVal, pRowid); | |||
21096 | } | |||
21097 | if( rc==SQLITE_OK0 ){ | |||
21098 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 0); | |||
21099 | } | |||
21100 | if( rc==SQLITE_OK0 ){ | |||
21101 | rc = sqlite3Fts5StorageIndexInsert(pStorage, apVal, *pRowid); | |||
21102 | } | |||
21103 | } | |||
21104 | }else if( bContent ){ | |||
21105 | /* This occurs when an UPDATE on a contentless table affects *only* | |||
21106 | ** UNINDEXED columns. This is a no-op for contentless_unindexed=0 | |||
21107 | ** tables, or a write to the %_content table only for =1 tables. */ | |||
21108 | assert( fts5IsContentless(pTab, 1) )((void) (0)); | |||
21109 | rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); | |||
21110 | if( rc==SQLITE_OK0 ){ | |||
21111 | rc = sqlite3Fts5StorageContentInsert(pStorage, 1, apVal, pRowid); | |||
21112 | } | |||
21113 | }else{ | |||
21114 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); | |||
21115 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | |||
21116 | } | |||
21117 | sqlite3Fts5StorageReleaseDeleteRow(pStorage); | |||
21118 | } | |||
21119 | } | |||
21120 | } | |||
21121 | ||||
21122 | update_out: | |||
21123 | pTab->p.pConfig->pzErrmsg = 0; | |||
21124 | return rc; | |||
21125 | } | |||
21126 | ||||
21127 | /* | |||
21128 | ** Implementation of xSync() method. | |||
21129 | */ | |||
21130 | static int fts5SyncMethod(sqlite3_vtab *pVtab){ | |||
21131 | int rc; | |||
21132 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
21133 | fts5CheckTransactionState(pTab, FTS5_SYNC, 0); | |||
21134 | pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | |||
21135 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | |||
21136 | pTab->p.pConfig->pzErrmsg = 0; | |||
21137 | return rc; | |||
21138 | } | |||
21139 | ||||
21140 | /* | |||
21141 | ** Implementation of xBegin() method. | |||
21142 | */ | |||
21143 | static int fts5BeginMethod(sqlite3_vtab *pVtab){ | |||
21144 | int rc = fts5NewTransaction((Fts5FullTable*)pVtab); | |||
21145 | if( rc==SQLITE_OK0 ){ | |||
21146 | fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_BEGIN, 0); | |||
21147 | } | |||
21148 | return rc; | |||
21149 | } | |||
21150 | ||||
21151 | /* | |||
21152 | ** Implementation of xCommit() method. This is a no-op. The contents of | |||
21153 | ** the pending-terms hash-table have already been flushed into the database | |||
21154 | ** by fts5SyncMethod(). | |||
21155 | */ | |||
21156 | static int fts5CommitMethod(sqlite3_vtab *pVtab){ | |||
21157 | UNUSED_PARAM(pVtab)(void)(pVtab); /* Call below is a no-op for NDEBUG builds */ | |||
21158 | fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_COMMIT, 0); | |||
21159 | return SQLITE_OK0; | |||
21160 | } | |||
21161 | ||||
21162 | /* | |||
21163 | ** Implementation of xRollback(). Discard the contents of the pending-terms | |||
21164 | ** hash-table. Any changes made to the database are reverted by SQLite. | |||
21165 | */ | |||
21166 | static int fts5RollbackMethod(sqlite3_vtab *pVtab){ | |||
21167 | int rc; | |||
21168 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
21169 | fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); | |||
21170 | rc = sqlite3Fts5StorageRollback(pTab->pStorage); | |||
21171 | pTab->p.pConfig->pgsz = 0; | |||
21172 | return rc; | |||
21173 | } | |||
21174 | ||||
21175 | static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*); | |||
21176 | ||||
21177 | static void *fts5ApiUserData(Fts5Context *pCtx){ | |||
21178 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21179 | return pCsr->pAux->pUserData; | |||
21180 | } | |||
21181 | ||||
21182 | static int fts5ApiColumnCount(Fts5Context *pCtx){ | |||
21183 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21184 | return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; | |||
21185 | } | |||
21186 | ||||
21187 | static int fts5ApiColumnTotalSize( | |||
21188 | Fts5Context *pCtx, | |||
21189 | int iCol, | |||
21190 | sqlite3_int64 *pnToken | |||
21191 | ){ | |||
21192 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21193 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
21194 | return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); | |||
21195 | } | |||
21196 | ||||
21197 | static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ | |||
21198 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21199 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
21200 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); | |||
21201 | } | |||
21202 | ||||
21203 | /* | |||
21204 | ** Implementation of xTokenize_v2() API. | |||
21205 | */ | |||
21206 | static int fts5ApiTokenize_v2( | |||
21207 | Fts5Context *pCtx, | |||
21208 | const char *pText, int nText, | |||
21209 | const char *pLoc, int nLoc, | |||
21210 | void *pUserData, | |||
21211 | int (*xToken)(void*, int, const char*, int, int, int) | |||
21212 | ){ | |||
21213 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21214 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | |||
21215 | int rc = SQLITE_OK0; | |||
21216 | ||||
21217 | sqlite3Fts5SetLocale(pTab->pConfig, pLoc, nLoc); | |||
21218 | rc = sqlite3Fts5Tokenize(pTab->pConfig, | |||
21219 | FTS5_TOKENIZE_AUX0x0008, pText, nText, pUserData, xToken | |||
21220 | ); | |||
21221 | sqlite3Fts5SetLocale(pTab->pConfig, 0, 0); | |||
21222 | ||||
21223 | return rc; | |||
21224 | } | |||
21225 | ||||
21226 | /* | |||
21227 | ** Implementation of xTokenize() API. This is just xTokenize_v2() with NULL/0 | |||
21228 | ** passed as the locale. | |||
21229 | */ | |||
21230 | static int fts5ApiTokenize( | |||
21231 | Fts5Context *pCtx, | |||
21232 | const char *pText, int nText, | |||
21233 | void *pUserData, | |||
21234 | int (*xToken)(void*, int, const char*, int, int, int) | |||
21235 | ){ | |||
21236 | return fts5ApiTokenize_v2(pCtx, pText, nText, 0, 0, pUserData, xToken); | |||
21237 | } | |||
21238 | ||||
21239 | static int fts5ApiPhraseCount(Fts5Context *pCtx){ | |||
21240 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21241 | return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | |||
21242 | } | |||
21243 | ||||
21244 | static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ | |||
21245 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21246 | return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); | |||
21247 | } | |||
21248 | ||||
21249 | /* | |||
21250 | ** Argument pStmt is an SQL statement of the type used by Fts5Cursor. This | |||
21251 | ** function extracts the text value of column iCol of the current row. | |||
21252 | ** Additionally, if there is an associated locale, it invokes | |||
21253 | ** sqlite3Fts5SetLocale() to configure the tokenizer. In all cases the caller | |||
21254 | ** should invoke sqlite3Fts5ClearLocale() to clear the locale at some point | |||
21255 | ** after this function returns. | |||
21256 | ** | |||
21257 | ** If successful, (*ppText) is set to point to a buffer containing the text | |||
21258 | ** value as utf-8 and SQLITE_OK returned. (*pnText) is set to the size of that | |||
21259 | ** buffer in bytes. It is not guaranteed to be nul-terminated. If an error | |||
21260 | ** occurs, an SQLite error code is returned. The final values of the two | |||
21261 | ** output parameters are undefined in this case. | |||
21262 | */ | |||
21263 | static int fts5TextFromStmt( | |||
21264 | Fts5Config *pConfig, | |||
21265 | sqlite3_stmt *pStmt, | |||
21266 | int iCol, | |||
21267 | const char **ppText, | |||
21268 | int *pnText | |||
21269 | ){ | |||
21270 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pStmt, iCol+1); | |||
21271 | const char *pLoc = 0; | |||
21272 | int nLoc = 0; | |||
21273 | int rc = SQLITE_OK0; | |||
21274 | ||||
21275 | if( pConfig->bLocale | |||
21276 | && pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | |||
21277 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | |||
21278 | ){ | |||
21279 | rc = sqlite3Fts5DecodeLocaleValue(pVal, ppText, pnText, &pLoc, &nLoc); | |||
21280 | }else{ | |||
21281 | *ppText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
21282 | *pnText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
21283 | if( pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
21284 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pStmt, iCol+1+pConfig->nCol); | |||
21285 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pStmt, iCol+1+pConfig->nCol); | |||
21286 | } | |||
21287 | } | |||
21288 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
21289 | return rc; | |||
21290 | } | |||
21291 | ||||
21292 | static int fts5ApiColumnText( | |||
21293 | Fts5Context *pCtx, | |||
21294 | int iCol, | |||
21295 | const char **pz, | |||
21296 | int *pn | |||
21297 | ){ | |||
21298 | int rc = SQLITE_OK0; | |||
21299 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21300 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | |||
21301 | ||||
21302 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | |||
21303 | if( iCol<0 || iCol>=pTab->pConfig->nCol ){ | |||
21304 | rc = SQLITE_RANGE25; | |||
21305 | }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab), 0) ){ | |||
21306 | *pz = 0; | |||
21307 | *pn = 0; | |||
21308 | }else{ | |||
21309 | rc = fts5SeekCursor(pCsr, 0); | |||
21310 | if( rc==SQLITE_OK0 ){ | |||
21311 | rc = fts5TextFromStmt(pTab->pConfig, pCsr->pStmt, iCol, pz, pn); | |||
21312 | sqlite3Fts5ClearLocale(pTab->pConfig); | |||
21313 | } | |||
21314 | } | |||
21315 | return rc; | |||
21316 | } | |||
21317 | ||||
21318 | /* | |||
21319 | ** This is called by various API functions - xInst, xPhraseFirst, | |||
21320 | ** xPhraseFirstColumn etc. - to obtain the position list for phrase iPhrase | |||
21321 | ** of the current row. This function works for both detail=full tables (in | |||
21322 | ** which case the position-list was read from the fts index) or for other | |||
21323 | ** detail= modes if the row content is available. | |||
21324 | */ | |||
21325 | static int fts5CsrPoslist( | |||
21326 | Fts5Cursor *pCsr, /* Fts5 cursor object */ | |||
21327 | int iPhrase, /* Phrase to find position list for */ | |||
21328 | const u8 **pa, /* OUT: Pointer to position list buffer */ | |||
21329 | int *pn /* OUT: Size of (*pa) in bytes */ | |||
21330 | ){ | |||
21331 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | |||
21332 | int rc = SQLITE_OK0; | |||
/* bLive is true when the cursor has no sorter object. */
21333 | int bLive = (pCsr->pSorter==0); | |||
21334 | ||||
/* An out-of-range phrase number yields SQLITE_RANGE; the outputs are then
** zeroed by the final else-branch at the bottom of this function. */
21335 | if( iPhrase<0 || iPhrase>=sqlite3Fts5ExprPhraseCount(pCsr->pExpr) ){ | |||
21336 | rc = SQLITE_RANGE25; | |||
21337 | }else if( pConfig->eDetail!=FTS5_DETAIL_FULL0 | |||
21338 | && fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) | |||
21339 | ){ | |||
/* detail!=full and the table is reported contentless: return an empty
** list. This early return leaves FTS5CSR_REQUIRE_POSLIST untouched. */
21340 | *pa = 0; | |||
21341 | *pn = 0; | |||
21342 | return SQLITE_OK0; | |||
21343 | }else if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST)((pCsr)->csrflags & (0x40)) ){ | |||
/* For detail!=full the position lists are rebuilt here by feeding the
** text of every column of the current row to
** sqlite3Fts5ExprPopulatePoslists(). */
21344 | if( pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | |||
21345 | Fts5PoslistPopulator *aPopulator; | |||
21346 | int i; | |||
21347 | ||||
/* A NULL return from sqlite3Fts5ExprClearPoslists() is treated as
** SQLITE_NOMEM. */
21348 | aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive); | |||
21349 | if( aPopulator==0 ) rc = SQLITE_NOMEM7; | |||
21350 | if( rc==SQLITE_OK0 ){ | |||
21351 | rc = fts5SeekCursor(pCsr, 0); | |||
21352 | } | |||
/* The loop stops at the first column that produces an error. */
21353 | for(i=0; i<pConfig->nCol && rc==SQLITE_OK0; i++){ | |||
21354 | const char *z = 0; | |||
21355 | int n = 0; | |||
21356 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); | |||
21357 | if( rc==SQLITE_OK0 ){ | |||
21358 | rc = sqlite3Fts5ExprPopulatePoslists( | |||
21359 | pConfig, pCsr->pExpr, aPopulator, i, z, n | |||
21360 | ); | |||
21361 | } | |||
/* Undo the locale set by fts5TextFromStmt(), whether or not it or the
** populate step failed. */
21362 | sqlite3Fts5ClearLocale(pConfig); | |||
21363 | } | |||
21364 | sqlite3_freesqlite3_api->free(aPopulator); | |||
21365 | ||||
/* With a sorter present, the expression is checked against the rowid held
** by the sorter. This runs even when rc is already an error code. */
21366 | if( pCsr->pSorter ){ | |||
21367 | sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid); | |||
21368 | } | |||
21369 | } | |||
/* The flag is cleared regardless of the value of rc. */
21370 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST)((pCsr)->csrflags &= ~(0x40)); | |||
21371 | } | |||
21372 | ||||
21373 | if( rc==SQLITE_OK0 ){ | |||
21374 | if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | |||
/* Sorter with detail=full: the list for phrase iPhrase is the slice of
** pSorter->aPoslist[] from offset aIdx[iPhrase-1] (0 for the first
** phrase) up to offset aIdx[iPhrase]. */
21375 | Fts5Sorter *pSorter = pCsr->pSorter; | |||
21376 | int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); | |||
21377 | *pn = pSorter->aIdx[iPhrase] - i1; | |||
21378 | *pa = &pSorter->aPoslist[i1]; | |||
21379 | }else{ | |||
21380 | *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); | |||
21381 | } | |||
21382 | }else{ | |||
/* Any error: hand back an empty list along with the error code. */
21383 | *pa = 0; | |||
21384 | *pn = 0; | |||
21385 | } | |||
21386 | ||||
21387 | return rc; | |||
21388 | } | |||
21389 | ||||
21390 | /* | |||
21391 | ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated | |||
21392 | ** correctly for the current view. Return SQLITE_OK if successful, or an | |||
21393 | ** SQLite error code otherwise. | |||
21394 | */ | |||
21395 | static int fts5CacheInstArray(Fts5Cursor *pCsr){ | |||
21396 | int rc = SQLITE_OK0; | |||
21397 | Fts5PoslistReader *aIter; /* One iterator for each phrase */ | |||
21398 | int nIter; /* Number of iterators/phrases */ | |||
21399 | int nCol = ((Fts5Table*)pCsr->base.pVtab)->pConfig->nCol; | |||
21400 | ||||
/* The iterator array is allocated on first use and then kept on the
** cursor in pCsr->aInstIter. */
21401 | nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | |||
21402 | if( pCsr->aInstIter==0 ){ | |||
21403 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nIter; | |||
21404 | pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); | |||
21405 | } | |||
21406 | aIter = pCsr->aInstIter; | |||
21407 | ||||
/* If there is no iterator array, nothing below runs: nInstCount and the
** FTS5CSR_REQUIRE_INST flag are left as they were. */
21408 | if( aIter ){ | |||
21409 | int nInst = 0; /* Number instances seen so far */ | |||
21410 | int i; | |||
21411 | ||||
21412 | /* Initialize all iterators */ | |||
21413 | for(i=0; i<nIter && rc==SQLITE_OK0; i++){ | |||
21414 | const u8 *a; | |||
21415 | int n; | |||
21416 | rc = fts5CsrPoslist(pCsr, i, &a, &n); | |||
21417 | if( rc==SQLITE_OK0 ){ | |||
21418 | sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); | |||
21419 | } | |||
21420 | } | |||
21421 | ||||
/* Merge the per-phrase readers: each pass emits the smallest remaining
** position as one (phrase, column, offset) triple. */
21422 | if( rc==SQLITE_OK0 ){ | |||
21423 | while( 1 ){ | |||
21424 | int *aInst; | |||
21425 | int iBest = -1; | |||
/* Pick the reader that is not at EOF and has the smallest iPos. The
** comparison is strict, so on a tie the lowest phrase index wins. */
21426 | for(i=0; i<nIter; i++){ | |||
21427 | if( (aIter[i].bEof==0) | |||
21428 | && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) | |||
21429 | ){ | |||
21430 | iBest = i; | |||
21431 | } | |||
21432 | } | |||
/* Every reader is at EOF: the merge is complete. */
21433 | if( iBest<0 ) break; | |||
21434 | ||||
/* Grow pCsr->aInst[] by doubling, starting at 32 entries of 3 ints each. */
21435 | nInst++; | |||
21436 | if( nInst>=pCsr->nInstAlloc ){ | |||
21437 | int nNewSize = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32; | |||
21438 | aInst = (int*)sqlite3_realloc64sqlite3_api->realloc64( | |||
21439 | pCsr->aInst, nNewSize*sizeof(int)*3 | |||
21440 | ); | |||
21441 | if( aInst ){ | |||
21442 | pCsr->aInst = aInst; | |||
21443 | pCsr->nInstAlloc = nNewSize; | |||
21444 | }else{ | |||
/* Allocation failed: pCsr->aInst is left unchanged. Undo the increment so
** that nInst counts only the entries actually stored. */
21445 | nInst--; | |||
21446 | rc = SQLITE_NOMEM7; | |||
21447 | break; | |||
21448 | } | |||
21449 | } | |||
21450 | ||||
21451 | aInst = &pCsr->aInst[3 * (nInst-1)]; | |||
21452 | aInst[0] = iBest; | |||
21453 | aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos)(int)((aIter[iBest].iPos >> 32) & 0x7FFFFFFF); | |||
21454 | aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos)(int)(aIter[iBest].iPos & 0x7FFFFFFF); | |||
21455 | assert( aInst[1]>=0 )((void) (0)); | |||
/* A column number outside the table is reported as corruption. The entry
** has already been stored and stays counted in nInst. */
21456 | if( aInst[1]>=nCol ){ | |||
21457 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
21458 | break; | |||
21459 | } | |||
21460 | sqlite3Fts5PoslistReaderNext(&aIter[iBest]); | |||
21461 | } | |||
21462 | } | |||
21463 | ||||
/* The count is stored and the flag cleared even when rc is an error. */
21464 | pCsr->nInstCount = nInst; | |||
21465 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags &= ~(0x08)); | |||
21466 | } | |||
21467 | return rc; | |||
21468 | } | |||
21469 | ||||
21470 | static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){ | |||
21471 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21472 | int rc = SQLITE_OK0; | |||
21473 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | |||
21474 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) ){ | |||
21475 | *pnInst = pCsr->nInstCount; | |||
21476 | } | |||
21477 | return rc; | |||
21478 | } | |||
21479 | ||||
21480 | static int fts5ApiInst( | |||
21481 | Fts5Context *pCtx, | |||
21482 | int iIdx, | |||
21483 | int *piPhrase, | |||
21484 | int *piCol, | |||
21485 | int *piOff | |||
21486 | ){ | |||
21487 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21488 | int rc = SQLITE_OK0; | |||
21489 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | |||
21490 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) | |||
21491 | ){ | |||
21492 | if( iIdx<0 || iIdx>=pCsr->nInstCount ){ | |||
21493 | rc = SQLITE_RANGE25; | |||
21494 | }else{ | |||
21495 | *piPhrase = pCsr->aInst[iIdx*3]; | |||
21496 | *piCol = pCsr->aInst[iIdx*3 + 1]; | |||
21497 | *piOff = pCsr->aInst[iIdx*3 + 2]; | |||
21498 | } | |||
21499 | } | |||
21500 | return rc; | |||
21501 | } | |||
21502 | ||||
21503 | static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ | |||
21504 | return fts5CursorRowid((Fts5Cursor*)pCtx); | |||
21505 | } | |||
21506 | ||||
21507 | static int fts5ColumnSizeCb( | |||
21508 | void *pContext, /* Pointer to int */ | |||
21509 | int tflags, | |||
21510 | const char *pUnused, /* Buffer containing token */ | |||
21511 | int nUnused, /* Size of token in bytes */ | |||
21512 | int iUnused1, /* Start offset of token */ | |||
21513 | int iUnused2 /* End offset of token */ | |||
21514 | ){ | |||
21515 | int *pCnt = (int*)pContext; | |||
21516 | UNUSED_PARAM2(pUnused, nUnused)(void)(pUnused), (void)(nUnused); | |||
21517 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
21518 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ){ | |||
21519 | (*pCnt)++; | |||
21520 | } | |||
21521 | return SQLITE_OK0; | |||
21522 | } | |||
21523 | ||||
21524 | static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ | |||
21525 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21526 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
21527 | Fts5Config *pConfig = pTab->p.pConfig; | |||
21528 | int rc = SQLITE_OK0; | |||
21529 | ||||
21530 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags & (0x04)) ){ | |||
21531 | if( pConfig->bColumnsize ){ | |||
21532 | i64 iRowid = fts5CursorRowid(pCsr); | |||
21533 | rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); | |||
21534 | }else if( !pConfig->zContent || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 ){ | |||
21535 | int i; | |||
21536 | for(i=0; i<pConfig->nCol; i++){ | |||
21537 | if( pConfig->abUnindexed[i]==0 ){ | |||
21538 | pCsr->aColumnSize[i] = -1; | |||
21539 | } | |||
21540 | } | |||
21541 | }else{ | |||
21542 | int i; | |||
21543 | rc = fts5SeekCursor(pCsr, 0); | |||
21544 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | |||
21545 | if( pConfig->abUnindexed[i]==0 ){ | |||
21546 | const char *z = 0; | |||
21547 | int n = 0; | |||
21548 | pCsr->aColumnSize[i] = 0; | |||
21549 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); | |||
21550 | if( rc==SQLITE_OK0 ){ | |||
21551 | rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX0x0008, | |||
21552 | z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb | |||
21553 | ); | |||
21554 | } | |||
21555 | sqlite3Fts5ClearLocale(pConfig); | |||
21556 | } | |||
21557 | } | |||
21558 | } | |||
21559 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags &= ~(0x04)); | |||
21560 | } | |||
21561 | if( iCol<0 ){ | |||
21562 | int i; | |||
21563 | *pnToken = 0; | |||
21564 | for(i=0; i<pConfig->nCol; i++){ | |||
21565 | *pnToken += pCsr->aColumnSize[i]; | |||
21566 | } | |||
21567 | }else if( iCol<pConfig->nCol ){ | |||
21568 | *pnToken = pCsr->aColumnSize[iCol]; | |||
21569 | }else{ | |||
21570 | *pnToken = 0; | |||
21571 | rc = SQLITE_RANGE25; | |||
21572 | } | |||
21573 | return rc; | |||
21574 | } | |||
21575 | ||||
21576 | /* | |||
21577 | ** Implementation of the xSetAuxdata() method. | |||
21578 | */ | |||
21579 | static int fts5ApiSetAuxdata( | |||
21580 | Fts5Context *pCtx, /* Fts5 context */ | |||
21581 | void *pPtr, /* Pointer to save as auxdata */ | |||
21582 | void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ | |||
21583 | ){ | |||
21584 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21585 | Fts5Auxdata *pData; | |||
21586 | ||||
21587 | /* Search through the cursors list of Fts5Auxdata objects for one that | |||
21588 | ** corresponds to the currently executing auxiliary function. */ | |||
21589 | for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ | |||
21590 | if( pData->pAux==pCsr->pAux ) break; | |||
21591 | } | |||
21592 | ||||
21593 | if( pData ){ | |||
21594 | if( pData->xDelete ){ | |||
21595 | pData->xDelete(pData->pPtr); | |||
21596 | } | |||
21597 | }else{ | |||
21598 | int rc = SQLITE_OK0; | |||
21599 | pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata)); | |||
21600 | if( pData==0 ){ | |||
21601 | if( xDelete ) xDelete(pPtr); | |||
21602 | return rc; | |||
21603 | } | |||
21604 | pData->pAux = pCsr->pAux; | |||
21605 | pData->pNext = pCsr->pAuxdata; | |||
21606 | pCsr->pAuxdata = pData; | |||
21607 | } | |||
21608 | ||||
21609 | pData->xDelete = xDelete; | |||
21610 | pData->pPtr = pPtr; | |||
21611 | return SQLITE_OK0; | |||
21612 | } | |||
21613 | ||||
21614 | static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ | |||
21615 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21616 | Fts5Auxdata *pData; | |||
21617 | void *pRet = 0; | |||
21618 | ||||
21619 | for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ | |||
21620 | if( pData->pAux==pCsr->pAux ) break; | |||
21621 | } | |||
21622 | ||||
21623 | if( pData ){ | |||
21624 | pRet = pData->pPtr; | |||
21625 | if( bClear ){ | |||
21626 | pData->pPtr = 0; | |||
21627 | pData->xDelete = 0; | |||
21628 | } | |||
21629 | } | |||
21630 | ||||
21631 | return pRet; | |||
21632 | } | |||
21633 | ||||
21634 | static void fts5ApiPhraseNext( | |||
21635 | Fts5Context *pCtx, | |||
21636 | Fts5PhraseIter *pIter, | |||
21637 | int *piCol, int *piOff | |||
21638 | ){ | |||
21639 | if( pIter->a>=pIter->b ){ | |||
21640 | *piCol = -1; | |||
21641 | *piOff = -1; | |||
21642 | }else{ | |||
21643 | int iVal; | |||
21644 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | |||
21645 | if( iVal==1 ){ | |||
21646 | /* Avoid returning a (*piCol) value that is too large for the table, | |||
21647 | ** even if the position-list is corrupt. The caller might not be | |||
21648 | ** expecting it. */ | |||
21649 | int nCol = ((Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab))->pConfig->nCol; | |||
21650 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | |||
21651 | *piCol = (iVal>=nCol ? nCol-1 : iVal); | |||
21652 | *piOff = 0; | |||
21653 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | |||
21654 | } | |||
21655 | *piOff += (iVal-2); | |||
21656 | } | |||
21657 | } | |||
21658 | ||||
21659 | static int fts5ApiPhraseFirst( | |||
21660 | Fts5Context *pCtx, | |||
21661 | int iPhrase, | |||
21662 | Fts5PhraseIter *pIter, | |||
21663 | int *piCol, int *piOff | |||
21664 | ){ | |||
21665 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21666 | int n; | |||
21667 | int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); | |||
21668 | if( rc==SQLITE_OK0 ){ | |||
21669 | assert( pIter->a || n==0 )((void) (0)); | |||
21670 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | |||
21671 | *piCol = 0; | |||
21672 | *piOff = 0; | |||
21673 | fts5ApiPhraseNext(pCtx, pIter, piCol, piOff); | |||
21674 | } | |||
21675 | return rc; | |||
21676 | } | |||
21677 | ||||
21678 | static void fts5ApiPhraseNextColumn( | |||
21679 | Fts5Context *pCtx, | |||
21680 | Fts5PhraseIter *pIter, | |||
21681 | int *piCol | |||
21682 | ){ | |||
21683 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21684 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | |||
21685 | ||||
21686 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
21687 | if( pIter->a>=pIter->b ){ | |||
21688 | *piCol = -1; | |||
21689 | }else{ | |||
21690 | int iIncr; | |||
21691 | pIter->a += fts5GetVarint32(&pIter->a[0], iIncr)sqlite3Fts5GetVarint32(&pIter->a[0],(u32*)&(iIncr) ); | |||
21692 | *piCol += (iIncr-2); | |||
21693 | } | |||
21694 | }else{ | |||
21695 | while( 1 ){ | |||
21696 | int dummy; | |||
21697 | if( pIter->a>=pIter->b ){ | |||
21698 | *piCol = -1; | |||
21699 | return; | |||
21700 | } | |||
21701 | if( pIter->a[0]==0x01 ) break; | |||
21702 | pIter->a += fts5GetVarint32(pIter->a, dummy)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(dummy)); | |||
21703 | } | |||
21704 | pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol)sqlite3Fts5GetVarint32(&pIter->a[1],(u32*)&(*piCol )); | |||
21705 | } | |||
21706 | } | |||
21707 | ||||
21708 | static int fts5ApiPhraseFirstColumn( | |||
21709 | Fts5Context *pCtx, | |||
21710 | int iPhrase, | |||
21711 | Fts5PhraseIter *pIter, | |||
21712 | int *piCol | |||
21713 | ){ | |||
21714 | int rc = SQLITE_OK0; | |||
21715 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21716 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | |||
21717 | ||||
21718 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
21719 | Fts5Sorter *pSorter = pCsr->pSorter; | |||
21720 | int n; | |||
21721 | if( pSorter ){ | |||
21722 | int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); | |||
21723 | n = pSorter->aIdx[iPhrase] - i1; | |||
21724 | pIter->a = &pSorter->aPoslist[i1]; | |||
21725 | }else{ | |||
21726 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n); | |||
21727 | } | |||
21728 | if( rc==SQLITE_OK0 ){ | |||
21729 | assert( pIter->a || n==0 )((void) (0)); | |||
21730 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | |||
21731 | *piCol = 0; | |||
21732 | fts5ApiPhraseNextColumn(pCtx, pIter, piCol); | |||
21733 | } | |||
21734 | }else{ | |||
21735 | int n; | |||
21736 | rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); | |||
21737 | if( rc==SQLITE_OK0 ){ | |||
21738 | assert( pIter->a || n==0 )((void) (0)); | |||
21739 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | |||
21740 | if( n<=0 ){ | |||
21741 | *piCol = -1; | |||
21742 | }else if( pIter->a[0]==0x01 ){ | |||
21743 | pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol)sqlite3Fts5GetVarint32(&pIter->a[1],(u32*)&(*piCol )); | |||
21744 | }else{ | |||
21745 | *piCol = 0; | |||
21746 | } | |||
21747 | } | |||
21748 | } | |||
21749 | ||||
21750 | return rc; | |||
21751 | } | |||
21752 | ||||
21753 | /* | |||
21754 | ** xQueryToken() API implementation. | |||
21755 | */ | |||
21756 | static int fts5ApiQueryToken( | |||
21757 | Fts5Context* pCtx, | |||
21758 | int iPhrase, | |||
21759 | int iToken, | |||
21760 | const char **ppOut, | |||
21761 | int *pnOut | |||
21762 | ){ | |||
21763 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21764 | return sqlite3Fts5ExprQueryToken(pCsr->pExpr, iPhrase, iToken, ppOut, pnOut); | |||
21765 | } | |||
21766 | ||||
21767 | /* | |||
21768 | ** xInstToken() API implementation. | |||
21769 | */ | |||
21770 | static int fts5ApiInstToken( | |||
21771 | Fts5Context *pCtx, | |||
21772 | int iIdx, | |||
21773 | int iToken, | |||
21774 | const char **ppOut, int *pnOut | |||
21775 | ){ | |||
21776 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21777 | int rc = SQLITE_OK0; | |||
21778 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | |||
| ||||
21779 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) | |||
21780 | ){ | |||
21781 | if( iIdx<0 || iIdx>=pCsr->nInstCount ){ | |||
21782 | rc = SQLITE_RANGE25; | |||
21783 | }else{ | |||
21784 | int iPhrase = pCsr->aInst[iIdx*3]; | |||
21785 | int iCol = pCsr->aInst[iIdx*3 + 1]; | |||
21786 | int iOff = pCsr->aInst[iIdx*3 + 2]; | |||
21787 | i64 iRowid = fts5CursorRowid(pCsr); | |||
21788 | rc = sqlite3Fts5ExprInstToken( | |||
21789 | pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut | |||
21790 | ); | |||
21791 | } | |||
21792 | } | |||
21793 | return rc; | |||
21794 | } | |||
21795 | ||||
21796 | ||||
/*
** Forward declaration. fts5ApiQueryPhrase() is listed in the sFts5Api
** table below, but its definition follows that table because it passes
** &sFts5Api to the callback it is given.
*/
21797 | static int fts5ApiQueryPhrase(Fts5Context*, int, void*, | |||
21798 | int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) | |||
21799 | ); | |||
21800 | ||||
21801 | /* | |||
21802 | ** The xColumnLocale() API. | |||
21803 | */ | |||
21804 | static int fts5ApiColumnLocale( | |||
21805 | Fts5Context *pCtx, | |||
21806 | int iCol, | |||
21807 | const char **pzLocale, | |||
21808 | int *pnLocale | |||
21809 | ){ | |||
21810 | int rc = SQLITE_OK0; | |||
21811 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21812 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | |||
21813 | ||||
21814 | *pzLocale = 0; | |||
21815 | *pnLocale = 0; | |||
21816 | ||||
21817 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | |||
21818 | if( iCol<0 || iCol>=pConfig->nCol ){ | |||
21819 | rc = SQLITE_RANGE25; | |||
21820 | }else if( | |||
21821 | pConfig->abUnindexed[iCol]==0 | |||
21822 | && 0==fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) | |||
21823 | && pConfig->bLocale | |||
21824 | ){ | |||
21825 | rc = fts5SeekCursor(pCsr, 0); | |||
21826 | if( rc==SQLITE_OK0 ){ | |||
21827 | const char *zDummy = 0; | |||
21828 | int nDummy = 0; | |||
21829 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &zDummy, &nDummy); | |||
21830 | if( rc==SQLITE_OK0 ){ | |||
21831 | *pzLocale = pConfig->t.pLocale; | |||
21832 | *pnLocale = pConfig->t.nLocale; | |||
21833 | } | |||
21834 | sqlite3Fts5ClearLocale(pConfig); | |||
21835 | } | |||
21836 | } | |||
21837 | ||||
21838 | return rc; | |||
21839 | } | |||
21840 | ||||
/*
** The extension API method table. A pointer to this object is passed to
** auxiliary functions by fts5ApiInvoke() and to the callback invoked by
** fts5ApiQueryPhrase(). The initializer is positional, so the entries
** must stay in the order in which Fts5ExtensionApi declares its members.
*/
21841 | static const Fts5ExtensionApi sFts5Api = { | |||
21842 | 4, /* iVersion */ | |||
21843 | fts5ApiUserData, | |||
21844 | fts5ApiColumnCount, | |||
21845 | fts5ApiRowCount, | |||
21846 | fts5ApiColumnTotalSize, | |||
21847 | fts5ApiTokenize, | |||
21848 | fts5ApiPhraseCount, | |||
21849 | fts5ApiPhraseSize, | |||
21850 | fts5ApiInstCount, | |||
21851 | fts5ApiInst, | |||
21852 | fts5ApiRowid, | |||
21853 | fts5ApiColumnText, | |||
21854 | fts5ApiColumnSize, | |||
21855 | fts5ApiQueryPhrase, | |||
21856 | fts5ApiSetAuxdata, | |||
21857 | fts5ApiGetAuxdata, | |||
21858 | fts5ApiPhraseFirst, | |||
21859 | fts5ApiPhraseNext, | |||
21860 | fts5ApiPhraseFirstColumn, | |||
21861 | fts5ApiPhraseNextColumn, | |||
21862 | fts5ApiQueryToken, | |||
21863 | fts5ApiInstToken, | |||
21864 | fts5ApiColumnLocale, | |||
21865 | fts5ApiTokenize_v2 | |||
21866 | }; | |||
21867 | ||||
21868 | /* | |||
21869 | ** Implementation of API function xQueryPhrase(). | |||
21870 | */ | |||
21871 | static int fts5ApiQueryPhrase( | |||
21872 | Fts5Context *pCtx, | |||
21873 | int iPhrase, | |||
21874 | void *pUserData, | |||
21875 | int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) | |||
21876 | ){ | |||
21877 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
21878 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
21879 | int rc; | |||
21880 | Fts5Cursor *pNew = 0; | |||
21881 | ||||
21882 | rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); | |||
21883 | if( rc==SQLITE_OK0 ){ | |||
21884 | pNew->ePlan = FTS5_PLAN_MATCH1; | |||
21885 | pNew->iFirstRowid = SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))); | |||
21886 | pNew->iLastRowid = LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | |||
21887 | pNew->base.pVtab = (sqlite3_vtab*)pTab; | |||
21888 | rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr); | |||
21889 | } | |||
21890 | ||||
21891 | if( rc==SQLITE_OK0 ){ | |||
21892 | for(rc = fts5CursorFirst(pTab, pNew, 0); | |||
21893 | rc==SQLITE_OK0 && CsrFlagTest(pNew, FTS5CSR_EOF)((pNew)->csrflags & (0x01))==0; | |||
21894 | rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) | |||
21895 | ){ | |||
21896 | rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); | |||
21897 | if( rc!=SQLITE_OK0 ){ | |||
21898 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; | |||
21899 | break; | |||
21900 | } | |||
21901 | } | |||
21902 | } | |||
21903 | ||||
21904 | fts5CloseMethod((sqlite3_vtab_cursor*)pNew); | |||
21905 | return rc; | |||
21906 | } | |||
21907 | ||||
21908 | static void fts5ApiInvoke( | |||
21909 | Fts5Auxiliary *pAux, | |||
21910 | Fts5Cursor *pCsr, | |||
21911 | sqlite3_context *context, | |||
21912 | int argc, | |||
21913 | sqlite3_value **argv | |||
21914 | ){ | |||
21915 | assert( pCsr->pAux==0 )((void) (0)); | |||
21916 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | |||
21917 | pCsr->pAux = pAux; | |||
21918 | pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); | |||
21919 | pCsr->pAux = 0; | |||
21920 | } | |||
21921 | ||||
21922 | static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ | |||
21923 | Fts5Cursor *pCsr; | |||
21924 | for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | |||
21925 | if( pCsr->iCsrId==iCsrId ) break; | |||
21926 | } | |||
21927 | return pCsr; | |||
21928 | } | |||
21929 | ||||
21930 | /* | |||
21931 | ** Parameter zFmt is a printf() style formatting string. This function | |||
21932 | ** formats it using the trailing arguments and returns the result as | |||
21933 | ** an error message to the context passed as the first argument. | |||
21934 | */ | |||
21935 | static void fts5ResultError(sqlite3_context *pCtx, const char *zFmt, ...){ | |||
21936 | char *zErr = 0; | |||
21937 | va_list ap; | |||
21938 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
21939 | zErr = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
21940 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
21941 | sqlite3_freesqlite3_api->free(zErr); | |||
21942 | va_end(ap)__builtin_va_end(ap); | |||
21943 | } | |||
21944 | ||||
21945 | static void fts5ApiCallback( | |||
21946 | sqlite3_context *context, | |||
21947 | int argc, | |||
21948 | sqlite3_value **argv | |||
21949 | ){ | |||
21950 | ||||
21951 | Fts5Auxiliary *pAux; | |||
21952 | Fts5Cursor *pCsr; | |||
21953 | i64 iCsrId; | |||
21954 | ||||
21955 | assert( argc>=1 )((void) (0)); | |||
21956 | pAux = (Fts5Auxiliary*)sqlite3_user_datasqlite3_api->user_data(context); | |||
21957 | iCsrId = sqlite3_value_int64sqlite3_api->value_int64(argv[0]); | |||
21958 | ||||
21959 | pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); | |||
21960 | if( pCsr==0 || (pCsr->ePlan==0 || pCsr->ePlan==FTS5_PLAN_SPECIAL3) ){ | |||
21961 | fts5ResultError(context, "no such cursor: %lld", iCsrId); | |||
21962 | }else{ | |||
21963 | sqlite3_vtab *pTab = pCsr->base.pVtab; | |||
21964 | fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); | |||
21965 | sqlite3_freesqlite3_api->free(pTab->zErrMsg); | |||
21966 | pTab->zErrMsg = 0; | |||
21967 | } | |||
21968 | } | |||
21969 | ||||
21970 | ||||
21971 | /* | |||
21972 | ** Given cursor id iCsrId, return a pointer to the corresponding Fts5Table | |||
21973 | ** object, or NULL if the cursor id does not exist. | |||
21974 | */ | |||
21975 | static Fts5Table *sqlite3Fts5TableFromCsrid( | |||
21976 | Fts5Global *pGlobal, /* FTS5 global context for db handle */ | |||
21977 | i64 iCsrId /* Id of cursor to find */ | |||
21978 | ){ | |||
21979 | Fts5Cursor *pCsr; | |||
21980 | pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); | |||
21981 | if( pCsr ){ | |||
21982 | return (Fts5Table*)pCsr->base.pVtab; | |||
21983 | } | |||
21984 | return 0; | |||
21985 | } | |||
21986 | ||||
21987 | /* | |||
21988 | ** Return a "position-list blob" corresponding to the current position of | |||
21989 | ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains | |||
21990 | ** the current position-list for each phrase in the query associated with | |||
21991 | ** cursor pCsr. | |||
21992 | ** | |||
21993 | ** A position-list blob begins with (nPhrase-1) varints, where nPhrase is | |||
21994 | ** the number of phrases in the query. Following the varints are the | |||
21995 | ** concatenated position lists for each phrase, in order. | |||
21996 | ** | |||
21997 | ** The first varint (if it exists) contains the size of the position list | |||
21998 | ** for phrase 0. The second (same disclaimer) contains the size of position | |||
21999 | ** list 1. And so on. There is no size field for the final position list, | |||
22000 | ** as it can be derived from the total size of the blob. | |||
22001 | */ | |||
22002 | static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ | |||
22003 | int i; | |||
22004 | int rc = SQLITE_OK0; | |||
22005 | int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | |||
22006 | Fts5Buffer val; | |||
22007 | ||||
22008 | memset(&val, 0, sizeof(Fts5Buffer)); | |||
22009 | switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){ | |||
22010 | case FTS5_DETAIL_FULL0: | |||
22011 | ||||
22012 | /* Append the varints */ | |||
22013 | for(i=0; i<(nPhrase-1); i++){ | |||
22014 | const u8 *dummy; | |||
22015 | int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); | |||
22016 | sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); | |||
22017 | } | |||
22018 | ||||
22019 | /* Append the position lists */ | |||
22020 | for(i=0; i<nPhrase; i++){ | |||
22021 | const u8 *pPoslist; | |||
22022 | int nPoslist; | |||
22023 | nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist); | |||
22024 | sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); | |||
22025 | } | |||
22026 | break; | |||
22027 | ||||
22028 | case FTS5_DETAIL_COLUMNS2: | |||
22029 | ||||
22030 | /* Append the varints */ | |||
22031 | for(i=0; rc==SQLITE_OK0 && i<(nPhrase-1); i++){ | |||
22032 | const u8 *dummy; | |||
22033 | int nByte; | |||
22034 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte); | |||
22035 | sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); | |||
22036 | } | |||
22037 | ||||
22038 | /* Append the position lists */ | |||
22039 | for(i=0; rc==SQLITE_OK0 && i<nPhrase; i++){ | |||
22040 | const u8 *pPoslist; | |||
22041 | int nPoslist; | |||
22042 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist); | |||
22043 | sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); | |||
22044 | } | |||
22045 | break; | |||
22046 | ||||
22047 | default: | |||
22048 | break; | |||
22049 | } | |||
22050 | ||||
22051 | sqlite3_result_blobsqlite3_api->result_blob(pCtx, val.p, val.n, sqlite3_freesqlite3_api->free); | |||
22052 | return rc; | |||
22053 | } | |||
22054 | ||||
22055 | /* | |||
22056 | ** This is the xColumn method, called by SQLite to request a value from | |||
22057 | ** the row that the supplied cursor currently points to. | |||
22058 | */ | |||
22059 | static int fts5ColumnMethod( | |||
22060 | sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ | |||
22061 | sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ | |||
22062 | int iCol /* Index of column to read value from */ | |||
22063 | ){ | |||
22064 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | |||
22065 | Fts5Config *pConfig = pTab->p.pConfig; | |||
22066 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
22067 | int rc = SQLITE_OK0; | |||
22068 | ||||
22069 | assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 )((void) (0)); | |||
22070 | ||||
22071 | if( pCsr->ePlan==FTS5_PLAN_SPECIAL3 ){ | |||
22072 | if( iCol==pConfig->nCol ){ | |||
22073 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->iSpecial); | |||
22074 | } | |||
22075 | }else | |||
22076 | ||||
22077 | if( iCol==pConfig->nCol ){ | |||
22078 | /* User is requesting the value of the special column with the same name | |||
22079 | ** as the table. Return the cursor integer id number. This value is only | |||
22080 | ** useful in that it may be passed as the first argument to an FTS5 | |||
22081 | ** auxiliary function. */ | |||
22082 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->iCsrId); | |||
22083 | }else if( iCol==pConfig->nCol+1 ){ | |||
22084 | /* The value of the "rank" column. */ | |||
22085 | ||||
22086 | if( pCsr->ePlan==FTS5_PLAN_SOURCE2 ){ | |||
22087 | fts5PoslistBlob(pCtx, pCsr); | |||
22088 | }else if( | |||
22089 | pCsr->ePlan==FTS5_PLAN_MATCH1 | |||
22090 | || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH4 | |||
22091 | ){ | |||
22092 | if( pCsr->pRank || SQLITE_OK0==(rc = fts5FindRankFunction(pCsr)) ){ | |||
22093 | fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); | |||
22094 | } | |||
22095 | } | |||
22096 | }else{ | |||
22097 | if( !sqlite3_vtab_nochangesqlite3_api->vtab_nochange(pCtx) && pConfig->eContent!=FTS5_CONTENT_NONE1 ){ | |||
22098 | pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | |||
22099 | rc = fts5SeekCursor(pCsr, 1); | |||
22100 | if( rc==SQLITE_OK0 ){ | |||
22101 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pCsr->pStmt, iCol+1); | |||
22102 | if( pConfig->bLocale | |||
22103 | && pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | |||
22104 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | |||
22105 | ){ | |||
22106 | const char *z = 0; | |||
22107 | int n = 0; | |||
22108 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &z, &n); | |||
22109 | if( rc==SQLITE_OK0 ){ | |||
22110 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
22111 | } | |||
22112 | sqlite3Fts5ClearLocale(pConfig); | |||
22113 | }else{ | |||
22114 | sqlite3_result_valuesqlite3_api->result_value(pCtx, pVal); | |||
22115 | } | |||
22116 | } | |||
22117 | ||||
22118 | pConfig->pzErrmsg = 0; | |||
22119 | } | |||
22120 | } | |||
22121 | ||||
22122 | return rc; | |||
22123 | } | |||
22124 | ||||
22125 | ||||
22126 | /* | |||
22127 | ** This routine implements the xFindFunction method for the FTS3 | |||
22128 | ** virtual table. | |||
22129 | */ | |||
22130 | static int fts5FindFunctionMethod( | |||
22131 | sqlite3_vtab *pVtab, /* Virtual table handle */ | |||
22132 | int nUnused, /* Number of SQL function arguments */ | |||
22133 | const char *zName, /* Name of SQL function */ | |||
22134 | void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ | |||
22135 | void **ppArg /* OUT: User data for *pxFunc */ | |||
22136 | ){ | |||
22137 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
22138 | Fts5Auxiliary *pAux; | |||
22139 | ||||
22140 | UNUSED_PARAM(nUnused)(void)(nUnused); | |||
22141 | pAux = fts5FindAuxiliary(pTab, zName); | |||
22142 | if( pAux ){ | |||
22143 | *pxFunc = fts5ApiCallback; | |||
22144 | *ppArg = (void*)pAux; | |||
22145 | return 1; | |||
22146 | } | |||
22147 | ||||
22148 | /* No function of the specified name was found. Return 0. */ | |||
22149 | return 0; | |||
22150 | } | |||
22151 | ||||
22152 | /* | |||
22153 | ** Implementation of FTS5 xRename method. Rename an fts5 table. | |||
22154 | */ | |||
22155 | static int fts5RenameMethod( | |||
22156 | sqlite3_vtab *pVtab, /* Virtual table handle */ | |||
22157 | const char *zName /* New name of table */ | |||
22158 | ){ | |||
22159 | int rc; | |||
22160 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
22161 | rc = sqlite3Fts5StorageRename(pTab->pStorage, zName); | |||
22162 | return rc; | |||
22163 | } | |||
22164 | ||||
22165 | static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){ | |||
22166 | fts5TripCursors((Fts5FullTable*)pTab); | |||
22167 | return sqlite3Fts5StorageSync(((Fts5FullTable*)pTab)->pStorage); | |||
22168 | } | |||
22169 | ||||
22170 | /* | |||
22171 | ** The xSavepoint() method. | |||
22172 | ** | |||
22173 | ** Flush the contents of the pending-terms table to disk. | |||
22174 | */ | |||
22175 | static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ | |||
22176 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
22177 | int rc = SQLITE_OK0; | |||
22178 | ||||
22179 | fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint); | |||
22180 | rc = sqlite3Fts5FlushToDisk((Fts5Table*)pVtab); | |||
22181 | if( rc==SQLITE_OK0 ){ | |||
22182 | pTab->iSavepoint = iSavepoint+1; | |||
22183 | } | |||
22184 | return rc; | |||
22185 | } | |||
22186 | ||||
22187 | /* | |||
22188 | ** The xRelease() method. | |||
22189 | ** | |||
22190 | ** This is a no-op. | |||
22191 | */ | |||
22192 | static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ | |||
22193 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
22194 | int rc = SQLITE_OK0; | |||
22195 | fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint); | |||
22196 | if( (iSavepoint+1)<pTab->iSavepoint ){ | |||
22197 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | |||
22198 | if( rc==SQLITE_OK0 ){ | |||
22199 | pTab->iSavepoint = iSavepoint; | |||
22200 | } | |||
22201 | } | |||
22202 | return rc; | |||
22203 | } | |||
22204 | ||||
22205 | /* | |||
22206 | ** The xRollbackTo() method. | |||
22207 | ** | |||
22208 | ** Discard the contents of the pending terms table. | |||
22209 | */ | |||
22210 | static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ | |||
22211 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
22212 | int rc = SQLITE_OK0; | |||
22213 | fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); | |||
22214 | fts5TripCursors(pTab); | |||
22215 | if( (iSavepoint+1)<=pTab->iSavepoint ){ | |||
22216 | pTab->p.pConfig->pgsz = 0; | |||
22217 | rc = sqlite3Fts5StorageRollback(pTab->pStorage); | |||
22218 | } | |||
22219 | return rc; | |||
22220 | } | |||
22221 | ||||
22222 | /* | |||
22223 | ** Register a new auxiliary function with global context pGlobal. | |||
22224 | */ | |||
22225 | static int fts5CreateAux( | |||
22226 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
22227 | const char *zName, /* Name of new function */ | |||
22228 | void *pUserData, /* User data for aux. function */ | |||
22229 | fts5_extension_function xFunc, /* Aux. function implementation */ | |||
22230 | void(*xDestroy)(void*) /* Destructor for pUserData */ | |||
22231 | ){ | |||
22232 | Fts5Global *pGlobal = (Fts5Global*)pApi; | |||
22233 | int rc = sqlite3_overload_functionsqlite3_api->overload_function(pGlobal->db, zName, -1); | |||
22234 | if( rc==SQLITE_OK0 ){ | |||
22235 | Fts5Auxiliary *pAux; | |||
22236 | sqlite3_int64 nName; /* Size of zName in bytes, including \0 */ | |||
22237 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
22238 | ||||
22239 | nName = strlen(zName) + 1; | |||
22240 | nByte = sizeof(Fts5Auxiliary) + nName; | |||
22241 | pAux = (Fts5Auxiliary*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
22242 | if( pAux ){ | |||
22243 | memset(pAux, 0, (size_t)nByte); | |||
22244 | pAux->zFunc = (char*)&pAux[1]; | |||
22245 | memcpy(pAux->zFunc, zName, nName); | |||
22246 | pAux->pGlobal = pGlobal; | |||
22247 | pAux->pUserData = pUserData; | |||
22248 | pAux->xFunc = xFunc; | |||
22249 | pAux->xDestroy = xDestroy; | |||
22250 | pAux->pNext = pGlobal->pAux; | |||
22251 | pGlobal->pAux = pAux; | |||
22252 | }else{ | |||
22253 | rc = SQLITE_NOMEM7; | |||
22254 | } | |||
22255 | } | |||
22256 | ||||
22257 | return rc; | |||
22258 | } | |||
22259 | ||||
22260 | /* | |||
22261 | ** This function is used by xCreateTokenizer_v2() and xCreateTokenizer(). | |||
22262 | ** It allocates and partially populates a new Fts5TokenizerModule object. | |||
22263 | ** The new object is already linked into the Fts5Global context before | |||
22264 | ** returning. | |||
22265 | ** | |||
22266 | ** If successful, SQLITE_OK is returned and a pointer to the new | |||
22267 | ** Fts5TokenizerModule object returned via output parameter (*ppNew). All | |||
22268 | ** that is required is for the caller to fill in the methods in | |||
22269 | ** Fts5TokenizerModule.x1 and x2, and to set Fts5TokenizerModule.bV2Native | |||
22270 | ** as appropriate. | |||
22271 | ** | |||
22272 | ** If an error occurs, an SQLite error code is returned and the final value | |||
22273 | ** of (*ppNew) undefined. | |||
22274 | */ | |||
22275 | static int fts5NewTokenizerModule( | |||
22276 | Fts5Global *pGlobal, /* Global context (one per db handle) */ | |||
22277 | const char *zName, /* Name of new function */ | |||
22278 | void *pUserData, /* User data for aux. function */ | |||
22279 | void(*xDestroy)(void*), /* Destructor for pUserData */ | |||
22280 | Fts5TokenizerModule **ppNew | |||
22281 | ){ | |||
22282 | int rc = SQLITE_OK0; | |||
22283 | Fts5TokenizerModule *pNew; | |||
22284 | sqlite3_int64 nName; /* Size of zName and its \0 terminator */ | |||
22285 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
22286 | ||||
22287 | nName = strlen(zName) + 1; | |||
22288 | nByte = sizeof(Fts5TokenizerModule) + nName; | |||
22289 | *ppNew = pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte); | |||
22290 | if( pNew ){ | |||
22291 | pNew->zName = (char*)&pNew[1]; | |||
22292 | memcpy(pNew->zName, zName, nName); | |||
22293 | pNew->pUserData = pUserData; | |||
22294 | pNew->xDestroy = xDestroy; | |||
22295 | pNew->pNext = pGlobal->pTok; | |||
22296 | pGlobal->pTok = pNew; | |||
22297 | if( pNew->pNext==0 ){ | |||
22298 | pGlobal->pDfltTok = pNew; | |||
22299 | } | |||
22300 | } | |||
22301 | ||||
22302 | return rc; | |||
22303 | } | |||
22304 | ||||
22305 | /* | |||
22306 | ** An instance of this type is used as the Fts5Tokenizer object for | |||
22307 | ** wrapper tokenizers - those that provide access to a v1 tokenizer via | |||
22308 | ** the fts5_tokenizer_v2 API, and those that provide access to a v2 tokenizer | |||
22309 | ** via the fts5_tokenizer API. | |||
22310 | */ | |||
22311 | typedef struct Fts5VtoVTokenizer Fts5VtoVTokenizer; | |||
22312 | struct Fts5VtoVTokenizer { | |||
22313 | int bV2Native; /* True if v2 native tokenizer */ | |||
22314 | fts5_tokenizer x1; /* Tokenizer functions */ | |||
22315 | fts5_tokenizer_v2 x2; /* V2 tokenizer functions */ | |||
22316 | Fts5Tokenizer *pReal; | |||
22317 | }; | |||
22318 | ||||
22319 | /* | |||
22320 | ** Create a wrapper tokenizer. The context argument pCtx points to the | |||
22321 | ** Fts5TokenizerModule object. | |||
22322 | */ | |||
22323 | static int fts5VtoVCreate( | |||
22324 | void *pCtx, | |||
22325 | const char **azArg, | |||
22326 | int nArg, | |||
22327 | Fts5Tokenizer **ppOut | |||
22328 | ){ | |||
22329 | Fts5TokenizerModule *pMod = (Fts5TokenizerModule*)pCtx; | |||
22330 | Fts5VtoVTokenizer *pNew = 0; | |||
22331 | int rc = SQLITE_OK0; | |||
22332 | ||||
22333 | pNew = (Fts5VtoVTokenizer*)sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | |||
22334 | if( rc==SQLITE_OK0 ){ | |||
22335 | pNew->x1 = pMod->x1; | |||
22336 | pNew->x2 = pMod->x2; | |||
22337 | pNew->bV2Native = pMod->bV2Native; | |||
22338 | if( pMod->bV2Native ){ | |||
22339 | rc = pMod->x2.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal); | |||
22340 | }else{ | |||
22341 | rc = pMod->x1.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal); | |||
22342 | } | |||
22343 | if( rc!=SQLITE_OK0 ){ | |||
22344 | sqlite3_freesqlite3_api->free(pNew); | |||
22345 | pNew = 0; | |||
22346 | } | |||
22347 | } | |||
22348 | ||||
22349 | *ppOut = (Fts5Tokenizer*)pNew; | |||
22350 | return rc; | |||
22351 | } | |||
22352 | ||||
22353 | /* | |||
22354 | ** Delete an Fts5VtoVTokenizer wrapper tokenizer. | |||
22355 | */ | |||
22356 | static void fts5VtoVDelete(Fts5Tokenizer *pTok){ | |||
22357 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | |||
22358 | if( p ){ | |||
22359 | if( p->bV2Native ){ | |||
22360 | p->x2.xDelete(p->pReal); | |||
22361 | }else{ | |||
22362 | p->x1.xDelete(p->pReal); | |||
22363 | } | |||
22364 | sqlite3_freesqlite3_api->free(p); | |||
22365 | } | |||
22366 | } | |||
22367 | ||||
22368 | ||||
22369 | /* | |||
22370 | ** xTokenizer method for a wrapper tokenizer that offers the v1 interface | |||
22371 | ** (no support for locales). | |||
22372 | */ | |||
22373 | static int fts5V1toV2Tokenize( | |||
22374 | Fts5Tokenizer *pTok, | |||
22375 | void *pCtx, int flags, | |||
22376 | const char *pText, int nText, | |||
22377 | int (*xToken)(void*, int, const char*, int, int, int) | |||
22378 | ){ | |||
22379 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | |||
22380 | assert( p->bV2Native )((void) (0)); | |||
22381 | return p->x2.xTokenize(p->pReal, pCtx, flags, pText, nText, 0, 0, xToken); | |||
22382 | } | |||
22383 | ||||
22384 | /* | |||
22385 | ** xTokenizer method for a wrapper tokenizer that offers the v2 interface | |||
22386 | ** (with locale support). | |||
22387 | */ | |||
22388 | static int fts5V2toV1Tokenize( | |||
22389 | Fts5Tokenizer *pTok, | |||
22390 | void *pCtx, int flags, | |||
22391 | const char *pText, int nText, | |||
22392 | const char *pLocale, int nLocale, | |||
22393 | int (*xToken)(void*, int, const char*, int, int, int) | |||
22394 | ){ | |||
22395 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | |||
22396 | assert( p->bV2Native==0 )((void) (0)); | |||
22397 | UNUSED_PARAM2(pLocale,nLocale)(void)(pLocale), (void)(nLocale); | |||
22398 | return p->x1.xTokenize(p->pReal, pCtx, flags, pText, nText, xToken); | |||
22399 | } | |||
22400 | ||||
22401 | /* | |||
22402 | ** Register a new tokenizer. This is the implementation of the | |||
22403 | ** fts5_api.xCreateTokenizer_v2() method. | |||
22404 | */ | |||
22405 | static int fts5CreateTokenizer_v2( | |||
22406 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
22407 | const char *zName, /* Name of new function */ | |||
22408 | void *pUserData, /* User data for aux. function */ | |||
22409 | fts5_tokenizer_v2 *pTokenizer, /* Tokenizer implementation */ | |||
22410 | void(*xDestroy)(void*) /* Destructor for pUserData */ | |||
22411 | ){ | |||
22412 | Fts5Global *pGlobal = (Fts5Global*)pApi; | |||
22413 | int rc = SQLITE_OK0; | |||
22414 | ||||
22415 | if( pTokenizer->iVersion>2 ){ | |||
22416 | rc = SQLITE_ERROR1; | |||
22417 | }else{ | |||
22418 | Fts5TokenizerModule *pNew = 0; | |||
22419 | rc = fts5NewTokenizerModule(pGlobal, zName, pUserData, xDestroy, &pNew); | |||
22420 | if( pNew ){ | |||
22421 | pNew->x2 = *pTokenizer; | |||
22422 | pNew->bV2Native = 1; | |||
22423 | pNew->x1.xCreate = fts5VtoVCreate; | |||
22424 | pNew->x1.xTokenize = fts5V1toV2Tokenize; | |||
22425 | pNew->x1.xDelete = fts5VtoVDelete; | |||
22426 | } | |||
22427 | } | |||
22428 | ||||
22429 | return rc; | |||
22430 | } | |||
22431 | ||||
22432 | /* | |||
22433 | ** The fts5_api.xCreateTokenizer() method. | |||
22434 | */ | |||
22435 | static int fts5CreateTokenizer( | |||
22436 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
22437 | const char *zName, /* Name of new function */ | |||
22438 | void *pUserData, /* User data for aux. function */ | |||
22439 | fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ | |||
22440 | void(*xDestroy)(void*) /* Destructor for pUserData */ | |||
22441 | ){ | |||
22442 | Fts5TokenizerModule *pNew = 0; | |||
22443 | int rc = SQLITE_OK0; | |||
22444 | ||||
22445 | rc = fts5NewTokenizerModule( | |||
22446 | (Fts5Global*)pApi, zName, pUserData, xDestroy, &pNew | |||
22447 | ); | |||
22448 | if( pNew ){ | |||
22449 | pNew->x1 = *pTokenizer; | |||
22450 | pNew->x2.xCreate = fts5VtoVCreate; | |||
22451 | pNew->x2.xTokenize = fts5V2toV1Tokenize; | |||
22452 | pNew->x2.xDelete = fts5VtoVDelete; | |||
22453 | } | |||
22454 | return rc; | |||
22455 | } | |||
22456 | ||||
22457 | /* | |||
22458 | ** Search the global context passed as the first argument for a tokenizer | |||
22459 | ** module named zName. If found, return a pointer to the Fts5TokenizerModule | |||
22460 | ** object. Otherwise, return NULL. | |||
22461 | */ | |||
22462 | static Fts5TokenizerModule *fts5LocateTokenizer( | |||
22463 | Fts5Global *pGlobal, /* Global (one per db handle) object */ | |||
22464 | const char *zName /* Name of tokenizer module to find */ | |||
22465 | ){ | |||
22466 | Fts5TokenizerModule *pMod = 0; | |||
22467 | ||||
22468 | if( zName==0 ){ | |||
22469 | pMod = pGlobal->pDfltTok; | |||
22470 | }else{ | |||
22471 | for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ | |||
22472 | if( sqlite3_stricmpsqlite3_api->stricmp(zName, pMod->zName)==0 ) break; | |||
22473 | } | |||
22474 | } | |||
22475 | ||||
22476 | return pMod; | |||
22477 | } | |||
22478 | ||||
22479 | /* | |||
22480 | ** Find a tokenizer. This is the implementation of the | |||
22481 | ** fts5_api.xFindTokenizer_v2() method. | |||
22482 | */ | |||
22483 | static int fts5FindTokenizer_v2( | |||
22484 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
22485 | const char *zName, /* Name of tokenizer */ | |||
22486 | void **ppUserData, | |||
22487 | fts5_tokenizer_v2 **ppTokenizer /* Populate this object */ | |||
22488 | ){ | |||
22489 | int rc = SQLITE_OK0; | |||
22490 | Fts5TokenizerModule *pMod; | |||
22491 | ||||
22492 | pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); | |||
22493 | if( pMod ){ | |||
22494 | if( pMod->bV2Native ){ | |||
22495 | *ppUserData = pMod->pUserData; | |||
22496 | }else{ | |||
22497 | *ppUserData = (void*)pMod; | |||
22498 | } | |||
22499 | *ppTokenizer = &pMod->x2; | |||
22500 | }else{ | |||
22501 | *ppTokenizer = 0; | |||
22502 | *ppUserData = 0; | |||
22503 | rc = SQLITE_ERROR1; | |||
22504 | } | |||
22505 | ||||
22506 | return rc; | |||
22507 | } | |||
22508 | ||||
22509 | /* | |||
22510 | ** Find a tokenizer. This is the implementation of the | |||
22511 | ** fts5_api.xFindTokenizer() method. | |||
22512 | */ | |||
22513 | static int fts5FindTokenizer( | |||
22514 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
22515 | const char *zName, /* Name of new function */ | |||
22516 | void **ppUserData, | |||
22517 | fts5_tokenizer *pTokenizer /* Populate this object */ | |||
22518 | ){ | |||
22519 | int rc = SQLITE_OK0; | |||
22520 | Fts5TokenizerModule *pMod; | |||
22521 | ||||
22522 | pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); | |||
22523 | if( pMod ){ | |||
22524 | if( pMod->bV2Native==0 ){ | |||
22525 | *ppUserData = pMod->pUserData; | |||
22526 | }else{ | |||
22527 | *ppUserData = (void*)pMod; | |||
22528 | } | |||
22529 | *pTokenizer = pMod->x1; | |||
22530 | }else{ | |||
22531 | memset(pTokenizer, 0, sizeof(*pTokenizer)); | |||
22532 | *ppUserData = 0; | |||
22533 | rc = SQLITE_ERROR1; | |||
22534 | } | |||
22535 | ||||
22536 | return rc; | |||
22537 | } | |||
22538 | ||||
22539 | /* | |||
22540 | ** Attempt to instantiate the tokenizer. | |||
22541 | */ | |||
22542 | static int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){ | |||
22543 | const char **azArg = pConfig->t.azArg; | |||
22544 | const int nArg = pConfig->t.nArg; | |||
22545 | Fts5TokenizerModule *pMod = 0; | |||
22546 | int rc = SQLITE_OK0; | |||
22547 | ||||
22548 | pMod = fts5LocateTokenizer(pConfig->pGlobal, nArg==0 ? 0 : azArg[0]); | |||
22549 | if( pMod==0 ){ | |||
22550 | assert( nArg>0 )((void) (0)); | |||
22551 | rc = SQLITE_ERROR1; | |||
22552 | sqlite3Fts5ConfigErrmsg(pConfig, "no such tokenizer: %s", azArg[0]); | |||
22553 | }else{ | |||
22554 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**) = 0; | |||
22555 | if( pMod->bV2Native ){ | |||
22556 | xCreate = pMod->x2.xCreate; | |||
22557 | pConfig->t.pApi2 = &pMod->x2; | |||
22558 | }else{ | |||
22559 | pConfig->t.pApi1 = &pMod->x1; | |||
22560 | xCreate = pMod->x1.xCreate; | |||
22561 | } | |||
22562 | ||||
22563 | rc = xCreate(pMod->pUserData, | |||
22564 | (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok | |||
22565 | ); | |||
22566 | ||||
22567 | if( rc!=SQLITE_OK0 ){ | |||
22568 | if( rc!=SQLITE_NOMEM7 ){ | |||
22569 | sqlite3Fts5ConfigErrmsg(pConfig, "error in tokenizer constructor"); | |||
22570 | } | |||
22571 | }else if( pMod->bV2Native==0 ){ | |||
22572 | pConfig->t.ePattern = sqlite3Fts5TokenizerPattern( | |||
22573 | pMod->x1.xCreate, pConfig->t.pTok | |||
22574 | ); | |||
22575 | } | |||
22576 | } | |||
22577 | ||||
22578 | if( rc!=SQLITE_OK0 ){ | |||
22579 | pConfig->t.pApi1 = 0; | |||
22580 | pConfig->t.pApi2 = 0; | |||
22581 | pConfig->t.pTok = 0; | |||
22582 | } | |||
22583 | ||||
22584 | return rc; | |||
22585 | } | |||
22586 | ||||
22587 | ||||
22588 | /* | |||
22589 | ** xDestroy callback passed to sqlite3_create_module(). This is invoked | |||
22590 | ** when the db handle is being closed. Free memory associated with | |||
22591 | ** tokenizers and aux functions registered with this db handle. | |||
22592 | */ | |||
22593 | static void fts5ModuleDestroy(void *pCtx){ | |||
22594 | Fts5TokenizerModule *pTok, *pNextTok; | |||
22595 | Fts5Auxiliary *pAux, *pNextAux; | |||
22596 | Fts5Global *pGlobal = (Fts5Global*)pCtx; | |||
22597 | ||||
22598 | for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ | |||
22599 | pNextAux = pAux->pNext; | |||
22600 | if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); | |||
22601 | sqlite3_freesqlite3_api->free(pAux); | |||
22602 | } | |||
22603 | ||||
22604 | for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ | |||
22605 | pNextTok = pTok->pNext; | |||
22606 | if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); | |||
22607 | sqlite3_freesqlite3_api->free(pTok); | |||
22608 | } | |||
22609 | ||||
22610 | sqlite3_freesqlite3_api->free(pGlobal); | |||
22611 | } | |||
22612 | ||||
22613 | /* | |||
22614 | ** Implementation of the fts5() function used by clients to obtain the | |||
22615 | ** API pointer. | |||
22616 | */ | |||
22617 | static void fts5Fts5Func( | |||
22618 | sqlite3_context *pCtx, /* Function call context */ | |||
22619 | int nArg, /* Number of args */ | |||
22620 | sqlite3_value **apArg /* Function arguments */ | |||
22621 | ){ | |||
22622 | Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | |||
22623 | fts5_api **ppApi; | |||
22624 | UNUSED_PARAM(nArg)(void)(nArg); | |||
22625 | assert( nArg==1 )((void) (0)); | |||
22626 | ppApi = (fts5_api**)sqlite3_value_pointersqlite3_api->value_pointer(apArg[0], "fts5_api_ptr"); | |||
22627 | if( ppApi ) *ppApi = &pGlobal->api; | |||
22628 | } | |||
22629 | ||||
22630 | /* | |||
22631 | ** Implementation of fts5_source_id() function. | |||
22632 | */ | |||
22633 | static void fts5SourceIdFunc( | |||
22634 | sqlite3_context *pCtx, /* Function call context */ | |||
22635 | int nArg, /* Number of args */ | |||
22636 | sqlite3_value **apUnused /* Function arguments */ | |||
22637 | ){ | |||
22638 | assert( nArg==0 )((void) (0)); | |||
22639 | UNUSED_PARAM2(nArg, apUnused)(void)(nArg), (void)(apUnused); | |||
22640 | sqlite3_result_textsqlite3_api->result_text(pCtx, "fts5: 2025-06-06 14:52:32 b77dc5e0f596d2140d9ac682b2893ff65d3a4140aa86067a3efebe29dc914c95", -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
22641 | } | |||
22642 | ||||
22643 | /* | |||
22644 | ** Implementation of fts5_locale(LOCALE, TEXT) function. | |||
22645 | ** | |||
22646 | ** If parameter LOCALE is NULL, or a zero-length string, then a copy of | |||
22647 | ** TEXT is returned. Otherwise, both LOCALE and TEXT are interpreted as | |||
22648 | ** text, and the value returned is a blob consisting of: | |||
22649 | ** | |||
22650 | ** * The 4 bytes 0x00, 0xE0, 0xB2, 0xEb (FTS5_LOCALE_HEADER). | |||
22651 | ** * The LOCALE, as utf-8 text, followed by | |||
22652 | ** * 0x00, followed by | |||
22653 | ** * The TEXT, as utf-8 text. | |||
22654 | ** | |||
22655 | ** There is no final nul-terminator following the TEXT value. | |||
22656 | */ | |||
22657 | static void fts5LocaleFunc( | |||
22658 | sqlite3_context *pCtx, /* Function call context */ | |||
22659 | int nArg, /* Number of args */ | |||
22660 | sqlite3_value **apArg /* Function arguments */ | |||
22661 | ){ | |||
22662 | const char *zLocale = 0; | |||
22663 | int nLocale = 0; | |||
22664 | const char *zText = 0; | |||
22665 | int nText = 0; | |||
22666 | ||||
22667 | assert( nArg==2 )((void) (0)); | |||
22668 | UNUSED_PARAM(nArg)(void)(nArg); | |||
22669 | ||||
22670 | zLocale = (const char*)sqlite3_value_textsqlite3_api->value_text(apArg[0]); | |||
22671 | nLocale = sqlite3_value_bytessqlite3_api->value_bytes(apArg[0]); | |||
22672 | ||||
22673 | zText = (const char*)sqlite3_value_textsqlite3_api->value_text(apArg[1]); | |||
22674 | nText = sqlite3_value_bytessqlite3_api->value_bytes(apArg[1]); | |||
22675 | ||||
22676 | if( zLocale==0 || zLocale[0]=='\0' ){ | |||
22677 | sqlite3_result_textsqlite3_api->result_text(pCtx, zText, nText, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
22678 | }else{ | |||
22679 | Fts5Global *p = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | |||
22680 | u8 *pBlob = 0; | |||
22681 | u8 *pCsr = 0; | |||
22682 | int nBlob = 0; | |||
22683 | ||||
22684 | nBlob = FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) + nLocale + 1 + nText; | |||
22685 | pBlob = (u8*)sqlite3_mallocsqlite3_api->malloc(nBlob); | |||
22686 | if( pBlob==0 ){ | |||
22687 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(pCtx); | |||
22688 | return; | |||
22689 | } | |||
22690 | ||||
22691 | pCsr = pBlob; | |||
22692 | memcpy(pCsr, (const u8*)p->aLocaleHdr, FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))); | |||
22693 | pCsr += FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); | |||
22694 | memcpy(pCsr, zLocale, nLocale); | |||
22695 | pCsr += nLocale; | |||
22696 | (*pCsr++) = 0x00; | |||
22697 | if( zText ) memcpy(pCsr, zText, nText); | |||
22698 | assert( &pCsr[nText]==&pBlob[nBlob] )((void) (0)); | |||
22699 | ||||
22700 | sqlite3_result_blobsqlite3_api->result_blob(pCtx, pBlob, nBlob, sqlite3_freesqlite3_api->free); | |||
22701 | } | |||
22702 | } | |||
22703 | ||||
22704 | /* | |||
22705 | ** Implementation of fts5_insttoken() function. | |||
22706 | */ | |||
22707 | static void fts5InsttokenFunc( | |||
22708 | sqlite3_context *pCtx, /* Function call context */ | |||
22709 | int nArg, /* Number of args */ | |||
22710 | sqlite3_value **apArg /* Function arguments */ | |||
22711 | ){ | |||
22712 | assert( nArg==1 )((void) (0)); | |||
22713 | (void)nArg; | |||
22714 | sqlite3_result_valuesqlite3_api->result_value(pCtx, apArg[0]); | |||
22715 | sqlite3_result_subtypesqlite3_api->result_subtype(pCtx, FTS5_INSTTOKEN_SUBTYPE73); | |||
22716 | } | |||
22717 | ||||
/*
** Return true (1) if zName is the extension on one of the shadow tables
** used by this module, or false (0) otherwise. The comparison is done
** with sqlite3_stricmp().
*/
static int fts5ShadowName(const char *zName){
  static const char *azName[] = {
    "config", "content", "data", "docsize", "idx"
  };
  const unsigned int nName = (unsigned int)(sizeof(azName)/sizeof(azName[0]));
  unsigned int i = 0;

  while( i<nName ){
    if( sqlite3_stricmp(zName, azName[i])==0 ){
      return 1;
    }
    i++;
  }
  return 0;
}
22732 | ||||
22733 | /* | |||
22734 | ** Run an integrity check on the FTS5 data structures. Return a string | |||
22735 | ** if anything is found amiss. Return a NULL pointer if everything is | |||
22736 | ** OK. | |||
22737 | */ | |||
22738 | static int fts5IntegrityMethod( | |||
22739 | sqlite3_vtab *pVtab, /* the FTS5 virtual table to check */ | |||
22740 | const char *zSchema, /* Name of schema in which this table lives */ | |||
22741 | const char *zTabname, /* Name of the table itself */ | |||
22742 | int isQuick, /* True if this is a quick-check */ | |||
22743 | char **pzErr /* Write error message here */ | |||
22744 | ){ | |||
22745 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
22746 | int rc; | |||
22747 | ||||
22748 | assert( pzErr!=0 && *pzErr==0 )((void) (0)); | |||
22749 | UNUSED_PARAM(isQuick)(void)(isQuick); | |||
22750 | assert( pTab->p.pConfig->pzErrmsg==0 )((void) (0)); | |||
22751 | pTab->p.pConfig->pzErrmsg = pzErr; | |||
22752 | rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, 0); | |||
22753 | if( *pzErr==0 && rc!=SQLITE_OK0 ){ | |||
22754 | if( (rc&0xff)==SQLITE_CORRUPT11 ){ | |||
22755 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed inverted index for FTS5 table %s.%s", | |||
22756 | zSchema, zTabname); | |||
22757 | rc = (*pzErr) ? SQLITE_OK0 : SQLITE_NOMEM7; | |||
22758 | }else{ | |||
22759 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unable to validate the inverted index for" | |||
22760 | " FTS5 table %s.%s: %s", | |||
22761 | zSchema, zTabname, sqlite3_errstrsqlite3_api->errstr(rc)); | |||
22762 | } | |||
22763 | } | |||
22764 | ||||
22765 | sqlite3Fts5IndexCloseReader(pTab->p.pIndex); | |||
22766 | pTab->p.pConfig->pzErrmsg = 0; | |||
22767 | ||||
22768 | return rc; | |||
22769 | } | |||
22770 | ||||
22771 | static int fts5Init(sqlite3 *db){ | |||
22772 | static const sqlite3_module fts5Mod = { | |||
22773 | /* iVersion */ 4, | |||
22774 | /* xCreate */ fts5CreateMethod, | |||
22775 | /* xConnect */ fts5ConnectMethod, | |||
22776 | /* xBestIndex */ fts5BestIndexMethod, | |||
22777 | /* xDisconnect */ fts5DisconnectMethod, | |||
22778 | /* xDestroy */ fts5DestroyMethod, | |||
22779 | /* xOpen */ fts5OpenMethod, | |||
22780 | /* xClose */ fts5CloseMethod, | |||
22781 | /* xFilter */ fts5FilterMethod, | |||
22782 | /* xNext */ fts5NextMethod, | |||
22783 | /* xEof */ fts5EofMethod, | |||
22784 | /* xColumn */ fts5ColumnMethod, | |||
22785 | /* xRowid */ fts5RowidMethod, | |||
22786 | /* xUpdate */ fts5UpdateMethod, | |||
22787 | /* xBegin */ fts5BeginMethod, | |||
22788 | /* xSync */ fts5SyncMethod, | |||
22789 | /* xCommit */ fts5CommitMethod, | |||
22790 | /* xRollback */ fts5RollbackMethod, | |||
22791 | /* xFindFunction */ fts5FindFunctionMethod, | |||
22792 | /* xRename */ fts5RenameMethod, | |||
22793 | /* xSavepoint */ fts5SavepointMethod, | |||
22794 | /* xRelease */ fts5ReleaseMethod, | |||
22795 | /* xRollbackTo */ fts5RollbackToMethod, | |||
22796 | /* xShadowName */ fts5ShadowName, | |||
22797 | /* xIntegrity */ fts5IntegrityMethod | |||
22798 | }; | |||
22799 | ||||
22800 | int rc; | |||
22801 | Fts5Global *pGlobal = 0; | |||
22802 | ||||
22803 | pGlobal = (Fts5Global*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Global)); | |||
22804 | if( pGlobal==0 ){ | |||
22805 | rc = SQLITE_NOMEM7; | |||
22806 | }else{ | |||
22807 | void *p = (void*)pGlobal; | |||
22808 | memset(pGlobal, 0, sizeof(Fts5Global)); | |||
22809 | pGlobal->db = db; | |||
22810 | pGlobal->api.iVersion = 3; | |||
22811 | pGlobal->api.xCreateFunction = fts5CreateAux; | |||
22812 | pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; | |||
22813 | pGlobal->api.xFindTokenizer = fts5FindTokenizer; | |||
22814 | pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2; | |||
22815 | pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2; | |||
22816 | ||||
22817 | /* Initialize pGlobal->aLocaleHdr[] to a 128-bit pseudo-random vector. | |||
22818 | ** The constants below were generated randomly. */ | |||
22819 | sqlite3_randomnesssqlite3_api->randomness(sizeof(pGlobal->aLocaleHdr), pGlobal->aLocaleHdr); | |||
22820 | pGlobal->aLocaleHdr[0] ^= 0xF924976D; | |||
22821 | pGlobal->aLocaleHdr[1] ^= 0x16596E13; | |||
22822 | pGlobal->aLocaleHdr[2] ^= 0x7C80BEAA; | |||
22823 | pGlobal->aLocaleHdr[3] ^= 0x9B03A67F; | |||
22824 | assert( sizeof(pGlobal->aLocaleHdr)==16 )((void) (0)); | |||
22825 | ||||
22826 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); | |||
22827 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5IndexInit(db); | |||
22828 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5ExprInit(pGlobal, db); | |||
22829 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5AuxInit(&pGlobal->api); | |||
22830 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); | |||
22831 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5VocabInit(pGlobal, db); | |||
22832 | if( rc==SQLITE_OK0 ){ | |||
22833 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
22834 | db, "fts5", 1, SQLITE_UTF81, p, fts5Fts5Func, 0, 0 | |||
22835 | ); | |||
22836 | } | |||
22837 | if( rc==SQLITE_OK0 ){ | |||
22838 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
22839 | db, "fts5_source_id", 0, | |||
22840 | SQLITE_UTF81|SQLITE_DETERMINISTIC0x000000800|SQLITE_INNOCUOUS0x000200000, | |||
22841 | p, fts5SourceIdFunc, 0, 0 | |||
22842 | ); | |||
22843 | } | |||
22844 | if( rc==SQLITE_OK0 ){ | |||
22845 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
22846 | db, "fts5_locale", 2, | |||
22847 | SQLITE_UTF81|SQLITE_INNOCUOUS0x000200000|SQLITE_RESULT_SUBTYPE0x001000000|SQLITE_SUBTYPE0x000100000, | |||
22848 | p, fts5LocaleFunc, 0, 0 | |||
22849 | ); | |||
22850 | } | |||
22851 | if( rc==SQLITE_OK0 ){ | |||
22852 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
22853 | db, "fts5_insttoken", 1, | |||
22854 | SQLITE_UTF81|SQLITE_INNOCUOUS0x000200000|SQLITE_RESULT_SUBTYPE0x001000000, | |||
22855 | p, fts5InsttokenFunc, 0, 0 | |||
22856 | ); | |||
22857 | } | |||
22858 | } | |||
22859 | ||||
22860 | /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file | |||
22861 | ** fts5_test_mi.c is compiled and linked into the executable. And call | |||
22862 | ** its entry point to enable the matchinfo() demo. */ | |||
22863 | #ifdef SQLITE_FTS5_ENABLE_TEST_MI | |||
22864 | if( rc==SQLITE_OK0 ){ | |||
22865 | extern int sqlite3Fts5TestRegisterMatchinfoAPI(fts5_api*); | |||
22866 | rc = sqlite3Fts5TestRegisterMatchinfoAPI(&pGlobal->api); | |||
22867 | } | |||
22868 | #endif | |||
22869 | ||||
22870 | return rc; | |||
22871 | } | |||
22872 | ||||
22873 | /* | |||
22874 | ** The following functions are used to register the module with SQLite. If | |||
22875 | ** this module is being built as part of the SQLite core (SQLITE_CORE is | |||
22876 | ** defined), then sqlite3_open() will call sqlite3Fts5Init() directly. | |||
22877 | ** | |||
22878 | ** Or, if this module is being built as a loadable extension, | |||
22879 | ** sqlite3Fts5Init() is omitted and the two standard entry points | |||
22880 | ** sqlite3_fts_init() and sqlite3_fts5_init() defined instead. | |||
22881 | */ | |||
22882 | #ifndef SQLITE_CORE | |||
22883 | #ifdef _WIN32 | |||
22884 | __declspec(dllexport) | |||
22885 | #endif | |||
22886 | int sqlite3_fts_init( | |||
22887 | sqlite3 *db, | |||
22888 | char **pzErrMsg, | |||
22889 | const sqlite3_api_routines *pApi | |||
22890 | ){ | |||
22891 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | |||
22892 | (void)pzErrMsg; /* Unused parameter */ | |||
22893 | return fts5Init(db); | |||
22894 | } | |||
22895 | ||||
22896 | #ifdef _WIN32 | |||
22897 | __declspec(dllexport) | |||
22898 | #endif | |||
22899 | int sqlite3_fts5_init( | |||
22900 | sqlite3 *db, | |||
22901 | char **pzErrMsg, | |||
22902 | const sqlite3_api_routines *pApi | |||
22903 | ){ | |||
22904 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | |||
22905 | (void)pzErrMsg; /* Unused parameter */ | |||
22906 | return fts5Init(db); | |||
22907 | } | |||
22908 | #else | |||
22909 | int sqlite3Fts5Init(sqlite3 *db){ | |||
22910 | return fts5Init(db); | |||
22911 | } | |||
22912 | #endif | |||
22913 | ||||
22914 | #line 1 "fts5_storage.c" | |||
22915 | /* | |||
22916 | ** 2014 May 31 | |||
22917 | ** | |||
22918 | ** The author disclaims copyright to this source code. In place of | |||
22919 | ** a legal notice, here is a blessing: | |||
22920 | ** | |||
22921 | ** May you do good and not evil. | |||
22922 | ** May you find forgiveness for yourself and forgive others. | |||
22923 | ** May you share freely, never taking more than you give. | |||
22924 | ** | |||
22925 | ****************************************************************************** | |||
22926 | ** | |||
22927 | */ | |||
22928 | ||||
22929 | ||||
22930 | ||||
22931 | /* #include "fts5Int.h" */ | |||
22932 | ||||
22933 | /* | |||
22934 | ** pSavedRow: | |||
22935 | ** SQL statement FTS5_STMT_LOOKUP2 is a copy of FTS5_STMT_LOOKUP, it | |||
22936 | ** does a by-rowid lookup to retrieve a single row from the %_content | |||
22937 | ** table or equivalent external-content table/view. | |||
22938 | ** | |||
22939 | ** However, FTS5_STMT_LOOKUP2 is only used when retrieving the original | |||
22940 | ** values for a row being UPDATEd. In that case, the SQL statement is | |||
22941 | ** not reset and pSavedRow is set to point at it. This is so that the | |||
22942 | ** insert operation that follows the delete may access the original | |||
22943 | ** row values for any new values for which sqlite3_value_nochange() returns | |||
22944 | ** true. i.e. if the user executes: | |||
22945 | ** | |||
22946 | ** CREATE VIRTUAL TABLE ft USING fts5(a, b, c, locale=1); | |||
22947 | ** ... | |||
22948 | ** UPDATE fts SET a=?, b=? WHERE rowid=?; | |||
22949 | ** | |||
22950 | ** then the value passed to the xUpdate() method of this table as the | |||
22951 | ** new.c value is an sqlite3_value_nochange() value. So in this case it | |||
22952 | ** must be read from the saved row stored in Fts5Storage.pSavedRow. | |||
22953 | ** | |||
22954 | ** This is necessary - using sqlite3_value_nochange() instead of just having | |||
22955 | ** SQLite pass the original value back via xUpdate() - so as not to discard | |||
22956 | ** any locale information associated with such values. | |||
22957 | ** | |||
22958 | */ | |||
22959 | struct Fts5Storage { | |||
22960 | Fts5Config *pConfig; | |||
22961 | Fts5Index *pIndex; | |||
22962 | int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ | |||
22963 | i64 nTotalRow; /* Total number of rows in FTS table */ | |||
22964 | i64 *aTotalSize; /* Total sizes of each column */ | |||
22965 | sqlite3_stmt *pSavedRow; | |||
22966 | sqlite3_stmt *aStmt[12]; | |||
22967 | }; | |||
22968 | ||||
22969 | ||||
22970 | #if FTS5_STMT_SCAN_ASC0!=0 | |||
22971 | # error "FTS5_STMT_SCAN_ASC mismatch" | |||
22972 | #endif | |||
22973 | #if FTS5_STMT_SCAN_DESC1!=1 | |||
22974 | # error "FTS5_STMT_SCAN_DESC mismatch" | |||
22975 | #endif | |||
22976 | #if FTS5_STMT_LOOKUP2!=2 | |||
22977 | # error "FTS5_STMT_LOOKUP mismatch" | |||
22978 | #endif | |||
22979 | ||||
22980 | #define FTS5_STMT_LOOKUP23 3 | |||
22981 | #define FTS5_STMT_INSERT_CONTENT4 4 | |||
22982 | #define FTS5_STMT_REPLACE_CONTENT5 5 | |||
22983 | #define FTS5_STMT_DELETE_CONTENT6 6 | |||
22984 | #define FTS5_STMT_REPLACE_DOCSIZE7 7 | |||
22985 | #define FTS5_STMT_DELETE_DOCSIZE8 8 | |||
22986 | #define FTS5_STMT_LOOKUP_DOCSIZE9 9 | |||
22987 | #define FTS5_STMT_REPLACE_CONFIG10 10 | |||
22988 | #define FTS5_STMT_SCAN11 11 | |||
22989 | ||||
22990 | /* | |||
22991 | ** Prepare the two insert statements - Fts5Storage.pInsertContent and | |||
22992 | ** Fts5Storage.pInsertDocsize - if they have not already been prepared. | |||
22993 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
22994 | ** occurs. | |||
22995 | */ | |||
22996 | static int fts5StorageGetStmt( | |||
22997 | Fts5Storage *p, /* Storage handle */ | |||
22998 | int eStmt, /* FTS5_STMT_XXX constant */ | |||
22999 | sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */ | |||
23000 | char **pzErrMsg /* OUT: Error message (if any) */ | |||
23001 | ){ | |||
23002 | int rc = SQLITE_OK0; | |||
23003 | ||||
23004 | /* If there is no %_docsize table, there should be no requests for | |||
23005 | ** statements to operate on it. */ | |||
23006 | assert( p->pConfig->bColumnsize || (((void) (0)) | |||
23007 | eStmt!=FTS5_STMT_REPLACE_DOCSIZE((void) (0)) | |||
23008 | && eStmt!=FTS5_STMT_DELETE_DOCSIZE((void) (0)) | |||
23009 | && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE((void) (0)) | |||
23010 | ))((void) (0)); | |||
23011 | ||||
23012 | assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) )((void) (0)); | |||
23013 | if( p->aStmt[eStmt]==0 ){ | |||
23014 | const char *azStmt[] = { | |||
23015 | "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC", | |||
23016 | "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC", | |||
23017 | "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ | |||
23018 | "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP2 */ | |||
23019 | ||||
23020 | "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ | |||
23021 | "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ | |||
23022 | "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ | |||
23023 | "REPLACE INTO %Q.'%q_docsize' VALUES(?,?%s)", /* REPLACE_DOCSIZE */ | |||
23024 | "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ | |||
23025 | ||||
23026 | "SELECT sz%s FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ | |||
23027 | ||||
23028 | "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ | |||
23029 | "SELECT %s FROM %s AS T", /* SCAN */ | |||
23030 | }; | |||
23031 | Fts5Config *pC = p->pConfig; | |||
23032 | char *zSql = 0; | |||
23033 | ||||
23034 | assert( ArraySize(azStmt)==ArraySize(p->aStmt) )((void) (0)); | |||
23035 | ||||
23036 | switch( eStmt ){ | |||
23037 | case FTS5_STMT_SCAN11: | |||
23038 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | |||
23039 | pC->zContentExprlist, pC->zContent | |||
23040 | ); | |||
23041 | break; | |||
23042 | ||||
23043 | case FTS5_STMT_SCAN_ASC0: | |||
23044 | case FTS5_STMT_SCAN_DESC1: | |||
23045 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zContentExprlist, | |||
23046 | pC->zContent, pC->zContentRowid, pC->zContentRowid, | |||
23047 | pC->zContentRowid | |||
23048 | ); | |||
23049 | break; | |||
23050 | ||||
23051 | case FTS5_STMT_LOOKUP2: | |||
23052 | case FTS5_STMT_LOOKUP23: | |||
23053 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | |||
23054 | pC->zContentExprlist, pC->zContent, pC->zContentRowid | |||
23055 | ); | |||
23056 | break; | |||
23057 | ||||
23058 | case FTS5_STMT_INSERT_CONTENT4: | |||
23059 | case FTS5_STMT_REPLACE_CONTENT5: { | |||
23060 | char *zBind = 0; | |||
23061 | int i; | |||
23062 | ||||
23063 | assert( pC->eContent==FTS5_CONTENT_NORMAL((void) (0)) | |||
23064 | || pC->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | |||
23065 | )((void) (0)); | |||
23066 | ||||
23067 | /* Add bindings for the "c*" columns - those that store the actual | |||
23068 | ** table content. If eContent==NORMAL, then there is one binding | |||
23069 | ** for each column. Or, if eContent==UNINDEXED, then there are only | |||
23070 | ** bindings for the UNINDEXED columns. */ | |||
23071 | for(i=0; rc==SQLITE_OK0 && i<(pC->nCol+1); i++){ | |||
23072 | if( !i || pC->eContent==FTS5_CONTENT_NORMAL0 || pC->abUnindexed[i-1] ){ | |||
23073 | zBind = sqlite3Fts5Mprintf(&rc, "%z%s?%d", zBind, zBind?",":"",i+1); | |||
23074 | } | |||
23075 | } | |||
23076 | ||||
23077 | /* Add bindings for any "l*" columns. Only non-UNINDEXED columns | |||
23078 | ** require these. */ | |||
23079 | if( pC->bLocale && pC->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
23080 | for(i=0; rc==SQLITE_OK0 && i<pC->nCol; i++){ | |||
23081 | if( pC->abUnindexed[i]==0 ){ | |||
23082 | zBind = sqlite3Fts5Mprintf(&rc, "%z,?%d", zBind, pC->nCol+i+2); | |||
23083 | } | |||
23084 | } | |||
23085 | } | |||
23086 | ||||
23087 | zSql = sqlite3Fts5Mprintf(&rc, azStmt[eStmt], pC->zDb, pC->zName,zBind); | |||
23088 | sqlite3_freesqlite3_api->free(zBind); | |||
23089 | break; | |||
23090 | } | |||
23091 | ||||
23092 | case FTS5_STMT_REPLACE_DOCSIZE7: | |||
23093 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zDb, pC->zName, | |||
23094 | (pC->bContentlessDelete ? ",?" : "") | |||
23095 | ); | |||
23096 | break; | |||
23097 | ||||
23098 | case FTS5_STMT_LOOKUP_DOCSIZE9: | |||
23099 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | |||
23100 | (pC->bContentlessDelete ? ",origin" : ""), | |||
23101 | pC->zDb, pC->zName | |||
23102 | ); | |||
23103 | break; | |||
23104 | ||||
23105 | default: | |||
23106 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zDb, pC->zName); | |||
23107 | break; | |||
23108 | } | |||
23109 | ||||
23110 | if( zSql==0 ){ | |||
23111 | rc = SQLITE_NOMEM7; | |||
23112 | }else{ | |||
23113 | int f = SQLITE_PREPARE_PERSISTENT0x01; | |||
23114 | if( eStmt>FTS5_STMT_LOOKUP23 ) f |= SQLITE_PREPARE_NO_VTAB0x04; | |||
23115 | p->pConfig->bLock++; | |||
23116 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0); | |||
23117 | p->pConfig->bLock--; | |||
23118 | sqlite3_freesqlite3_api->free(zSql); | |||
23119 | if( rc!=SQLITE_OK0 && pzErrMsg ){ | |||
23120 | *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("%s", sqlite3_errmsgsqlite3_api->errmsg(pC->db)); | |||
23121 | } | |||
23122 | if( rc==SQLITE_ERROR1 && eStmt>FTS5_STMT_LOOKUP23 && eStmt<FTS5_STMT_SCAN11 ){ | |||
23123 | /* One of the internal tables - not the %_content table - is missing. | |||
23124 | ** This counts as a corrupted table. */ | |||
23125 | rc = SQLITE_CORRUPT11; | |||
23126 | } | |||
23127 | } | |||
23128 | } | |||
23129 | ||||
23130 | *ppStmt = p->aStmt[eStmt]; | |||
23131 | sqlite3_resetsqlite3_api->reset(*ppStmt); | |||
23132 | return rc; | |||
23133 | } | |||
23134 | ||||
23135 | ||||
23136 | static int fts5ExecPrintf( | |||
23137 | sqlite3 *db, | |||
23138 | char **pzErr, | |||
23139 | const char *zFormat, | |||
23140 | ... | |||
23141 | ){ | |||
23142 | int rc; | |||
23143 | va_list ap; /* ... printf arguments */ | |||
23144 | char *zSql; | |||
23145 | ||||
23146 | va_start(ap, zFormat)__builtin_va_start(ap, zFormat); | |||
23147 | zSql = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, ap); | |||
23148 | ||||
23149 | if( zSql==0 ){ | |||
23150 | rc = SQLITE_NOMEM7; | |||
23151 | }else{ | |||
23152 | rc = sqlite3_execsqlite3_api->exec(db, zSql, 0, 0, pzErr); | |||
23153 | sqlite3_freesqlite3_api->free(zSql); | |||
23154 | } | |||
23155 | ||||
23156 | va_end(ap)__builtin_va_end(ap); | |||
23157 | return rc; | |||
23158 | } | |||
23159 | ||||
23160 | /* | |||
23161 | ** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error | |||
23162 | ** code otherwise. | |||
23163 | */ | |||
23164 | static int sqlite3Fts5DropAll(Fts5Config *pConfig){ | |||
23165 | int rc = fts5ExecPrintf(pConfig->db, 0, | |||
23166 | "DROP TABLE IF EXISTS %Q.'%q_data';" | |||
23167 | "DROP TABLE IF EXISTS %Q.'%q_idx';" | |||
23168 | "DROP TABLE IF EXISTS %Q.'%q_config';", | |||
23169 | pConfig->zDb, pConfig->zName, | |||
23170 | pConfig->zDb, pConfig->zName, | |||
23171 | pConfig->zDb, pConfig->zName | |||
23172 | ); | |||
23173 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
23174 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
23175 | "DROP TABLE IF EXISTS %Q.'%q_docsize';", | |||
23176 | pConfig->zDb, pConfig->zName | |||
23177 | ); | |||
23178 | } | |||
23179 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
23180 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
23181 | "DROP TABLE IF EXISTS %Q.'%q_content';", | |||
23182 | pConfig->zDb, pConfig->zName | |||
23183 | ); | |||
23184 | } | |||
23185 | return rc; | |||
23186 | } | |||
23187 | ||||
23188 | static void fts5StorageRenameOne( | |||
23189 | Fts5Config *pConfig, /* Current FTS5 configuration */ | |||
23190 | int *pRc, /* IN/OUT: Error code */ | |||
23191 | const char *zTail, /* Tail of table name e.g. "data", "config" */ | |||
23192 | const char *zName /* New name of FTS5 table */ | |||
23193 | ){ | |||
23194 | if( *pRc==SQLITE_OK0 ){ | |||
23195 | *pRc = fts5ExecPrintf(pConfig->db, 0, | |||
23196 | "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';", | |||
23197 | pConfig->zDb, pConfig->zName, zTail, zName, zTail | |||
23198 | ); | |||
23199 | } | |||
23200 | } | |||
23201 | ||||
23202 | static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){ | |||
23203 | Fts5Config *pConfig = pStorage->pConfig; | |||
23204 | int rc = sqlite3Fts5StorageSync(pStorage); | |||
23205 | ||||
23206 | fts5StorageRenameOne(pConfig, &rc, "data", zName); | |||
23207 | fts5StorageRenameOne(pConfig, &rc, "idx", zName); | |||
23208 | fts5StorageRenameOne(pConfig, &rc, "config", zName); | |||
23209 | if( pConfig->bColumnsize ){ | |||
23210 | fts5StorageRenameOne(pConfig, &rc, "docsize", zName); | |||
23211 | } | |||
23212 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
23213 | fts5StorageRenameOne(pConfig, &rc, "content", zName); | |||
23214 | } | |||
23215 | return rc; | |||
23216 | } | |||
23217 | ||||
23218 | /* | |||
23219 | ** Create the shadow table named zPost, with definition zDefn. Return | |||
23220 | ** SQLITE_OK if successful, or an SQLite error code otherwise. | |||
23221 | */ | |||
23222 | static int sqlite3Fts5CreateTable( | |||
23223 | Fts5Config *pConfig, /* FTS5 configuration */ | |||
23224 | const char *zPost, /* Shadow table to create (e.g. "content") */ | |||
23225 | const char *zDefn, /* Columns etc. for shadow table */ | |||
23226 | int bWithout, /* True for without rowid */ | |||
23227 | char **pzErr /* OUT: Error message */ | |||
23228 | ){ | |||
23229 | int rc; | |||
23230 | char *zErr = 0; | |||
23231 | ||||
23232 | rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s", | |||
23233 | pConfig->zDb, pConfig->zName, zPost, zDefn, | |||
23234 | #ifndef SQLITE_FTS5_NO_WITHOUT_ROWID | |||
23235 | bWithout?" WITHOUT ROWID": | |||
23236 | #endif | |||
23237 | "" | |||
23238 | ); | |||
23239 | if( zErr ){ | |||
23240 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
23241 | "fts5: error creating shadow table %q_%s: %s", | |||
23242 | pConfig->zName, zPost, zErr | |||
23243 | ); | |||
23244 | sqlite3_freesqlite3_api->free(zErr); | |||
23245 | } | |||
23246 | ||||
23247 | return rc; | |||
23248 | } | |||
23249 | ||||
23250 | /* | |||
23251 | ** Open a new Fts5Index handle. If the bCreate argument is true, create | |||
23252 | ** and initialize the underlying tables | |||
23253 | ** | |||
23254 | ** If successful, set *pp to point to the new object and return SQLITE_OK. | |||
23255 | ** Otherwise, set *pp to NULL and return an SQLite error code. | |||
23256 | */ | |||
23257 | static int sqlite3Fts5StorageOpen( | |||
23258 | Fts5Config *pConfig, | |||
23259 | Fts5Index *pIndex, | |||
23260 | int bCreate, | |||
23261 | Fts5Storage **pp, | |||
23262 | char **pzErr /* OUT: Error message */ | |||
23263 | ){ | |||
23264 | int rc = SQLITE_OK0; | |||
23265 | Fts5Storage *p; /* New object */ | |||
23266 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
23267 | ||||
23268 | nByte = sizeof(Fts5Storage) /* Fts5Storage object */ | |||
23269 | + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */ | |||
23270 | *pp = p = (Fts5Storage*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
23271 | if( !p ) return SQLITE_NOMEM7; | |||
23272 | ||||
23273 | memset(p, 0, (size_t)nByte); | |||
23274 | p->aTotalSize = (i64*)&p[1]; | |||
23275 | p->pConfig = pConfig; | |||
23276 | p->pIndex = pIndex; | |||
23277 | ||||
23278 | if( bCreate ){ | |||
23279 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | |||
23280 | || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | |||
23281 | ){ | |||
23282 | int nDefn = 32 + pConfig->nCol*10; | |||
23283 | char *zDefn = sqlite3_malloc64sqlite3_api->malloc64(32 + (sqlite3_int64)pConfig->nCol * 20); | |||
23284 | if( zDefn==0 ){ | |||
23285 | rc = SQLITE_NOMEM7; | |||
23286 | }else{ | |||
23287 | int i; | |||
23288 | int iOff; | |||
23289 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); | |||
23290 | iOff = (int)strlen(zDefn); | |||
23291 | for(i=0; i<pConfig->nCol; i++){ | |||
23292 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | |||
23293 | || pConfig->abUnindexed[i] | |||
23294 | ){ | |||
23295 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); | |||
23296 | iOff += (int)strlen(&zDefn[iOff]); | |||
23297 | } | |||
23298 | } | |||
23299 | if( pConfig->bLocale ){ | |||
23300 | for(i=0; i<pConfig->nCol; i++){ | |||
23301 | if( pConfig->abUnindexed[i]==0 ){ | |||
23302 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn-iOff, &zDefn[iOff], ", l%d", i); | |||
23303 | iOff += (int)strlen(&zDefn[iOff]); | |||
23304 | } | |||
23305 | } | |||
23306 | } | |||
23307 | rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); | |||
23308 | } | |||
23309 | sqlite3_freesqlite3_api->free(zDefn); | |||
23310 | } | |||
23311 | ||||
23312 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
23313 | const char *zCols = "id INTEGER PRIMARY KEY, sz BLOB"; | |||
23314 | if( pConfig->bContentlessDelete ){ | |||
23315 | zCols = "id INTEGER PRIMARY KEY, sz BLOB, origin INTEGER"; | |||
23316 | } | |||
23317 | rc = sqlite3Fts5CreateTable(pConfig, "docsize", zCols, 0, pzErr); | |||
23318 | } | |||
23319 | if( rc==SQLITE_OK0 ){ | |||
23320 | rc = sqlite3Fts5CreateTable( | |||
23321 | pConfig, "config", "k PRIMARY KEY, v", 1, pzErr | |||
23322 | ); | |||
23323 | } | |||
23324 | if( rc==SQLITE_OK0 ){ | |||
23325 | rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION4); | |||
23326 | } | |||
23327 | } | |||
23328 | ||||
23329 | if( rc ){ | |||
23330 | sqlite3Fts5StorageClose(p); | |||
23331 | *pp = 0; | |||
23332 | } | |||
23333 | return rc; | |||
23334 | } | |||
23335 | ||||
23336 | /* | |||
23337 | ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen(). | |||
23338 | */ | |||
23339 | static int sqlite3Fts5StorageClose(Fts5Storage *p){ | |||
23340 | int rc = SQLITE_OK0; | |||
23341 | if( p ){ | |||
23342 | int i; | |||
23343 | ||||
23344 | /* Finalize all SQL statements */ | |||
23345 | for(i=0; i<ArraySize(p->aStmt)((int)(sizeof(p->aStmt) / sizeof(p->aStmt[0]))); i++){ | |||
23346 | sqlite3_finalizesqlite3_api->finalize(p->aStmt[i]); | |||
23347 | } | |||
23348 | ||||
23349 | sqlite3_freesqlite3_api->free(p); | |||
23350 | } | |||
23351 | return rc; | |||
23352 | } | |||
23353 | ||||
23354 | typedef struct Fts5InsertCtx Fts5InsertCtx; | |||
23355 | struct Fts5InsertCtx { | |||
23356 | Fts5Storage *pStorage; | |||
23357 | int iCol; | |||
23358 | int szCol; /* Size of column value in tokens */ | |||
23359 | }; | |||
23360 | ||||
23361 | /* | |||
23362 | ** Tokenization callback used when inserting tokens into the FTS index. | |||
23363 | */ | |||
23364 | static int fts5StorageInsertCallback( | |||
23365 | void *pContext, /* Pointer to Fts5InsertCtx object */ | |||
23366 | int tflags, | |||
23367 | const char *pToken, /* Buffer containing token */ | |||
23368 | int nToken, /* Size of token in bytes */ | |||
23369 | int iUnused1, /* Start offset of token */ | |||
23370 | int iUnused2 /* End offset of token */ | |||
23371 | ){ | |||
23372 | Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; | |||
23373 | Fts5Index *pIdx = pCtx->pStorage->pIndex; | |||
23374 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
23375 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | |||
23376 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 || pCtx->szCol==0 ){ | |||
23377 | pCtx->szCol++; | |||
23378 | } | |||
23379 | return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); | |||
23380 | } | |||
23381 | ||||
23382 | /* | |||
23383 | ** This function is used as part of an UPDATE statement that modifies the | |||
23384 | ** rowid of a row. In that case, this function is called first to set | |||
23385 | ** Fts5Storage.pSavedRow to point to a statement that may be used to | |||
23386 | ** access the original values of the row being deleted - iDel. | |||
23387 | ** | |||
23388 | ** SQLITE_OK is returned if successful, or an SQLite error code otherwise. | |||
23389 | ** It is not considered an error if row iDel does not exist. In this case | |||
23390 | ** pSavedRow is not set and SQLITE_OK returned. | |||
23391 | */ | |||
23392 | static int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel){ | |||
23393 | int rc = SQLITE_OK0; | |||
23394 | sqlite3_stmt *pSeek = 0; | |||
23395 | ||||
23396 | assert( p->pSavedRow==0 )((void) (0)); | |||
23397 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2+1, &pSeek, 0); | |||
23398 | if( rc==SQLITE_OK0 ){ | |||
23399 | sqlite3_bind_int64sqlite3_api->bind_int64(pSeek, 1, iDel); | |||
23400 | if( sqlite3_stepsqlite3_api->step(pSeek)!=SQLITE_ROW100 ){ | |||
23401 | rc = sqlite3_resetsqlite3_api->reset(pSeek); | |||
23402 | }else{ | |||
23403 | p->pSavedRow = pSeek; | |||
23404 | } | |||
23405 | } | |||
23406 | ||||
23407 | return rc; | |||
23408 | } | |||
23409 | ||||
23410 | /* | |||
23411 | ** If a row with rowid iDel is present in the %_content table, add the | |||
23412 | ** delete-markers to the FTS index necessary to delete it. Do not actually | |||
23413 | ** remove the %_content row at this time though. | |||
23414 | ** | |||
23415 | ** If parameter bSaveRow is true, then Fts5Storage.pSavedRow is left | |||
23416 | ** pointing to a statement (FTS5_STMT_LOOKUP2) that may be used to access | |||
23417 | ** the original values of the row being deleted. This is used by UPDATE | |||
23418 | ** statements. | |||
23419 | */ | |||
23420 | static int fts5StorageDeleteFromIndex( | |||
23421 | Fts5Storage *p, | |||
23422 | i64 iDel, | |||
23423 | sqlite3_value **apVal, | |||
23424 | int bSaveRow /* True to set pSavedRow */ | |||
23425 | ){ | |||
23426 | Fts5Config *pConfig = p->pConfig; | |||
23427 | sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */ | |||
23428 | int rc = SQLITE_OK0; /* Return code */ | |||
23429 | int rc2; /* sqlite3_reset() return code */ | |||
23430 | int iCol; | |||
23431 | Fts5InsertCtx ctx; | |||
23432 | ||||
23433 | assert( bSaveRow==0 || apVal==0 )((void) (0)); | |||
23434 | assert( bSaveRow==0 || bSaveRow==1 )((void) (0)); | |||
23435 | assert( FTS5_STMT_LOOKUP2==FTS5_STMT_LOOKUP+1 )((void) (0)); | |||
23436 | ||||
23437 | if( apVal==0 ){ | |||
23438 | if( p->pSavedRow && bSaveRow ){ | |||
23439 | pSeek = p->pSavedRow; | |||
23440 | p->pSavedRow = 0; | |||
23441 | }else{ | |||
23442 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2+bSaveRow, &pSeek, 0); | |||
23443 | if( rc!=SQLITE_OK0 ) return rc; | |||
23444 | sqlite3_bind_int64sqlite3_api->bind_int64(pSeek, 1, iDel); | |||
23445 | if( sqlite3_stepsqlite3_api->step(pSeek)!=SQLITE_ROW100 ){ | |||
23446 | return sqlite3_resetsqlite3_api->reset(pSeek); | |||
23447 | } | |||
23448 | } | |||
23449 | } | |||
23450 | ||||
23451 | ctx.pStorage = p; | |||
23452 | ctx.iCol = -1; | |||
23453 | for(iCol=1; rc==SQLITE_OK0 && iCol<=pConfig->nCol; iCol++){ | |||
23454 | if( pConfig->abUnindexed[iCol-1]==0 ){ | |||
23455 | sqlite3_value *pVal = 0; | |||
23456 | const char *pText = 0; | |||
23457 | int nText = 0; | |||
23458 | const char *pLoc = 0; | |||
23459 | int nLoc = 0; | |||
23460 | ||||
23461 | assert( pSeek==0 || apVal==0 )((void) (0)); | |||
23462 | assert( pSeek!=0 || apVal!=0 )((void) (0)); | |||
23463 | if( pSeek ){ | |||
23464 | pVal = sqlite3_column_valuesqlite3_api->column_value(pSeek, iCol); | |||
23465 | }else{ | |||
23466 | pVal = apVal[iCol-1]; | |||
23467 | } | |||
23468 | ||||
23469 | if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
23470 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
23471 | }else{ | |||
23472 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
23473 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
23474 | if( pConfig->bLocale && pSeek ){ | |||
23475 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pSeek, iCol + pConfig->nCol); | |||
23476 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pSeek, iCol + pConfig->nCol); | |||
23477 | } | |||
23478 | } | |||
23479 | ||||
23480 | if( rc==SQLITE_OK0 ){ | |||
23481 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
23482 | ctx.szCol = 0; | |||
23483 | rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT0x0004, | |||
23484 | pText, nText, (void*)&ctx, fts5StorageInsertCallback | |||
23485 | ); | |||
23486 | p->aTotalSize[iCol-1] -= (i64)ctx.szCol; | |||
23487 | if( rc==SQLITE_OK0 && p->aTotalSize[iCol-1]<0 ){ | |||
23488 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
23489 | } | |||
23490 | sqlite3Fts5ClearLocale(pConfig); | |||
23491 | } | |||
23492 | } | |||
23493 | } | |||
23494 | if( rc==SQLITE_OK0 && p->nTotalRow<1 ){ | |||
23495 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
23496 | }else{ | |||
23497 | p->nTotalRow--; | |||
23498 | } | |||
23499 | ||||
23500 | if( rc==SQLITE_OK0 && bSaveRow ){ | |||
23501 | assert( p->pSavedRow==0 )((void) (0)); | |||
23502 | p->pSavedRow = pSeek; | |||
23503 | }else{ | |||
23504 | rc2 = sqlite3_resetsqlite3_api->reset(pSeek); | |||
23505 | if( rc==SQLITE_OK0 ) rc = rc2; | |||
23506 | } | |||
23507 | return rc; | |||
23508 | } | |||
23509 | ||||
23510 | /* | |||
23511 | ** Reset any saved statement pSavedRow. Zero pSavedRow as well. This | |||
23512 | ** should be called by the xUpdate() method of the fts5 table before | |||
23513 | ** returning from any operation that may have set Fts5Storage.pSavedRow. | |||
23514 | */ | |||
23515 | static void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage *pStorage){ | |||
23516 | assert( pStorage->pSavedRow==0((void) (0)) | |||
23517 | || pStorage->pSavedRow==pStorage->aStmt[FTS5_STMT_LOOKUP2]((void) (0)) | |||
23518 | )((void) (0)); | |||
23519 | sqlite3_resetsqlite3_api->reset(pStorage->pSavedRow); | |||
23520 | pStorage->pSavedRow = 0; | |||
23521 | } | |||
23522 | ||||
23523 | /* | |||
23524 | ** This function is called to process a DELETE on a contentless_delete=1 | |||
23525 | ** table. It adds the tombstone required to delete the entry with rowid | |||
23526 | ** iDel. If successful, SQLITE_OK is returned. Or, if an error occurs, | |||
23527 | ** an SQLite error code. | |||
23528 | */ | |||
23529 | static int fts5StorageContentlessDelete(Fts5Storage *p, i64 iDel){ | |||
23530 | i64 iOrigin = 0; | |||
23531 | sqlite3_stmt *pLookup = 0; | |||
23532 | int rc = SQLITE_OK0; | |||
23533 | ||||
23534 | assert( p->pConfig->bContentlessDelete )((void) (0)); | |||
23535 | assert( p->pConfig->eContent==FTS5_CONTENT_NONE((void) (0)) | |||
23536 | || p->pConfig->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | |||
23537 | )((void) (0)); | |||
23538 | ||||
23539 | /* Look up the origin of the document in the %_docsize table. Store | |||
23540 | ** this in stack variable iOrigin. */ | |||
23541 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE9, &pLookup, 0); | |||
23542 | if( rc==SQLITE_OK0 ){ | |||
23543 | sqlite3_bind_int64sqlite3_api->bind_int64(pLookup, 1, iDel); | |||
23544 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pLookup) ){ | |||
23545 | iOrigin = sqlite3_column_int64sqlite3_api->column_int64(pLookup, 1); | |||
23546 | } | |||
23547 | rc = sqlite3_resetsqlite3_api->reset(pLookup); | |||
23548 | } | |||
23549 | ||||
23550 | if( rc==SQLITE_OK0 && iOrigin!=0 ){ | |||
23551 | rc = sqlite3Fts5IndexContentlessDelete(p->pIndex, iOrigin, iDel); | |||
23552 | } | |||
23553 | ||||
23554 | return rc; | |||
23555 | } | |||
23556 | ||||
23557 | /* | |||
23558 | ** Insert a record into the %_docsize table. Specifically, do: | |||
23559 | ** | |||
23560 | ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); | |||
23561 | ** | |||
23562 | ** If there is no %_docsize table (as happens if the columnsize=0 option | |||
23563 | ** is specified when the FTS5 table is created), this function is a no-op. | |||
23564 | */ | |||
23565 | static int fts5StorageInsertDocsize( | |||
23566 | Fts5Storage *p, /* Storage module to write to */ | |||
23567 | i64 iRowid, /* id value */ | |||
23568 | Fts5Buffer *pBuf /* sz value */ | |||
23569 | ){ | |||
23570 | int rc = SQLITE_OK0; | |||
23571 | if( p->pConfig->bColumnsize ){ | |||
23572 | sqlite3_stmt *pReplace = 0; | |||
23573 | rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE7, &pReplace, 0); | |||
23574 | if( rc==SQLITE_OK0 ){ | |||
23575 | sqlite3_bind_int64sqlite3_api->bind_int64(pReplace, 1, iRowid); | |||
23576 | if( p->pConfig->bContentlessDelete ){ | |||
23577 | i64 iOrigin = 0; | |||
23578 | rc = sqlite3Fts5IndexGetOrigin(p->pIndex, &iOrigin); | |||
23579 | sqlite3_bind_int64sqlite3_api->bind_int64(pReplace, 3, iOrigin); | |||
23580 | } | |||
23581 | } | |||
23582 | if( rc==SQLITE_OK0 ){ | |||
23583 | sqlite3_bind_blobsqlite3_api->bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
23584 | sqlite3_stepsqlite3_api->step(pReplace); | |||
23585 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | |||
23586 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 2); | |||
23587 | } | |||
23588 | } | |||
23589 | return rc; | |||
23590 | } | |||
23591 | ||||
23592 | /* | |||
23593 | ** Load the contents of the "averages" record from disk into the | |||
23594 | ** p->nTotalRow and p->aTotalSize[] variables. If successful, and if | |||
23595 | ** argument bCache is true, set the p->bTotalsValid flag to indicate | |||
23596 | ** that the contents of aTotalSize[] and nTotalRow are valid until | |||
23597 | ** further notice. | |||
23598 | ** | |||
23599 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
23600 | ** occurs. | |||
23601 | */ | |||
23602 | static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ | |||
23603 | int rc = SQLITE_OK0; | |||
23604 | if( p->bTotalsValid==0 ){ | |||
23605 | rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); | |||
23606 | p->bTotalsValid = bCache; | |||
23607 | } | |||
23608 | return rc; | |||
23609 | } | |||
23610 | ||||
23611 | /* | |||
23612 | ** Store the current contents of the p->nTotalRow and p->aTotalSize[] | |||
23613 | ** variables in the "averages" record on disk. | |||
23614 | ** | |||
23615 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
23616 | ** occurs. | |||
23617 | */ | |||
23618 | static int fts5StorageSaveTotals(Fts5Storage *p){ | |||
23619 | int nCol = p->pConfig->nCol; | |||
23620 | int i; | |||
23621 | Fts5Buffer buf; | |||
23622 | int rc = SQLITE_OK0; | |||
23623 | memset(&buf, 0, sizeof(buf)); | |||
23624 | ||||
23625 | sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow); | |||
23626 | for(i=0; i<nCol; i++){ | |||
23627 | sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]); | |||
23628 | } | |||
23629 | if( rc==SQLITE_OK0 ){ | |||
23630 | rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n); | |||
23631 | } | |||
23632 | sqlite3_freesqlite3_api->free(buf.p); | |||
23633 | ||||
23634 | return rc; | |||
23635 | } | |||
23636 | ||||
23637 | /* | |||
23638 | ** Remove a row from the FTS table. | |||
23639 | */ | |||
23640 | static int sqlite3Fts5StorageDelete( | |||
23641 | Fts5Storage *p, /* Storage object */ | |||
23642 | i64 iDel, /* Rowid to delete from table */ | |||
23643 | sqlite3_value **apVal, /* Optional - values to remove from index */ | |||
23644 | int bSaveRow /* If true, set pSavedRow for deleted row */ | |||
23645 | ){ | |||
23646 | Fts5Config *pConfig = p->pConfig; | |||
23647 | int rc; | |||
23648 | sqlite3_stmt *pDel = 0; | |||
23649 | ||||
23650 | assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 )((void) (0)); | |||
23651 | rc = fts5StorageLoadTotals(p, 1); | |||
23652 | ||||
23653 | /* Delete the index records */ | |||
23654 | if( rc==SQLITE_OK0 ){ | |||
23655 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); | |||
23656 | } | |||
23657 | ||||
23658 | if( rc==SQLITE_OK0 ){ | |||
23659 | if( p->pConfig->bContentlessDelete ){ | |||
23660 | rc = fts5StorageContentlessDelete(p, iDel); | |||
23661 | if( rc==SQLITE_OK0 | |||
23662 | && bSaveRow | |||
23663 | && p->pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | |||
23664 | ){ | |||
23665 | rc = sqlite3Fts5StorageFindDeleteRow(p, iDel); | |||
23666 | } | |||
23667 | }else{ | |||
23668 | rc = fts5StorageDeleteFromIndex(p, iDel, apVal, bSaveRow); | |||
23669 | } | |||
23670 | } | |||
23671 | ||||
23672 | /* Delete the %_docsize record */ | |||
23673 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
23674 | rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE8, &pDel, 0); | |||
23675 | if( rc==SQLITE_OK0 ){ | |||
23676 | sqlite3_bind_int64sqlite3_api->bind_int64(pDel, 1, iDel); | |||
23677 | sqlite3_stepsqlite3_api->step(pDel); | |||
23678 | rc = sqlite3_resetsqlite3_api->reset(pDel); | |||
23679 | } | |||
23680 | } | |||
23681 | ||||
23682 | /* Delete the %_content record */ | |||
23683 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | |||
23684 | || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | |||
23685 | ){ | |||
23686 | if( rc==SQLITE_OK0 ){ | |||
23687 | rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT6, &pDel, 0); | |||
23688 | } | |||
23689 | if( rc==SQLITE_OK0 ){ | |||
23690 | sqlite3_bind_int64sqlite3_api->bind_int64(pDel, 1, iDel); | |||
23691 | sqlite3_stepsqlite3_api->step(pDel); | |||
23692 | rc = sqlite3_resetsqlite3_api->reset(pDel); | |||
23693 | } | |||
23694 | } | |||
23695 | ||||
23696 | return rc; | |||
23697 | } | |||
23698 | ||||
23699 | /* | |||
23700 | ** Delete all entries in the FTS5 index. | |||
23701 | */ | |||
23702 | static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ | |||
23703 | Fts5Config *pConfig = p->pConfig; | |||
23704 | int rc; | |||
23705 | ||||
23706 | p->bTotalsValid = 0; | |||
23707 | ||||
23708 | /* Delete the contents of the %_data and %_docsize tables. */ | |||
23709 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
23710 | "DELETE FROM %Q.'%q_data';" | |||
23711 | "DELETE FROM %Q.'%q_idx';", | |||
23712 | pConfig->zDb, pConfig->zName, | |||
23713 | pConfig->zDb, pConfig->zName | |||
23714 | ); | |||
23715 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
23716 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
23717 | "DELETE FROM %Q.'%q_docsize';", pConfig->zDb, pConfig->zName | |||
23718 | ); | |||
23719 | } | |||
23720 | ||||
23721 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_UNINDEXED3 ){ | |||
23722 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
23723 | "DELETE FROM %Q.'%q_content';", pConfig->zDb, pConfig->zName | |||
23724 | ); | |||
23725 | } | |||
23726 | ||||
23727 | /* Reinitialize the %_data table. This call creates the initial structure | |||
23728 | ** and averages records. */ | |||
23729 | if( rc==SQLITE_OK0 ){ | |||
23730 | rc = sqlite3Fts5IndexReinit(p->pIndex); | |||
23731 | } | |||
23732 | if( rc==SQLITE_OK0 ){ | |||
23733 | rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION4); | |||
23734 | } | |||
23735 | return rc; | |||
23736 | } | |||
23737 | ||||
23738 | static int sqlite3Fts5StorageRebuild(Fts5Storage *p){ | |||
23739 | Fts5Buffer buf = {0,0,0}; | |||
23740 | Fts5Config *pConfig = p->pConfig; | |||
23741 | sqlite3_stmt *pScan = 0; | |||
23742 | Fts5InsertCtx ctx; | |||
23743 | int rc, rc2; | |||
23744 | ||||
23745 | memset(&ctx, 0, sizeof(Fts5InsertCtx)); | |||
23746 | ctx.pStorage = p; | |||
23747 | rc = sqlite3Fts5StorageDeleteAll(p); | |||
23748 | if( rc==SQLITE_OK0 ){ | |||
23749 | rc = fts5StorageLoadTotals(p, 1); | |||
23750 | } | |||
23751 | ||||
23752 | if( rc==SQLITE_OK0 ){ | |||
23753 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN11, &pScan, pConfig->pzErrmsg); | |||
23754 | } | |||
23755 | ||||
23756 | while( rc==SQLITE_OK0 && SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pScan) ){ | |||
23757 | i64 iRowid = sqlite3_column_int64sqlite3_api->column_int64(pScan, 0); | |||
23758 | ||||
23759 | sqlite3Fts5BufferZero(&buf); | |||
23760 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); | |||
23761 | for(ctx.iCol=0; rc==SQLITE_OK0 && ctx.iCol<pConfig->nCol; ctx.iCol++){ | |||
23762 | ctx.szCol = 0; | |||
23763 | if( pConfig->abUnindexed[ctx.iCol]==0 ){ | |||
23764 | int nText = 0; /* Size of pText in bytes */ | |||
23765 | const char *pText = 0; /* Pointer to buffer containing text value */ | |||
23766 | int nLoc = 0; /* Size of pLoc in bytes */ | |||
23767 | const char *pLoc = 0; /* Pointer to buffer containing text value */ | |||
23768 | ||||
23769 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pScan, ctx.iCol+1); | |||
23770 | if( pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | |||
23771 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | |||
23772 | ){ | |||
23773 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
23774 | }else{ | |||
23775 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
23776 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
23777 | if( pConfig->bLocale ){ | |||
23778 | int iCol = ctx.iCol + 1 + pConfig->nCol; | |||
23779 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pScan, iCol); | |||
23780 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pScan, iCol); | |||
23781 | } | |||
23782 | } | |||
23783 | ||||
23784 | if( rc==SQLITE_OK0 ){ | |||
23785 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
23786 | rc = sqlite3Fts5Tokenize(pConfig, | |||
23787 | FTS5_TOKENIZE_DOCUMENT0x0004, | |||
23788 | pText, nText, | |||
23789 | (void*)&ctx, | |||
23790 | fts5StorageInsertCallback | |||
23791 | ); | |||
23792 | sqlite3Fts5ClearLocale(pConfig); | |||
23793 | } | |||
23794 | } | |||
23795 | sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | |||
23796 | p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; | |||
23797 | } | |||
23798 | p->nTotalRow++; | |||
23799 | ||||
23800 | if( rc==SQLITE_OK0 ){ | |||
23801 | rc = fts5StorageInsertDocsize(p, iRowid, &buf); | |||
23802 | } | |||
23803 | } | |||
23804 | sqlite3_freesqlite3_api->free(buf.p); | |||
23805 | rc2 = sqlite3_resetsqlite3_api->reset(pScan); | |||
23806 | if( rc==SQLITE_OK0 ) rc = rc2; | |||
23807 | ||||
23808 | /* Write the averages record */ | |||
23809 | if( rc==SQLITE_OK0 ){ | |||
23810 | rc = fts5StorageSaveTotals(p); | |||
23811 | } | |||
23812 | return rc; | |||
23813 | } | |||
23814 | ||||
23815 | static int sqlite3Fts5StorageOptimize(Fts5Storage *p){ | |||
23816 | return sqlite3Fts5IndexOptimize(p->pIndex); | |||
23817 | } | |||
23818 | ||||
23819 | static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ | |||
23820 | return sqlite3Fts5IndexMerge(p->pIndex, nMerge); | |||
23821 | } | |||
23822 | ||||
23823 | static int sqlite3Fts5StorageReset(Fts5Storage *p){ | |||
23824 | return sqlite3Fts5IndexReset(p->pIndex); | |||
23825 | } | |||
23826 | ||||
23827 | /* | |||
23828 | ** Allocate a new rowid. This is used for "external content" tables when | |||
23829 | ** a NULL value is inserted into the rowid column. The new rowid is allocated | |||
23830 | ** by inserting a dummy row into the %_docsize table. The dummy will be | |||
23831 | ** overwritten later. | |||
23832 | ** | |||
23833 | ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In | |||
23834 | ** this case the user is required to provide a rowid explicitly. | |||
23835 | */ | |||
23836 | static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ | |||
23837 | int rc = SQLITE_MISMATCH20; | |||
23838 | if( p->pConfig->bColumnsize ){ | |||
23839 | sqlite3_stmt *pReplace = 0; | |||
23840 | rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE7, &pReplace, 0); | |||
23841 | if( rc==SQLITE_OK0 ){ | |||
23842 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 1); | |||
23843 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 2); | |||
23844 | sqlite3_stepsqlite3_api->step(pReplace); | |||
23845 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | |||
23846 | } | |||
23847 | if( rc==SQLITE_OK0 ){ | |||
23848 | *piRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->pConfig->db); | |||
23849 | } | |||
23850 | } | |||
23851 | return rc; | |||
23852 | } | |||
23853 | ||||
23854 | /* | |||
23855 | ** Insert a new row into the FTS content table. | |||
23856 | */ | |||
23857 | static int sqlite3Fts5StorageContentInsert( | |||
23858 | Fts5Storage *p, | |||
23859 | int bReplace, /* True to use REPLACE instead of INSERT */ | |||
23860 | sqlite3_value **apVal, | |||
23861 | i64 *piRowid | |||
23862 | ){ | |||
23863 | Fts5Config *pConfig = p->pConfig; | |||
23864 | int rc = SQLITE_OK0; | |||
23865 | ||||
23866 | /* Insert the new row into the %_content table. */ | |||
23867 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 | |||
23868 | && pConfig->eContent!=FTS5_CONTENT_UNINDEXED3 | |||
23869 | ){ | |||
23870 | if( sqlite3_value_typesqlite3_api->value_type(apVal[1])==SQLITE_INTEGER1 ){ | |||
23871 | *piRowid = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); | |||
23872 | }else{ | |||
23873 | rc = fts5StorageNewRowid(p, piRowid); | |||
23874 | } | |||
23875 | }else{ | |||
23876 | sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */ | |||
23877 | int i; /* Counter variable */ | |||
23878 | ||||
23879 | assert( FTS5_STMT_INSERT_CONTENT+1==FTS5_STMT_REPLACE_CONTENT )((void) (0)); | |||
23880 | assert( bReplace==0 || bReplace==1 )((void) (0)); | |||
23881 | rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT4+bReplace, &pInsert, 0); | |||
23882 | if( pInsert ) sqlite3_clear_bindingssqlite3_api->clear_bindings(pInsert); | |||
23883 | ||||
23884 | /* Bind the rowid value */ | |||
23885 | sqlite3_bind_valuesqlite3_api->bind_value(pInsert, 1, apVal[1]); | |||
23886 | ||||
23887 | /* Loop through values for user-defined columns. i=2 is the leftmost | |||
23888 | ** user-defined column. As is column 1 of pSavedRow. */ | |||
23889 | for(i=2; rc==SQLITE_OK0 && i<=pConfig->nCol+1; i++){ | |||
23890 | int bUnindexed = pConfig->abUnindexed[i-2]; | |||
23891 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 || bUnindexed ){ | |||
23892 | sqlite3_value *pVal = apVal[i]; | |||
23893 | ||||
23894 | if( sqlite3_value_nochangesqlite3_api->value_nochange(pVal) && p->pSavedRow ){ | |||
23895 | /* This is an UPDATE statement, and user-defined column (i-2) was not | |||
23896 | ** modified. Retrieve the value from Fts5Storage.pSavedRow. */ | |||
23897 | pVal = sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, i-1); | |||
23898 | if( pConfig->bLocale && bUnindexed==0 ){ | |||
23899 | sqlite3_bind_valuesqlite3_api->bind_value(pInsert, pConfig->nCol + i, | |||
23900 | sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, pConfig->nCol + i - 1) | |||
23901 | ); | |||
23902 | } | |||
23903 | }else if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
23904 | const char *pText = 0; | |||
23905 | const char *pLoc = 0; | |||
23906 | int nText = 0; | |||
23907 | int nLoc = 0; | |||
23908 | assert( pConfig->bLocale )((void) (0)); | |||
23909 | ||||
23910 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
23911 | if( rc==SQLITE_OK0 ){ | |||
23912 | sqlite3_bind_textsqlite3_api->bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
23913 | if( bUnindexed==0 ){ | |||
23914 | int iLoc = pConfig->nCol + i; | |||
23915 | sqlite3_bind_textsqlite3_api->bind_text(pInsert, iLoc, pLoc, nLoc, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
23916 | } | |||
23917 | } | |||
23918 | ||||
23919 | continue; | |||
23920 | } | |||
23921 | ||||
23922 | rc = sqlite3_bind_valuesqlite3_api->bind_value(pInsert, i, pVal); | |||
23923 | } | |||
23924 | } | |||
23925 | if( rc==SQLITE_OK0 ){ | |||
23926 | sqlite3_stepsqlite3_api->step(pInsert); | |||
23927 | rc = sqlite3_resetsqlite3_api->reset(pInsert); | |||
23928 | } | |||
23929 | *piRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(pConfig->db); | |||
23930 | } | |||
23931 | ||||
23932 | return rc; | |||
23933 | } | |||
23934 | ||||
23935 | /* | |||
23936 | ** Insert new entries into the FTS index and %_docsize table. | |||
23937 | */ | |||
23938 | static int sqlite3Fts5StorageIndexInsert( | |||
23939 | Fts5Storage *p, | |||
23940 | sqlite3_value **apVal, | |||
23941 | i64 iRowid | |||
23942 | ){ | |||
23943 | Fts5Config *pConfig = p->pConfig; | |||
23944 | int rc = SQLITE_OK0; /* Return code */ | |||
23945 | Fts5InsertCtx ctx; /* Tokenization callback context object */ | |||
23946 | Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ | |||
23947 | ||||
23948 | memset(&buf, 0, sizeof(Fts5Buffer)); | |||
23949 | ctx.pStorage = p; | |||
23950 | rc = fts5StorageLoadTotals(p, 1); | |||
23951 | ||||
23952 | if( rc==SQLITE_OK0 ){ | |||
23953 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); | |||
23954 | } | |||
23955 | for(ctx.iCol=0; rc==SQLITE_OK0 && ctx.iCol<pConfig->nCol; ctx.iCol++){ | |||
23956 | ctx.szCol = 0; | |||
23957 | if( pConfig->abUnindexed[ctx.iCol]==0 ){ | |||
23958 | int nText = 0; /* Size of pText in bytes */ | |||
23959 | const char *pText = 0; /* Pointer to buffer containing text value */ | |||
23960 | int nLoc = 0; /* Size of pText in bytes */ | |||
23961 | const char *pLoc = 0; /* Pointer to buffer containing text value */ | |||
23962 | ||||
23963 | sqlite3_value *pVal = apVal[ctx.iCol+2]; | |||
23964 | if( p->pSavedRow && sqlite3_value_nochangesqlite3_api->value_nochange(pVal) ){ | |||
23965 | pVal = sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, ctx.iCol+1); | |||
23966 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 && pConfig->bLocale ){ | |||
23967 | int iCol = ctx.iCol + 1 + pConfig->nCol; | |||
23968 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(p->pSavedRow, iCol); | |||
23969 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(p->pSavedRow, iCol); | |||
23970 | } | |||
23971 | }else{ | |||
23972 | pVal = apVal[ctx.iCol+2]; | |||
23973 | } | |||
23974 | ||||
23975 | if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
23976 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
23977 | }else{ | |||
23978 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
23979 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
23980 | } | |||
23981 | ||||
23982 | if( rc==SQLITE_OK0 ){ | |||
23983 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
23984 | rc = sqlite3Fts5Tokenize(pConfig, | |||
23985 | FTS5_TOKENIZE_DOCUMENT0x0004, pText, nText, (void*)&ctx, | |||
23986 | fts5StorageInsertCallback | |||
23987 | ); | |||
23988 | sqlite3Fts5ClearLocale(pConfig); | |||
23989 | } | |||
23990 | } | |||
23991 | sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | |||
23992 | p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; | |||
23993 | } | |||
23994 | p->nTotalRow++; | |||
23995 | ||||
23996 | /* Write the %_docsize record */ | |||
23997 | if( rc==SQLITE_OK0 ){ | |||
23998 | rc = fts5StorageInsertDocsize(p, iRowid, &buf); | |||
23999 | } | |||
24000 | sqlite3_freesqlite3_api->free(buf.p); | |||
24001 | ||||
24002 | return rc; | |||
24003 | } | |||
24004 | ||||
24005 | static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){ | |||
24006 | Fts5Config *pConfig = p->pConfig; | |||
24007 | char *zSql; | |||
24008 | int rc; | |||
24009 | ||||
24010 | zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT count(*) FROM %Q.'%q_%s'", | |||
24011 | pConfig->zDb, pConfig->zName, zSuffix | |||
24012 | ); | |||
24013 | if( zSql==0 ){ | |||
24014 | rc = SQLITE_NOMEM7; | |||
24015 | }else{ | |||
24016 | sqlite3_stmt *pCnt = 0; | |||
24017 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pConfig->db, zSql, -1, &pCnt, 0); | |||
24018 | if( rc==SQLITE_OK0 ){ | |||
24019 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pCnt) ){ | |||
24020 | *pnRow = sqlite3_column_int64sqlite3_api->column_int64(pCnt, 0); | |||
24021 | } | |||
24022 | rc = sqlite3_finalizesqlite3_api->finalize(pCnt); | |||
24023 | } | |||
24024 | } | |||
24025 | ||||
24026 | sqlite3_freesqlite3_api->free(zSql); | |||
24027 | return rc; | |||
24028 | } | |||
24029 | ||||
24030 | /* | |||
24031 | ** Context object used by sqlite3Fts5StorageIntegrity(). | |||
24032 | */ | |||
24033 | typedef struct Fts5IntegrityCtx Fts5IntegrityCtx; | |||
24034 | struct Fts5IntegrityCtx { | |||
24035 | i64 iRowid; | |||
24036 | int iCol; | |||
24037 | int szCol; | |||
24038 | u64 cksum; | |||
24039 | Fts5Termset *pTermset; | |||
24040 | Fts5Config *pConfig; | |||
24041 | }; | |||
24042 | ||||
24043 | ||||
24044 | /* | |||
24045 | ** Tokenization callback used by integrity check. | |||
24046 | */ | |||
24047 | static int fts5StorageIntegrityCallback( | |||
24048 | void *pContext, /* Pointer to Fts5IntegrityCtx object */ | |||
24049 | int tflags, | |||
24050 | const char *pToken, /* Buffer containing token */ | |||
24051 | int nToken, /* Size of token in bytes */ | |||
24052 | int iUnused1, /* Start offset of token */ | |||
24053 | int iUnused2 /* End offset of token */ | |||
24054 | ){ | |||
24055 | Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; | |||
24056 | Fts5Termset *pTermset = pCtx->pTermset; | |||
24057 | int bPresent; | |||
24058 | int ii; | |||
24059 | int rc = SQLITE_OK0; | |||
24060 | int iPos; | |||
24061 | int iCol; | |||
24062 | ||||
24063 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
24064 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | |||
24065 | ||||
24066 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 || pCtx->szCol==0 ){ | |||
24067 | pCtx->szCol++; | |||
24068 | } | |||
24069 | ||||
24070 | switch( pCtx->pConfig->eDetail ){ | |||
24071 | case FTS5_DETAIL_FULL0: | |||
24072 | iPos = pCtx->szCol-1; | |||
24073 | iCol = pCtx->iCol; | |||
24074 | break; | |||
24075 | ||||
24076 | case FTS5_DETAIL_COLUMNS2: | |||
24077 | iPos = pCtx->iCol; | |||
24078 | iCol = 0; | |||
24079 | break; | |||
24080 | ||||
24081 | default: | |||
24082 | assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | |||
24083 | iPos = 0; | |||
24084 | iCol = 0; | |||
24085 | break; | |||
24086 | } | |||
24087 | ||||
24088 | rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); | |||
24089 | if( rc==SQLITE_OK0 && bPresent==0 ){ | |||
24090 | pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( | |||
24091 | pCtx->iRowid, iCol, iPos, 0, pToken, nToken | |||
24092 | ); | |||
24093 | } | |||
24094 | ||||
24095 | for(ii=0; rc==SQLITE_OK0 && ii<pCtx->pConfig->nPrefix; ii++){ | |||
24096 | const int nChar = pCtx->pConfig->aPrefix[ii]; | |||
24097 | int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); | |||
24098 | if( nByte ){ | |||
24099 | rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); | |||
24100 | if( bPresent==0 ){ | |||
24101 | pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( | |||
24102 | pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte | |||
24103 | ); | |||
24104 | } | |||
24105 | } | |||
24106 | } | |||
24107 | ||||
24108 | return rc; | |||
24109 | } | |||
24110 | ||||
24111 | /* | |||
24112 | ** Check that the contents of the FTS index match that of the %_content | |||
24113 | ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return | |||
24114 | ** some other SQLite error code if an error occurs while attempting to | |||
24115 | ** determine this. | |||
24116 | */ | |||
24117 | static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){ | |||
24118 | Fts5Config *pConfig = p->pConfig; | |||
24119 | int rc = SQLITE_OK0; /* Return code */ | |||
24120 | int *aColSize; /* Array of size pConfig->nCol */ | |||
24121 | i64 *aTotalSize; /* Array of size pConfig->nCol */ | |||
24122 | Fts5IntegrityCtx ctx; | |||
24123 | sqlite3_stmt *pScan; | |||
24124 | int bUseCksum; | |||
24125 | ||||
24126 | memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); | |||
24127 | ctx.pConfig = p->pConfig; | |||
24128 | aTotalSize = (i64*)sqlite3_malloc64sqlite3_api->malloc64(pConfig->nCol*(sizeof(int)+sizeof(i64))); | |||
24129 | if( !aTotalSize ) return SQLITE_NOMEM7; | |||
24130 | aColSize = (int*)&aTotalSize[pConfig->nCol]; | |||
24131 | memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol); | |||
24132 | ||||
24133 | bUseCksum = (pConfig->eContent==FTS5_CONTENT_NORMAL0 | |||
24134 | || (pConfig->eContent==FTS5_CONTENT_EXTERNAL2 && iArg) | |||
24135 | ); | |||
24136 | if( bUseCksum ){ | |||
24137 | /* Generate the expected index checksum based on the contents of the | |||
24138 | ** %_content table. This block stores the checksum in ctx.cksum. */ | |||
24139 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN11, &pScan, 0); | |||
24140 | if( rc==SQLITE_OK0 ){ | |||
24141 | int rc2; | |||
24142 | while( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pScan) ){ | |||
24143 | int i; | |||
24144 | ctx.iRowid = sqlite3_column_int64sqlite3_api->column_int64(pScan, 0); | |||
24145 | ctx.szCol = 0; | |||
24146 | if( pConfig->bColumnsize ){ | |||
24147 | rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); | |||
24148 | } | |||
24149 | if( rc==SQLITE_OK0 && pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
24150 | rc = sqlite3Fts5TermsetNew(&ctx.pTermset); | |||
24151 | } | |||
24152 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | |||
24153 | if( pConfig->abUnindexed[i]==0 ){ | |||
24154 | const char *pText = 0; | |||
24155 | int nText = 0; | |||
24156 | const char *pLoc = 0; | |||
24157 | int nLoc = 0; | |||
24158 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pScan, i+1); | |||
24159 | ||||
24160 | if( pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | |||
24161 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | |||
24162 | ){ | |||
24163 | rc = sqlite3Fts5DecodeLocaleValue( | |||
24164 | pVal, &pText, &nText, &pLoc, &nLoc | |||
24165 | ); | |||
24166 | }else{ | |||
24167 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 && pConfig->bLocale ){ | |||
24168 | int iCol = i + 1 + pConfig->nCol; | |||
24169 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pScan, iCol); | |||
24170 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pScan, iCol); | |||
24171 | } | |||
24172 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
24173 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
24174 | } | |||
24175 | ||||
24176 | ctx.iCol = i; | |||
24177 | ctx.szCol = 0; | |||
24178 | ||||
24179 | if( rc==SQLITE_OK0 && pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
24180 | rc = sqlite3Fts5TermsetNew(&ctx.pTermset); | |||
24181 | } | |||
24182 | ||||
24183 | if( rc==SQLITE_OK0 ){ | |||
24184 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
24185 | rc = sqlite3Fts5Tokenize(pConfig, | |||
24186 | FTS5_TOKENIZE_DOCUMENT0x0004, | |||
24187 | pText, nText, | |||
24188 | (void*)&ctx, | |||
24189 | fts5StorageIntegrityCallback | |||
24190 | ); | |||
24191 | sqlite3Fts5ClearLocale(pConfig); | |||
24192 | } | |||
24193 | ||||
24194 | /* If this is not a columnsize=0 database, check that the number | |||
24195 | ** of tokens in the value matches the aColSize[] value read from | |||
24196 | ** the %_docsize table. */ | |||
24197 | if( rc==SQLITE_OK0 | |||
24198 | && pConfig->bColumnsize | |||
24199 | && ctx.szCol!=aColSize[i] | |||
24200 | ){ | |||
24201 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
24202 | } | |||
24203 | aTotalSize[i] += ctx.szCol; | |||
24204 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
24205 | sqlite3Fts5TermsetFree(ctx.pTermset); | |||
24206 | ctx.pTermset = 0; | |||
24207 | } | |||
24208 | } | |||
24209 | } | |||
24210 | sqlite3Fts5TermsetFree(ctx.pTermset); | |||
24211 | ctx.pTermset = 0; | |||
24212 | ||||
24213 | if( rc!=SQLITE_OK0 ) break; | |||
24214 | } | |||
24215 | rc2 = sqlite3_resetsqlite3_api->reset(pScan); | |||
24216 | if( rc==SQLITE_OK0 ) rc = rc2; | |||
24217 | } | |||
24218 | ||||
24219 | /* Test that the "totals" (sometimes called "averages") record looks Ok */ | |||
24220 | if( rc==SQLITE_OK0 ){ | |||
24221 | int i; | |||
24222 | rc = fts5StorageLoadTotals(p, 0); | |||
24223 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | |||
24224 | if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
24225 | } | |||
24226 | } | |||
24227 | ||||
24228 | /* Check that the %_docsize and %_content tables contain the expected | |||
24229 | ** number of rows. */ | |||
24230 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
24231 | i64 nRow = 0; | |||
24232 | rc = fts5StorageCount(p, "content", &nRow); | |||
24233 | if( rc==SQLITE_OK0 && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
24234 | } | |||
24235 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
24236 | i64 nRow = 0; | |||
24237 | rc = fts5StorageCount(p, "docsize", &nRow); | |||
24238 | if( rc==SQLITE_OK0 && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
24239 | } | |||
24240 | } | |||
24241 | ||||
24242 | /* Pass the expected checksum down to the FTS index module. It will | |||
24243 | ** verify, amongst other things, that it matches the checksum generated by | |||
24244 | ** inspecting the index itself. */ | |||
24245 | if( rc==SQLITE_OK0 ){ | |||
24246 | rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum, bUseCksum); | |||
24247 | } | |||
24248 | ||||
24249 | sqlite3_freesqlite3_api->free(aTotalSize); | |||
24250 | return rc; | |||
24251 | } | |||
24252 | ||||
24253 | /* | |||
24254 | ** Obtain an SQLite statement handle that may be used to read data from the | |||
24255 | ** %_content table. | |||
24256 | */ | |||
24257 | static int sqlite3Fts5StorageStmt( | |||
24258 | Fts5Storage *p, | |||
24259 | int eStmt, | |||
24260 | sqlite3_stmt **pp, | |||
24261 | char **pzErrMsg | |||
24262 | ){ | |||
24263 | int rc; | |||
24264 | assert( eStmt==FTS5_STMT_SCAN_ASC((void) (0)) | |||
24265 | || eStmt==FTS5_STMT_SCAN_DESC((void) (0)) | |||
24266 | || eStmt==FTS5_STMT_LOOKUP((void) (0)) | |||
24267 | )((void) (0)); | |||
24268 | rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg); | |||
24269 | if( rc==SQLITE_OK0 ){ | |||
24270 | assert( p->aStmt[eStmt]==*pp )((void) (0)); | |||
24271 | p->aStmt[eStmt] = 0; | |||
24272 | } | |||
24273 | return rc; | |||
24274 | } | |||
24275 | ||||
24276 | /* | |||
24277 | ** Release an SQLite statement handle obtained via an earlier call to | |||
24278 | ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function | |||
24279 | ** must match that passed to the sqlite3Fts5StorageStmt() call. | |||
24280 | */ | |||
24281 | static void sqlite3Fts5StorageStmtRelease( | |||
24282 | Fts5Storage *p, | |||
24283 | int eStmt, | |||
24284 | sqlite3_stmt *pStmt | |||
24285 | ){ | |||
24286 | assert( eStmt==FTS5_STMT_SCAN_ASC((void) (0)) | |||
24287 | || eStmt==FTS5_STMT_SCAN_DESC((void) (0)) | |||
24288 | || eStmt==FTS5_STMT_LOOKUP((void) (0)) | |||
24289 | )((void) (0)); | |||
24290 | if( p->aStmt[eStmt]==0 ){ | |||
24291 | sqlite3_resetsqlite3_api->reset(pStmt); | |||
24292 | p->aStmt[eStmt] = pStmt; | |||
24293 | }else{ | |||
24294 | sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
24295 | } | |||
24296 | } | |||
24297 | ||||
24298 | static int fts5StorageDecodeSizeArray( | |||
24299 | int *aCol, int nCol, /* Array to populate */ | |||
24300 | const u8 *aBlob, int nBlob /* Record to read varints from */ | |||
24301 | ){ | |||
24302 | int i; | |||
24303 | int iOff = 0; | |||
24304 | for(i=0; i<nCol; i++){ | |||
24305 | if( iOff>=nBlob ) return 1; | |||
24306 | iOff += fts5GetVarint32(&aBlob[iOff], aCol[i])sqlite3Fts5GetVarint32(&aBlob[iOff],(u32*)&(aCol[i])); | |||
24307 | } | |||
24308 | return (iOff!=nBlob); | |||
24309 | } | |||
24310 | ||||
24311 | /* | |||
24312 | ** Argument aCol points to an array of integers containing one entry for | |||
24313 | ** each table column. This function reads the %_docsize record for the | |||
24314 | ** specified rowid and populates aCol[] with the results. | |||
24315 | ** | |||
24316 | ** An SQLite error code is returned if an error occurs, or SQLITE_OK | |||
24317 | ** otherwise. | |||
24318 | */ | |||
24319 | static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ | |||
24320 | int nCol = p->pConfig->nCol; /* Number of user columns in table */ | |||
24321 | sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ | |||
24322 | int rc; /* Return Code */ | |||
24323 | ||||
24324 | assert( p->pConfig->bColumnsize )((void) (0)); | |||
24325 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE9, &pLookup, 0); | |||
24326 | if( pLookup ){ | |||
24327 | int bCorrupt = 1; | |||
24328 | assert( rc==SQLITE_OK )((void) (0)); | |||
24329 | sqlite3_bind_int64sqlite3_api->bind_int64(pLookup, 1, iRowid); | |||
24330 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pLookup) ){ | |||
24331 | const u8 *aBlob = sqlite3_column_blobsqlite3_api->column_blob(pLookup, 0); | |||
24332 | int nBlob = sqlite3_column_bytessqlite3_api->column_bytes(pLookup, 0); | |||
24333 | if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ | |||
24334 | bCorrupt = 0; | |||
24335 | } | |||
24336 | } | |||
24337 | rc = sqlite3_resetsqlite3_api->reset(pLookup); | |||
24338 | if( bCorrupt && rc==SQLITE_OK0 ){ | |||
24339 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
24340 | } | |||
24341 | }else{ | |||
24342 | assert( rc!=SQLITE_OK )((void) (0)); | |||
24343 | } | |||
24344 | ||||
24345 | return rc; | |||
24346 | } | |||
24347 | ||||
24348 | static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ | |||
24349 | int rc = fts5StorageLoadTotals(p, 0); | |||
24350 | if( rc==SQLITE_OK0 ){ | |||
24351 | *pnToken = 0; | |||
24352 | if( iCol<0 ){ | |||
24353 | int i; | |||
24354 | for(i=0; i<p->pConfig->nCol; i++){ | |||
24355 | *pnToken += p->aTotalSize[i]; | |||
24356 | } | |||
24357 | }else if( iCol<p->pConfig->nCol ){ | |||
24358 | *pnToken = p->aTotalSize[iCol]; | |||
24359 | }else{ | |||
24360 | rc = SQLITE_RANGE25; | |||
24361 | } | |||
24362 | } | |||
24363 | return rc; | |||
24364 | } | |||
24365 | ||||
24366 | static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ | |||
24367 | int rc = fts5StorageLoadTotals(p, 0); | |||
24368 | if( rc==SQLITE_OK0 ){ | |||
24369 | /* nTotalRow being zero does not necessarily indicate a corrupt | |||
24370 | ** database - it might be that the FTS5 table really does contain zero | |||
24371 | ** rows. However this function is only called from the xRowCount() API, | |||
24372 | ** and there is no way for that API to be invoked if the table contains | |||
24373 | ** no rows. Hence the FTS5_CORRUPT return. */ | |||
24374 | *pnRow = p->nTotalRow; | |||
24375 | if( p->nTotalRow<=0 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
24376 | } | |||
24377 | return rc; | |||
24378 | } | |||
24379 | ||||
24380 | /* | |||
24381 | ** Flush any data currently held in-memory to disk. | |||
24382 | */ | |||
24383 | static int sqlite3Fts5StorageSync(Fts5Storage *p){ | |||
24384 | int rc = SQLITE_OK0; | |||
24385 | i64 iLastRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->pConfig->db); | |||
24386 | if( p->bTotalsValid ){ | |||
24387 | rc = fts5StorageSaveTotals(p); | |||
24388 | if( rc==SQLITE_OK0 ){ | |||
24389 | p->bTotalsValid = 0; | |||
24390 | } | |||
24391 | } | |||
24392 | if( rc==SQLITE_OK0 ){ | |||
24393 | rc = sqlite3Fts5IndexSync(p->pIndex); | |||
24394 | } | |||
24395 | sqlite3_set_last_insert_rowidsqlite3_api->set_last_insert_rowid(p->pConfig->db, iLastRowid); | |||
24396 | return rc; | |||
24397 | } | |||
24398 | ||||
24399 | static int sqlite3Fts5StorageRollback(Fts5Storage *p){ | |||
24400 | p->bTotalsValid = 0; | |||
24401 | return sqlite3Fts5IndexRollback(p->pIndex); | |||
24402 | } | |||
24403 | ||||
24404 | static int sqlite3Fts5StorageConfigValue( | |||
24405 | Fts5Storage *p, | |||
24406 | const char *z, | |||
24407 | sqlite3_value *pVal, | |||
24408 | int iVal | |||
24409 | ){ | |||
24410 | sqlite3_stmt *pReplace = 0; | |||
24411 | int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG10, &pReplace, 0); | |||
24412 | if( rc==SQLITE_OK0 ){ | |||
24413 | sqlite3_bind_textsqlite3_api->bind_text(pReplace, 1, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
24414 | if( pVal ){ | |||
24415 | sqlite3_bind_valuesqlite3_api->bind_value(pReplace, 2, pVal); | |||
24416 | }else{ | |||
24417 | sqlite3_bind_intsqlite3_api->bind_int(pReplace, 2, iVal); | |||
24418 | } | |||
24419 | sqlite3_stepsqlite3_api->step(pReplace); | |||
24420 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | |||
24421 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 1); | |||
24422 | } | |||
24423 | if( rc==SQLITE_OK0 && pVal ){ | |||
24424 | int iNew = p->pConfig->iCookie + 1; | |||
24425 | rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); | |||
24426 | if( rc==SQLITE_OK0 ){ | |||
24427 | p->pConfig->iCookie = iNew; | |||
24428 | } | |||
24429 | } | |||
24430 | return rc; | |||
24431 | } | |||
24432 | ||||
24433 | #line 1 "fts5_tokenize.c" | |||
24434 | /* | |||
24435 | ** 2014 May 31 | |||
24436 | ** | |||
24437 | ** The author disclaims copyright to this source code. In place of | |||
24438 | ** a legal notice, here is a blessing: | |||
24439 | ** | |||
24440 | ** May you do good and not evil. | |||
24441 | ** May you find forgiveness for yourself and forgive others. | |||
24442 | ** May you share freely, never taking more than you give. | |||
24443 | ** | |||
24444 | ****************************************************************************** | |||
24445 | */ | |||
24446 | ||||
24447 | ||||
24448 | /* #include "fts5Int.h" */ | |||
24449 | ||||
24450 | /************************************************************************** | |||
24451 | ** Start of ascii tokenizer implementation. | |||
24452 | */ | |||
24453 | ||||
/*
** Default token-character map for the ascii tokenizer, indexed by byte
** value 0..127. An entry is 1 if the byte is a token character and 0 if
** it is a separator. With no "tokenchars"/"separators" options applied,
** the token characters are exactly the ASCII digits and letters.
*/
static unsigned char aAsciiTokenChar[128] = {
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00..0x0F  control  */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10..0x1F  control  */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20..0x2F  punct    */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30..0x3F  '0'..'9' */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40..0x4F  'A'..'O' */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x50..0x5F  'P'..'Z' */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60..0x6F  'a'..'o' */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x70..0x7F  'p'..'z' */
};
24468 | ||||
/*
** State of one "ascii" tokenizer instance: a private copy of the
** token-character map, indexed by byte value 0..127 (non-zero means the
** byte is a token character).
*/
typedef struct AsciiTokenizer {
  unsigned char aTokenChar[128];
} AsciiTokenizer;
24473 | ||||
24474 | static void fts5AsciiAddExceptions( | |||
24475 | AsciiTokenizer *p, | |||
24476 | const char *zArg, | |||
24477 | int bTokenChars | |||
24478 | ){ | |||
24479 | int i; | |||
24480 | for(i=0; zArg[i]; i++){ | |||
24481 | if( (zArg[i] & 0x80)==0 ){ | |||
24482 | p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars; | |||
24483 | } | |||
24484 | } | |||
24485 | } | |||
24486 | ||||
24487 | /* | |||
24488 | ** Delete a "ascii" tokenizer. | |||
24489 | */ | |||
24490 | static void fts5AsciiDelete(Fts5Tokenizer *p){ | |||
24491 | sqlite3_freesqlite3_api->free(p); | |||
24492 | } | |||
24493 | ||||
24494 | /* | |||
24495 | ** Create an "ascii" tokenizer. | |||
24496 | */ | |||
24497 | static int fts5AsciiCreate( | |||
24498 | void *pUnused, | |||
24499 | const char **azArg, int nArg, | |||
24500 | Fts5Tokenizer **ppOut | |||
24501 | ){ | |||
24502 | int rc = SQLITE_OK0; | |||
24503 | AsciiTokenizer *p = 0; | |||
24504 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
24505 | if( nArg%2 ){ | |||
24506 | rc = SQLITE_ERROR1; | |||
24507 | }else{ | |||
24508 | p = sqlite3_mallocsqlite3_api->malloc(sizeof(AsciiTokenizer)); | |||
24509 | if( p==0 ){ | |||
24510 | rc = SQLITE_NOMEM7; | |||
24511 | }else{ | |||
24512 | int i; | |||
24513 | memset(p, 0, sizeof(AsciiTokenizer)); | |||
24514 | memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); | |||
24515 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | |||
24516 | const char *zArg = azArg[i+1]; | |||
24517 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "tokenchars") ){ | |||
24518 | fts5AsciiAddExceptions(p, zArg, 1); | |||
24519 | }else | |||
24520 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "separators") ){ | |||
24521 | fts5AsciiAddExceptions(p, zArg, 0); | |||
24522 | }else{ | |||
24523 | rc = SQLITE_ERROR1; | |||
24524 | } | |||
24525 | } | |||
24526 | if( rc!=SQLITE_OK0 ){ | |||
24527 | fts5AsciiDelete((Fts5Tokenizer*)p); | |||
24528 | p = 0; | |||
24529 | } | |||
24530 | } | |||
24531 | } | |||
24532 | ||||
24533 | *ppOut = (Fts5Tokenizer*)p; | |||
24534 | return rc; | |||
24535 | } | |||
24536 | ||||
24537 | ||||
/*
** Copy nByte bytes from aIn[] to aOut[], converting the ASCII upper-case
** letters 'A'..'Z' to lower case. All other bytes are copied unchanged.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int iByte = 0;
  while( iByte<nByte ){
    char ch = aIn[iByte];
    if( ch>='A' && ch<='Z' ){
      ch += 32;                   /* 'a' - 'A' */
    }
    aOut[iByte] = ch;
    iByte++;
  }
}
24546 | ||||
24547 | /* | |||
24548 | ** Tokenize some text using the ascii tokenizer. | |||
24549 | */ | |||
24550 | static int fts5AsciiTokenize( | |||
24551 | Fts5Tokenizer *pTokenizer, | |||
24552 | void *pCtx, | |||
24553 | int iUnused, | |||
24554 | const char *pText, int nText, | |||
24555 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | |||
24556 | ){ | |||
24557 | AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; | |||
24558 | int rc = SQLITE_OK0; | |||
24559 | int ie; | |||
24560 | int is = 0; | |||
24561 | ||||
24562 | char aFold[64]; | |||
24563 | int nFold = sizeof(aFold); | |||
24564 | char *pFold = aFold; | |||
24565 | unsigned char *a = p->aTokenChar; | |||
24566 | ||||
24567 | UNUSED_PARAM(iUnused)(void)(iUnused); | |||
24568 | ||||
24569 | while( is<nText && rc==SQLITE_OK0 ){ | |||
24570 | int nByte; | |||
24571 | ||||
24572 | /* Skip any leading divider characters. */ | |||
24573 | while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){ | |||
24574 | is++; | |||
24575 | } | |||
24576 | if( is==nText ) break; | |||
24577 | ||||
24578 | /* Count the token characters */ | |||
24579 | ie = is+1; | |||
24580 | while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){ | |||
24581 | ie++; | |||
24582 | } | |||
24583 | ||||
24584 | /* Fold to lower case */ | |||
24585 | nByte = ie-is; | |||
24586 | if( nByte>nFold ){ | |||
24587 | if( pFold!=aFold ) sqlite3_freesqlite3_api->free(pFold); | |||
24588 | pFold = sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)nByte*2); | |||
24589 | if( pFold==0 ){ | |||
24590 | rc = SQLITE_NOMEM7; | |||
24591 | break; | |||
24592 | } | |||
24593 | nFold = nByte*2; | |||
24594 | } | |||
24595 | asciiFold(pFold, &pText[is], nByte); | |||
24596 | ||||
24597 | /* Invoke the token callback */ | |||
24598 | rc = xToken(pCtx, 0, pFold, nByte, is, ie); | |||
24599 | is = ie+1; | |||
24600 | } | |||
24601 | ||||
24602 | if( pFold!=aFold ) sqlite3_freesqlite3_api->free(pFold); | |||
24603 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; | |||
24604 | return rc; | |||
24605 | } | |||
24606 | ||||
24607 | /************************************************************************** | |||
24608 | ** Start of unicode61 tokenizer implementation. | |||
24609 | */ | |||
24610 | ||||
24611 | ||||
/*
** The two macros below - READ_UTF8 and WRITE_UTF8 - are copies of the
** ones in the sqlite3 source file utf.c. They are only defined here when
** this file is NOT compiled as part of the amalgamation.
**
** sqlite3Utf8Trans1[] maps a lead byte in the range 0xC0..0xFF (indexed
** by lead-0xC0) to the payload bits carried by that lead byte.
**
** READ_UTF8(zIn, zTerm, c):
**   Read one character starting at zIn into c, advancing zIn. At least
**   one byte is always consumed; continuation bytes (10xxxxxx) are only
**   consumed while zIn<zTerm. Values below 0x80 that came from a
**   multi-byte form, surrogates (0xD800..0xDFFF), and 0xFFFE/0xFFFF are
**   replaced by 0xFFFD. The expansion is several bare statements, not a
**   single block, so it must only be used where a statement list is legal.
**
** WRITE_UTF8(zOut, c):
**   Append c to zOut as 1 to 4 bytes, advancing zOut. Expands to a
**   braced block.
**
** Both macros evaluate their arguments more than once.
*/
#ifndef SQLITE_AMALGAMATION

static const unsigned char sqlite3Utf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3Utf8Trans1[c-0xc0];                         \
    while( zIn<zTerm && (*zIn & 0xc0)==0x80 ){             \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }


#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (unsigned char)(c&0xFF);                 \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }else{                                               \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
}

#endif /* ifndef SQLITE_AMALGAMATION */
24664 | ||||
/*
** Advance pointer zIn past one utf-8 character. One byte is always
** consumed; if that byte is 0xC0 or greater, any continuation bytes
** (10xxxxxx) that follow are consumed as well. There is no end-of-buffer
** argument: the scan only stops at the first non-continuation byte, so
** the input needs such a byte (e.g. a nul terminator) after the
** character. zIn is evaluated more than once.
*/
#define FTS5_SKIP_UTF8(zIn) {                               \
  if( ((unsigned char)(*(zIn++)))>=0xc0 ){                  \
    while( (((unsigned char)*zIn) & 0xc0)==0x80 ){ zIn++; } \
  }                                                         \
}
24670 | ||||
/*
** State of one "unicode61" tokenizer instance.
**
** aiException[] is a sorted array of nException codepoints whose
** token/separator status is the opposite of what aCategory[] says
** (see fts5UnicodeAddExceptions() and fts5UnicodeIsAlnum()).
*/
typedef struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int eRemoveDiacritic;           /* An FTS5_REMOVE_DIACRITICS_XXX value */
  int nException;                 /* Number of entries in aiException[] */
  int *aiException;               /* Sorted array of exception codepoints */

  unsigned char aCategory[32];    /* True for token char categories */
} Unicode61Tokenizer;

/* Values for eRemoveDiacritic (must match internals of fts5_unicode2.c) */
#define FTS5_REMOVE_DIACRITICS_NONE    0
#define FTS5_REMOVE_DIACRITICS_SIMPLE  1
#define FTS5_REMOVE_DIACRITICS_COMPLEX 2
24687 | ||||
24688 | static int fts5UnicodeAddExceptions( | |||
24689 | Unicode61Tokenizer *p, /* Tokenizer object */ | |||
24690 | const char *z, /* Characters to treat as exceptions */ | |||
24691 | int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */ | |||
24692 | ){ | |||
24693 | int rc = SQLITE_OK0; | |||
24694 | int n = (int)strlen(z); | |||
24695 | int *aNew; | |||
24696 | ||||
24697 | if( n>0 ){ | |||
24698 | aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(p->aiException, | |||
24699 | (n+p->nException)*sizeof(int)); | |||
24700 | if( aNew ){ | |||
24701 | int nNew = p->nException; | |||
24702 | const unsigned char *zCsr = (const unsigned char*)z; | |||
24703 | const unsigned char *zTerm = (const unsigned char*)&z[n]; | |||
24704 | while( zCsr<zTerm ){ | |||
24705 | u32 iCode; | |||
24706 | int bToken; | |||
24707 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
24708 | if( iCode<128 ){ | |||
24709 | p->aTokenChar[iCode] = (unsigned char)bTokenChars; | |||
24710 | }else{ | |||
24711 | bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)]; | |||
24712 | assert( (bToken==0 || bToken==1) )((void) (0)); | |||
24713 | assert( (bTokenChars==0 || bTokenChars==1) )((void) (0)); | |||
24714 | if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){ | |||
24715 | int i; | |||
24716 | for(i=0; i<nNew; i++){ | |||
24717 | if( (u32)aNew[i]>iCode ) break; | |||
24718 | } | |||
24719 | memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); | |||
24720 | aNew[i] = iCode; | |||
24721 | nNew++; | |||
24722 | } | |||
24723 | } | |||
24724 | } | |||
24725 | p->aiException = aNew; | |||
24726 | p->nException = nNew; | |||
24727 | }else{ | |||
24728 | rc = SQLITE_NOMEM7; | |||
24729 | } | |||
24730 | } | |||
24731 | ||||
24732 | return rc; | |||
24733 | } | |||
24734 | ||||
24735 | /* | |||
24736 | ** Return true if the p->aiException[] array contains the value iCode. | |||
24737 | */ | |||
24738 | static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ | |||
24739 | if( p->nException>0 ){ | |||
24740 | int *a = p->aiException; | |||
24741 | int iLo = 0; | |||
24742 | int iHi = p->nException-1; | |||
24743 | ||||
24744 | while( iHi>=iLo ){ | |||
24745 | int iTest = (iHi + iLo) / 2; | |||
24746 | if( iCode==a[iTest] ){ | |||
24747 | return 1; | |||
24748 | }else if( iCode>a[iTest] ){ | |||
24749 | iLo = iTest+1; | |||
24750 | }else{ | |||
24751 | iHi = iTest-1; | |||
24752 | } | |||
24753 | } | |||
24754 | } | |||
24755 | ||||
24756 | return 0; | |||
24757 | } | |||
24758 | ||||
24759 | /* | |||
24760 | ** Delete a "unicode61" tokenizer. | |||
24761 | */ | |||
24762 | static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ | |||
24763 | if( pTok ){ | |||
24764 | Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; | |||
24765 | sqlite3_freesqlite3_api->free(p->aiException); | |||
24766 | sqlite3_freesqlite3_api->free(p->aFold); | |||
24767 | sqlite3_freesqlite3_api->free(p); | |||
24768 | } | |||
24769 | return; | |||
24770 | } | |||
24771 | ||||
24772 | static int unicodeSetCategories(Unicode61Tokenizer *p, const char *zCat){ | |||
24773 | const char *z = zCat; | |||
24774 | ||||
24775 | while( *z ){ | |||
24776 | while( *z==' ' || *z=='\t' ) z++; | |||
24777 | if( *z && sqlite3Fts5UnicodeCatParse(z, p->aCategory) ){ | |||
24778 | return SQLITE_ERROR1; | |||
24779 | } | |||
24780 | while( *z!=' ' && *z!='\t' && *z!='\0' ) z++; | |||
24781 | } | |||
24782 | ||||
24783 | sqlite3Fts5UnicodeAscii(p->aCategory, p->aTokenChar); | |||
24784 | return SQLITE_OK0; | |||
24785 | } | |||
24786 | ||||
24787 | /* | |||
24788 | ** Create a "unicode61" tokenizer. | |||
24789 | */ | |||
24790 | static int fts5UnicodeCreate( | |||
24791 | void *pUnused, | |||
24792 | const char **azArg, int nArg, | |||
24793 | Fts5Tokenizer **ppOut | |||
24794 | ){ | |||
24795 | int rc = SQLITE_OK0; /* Return code */ | |||
24796 | Unicode61Tokenizer *p = 0; /* New tokenizer object */ | |||
24797 | ||||
24798 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
24799 | ||||
24800 | if( nArg%2 ){ | |||
24801 | rc = SQLITE_ERROR1; | |||
24802 | }else{ | |||
24803 | p = (Unicode61Tokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(Unicode61Tokenizer)); | |||
24804 | if( p ){ | |||
24805 | const char *zCat = "L* N* Co"; | |||
24806 | int i; | |||
24807 | memset(p, 0, sizeof(Unicode61Tokenizer)); | |||
24808 | ||||
24809 | p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE1; | |||
24810 | p->nFold = 64; | |||
24811 | p->aFold = sqlite3_malloc64sqlite3_api->malloc64(p->nFold * sizeof(char)); | |||
24812 | if( p->aFold==0 ){ | |||
24813 | rc = SQLITE_NOMEM7; | |||
24814 | } | |||
24815 | ||||
24816 | /* Search for a "categories" argument */ | |||
24817 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | |||
24818 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "categories") ){ | |||
24819 | zCat = azArg[i+1]; | |||
24820 | } | |||
24821 | } | |||
24822 | if( rc==SQLITE_OK0 ){ | |||
24823 | rc = unicodeSetCategories(p, zCat); | |||
24824 | } | |||
24825 | ||||
24826 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | |||
24827 | const char *zArg = azArg[i+1]; | |||
24828 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "remove_diacritics") ){ | |||
24829 | if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ | |||
24830 | rc = SQLITE_ERROR1; | |||
24831 | }else{ | |||
24832 | p->eRemoveDiacritic = (zArg[0] - '0'); | |||
24833 | assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE((void) (0)) | |||
24834 | || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE((void) (0)) | |||
24835 | || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX((void) (0)) | |||
24836 | )((void) (0)); | |||
24837 | } | |||
24838 | }else | |||
24839 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "tokenchars") ){ | |||
24840 | rc = fts5UnicodeAddExceptions(p, zArg, 1); | |||
24841 | }else | |||
24842 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "separators") ){ | |||
24843 | rc = fts5UnicodeAddExceptions(p, zArg, 0); | |||
24844 | }else | |||
24845 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "categories") ){ | |||
24846 | /* no-op */ | |||
24847 | }else{ | |||
24848 | rc = SQLITE_ERROR1; | |||
24849 | } | |||
24850 | } | |||
24851 | }else{ | |||
24852 | rc = SQLITE_NOMEM7; | |||
24853 | } | |||
24854 | if( rc!=SQLITE_OK0 ){ | |||
24855 | fts5UnicodeDelete((Fts5Tokenizer*)p); | |||
24856 | p = 0; | |||
24857 | } | |||
24858 | *ppOut = (Fts5Tokenizer*)p; | |||
24859 | } | |||
24860 | return rc; | |||
24861 | } | |||
24862 | ||||
24863 | /* | |||
24864 | ** Return true if, for the purposes of tokenizing with the tokenizer | |||
24865 | ** passed as the first argument, codepoint iCode is considered a token | |||
24866 | ** character (not a separator). | |||
24867 | */ | |||
24868 | static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ | |||
24869 | return ( | |||
24870 | p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)] | |||
24871 | ^ fts5UnicodeIsException(p, iCode) | |||
24872 | ); | |||
24873 | } | |||
24874 | ||||
24875 | static int fts5UnicodeTokenize( | |||
24876 | Fts5Tokenizer *pTokenizer, | |||
24877 | void *pCtx, | |||
24878 | int iUnused, | |||
24879 | const char *pText, int nText, | |||
24880 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | |||
24881 | ){ | |||
24882 | Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; | |||
24883 | int rc = SQLITE_OK0; | |||
24884 | unsigned char *a = p->aTokenChar; | |||
24885 | ||||
24886 | unsigned char *zTerm = (unsigned char*)&pText[nText]; | |||
24887 | unsigned char *zCsr = (unsigned char *)pText; | |||
24888 | ||||
24889 | /* Output buffer */ | |||
24890 | char *aFold = p->aFold; | |||
24891 | int nFold = p->nFold; | |||
24892 | const char *pEnd = &aFold[nFold-6]; | |||
24893 | ||||
24894 | UNUSED_PARAM(iUnused)(void)(iUnused); | |||
24895 | ||||
24896 | /* Each iteration of this loop gobbles up a contiguous run of separators, | |||
24897 | ** then the next token. */ | |||
24898 | while( rc==SQLITE_OK0 ){ | |||
24899 | u32 iCode; /* non-ASCII codepoint read from input */ | |||
24900 | char *zOut = aFold; | |||
24901 | int is; | |||
24902 | int ie; | |||
24903 | ||||
24904 | /* Skip any separator characters. */ | |||
24905 | while( 1 ){ | |||
24906 | if( zCsr>=zTerm ) goto tokenize_done; | |||
24907 | if( *zCsr & 0x80 ) { | |||
24908 | /* A character outside of the ascii range. Skip past it if it is | |||
24909 | ** a separator character. Or break out of the loop if it is not. */ | |||
24910 | is = zCsr - (unsigned char*)pText; | |||
24911 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
24912 | if( fts5UnicodeIsAlnum(p, iCode) ){ | |||
24913 | goto non_ascii_tokenchar; | |||
24914 | } | |||
24915 | }else{ | |||
24916 | if( a[*zCsr] ){ | |||
24917 | is = zCsr - (unsigned char*)pText; | |||
24918 | goto ascii_tokenchar; | |||
24919 | } | |||
24920 | zCsr++; | |||
24921 | } | |||
24922 | } | |||
24923 | ||||
24924 | /* Run through the tokenchars. Fold them into the output buffer along | |||
24925 | ** the way. */ | |||
24926 | while( zCsr<zTerm ){ | |||
24927 | ||||
24928 | /* Grow the output buffer so that there is sufficient space to fit the | |||
24929 | ** largest possible utf-8 character. */ | |||
24930 | if( zOut>pEnd ){ | |||
24931 | aFold = sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)nFold*2); | |||
24932 | if( aFold==0 ){ | |||
24933 | rc = SQLITE_NOMEM7; | |||
24934 | goto tokenize_done; | |||
24935 | } | |||
24936 | zOut = &aFold[zOut - p->aFold]; | |||
24937 | memcpy(aFold, p->aFold, nFold); | |||
24938 | sqlite3_freesqlite3_api->free(p->aFold); | |||
24939 | p->aFold = aFold; | |||
24940 | p->nFold = nFold = nFold*2; | |||
24941 | pEnd = &aFold[nFold-6]; | |||
24942 | } | |||
24943 | ||||
24944 | if( *zCsr & 0x80 ){ | |||
24945 | /* An non-ascii-range character. Fold it into the output buffer if | |||
24946 | ** it is a token character, or break out of the loop if it is not. */ | |||
24947 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
24948 | if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){ | |||
24949 | non_ascii_tokenchar: | |||
24950 | iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic); | |||
24951 | if( iCode ) WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | |||
24952 | }else{ | |||
24953 | break; | |||
24954 | } | |||
24955 | }else if( a[*zCsr]==0 ){ | |||
24956 | /* An ascii-range separator character. End of token. */ | |||
24957 | break; | |||
24958 | }else{ | |||
24959 | ascii_tokenchar: | |||
24960 | if( *zCsr>='A' && *zCsr<='Z' ){ | |||
24961 | *zOut++ = *zCsr + 32; | |||
24962 | }else{ | |||
24963 | *zOut++ = *zCsr; | |||
24964 | } | |||
24965 | zCsr++; | |||
24966 | } | |||
24967 | ie = zCsr - (unsigned char*)pText; | |||
24968 | } | |||
24969 | ||||
24970 | /* Invoke the token callback */ | |||
24971 | rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); | |||
24972 | } | |||
24973 | ||||
24974 | tokenize_done: | |||
24975 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; | |||
24976 | return rc; | |||
24977 | } | |||
24978 | ||||
24979 | /************************************************************************** | |||
24980 | ** Start of porter stemmer implementation. | |||
24981 | */ | |||
24982 | ||||
24983 | /* Any tokens larger than this (in bytes) are passed through without | |||
24984 | ** stemming. */ | |||
24985 | #define FTS5_PORTER_MAX_TOKEN64 64 | |||
24986 | ||||
24987 | typedef struct PorterTokenizer PorterTokenizer; | |||
24988 | struct PorterTokenizer { | |||
24989 | fts5_tokenizer_v2 tokenizer_v2; /* Parent tokenizer module */ | |||
24990 | Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */ | |||
24991 | char aBuf[FTS5_PORTER_MAX_TOKEN64 + 64]; | |||
24992 | }; | |||
24993 | ||||
24994 | /* | |||
24995 | ** Delete a "porter" tokenizer. | |||
24996 | */ | |||
24997 | static void fts5PorterDelete(Fts5Tokenizer *pTok){ | |||
24998 | if( pTok ){ | |||
24999 | PorterTokenizer *p = (PorterTokenizer*)pTok; | |||
25000 | if( p->pTokenizer ){ | |||
25001 | p->tokenizer_v2.xDelete(p->pTokenizer); | |||
25002 | } | |||
25003 | sqlite3_freesqlite3_api->free(p); | |||
25004 | } | |||
25005 | } | |||
25006 | ||||
25007 | /* | |||
25008 | ** Create a "porter" tokenizer. | |||
25009 | */ | |||
25010 | static int fts5PorterCreate( | |||
25011 | void *pCtx, | |||
25012 | const char **azArg, int nArg, | |||
25013 | Fts5Tokenizer **ppOut | |||
25014 | ){ | |||
25015 | fts5_api *pApi = (fts5_api*)pCtx; | |||
25016 | int rc = SQLITE_OK0; | |||
25017 | PorterTokenizer *pRet; | |||
25018 | void *pUserdata = 0; | |||
25019 | const char *zBase = "unicode61"; | |||
25020 | fts5_tokenizer_v2 *pV2 = 0; | |||
25021 | ||||
25022 | if( nArg>0 ){ | |||
25023 | zBase = azArg[0]; | |||
25024 | } | |||
25025 | ||||
25026 | pRet = (PorterTokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(PorterTokenizer)); | |||
25027 | if( pRet ){ | |||
25028 | memset(pRet, 0, sizeof(PorterTokenizer)); | |||
25029 | rc = pApi->xFindTokenizer_v2(pApi, zBase, &pUserdata, &pV2); | |||
25030 | }else{ | |||
25031 | rc = SQLITE_NOMEM7; | |||
25032 | } | |||
25033 | if( rc==SQLITE_OK0 ){ | |||
25034 | int nArg2 = (nArg>0 ? nArg-1 : 0); | |||
25035 | const char **az2 = (nArg2 ? &azArg[1] : 0); | |||
25036 | memcpy(&pRet->tokenizer_v2, pV2, sizeof(fts5_tokenizer_v2)); | |||
25037 | rc = pRet->tokenizer_v2.xCreate(pUserdata, az2, nArg2, &pRet->pTokenizer); | |||
25038 | } | |||
25039 | ||||
25040 | if( rc!=SQLITE_OK0 ){ | |||
25041 | fts5PorterDelete((Fts5Tokenizer*)pRet); | |||
25042 | pRet = 0; | |||
25043 | } | |||
25044 | *ppOut = (Fts5Tokenizer*)pRet; | |||
25045 | return rc; | |||
25046 | } | |||
25047 | ||||
/*
** A token callback (xToken) bundled with the context pointer (pCtx) for
** it and a character buffer (aBuf).
*/
typedef struct PorterContext {
  void *pCtx;
  int (*xToken)(void*, int, const char*, int, int, int);
  char *aBuf;
} PorterContext;
25054 | ||||
/*
** One suffix-rewriting rule: a suffix string and its length, an optional
** condition callback taking a stem and its length, and a replacement
** string and its length.
*/
typedef struct PorterRule {
  const char *zSuffix;
  int nSuffix;
  int (*xCond)(char *zStem, int nStem);
  const char *zOutput;
  int nOutput;
} PorterRule;
25063 | ||||
#if 0
/*
** NOTE: this function is compiled out by the surrounding "#if 0".
**
** Search the rule array aRule[] (terminated by an entry with a NULL
** zSuffix) for the first rule whose suffix matches the end of the
** (*pnBuf) byte buffer aBuf. If one is found and its condition (if any)
** holds for the stem, replace the suffix with the rule's output, update
** *pnBuf and return the index of the rule. Otherwise return -1.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int nBuf = *pnBuf;
  PorterRule *pRule = aRule;
  int nStem;

  while( pRule->zSuffix ){
    assert( strlen(pRule->zSuffix)==pRule->nSuffix );
    assert( strlen(pRule->zOutput)==pRule->nOutput );
    if( nBuf>=pRule->nSuffix
     && 0==memcmp(&aBuf[nBuf - pRule->nSuffix], pRule->zSuffix, pRule->nSuffix)
    ){
      break;
    }
    pRule++;
  }
  if( pRule->zSuffix==0 ) return -1;

  nStem = nBuf - pRule->nSuffix;
  if( pRule->xCond && !pRule->xCond(aBuf, nStem) ) return -1;

  memcpy(&aBuf[nStem], pRule->zOutput, pRule->nOutput);
  *pnBuf = nStem + pRule->nOutput;
  return pRule - aRule;
}
#endif
25089 | ||||
/*
** Return 1 if c is one of the lower-case vowels a, e, i, o or u, or if c
** is 'y' and bYIsVowel is non-zero. Return 0 otherwise.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return bYIsVowel ? 1 : 0;
    default:
      return 0;
  }
}
25095 | ||||
/*
** Consume one vowel-consonant transition from the start of zStem[0..nStem).
**
** Any leading consonants are skipped, then the run of vowels that follows,
** up to and including the first consonant after it. The return value is the
** number of bytes consumed, or 0 if the stem contains no such transition.
**
** bPrevCons is true if the character preceding zStem[0] was a consonant.
** This decides whether a 'y' at the start of the stem counts as a vowel.
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int iPos = 0;
  int bIsCons = bPrevCons;

  /* Scan for a vowel */
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons==0 ) break;
    iPos++;
  }

  /* Scan for a consonant, starting just past the vowel found above */
  for(iPos++; iPos<nStem; iPos++){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons ) return iPos+1;
  }
  return 0;
}
25111 | ||||
/*
** Porter rule condition (m > 0): true if the stem contains at least one
** vowel-consonant transition.
*/
static int fts5Porter_MGt0(char *zStem, int nStem){
  return fts5PorterGobbleVC(zStem, nStem, 0)!=0;
}
25116 | ||||
/*
** Porter rule condition (m > 1): true if the stem contains at least two
** vowel-consonant transitions.
*/
static int fts5Porter_MGt1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  /* The first transition ended on a consonant, hence bPrevCons=1. */
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0;
}
25126 | ||||
/*
** Porter rule condition (m = 1): true if the stem contains exactly one
** vowel-consonant transition.
*/
static int fts5Porter_MEq1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  /* The first transition ended on a consonant, hence bPrevCons=1. */
  return 0==fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1);
}
25136 | ||||
/*
** Porter rule condition (*o): true if the stem ends in
** consonant-vowel-consonant and the final consonant is not 'w', 'x' or 'y'.
**
** An empty stem never satisfies the condition.
*/
static int fts5Porter_Ostar(char *zStem, int nStem){
  int i;
  int mask = 0;                   /* Consonant flags of the last 3 characters */
  int bCons = 0;
  char cLast;

  if( nStem<1 ) return 0;         /* Avoid reading zStem[-1] */
  cLast = zStem[nStem-1];
  if( cLast=='w' || cLast=='x' || cLast=='y' ) return 0;

  /* The whole stem must be scanned, because whether a 'y' is a vowel depends
  ** on the character before it. Only the three most recent flags are kept:
  ** shifting an unbounded history through a signed int overflows once the
  ** stem is longer than the width of an int. */
  for(i=0; i<nStem; i++){
    bCons = !fts5PorterIsVowel(zStem[i], bCons);
    assert( bCons==0 || bCons==1 );
    mask = ((mask << 1) | bCons) & 0x0007;
  }
  return (mask==0x0005);
}
25153 | ||||
/*
** Porter rule condition (m > 1 and (*S or *T)): true if the stem ends in
** 's' or 't' and contains at least two vowel-consonant transitions. The
** stem must not be empty.
*/
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ) return 0;
  return fts5Porter_MGt1(zStem, nStem);
}
25160 | ||||
/*
** Porter rule condition (*v*): true if the stem contains a vowel. A 'y'
** counts as a vowel anywhere except in the first position.
*/
static int fts5Porter_Vowel(char *zStem, int nStem){
  int i = 0;
  while( i<nStem ){
    if( fts5PorterIsVowel(zStem[i], i>0) ) return 1;
    i++;
  }
  return 0;
}
25171 | ||||
25172 | ||||
25173 | /************************************************************************** | |||
25174 | *************************************************************************** | |||
25175 | ** GENERATED CODE STARTS HERE (mkportersteps.tcl) | |||
25176 | */ | |||
25177 | ||||
/*
** Porter step 4. If the word in aBuf[0..*pnBuf) ends in one of the suffixes
** handled below and the stem in front of it satisfies (m > 1), remove the
** suffix by shrinking *pnBuf. For "ion" the stem must also end in 's' or 't'.
** Only the first suffix that matches is considered. Always returns 0.
**
** Words shorter than two characters are left alone. Without that check the
** switch below would read aBuf[-1] for a one-character word.
*/
static int fts5PorterStep4(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'e':
      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'l':
      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'o':
      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 's':
      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 't':
      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'z':
      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    default:
      break;

  }
  return ret;
}
25310 | ||||
25311 | ||||
/*
** Second half of Porter step 1B, run after an "ed" or "ing" suffix has been
** removed. If the word in aBuf[0..*pnBuf) is longer than two characters and
** ends in "at", "bl" or "iz", append an 'e', increase *pnBuf by one and
** return 1. Otherwise return 0 and change nothing.
**
** The caller must leave room for one extra byte at aBuf[*pnBuf].
**
** Words shorter than two characters are left alone. Without that check the
** switch below would read aBuf[-1] for a one-character word.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ate", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    case 'b':
      if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ble", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ize", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    default:
      break;

  }
  return ret;
}
25344 | ||||
25345 | ||||
/*
** Porter step 2. If the word in aBuf[0..*pnBuf) ends in one of the suffixes
** handled below and the stem in front of it satisfies (m > 0), replace the
** suffix with its shorter form and update *pnBuf. Only the first suffix that
** matches is considered. Always returns 0.
**
** Words shorter than two characters are left alone. Without that check the
** switch below would read aBuf[-1] for a one-character word.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ate", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "tion", 4);
          *pnBuf = nBuf - 6 + 4;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ence", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ance", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }
      break;

    case 'e':
      if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ize", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 'g':
      if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "log", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 'l':
      if( nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ble", 3);
          *pnBuf = nBuf - 3 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "al", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ent", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "e", 1);
          *pnBuf = nBuf - 3 + 1;
        }
      }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ous", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }
      break;

    case 'o':
      if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ize", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ate", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ate", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 's':
      if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ive", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ful", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ous", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ive", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("biliti", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "ble", 3);
          *pnBuf = nBuf - 6 + 3;
        }
      }
      break;

    default:
      break;

  }
  return ret;
}
25491 | ||||
25492 | ||||
/*
** Porter step 3. If the word in aBuf[0..*pnBuf) ends in one of the suffixes
** handled below and the stem in front of it satisfies (m > 0), shorten or
** remove the suffix and update *pnBuf. Only the first suffix that matches is
** considered. Always returns 0.
**
** Words shorter than two characters are left alone. Without that check the
** switch below would read aBuf[-1] for a one-character word.
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ic", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }
      break;

    case 's':
      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }
      break;

    case 'z':
      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

    default:
      break;

  }
  return ret;
}
25557 | ||||
25558 | ||||
/*
** First half of Porter step 1B, applied to the word in aBuf[0..*pnBuf):
**
**   "eed" -> "ee"   if the stem satisfies (m > 0)
**   "ed"  -> ""     if the stem contains a vowel
**   "ing" -> ""     if the stem contains a vowel
**
** Return 1 if an "ed" or "ing" suffix was removed, which tells the caller
** to run the follow-up rules. Return 0 in every other case, including a
** successful "eed" -> "ee" rewrite.
**
** Words shorter than two characters are left alone, so that the switch
** below never reads in front of the buffer.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'e':
      if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ee", 2);
          *pnBuf = nBuf - 3 + 2;
        }
      }else if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_Vowel(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
          ret = 1;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_Vowel(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
          ret = 1;
        }
      }
      break;

    default:
      break;

  }
  return ret;
}
25590 | ||||
25591 | /* | |||
25592 | ** GENERATED CODE ENDS HERE (mkportersteps.tcl) | |||
25593 | *************************************************************************** | |||
25594 | **************************************************************************/ | |||
25595 | ||||
/*
** Porter step 1A: strip plural endings from the word in aBuf[0..*pnBuf).
**
**   "sses" -> "ss"
**   "ies"  -> "i"
**   "ss"   -> "ss"   (unchanged)
**   "s"    -> ""
**
** Only *pnBuf is modified. The word must be at least two characters long.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  const int nBuf = *pnBuf;
  char cPrev;

  if( aBuf[nBuf-1]!='s' ) return;

  cPrev = aBuf[nBuf-2];
  if( cPrev=='e' ){
    int bSsesOrIes = (nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s')
                  || (nBuf>3 && aBuf[nBuf-3]=='i');
    *pnBuf = bSsesOrIes ? nBuf-2 : nBuf-1;
  }else if( cPrev!='s' ){
    *pnBuf = nBuf-1;
  }
}
25613 | ||||
25614 | static int fts5PorterCb( | |||
25615 | void *pCtx, | |||
25616 | int tflags, | |||
25617 | const char *pToken, | |||
25618 | int nToken, | |||
25619 | int iStart, | |||
25620 | int iEnd | |||
25621 | ){ | |||
25622 | PorterContext *p = (PorterContext*)pCtx; | |||
25623 | ||||
25624 | char *aBuf; | |||
25625 | int nBuf; | |||
25626 | ||||
25627 | if( nToken>FTS5_PORTER_MAX_TOKEN64 || nToken<3 ) goto pass_through; | |||
25628 | aBuf = p->aBuf; | |||
25629 | nBuf = nToken; | |||
25630 | memcpy(aBuf, pToken, nBuf); | |||
25631 | ||||
25632 | /* Step 1. */ | |||
25633 | fts5PorterStep1A(aBuf, &nBuf); | |||
25634 | if( fts5PorterStep1B(aBuf, &nBuf) ){ | |||
25635 | if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){ | |||
25636 | char c = aBuf[nBuf-1]; | |||
25637 | if( fts5PorterIsVowel(c, 0)==0 | |||
25638 | && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] | |||
25639 | ){ | |||
25640 | nBuf--; | |||
25641 | }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){ | |||
25642 | aBuf[nBuf++] = 'e'; | |||
25643 | } | |||
25644 | } | |||
25645 | } | |||
25646 | ||||
25647 | /* Step 1C. */ | |||
25648 | if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){ | |||
25649 | aBuf[nBuf-1] = 'i'; | |||
25650 | } | |||
25651 | ||||
25652 | /* Steps 2 through 4. */ | |||
25653 | fts5PorterStep2(aBuf, &nBuf); | |||
25654 | fts5PorterStep3(aBuf, &nBuf); | |||
25655 | fts5PorterStep4(aBuf, &nBuf); | |||
25656 | ||||
25657 | /* Step 5a. */ | |||
25658 | assert( nBuf>0 )((void) (0)); | |||
25659 | if( aBuf[nBuf-1]=='e' ){ | |||
25660 | if( fts5Porter_MGt1(aBuf, nBuf-1) | |||
25661 | || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) | |||
25662 | ){ | |||
25663 | nBuf--; | |||
25664 | } | |||
25665 | } | |||
25666 | ||||
25667 | /* Step 5b. */ | |||
25668 | if( nBuf>1 && aBuf[nBuf-1]=='l' | |||
25669 | && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) | |||
25670 | ){ | |||
25671 | nBuf--; | |||
25672 | } | |||
25673 | ||||
25674 | return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); | |||
25675 | ||||
25676 | pass_through: | |||
25677 | return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); | |||
25678 | } | |||
25679 | ||||
25680 | /* | |||
25681 | ** Tokenize using the porter tokenizer. | |||
25682 | */ | |||
25683 | static int fts5PorterTokenize( | |||
25684 | Fts5Tokenizer *pTokenizer, | |||
25685 | void *pCtx, | |||
25686 | int flags, | |||
25687 | const char *pText, int nText, | |||
25688 | const char *pLoc, int nLoc, | |||
25689 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | |||
25690 | ){ | |||
25691 | PorterTokenizer *p = (PorterTokenizer*)pTokenizer; | |||
25692 | PorterContext sCtx; | |||
25693 | sCtx.xToken = xToken; | |||
25694 | sCtx.pCtx = pCtx; | |||
25695 | sCtx.aBuf = p->aBuf; | |||
25696 | return p->tokenizer_v2.xTokenize( | |||
25697 | p->pTokenizer, (void*)&sCtx, flags, pText, nText, pLoc, nLoc, fts5PorterCb | |||
25698 | ); | |||
25699 | } | |||
25700 | ||||
25701 | /************************************************************************** | |||
25702 | ** Start of trigram implementation. | |||
25703 | */ | |||
/*
** Configuration of one trigram tokenizer instance.
*/
typedef struct TrigramTokenizer {
  int bFold;                      /* True to fold to lower-case */
  int iFoldParam;                 /* Parameter to pass to Fts5UnicodeFold() */
} TrigramTokenizer;
25709 | ||||
25710 | /* | |||
25711 | ** Free a trigram tokenizer. | |||
25712 | */ | |||
25713 | static void fts5TriDelete(Fts5Tokenizer *p){ | |||
25714 | sqlite3_freesqlite3_api->free(p); | |||
25715 | } | |||
25716 | ||||
25717 | /* | |||
25718 | ** Allocate a trigram tokenizer. | |||
25719 | */ | |||
25720 | static int fts5TriCreate( | |||
25721 | void *pUnused, | |||
25722 | const char **azArg, | |||
25723 | int nArg, | |||
25724 | Fts5Tokenizer **ppOut | |||
25725 | ){ | |||
25726 | int rc = SQLITE_OK0; | |||
25727 | TrigramTokenizer *pNew = 0; | |||
25728 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
25729 | if( nArg%2 ){ | |||
25730 | rc = SQLITE_ERROR1; | |||
25731 | }else{ | |||
25732 | int i; | |||
25733 | pNew = (TrigramTokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); | |||
25734 | if( pNew==0 ){ | |||
25735 | rc = SQLITE_NOMEM7; | |||
25736 | }else{ | |||
25737 | pNew->bFold = 1; | |||
25738 | pNew->iFoldParam = 0; | |||
25739 | ||||
25740 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | |||
25741 | const char *zArg = azArg[i+1]; | |||
25742 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "case_sensitive") ){ | |||
25743 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){ | |||
25744 | rc = SQLITE_ERROR1; | |||
25745 | }else{ | |||
25746 | pNew->bFold = (zArg[0]=='0'); | |||
25747 | } | |||
25748 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "remove_diacritics") ){ | |||
25749 | if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ | |||
25750 | rc = SQLITE_ERROR1; | |||
25751 | }else{ | |||
25752 | pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; | |||
25753 | } | |||
25754 | }else{ | |||
25755 | rc = SQLITE_ERROR1; | |||
25756 | } | |||
25757 | } | |||
25758 | ||||
25759 | if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ | |||
25760 | rc = SQLITE_ERROR1; | |||
25761 | } | |||
25762 | ||||
25763 | if( rc!=SQLITE_OK0 ){ | |||
25764 | fts5TriDelete((Fts5Tokenizer*)pNew); | |||
25765 | pNew = 0; | |||
25766 | } | |||
25767 | } | |||
25768 | } | |||
25769 | *ppOut = (Fts5Tokenizer*)pNew; | |||
25770 | return rc; | |||
25771 | } | |||
25772 | ||||
25773 | /* | |||
25774 | ** Trigram tokenizer tokenize routine. | |||
25775 | */ | |||
25776 | static int fts5TriTokenize( | |||
25777 | Fts5Tokenizer *pTok, | |||
25778 | void *pCtx, | |||
25779 | int unusedFlags, | |||
25780 | const char *pText, int nText, | |||
25781 | int (*xToken)(void*, int, const char*, int, int, int) | |||
25782 | ){ | |||
25783 | TrigramTokenizer *p = (TrigramTokenizer*)pTok; | |||
25784 | int rc = SQLITE_OK0; | |||
25785 | char aBuf[32]; | |||
25786 | char *zOut = aBuf; | |||
25787 | int ii; | |||
25788 | const unsigned char *zIn = (const unsigned char*)pText; | |||
25789 | const unsigned char *zEof = (zIn ? &zIn[nText] : 0); | |||
25790 | u32 iCode = 0; | |||
25791 | int aStart[3]; /* Input offset of each character in aBuf[] */ | |||
25792 | ||||
25793 | UNUSED_PARAM(unusedFlags)(void)(unusedFlags); | |||
25794 | ||||
25795 | /* Populate aBuf[] with the characters for the first trigram. */ | |||
25796 | for(ii=0; ii<3; ii++){ | |||
25797 | do { | |||
25798 | aStart[ii] = zIn - (const unsigned char*)pText; | |||
25799 | if( zIn>=zEof ) return SQLITE_OK0; | |||
25800 | READ_UTF8(zIn, zEof, iCode)iCode = *(zIn++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zIn<zEof && (*zIn & 0xc0) ==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zIn++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
25801 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); | |||
25802 | }while( iCode==0 ); | |||
25803 | WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | |||
25804 | } | |||
25805 | ||||
25806 | /* At the start of each iteration of this loop: | |||
25807 | ** | |||
25808 | ** aBuf: Contains 3 characters. The 3 characters of the next trigram. | |||
25809 | ** zOut: Points to the byte following the last character in aBuf. | |||
25810 | ** aStart[3]: Contains the byte offset in the input text corresponding | |||
25811 | ** to the start of each of the three characters in the buffer. | |||
25812 | */ | |||
25813 | assert( zIn<=zEof )((void) (0)); | |||
25814 | while( 1 ){ | |||
25815 | int iNext; /* Start of character following current tri */ | |||
25816 | const char *z1; | |||
25817 | ||||
25818 | /* Read characters from the input up until the first non-diacritic */ | |||
25819 | do { | |||
25820 | iNext = zIn - (const unsigned char*)pText; | |||
25821 | if( zIn>=zEof ){ | |||
25822 | iCode = 0; | |||
25823 | break; | |||
25824 | } | |||
25825 | READ_UTF8(zIn, zEof, iCode)iCode = *(zIn++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zIn<zEof && (*zIn & 0xc0) ==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zIn++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
25826 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); | |||
25827 | }while( iCode==0 ); | |||
25828 | ||||
25829 | /* Pass the current trigram back to fts5 */ | |||
25830 | rc = xToken(pCtx, 0, aBuf, zOut-aBuf, aStart[0], iNext); | |||
25831 | if( iCode==0 || rc!=SQLITE_OK0 ) break; | |||
25832 | ||||
25833 | /* Remove the first character from buffer aBuf[]. Append the character | |||
25834 | ** with codepoint iCode. */ | |||
25835 | z1 = aBuf; | |||
25836 | FTS5_SKIP_UTF8(z1){ if( ((unsigned char)(*(z1++)))>=0xc0 ){ while( (((unsigned char)*z1) & 0xc0)==0x80 ){ z1++; } } }; | |||
25837 | memmove(aBuf, z1, zOut - z1); | |||
25838 | zOut -= (z1 - aBuf); | |||
25839 | WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | |||
25840 | ||||
25841 | /* Update the aStart[] array */ | |||
25842 | aStart[0] = aStart[1]; | |||
25843 | aStart[1] = aStart[2]; | |||
25844 | aStart[2] = iNext; | |||
25845 | } | |||
25846 | ||||
25847 | return rc; | |||
25848 | } | |||
25849 | ||||
25850 | /* | |||
25851 | ** Argument xCreate is a pointer to a constructor function for a tokenizer. | |||
25852 | ** pTok is a tokenizer previously created using the same method. This function | |||
25853 | ** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB | |||
25854 | ** indicating the style of pattern matching that the tokenizer can support. | |||
25855 | ** In practice, this is: | |||
25856 | ** | |||
25857 | ** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB | |||
25858 | ** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE | |||
25859 | ** all other tokenizers - FTS5_PATTERN_NONE | |||
25860 | */ | |||
25861 | static int sqlite3Fts5TokenizerPattern( | |||
25862 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), | |||
25863 | Fts5Tokenizer *pTok | |||
25864 | ){ | |||
25865 | if( xCreate==fts5TriCreate ){ | |||
25866 | TrigramTokenizer *p = (TrigramTokenizer*)pTok; | |||
25867 | if( p->iFoldParam==0 ){ | |||
25868 | return p->bFold ? FTS5_PATTERN_LIKE65 : FTS5_PATTERN_GLOB66; | |||
25869 | } | |||
25870 | } | |||
25871 | return FTS5_PATTERN_NONE0; | |||
25872 | } | |||
25873 | ||||
25874 | /* | |||
25875 | ** Return true if the tokenizer described by p->azArg[] is the trigram | |||
25876 | ** tokenizer. This tokenizer needs to be loaded before xBestIndex is | |||
25877 | ** called for the first time in order to correctly handle LIKE/GLOB. | |||
25878 | */ | |||
25879 | static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig *p){ | |||
25880 | return (p->nArg>=1 && 0==sqlite3_stricmpsqlite3_api->stricmp(p->azArg[0], "trigram")); | |||
25881 | } | |||
25882 | ||||
25883 | ||||
25884 | /* | |||
25885 | ** Register all built-in tokenizers with FTS5. | |||
25886 | */ | |||
25887 | static int sqlite3Fts5TokenizerInit(fts5_api *pApi){ | |||
25888 | struct BuiltinTokenizer { | |||
25889 | const char *zName; | |||
25890 | fts5_tokenizer x; | |||
25891 | } aBuiltin[] = { | |||
25892 | { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, | |||
25893 | { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, | |||
25894 | { "trigram", {fts5TriCreate, fts5TriDelete, fts5TriTokenize}}, | |||
25895 | }; | |||
25896 | ||||
25897 | int rc = SQLITE_OK0; /* Return code */ | |||
25898 | int i; /* To iterate through builtin functions */ | |||
25899 | ||||
25900 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aBuiltin)((int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]))); i++){ | |||
25901 | rc = pApi->xCreateTokenizer(pApi, | |||
25902 | aBuiltin[i].zName, | |||
25903 | (void*)pApi, | |||
25904 | &aBuiltin[i].x, | |||
25905 | 0 | |||
25906 | ); | |||
25907 | } | |||
25908 | if( rc==SQLITE_OK0 ){ | |||
25909 | fts5_tokenizer_v2 sPorter = { | |||
25910 | 2, | |||
25911 | fts5PorterCreate, | |||
25912 | fts5PorterDelete, | |||
25913 | fts5PorterTokenize | |||
25914 | }; | |||
25915 | rc = pApi->xCreateTokenizer_v2(pApi, | |||
25916 | "porter", | |||
25917 | (void*)pApi, | |||
25918 | &sPorter, | |||
25919 | 0 | |||
25920 | ); | |||
25921 | } | |||
25922 | return rc; | |||
25923 | } | |||
25924 | ||||
25925 | #line 1 "fts5_unicode2.c" | |||
25926 | /* | |||
25927 | ** 2012-05-25 | |||
25928 | ** | |||
25929 | ** The author disclaims copyright to this source code. In place of | |||
25930 | ** a legal notice, here is a blessing: | |||
25931 | ** | |||
25932 | ** May you do good and not evil. | |||
25933 | ** May you find forgiveness for yourself and forgive others. | |||
25934 | ** May you share freely, never taking more than you give. | |||
25935 | ** | |||
25936 | ****************************************************************************** | |||
25937 | */ | |||
25938 | ||||
25939 | /* | |||
25940 | ** DO NOT EDIT THIS MACHINE GENERATED FILE. | |||
25941 | */ | |||
25942 | ||||
25943 | ||||
25944 | #include <assert.h> | |||
25945 | ||||
25946 | ||||
25947 | ||||
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** Parameters:
**   c         Codepoint to map.
**   bComplex  If zero, table entries flagged with HIBIT are ignored and
**             c is returned unchanged for them.
**
** Each aDia[] entry packs a range of codepoints: the upper 13 bits are
** the first codepoint of the range and the low 3 bits are the number of
** further codepoints that follow it. aChar[] holds, at the same index,
** the replacement character in its low 7 bits; the high bit (HIBIT)
** marks the entries that are only applied when bComplex is non-zero.
** A replacement of '\0' means 0 is returned for that range.
**
** Codepoints that fall outside every range are returned unchanged.
*/
static int fts5_remove_diacritic(int c, int bComplex){
  /* Both tables are read-only, so they are static const rather than
  ** automatic arrays that would be re-initialized on every call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
#define HIBIT ((unsigned char)0x80)
  static const unsigned char aChar[] = {
    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  /* Setting the low 3 bits makes key compare >= any aDia[] entry whose
  ** first codepoint is c, whatever count that entry carries. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Binary search for the last entry that is <= key. aDia[0] is 0, so
  ** iRes always ends up naming a valid entry. */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* "Complex" mappings are skipped unless the caller asked for them. */
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;

  /* c is mapped only if it lies inside the range described by aDia[iRes]. */
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}
26017 | ||||
26018 | ||||
/*
** Return non-zero if codepoint c is a diacritical modifier character,
** or zero otherwise.
**
** Only codepoints 768 through 817 can match. Each of them owns one bit
** in a pair of 32-bit masks: aMask[0] covers 768..799 and aMask[1]
** covers 800..817. The value returned for a match is the isolated mask
** bit itself, not necessarily 1.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  static const unsigned int aMask[2] = { 0x08029FDF, 0x000361F8 };
  unsigned int iBit;              /* Offset of c from codepoint 768 */

  if( c<768 || c>817 ) return 0;
  iBit = (unsigned int)(c - 768);
  return aMask[iBit>>5] & ((unsigned int)1 << (iBit & 0x1F));
}
26031 | ||||
26032 | ||||
/*
** Interpret c as a unicode codepoint and fold it to lower case. If c is
** an upper case character with a lower case equivalent, the codepoint
** of the lower case form is returned. Otherwise c is returned as is.
**
** For codepoints in the range 128..65535 only, a non-zero
** eRemoveDiacritic additionally passes the folded value through
** fts5_remove_diacritic(); the value 2 selects its "complex" mode.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
static int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
  /* Each entry of aEntry[] is a folding rule covering nRange codepoints
  ** that start at codepoint iCode.
  **
  ** When the least significant bit of flags is clear, every codepoint
  ** in the range is upper case and is folded. When it is set, only
  ** every second codepoint is folded - those with the same parity as
  ** iCode.
  **
  ** The upper 7 bits of flags select an entry of aiOff[]. A codepoint C
  ** that the rule applies to folds to ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The table is generated from the CaseFolding.txt file distributed as
  ** part of the "Unicode Character Database". See http://www.unicode.org
  ** for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   37,    38,    40,    48,    63,    64,    69,    71,
   79,    80,    116,   202,   203,   205,   206,   207,
   209,   210,   211,   213,   214,   217,   218,   219,
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
   65514, 65521, 65527, 65528, 65529,
  };

  const struct TableEntry *pRule; /* Last rule with iCode<=c */
  int iFirst;                     /* Lower bound of the search window */
  int iLast;                      /* Upper bound of the search window */
  int iBest;                      /* Index of best candidate so far */
  int folded;                     /* Value to return */

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* ASCII: only 'A'..'Z' change, and diacritics are never consulted. */
  if( c<128 ){
    return (c>='A' && c<='Z') ? c + ('a' - 'A') : c;
  }

  /* Beyond the 16-bit range: a single run of 40 codepoints is folded. */
  if( c>=65536 ){
    return (c>=66560 && c<66600) ? c + 40 : c;
  }

  /* 128..65535: binary search for the last rule starting at or below c. */
  assert( c>aEntry[0].iCode );
  iFirst = 0;
  iLast = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
  iBest = -1;
  while( iFirst<=iLast ){
    int iMid = (iFirst + iLast) / 2;
    if( c < aEntry[iMid].iCode ){
      iLast = iMid-1;
    }else{
      iBest = iMid;
      iFirst = iMid+1;
    }
  }
  assert( iBest>=0 && c>=aEntry[iBest].iCode );
  pRule = &aEntry[iBest];

  /* Apply the rule if c is inside its range and, for every-second-
  ** codepoint rules, has the same parity as the start of the range. */
  folded = c;
  if( c<(pRule->iCode + pRule->nRange)
   && 0==(0x01 & pRule->flags & (pRule->iCode ^ c))
  ){
    folded = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
    assert( folded>0 );
  }

  if( eRemoveDiacritic ){
    folded = fts5_remove_diacritic(folded, eRemoveDiacritic==2);
  }
  return folded;
}
26176 | ||||
26177 | ||||
26178 | static int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ | |||
26179 | aArray[0] = 1; | |||
26180 | switch( zCat[0] ){ | |||
26181 | case 'C': | |||
26182 | switch( zCat[1] ){ | |||
26183 | case 'c': aArray[1] = 1; break; | |||
26184 | case 'f': aArray[2] = 1; break; | |||
26185 | case 'n': aArray[3] = 1; break; | |||
26186 | case 's': aArray[4] = 1; break; | |||
26187 | case 'o': aArray[31] = 1; break; | |||
26188 | case '*': | |||
26189 | aArray[1] = 1; | |||
26190 | aArray[2] = 1; | |||
26191 | aArray[3] = 1; | |||
26192 | aArray[4] = 1; | |||
26193 | aArray[31] = 1; | |||
26194 | break; | |||
26195 | default: return 1; } | |||
26196 | break; | |||
26197 | ||||
26198 | case 'L': | |||
26199 | switch( zCat[1] ){ | |||
26200 | case 'l': aArray[5] = 1; break; | |||
26201 | case 'm': aArray[6] = 1; break; | |||
26202 | case 'o': aArray[7] = 1; break; | |||
26203 | case 't': aArray[8] = 1; break; | |||
26204 | case 'u': aArray[9] = 1; break; | |||
26205 | case 'C': aArray[30] = 1; break; | |||
26206 | case '*': | |||
26207 | aArray[5] = 1; | |||
26208 | aArray[6] = 1; | |||
26209 | aArray[7] = 1; | |||
26210 | aArray[8] = 1; | |||
26211 | aArray[9] = 1; | |||
26212 | aArray[30] = 1; | |||
26213 | break; | |||
26214 | default: return 1; } | |||
26215 | break; | |||
26216 | ||||
26217 | case 'M': | |||
26218 | switch( zCat[1] ){ | |||
26219 | case 'c': aArray[10] = 1; break; | |||
26220 | case 'e': aArray[11] = 1; break; | |||
26221 | case 'n': aArray[12] = 1; break; | |||
26222 | case '*': | |||
26223 | aArray[10] = 1; | |||
26224 | aArray[11] = 1; | |||
26225 | aArray[12] = 1; | |||
26226 | break; | |||
26227 | default: return 1; } | |||
26228 | break; | |||
26229 | ||||
26230 | case 'N': | |||
26231 | switch( zCat[1] ){ | |||
26232 | case 'd': aArray[13] = 1; break; | |||
26233 | case 'l': aArray[14] = 1; break; | |||
26234 | case 'o': aArray[15] = 1; break; | |||
26235 | case '*': | |||
26236 | aArray[13] = 1; | |||
26237 | aArray[14] = 1; | |||
26238 | aArray[15] = 1; | |||
26239 | break; | |||
26240 | default: return 1; } | |||
26241 | break; | |||
26242 | ||||
26243 | case 'P': | |||
26244 | switch( zCat[1] ){ | |||
26245 | case 'c': aArray[16] = 1; break; | |||
26246 | case 'd': aArray[17] = 1; break; | |||
26247 | case 'e': aArray[18] = 1; break; | |||
26248 | case 'f': aArray[19] = 1; break; | |||
26249 | case 'i': aArray[20] = 1; break; | |||
26250 | case 'o': aArray[21] = 1; break; | |||
26251 | case 's': aArray[22] = 1; break; | |||
26252 | case '*': | |||
26253 | aArray[16] = 1; | |||
26254 | aArray[17] = 1; | |||
26255 | aArray[18] = 1; | |||
26256 | aArray[19] = 1; | |||
26257 | aArray[20] = 1; | |||
26258 | aArray[21] = 1; | |||
26259 | aArray[22] = 1; | |||
26260 | break; | |||
26261 | default: return 1; } | |||
26262 | break; | |||
26263 | ||||
26264 | case 'S': | |||
26265 | switch( zCat[1] ){ | |||
26266 | case 'c': aArray[23] = 1; break; | |||
26267 | case 'k': aArray[24] = 1; break; | |||
26268 | case 'm': aArray[25] = 1; break; | |||
26269 | case 'o': aArray[26] = 1; break; | |||
26270 | case '*': | |||
26271 | aArray[23] = 1; | |||
26272 | aArray[24] = 1; | |||
26273 | aArray[25] = 1; | |||
26274 | aArray[26] = 1; | |||
26275 | break; | |||
26276 | default: return 1; } | |||
26277 | break; | |||
26278 | ||||
26279 | case 'Z': | |||
26280 | switch( zCat[1] ){ | |||
26281 | case 'l': aArray[27] = 1; break; | |||
26282 | case 'p': aArray[28] = 1; break; | |||
26283 | case 's': aArray[29] = 1; break; | |||
26284 | case '*': | |||
26285 | aArray[27] = 1; | |||
26286 | aArray[28] = 1; | |||
26287 | aArray[29] = 1; | |||
26288 | break; | |||
26289 | default: return 1; } | |||
26290 | break; | |||
26291 | ||||
26292 | ||||
26293 | default: | |||
26294 | return 1; | |||
26295 | } | |||
26296 | return 0; | |||
26297 | } | |||
26298 | ||||
/*
** Table of 17 non-decreasing indexes into aFts5UnicodeMap[]. Each value
** is either a position at which the ascending sequence stored in
** aFts5UnicodeMap[] restarts (0, 1471, 1753, 1760, 1763) or the total
** number of entries in that array (1765).
**
** NOTE(review): presumably entry i is the start of the map data for the
** i-th 65536-codepoint block, with equal neighbours denoting an empty
** block - confirm against the code that reads this table.
*/
26299 | static u16 aFts5UnicodeBlock[] = { | |||
26300 | 0, 1471, 1753, 1760, 1760, 1760, 1760, 1760, 1760, 1760, | |||
26301 | 1760, 1760, 1760, 1760, 1760, 1763, 1765, | |||
26302 | }; | |||
/*
** Table of 1765 16-bit values arranged as several consecutive runs, each
** of which is sorted in ascending order. A new run begins wherever the
** value drops back (at indexes 1471, 1753, 1760 and 1763).
**
** NOTE(review): presumably each value is the low 16 bits of the first
** codepoint of a range of characters, with the runs delimited by
** aFts5UnicodeBlock[] - confirm against the code that reads this table.
*/
26303 | static u16 aFts5UnicodeMap[] = { | |||
26304 | 0, 32, 33, 36, 37, 40, 41, 42, 43, 44, | |||
26305 | 45, 46, 48, 58, 60, 63, 65, 91, 92, 93, | |||
26306 | 94, 95, 96, 97, 123, 124, 125, 126, 127, 160, | |||
26307 | 161, 162, 166, 167, 168, 169, 170, 171, 172, 173, | |||
26308 | 174, 175, 176, 177, 178, 180, 181, 182, 184, 185, | |||
26309 | 186, 187, 188, 191, 192, 215, 216, 223, 247, 248, | |||
26310 | 256, 312, 313, 329, 330, 377, 383, 385, 387, 388, | |||
26311 | 391, 394, 396, 398, 402, 403, 405, 406, 409, 412, | |||
26312 | 414, 415, 417, 418, 423, 427, 428, 431, 434, 436, | |||
26313 | 437, 440, 442, 443, 444, 446, 448, 452, 453, 454, | |||
26314 | 455, 456, 457, 458, 459, 460, 461, 477, 478, 496, | |||
26315 | 497, 498, 499, 500, 503, 505, 506, 564, 570, 572, | |||
26316 | 573, 575, 577, 580, 583, 584, 592, 660, 661, 688, | |||
26317 | 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, | |||
26318 | 880, 884, 885, 886, 890, 891, 894, 900, 902, 903, | |||
26319 | 904, 908, 910, 912, 913, 931, 940, 975, 977, 978, | |||
26320 | 981, 984, 1008, 1012, 1014, 1015, 1018, 1020, 1021, 1072, | |||
26321 | 1120, 1154, 1155, 1160, 1162, 1217, 1231, 1232, 1329, 1369, | |||
26322 | 1370, 1377, 1417, 1418, 1423, 1425, 1470, 1471, 1472, 1473, | |||
26323 | 1475, 1476, 1478, 1479, 1488, 1520, 1523, 1536, 1542, 1545, | |||
26324 | 1547, 1548, 1550, 1552, 1563, 1566, 1568, 1600, 1601, 1611, | |||
26325 | 1632, 1642, 1646, 1648, 1649, 1748, 1749, 1750, 1757, 1758, | |||
26326 | 1759, 1765, 1767, 1769, 1770, 1774, 1776, 1786, 1789, 1791, | |||
26327 | 1792, 1807, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1984, | |||
26328 | 1994, 2027, 2036, 2038, 2039, 2042, 2048, 2070, 2074, 2075, | |||
26329 | 2084, 2085, 2088, 2089, 2096, 2112, 2137, 2142, 2208, 2210, | |||
26330 | 2276, 2304, 2307, 2308, 2362, 2363, 2364, 2365, 2366, 2369, | |||
26331 | 2377, 2381, 2382, 2384, 2385, 2392, 2402, 2404, 2406, 2416, | |||
26332 | 2417, 2418, 2425, 2433, 2434, 2437, 2447, 2451, 2474, 2482, | |||
26333 | 2486, 2492, 2493, 2494, 2497, 2503, 2507, 2509, 2510, 2519, | |||
26334 | 2524, 2527, 2530, 2534, 2544, 2546, 2548, 2554, 2555, 2561, | |||
26335 | 2563, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622, | |||
26336 | 2625, 2631, 2635, 2641, 2649, 2654, 2662, 2672, 2674, 2677, | |||
26337 | 2689, 2691, 2693, 2703, 2707, 2730, 2738, 2741, 2748, 2749, | |||
26338 | 2750, 2753, 2759, 2761, 2763, 2765, 2768, 2784, 2786, 2790, | |||
26339 | 2800, 2801, 2817, 2818, 2821, 2831, 2835, 2858, 2866, 2869, | |||
26340 | 2876, 2877, 2878, 2879, 2880, 2881, 2887, 2891, 2893, 2902, | |||
26341 | 2903, 2908, 2911, 2914, 2918, 2928, 2929, 2930, 2946, 2947, | |||
26342 | 2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006, | |||
26343 | 3008, 3009, 3014, 3018, 3021, 3024, 3031, 3046, 3056, 3059, | |||
26344 | 3065, 3066, 3073, 3077, 3086, 3090, 3114, 3125, 3133, 3134, | |||
26345 | 3137, 3142, 3146, 3157, 3160, 3168, 3170, 3174, 3192, 3199, | |||
26346 | 3202, 3205, 3214, 3218, 3242, 3253, 3260, 3261, 3262, 3263, | |||
26347 | 3264, 3270, 3271, 3274, 3276, 3285, 3294, 3296, 3298, 3302, | |||
26348 | 3313, 3330, 3333, 3342, 3346, 3389, 3390, 3393, 3398, 3402, | |||
26349 | 3405, 3406, 3415, 3424, 3426, 3430, 3440, 3449, 3450, 3458, | |||
26350 | 3461, 3482, 3507, 3517, 3520, 3530, 3535, 3538, 3542, 3544, | |||
26351 | 3570, 3572, 3585, 3633, 3634, 3636, 3647, 3648, 3654, 3655, | |||
26352 | 3663, 3664, 3674, 3713, 3716, 3719, 3722, 3725, 3732, 3737, | |||
26353 | 3745, 3749, 3751, 3754, 3757, 3761, 3762, 3764, 3771, 3773, | |||
26354 | 3776, 3782, 3784, 3792, 3804, 3840, 3841, 3844, 3859, 3860, | |||
26355 | 3861, 3864, 3866, 3872, 3882, 3892, 3893, 3894, 3895, 3896, | |||
26356 | 3897, 3898, 3899, 3900, 3901, 3902, 3904, 3913, 3953, 3967, | |||
26357 | 3968, 3973, 3974, 3976, 3981, 3993, 4030, 4038, 4039, 4046, | |||
26358 | 4048, 4053, 4057, 4096, 4139, 4141, 4145, 4146, 4152, 4153, | |||
26359 | 4155, 4157, 4159, 4160, 4170, 4176, 4182, 4184, 4186, 4190, | |||
26360 | 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4227, 4229, | |||
26361 | 4231, 4237, 4238, 4239, 4240, 4250, 4253, 4254, 4256, 4295, | |||
26362 | 4301, 4304, 4347, 4348, 4349, 4682, 4688, 4696, 4698, 4704, | |||
26363 | 4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888, | |||
26364 | 4957, 4960, 4969, 4992, 5008, 5024, 5120, 5121, 5741, 5743, | |||
26365 | 5760, 5761, 5787, 5788, 5792, 5867, 5870, 5888, 5902, 5906, | |||
26366 | 5920, 5938, 5941, 5952, 5970, 5984, 5998, 6002, 6016, 6068, | |||
26367 | 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6103, 6104, 6107, | |||
26368 | 6108, 6109, 6112, 6128, 6144, 6150, 6151, 6155, 6158, 6160, | |||
26369 | 6176, 6211, 6212, 6272, 6313, 6314, 6320, 6400, 6432, 6435, | |||
26370 | 6439, 6441, 6448, 6450, 6451, 6457, 6464, 6468, 6470, 6480, | |||
26371 | 6512, 6528, 6576, 6593, 6600, 6608, 6618, 6622, 6656, 6679, | |||
26372 | 6681, 6686, 6688, 6741, 6742, 6743, 6744, 6752, 6753, 6754, | |||
26373 | 6755, 6757, 6765, 6771, 6783, 6784, 6800, 6816, 6823, 6824, | |||
26374 | 6912, 6916, 6917, 6964, 6965, 6966, 6971, 6972, 6973, 6978, | |||
26375 | 6979, 6981, 6992, 7002, 7009, 7019, 7028, 7040, 7042, 7043, | |||
26376 | 7073, 7074, 7078, 7080, 7082, 7083, 7084, 7086, 7088, 7098, | |||
26377 | 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7164, 7168, | |||
26378 | 7204, 7212, 7220, 7222, 7227, 7232, 7245, 7248, 7258, 7288, | |||
26379 | 7294, 7360, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406, | |||
26380 | 7410, 7412, 7413, 7424, 7468, 7531, 7544, 7545, 7579, 7616, | |||
26381 | 7676, 7680, 7830, 7838, 7936, 7944, 7952, 7960, 7968, 7976, | |||
26382 | 7984, 7992, 8000, 8008, 8016, 8025, 8027, 8029, 8031, 8033, | |||
26383 | 8040, 8048, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8118, | |||
26384 | 8120, 8124, 8125, 8126, 8127, 8130, 8134, 8136, 8140, 8141, | |||
26385 | 8144, 8150, 8152, 8157, 8160, 8168, 8173, 8178, 8182, 8184, | |||
26386 | 8188, 8189, 8192, 8203, 8208, 8214, 8216, 8217, 8218, 8219, | |||
26387 | 8221, 8222, 8223, 8224, 8232, 8233, 8234, 8239, 8240, 8249, | |||
26388 | 8250, 8251, 8255, 8257, 8260, 8261, 8262, 8263, 8274, 8275, | |||
26389 | 8276, 8277, 8287, 8288, 8298, 8304, 8305, 8308, 8314, 8317, | |||
26390 | 8318, 8319, 8320, 8330, 8333, 8334, 8336, 8352, 8400, 8413, | |||
26391 | 8417, 8418, 8421, 8448, 8450, 8451, 8455, 8456, 8458, 8459, | |||
26392 | 8462, 8464, 8467, 8468, 8469, 8470, 8472, 8473, 8478, 8484, | |||
26393 | 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8496, 8500, | |||
26394 | 8501, 8505, 8506, 8508, 8510, 8512, 8517, 8519, 8522, 8523, | |||
26395 | 8524, 8526, 8527, 8528, 8544, 8579, 8581, 8585, 8592, 8597, | |||
26396 | 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, | |||
26397 | 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972, | |||
26398 | 8992, 8994, 9001, 9002, 9003, 9084, 9085, 9115, 9140, 9180, | |||
26399 | 9186, 9216, 9280, 9312, 9372, 9450, 9472, 9655, 9656, 9665, | |||
26400 | 9666, 9720, 9728, 9839, 9840, 9985, 10088, 10089, 10090, 10091, | |||
26401 | 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101, | |||
26402 | 10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217, | |||
26403 | 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10240, 10496, 10627, | |||
26404 | 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, | |||
26405 | 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, | |||
26406 | 10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750, | |||
26407 | 11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365, | |||
26408 | 11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393, | |||
26409 | 11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520, | |||
26410 | 11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696, | |||
26411 | 11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780, | |||
26412 | 11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800, | |||
26413 | 11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812, | |||
26414 | 11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904, | |||
26415 | 11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296, | |||
26416 | 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306, | |||
26417 | 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317, | |||
26418 | 12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347, | |||
26419 | 12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449, | |||
26420 | 12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736, | |||
26421 | 12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, | |||
26422 | 12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981, | |||
26423 | 40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528, | |||
26424 | 42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624, | |||
26425 | 42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800, | |||
26426 | 42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912, | |||
26427 | 43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043, | |||
26428 | 43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136, | |||
26429 | 43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 43259, 43264, | |||
26430 | 43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395, | |||
26431 | 43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472, | |||
26432 | 43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588, | |||
26433 | 43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643, | |||
26434 | 43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, | |||
26435 | 43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762, | |||
26436 | 43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003, | |||
26437 | 44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203, | |||
26438 | 55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112, | |||
26439 | 64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320, | |||
26440 | 64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020, | |||
26441 | 65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075, | |||
26442 | 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, | |||
26443 | 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097, | |||
26444 | 65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118, | |||
26445 | 65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279, | |||
26446 | 65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294, | |||
26447 | 65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343, | |||
26448 | 65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378, | |||
26449 | 65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490, | |||
26450 | 65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529, | |||
26451 | 65532, 0, 13, 40, 60, 63, 80, 128, 256, 263, | |||
26452 | 311, 320, 373, 377, 394, 400, 464, 509, 640, 672, | |||
26453 | 768, 800, 816, 833, 834, 842, 896, 927, 928, 968, | |||
26454 | 976, 977, 1024, 1064, 1104, 1184, 2048, 2056, 2058, 2103, | |||
26455 | 2108, 2111, 2135, 2136, 2304, 2326, 2335, 2336, 2367, 2432, | |||
26456 | 2494, 2560, 2561, 2565, 2572, 2576, 2581, 2585, 2616, 2623, | |||
26457 | 2624, 2640, 2656, 2685, 2687, 2816, 2873, 2880, 2904, 2912, | |||
26458 | 2936, 3072, 3680, 4096, 4097, 4098, 4099, 4152, 4167, 4178, | |||
26459 | 4198, 4224, 4226, 4227, 4272, 4275, 4279, 4281, 4283, 4285, | |||
26460 | 4286, 4304, 4336, 4352, 4355, 4391, 4396, 4397, 4406, 4416, | |||
26461 | 4480, 4482, 4483, 4531, 4534, 4543, 4545, 4549, 4560, 5760, | |||
26462 | 5803, 5804, 5805, 5806, 5808, 5814, 5815, 5824, 8192, 9216, | |||
26463 | 9328, 12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248, | |||
26464 | 53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637, | |||
26465 | 53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298, | |||
26466 | 54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441, | |||
26467 | 54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541, | |||
26468 | 54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662, | |||
26469 | 54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922, | |||
26470 | 54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062, | |||
26471 | 55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178, | |||
26472 | 55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961, | |||
26473 | 60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003, | |||
26474 | 61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028, | |||
26475 | 61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099, | |||
26476 | 61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744, | |||
26477 | 61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368, | |||
26478 | 62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971, | |||
26479 | 63045, 63104, 63232, 0, 42710, 42752, 46900, 46912, 47133, 63488, | |||
26480 | 1, 32, 256, 0, 65533, | |||
26481 | }; | |||
26482 | static u16 aFts5UnicodeData[] = { | |||
26483 | 1025, 61, 117, 55, 117, 54, 50, 53, 57, 53, | |||
26484 | 49, 85, 333, 85, 121, 85, 841, 54, 53, 50, | |||
26485 | 56, 48, 56, 837, 54, 57, 50, 57, 1057, 61, | |||
26486 | 53, 151, 58, 53, 56, 58, 39, 52, 57, 34, | |||
26487 | 58, 56, 58, 57, 79, 56, 37, 85, 56, 47, | |||
26488 | 39, 51, 111, 53, 745, 57, 233, 773, 57, 261, | |||
26489 | 1822, 37, 542, 37, 1534, 222, 69, 73, 37, 126, | |||
26490 | 126, 73, 69, 137, 37, 73, 37, 105, 101, 73, | |||
26491 | 37, 73, 37, 190, 158, 37, 126, 126, 73, 37, | |||
26492 | 126, 94, 37, 39, 94, 69, 135, 41, 40, 37, | |||
26493 | 41, 40, 37, 41, 40, 37, 542, 37, 606, 37, | |||
26494 | 41, 40, 37, 126, 73, 37, 1886, 197, 73, 37, | |||
26495 | 73, 69, 126, 105, 37, 286, 2181, 39, 869, 582, | |||
26496 | 152, 390, 472, 166, 248, 38, 56, 38, 568, 3596, | |||
26497 | 158, 38, 56, 94, 38, 101, 53, 88, 41, 53, | |||
26498 | 105, 41, 73, 37, 553, 297, 1125, 94, 37, 105, | |||
26499 | 101, 798, 133, 94, 57, 126, 94, 37, 1641, 1541, | |||
26500 | 1118, 58, 172, 75, 1790, 478, 37, 2846, 1225, 38, | |||
26501 | 213, 1253, 53, 49, 55, 1452, 49, 44, 53, 76, | |||
26502 | 53, 76, 53, 44, 871, 103, 85, 162, 121, 85, | |||
26503 | 55, 85, 90, 364, 53, 85, 1031, 38, 327, 684, | |||
26504 | 333, 149, 71, 44, 3175, 53, 39, 236, 34, 58, | |||
26505 | 204, 70, 76, 58, 140, 71, 333, 103, 90, 39, | |||
26506 | 469, 34, 39, 44, 967, 876, 2855, 364, 39, 333, | |||
26507 | 1063, 300, 70, 58, 117, 38, 711, 140, 38, 300, | |||
26508 | 38, 108, 38, 172, 501, 807, 108, 53, 39, 359, | |||
26509 | 876, 108, 42, 1735, 44, 42, 44, 39, 106, 268, | |||
26510 | 138, 44, 74, 39, 236, 327, 76, 85, 333, 53, | |||
26511 | 38, 199, 231, 44, 74, 263, 71, 711, 231, 39, | |||
26512 | 135, 44, 39, 106, 140, 74, 74, 44, 39, 42, | |||
26513 | 71, 103, 76, 333, 71, 87, 207, 58, 55, 76, | |||
26514 | 42, 199, 71, 711, 231, 71, 71, 71, 44, 106, | |||
26515 | 76, 76, 108, 44, 135, 39, 333, 76, 103, 44, | |||
26516 | 76, 42, 295, 103, 711, 231, 71, 167, 44, 39, | |||
26517 | 106, 172, 76, 42, 74, 44, 39, 71, 76, 333, | |||
26518 | 53, 55, 44, 74, 263, 71, 711, 231, 71, 167, | |||
26519 | 44, 39, 42, 44, 42, 140, 74, 74, 44, 44, | |||
26520 | 42, 71, 103, 76, 333, 58, 39, 207, 44, 39, | |||
26521 | 199, 103, 135, 71, 39, 71, 71, 103, 391, 74, | |||
26522 | 44, 74, 106, 106, 44, 39, 42, 333, 111, 218, | |||
26523 | 55, 58, 106, 263, 103, 743, 327, 167, 39, 108, | |||
26524 | 138, 108, 140, 76, 71, 71, 76, 333, 239, 58, | |||
26525 | 74, 263, 103, 743, 327, 167, 44, 39, 42, 44, | |||
26526 | 170, 44, 74, 74, 76, 74, 39, 71, 76, 333, | |||
26527 | 71, 74, 263, 103, 1319, 39, 106, 140, 106, 106, | |||
26528 | 44, 39, 42, 71, 76, 333, 207, 58, 199, 74, | |||
26529 | 583, 775, 295, 39, 231, 44, 106, 108, 44, 266, | |||
26530 | 74, 53, 1543, 44, 71, 236, 55, 199, 38, 268, | |||
26531 | 53, 333, 85, 71, 39, 71, 39, 39, 135, 231, | |||
26532 | 103, 39, 39, 71, 135, 44, 71, 204, 76, 39, | |||
26533 | 167, 38, 204, 333, 135, 39, 122, 501, 58, 53, | |||
26534 | 122, 76, 218, 333, 335, 58, 44, 58, 44, 58, | |||
26535 | 44, 54, 50, 54, 50, 74, 263, 1159, 460, 42, | |||
26536 | 172, 53, 76, 167, 364, 1164, 282, 44, 218, 90, | |||
26537 | 181, 154, 85, 1383, 74, 140, 42, 204, 42, 76, | |||
26538 | 74, 76, 39, 333, 213, 199, 74, 76, 135, 108, | |||
26539 | 39, 106, 71, 234, 103, 140, 423, 44, 74, 76, | |||
26540 | 202, 44, 39, 42, 333, 106, 44, 90, 1225, 41, | |||
26541 | 41, 1383, 53, 38, 10631, 135, 231, 39, 135, 1319, | |||
26542 | 135, 1063, 135, 231, 39, 135, 487, 1831, 135, 2151, | |||
26543 | 108, 309, 655, 519, 346, 2727, 49, 19847, 85, 551, | |||
26544 | 61, 839, 54, 50, 2407, 117, 110, 423, 135, 108, | |||
26545 | 583, 108, 85, 583, 76, 423, 103, 76, 1671, 76, | |||
26546 | 42, 236, 266, 44, 74, 364, 117, 38, 117, 55, | |||
26547 | 39, 44, 333, 335, 213, 49, 149, 108, 61, 333, | |||
26548 | 1127, 38, 1671, 1319, 44, 39, 2247, 935, 108, 138, | |||
26549 | 76, 106, 74, 44, 202, 108, 58, 85, 333, 967, | |||
26550 | 167, 1415, 554, 231, 74, 333, 47, 1114, 743, 76, | |||
26551 | 106, 85, 1703, 42, 44, 42, 236, 44, 42, 44, | |||
26552 | 74, 268, 202, 332, 44, 333, 333, 245, 38, 213, | |||
26553 | 140, 42, 1511, 44, 42, 172, 42, 44, 170, 44, | |||
26554 | 74, 231, 333, 245, 346, 300, 314, 76, 42, 967, | |||
26555 | 42, 140, 74, 76, 42, 44, 74, 71, 333, 1415, | |||
26556 | 44, 42, 76, 106, 44, 42, 108, 74, 149, 1159, | |||
26557 | 266, 268, 74, 76, 181, 333, 103, 333, 967, 198, | |||
26558 | 85, 277, 108, 53, 428, 42, 236, 135, 44, 135, | |||
26559 | 74, 44, 71, 1413, 2022, 421, 38, 1093, 1190, 1260, | |||
26560 | 140, 4830, 261, 3166, 261, 265, 197, 201, 261, 265, | |||
26561 | 261, 265, 197, 201, 261, 41, 41, 41, 94, 229, | |||
26562 | 265, 453, 261, 264, 261, 264, 261, 264, 165, 69, | |||
26563 | 137, 40, 56, 37, 120, 101, 69, 137, 40, 120, | |||
26564 | 133, 69, 137, 120, 261, 169, 120, 101, 69, 137, | |||
26565 | 40, 88, 381, 162, 209, 85, 52, 51, 54, 84, | |||
26566 | 51, 54, 52, 277, 59, 60, 162, 61, 309, 52, | |||
26567 | 51, 149, 80, 117, 57, 54, 50, 373, 57, 53, | |||
26568 | 48, 341, 61, 162, 194, 47, 38, 207, 121, 54, | |||
26569 | 50, 38, 335, 121, 54, 50, 422, 855, 428, 139, | |||
26570 | 44, 107, 396, 90, 41, 154, 41, 90, 37, 105, | |||
26571 | 69, 105, 37, 58, 41, 90, 57, 169, 218, 41, | |||
26572 | 58, 41, 58, 41, 58, 137, 58, 37, 137, 37, | |||
26573 | 135, 37, 90, 69, 73, 185, 94, 101, 58, 57, | |||
26574 | 90, 37, 58, 527, 1134, 94, 142, 47, 185, 186, | |||
26575 | 89, 154, 57, 90, 57, 90, 57, 250, 57, 1018, | |||
26576 | 89, 90, 57, 58, 57, 1018, 8601, 282, 153, 666, | |||
26577 | 89, 250, 54, 50, 2618, 57, 986, 825, 1306, 217, | |||
26578 | 602, 1274, 378, 1935, 2522, 719, 5882, 57, 314, 57, | |||
26579 | 1754, 281, 3578, 57, 4634, 3322, 54, 50, 54, 50, | |||
26580 | 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, | |||
26581 | 975, 1434, 185, 54, 50, 1017, 54, 50, 54, 50, | |||
26582 | 54, 50, 54, 50, 54, 50, 537, 8218, 4217, 54, | |||
26583 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, | |||
26584 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, | |||
26585 | 50, 2041, 54, 50, 54, 50, 1049, 54, 50, 8281, | |||
26586 | 1562, 697, 90, 217, 346, 1513, 1509, 126, 73, 69, | |||
26587 | 254, 105, 37, 94, 37, 94, 165, 70, 105, 37, | |||
26588 | 3166, 37, 218, 158, 108, 94, 149, 47, 85, 1221, | |||
26589 | 37, 37, 1799, 38, 53, 44, 743, 231, 231, 231, | |||
26590 | 231, 231, 231, 231, 231, 1036, 85, 52, 51, 52, | |||
26591 | 51, 117, 52, 51, 53, 52, 51, 309, 49, 85, | |||
26592 | 49, 53, 52, 51, 85, 52, 51, 54, 50, 54, | |||
26593 | 50, 54, 50, 54, 50, 181, 38, 341, 81, 858, | |||
26594 | 2874, 6874, 410, 61, 117, 58, 38, 39, 46, 54, | |||
26595 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 90, | |||
26596 | 54, 50, 54, 50, 54, 50, 54, 50, 49, 54, | |||
26597 | 82, 58, 302, 140, 74, 49, 166, 90, 110, 38, | |||
26598 | 39, 53, 90, 2759, 76, 88, 70, 39, 49, 2887, | |||
26599 | 53, 102, 39, 1319, 3015, 90, 143, 346, 871, 1178, | |||
26600 | 519, 1018, 335, 986, 271, 58, 495, 1050, 335, 1274, | |||
26601 | 495, 2042, 8218, 39, 39, 2074, 39, 39, 679, 38, | |||
26602 | 36583, 1786, 1287, 198, 85, 8583, 38, 117, 519, 333, | |||
26603 | 71, 1502, 39, 44, 107, 53, 332, 53, 38, 798, | |||
26604 | 44, 2247, 334, 76, 213, 760, 294, 88, 478, 69, | |||
26605 | 2014, 38, 261, 190, 350, 38, 88, 158, 158, 382, | |||
26606 | 70, 37, 231, 44, 103, 44, 135, 44, 743, 74, | |||
26607 | 76, 42, 154, 207, 90, 55, 58, 1671, 149, 74, | |||
26608 | 1607, 522, 44, 85, 333, 588, 199, 117, 39, 333, | |||
26609 | 903, 268, 85, 743, 364, 74, 53, 935, 108, 42, | |||
26610 | 1511, 44, 74, 140, 74, 44, 138, 437, 38, 333, | |||
26611 | 85, 1319, 204, 74, 76, 74, 76, 103, 44, 263, | |||
26612 | 44, 42, 333, 149, 519, 38, 199, 122, 39, 42, | |||
26613 | 1543, 44, 39, 108, 71, 76, 167, 76, 39, 44, | |||
26614 | 39, 71, 38, 85, 359, 42, 76, 74, 85, 39, | |||
26615 | 70, 42, 44, 199, 199, 199, 231, 231, 1127, 74, | |||
26616 | 44, 74, 44, 74, 53, 42, 44, 333, 39, 39, | |||
26617 | 743, 1575, 36, 68, 68, 36, 63, 63, 11719, 3399, | |||
26618 | 229, 165, 39, 44, 327, 57, 423, 167, 39, 71, | |||
26619 | 71, 3463, 536, 11623, 54, 50, 2055, 1735, 391, 55, | |||
26620 | 58, 524, 245, 54, 50, 53, 236, 53, 81, 80, | |||
26621 | 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, | |||
26622 | 54, 50, 54, 50, 54, 50, 85, 54, 50, 149, | |||
26623 | 112, 117, 149, 49, 54, 50, 54, 50, 54, 50, | |||
26624 | 117, 57, 49, 121, 53, 55, 85, 167, 4327, 34, | |||
26625 | 117, 55, 117, 54, 50, 53, 57, 53, 49, 85, | |||
26626 | 333, 85, 121, 85, 841, 54, 53, 50, 56, 48, | |||
26627 | 56, 837, 54, 57, 50, 57, 54, 50, 53, 54, | |||
26628 | 50, 85, 327, 38, 1447, 70, 999, 199, 199, 199, | |||
26629 | 103, 87, 57, 56, 58, 87, 58, 153, 90, 98, | |||
26630 | 90, 391, 839, 615, 71, 487, 455, 3943, 117, 1455, | |||
26631 | 314, 1710, 143, 570, 47, 410, 1466, 44, 935, 1575, | |||
26632 | 999, 143, 551, 46, 263, 46, 967, 53, 1159, 263, | |||
26633 | 53, 174, 1289, 1285, 2503, 333, 199, 39, 1415, 71, | |||
26634 | 39, 743, 53, 271, 711, 207, 53, 839, 53, 1799, | |||
26635 | 71, 39, 108, 76, 140, 135, 103, 871, 108, 44, | |||
26636 | 271, 309, 935, 79, 53, 1735, 245, 711, 271, 615, | |||
26637 | 271, 2343, 1007, 42, 44, 42, 1703, 492, 245, 655, | |||
26638 | 333, 76, 42, 1447, 106, 140, 74, 76, 85, 34, | |||
26639 | 149, 807, 333, 108, 1159, 172, 42, 268, 333, 149, | |||
26640 | 76, 42, 1543, 106, 300, 74, 135, 149, 333, 1383, | |||
26641 | 44, 42, 44, 74, 204, 42, 44, 333, 28135, 3182, | |||
26642 | 149, 34279, 18215, 2215, 39, 1482, 140, 422, 71, 7898, | |||
26643 | 1274, 1946, 74, 108, 122, 202, 258, 268, 90, 236, | |||
26644 | 986, 140, 1562, 2138, 108, 58, 2810, 591, 841, 837, | |||
26645 | 841, 229, 581, 841, 837, 41, 73, 41, 73, 137, | |||
26646 | 265, 133, 37, 229, 357, 841, 837, 73, 137, 265, | |||
26647 | 233, 837, 73, 137, 169, 41, 233, 837, 841, 837, | |||
26648 | 841, 837, 841, 837, 841, 837, 841, 837, 841, 901, | |||
26649 | 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, | |||
26650 | 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, | |||
26651 | 809, 57, 805, 57, 197, 94, 1613, 135, 871, 71, | |||
26652 | 39, 39, 327, 135, 39, 39, 39, 39, 39, 39, | |||
26653 | 103, 71, 39, 39, 39, 39, 39, 39, 71, 39, | |||
26654 | 135, 231, 135, 135, 39, 327, 551, 103, 167, 551, | |||
26655 | 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946, | |||
26656 | 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210, | |||
26657 | 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266, | |||
26658 | 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351, | |||
26659 | 34, 3074, 7692, 63, 63, | |||
26660 | }; | |||
26661 | ||||
26662 | static int sqlite3Fts5UnicodeCategory(u32 iCode) { | |||
26663 | int iRes = -1; | |||
26664 | int iHi; | |||
26665 | int iLo; | |||
26666 | int ret; | |||
26667 | u16 iKey; | |||
26668 | ||||
26669 | if( iCode>=(1<<20) ){ | |||
26670 | return 0; | |||
26671 | } | |||
26672 | iLo = aFts5UnicodeBlock[(iCode>>16)]; | |||
26673 | iHi = aFts5UnicodeBlock[1+(iCode>>16)]; | |||
26674 | iKey = (iCode & 0xFFFF); | |||
26675 | while( iHi>iLo ){ | |||
26676 | int iTest = (iHi + iLo) / 2; | |||
26677 | assert( iTest>=iLo && iTest<iHi )((void) (0)); | |||
26678 | if( iKey>=aFts5UnicodeMap[iTest] ){ | |||
26679 | iRes = iTest; | |||
26680 | iLo = iTest+1; | |||
26681 | }else{ | |||
26682 | iHi = iTest; | |||
26683 | } | |||
26684 | } | |||
26685 | ||||
26686 | if( iRes<0 ) return 0; | |||
26687 | if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0; | |||
26688 | ret = aFts5UnicodeData[iRes] & 0x1F; | |||
26689 | if( ret!=30 ) return ret; | |||
26690 | return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 5 : 9; | |||
26691 | } | |||
26692 | ||||
26693 | static void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){ | |||
26694 | int i = 0; | |||
26695 | int iTbl = 0; | |||
26696 | while( i<128 ){ | |||
26697 | int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ]; | |||
26698 | int n = (aFts5UnicodeData[iTbl] >> 5) + i; | |||
26699 | for(; i<128 && i<n; i++){ | |||
26700 | aAscii[i] = (u8)bToken; | |||
26701 | } | |||
26702 | iTbl++; | |||
26703 | } | |||
26704 | aAscii[0] = 0; /* 0x00 is never a token character */ | |||
26705 | } | |||
26706 | ||||
26707 | #line 1 "fts5_varint.c" | |||
26708 | /* | |||
26709 | ** 2015 May 30 | |||
26710 | ** | |||
26711 | ** The author disclaims copyright to this source code. In place of | |||
26712 | ** a legal notice, here is a blessing: | |||
26713 | ** | |||
26714 | ** May you do good and not evil. | |||
26715 | ** May you find forgiveness for yourself and forgive others. | |||
26716 | ** May you share freely, never taking more than you give. | |||
26717 | ** | |||
26718 | ****************************************************************************** | |||
26719 | ** | |||
26720 | ** Routines for varint serialization and deserialization. | |||
26721 | */ | |||
26722 | ||||
26723 | ||||
26724 | /* #include "fts5Int.h" */ | |||
26725 | ||||
26726 | /* | |||
26727 | ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. | |||
26728 | ** Except, this version does handle the single byte case that the core | |||
26729 | ** version depends on being handled before its function is called. | |||
26730 | */ | |||
26731 | static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ | |||
26732 | u32 a,b; | |||
26733 | ||||
26734 | /* The 1-byte case. Overwhelmingly the most common. */ | |||
26735 | a = *p; | |||
26736 | /* a: p0 (unmasked) */ | |||
26737 | if (!(a&0x80)) | |||
26738 | { | |||
26739 | /* Values between 0 and 127 */ | |||
26740 | *v = a; | |||
26741 | return 1; | |||
26742 | } | |||
26743 | ||||
26744 | /* The 2-byte case */ | |||
26745 | p++; | |||
26746 | b = *p; | |||
26747 | /* b: p1 (unmasked) */ | |||
26748 | if (!(b&0x80)) | |||
26749 | { | |||
26750 | /* Values between 128 and 16383 */ | |||
26751 | a &= 0x7f; | |||
26752 | a = a<<7; | |||
26753 | *v = a | b; | |||
26754 | return 2; | |||
26755 | } | |||
26756 | ||||
26757 | /* The 3-byte case */ | |||
26758 | p++; | |||
26759 | a = a<<14; | |||
26760 | a |= *p; | |||
26761 | /* a: p0<<14 | p2 (unmasked) */ | |||
26762 | if (!(a&0x80)) | |||
26763 | { | |||
26764 | /* Values between 16384 and 2097151 */ | |||
26765 | a &= (0x7f<<14)|(0x7f); | |||
26766 | b &= 0x7f; | |||
26767 | b = b<<7; | |||
26768 | *v = a | b; | |||
26769 | return 3; | |||
26770 | } | |||
26771 | ||||
26772 | /* A 32-bit varint is used to store size information in btrees. | |||
26773 | ** Objects are rarely larger than 2MiB limit of a 3-byte varint. | |||
26774 | ** A 3-byte varint is sufficient, for example, to record the size | |||
26775 | ** of a 1048569-byte BLOB or string. | |||
26776 | ** | |||
26777 | ** We only unroll the first 1-, 2-, and 3- byte cases. The very | |||
26778 | ** rare larger cases can be handled by the slower 64-bit varint | |||
26779 | ** routine. | |||
26780 | */ | |||
26781 | { | |||
26782 | u64 v64; | |||
26783 | u8 n; | |||
26784 | p -= 2; | |||
26785 | n = sqlite3Fts5GetVarint(p, &v64); | |||
26786 | *v = ((u32)v64) & 0x7FFFFFFF; | |||
26787 | assert( n>3 && n<=9 )((void) (0)); | |||
26788 | return n; | |||
26789 | } | |||
26790 | } | |||
26791 | ||||
26792 | ||||
26793 | /* | |||
26794 | ** Bitmasks used by sqlite3GetVarint(). These precomputed constants | |||
26795 | ** are defined here rather than simply putting the constant expressions | |||
26796 | ** inline in order to work around bugs in the RVT compiler. | |||
26797 | ** | |||
26798 | ** SLOT_2_0 A mask for (0x7f<<14) | 0x7f | |||
26799 | ** | |||
26800 | ** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0 | |||
26801 | */ | |||
26802 | #define SLOT_2_00x001fc07f 0x001fc07f | |||
26803 | #define SLOT_4_2_00xf01fc07f 0xf01fc07f | |||
26804 | ||||
26805 | /* | |||
26806 | ** Read a 64-bit variable-length integer from memory starting at p[0]. | |||
26807 | ** Return the number of bytes read. The value is stored in *v. | |||
26808 | */ | |||
26809 | static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){ | |||
26810 | u32 a,b,s; | |||
26811 | ||||
26812 | a = *p; | |||
26813 | /* a: p0 (unmasked) */ | |||
26814 | if (!(a&0x80)) | |||
26815 | { | |||
26816 | *v = a; | |||
26817 | return 1; | |||
26818 | } | |||
26819 | ||||
26820 | p++; | |||
26821 | b = *p; | |||
26822 | /* b: p1 (unmasked) */ | |||
26823 | if (!(b&0x80)) | |||
26824 | { | |||
26825 | a &= 0x7f; | |||
26826 | a = a<<7; | |||
26827 | a |= b; | |||
26828 | *v = a; | |||
26829 | return 2; | |||
26830 | } | |||
26831 | ||||
26832 | /* Verify that constants are precomputed correctly */ | |||
26833 | assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) )((void) (0)); | |||
26834 | assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) )((void) (0)); | |||
26835 | ||||
26836 | p++; | |||
26837 | a = a<<14; | |||
26838 | a |= *p; | |||
26839 | /* a: p0<<14 | p2 (unmasked) */ | |||
26840 | if (!(a&0x80)) | |||
26841 | { | |||
26842 | a &= SLOT_2_00x001fc07f; | |||
26843 | b &= 0x7f; | |||
26844 | b = b<<7; | |||
26845 | a |= b; | |||
26846 | *v = a; | |||
26847 | return 3; | |||
26848 | } | |||
26849 | ||||
26850 | /* CSE1 from below */ | |||
26851 | a &= SLOT_2_00x001fc07f; | |||
26852 | p++; | |||
26853 | b = b<<14; | |||
26854 | b |= *p; | |||
26855 | /* b: p1<<14 | p3 (unmasked) */ | |||
26856 | if (!(b&0x80)) | |||
26857 | { | |||
26858 | b &= SLOT_2_00x001fc07f; | |||
26859 | /* moved CSE1 up */ | |||
26860 | /* a &= (0x7f<<14)|(0x7f); */ | |||
26861 | a = a<<7; | |||
26862 | a |= b; | |||
26863 | *v = a; | |||
26864 | return 4; | |||
26865 | } | |||
26866 | ||||
26867 | /* a: p0<<14 | p2 (masked) */ | |||
26868 | /* b: p1<<14 | p3 (unmasked) */ | |||
26869 | /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | |||
26870 | /* moved CSE1 up */ | |||
26871 | /* a &= (0x7f<<14)|(0x7f); */ | |||
26872 | b &= SLOT_2_00x001fc07f; | |||
26873 | s = a; | |||
26874 | /* s: p0<<14 | p2 (masked) */ | |||
26875 | ||||
26876 | p++; | |||
26877 | a = a<<14; | |||
26878 | a |= *p; | |||
26879 | /* a: p0<<28 | p2<<14 | p4 (unmasked) */ | |||
26880 | if (!(a&0x80)) | |||
26881 | { | |||
26882 | /* we can skip these cause they were (effectively) done above in calc'ing s */ | |||
26883 | /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ | |||
26884 | /* b &= (0x7f<<14)|(0x7f); */ | |||
26885 | b = b<<7; | |||
26886 | a |= b; | |||
26887 | s = s>>18; | |||
26888 | *v = ((u64)s)<<32 | a; | |||
26889 | return 5; | |||
26890 | } | |||
26891 | ||||
26892 | /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | |||
26893 | s = s<<7; | |||
26894 | s |= b; | |||
26895 | /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | |||
26896 | ||||
26897 | p++; | |||
26898 | b = b<<14; | |||
26899 | b |= *p; | |||
26900 | /* b: p1<<28 | p3<<14 | p5 (unmasked) */ | |||
26901 | if (!(b&0x80)) | |||
26902 | { | |||
26903 | /* we can skip this cause it was (effectively) done above in calc'ing s */ | |||
26904 | /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ | |||
26905 | a &= SLOT_2_00x001fc07f; | |||
26906 | a = a<<7; | |||
26907 | a |= b; | |||
26908 | s = s>>18; | |||
26909 | *v = ((u64)s)<<32 | a; | |||
26910 | return 6; | |||
26911 | } | |||
26912 | ||||
26913 | p++; | |||
26914 | a = a<<14; | |||
26915 | a |= *p; | |||
26916 | /* a: p2<<28 | p4<<14 | p6 (unmasked) */ | |||
26917 | if (!(a&0x80)) | |||
26918 | { | |||
26919 | a &= SLOT_4_2_00xf01fc07f; | |||
26920 | b &= SLOT_2_00x001fc07f; | |||
26921 | b = b<<7; | |||
26922 | a |= b; | |||
26923 | s = s>>11; | |||
26924 | *v = ((u64)s)<<32 | a; | |||
26925 | return 7; | |||
26926 | } | |||
26927 | ||||
26928 | /* CSE2 from below */ | |||
26929 | a &= SLOT_2_00x001fc07f; | |||
26930 | p++; | |||
26931 | b = b<<14; | |||
26932 | b |= *p; | |||
26933 | /* b: p3<<28 | p5<<14 | p7 (unmasked) */ | |||
26934 | if (!(b&0x80)) | |||
26935 | { | |||
26936 | b &= SLOT_4_2_00xf01fc07f; | |||
26937 | /* moved CSE2 up */ | |||
26938 | /* a &= (0x7f<<14)|(0x7f); */ | |||
26939 | a = a<<7; | |||
26940 | a |= b; | |||
26941 | s = s>>4; | |||
26942 | *v = ((u64)s)<<32 | a; | |||
26943 | return 8; | |||
26944 | } | |||
26945 | ||||
26946 | p++; | |||
26947 | a = a<<15; | |||
26948 | a |= *p; | |||
26949 | /* a: p4<<29 | p6<<15 | p8 (unmasked) */ | |||
26950 | ||||
26951 | /* moved CSE2 up */ | |||
26952 | /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */ | |||
26953 | b &= SLOT_2_00x001fc07f; | |||
26954 | b = b<<8; | |||
26955 | a |= b; | |||
26956 | ||||
26957 | s = s<<4; | |||
26958 | b = p[-4]; | |||
26959 | b &= 0x7f; | |||
26960 | b = b>>3; | |||
26961 | s |= b; | |||
26962 | ||||
26963 | *v = ((u64)s)<<32 | a; | |||
26964 | ||||
26965 | return 9; | |||
26966 | } | |||
26967 | ||||
26968 | /* | |||
26969 | ** The variable-length integer encoding is as follows: | |||
26970 | ** | |||
26971 | ** KEY: | |||
26972 | ** A = 0xxxxxxx 7 bits of data and one flag bit | |||
26973 | ** B = 1xxxxxxx 7 bits of data and one flag bit | |||
26974 | ** C = xxxxxxxx 8 bits of data | |||
26975 | ** | |||
26976 | ** 7 bits - A | |||
26977 | ** 14 bits - BA | |||
26978 | ** 21 bits - BBA | |||
26979 | ** 28 bits - BBBA | |||
26980 | ** 35 bits - BBBBA | |||
26981 | ** 42 bits - BBBBBA | |||
26982 | ** 49 bits - BBBBBBA | |||
26983 | ** 56 bits - BBBBBBBA | |||
26984 | ** 64 bits - BBBBBBBBC | |||
26985 | */ | |||
26986 | ||||
26987 | #ifdef SQLITE_NOINLINE | |||
26988 | # define FTS5_NOINLINE SQLITE_NOINLINE | |||
26989 | #else | |||
26990 | # define FTS5_NOINLINE | |||
26991 | #endif | |||
26992 | ||||
26993 | /* | |||
26994 | ** Write a 64-bit variable-length integer to memory starting at p[0]. | |||
26995 | ** The length of data write will be between 1 and 9 bytes. The number | |||
26996 | ** of bytes written is returned. | |||
26997 | ** | |||
26998 | ** A variable-length integer consists of the lower 7 bits of each byte | |||
26999 | ** for all bytes that have the 8th bit set and one byte with the 8th | |||
27000 | ** bit clear. Except, if we get to the 9th byte, it stores the full | |||
27001 | ** 8 bits and is the last byte. | |||
27002 | */ | |||
27003 | static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){ | |||
27004 | int i, j, n; | |||
27005 | u8 buf[10]; | |||
27006 | if( v & (((u64)0xff000000)<<32) ){ | |||
27007 | p[8] = (u8)v; | |||
27008 | v >>= 8; | |||
27009 | for(i=7; i>=0; i--){ | |||
27010 | p[i] = (u8)((v & 0x7f) | 0x80); | |||
27011 | v >>= 7; | |||
27012 | } | |||
27013 | return 9; | |||
27014 | } | |||
27015 | n = 0; | |||
27016 | do{ | |||
27017 | buf[n++] = (u8)((v & 0x7f) | 0x80); | |||
27018 | v >>= 7; | |||
27019 | }while( v!=0 ); | |||
27020 | buf[0] &= 0x7f; | |||
27021 | assert( n<=9 )((void) (0)); | |||
27022 | for(i=0, j=n-1; j>=0; j--, i++){ | |||
27023 | p[i] = buf[j]; | |||
27024 | } | |||
27025 | return n; | |||
27026 | } | |||
27027 | ||||
27028 | static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){ | |||
27029 | if( v<=0x7f ){ | |||
27030 | p[0] = v&0x7f; | |||
27031 | return 1; | |||
27032 | } | |||
27033 | if( v<=0x3fff ){ | |||
27034 | p[0] = ((v>>7)&0x7f)|0x80; | |||
27035 | p[1] = v&0x7f; | |||
27036 | return 2; | |||
27037 | } | |||
27038 | return fts5PutVarint64(p,v); | |||
27039 | } | |||
27040 | ||||
27041 | ||||
27042 | static int sqlite3Fts5GetVarintLen(u32 iVal){ | |||
27043 | #if 0 | |||
27044 | if( iVal<(1 << 7 ) ) return 1; | |||
27045 | #endif | |||
27046 | assert( iVal>=(1 << 7) )((void) (0)); | |||
27047 | if( iVal<(1 << 14) ) return 2; | |||
27048 | if( iVal<(1 << 21) ) return 3; | |||
27049 | if( iVal<(1 << 28) ) return 4; | |||
27050 | return 5; | |||
27051 | } | |||
27052 | ||||
27053 | #line 1 "fts5_vocab.c" | |||
27054 | /* | |||
27055 | ** 2015 May 08 | |||
27056 | ** | |||
27057 | ** The author disclaims copyright to this source code. In place of | |||
27058 | ** a legal notice, here is a blessing: | |||
27059 | ** | |||
27060 | ** May you do good and not evil. | |||
27061 | ** May you find forgiveness for yourself and forgive others. | |||
27062 | ** May you share freely, never taking more than you give. | |||
27063 | ** | |||
27064 | ****************************************************************************** | |||
27065 | ** | |||
27066 | ** This is an SQLite virtual table module implementing direct access to an | |||
27067 | ** existing FTS5 index. The module may create several different types of | |||
27068 | ** tables: | |||
27069 | ** | |||
27070 | ** col: | |||
27071 | ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col)); | |||
27072 | ** | |||
27073 | ** One row for each term/column combination. The value of $doc is set to | |||
27074 | ** the number of fts5 rows that contain at least one instance of term | |||
27075 | ** $term within column $col. Field $cnt is set to the total number of | |||
27076 | ** instances of term $term in column $col (in any row of the fts5 table). | |||
27077 | ** | |||
27078 | ** row: | |||
27079 | ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term)); | |||
27080 | ** | |||
27081 | ** One row for each term in the database. The value of $doc is set to | |||
27082 | ** the number of fts5 rows that contain at least one instance of term | |||
27083 | ** $term. Field $cnt is set to the total number of instances of term | |||
27084 | ** $term in the database. | |||
27085 | ** | |||
27086 | ** instance: | |||
27087 | ** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>)); | |||
27088 | ** | |||
27089 | ** One row for each term instance in the database. | |||
27090 | */ | |||
27091 | ||||
27092 | ||||
27093 | /* #include "fts5Int.h" */ | |||
27094 | ||||
27095 | ||||
typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;

/*
** An fts5vocab virtual table.  The sqlite3_vtab base object is the first
** member, so a pointer to this structure may be cast to and from
** (sqlite3_vtab*).  zFts5Tbl and zFts5Db point into extra space allocated
** immediately after the structure by fts5VocabInitVtab(), so a single
** sqlite3_free() of the structure releases everything.
*/
struct Fts5VocabTable {
  sqlite3_vtab base;
  char *zFts5Tbl;                 /* Name of fts5 table */
  char *zFts5Db;                  /* Db containing fts5 table */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL, ROW or INSTANCE */
  unsigned bBusy;                 /* True if busy */
};

/*
** A cursor open on an fts5vocab virtual table.  The sqlite3_vtab_cursor
** base object is the first member.
*/
struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Table *pFts5;               /* Associated FTS5 table */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */
  void *pStruct;                  /* From sqlite3Fts5StructureRef() */

  int nLeTerm;                    /* Size of zLeTerm in bytes */
  char *zLeTerm;                  /* (term <= $zLeTerm) parameter, or NULL */
  int colUsed;                    /* Copy of sqlite3_index_info.colUsed */

  /* These are used by 'col' tables only */
  int iCol;                       /* NOTE(review): presumably current column index - confirm */
  i64 *aCnt;                      /* NOTE(review): presumably per-column 'cnt' values - confirm */
  i64 *aDoc;                      /* NOTE(review): presumably per-column 'doc' values - confirm */

  /* Output values used by all tables. */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */

  /* Output values Used by 'instance' tables only */
  i64 iInstPos;
  int iInstOff;
};
27135 | ||||
/*
** Values for Fts5VocabTable.eType.  These also index the azSchema[] array
** in fts5VocabInitVtab(), so they must stay 0, 1 and 2.
*/
#define FTS5_VOCAB_COL      0
#define FTS5_VOCAB_ROW      1
#define FTS5_VOCAB_INSTANCE 2

/*
** Column lists spliced into the CREATE TABLE statement declared for each
** table type.
*/
#define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"
#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"

/*
** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
*/
#define FTS5_VOCAB_TERM_EQ 0x0100
#define FTS5_VOCAB_TERM_GE 0x0200
#define FTS5_VOCAB_TERM_LE 0x0400

/* The low byte of idxNum carries a copy of sqlite3_index_info.colUsed */
#define FTS5_VOCAB_COLUSED_MASK 0xFF
27152 | ||||
27153 | ||||
27154 | /* | |||
27155 | ** Translate a string containing an fts5vocab table type to an | |||
27156 | ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output | |||
27157 | ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message | |||
27158 | ** and return SQLITE_ERROR. | |||
27159 | */ | |||
27160 | static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ | |||
27161 | int rc = SQLITE_OK0; | |||
27162 | char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1); | |||
27163 | if( rc==SQLITE_OK0 ){ | |||
27164 | sqlite3Fts5Dequote(zCopy); | |||
27165 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "col")==0 ){ | |||
27166 | *peType = FTS5_VOCAB_COL0; | |||
27167 | }else | |||
27168 | ||||
27169 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "row")==0 ){ | |||
27170 | *peType = FTS5_VOCAB_ROW1; | |||
27171 | }else | |||
27172 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "instance")==0 ){ | |||
27173 | *peType = FTS5_VOCAB_INSTANCE2; | |||
27174 | }else | |||
27175 | { | |||
27176 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("fts5vocab: unknown table type: %Q", zCopy); | |||
27177 | rc = SQLITE_ERROR1; | |||
27178 | } | |||
27179 | sqlite3_freesqlite3_api->free(zCopy); | |||
27180 | } | |||
27181 | ||||
27182 | return rc; | |||
27183 | } | |||
27184 | ||||
27185 | ||||
27186 | /* | |||
27187 | ** The xDisconnect() virtual table method. | |||
27188 | */ | |||
27189 | static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){ | |||
27190 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; | |||
27191 | sqlite3_freesqlite3_api->free(pTab); | |||
27192 | return SQLITE_OK0; | |||
27193 | } | |||
27194 | ||||
27195 | /* | |||
27196 | ** The xDestroy() virtual table method. | |||
27197 | */ | |||
27198 | static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ | |||
27199 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; | |||
27200 | sqlite3_freesqlite3_api->free(pTab); | |||
27201 | return SQLITE_OK0; | |||
27202 | } | |||
27203 | ||||
27204 | /* | |||
27205 | ** This function is the implementation of both the xConnect and xCreate | |||
** methods of the fts5vocab virtual table.
27207 | ** | |||
27208 | ** The argv[] array contains the following: | |||
27209 | ** | |||
27210 | ** argv[0] -> module name ("fts5vocab") | |||
27211 | ** argv[1] -> database name | |||
27212 | ** argv[2] -> table name | |||
27213 | ** | |||
27214 | ** then: | |||
27215 | ** | |||
27216 | ** argv[3] -> name of fts5 table | |||
27217 | ** argv[4] -> type of fts5vocab table | |||
27218 | ** | |||
27219 | ** or, for tables in the TEMP schema only. | |||
27220 | ** | |||
27221 | ** argv[3] -> name of fts5 tables database | |||
27222 | ** argv[4] -> name of fts5 table | |||
27223 | ** argv[5] -> type of fts5vocab table | |||
27224 | */ | |||
27225 | static int fts5VocabInitVtab( | |||
27226 | sqlite3 *db, /* The SQLite database connection */ | |||
27227 | void *pAux, /* Pointer to Fts5Global object */ | |||
27228 | int argc, /* Number of elements in argv array */ | |||
27229 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
27230 | sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ | |||
27231 | char **pzErr /* Write any error message here */ | |||
27232 | ){ | |||
27233 | const char *azSchema[] = { | |||
27234 | "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA"term, col, doc, cnt" ")", | |||
27235 | "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA"term, doc, cnt" ")", | |||
27236 | "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA"term, doc, col, offset" ")" | |||
27237 | }; | |||
27238 | ||||
27239 | Fts5VocabTable *pRet = 0; | |||
27240 | int rc = SQLITE_OK0; /* Return code */ | |||
27241 | int bDb; | |||
27242 | ||||
27243 | bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0); | |||
27244 | ||||
27245 | if( argc!=5 && bDb==0 ){ | |||
27246 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("wrong number of vtable arguments"); | |||
27247 | rc = SQLITE_ERROR1; | |||
27248 | }else{ | |||
27249 | i64 nByte; /* Bytes of space to allocate */ | |||
27250 | const char *zDb = bDb ? argv[3] : argv[1]; | |||
27251 | const char *zTab = bDb ? argv[4] : argv[3]; | |||
27252 | const char *zType = bDb ? argv[5] : argv[4]; | |||
27253 | i64 nDb = strlen(zDb)+1; | |||
27254 | i64 nTab = strlen(zTab)+1; | |||
27255 | int eType = 0; | |||
27256 | ||||
27257 | rc = fts5VocabTableType(zType, pzErr, &eType); | |||
27258 | if( rc==SQLITE_OK0 ){ | |||
27259 | assert( eType>=0 && eType<ArraySize(azSchema) )((void) (0)); | |||
27260 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, azSchema[eType]); | |||
27261 | } | |||
27262 | ||||
27263 | nByte = sizeof(Fts5VocabTable) + nDb + nTab; | |||
27264 | pRet = sqlite3Fts5MallocZero(&rc, nByte); | |||
27265 | if( pRet ){ | |||
27266 | pRet->pGlobal = (Fts5Global*)pAux; | |||
27267 | pRet->eType = eType; | |||
27268 | pRet->db = db; | |||
27269 | pRet->zFts5Tbl = (char*)&pRet[1]; | |||
27270 | pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; | |||
27271 | memcpy(pRet->zFts5Tbl, zTab, nTab); | |||
27272 | memcpy(pRet->zFts5Db, zDb, nDb); | |||
27273 | sqlite3Fts5Dequote(pRet->zFts5Tbl); | |||
27274 | sqlite3Fts5Dequote(pRet->zFts5Db); | |||
27275 | } | |||
27276 | } | |||
27277 | ||||
27278 | *ppVTab = (sqlite3_vtab*)pRet; | |||
27279 | return rc; | |||
27280 | } | |||
27281 | ||||
27282 | ||||
27283 | /* | |||
27284 | ** The xConnect() and xCreate() methods for the virtual table. All the | |||
27285 | ** work is done in function fts5VocabInitVtab(). | |||
27286 | */ | |||
27287 | static int fts5VocabConnectMethod( | |||
27288 | sqlite3 *db, /* Database connection */ | |||
27289 | void *pAux, /* Pointer to tokenizer hash table */ | |||
27290 | int argc, /* Number of elements in argv array */ | |||
27291 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
27292 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | |||
27293 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | |||
27294 | ){ | |||
27295 | return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); | |||
27296 | } | |||
27297 | static int fts5VocabCreateMethod( | |||
27298 | sqlite3 *db, /* Database connection */ | |||
27299 | void *pAux, /* Pointer to tokenizer hash table */ | |||
27300 | int argc, /* Number of elements in argv array */ | |||
27301 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
27302 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | |||
27303 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | |||
27304 | ){ | |||
27305 | return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); | |||
27306 | } | |||
27307 | ||||
27308 | /* | |||
27309 | ** Implementation of the xBestIndex method. | |||
27310 | ** | |||
27311 | ** Only constraints of the form: | |||
27312 | ** | |||
27313 | ** term <= ? | |||
27314 | ** term == ? | |||
27315 | ** term >= ? | |||
27316 | ** | |||
27317 | ** are interpreted. Less-than and less-than-or-equal are treated | |||
27318 | ** identically, as are greater-than and greater-than-or-equal. | |||
27319 | */ | |||
27320 | static int fts5VocabBestIndexMethod( | |||
27321 | sqlite3_vtab *pUnused, | |||
27322 | sqlite3_index_info *pInfo | |||
27323 | ){ | |||
27324 | int i; | |||
27325 | int iTermEq = -1; | |||
27326 | int iTermGe = -1; | |||
27327 | int iTermLe = -1; | |||
27328 | int idxNum = (int)pInfo->colUsed; | |||
27329 | int nArg = 0; | |||
27330 | ||||
27331 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
27332 | ||||
27333 | assert( (pInfo->colUsed & FTS5_VOCAB_COLUSED_MASK)==pInfo->colUsed )((void) (0)); | |||
27334 | ||||
27335 | for(i=0; i<pInfo->nConstraint; i++){ | |||
27336 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | |||
27337 | if( p->usable==0 ) continue; | |||
27338 | if( p->iColumn==0 ){ /* term column */ | |||
27339 | if( p->op==SQLITE_INDEX_CONSTRAINT_EQ2 ) iTermEq = i; | |||
27340 | if( p->op==SQLITE_INDEX_CONSTRAINT_LE8 ) iTermLe = i; | |||
27341 | if( p->op==SQLITE_INDEX_CONSTRAINT_LT16 ) iTermLe = i; | |||
27342 | if( p->op==SQLITE_INDEX_CONSTRAINT_GE32 ) iTermGe = i; | |||
27343 | if( p->op==SQLITE_INDEX_CONSTRAINT_GT4 ) iTermGe = i; | |||
27344 | } | |||
27345 | } | |||
27346 | ||||
27347 | if( iTermEq>=0 ){ | |||
27348 | idxNum |= FTS5_VOCAB_TERM_EQ0x0100; | |||
27349 | pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg; | |||
27350 | pInfo->estimatedCost = 100; | |||
27351 | }else{ | |||
27352 | pInfo->estimatedCost = 1000000; | |||
27353 | if( iTermGe>=0 ){ | |||
27354 | idxNum |= FTS5_VOCAB_TERM_GE0x0200; | |||
27355 | pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg; | |||
27356 | pInfo->estimatedCost = pInfo->estimatedCost / 2; | |||
27357 | } | |||
27358 | if( iTermLe>=0 ){ | |||
27359 | idxNum |= FTS5_VOCAB_TERM_LE0x0400; | |||
27360 | pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg; | |||
27361 | pInfo->estimatedCost = pInfo->estimatedCost / 2; | |||
27362 | } | |||
27363 | } | |||
27364 | ||||
27365 | /* This virtual table always delivers results in ascending order of | |||
27366 | ** the "term" column (column 0). So if the user has requested this | |||
27367 | ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the | |||
27368 | ** sqlite3_index_info.orderByConsumed flag to tell the core the results | |||
27369 | ** are already in sorted order. */ | |||
27370 | if( pInfo->nOrderBy==1 | |||
27371 | && pInfo->aOrderBy[0].iColumn==0 | |||
27372 | && pInfo->aOrderBy[0].desc==0 | |||
27373 | ){ | |||
27374 | pInfo->orderByConsumed = 1; | |||
27375 | } | |||
27376 | ||||
27377 | pInfo->idxNum = idxNum; | |||
27378 | return SQLITE_OK0; | |||
27379 | } | |||
27380 | ||||
27381 | /* | |||
27382 | ** Implementation of xOpen method. | |||
27383 | */ | |||
27384 | static int fts5VocabOpenMethod( | |||
27385 | sqlite3_vtab *pVTab, | |||
27386 | sqlite3_vtab_cursor **ppCsr | |||
27387 | ){ | |||
27388 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; | |||
27389 | Fts5Table *pFts5 = 0; | |||
27390 | Fts5VocabCursor *pCsr = 0; | |||
27391 | int rc = SQLITE_OK0; | |||
27392 | sqlite3_stmt *pStmt = 0; | |||
27393 | char *zSql = 0; | |||
27394 | ||||
27395 | if( pTab->bBusy ){ | |||
27396 | pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
27397 | "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl | |||
27398 | ); | |||
27399 | return SQLITE_ERROR1; | |||
27400 | } | |||
27401 | zSql = sqlite3Fts5Mprintf(&rc, | |||
27402 | "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", | |||
27403 | pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl | |||
27404 | ); | |||
27405 | if( zSql ){ | |||
27406 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pTab->db, zSql, -1, &pStmt, 0); | |||
27407 | } | |||
27408 | sqlite3_freesqlite3_api->free(zSql); | |||
27409 | assert( rc==SQLITE_OK || pStmt==0 )((void) (0)); | |||
27410 | if( rc==SQLITE_ERROR1 ) rc = SQLITE_OK0; | |||
27411 | ||||
27412 | pTab->bBusy = 1; | |||
27413 | if( pStmt && sqlite3_stepsqlite3_api->step(pStmt)==SQLITE_ROW100 ){ | |||
27414 | i64 iId = sqlite3_column_int64sqlite3_api->column_int64(pStmt, 0); | |||
27415 | pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId); | |||
27416 | } | |||
27417 | pTab->bBusy = 0; | |||
27418 | ||||
27419 | if( rc==SQLITE_OK0 ){ | |||
27420 | if( pFts5==0 ){ | |||
27421 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
27422 | pStmt = 0; | |||
27423 | if( rc==SQLITE_OK0 ){ | |||
27424 | pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
27425 | "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl | |||
27426 | ); | |||
27427 | rc = SQLITE_ERROR1; | |||
27428 | } | |||
27429 | }else{ | |||
27430 | rc = sqlite3Fts5FlushToDisk(pFts5); | |||
27431 | } | |||
27432 | } | |||
27433 | ||||
27434 | if( rc==SQLITE_OK0 ){ | |||
27435 | i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor); | |||
27436 | pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); | |||
27437 | } | |||
27438 | ||||
27439 | if( pCsr ){ | |||
27440 | pCsr->pFts5 = pFts5; | |||
27441 | pCsr->pStmt = pStmt; | |||
27442 | pCsr->aCnt = (i64*)&pCsr[1]; | |||
27443 | pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol]; | |||
27444 | }else{ | |||
27445 | sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
27446 | } | |||
27447 | ||||
27448 | *ppCsr = (sqlite3_vtab_cursor*)pCsr; | |||
27449 | return rc; | |||
27450 | } | |||
27451 | ||||
27452 | static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ | |||
27453 | pCsr->rowid = 0; | |||
27454 | sqlite3Fts5IterClose(pCsr->pIter); | |||
27455 | sqlite3Fts5StructureRelease(pCsr->pStruct); | |||
27456 | pCsr->pStruct = 0; | |||
27457 | pCsr->pIter = 0; | |||
27458 | sqlite3_freesqlite3_api->free(pCsr->zLeTerm); | |||
27459 | pCsr->nLeTerm = -1; | |||
27460 | pCsr->zLeTerm = 0; | |||
27461 | pCsr->bEof = 0; | |||
27462 | } | |||
27463 | ||||
27464 | /* | |||
27465 | ** Close the cursor. For additional information see the documentation | |||
27466 | ** on the xClose method of the virtual table interface. | |||
27467 | */ | |||
27468 | static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ | |||
27469 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
27470 | fts5VocabResetCursor(pCsr); | |||
27471 | sqlite3Fts5BufferFree(&pCsr->term); | |||
27472 | sqlite3_finalizesqlite3_api->finalize(pCsr->pStmt); | |||
27473 | sqlite3_freesqlite3_api->free(pCsr); | |||
27474 | return SQLITE_OK0; | |||
27475 | } | |||
27476 | ||||
27477 | static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){ | |||
27478 | int rc = SQLITE_OK0; | |||
27479 | ||||
27480 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ){ | |||
27481 | pCsr->bEof = 1; | |||
27482 | }else{ | |||
27483 | const char *zTerm; | |||
27484 | int nTerm; | |||
27485 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | |||
27486 | if( pCsr->nLeTerm>=0 ){ | |||
27487 | int nCmp = MIN(nTerm, pCsr->nLeTerm)(((nTerm) < (pCsr->nLeTerm)) ? (nTerm) : (pCsr->nLeTerm )); | |||
27488 | int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); | |||
27489 | if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ | |||
27490 | pCsr->bEof = 1; | |||
27491 | } | |||
27492 | } | |||
27493 | ||||
27494 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); | |||
27495 | } | |||
27496 | return rc; | |||
27497 | } | |||
27498 | ||||
27499 | static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){ | |||
27500 | int eDetail = pCsr->pFts5->pConfig->eDetail; | |||
27501 | int rc = SQLITE_OK0; | |||
27502 | Fts5IndexIter *pIter = pCsr->pIter; | |||
27503 | i64 *pp = &pCsr->iInstPos; | |||
27504 | int *po = &pCsr->iInstOff; | |||
27505 | ||||
27506 | assert( sqlite3Fts5IterEof(pIter)==0 )((void) (0)); | |||
27507 | assert( pCsr->bEof==0 )((void) (0)); | |||
27508 | while( eDetail==FTS5_DETAIL_NONE1 | |||
27509 | || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp) | |||
27510 | ){ | |||
27511 | pCsr->iInstPos = 0; | |||
27512 | pCsr->iInstOff = 0; | |||
27513 | ||||
27514 | rc = sqlite3Fts5IterNextScan(pCsr->pIter); | |||
27515 | if( rc==SQLITE_OK0 ){ | |||
27516 | rc = fts5VocabInstanceNewTerm(pCsr); | |||
27517 | if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE1 ) break; | |||
27518 | } | |||
27519 | if( rc ){ | |||
27520 | pCsr->bEof = 1; | |||
27521 | break; | |||
27522 | } | |||
27523 | } | |||
27524 | ||||
27525 | return rc; | |||
27526 | } | |||
27527 | ||||
27528 | /* | |||
27529 | ** Advance the cursor to the next row in the table. | |||
27530 | */ | |||
27531 | static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ | |||
27532 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
27533 | Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; | |||
27534 | int nCol = pCsr->pFts5->pConfig->nCol; | |||
27535 | int rc; | |||
27536 | ||||
27537 | rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct); | |||
27538 | if( rc!=SQLITE_OK0 ) return rc; | |||
27539 | pCsr->rowid++; | |||
27540 | ||||
27541 | if( pTab->eType==FTS5_VOCAB_INSTANCE2 ){ | |||
27542 | return fts5VocabInstanceNext(pCsr); | |||
27543 | } | |||
27544 | ||||
27545 | if( pTab->eType==FTS5_VOCAB_COL0 ){ | |||
27546 | for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){ | |||
27547 | if( pCsr->aDoc[pCsr->iCol] ) break; | |||
27548 | } | |||
27549 | } | |||
27550 | ||||
27551 | if( pTab->eType!=FTS5_VOCAB_COL0 || pCsr->iCol>=nCol ){ | |||
27552 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ){ | |||
27553 | pCsr->bEof = 1; | |||
27554 | }else{ | |||
27555 | const char *zTerm; | |||
27556 | int nTerm; | |||
27557 | ||||
27558 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | |||
27559 | assert( nTerm>=0 )((void) (0)); | |||
27560 | if( pCsr->nLeTerm>=0 ){ | |||
27561 | int nCmp = MIN(nTerm, pCsr->nLeTerm)(((nTerm) < (pCsr->nLeTerm)) ? (nTerm) : (pCsr->nLeTerm )); | |||
27562 | int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); | |||
27563 | if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ | |||
27564 | pCsr->bEof = 1; | |||
27565 | return SQLITE_OK0; | |||
27566 | } | |||
27567 | } | |||
27568 | ||||
27569 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); | |||
27570 | memset(pCsr->aCnt, 0, nCol * sizeof(i64)); | |||
27571 | memset(pCsr->aDoc, 0, nCol * sizeof(i64)); | |||
27572 | pCsr->iCol = 0; | |||
27573 | ||||
27574 | assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW )((void) (0)); | |||
27575 | while( rc==SQLITE_OK0 ){ | |||
27576 | int eDetail = pCsr->pFts5->pConfig->eDetail; | |||
27577 | const u8 *pPos; int nPos; /* Position list */ | |||
27578 | i64 iPos = 0; /* 64-bit position read from poslist */ | |||
27579 | int iOff = 0; /* Current offset within position list */ | |||
27580 | ||||
27581 | pPos = pCsr->pIter->pData; | |||
27582 | nPos = pCsr->pIter->nData; | |||
27583 | ||||
27584 | switch( pTab->eType ){ | |||
27585 | case FTS5_VOCAB_ROW1: | |||
27586 | /* Do not bother counting the number of instances if the "cnt" | |||
27587 | ** column is not being read (according to colUsed). */ | |||
27588 | if( eDetail==FTS5_DETAIL_FULL0 && (pCsr->colUsed & 0x04) ){ | |||
27589 | while( iPos<nPos ){ | |||
27590 | u32 ii; | |||
27591 | fts5FastGetVarint32(pPos, iPos, ii){ ii = (pPos)[iPos++]; if( ii & 0x80 ){ iPos--; iPos += sqlite3Fts5GetVarint32 (&(pPos)[iPos],(u32*)&(ii)); } }; | |||
27592 | if( ii==1 ){ | |||
27593 | /* New column in the position list */ | |||
27594 | fts5FastGetVarint32(pPos, iPos, ii){ ii = (pPos)[iPos++]; if( ii & 0x80 ){ iPos--; iPos += sqlite3Fts5GetVarint32 (&(pPos)[iPos],(u32*)&(ii)); } }; | |||
27595 | }else{ | |||
27596 | /* An instance - increment pCsr->aCnt[] */ | |||
27597 | pCsr->aCnt[0]++; | |||
27598 | } | |||
27599 | } | |||
27600 | } | |||
27601 | pCsr->aDoc[0]++; | |||
27602 | break; | |||
27603 | ||||
27604 | case FTS5_VOCAB_COL0: | |||
27605 | if( eDetail==FTS5_DETAIL_FULL0 ){ | |||
27606 | int iCol = -1; | |||
27607 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ | |||
27608 | int ii = FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF); | |||
27609 | if( iCol!=ii ){ | |||
27610 | if( ii>=nCol ){ | |||
27611 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
27612 | break; | |||
27613 | } | |||
27614 | pCsr->aDoc[ii]++; | |||
27615 | iCol = ii; | |||
27616 | } | |||
27617 | pCsr->aCnt[ii]++; | |||
27618 | } | |||
27619 | }else if( eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
27620 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ | |||
27621 | assert_nc( iPos>=0 && iPos<nCol )((void) (0)); | |||
27622 | if( iPos>=nCol ){ | |||
27623 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
27624 | break; | |||
27625 | } | |||
27626 | pCsr->aDoc[iPos]++; | |||
27627 | } | |||
27628 | }else{ | |||
27629 | assert( eDetail==FTS5_DETAIL_NONE )((void) (0)); | |||
27630 | pCsr->aDoc[0]++; | |||
27631 | } | |||
27632 | break; | |||
27633 | ||||
27634 | default: | |||
27635 | assert( pTab->eType==FTS5_VOCAB_INSTANCE )((void) (0)); | |||
27636 | break; | |||
27637 | } | |||
27638 | ||||
27639 | if( rc==SQLITE_OK0 ){ | |||
27640 | rc = sqlite3Fts5IterNextScan(pCsr->pIter); | |||
27641 | } | |||
27642 | if( pTab->eType==FTS5_VOCAB_INSTANCE2 ) break; | |||
27643 | ||||
27644 | if( rc==SQLITE_OK0 ){ | |||
27645 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | |||
27646 | if( nTerm!=pCsr->term.n | |||
27647 | || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm)) | |||
27648 | ){ | |||
27649 | break; | |||
27650 | } | |||
27651 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ) break; | |||
27652 | } | |||
27653 | } | |||
27654 | } | |||
27655 | } | |||
27656 | ||||
27657 | if( rc==SQLITE_OK0 && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL0 ){ | |||
27658 | for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++); | |||
27659 | if( pCsr->iCol==nCol ){ | |||
27660 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
27661 | } | |||
27662 | } | |||
27663 | return rc; | |||
27664 | } | |||
27665 | ||||
27666 | /* | |||
27667 | ** This is the xFilter implementation for the virtual table. | |||
27668 | */ | |||
27669 | static int fts5VocabFilterMethod( | |||
27670 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ | |||
27671 | int idxNum, /* Strategy index */ | |||
27672 | const char *zUnused, /* Unused */ | |||
27673 | int nUnused, /* Number of elements in apVal */ | |||
27674 | sqlite3_value **apVal /* Arguments for the indexing scheme */ | |||
27675 | ){ | |||
27676 | Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; | |||
27677 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
27678 | int eType = pTab->eType; | |||
27679 | int rc = SQLITE_OK0; | |||
27680 | ||||
27681 | int iVal = 0; | |||
27682 | int f = FTS5INDEX_QUERY_SCAN0x0008; | |||
27683 | const char *zTerm = 0; | |||
27684 | int nTerm = 0; | |||
27685 | ||||
27686 | sqlite3_value *pEq = 0; | |||
27687 | sqlite3_value *pGe = 0; | |||
27688 | sqlite3_value *pLe = 0; | |||
27689 | ||||
27690 | UNUSED_PARAM2(zUnused, nUnused)(void)(zUnused), (void)(nUnused); | |||
27691 | ||||
27692 | fts5VocabResetCursor(pCsr); | |||
27693 | if( idxNum & FTS5_VOCAB_TERM_EQ0x0100 ) pEq = apVal[iVal++]; | |||
27694 | if( idxNum & FTS5_VOCAB_TERM_GE0x0200 ) pGe = apVal[iVal++]; | |||
27695 | if( idxNum & FTS5_VOCAB_TERM_LE0x0400 ) pLe = apVal[iVal++]; | |||
27696 | pCsr->colUsed = (idxNum & FTS5_VOCAB_COLUSED_MASK0xFF); | |||
27697 | ||||
27698 | if( pEq ){ | |||
27699 | zTerm = (const char *)sqlite3_value_textsqlite3_api->value_text(pEq); | |||
27700 | nTerm = sqlite3_value_bytessqlite3_api->value_bytes(pEq); | |||
27701 | f = FTS5INDEX_QUERY_NOTOKENDATA0x0080; | |||
27702 | }else{ | |||
27703 | if( pGe ){ | |||
27704 | zTerm = (const char *)sqlite3_value_textsqlite3_api->value_text(pGe); | |||
27705 | nTerm = sqlite3_value_bytessqlite3_api->value_bytes(pGe); | |||
27706 | } | |||
27707 | if( pLe ){ | |||
27708 | const char *zCopy = (const char *)sqlite3_value_textsqlite3_api->value_text(pLe); | |||
27709 | if( zCopy==0 ) zCopy = ""; | |||
27710 | pCsr->nLeTerm = sqlite3_value_bytessqlite3_api->value_bytes(pLe); | |||
27711 | pCsr->zLeTerm = sqlite3_mallocsqlite3_api->malloc(pCsr->nLeTerm+1); | |||
27712 | if( pCsr->zLeTerm==0 ){ | |||
27713 | rc = SQLITE_NOMEM7; | |||
27714 | }else{ | |||
27715 | memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1); | |||
27716 | } | |||
27717 | } | |||
27718 | } | |||
27719 | ||||
27720 | if( rc==SQLITE_OK0 ){ | |||
27721 | Fts5Index *pIndex = pCsr->pFts5->pIndex; | |||
27722 | rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); | |||
27723 | if( rc==SQLITE_OK0 ){ | |||
27724 | pCsr->pStruct = sqlite3Fts5StructureRef(pIndex); | |||
27725 | } | |||
27726 | } | |||
27727 | if( rc==SQLITE_OK0 && eType==FTS5_VOCAB_INSTANCE2 ){ | |||
27728 | rc = fts5VocabInstanceNewTerm(pCsr); | |||
27729 | } | |||
27730 | if( rc==SQLITE_OK0 && !pCsr->bEof | |||
27731 | && (eType!=FTS5_VOCAB_INSTANCE2 | |||
27732 | || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE1) | |||
27733 | ){ | |||
27734 | rc = fts5VocabNextMethod(pCursor); | |||
27735 | } | |||
27736 | ||||
27737 | return rc; | |||
27738 | } | |||
27739 | ||||
27740 | /* | |||
27741 | ** This is the xEof method of the virtual table. SQLite calls this | |||
27742 | ** routine to find out if it has reached the end of a result set. | |||
27743 | */ | |||
27744 | static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){ | |||
27745 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
27746 | return pCsr->bEof; | |||
27747 | } | |||
27748 | ||||
27749 | static int fts5VocabColumnMethod( | |||
27750 | sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ | |||
27751 | sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ | |||
27752 | int iCol /* Index of column to read value from */ | |||
27753 | ){ | |||
27754 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
27755 | int eDetail = pCsr->pFts5->pConfig->eDetail; | |||
27756 | int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType; | |||
27757 | i64 iVal = 0; | |||
27758 | ||||
27759 | if( iCol==0 ){ | |||
27760 | sqlite3_result_textsqlite3_api->result_text( | |||
27761 | pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1) | |||
27762 | ); | |||
27763 | }else if( eType==FTS5_VOCAB_COL0 ){ | |||
27764 | assert( iCol==1 || iCol==2 || iCol==3 )((void) (0)); | |||
27765 | if( iCol==1 ){ | |||
27766 | if( eDetail!=FTS5_DETAIL_NONE1 ){ | |||
27767 | const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol]; | |||
27768 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
27769 | } | |||
27770 | }else if( iCol==2 ){ | |||
27771 | iVal = pCsr->aDoc[pCsr->iCol]; | |||
27772 | }else{ | |||
27773 | iVal = pCsr->aCnt[pCsr->iCol]; | |||
27774 | } | |||
27775 | }else if( eType==FTS5_VOCAB_ROW1 ){ | |||
27776 | assert( iCol==1 || iCol==2 )((void) (0)); | |||
27777 | if( iCol==1 ){ | |||
27778 | iVal = pCsr->aDoc[0]; | |||
27779 | }else{ | |||
27780 | iVal = pCsr->aCnt[0]; | |||
27781 | } | |||
27782 | }else{ | |||
27783 | assert( eType==FTS5_VOCAB_INSTANCE )((void) (0)); | |||
27784 | switch( iCol ){ | |||
27785 | case 1: | |||
27786 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->pIter->iRowid); | |||
27787 | break; | |||
27788 | case 2: { | |||
27789 | int ii = -1; | |||
27790 | if( eDetail==FTS5_DETAIL_FULL0 ){ | |||
27791 | ii = FTS5_POS2COLUMN(pCsr->iInstPos)(int)((pCsr->iInstPos >> 32) & 0x7FFFFFFF); | |||
27792 | }else if( eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
27793 | ii = (int)pCsr->iInstPos; | |||
27794 | } | |||
27795 | if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){ | |||
27796 | const char *z = pCsr->pFts5->pConfig->azCol[ii]; | |||
27797 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
27798 | } | |||
27799 | break; | |||
27800 | } | |||
27801 | default: { | |||
27802 | assert( iCol==3 )((void) (0)); | |||
27803 | if( eDetail==FTS5_DETAIL_FULL0 ){ | |||
27804 | int ii = FTS5_POS2OFFSET(pCsr->iInstPos)(int)(pCsr->iInstPos & 0x7FFFFFFF); | |||
27805 | sqlite3_result_intsqlite3_api->result_int(pCtx, ii); | |||
27806 | } | |||
27807 | break; | |||
27808 | } | |||
27809 | } | |||
27810 | } | |||
27811 | ||||
27812 | if( iVal>0 ) sqlite3_result_int64sqlite3_api->result_int64(pCtx, iVal); | |||
27813 | return SQLITE_OK0; | |||
27814 | } | |||
27815 | ||||
27816 | /* | |||
27817 | ** This is the xRowid method. The SQLite core calls this routine to | |||
27818 | ** retrieve the rowid for the current row of the result set. The | |||
27819 | ** rowid should be written to *pRowid. | |||
27820 | */ | |||
27821 | static int fts5VocabRowidMethod( | |||
27822 | sqlite3_vtab_cursor *pCursor, | |||
27823 | sqlite_int64 *pRowid | |||
27824 | ){ | |||
27825 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
27826 | *pRowid = pCsr->rowid; | |||
27827 | return SQLITE_OK0; | |||
27828 | } | |||
27829 | ||||
27830 | static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ | |||
27831 | static const sqlite3_module fts5Vocab = { | |||
27832 | /* iVersion */ 2, | |||
27833 | /* xCreate */ fts5VocabCreateMethod, | |||
27834 | /* xConnect */ fts5VocabConnectMethod, | |||
27835 | /* xBestIndex */ fts5VocabBestIndexMethod, | |||
27836 | /* xDisconnect */ fts5VocabDisconnectMethod, | |||
27837 | /* xDestroy */ fts5VocabDestroyMethod, | |||
27838 | /* xOpen */ fts5VocabOpenMethod, | |||
27839 | /* xClose */ fts5VocabCloseMethod, | |||
27840 | /* xFilter */ fts5VocabFilterMethod, | |||
27841 | /* xNext */ fts5VocabNextMethod, | |||
27842 | /* xEof */ fts5VocabEofMethod, | |||
27843 | /* xColumn */ fts5VocabColumnMethod, | |||
27844 | /* xRowid */ fts5VocabRowidMethod, | |||
27845 | /* xUpdate */ 0, | |||
27846 | /* xBegin */ 0, | |||
27847 | /* xSync */ 0, | |||
27848 | /* xCommit */ 0, | |||
27849 | /* xRollback */ 0, | |||
27850 | /* xFindFunction */ 0, | |||
27851 | /* xRename */ 0, | |||
27852 | /* xSavepoint */ 0, | |||
27853 | /* xRelease */ 0, | |||
27854 | /* xRollbackTo */ 0, | |||
27855 | /* xShadowName */ 0, | |||
27856 | /* xIntegrity */ 0 | |||
27857 | }; | |||
27858 | void *p = (void*)pGlobal; | |||
27859 | ||||
27860 | return sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0); | |||
27861 | } | |||
27862 | ||||
27863 | ||||
27864 | /* Here ends the fts5.c composite file. */ | |||
27865 | #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ |