Bug Summary

File: root/firefox-clang/third_party/sqlite3/ext/fts5.c
Warning: line 13185, column 5
Null pointer passed to 1st parameter expecting 'nonnull'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name fts5.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -ffp-contract=off -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/third_party/sqlite3/ext -fcoverage-compilation-dir=/root/firefox-clang/obj-x86_64-pc-linux-gnu/third_party/sqlite3/ext -resource-dir /usr/lib/llvm-21/lib/clang/21 -include /root/firefox-clang/obj-x86_64-pc-linux-gnu/mozilla-config.h -U _FORTIFY_SOURCE -D _FORTIFY_SOURCE=2 -D _GLIBCXX_ASSERTIONS -D DEBUG=1 -I /root/firefox-clang/third_party/sqlite3/ext -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/third_party/sqlite3/ext -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nspr -I /root/firefox-clang/obj-x86_64-pc-linux-gnu/dist/include/nss -D MOZILLA_CLIENT -internal-isystem /usr/lib/llvm-21/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 
-Wno-error=tautological-type-limit-compare -Wno-range-loop-analysis -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=free-nonheap-object -Wno-error=atomic-alignment -Wno-error=deprecated-builtins -Wno-psabi -Wno-error=builtin-macro-redefined -Wno-unknown-warning-option -ferror-limit 19 -fstrict-flex-arrays=1 -stack-protector 2 -fstack-clash-protection -ftrivial-auto-var-init=pattern -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2025-06-27-100320-3286336-1 -x c /root/firefox-clang/third_party/sqlite3/ext/fts5.c
1
2/*
3** This, the "fts5.c" source file, is a composite file that is itself
4** assembled from the following files:
5**
6** fts5.h
7** fts5Int.h
8** fts5parse.h <--- Generated from fts5parse.y by Lemon
9** fts5parse.c <--- Generated from fts5parse.y by Lemon
10** fts5_aux.c
11** fts5_buffer.c
12** fts5_config.c
13** fts5_expr.c
14** fts5_hash.c
15** fts5_index.c
16** fts5_main.c
17** fts5_storage.c
18** fts5_tokenize.c
19** fts5_unicode2.c
20** fts5_varint.c
21** fts5_vocab.c
22*/
23#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5)
24
/*
** Unless SQLITE_DEBUG is set, make sure NDEBUG is defined so that assert()
** is disabled. Conversely, if SQLITE_DEBUG is set, make sure NDEBUG is not
** defined so that assert() stays enabled.
*/
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
# define NDEBUG 1
#endif
#if defined(NDEBUG) && defined(SQLITE_DEBUG)
# undef NDEBUG
#endif

/* Include the fixed-width integer headers when the build configuration
** reports that they are available. */
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif
38#line 1 "fts5.h"
39/*
40** 2014 May 31
41**
42** The author disclaims copyright to this source code. In place of
43** a legal notice, here is a blessing:
44**
45** May you do good and not evil.
46** May you find forgiveness for yourself and forgive others.
47** May you share freely, never taking more than you give.
48**
49******************************************************************************
50**
51** Interfaces to extend FTS5. Using the interfaces defined in this file,
52** FTS5 may be extended with:
53**
54** * custom tokenizers, and
55** * custom auxiliary functions.
56*/
57
58
59#ifndef _FTS5_H
60#define _FTS5_H
61
62#include "sqlite3.h"
63
64#ifdef __cplusplus
65extern "C" {
66#endif
67
68/*************************************************************************
69** CUSTOM AUXILIARY FUNCTIONS
70**
71** Virtual table implementations may overload SQL functions by implementing
72** the sqlite3_module.xFindFunction() method.
73*/
74
/*
** Forward declarations for the auxiliary-function interface. Fts5Context is
** only declared (not defined) in this part of the file; it is the first
** argument of every Fts5ExtensionApi method.
*/
75typedef struct Fts5ExtensionApi Fts5ExtensionApi;
76typedef struct Fts5Context Fts5Context;
77typedef struct Fts5PhraseIter Fts5PhraseIter;
78
/*
** Signature of an FTS5 auxiliary (extension) function implementation.
**
** pApi is the method table used to call back into FTS5, and pFts is the
** context to pass as the first argument of each pApi method. pCtx is the
** context through which the result or an error is returned. apVal[] holds
** the nVal trailing arguments.
*/
79typedef void (*fts5_extension_function)(
80 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
81 Fts5Context *pFts, /* First arg to pass to pApi functions */
82 sqlite3_context *pCtx, /* Context for returning result/error */
83 int nVal, /* Number of values in apVal[] array */
84 sqlite3_value **apVal /* Array of trailing arguments */
85);
86
/*
** Iterator state used with the xPhraseFirst()/xPhraseNext() and
** xPhraseFirstColumn()/xPhraseNextColumn() methods of Fts5ExtensionApi.
** Applications should not modify the fields directly; the object is only
** meant to be passed to those methods.
*/
87struct Fts5PhraseIter {
88 const unsigned char *a;
89 const unsigned char *b;
90};
91
92/*
93** EXTENSION API FUNCTIONS
94**
95** xUserData(pFts):
96** Return a copy of the pUserData pointer passed to the xCreateFunction()
97** API when the extension function was registered.
98**
99** xColumnTotalSize(pFts, iCol, pnToken):
100** If parameter iCol is less than zero, set output variable *pnToken
101** to the total number of tokens in the FTS5 table. Or, if iCol is
102** non-negative but less than the number of columns in the table, set
103** *pnToken to the total number of tokens in column iCol, considering all
104** rows in the FTS5 table.
105**
106** If parameter iCol is greater than or equal to the number of columns
107** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
108** an OOM condition or IO error), an appropriate SQLite error code is
109** returned.
110**
111** xColumnCount(pFts):
112** Return the number of columns in the table.
113**
114** xColumnSize(pFts, iCol, pnToken):
115** If parameter iCol is less than zero, set output variable *pnToken
116** to the total number of tokens in the current row. Or, if iCol is
117** non-negative but less than the number of columns in the table, set
118** *pnToken to the number of tokens in column iCol of the current row.
119**
120** If parameter iCol is greater than or equal to the number of columns
121** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
122** an OOM condition or IO error), an appropriate SQLite error code is
123** returned.
124**
125** This function may be quite inefficient if used with an FTS5 table
126** created with the "columnsize=0" option.
127**
128** xColumnText:
129** If parameter iCol is less than zero, or greater than or equal to the
130** number of columns in the table, SQLITE_RANGE is returned.
131**
132** Otherwise, this function attempts to retrieve the text of column iCol of
133** the current document. If successful, (*pz) is set to point to a buffer
134** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
135** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
136** if an error occurs, an SQLite error code is returned and the final values
137** of (*pz) and (*pn) are undefined.
138**
139** xPhraseCount:
140** Returns the number of phrases in the current query expression.
141**
142** xPhraseSize:
143** If parameter iPhrase is less than zero, or greater than or equal to the
144** number of phrases in the current query, as returned by xPhraseCount,
145** 0 is returned. Otherwise, this function returns the number of tokens in
146** phrase iPhrase of the query. Phrases are numbered starting from zero.
147**
148** xInstCount:
149** Set *pnInst to the total number of occurrences of all phrases within
150** the query within the current row. Return SQLITE_OK if successful, or
151** an error code (i.e. SQLITE_NOMEM) if an error occurs.
152**
153** This API can be quite slow if used with an FTS5 table created with the
154** "detail=none" or "detail=column" option. If the FTS5 table is created
155** with either "detail=none" or "detail=column" and "content=" option
156** (i.e. if it is a contentless table), then this API always returns 0.
157**
158** xInst:
159** Query for the details of phrase match iIdx within the current row.
160** Phrase matches are numbered starting from zero, so the iIdx argument
161** should be greater than or equal to zero and smaller than the value
162** output by xInstCount(). If iIdx is less than zero or greater than
163** or equal to the value returned by xInstCount(), SQLITE_RANGE is returned.
164**
165** Otherwise, output parameter *piPhrase is set to the phrase number, *piCol
166** to the column in which it occurs and *piOff the token offset of the
167** first token of the phrase. SQLITE_OK is returned if successful, or an
168** error code (i.e. SQLITE_NOMEM) if an error occurs.
169**
170** This API can be quite slow if used with an FTS5 table created with the
171** "detail=none" or "detail=column" option.
172**
173** xRowid:
174** Returns the rowid of the current row.
175**
176** xTokenize:
177** Tokenize text using the tokenizer belonging to the FTS5 table.
178**
179** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
180** This API function is used to query the FTS table for phrase iPhrase
181** of the current query. Specifically, a query equivalent to:
182**
183** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
184**
185** with $p set to a phrase equivalent to the phrase iPhrase of the
186** current query is executed. Any column filter that applies to
187** phrase iPhrase of the current query is included in $p. For each
188** row visited, the callback function passed as the fourth argument
189** is invoked. The context and API objects passed to the callback
190** function may be used to access the properties of each matched row.
191** Invoking Api.xUserData() returns a copy of the pointer passed as
192** the third argument to pUserData.
193**
194** If parameter iPhrase is less than zero, or greater than or equal to
195** the number of phrases in the query, as returned by xPhraseCount(),
196** this function returns SQLITE_RANGE.
197**
198** If the callback function returns any value other than SQLITE_OK, the
199** query is abandoned and the xQueryPhrase function returns immediately.
200** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
201** Otherwise, the error code is propagated upwards.
202**
203** If the query runs to completion without incident, SQLITE_OK is returned.
204** Or, if some error occurs before the query completes or is aborted by
205** the callback, an SQLite error code is returned.
206**
207**
208** xSetAuxdata(pFts5, pAux, xDelete)
209**
210** Save the pointer passed as the second argument as the extension function's
211** "auxiliary data". The pointer may then be retrieved by the current or any
212** future invocation of the same fts5 extension function made as part of
213** the same MATCH query using the xGetAuxdata() API.
214**
215** Each extension function is allocated a single auxiliary data slot for
216** each FTS query (MATCH expression). If the extension function is invoked
217** more than once for a single FTS query, then all invocations share a
218** single auxiliary data context.
219**
220** If there is already an auxiliary data pointer when this function is
221** invoked, then it is replaced by the new pointer. If an xDelete callback
222** was specified along with the original pointer, it is invoked at this
223** point.
224**
225** The xDelete callback, if one is specified, is also invoked on the
226** auxiliary data pointer after the FTS5 query has finished.
227**
228** If an error (e.g. an OOM condition) occurs within this function,
229** the auxiliary data is set to NULL and an error code returned. If the
230** xDelete parameter was not NULL, it is invoked on the auxiliary data
231** pointer before returning.
232**
233**
234** xGetAuxdata(pFts5, bClear)
235**
236** Returns the current auxiliary data pointer for the fts5 extension
237** function. See the xSetAuxdata() method for details.
238**
239** If the bClear argument is non-zero, then the auxiliary data is cleared
240** (set to NULL) before this function returns. In this case the xDelete,
241** if any, is not invoked.
242**
243**
244** xRowCount(pFts5, pnRow)
245**
246** This function is used to retrieve the total number of rows in the table.
247** In other words, the same value that would be returned by:
248**
249** SELECT count(*) FROM ftstable;
250**
251** xPhraseFirst()
252** This function is used, along with type Fts5PhraseIter and the xPhraseNext
253** method, to iterate through all instances of a single query phrase within
254** the current row. This is the same information as is accessible via the
255** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
256** to use, this API may be faster under some circumstances. To iterate
257** through instances of phrase iPhrase, use the following code:
258**
259** Fts5PhraseIter iter;
260** int iCol, iOff;
261** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
262** iCol>=0;
263** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
264** ){
265** // An instance of phrase iPhrase at offset iOff of column iCol
266** }
267**
268** The Fts5PhraseIter structure is defined above. Applications should not
269** modify this structure directly - it should only be used as shown above
270** with the xPhraseFirst() and xPhraseNext() API methods (and by
271** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
272**
273** This API can be quite slow if used with an FTS5 table created with the
274** "detail=none" or "detail=column" option. If the FTS5 table is created
275** with either "detail=none" or "detail=column" and "content=" option
276** (i.e. if it is a contentless table), then this API always iterates
277** through an empty set (all calls to xPhraseFirst() set iCol to -1).
278**
279** In all cases, matches are visited in (column ASC, offset ASC) order.
280** i.e. all those in column 0, sorted by offset, followed by those in
281** column 1, etc.
282**
283** xPhraseNext()
284** See xPhraseFirst above.
285**
286** xPhraseFirstColumn()
287** This function and xPhraseNextColumn() are similar to the xPhraseFirst()
288** and xPhraseNext() APIs described above. The difference is that instead
289** of iterating through all instances of a phrase in the current row, these
290** APIs are used to iterate through the set of columns in the current row
291** that contain one or more instances of a specified phrase. For example:
292**
293** Fts5PhraseIter iter;
294** int iCol;
295** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
296** iCol>=0;
297** pApi->xPhraseNextColumn(pFts, &iter, &iCol)
298** ){
299** // Column iCol contains at least one instance of phrase iPhrase
300** }
301**
302** This API can be quite slow if used with an FTS5 table created with the
303** "detail=none" option. If the FTS5 table is created with both the
304** "detail=none" and "content=" options (i.e. if it is a contentless table),
305** then this API always iterates through an empty set (all calls to
306** xPhraseFirstColumn() set iCol to -1).
307**
308** The information accessed using this API and its companion
309** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
310** (or xInst/xInstCount). The chief advantage of this API is that it is
311** significantly more efficient than those alternatives when used with
312** "detail=column" tables.
313**
314** xPhraseNextColumn()
315** See xPhraseFirstColumn above.
316**
317** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken)
318** This is used to access token iToken of phrase iPhrase of the current
319** query. Before returning, output parameter *ppToken is set to point
320** to a buffer containing the requested token, and *pnToken to the
321** size of this buffer in bytes.
322**
323** If iPhrase or iToken are less than zero, or if iPhrase is greater than
324** or equal to the number of phrases in the query as reported by
325** xPhraseCount(), or if iToken is equal to or greater than the number of
326** tokens in the phrase, SQLITE_RANGE is returned and *ppToken and *pnToken
327** are both zeroed.
328**
329** The output text is not a copy of the query text that specified the
330** token. It is the output of the tokenizer module. For tokendata=1
331** tables, this includes any embedded 0x00 and trailing data.
332**
333** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken)
334** This is used to access token iToken of phrase hit iIdx within the
335** current row. If iIdx is less than zero or greater than or equal to the
336** value returned by xInstCount(), SQLITE_RANGE is returned. Otherwise,
337** output variable (*ppToken) is set to point to a buffer containing the
338** matching document token, and (*pnToken) to the size of that buffer in
339** bytes.
340**
341** The output text is not a copy of the document text that was tokenized.
342** It is the output of the tokenizer module. For tokendata=1 tables, this
343** includes any embedded 0x00 and trailing data.
344**
345** This API may be slow in some cases if the token identified by parameters
346** iIdx and iToken matched a prefix token in the query. In most cases, the
347** first call to this API for each prefix token in the query is forced
348** to scan the portion of the full-text index that matches the prefix
349** token to collect the extra data required by this API. If the prefix
350** token matches a large number of token instances in the document set,
351** this may be a performance problem.
352**
353** If the user knows in advance that a query may use this API for a
354** prefix token, FTS5 may be configured to collect all required data as part
355** of the initial querying of the full-text index, avoiding the second scan
356** entirely. This also causes prefix queries that do not use this API to
357** run more slowly and use more memory. FTS5 may be configured in this way
358** either on a per-table basis using the [FTS5 insttoken | 'insttoken']
359** option, or on a per-query basis using the
360** [fts5_insttoken | fts5_insttoken()] user function.
361**
362** This API can be quite slow if used with an FTS5 table created with the
363** "detail=none" or "detail=column" option.
364**
365** xColumnLocale(pFts5, iCol, pzLocale, pnLocale)
366** If parameter iCol is less than zero, or greater than or equal to the
367** number of columns in the table, SQLITE_RANGE is returned.
368**
369** Otherwise, this function attempts to retrieve the locale associated
370** with column iCol of the current row. Usually, there is no associated
371** locale, and output parameters (*pzLocale) and (*pnLocale) are set
372** to NULL and 0, respectively. However, if the fts5_locale() function
373** was used to associate a locale with the value when it was inserted
374** into the fts5 table, then (*pzLocale) is set to point to a nul-terminated
375** buffer containing the name of the locale in utf-8 encoding. (*pnLocale)
376** is set to the size in bytes of the buffer, not including the
377** nul-terminator.
378**
379** If successful, SQLITE_OK is returned. Or, if an error occurs, an
380** SQLite error code is returned. The final value of the output parameters
381** is undefined in this case.
382**
383** xTokenize_v2:
384** Tokenize text using the tokenizer belonging to the FTS5 table. This
385** API is the same as the xTokenize() API, except that it allows a tokenizer
386** locale to be specified.
387*/
/*
** Method table handed to auxiliary functions as the pApi argument of
** fts5_extension_function. Every method takes the Fts5Context* as its first
** argument. The behaviour of each method is described in the
** "EXTENSION API FUNCTIONS" comment earlier in this file.
*/
388struct Fts5ExtensionApi {
389 int iVersion; /* Currently always set to 4 */
390
391 void *(*xUserData)(Fts5Context*);
392
393 int (*xColumnCount)(Fts5Context*);
394 int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
395 int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
396
 /* Tokenize text with the table's tokenizer; xToken() is invoked once per
 ** token, with pCtx passed through as its first argument. */
397 int (*xTokenize)(Fts5Context*,
398 const char *pText, int nText, /* Text to tokenize */
399 void *pCtx, /* Context passed to xToken() */
400 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
401 );
402
403 int (*xPhraseCount)(Fts5Context*);
404 int (*xPhraseSize)(Fts5Context*, int iPhrase);
405
406 int (*xInstCount)(Fts5Context*, int *pnInst);
407 int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
408
409 sqlite3_int64 (*xRowid)(Fts5Context*);
410 int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
411 int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
412
 /* The final (unnamed) argument of xQueryPhrase is the callback invoked for
 ** each row visited by the phrase query. */
413 int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
414 int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
415 );
416 int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
417 void *(*xGetAuxdata)(Fts5Context*, int bClear);
418
 /* The unnamed trailing int* arguments of xPhraseFirst are the column and
 ** offset outputs (iCol and iOff in the usage example documented above). */
419 int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
420 void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
421
 /* The unnamed trailing int* of xPhraseFirstColumn is the column output. */
422 int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
423 void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
424
425 /* Below this point are iVersion>=3 only */
426 int (*xQueryToken)(Fts5Context*,
427 int iPhrase, int iToken,
428 const char **ppToken, int *pnToken
429 );
 /* The unnamed trailing arguments of xInstToken are the ppToken and pnToken
 ** outputs. */
430 int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*);
431
432 /* Below this point are iVersion>=4 only */
433 int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn);
 /* Same as xTokenize, but also takes the locale to pass to the tokenizer. */
434 int (*xTokenize_v2)(Fts5Context*,
435 const char *pText, int nText, /* Text to tokenize */
436 const char *pLocale, int nLocale, /* Locale to pass to tokenizer */
437 void *pCtx, /* Context passed to xToken() */
438 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
439 );
440};
441
442/*
443** CUSTOM AUXILIARY FUNCTIONS
444*************************************************************************/
445
446/*************************************************************************
447** CUSTOM TOKENIZERS
448**
449** Applications may also register custom tokenizer types. A tokenizer
450** is registered by providing fts5 with a populated instance of the
451** following structure. All structure methods must be defined, setting
452** any member of the fts5_tokenizer struct to NULL leads to undefined
453** behaviour. The structure methods are expected to function as follows:
454**
455** xCreate:
456** This function is used to allocate and initialize a tokenizer instance.
457** A tokenizer instance is required to actually tokenize text.
458**
459** The first argument passed to this function is a copy of the (void*)
460** pointer provided by the application when the fts5_tokenizer_v2 object
461** was registered with FTS5 (the third argument to xCreateTokenizer()).
462** The second and third arguments are an array of nul-terminated strings
463** containing the tokenizer arguments, if any, specified following the
464** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
465** to create the FTS5 table.
466**
467** The final argument is an output variable. If successful, (*ppOut)
468** should be set to point to the new tokenizer handle and SQLITE_OK
469** returned. If an error occurs, some value other than SQLITE_OK should
470** be returned. In this case, fts5 assumes that the final value of *ppOut
471** is undefined.
472**
473** xDelete:
474** This function is invoked to delete a tokenizer handle previously
475** allocated using xCreate(). Fts5 guarantees that this function will
476** be invoked exactly once for each successful call to xCreate().
477**
478** xTokenize:
479** This function is expected to tokenize the nText byte string indicated
480** by argument pText. pText may or may not be nul-terminated. The first
481** argument passed to this function is a pointer to an Fts5Tokenizer object
482** returned by an earlier call to xCreate().
483**
484** The third argument indicates the reason that FTS5 is requesting
485** tokenization of the supplied text. This is always one of the following
486** four values:
487**
488** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
489** or removed from the FTS table. The tokenizer is being invoked to
490** determine the set of tokens to add to (or delete from) the
491** FTS index.
492**
493** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
494** against the FTS index. The tokenizer is being called to tokenize
495** a bareword or quoted string specified as part of the query.
496**
497** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
498** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
499** followed by a "*" character, indicating that the last token
500** returned by the tokenizer will be treated as a token prefix.
501**
502** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
503** satisfy an Fts5ExtensionApi.xTokenize() request made by an auxiliary
504** function. Or an Fts5ExtensionApi.xColumnSize() request made by the
505** same on a columnsize=0 database.
506** </ul>
507**
508** The sixth and seventh arguments passed to xTokenize() - pLocale and
509** nLocale - are a pointer to a buffer containing the locale to use for
510** tokenization (e.g. "en_US") and its size in bytes, respectively. The
511** pLocale buffer is not nul-terminated. pLocale may be passed NULL (in
512** which case nLocale is always 0) to indicate that the tokenizer should
513** use its default locale.
514**
515** For each token in the input string, the supplied callback xToken() must
516** be invoked. The first argument to it should be a copy of the pointer
517** passed as the second argument to xTokenize(). The third and fourth
518** arguments are a pointer to a buffer containing the token text, and the
519** size of the token in bytes. The 5th and 6th arguments are the byte offsets
520** of the first byte of and first byte immediately following the text from
521** which the token is derived within the input.
522**
523** The second argument passed to the xToken() callback ("tflags") should
524** normally be set to 0. The exception is if the tokenizer supports
525** synonyms. In this case see the discussion below for details.
526**
527** FTS5 assumes the xToken() callback is invoked for each token in the
528** order that they occur within the input text.
529**
530** If an xToken() callback returns any value other than SQLITE_OK, then
531** the tokenization should be abandoned and the xTokenize() method should
532** immediately return a copy of the xToken() return value. Or, if the
533** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
534** if an error occurs with the xTokenize() implementation itself, it
535** may abandon the tokenization and return any error code other than
536** SQLITE_OK or SQLITE_DONE.
537**
538** If the tokenizer is registered using an fts5_tokenizer_v2 object,
539** then the xTokenize() method has two additional arguments - pLocale
540** and nLocale. These specify the locale that the tokenizer should use
541** for the current request. If pLocale and nLocale are both 0, then the
542** tokenizer should use its default locale. Otherwise, pLocale points to
543** an nLocale byte buffer containing the name of the locale to use as utf-8
544** text. pLocale is not nul-terminated.
545**
546** FTS5_TOKENIZER
547**
548** There is also an fts5_tokenizer object. This is an older, deprecated,
549** version of fts5_tokenizer_v2. It is similar except that:
550**
551** <ul>
552** <li> There is no "iVersion" field, and
553** <li> The xTokenize() method does not take a locale argument.
554** </ul>
555**
556** Legacy fts5_tokenizer tokenizers must be registered using the
557** legacy xCreateTokenizer() function, instead of xCreateTokenizer_v2().
558**
559** Tokenizer implementations registered using either API may be retrieved
560** using both xFindTokenizer() and xFindTokenizer_v2().
561**
562** SYNONYM SUPPORT
563**
564** Custom tokenizers may also support synonyms. Consider a case in which a
565** user wishes to query for a phrase such as "first place". Using the
566** built-in tokenizers, the FTS5 query 'first + place' will match instances
567** of "first place" within the document set, but not alternative forms
568** such as "1st place". In some applications, it would be better to match
569** all instances of "first place" or "1st place" regardless of which form
570** the user specified in the MATCH query text.
571**
572** There are several ways to approach this in FTS5:
573**
574** <ol><li> By mapping all synonyms to a single token. In this case, using
575** the above example, this means that the tokenizer returns the
576** same token for inputs "first" and "1st". Say that token is in
577** fact "first", so that when the user inserts the document "I won
578** 1st place" entries are added to the index for tokens "i", "won",
579** "first" and "place". If the user then queries for '1st + place',
580** the tokenizer substitutes "first" for "1st" and the query works
581** as expected.
582**
583** <li> By querying the index for all synonyms of each query term
584** separately. In this case, when tokenizing query text, the
585** tokenizer may provide multiple synonyms for a single term
586** within the document. FTS5 then queries the index for each
587** synonym individually. For example, faced with the query:
588**
589** <codeblock>
590** ... MATCH 'first place'</codeblock>
591**
592** the tokenizer offers both "1st" and "first" as synonyms for the
593** first token in the MATCH query and FTS5 effectively runs a query
594** similar to:
595**
596** <codeblock>
597** ... MATCH '(first OR 1st) place'</codeblock>
598**
599** except that, for the purposes of auxiliary functions, the query
600** still appears to contain just two phrases - "(first OR 1st)"
601** being treated as a single phrase.
602**
603** <li> By adding multiple synonyms for a single term to the FTS index.
604** Using this method, when tokenizing document text, the tokenizer
605** provides multiple synonyms for each token. So that when a
606** document such as "I won first place" is tokenized, entries are
607** added to the FTS index for "i", "won", "first", "1st" and
608** "place".
609**
610** This way, even if the tokenizer does not provide synonyms
611** when tokenizing query text (it should not - to do so would be
612** inefficient), it doesn't matter if the user queries for
613** 'first + place' or '1st + place', as there are entries in the
614** FTS index corresponding to both forms of the first token.
615** </ol>
616**
617** Whether it is parsing document or query text, any call to xToken that
618** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
619** is considered to supply a synonym for the previous token. For example,
620** when parsing the document "I won first place", a tokenizer that supports
621** synonyms would call xToken() 5 times, as follows:
622**
623** <codeblock>
624** xToken(pCtx, 0, "i", 1, 0, 1);
625** xToken(pCtx, 0, "won", 3, 2, 5);
626** xToken(pCtx, 0, "first", 5, 6, 11);
627** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
628** xToken(pCtx, 0, "place", 5, 12, 17);
629**</codeblock>
630**
631** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
632** xToken() is called. Multiple synonyms may be specified for a single token
633** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
634** There is no limit to the number of synonyms that may be provided for a
635** single token.
636**
637** In many cases, method (1) above is the best approach. It does not add
638** extra data to the FTS index or require FTS5 to query for multiple terms,
639** so it is efficient in terms of disk space and query speed. However, it
640** does not support prefix queries very well. If, as suggested above, the
641** token "first" is substituted for "1st" by the tokenizer, then the query:
642**
643** <codeblock>
644** ... MATCH '1s*'</codeblock>
645**
646** will not match documents that contain the token "1st" (as the tokenizer
647** will probably not map "1s" to any prefix of "first").
648**
649** For full prefix support, method (3) may be preferred. In this case,
650** because the index contains entries for both "first" and "1st", prefix
651** queries such as 'fi*' or '1s*' will match correctly. However, because
652** extra entries are added to the FTS index, this method uses more space
653** within the database.
654**
655** Method (2) offers a midpoint between (1) and (3). Using this method,
656** a query such as '1s*' will match documents that contain the literal
657** token "1st", but not "first" (assuming the tokenizer is not able to
658** provide synonyms for prefixes). However, a non-prefix query like '1st'
659** will match against "1st" and "first". This method does not require
660** extra disk space, as no extra entries are added to the FTS index.
661** On the other hand, it may require more CPU cycles to run MATCH queries,
662** as separate queries of the FTS index are required for each synonym.
663**
664** When using methods (2) or (3), it is important that the tokenizer only
665** provide synonyms when tokenizing document text (method (3)) or query
666** text (method (2)), not both. Doing so will not cause any errors, but is
667** inefficient.
668*/
669typedef struct Fts5Tokenizer Fts5Tokenizer;
670typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2;
/*
** Version 2 tokenizer method table. Compared with the legacy
** fts5_tokenizer structure declared further down, this structure begins
** with an iVersion field and its xTokenize() method takes two additional
** parameters, pLocale and nLocale. The xToken callback signature is the
** same in both structures.
*/
671struct fts5_tokenizer_v2 {
672 int iVersion; /* Currently always 2 */
673
674 int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
675 void (*xDelete)(Fts5Tokenizer*);
676 int (*xTokenize)(Fts5Tokenizer*,
677 void *pCtx,
678 int flags, /* Mask of FTS5_TOKENIZE_* flags */
679 const char *pText, int nText,
680 const char *pLocale, int nLocale,
681 int (*xToken)(
682 void *pCtx, /* Copy of 2nd argument to xTokenize() */
683 int tflags, /* Mask of FTS5_TOKEN_* flags */
684 const char *pToken, /* Pointer to buffer containing token */
685 int nToken, /* Size of token in bytes */
686 int iStart, /* Byte offset of token within input text */
687 int iEnd /* Byte offset of end of token within input text */
688 )
689 );
690};
691
692/*
693** New code should use the fts5_tokenizer_v2 type to define tokenizer
694** implementations. The following type is included for legacy applications
695** that still use it.
**
** Unlike fts5_tokenizer_v2, this structure has no iVersion field and its
** xTokenize() method has no pLocale/nLocale parameters. The xCreate,
** xDelete and xToken signatures are the same as in the v2 structure.
696*/
697typedef struct fts5_tokenizer fts5_tokenizer;
698struct fts5_tokenizer {
699 int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
700 void (*xDelete)(Fts5Tokenizer*);
701 int (*xTokenize)(Fts5Tokenizer*,
702 void *pCtx,
703 int flags, /* Mask of FTS5_TOKENIZE_* flags */
704 const char *pText, int nText,
705 int (*xToken)(
706 void *pCtx, /* Copy of 2nd argument to xTokenize() */
707 int tflags, /* Mask of FTS5_TOKEN_* flags */
708 const char *pToken, /* Pointer to buffer containing token */
709 int nToken, /* Size of token in bytes */
710 int iStart, /* Byte offset of token within input text */
711 int iEnd /* Byte offset of end of token within input text */
712 )
713 );
714};
715
716
717/* Flags that may be passed as the third argument to xTokenize() */
718#define FTS5_TOKENIZE_QUERY0x0001 0x0001
719#define FTS5_TOKENIZE_PREFIX0x0002 0x0002
720#define FTS5_TOKENIZE_DOCUMENT0x0004 0x0004
721#define FTS5_TOKENIZE_AUX0x0008 0x0008
722
723/* Flags that may be passed by the tokenizer implementation back to FTS5
724** as the second argument (tflags) to the supplied xToken callback. */
725#define FTS5_TOKEN_COLOCATED0x0001 0x0001 /* Same position as prev. token */
726
727/*
728** END OF CUSTOM TOKENIZERS
729*************************************************************************/
730
731/*************************************************************************
732** FTS5 EXTENSION REGISTRATION API
733*/
/*
** Method table used to register and look up tokenizers and to create
** auxiliary functions. Two generations of tokenizer methods are present:
** xCreateTokenizer/xFindTokenizer operate on the legacy fts5_tokenizer
** structure, while the _v2 variants operate on fts5_tokenizer_v2 and are
** only available if iVersion>=3.
**
** Note the differing output conventions visible in the signatures:
** xFindTokenizer takes a pointer to a caller-supplied fts5_tokenizer
** structure, whereas xFindTokenizer_v2 takes a pointer to a
** fts5_tokenizer_v2 pointer.
*/
734typedef struct fts5_api fts5_api;
735struct fts5_api {
736 int iVersion; /* Currently always set to 3 */
737
738 /* Create a new tokenizer (legacy fts5_tokenizer interface) */
739 int (*xCreateTokenizer)(
740 fts5_api *pApi,
741 const char *zName,
742 void *pUserData,
743 fts5_tokenizer *pTokenizer,
744 void (*xDestroy)(void*)
745 );
746
747 /* Find an existing tokenizer (legacy fts5_tokenizer interface) */
748 int (*xFindTokenizer)(
749 fts5_api *pApi,
750 const char *zName,
751 void **ppUserData,
752 fts5_tokenizer *pTokenizer
753 );
754
755 /* Create a new auxiliary function */
756 int (*xCreateFunction)(
757 fts5_api *pApi,
758 const char *zName,
759 void *pUserData,
760 fts5_extension_function xFunction,
761 void (*xDestroy)(void*)
762 );
763
764 /* APIs below this point are only available if iVersion>=3 */
765
766 /* Create a new tokenizer (fts5_tokenizer_v2 interface) */
767 int (*xCreateTokenizer_v2)(
768 fts5_api *pApi,
769 const char *zName,
770 void *pUserData,
771 fts5_tokenizer_v2 *pTokenizer,
772 void (*xDestroy)(void*)
773 );
774
775 /* Find an existing tokenizer (fts5_tokenizer_v2 interface) */
776 int (*xFindTokenizer_v2)(
777 fts5_api *pApi,
778 const char *zName,
779 void **ppUserData,
780 fts5_tokenizer_v2 **ppTokenizer
781 );
782};
783
784/*
785** END OF REGISTRATION API
786*************************************************************************/
787
788#ifdef __cplusplus
789} /* end of the 'extern "C"' block */
790#endif
791
792#endif /* _FTS5_H */
793
794#line 1 "fts5Int.h"
795/*
796** 2014 May 31
797**
798** The author disclaims copyright to this source code. In place of
799** a legal notice, here is a blessing:
800**
801** May you do good and not evil.
802** May you find forgiveness for yourself and forgive others.
803** May you share freely, never taking more than you give.
804**
805******************************************************************************
806**
807*/
808#ifndef _FTS5INT_H
809#define _FTS5INT_H
810
811/* #include "fts5.h" */
812#include "sqlite3ext.h"
813SQLITE_EXTENSION_INIT1const sqlite3_api_routines *sqlite3_api=0;
814
815#include <string.h>
816#include <assert.h>
817#include <stddef.h>
818
819#ifndef SQLITE_AMALGAMATION
820
821typedef unsigned char u8;
822typedef unsigned int u32;
823typedef unsigned short u16;
824typedef short i16;
825typedef sqlite3_int64 i64;
826typedef sqlite3_uint64 u64;
827
828#ifndef ArraySize
829# define ArraySize(x)((int)(sizeof(x) / sizeof(x[0]))) ((int)(sizeof(x) / sizeof(x[0])))
830#endif
831
832#define testcase(x)
833
/*
** ALWAYS(X) and NEVER(X) wrap conditions that are expected to be always
** true or always false, respectively. Three variants are defined:
**
**   * If SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS is defined (it is forced on
**     for SQLITE_COVERAGE_TEST and SQLITE_MUTATION_TEST builds), they are
**     the constants 1 and 0 and X is not evaluated at all.
**
**   * Otherwise, if NDEBUG is not defined, they yield the truth value of
**     X but invoke assert(0) when ALWAYS(X) is false or NEVER(X) is true.
**
**   * Otherwise they expand to plain X.
*/
834#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST)
835# define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1
836#endif
837#if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS)
838# define ALWAYS(X)(X) (1)
839# define NEVER(X)(X) (0)
840#elif !defined(NDEBUG1)
841# define ALWAYS(X)(X) ((X)?1:(assert(0)((void) (0)),0))
842# define NEVER(X)(X) ((X)?(assert(0)((void) (0)),1):0)
843#else
844# define ALWAYS(X)(X) (X)
845# define NEVER(X)(X) (X)
846#endif
847
/* Smaller/larger of two values. Each argument appears twice in the
** expansion, so arguments with side effects (e.g. i++) must be avoided. */
848#define MIN(x,y)(((x) < (y)) ? (x) : (y)) (((x) < (y)) ? (x) : (y))
849#define MAX(x,y)(((x) > (y)) ? (x) : (y)) (((x) > (y)) ? (x) : (y))
850
851/*
852** Constants for the largest and smallest possible 64-bit signed integers.
853*/
854# define LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) (0xffffffff|(((i64)0x7fffffff)<<32))
855# define SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))) (((i64)-1) - LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)))
856
857/* The uptr type is an unsigned integer large enough to hold a pointer
858*/
859#if defined(HAVE_STDINT_H1)
860 typedef uintptr_t uptr;
861#elif SQLITE_PTRSIZE==4
862 typedef u32 uptr;
863#else
864 typedef u64 uptr;
865#endif
866
/*
** EIGHT_BYTE_ALIGNMENT(X) is true if the low three bits of pointer X are
** zero. If SQLITE_4_BYTE_ALIGNED_MALLOC is defined the test is relaxed and
** only the low two bits (4-byte alignment) are checked.
*/
867#ifdef SQLITE_4_BYTE_ALIGNED_MALLOC
868# define EIGHT_BYTE_ALIGNMENT(X)((((uptr)(X) - (uptr)0)&7)==0) ((((uptr)(X) - (uptr)0)&3)==0)
869#else
870# define EIGHT_BYTE_ALIGNMENT(X)((((uptr)(X) - (uptr)0)&7)==0) ((((uptr)(X) - (uptr)0)&7)==0)
871#endif
872
873/*
874** Macros needed to provide flexible arrays in a portable way
**
** offsetof() is only defined here as a fallback, for the case that no
** definition is already in scope.
**
** FLEXARRAY is used as the dimension of a trailing array member. When
** __STDC_VERSION__ indicates C99 or later it expands to nothing, giving
** a flexible array member "[]"; otherwise it expands to 1, giving a
** one-element array.
875*/
876#ifndef offsetof
877# define offsetof(STRUCTURE,FIELD)__builtin_offsetof(STRUCTURE, FIELD) ((size_t)((char*)&((STRUCTURE*)0)->FIELD))
878#endif
879#if defined(__STDC_VERSION__201710L) && (__STDC_VERSION__201710L >= 199901L)
880# define FLEXARRAY
881#else
882# define FLEXARRAY 1
883#endif
884
885#endif
886
887/* Truncate very long tokens to this many bytes. Hard limit is
888** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset
889** field that occurs at the start of each leaf page (see fts5_index.c). */
890#define FTS5_MAX_TOKEN_SIZE32768 32768
891
892/*
893** Maximum number of prefix indexes on single FTS5 table. This must be
894** less than 32. If it is set to anything larger than that, an #error
895** directive in fts5_index.c will cause the build to fail.
896*/
897#define FTS5_MAX_PREFIX_INDEXES31 31
898
899/*
900** Maximum segments permitted in a single index
901*/
902#define FTS5_MAX_SEGMENT2000 2000
903
904#define FTS5_DEFAULT_NEARDIST10 10
905#define FTS5_DEFAULT_RANK"bm25" "bm25"
906
907/* Name of rank and rowid columns */
908#define FTS5_RANK_NAME"rank" "rank"
909#define FTS5_ROWID_NAME"rowid" "rowid"
910
911#ifdef SQLITE_DEBUG
912# define FTS5_CORRUPT(11 | (1<<8)) sqlite3Fts5Corrupt()
913static int sqlite3Fts5Corrupt(void);
914#else
915# define FTS5_CORRUPT(11 | (1<<8)) SQLITE_CORRUPT_VTAB(11 | (1<<8))
916#endif
917
918/*
919** The assert_nc() macro is similar to the assert() macro, except that it
920** is used for assert() conditions that are true only if it can be
921** guaranteed that the database is not corrupt.
922*/
923#ifdef SQLITE_DEBUG
924extern int sqlite3_fts5_may_be_corrupt;
925# define assert_nc(x)((void) (0)) assert(sqlite3_fts5_may_be_corrupt || (x))((void) (0))
926#else
927# define assert_nc(x)((void) (0)) assert(x)((void) (0))
928#endif
929
930/*
931** A version of memcmp() that does not cause asan errors if one of the pointer
932** parameters is NULL and the number of bytes to compare is zero.
933*/
934#define fts5Memcmp(s1, s2, n)((n)<=0 ? 0 : memcmp((s1), (s2), (n))) ((n)<=0 ? 0 : memcmp((s1), (s2), (n)))
935
936/* Mark a function parameter as unused, to suppress nuisance compiler
937** warnings. */
938#ifndef UNUSED_PARAM
939# define UNUSED_PARAM(X)(void)(X) (void)(X)
940#endif
941
942#ifndef UNUSED_PARAM2
943# define UNUSED_PARAM2(X, Y)(void)(X), (void)(Y) (void)(X), (void)(Y)
944#endif
945
946typedef struct Fts5Global Fts5Global;
947typedef struct Fts5Colset Fts5Colset;
948
949/* If a NEAR() clump or phrase may only match a specific set of columns,
950** then an object of the following type is used to record the set of columns.
951** Each entry in the aiCol[] array is a column that may be matched.
952**
953** This object is used by fts5_expr.c and fts5_index.c.
**
** aiCol[] is declared with dimension FLEXARRAY, so the structure must be
** allocated with room for the required number of entries; use
** SZ_FTS5COLSET() to compute the allocation size.
954*/
955struct Fts5Colset {
956 int nCol; /* Presumably the number of entries in aiCol[] - confirm */
957 int aiCol[FLEXARRAY]; /* Columns that may be matched */
958};
959
960/* Size (in bytes) of a complete Fts5Colset object with N columns.
**
** The result is sizeof(i64)*((N+2)/2), i.e. always a whole number of
** i64 units. With a 4-byte int (assumed here - confirm for unusual
** platforms) that is the nCol field plus N aiCol[] entries, rounded up
** to a multiple of 8 bytes.
**
** NOTE(review): N is not parenthesized in the expansion, so the argument
** should be a simple expression (a variable, constant or sum); an
** argument using a lower-precedence operator such as ?: would be
** mis-parsed. */
961#define SZ_FTS5COLSET(N)(sizeof(i64)*((N+2)/2)) (sizeof(i64)*((N+2)/2))
962
963/**************************************************************************
964** Interface to code in fts5_config.c. fts5_config.c contains code
965** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
966*/
967
968typedef struct Fts5Config Fts5Config;
969typedef struct Fts5TokenizerConfig Fts5TokenizerConfig;
970
/*
** Tokenizer-related state embedded in Fts5Config (as member "t"): a
** tokenizer handle, pointers to the v2 and legacy tokenizer method tables,
** the tokenizer arguments, and the locale currently in effect.
*/
971struct Fts5TokenizerConfig {
972 Fts5Tokenizer *pTok; /* Tokenizer handle */
973 fts5_tokenizer_v2 *pApi2; /* v2 method table (fts5_tokenizer_v2) */
974 fts5_tokenizer *pApi1; /* Legacy method table (fts5_tokenizer) */
975 const char **azArg; /* Presumably the arguments for xCreate() - confirm */
976 int nArg; /* Presumably the number of entries in azArg[] - confirm */
977 int ePattern; /* FTS5_PATTERN_XXX constant */
978 const char *pLocale; /* Current locale to use */
979 int nLocale; /* Size of pLocale in bytes */
980};
981
982/*
983** An instance of the following structure encodes all information that can
984** be gleaned from the CREATE VIRTUAL TABLE statement.
985**
986** And all information loaded from the %_config table.
987**
988** nAutomerge:
989** The minimum number of segments that an auto-merge operation should
990** attempt to merge together. A value of 1 sets the object to use the
991** compile time default. Zero disables auto-merge altogether.
992**
993** bContentlessDelete:
994** True if the contentless_delete option was present in the CREATE
995** VIRTUAL TABLE statement.
996**
997** zContent:
998**
999** zContentRowid:
1000** The value of the content_rowid= option, if one was specified. Or
1001** the string "rowid" otherwise. This text is not quoted - if it is
1002** used as part of an SQL statement it needs to be quoted appropriately.
1003**
1004** zContentExprlist:
1005**
1006** pzErrmsg:
1007** This exists in order to allow the fts5_index.c module to return a
1008** decent error message if it encounters a file-format version it does
1009** not understand.
1010**
1011** bColumnsize:
1012** True if the %_docsize table is created.
1013**
1014** bPrefixIndex:
1015** This is only used for debugging. If set to false, any prefix indexes
1016** are ignored. This value is configured using:
1017**
1018** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
1019**
1020** bLocale:
1021** Set to true if locale=1 was specified when the table was created.
1022*/
1023struct Fts5Config {
1024 sqlite3 *db; /* Database handle */
1025 Fts5Global *pGlobal; /* Global fts5 object for handle db */
1026 char *zDb; /* Database holding FTS index (e.g. "main") */
1027 char *zName; /* Name of FTS index */
1028 int nCol; /* Number of columns */
1029 char **azCol; /* Column names */
1030 u8 *abUnindexed; /* True for unindexed columns */
1031 int nPrefix; /* Number of prefix indexes */
1032 int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
1033 int eContent; /* An FTS5_CONTENT_XXX value (NORMAL, NONE, EXTERNAL, UNINDEXED) */
1034 int bContentlessDelete; /* "contentless_delete=" option (dflt==0) */
1035 int bContentlessUnindexed; /* "contentless_unindexed=" option (dflt=0) */
1036 char *zContent; /* content table */
1037 char *zContentRowid; /* "content_rowid=" option value */
1038 int bColumnsize; /* "columnsize=" option value (dflt==1) */
1039 int bTokendata; /* "tokendata=" option value (dflt==0) */
1040 int bLocale; /* "locale=" option value (dflt==0) */
1041 int eDetail; /* FTS5_DETAIL_XXX value (FULL, NONE, COLUMNS) */
1042 char *zContentExprlist; /* NOTE(review): not described here or in the header comment */
1043 Fts5TokenizerConfig t; /* Tokenizer state (see struct Fts5TokenizerConfig) */
1044 int bLock; /* True when table is preparing statement */
1045
1046
1047 /* Values loaded from the %_config table */
1048 int iVersion; /* fts5 file format 'version' (cf. FTS5_CURRENT_VERSION) */
1049 int iCookie; /* Incremented when %_config is modified */
1050 int pgsz; /* Approximate page size used in %_data */
1051 int nAutomerge; /* 'automerge' setting */
1052 int nCrisisMerge; /* Maximum allowed segments per level */
1053 int nUsermerge; /* 'usermerge' setting */
1054 int nHashSize; /* Bytes of memory for in-memory hash */
1055 char *zRank; /* Name of rank function */
1056 char *zRankArgs; /* Arguments to rank function */
1057 int bSecureDelete; /* 'secure-delete' */
1058 int nDeleteMerge; /* 'deletemerge' */
1059 int bPrefixInsttoken; /* 'prefix-insttoken' */
1060
1061 /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
1062 char **pzErrmsg;
1063
1064#ifdef SQLITE_DEBUG
1065 int bPrefixIndex; /* True to use prefix-indexes */
1066#endif
1067};
1068
1069/* Current expected value of %_config table 'version' field. And
1070** the expected version if the 'secure-delete' option has ever been
1071** set on the table. */
1072#define FTS5_CURRENT_VERSION4 4
1073#define FTS5_CURRENT_VERSION_SECUREDELETE5 5
1074
1075#define FTS5_CONTENT_NORMAL0 0
1076#define FTS5_CONTENT_NONE1 1
1077#define FTS5_CONTENT_EXTERNAL2 2
1078#define FTS5_CONTENT_UNINDEXED3 3
1079
1080#define FTS5_DETAIL_FULL0 0
1081#define FTS5_DETAIL_NONE1 1
1082#define FTS5_DETAIL_COLUMNS2 2
1083
1084#define FTS5_PATTERN_NONE0 0
1085#define FTS5_PATTERN_LIKE65 65 /* matches SQLITE_INDEX_CONSTRAINT_LIKE */
1086#define FTS5_PATTERN_GLOB66 66 /* matches SQLITE_INDEX_CONSTRAINT_GLOB */
1087
1088static int sqlite3Fts5ConfigParse(
1089 Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
1090);
1091static void sqlite3Fts5ConfigFree(Fts5Config*);
1092
1093static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
1094
1095static int sqlite3Fts5Tokenize(
1096 Fts5Config *pConfig, /* FTS5 Configuration object */
1097 int flags, /* FTS5_TOKENIZE_* flags */
1098 const char *pText, int nText, /* Text to tokenize */
1099 void *pCtx, /* Context passed to xToken() */
1100 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
1101);
1102
1103static void sqlite3Fts5Dequote(char *z);
1104
1105/* Load the contents of the %_config table */
1106static int sqlite3Fts5ConfigLoad(Fts5Config*, int);
1107
1108/* Set the value of a single config attribute */
1109static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
1110
1111static int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
1112
1113static void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...);
1114
1115/*
1116** End of interface to code in fts5_config.c.
1117**************************************************************************/
1118
1119/**************************************************************************
1120** Interface to code in fts5_buffer.c.
1121*/
1122
1123/*
1124** Buffer object for the incremental building of string data.
**
** The fts5BufferGrow() macro treats n as the number of bytes already in
** the buffer and nSpace as the space available: no resize is requested
** while n plus the number of bytes to add does not exceed nSpace.
1125*/
1126typedef struct Fts5Buffer Fts5Buffer;
1127struct Fts5Buffer {
1128 u8 *p; /* Buffer data */
1129 int n; /* Bytes of data currently in the buffer */
1130 int nSpace; /* Presumably the allocated size of p in bytes - confirm */
1131};
1132
1133static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32);
1134static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
1135static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*);
1136static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
1137static void sqlite3Fts5BufferFree(Fts5Buffer*);
1138static void sqlite3Fts5BufferZero(Fts5Buffer*);
1139static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
1140static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
1141
1142static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
1143
1144#define fts5BufferZero(x)sqlite3Fts5BufferZero(x) sqlite3Fts5BufferZero(x)
1145#define fts5BufferAppendVarint(a,b,c)sqlite3Fts5BufferAppendVarint(a,b,(i64)c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c)
1146#define fts5BufferFree(a)sqlite3Fts5BufferFree(a) sqlite3Fts5BufferFree(a)
1147#define fts5BufferAppendBlob(a,b,c,d)sqlite3Fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
1148#define fts5BufferSet(a,b,c,d)sqlite3Fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d)
1149
/*
** Ensure that buffer pBuf has room for nn more bytes. If
** (pBuf->n + nn) already fits within pBuf->nSpace (compared as u32
** values) the expression evaluates to 0 and nothing else happens.
** Otherwise it evaluates to the return value of sqlite3Fts5BufferSize(),
** which is called with the total size required, (nn + pBuf->n), and
** with pRc.
**
** pBuf and nn each appear more than once in the expansion, so arguments
** with side effects must be avoided.
*/
1150#define fts5BufferGrow(pRc,pBuf,nn)( (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace
) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n)
)
( \
1151 (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \
1152 sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \
1153)
1154
1155/* Write and decode big-endian 32-bit integer values */
1156static void sqlite3Fts5Put32(u8*, int);
1157static int sqlite3Fts5Get32(const u8*);
1158
/* Split a 64-bit position value into its two parts. The column number is
** taken from bits 32..62 and the offset from the low 31 bits; in both
** cases the result is masked with 0x7FFFFFFF and cast to int. iPos is
** not parenthesized in the expansions, so pass a simple expression. */
1159#define FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF) (int)((iPos >> 32) & 0x7FFFFFFF)
1160#define FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF) (int)(iPos & 0x7FFFFFFF)
1161
1162typedef struct Fts5PoslistReader Fts5PoslistReader;
1163struct Fts5PoslistReader {
1164 /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
1165 const u8 *a; /* Position list to iterate through */
1166 int n; /* Size of buffer at a[] in bytes */
1167 int i; /* Current offset in a[] */
1168
1169 u8 bFlag; /* For client use (any custom purpose) */
1170
1171 /* Output variables */
1172 u8 bEof; /* Set to true at EOF */
1173 i64 iPos; /* (iCol<<32) + iPos */
1174};
1175static int sqlite3Fts5PoslistReaderInit(
1176 const u8 *a, int n, /* Poslist buffer to iterate through */
1177 Fts5PoslistReader *pIter /* Iterator object to initialize */
1178);
1179static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
1180
1181typedef struct Fts5PoslistWriter Fts5PoslistWriter;
1182struct Fts5PoslistWriter {
1183 i64 iPrev;
1184};
1185static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
1186static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64);
1187
1188static int sqlite3Fts5PoslistNext64(
1189 const u8 *a, int n, /* Buffer containing poslist */
1190 int *pi, /* IN/OUT: Offset within a[] */
1191 i64 *piOff /* IN/OUT: Current offset */
1192);
1193
1194/* Malloc utility */
1195static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte);
1196static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);
1197
1198/* Character set tests (like isspace(), isalpha() etc.) */
1199static int sqlite3Fts5IsBareword(char t);
1200
1201
1202/* Bucket of terms object used by the integrity-check in offsets=0 mode. */
1203typedef struct Fts5Termset Fts5Termset;
1204static int sqlite3Fts5TermsetNew(Fts5Termset**);
1205static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
1206static void sqlite3Fts5TermsetFree(Fts5Termset*);
1207
1208/*
1209** End of interface to code in fts5_buffer.c.
1210**************************************************************************/
1211
1212/**************************************************************************
1213** Interface to code in fts5_index.c. fts5_index.c contains code
1214** to access the data stored in the %_data table.
1215*/
1216
1217typedef struct Fts5Index Fts5Index;
1218typedef struct Fts5IndexIter Fts5IndexIter;
1219
/*
** Publicly visible part of an index iterator. Iterators are opened with
** sqlite3Fts5IndexQuery() and closed with sqlite3Fts5IterClose().
*/
1220struct Fts5IndexIter {
1221 i64 iRowid; /* Presumably the rowid of the current entry - confirm */
1222 const u8 *pData; /* Data for the current entry (format not shown here) */
1223 int nData; /* Presumably the size of pData in bytes - confirm */
1224 u8 bEof; /* EOF flag; tested by sqlite3Fts5IterEof() */
1225};
1226
/* Evaluates to the bEof flag of iterator x. */
1227#define sqlite3Fts5IterEof(x)((x)->bEof) ((x)->bEof)
1228
1229/*
1230** Values used as part of the flags argument passed to IndexQuery().
1231*/
1232#define FTS5INDEX_QUERY_PREFIX0x0001 0x0001 /* Prefix query */
1233#define FTS5INDEX_QUERY_DESC0x0002 0x0002 /* Docs in descending rowid order */
1234#define FTS5INDEX_QUERY_TEST_NOIDX0x0004 0x0004 /* Do not use prefix index */
1235#define FTS5INDEX_QUERY_SCAN0x0008 0x0008 /* Scan query (fts5vocab) */
1236
1237/* The following are used internally by the fts5_index.c module. They are
1238** defined here only to make it easier to avoid clashes with the flags
1239** above. */
1240#define FTS5INDEX_QUERY_SKIPEMPTY0x0010 0x0010
1241#define FTS5INDEX_QUERY_NOOUTPUT0x0020 0x0020
1242#define FTS5INDEX_QUERY_SKIPHASH0x0040 0x0040
1243#define FTS5INDEX_QUERY_NOTOKENDATA0x0080 0x0080
1244#define FTS5INDEX_QUERY_SCANONETERM0x0100 0x0100
1245
1246/*
1247** Create/destroy an Fts5Index object.
1248*/
1249static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
1250static int sqlite3Fts5IndexClose(Fts5Index *p);
1251
1252/*
1253** Return a simple checksum value based on the arguments.
1254*/
1255static u64 sqlite3Fts5IndexEntryCksum(
1256 i64 iRowid,
1257 int iCol,
1258 int iPos,
1259 int iIdx,
1260 const char *pTerm,
1261 int nTerm
1262);
1263
1264/*
1265** Argument p points to a buffer containing utf-8 text that is n bytes in
1266** size. Return the number of bytes in the nChar character prefix of the
1267** buffer, or 0 if there are less than nChar characters in total.
1268*/
1269static int sqlite3Fts5IndexCharlenToBytelen(
1270 const char *p,
1271 int nByte,
1272 int nChar
1273);
1274
1275/*
1276** Open a new iterator to iterate through all rowids that match the
1277** specified token or token prefix.
1278*/
1279static int sqlite3Fts5IndexQuery(
1280 Fts5Index *p, /* FTS index to query */
1281 const char *pToken, int nToken, /* Token (or prefix) to query for */
1282 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
1283 Fts5Colset *pColset, /* Match these columns only */
1284 Fts5IndexIter **ppIter /* OUT: New iterator object */
1285);
1286
1287/*
1288** The various operations on open token or token prefix iterators opened
1289** using sqlite3Fts5IndexQuery().
1290*/
1291static int sqlite3Fts5IterNext(Fts5IndexIter*);
1292static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
1293
1294/*
1295** Close an iterator opened by sqlite3Fts5IndexQuery().
1296*/
1297static void sqlite3Fts5IterClose(Fts5IndexIter*);
1298
1299/*
1300** Close the reader blob handle, if it is open.
1301*/
1302static void sqlite3Fts5IndexCloseReader(Fts5Index*);
1303
1304/*
1305** This interface is used by the fts5vocab module.
1306*/
1307static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
1308static int sqlite3Fts5IterNextScan(Fts5IndexIter*);
1309static void *sqlite3Fts5StructureRef(Fts5Index*);
1310static void sqlite3Fts5StructureRelease(void*);
1311static int sqlite3Fts5StructureTest(Fts5Index*, void*);
1312
1313/*
1314** Used by xInstToken():
1315*/
1316static int sqlite3Fts5IterToken(
1317 Fts5IndexIter *pIndexIter,
1318 const char *pToken, int nToken,
1319 i64 iRowid,
1320 int iCol,
1321 int iOff,
1322 const char **ppOut, int *pnOut
1323);
1324
1325/*
1326** Insert or remove data to or from the index. Each time a document is
1327** added to or removed from the index, this function is called one or more
1328** times.
1329**
1330** For an insert, it must be called once for each token in the new document.
1331** If the operation is a delete, it must be called (at least) once for each
1332** unique token in the document with an iCol value less than zero. The iPos
1333** argument is ignored for a delete.
1334*/
1335static int sqlite3Fts5IndexWrite(
1336 Fts5Index *p, /* Index to write to */
1337 int iCol, /* Column token appears in (-ve -> delete) */
1338 int iPos, /* Position of token within column */
1339 const char *pToken, int nToken /* Token to add or remove to or from index */
1340);
1341
1342/*
1343** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
1344** document iDocid.
1345*/
1346static int sqlite3Fts5IndexBeginWrite(
1347 Fts5Index *p, /* Index to write to */
1348 int bDelete, /* True if current operation is a delete */
1349 i64 iDocid /* Docid to add or remove data from */
1350);
1351
1352/*
1353** Flush any data stored in the in-memory hash tables to the database.
1354** Also close any open blob handles.
1355*/
1356static int sqlite3Fts5IndexSync(Fts5Index *p);
1357
1358/*
1359** Discard any data stored in the in-memory hash tables. Do not write it
1360** to the database. Additionally, assume that the contents of the %_data
1361** table may have changed on disk. So any in-memory caches of %_data
1362** records must be invalidated.
1363*/
1364static int sqlite3Fts5IndexRollback(Fts5Index *p);
1365
1366/*
1367** Get or set the "averages" values.
1368*/
1369static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
1370static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
1371
1372/*
1373** Functions called by the storage module as part of integrity-check.
1374*/
1375static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum);
1376
1377/*
1378** Called during virtual module initialization to register UDF
1379** fts5_decode() with SQLite
1380*/
1381static int sqlite3Fts5IndexInit(sqlite3*);
1382
1383static int sqlite3Fts5IndexSetCookie(Fts5Index*, int);
1384
1385/*
1386** Return the total number of entries read from the %_data table by
1387** this connection since it was created.
1388*/
1389static int sqlite3Fts5IndexReads(Fts5Index *p);
1390
1391static int sqlite3Fts5IndexReinit(Fts5Index *p);
1392static int sqlite3Fts5IndexOptimize(Fts5Index *p);
1393static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
1394static int sqlite3Fts5IndexReset(Fts5Index *p);
1395
1396static int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
1397
1398static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin);
1399static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid);
1400
1401static void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*);
1402
1403/* Used to populate hash tables for xInstToken in detail=none/column mode. */
1404static int sqlite3Fts5IndexIterWriteTokendata(
1405 Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff
1406);
1407
1408/*
1409** End of interface to code in fts5_index.c.
1410**************************************************************************/
1411
1412/**************************************************************************
1413** Interface to code in fts5_varint.c.
1414*/
1415static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
1416static int sqlite3Fts5GetVarintLen(u32 iVal);
1417static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
1418static int sqlite3Fts5PutVarint(unsigned char *p, u64 v);
1419
1420#define fts5GetVarint32(a,b)sqlite3Fts5GetVarint32(a,(u32*)&(b)) sqlite3Fts5GetVarint32(a,(u32*)&(b))
1421#define fts5GetVarintsqlite3Fts5GetVarint sqlite3Fts5GetVarint
1422
/*
** Read a 32-bit varint from buffer a[] at offset iOff into nVal and
** advance iOff past it.
**
** Fast path: the byte at a[iOff] is stored in nVal and iOff is
** incremented. If that byte has its 0x80 bit set, iOff is stepped back
** and the value is decoded by sqlite3Fts5GetVarint32() instead, whose
** return value is added to iOff.
**
** Caveats: iOff and nVal must be modifiable lvalues, all three arguments
** appear several times in the expansion, and the macro expands to a bare
** { } block rather than do{ }while(0), so it must not be used as the
** unbraced body of an if() that is followed by an else.
*/
1423#define fts5FastGetVarint32(a, iOff, nVal){ nVal = (a)[iOff++]; if( nVal & 0x80 ){ iOff--; iOff += sqlite3Fts5GetVarint32
(&(a)[iOff],(u32*)&(nVal)); } }
{ \
1424 nVal = (a)[iOff++]; \
1425 if( nVal & 0x80 ){ \
1426 iOff--; \
1427 iOff += fts5GetVarint32(&(a)[iOff], nVal)sqlite3Fts5GetVarint32(&(a)[iOff],(u32*)&(nVal)); \
1428 } \
1429}
1430
1431
1432/*
1433** End of interface to code in fts5_varint.c.
1434**************************************************************************/
1435
1436
1437/**************************************************************************
1438** Interface to code in fts5_main.c.
1439*/
1440
1441/*
1442** Virtual-table object.
1443*/
1444typedef struct Fts5Table Fts5Table;
1445struct Fts5Table {
1446 sqlite3_vtab base; /* Base class used by SQLite core */
1447 Fts5Config *pConfig; /* Virtual table configuration */
1448 Fts5Index *pIndex; /* Full-text index */
1449};
1450
1451static int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig);
1452
1453static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);
1454
1455static int sqlite3Fts5FlushToDisk(Fts5Table*);
1456
1457static void sqlite3Fts5ClearLocale(Fts5Config *pConfig);
1458static void sqlite3Fts5SetLocale(Fts5Config *pConfig, const char *pLoc, int nLoc);
1459
1460static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal);
1461static int sqlite3Fts5DecodeLocaleValue(sqlite3_value *pVal,
1462 const char **ppText, int *pnText, const char **ppLoc, int *pnLoc
1463);
1464
1465/*
1466** End of interface to code in fts5_main.c.
1467**************************************************************************/
1468
1469/**************************************************************************
1470** Interface to code in fts5_hash.c.
1471*/
1472typedef struct Fts5Hash Fts5Hash;
1473
1474/*
1475** Create a hash table, free a hash table.
1476*/
1477static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
1478static void sqlite3Fts5HashFree(Fts5Hash*);
1479
1480static int sqlite3Fts5HashWrite(
1481 Fts5Hash*,
1482 i64 iRowid, /* Rowid for this entry */
1483 int iCol, /* Column token appears in (-ve -> delete) */
1484 int iPos, /* Position of token within column */
1485 char bByte,
1486 const char *pToken, int nToken /* Token to add or remove to or from index */
1487);
1488
1489/*
1490** Empty (but do not delete) a hash table.
1491*/
1492static void sqlite3Fts5HashClear(Fts5Hash*);
1493
1494/*
1495** Return true if the hash is empty, false otherwise.
1496*/
1497static int sqlite3Fts5HashIsEmpty(Fts5Hash*);
1498
1499static int sqlite3Fts5HashQuery(
1500 Fts5Hash*, /* Hash table to query */
1501 int nPre,
1502 const char *pTerm, int nTerm, /* Query term */
1503 void **ppObj, /* OUT: Pointer to doclist for pTerm */
1504 int *pnDoclist /* OUT: Size of doclist in bytes */
1505);
1506
1507static int sqlite3Fts5HashScanInit(
1508 Fts5Hash*, /* Hash table to query */
1509 const char *pTerm, int nTerm /* Query prefix */
1510);
1511static void sqlite3Fts5HashScanNext(Fts5Hash*);
1512static int sqlite3Fts5HashScanEof(Fts5Hash*);
1513static void sqlite3Fts5HashScanEntry(Fts5Hash *,
1514 const char **pzTerm, /* OUT: term (nul-terminated) */
1515 int *pnTerm, /* OUT: Size of term in bytes */
1516 const u8 **ppDoclist, /* OUT: pointer to doclist */
1517 int *pnDoclist /* OUT: size of doclist in bytes */
1518);
1519
1520
1521
1522/*
1523** End of interface to code in fts5_hash.c.
1524**************************************************************************/
1525
1526/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
1529*/
1530
/* Identifiers for three row-access statements; the SQL each one stands for
** is shown in its trailing comment.
** NOTE(review): presumably these are the "eStmt" values accepted by
** sqlite3Fts5StorageStmt() - confirm against fts5_storage.c. */
#define FTS5_STMT_SCAN_ASC  0    /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1    /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP    2    /* SELECT rowid, * FROM ... WHERE rowid=? */
1534
1535typedef struct Fts5Storage Fts5Storage;
1536
1537static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
1538static int sqlite3Fts5StorageClose(Fts5Storage *p);
1539static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);
1540
1541static int sqlite3Fts5DropAll(Fts5Config*);
1542static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);
1543
1544static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int);
1545static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, int, sqlite3_value**, i64*);
1546static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);
1547
1548static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg);
1549
1550static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
1551static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);
1552
1553static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
1554static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
1555static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);
1556
1557static int sqlite3Fts5StorageSync(Fts5Storage *p);
1558static int sqlite3Fts5StorageRollback(Fts5Storage *p);
1559
1560static int sqlite3Fts5StorageConfigValue(
1561 Fts5Storage *p, const char*, sqlite3_value*, int
1562);
1563
1564static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
1565static int sqlite3Fts5StorageRebuild(Fts5Storage *p);
1566static int sqlite3Fts5StorageOptimize(Fts5Storage *p);
1567static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
1568static int sqlite3Fts5StorageReset(Fts5Storage *p);
1569
1570static void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage*);
1571static int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel);
1572
1573/*
1574** End of interface to code in fts5_storage.c.
1575**************************************************************************/
1576
1577
1578/**************************************************************************
1579** Interface to code in fts5_expr.c.
1580*/
1581typedef struct Fts5Expr Fts5Expr;
1582typedef struct Fts5ExprNode Fts5ExprNode;
1583typedef struct Fts5Parse Fts5Parse;
1584typedef struct Fts5Token Fts5Token;
1585typedef struct Fts5ExprPhrase Fts5ExprPhrase;
1586typedef struct Fts5ExprNearset Fts5ExprNearset;
1587
/*
** A token expressed as a (pointer, length) pair.  The text is NOT
** nul-terminated, so consumers must always honour the byte count in "n"
** rather than scanning for a terminator.
*/
struct Fts5Token {
  const char *p;                  /* Token text (not NULL terminated) */
  int n;                          /* Size of buffer p in bytes */
};
1592
1593/* Parse a MATCH expression. */
1594static int sqlite3Fts5ExprNew(
1595 Fts5Config *pConfig,
1596 int bPhraseToAnd,
1597 int iCol, /* Column on LHS of MATCH operator */
1598 const char *zExpr,
1599 Fts5Expr **ppNew,
1600 char **pzErr
1601);
1602static int sqlite3Fts5ExprPattern(
1603 Fts5Config *pConfig,
1604 int bGlob,
1605 int iCol,
1606 const char *zText,
1607 Fts5Expr **pp
1608);
1609
1610/*
1611** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bDesc);
1612** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
1613** rc = sqlite3Fts5ExprNext(pExpr)
1614** ){
1615** // The document with rowid iRowid matches the expression!
1616** i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
1617** }
1618*/
1619static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
1620static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
1621static int sqlite3Fts5ExprEof(Fts5Expr*);
1622static i64 sqlite3Fts5ExprRowid(Fts5Expr*);
1623
1624static void sqlite3Fts5ExprFree(Fts5Expr*);
1625static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2);
1626
1627/* Called during startup to register a UDF with SQLite */
1628static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);
1629
1630static int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
1631static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
1632static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
1633
1634typedef struct Fts5PoslistPopulator Fts5PoslistPopulator;
1635static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int);
1636static int sqlite3Fts5ExprPopulatePoslists(
1637 Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int
1638);
1639static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64);
1640
1641static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);
1642
1643static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
1644
1645static int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*);
1646static int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*);
1647static void sqlite3Fts5ExprClearTokens(Fts5Expr*);
1648
1649/*******************************************
1650** The fts5_expr.c API above this point is used by the other hand-written
1651** C code in this module. The interfaces below this point are called by
1652** the parser code in fts5parse.y. */
1653
1654static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);
1655
1656static Fts5ExprNode *sqlite3Fts5ParseNode(
1657 Fts5Parse *pParse,
1658 int eType,
1659 Fts5ExprNode *pLeft,
1660 Fts5ExprNode *pRight,
1661 Fts5ExprNearset *pNear
1662);
1663
1664static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
1665 Fts5Parse *pParse,
1666 Fts5ExprNode *pLeft,
1667 Fts5ExprNode *pRight
1668);
1669
1670static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
1671 Fts5Parse *pParse,
1672 Fts5ExprPhrase *pPhrase,
1673 Fts5Token *pToken,
1674 int bPrefix
1675);
1676
1677static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*);
1678
1679static Fts5ExprNearset *sqlite3Fts5ParseNearset(
1680 Fts5Parse*,
1681 Fts5ExprNearset*,
1682 Fts5ExprPhrase*
1683);
1684
1685static Fts5Colset *sqlite3Fts5ParseColset(
1686 Fts5Parse*,
1687 Fts5Colset*,
1688 Fts5Token *
1689);
1690
1691static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
1692static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
1693static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);
1694
1695static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
1696static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*);
1697static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*);
1698static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
1699static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
1700
1701/*
1702** End of interface to code in fts5_expr.c.
1703**************************************************************************/
1704
1705
1706
1707/**************************************************************************
1708** Interface to code in fts5_aux.c.
1709*/
1710
1711static int sqlite3Fts5AuxInit(fts5_api*);
1712/*
1713** End of interface to code in fts5_aux.c.
1714**************************************************************************/
1715
1716/**************************************************************************
1717** Interface to code in fts5_tokenizer.c.
1718*/
1719
1720static int sqlite3Fts5TokenizerInit(fts5_api*);
1721static int sqlite3Fts5TokenizerPattern(
1722 int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
1723 Fts5Tokenizer *pTok
1724);
1725static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig*);
1726/*
1727** End of interface to code in fts5_tokenizer.c.
1728**************************************************************************/
1729
1730/**************************************************************************
1731** Interface to code in fts5_vocab.c.
1732*/
1733
1734static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
1735
1736/*
1737** End of interface to code in fts5_vocab.c.
1738**************************************************************************/
1739
1740
1741/**************************************************************************
1742** Interface to automatically generated code in fts5_unicode2.c.
1743*/
1744static int sqlite3Fts5UnicodeIsdiacritic(int c);
1745static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
1746
1747static int sqlite3Fts5UnicodeCatParse(const char*, u8*);
1748static int sqlite3Fts5UnicodeCategory(u32 iCode);
1749static void sqlite3Fts5UnicodeAscii(u8*, u8*);
1750/*
1751** End of interface to code in fts5_unicode2.c.
1752**************************************************************************/
1753
1754#endif
1755
1756#line 1 "fts5parse.h"
/* Numeric codes of the terminal symbols of the fts5 query grammar.  The
** values are indexes into the generated parser's symbol tables, so they
** must stay in step with the grammar that produced those tables. */
#define FTS5_OR                               1
#define FTS5_AND                              2
#define FTS5_NOT                              3
#define FTS5_TERM                             4
#define FTS5_COLON                            5
#define FTS5_MINUS                            6
#define FTS5_LCP                              7
#define FTS5_RCP                              8
#define FTS5_STRING                           9
#define FTS5_LP                              10
#define FTS5_RP                              11
#define FTS5_CARET                           12
#define FTS5_COMMA                           13
#define FTS5_PLUS                            14
#define FTS5_STAR                            15
1772
1773#line 1 "fts5parse.c"
1774/* This file is automatically generated by Lemon from input grammar
1775** source file "fts5parse.y".
1776*/
1777/*
1778** 2000-05-29
1779**
1780** The author disclaims copyright to this source code. In place of
1781** a legal notice, here is a blessing:
1782**
1783** May you do good and not evil.
1784** May you find forgiveness for yourself and forgive others.
1785** May you share freely, never taking more than you give.
1786**
1787*************************************************************************
1788** Driver template for the LEMON parser generator.
1789**
1790** The "lemon" program processes an LALR(1) input grammar file, then uses
1791** this template to construct a parser. The "lemon" program inserts text
1792** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
1793** interstitial "-" characters) contained in this template is changed into
1794** the value of the %name directive from the grammar. Otherwise, the content
** of this template is copied straight through into the generated parser
** source file.
1797**
1798** The following is the concatenation of all %include directives from the
1799** input grammar file:
1800*/
1801/************ Begin %include sections from the grammar ************************/
1802#line 47 "fts5parse.y"
1803
1804/* #include "fts5Int.h" */
1805/* #include "fts5parse.h" */
1806
/*
** Disable all error recovery processing in the parser push-down
** automaton.
*/
#define fts5YYNOERRORRECOVERY 1

/*
** Make fts5yytestcase() the same as testcase()
*/
#define fts5yytestcase(X) testcase(X)

/*
** Indicate that sqlite3Fts5ParserFree() will never be called with a null
** pointer.
*/
#define fts5YYPARSEFREENOTNULL 1

/*
** Alternative datatype for the argument to the malloc() routine passed
** into sqlite3Fts5ParserAlloc().  The default is size_t.
*/
#define fts5YYMALLOCARGTYPE  u64
1829
1830#line 58 "fts5parse.sql"
1831/**************** End of %include directives **********************************/
1832/* These constants specify the various numeric values for terminal symbols.
1833***************** Begin token definitions *************************************/
/* Token codes as emitted into the parser source itself.  They are skipped
** when fts5parse.h has already supplied them (detected via FTS5_OR). */
#ifndef FTS5_OR
#define FTS5_OR                               1
#define FTS5_AND                              2
#define FTS5_NOT                              3
#define FTS5_TERM                             4
#define FTS5_COLON                            5
#define FTS5_MINUS                            6
#define FTS5_LCP                              7
#define FTS5_RCP                              8
#define FTS5_STRING                           9
#define FTS5_LP                              10
#define FTS5_RP                              11
#define FTS5_CARET                           12
#define FTS5_COMMA                           13
#define FTS5_PLUS                            14
#define FTS5_STAR                            15
#endif
1851/**************** End token definitions ***************************************/
1852
/* The next section is a series of control #defines that govern
** various aspects of the generated parser.
1855** fts5YYCODETYPE is the data type used to store the integer codes
1856** that represent terminal and non-terminal symbols.
1857** "unsigned char" is used if there are fewer than
1858** 256 symbols. Larger types otherwise.
1859** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for
1860** any terminal or nonterminal symbol.
1861** fts5YYFALLBACK If defined, this indicates that one or more tokens
1862** (also known as: "terminal symbols") have fall-back
1863** values which should be used if the original symbol
1864** would not parse. This permits keywords to sometimes
1865** be used as identifiers, for example.
1866** fts5YYACTIONTYPE is the data type used for "action codes" - numbers
1867** that indicate what to do in response to the next
1868** token.
1869** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal
1870** symbols. Background: A "minor type" is a semantic
1871** value associated with a terminal or non-terminal
1872** symbols. For example, for an "ID" terminal symbol,
1873** the minor type might be the name of the identifier.
1874** Each non-terminal can have a different minor type.
1875** Terminal symbols all have the same minor type, though.
1876** This macros defines the minor type for terminal
1877** symbols.
1878** fts5YYMINORTYPE is the data type used for all minor types.
1879** This is typically a union of many types, one of
1880** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union
1881** for terminal symbols is called "fts5yy0".
1882** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If
1883** zero the stack is dynamically sized using realloc()
1884** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument
1885** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument
1886** sqlite3Fts5ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter
1887** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser
1888** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser
1889** sqlite3Fts5ParserCTX_* As sqlite3Fts5ParserARG_ except for %extra_context
1890** fts5YYREALLOC Name of the realloc() function to use
1891** fts5YYFREE Name of the free() function to use
1892** fts5YYDYNSTACK True if stack space should be extended on heap
1893** fts5YYERRORSYMBOL is the code number of the error symbol. If not
1894** defined, then do no error processing.
1895** fts5YYNSTATE the combined number of states.
1896** fts5YYNRULE the number of rules in the grammar
1897** fts5YYNFTS5TOKEN Number of terminal symbols
1898** fts5YY_MAX_SHIFT Maximum value for shift actions
1899** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
1900** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
1901** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error
1902** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept
1903** fts5YY_NO_ACTION The fts5yy_action[] code for no-op
1904** fts5YY_MIN_REDUCE Minimum value for reduce actions
1905** fts5YY_MAX_REDUCE Maximum value for reduce actions
1906** fts5YY_MIN_DSTRCTR Minimum symbol value that has a destructor
1907** fts5YY_MAX_DSTRCTR Maximum symbol value that has a destructor
1908*/
/* Provide a default value for INTERFACE when the build has not set one. */
#ifndef INTERFACE
# define INTERFACE 1
#endif
1912/************* Begin control #defines *****************************************/
/* Integer type for symbol codes, the one code value that names no symbol,
** the integer type for action codes, and the semantic-value type shared by
** all terminal symbols. */
#define fts5YYCODETYPE unsigned char
#define fts5YYNOCODE 27
#define fts5YYACTIONTYPE unsigned char
#define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token
1917typedef union {
1918 int fts5yyinit;
1919 sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yy0;
1920 int fts5yy4;
1921 Fts5Colset* fts5yy11;
1922 Fts5ExprNode* fts5yy24;
1923 Fts5ExprNearset* fts5yy46;
1924 Fts5ExprPhrase* fts5yy53;
1925} fts5YYMINORTYPE;
/* Maximum parser stack depth, unless the build overrides it. */
#ifndef fts5YYSTACKDEPTH
#define fts5YYSTACKDEPTH 100
#endif
/* %extra_argument plumbing: declaration, parameter, pass-through, fetch
** from and store into the parser object. */
#define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse;
#define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse
#define sqlite3Fts5ParserARG_PARAM ,pParse
#define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse=fts5yypParser->pParse;
#define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse=pParse;
/* Allocator hooks and the heap-stack switch. */
#define fts5YYREALLOC realloc
#define fts5YYFREE free
#define fts5YYDYNSTACK 0
/* %extra_context plumbing: all empty in this parser. */
#define sqlite3Fts5ParserCTX_SDECL
#define sqlite3Fts5ParserCTX_PDECL
#define sqlite3Fts5ParserCTX_PARAM
#define sqlite3Fts5ParserCTX_FETCH
#define sqlite3Fts5ParserCTX_STORE
/* Sizes of the automaton and the boundaries of each action-code range. */
#define fts5YYNSTATE             35
#define fts5YYNRULE              28
#define fts5YYNRULE_WITH_ACTION  28
#define fts5YYNFTS5TOKEN             16
#define fts5YY_MAX_SHIFT         34
#define fts5YY_MIN_SHIFTREDUCE   52
#define fts5YY_MAX_SHIFTREDUCE   79
#define fts5YY_ERROR_ACTION      80
#define fts5YY_ACCEPT_ACTION     81
#define fts5YY_NO_ACTION         82
#define fts5YY_MIN_REDUCE        83
#define fts5YY_MAX_REDUCE        110
#define fts5YY_MIN_DSTRCTR       16
#define fts5YY_MAX_DSTRCTR       24
/************* End control #defines *******************************************/
/* Number of entries in fts5yy_lookahead[] */
#define fts5YY_NLOOKAHEAD ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])))
1958
/* Define the fts5yytestcase() macro to be a no-op if it is not already
** defined.
**
** Applications can choose to define fts5yytestcase() in the %include section
** to a macro that can assist in verifying code coverage.  For production
** code the fts5yytestcase() macro should be turned off.  But it is useful
** for testing.
*/
#ifndef fts5yytestcase
# define fts5yytestcase(X)
#endif
1970
/* Macro to determine if stack space has the ability to grow using
** heap memory.  The stack is growable when no positive fixed depth was
** requested or when fts5YYDYNSTACK is enabled.
*/
#if fts5YYSTACKDEPTH<=0 || fts5YYDYNSTACK
# define fts5YYGROWABLESTACK 1
#else
# define fts5YYGROWABLESTACK 0
#endif

/* Guarantee a minimum number of initial stack slots.
*/
#if fts5YYSTACKDEPTH<=0
# undef fts5YYSTACKDEPTH
# define fts5YYSTACKDEPTH 2  /* Need a minimum stack size */
#endif
1986
1987
1988/* Next are the tables used to determine what action to take based on the
1989** current state and lookahead token. These tables are used to implement
1990** functions that take a state number and lookahead value and return an
1991** action integer.
1992**
1993** Suppose the action integer is N. Then the action is determined as
1994** follows
1995**
1996** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead
1997** token onto the stack and goto state N.
1998**
1999** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
2000** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE.
2001**
2002** N == fts5YY_ERROR_ACTION A syntax error has occurred.
2003**
2004** N == fts5YY_ACCEPT_ACTION The parser accepts its input.
2005**
2006** N == fts5YY_NO_ACTION No such action. Denotes unused
2007** slots in the fts5yy_action[] table.
2008**
2009** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE
2010** and fts5YY_MAX_REDUCE
2011**
2012** The action table is constructed as a single large table named fts5yy_action[].
2013** Given state S and lookahead X, the action is computed as either:
2014**
2015** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ]
2016** (B) N = fts5yy_default[S]
2017**
2018** The (A) formula is preferred. The B formula is used instead if
2019** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X.
2020**
2021** The formulas above are for computing the action when the lookahead is
2022** a terminal symbol. If the lookahead is a non-terminal (as occurs after
2023** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of
2024** the fts5yy_shift_ofst[] array.
2025**
2026** The following are the tables generated in this section:
2027**
2028** fts5yy_action[] A single table containing all actions.
2029** fts5yy_lookahead[] A table containing the lookahead for each entry in
2030** fts5yy_action. Used to detect hash collisions.
2031** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for
2032** shifting terminals.
2033** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for
2034** shifting non-terminals after a reduce.
2035** fts5yy_default[] Default action for each state.
2036**
2037*********** Begin parsing tables **********************************************/
2038#define fts5YY_ACTTAB_COUNT(105) (105)
2039static const fts5YYACTIONTYPEunsigned char fts5yy_action[] = {
2040 /* 0 */ 81, 20, 96, 6, 28, 99, 98, 26, 26, 18,
2041 /* 10 */ 96, 6, 28, 17, 98, 56, 26, 19, 96, 6,
2042 /* 20 */ 28, 14, 98, 14, 26, 31, 92, 96, 6, 28,
2043 /* 30 */ 108, 98, 25, 26, 21, 96, 6, 28, 78, 98,
2044 /* 40 */ 58, 26, 29, 96, 6, 28, 107, 98, 22, 26,
2045 /* 50 */ 24, 16, 12, 11, 1, 13, 13, 24, 16, 23,
2046 /* 60 */ 11, 33, 34, 13, 97, 8, 27, 32, 98, 7,
2047 /* 70 */ 26, 3, 4, 5, 3, 4, 5, 3, 83, 4,
2048 /* 80 */ 5, 3, 63, 5, 3, 62, 12, 2, 86, 13,
2049 /* 90 */ 9, 30, 10, 10, 54, 57, 75, 78, 78, 53,
2050 /* 100 */ 57, 15, 82, 82, 71,
2051};
2052static const fts5YYCODETYPEunsigned char fts5yy_lookahead[] = {
2053 /* 0 */ 16, 17, 18, 19, 20, 22, 22, 24, 24, 17,
2054 /* 10 */ 18, 19, 20, 7, 22, 9, 24, 17, 18, 19,
2055 /* 20 */ 20, 9, 22, 9, 24, 13, 17, 18, 19, 20,
2056 /* 30 */ 26, 22, 24, 24, 17, 18, 19, 20, 15, 22,
2057 /* 40 */ 9, 24, 17, 18, 19, 20, 26, 22, 21, 24,
2058 /* 50 */ 6, 7, 9, 9, 10, 12, 12, 6, 7, 21,
2059 /* 60 */ 9, 24, 25, 12, 18, 5, 20, 14, 22, 5,
2060 /* 70 */ 24, 3, 1, 2, 3, 1, 2, 3, 0, 1,
2061 /* 80 */ 2, 3, 11, 2, 3, 11, 9, 10, 5, 12,
2062 /* 90 */ 23, 24, 10, 10, 8, 9, 9, 15, 15, 8,
2063 /* 100 */ 9, 9, 27, 27, 11, 27, 27, 27, 27, 27,
2064 /* 110 */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
2065 /* 120 */ 27,
2066};
/* Highest state index present in fts5yy_shift_ofst[], and the bounds
** reported by the generator for its entries. */
#define fts5YY_SHIFT_COUNT    (34)
#define fts5YY_SHIFT_MIN      (0)
#define fts5YY_SHIFT_MAX      (93)
/* Per-state offset into fts5yy_action[] used when shifting terminals. */
static const unsigned char fts5yy_shift_ofst[] = {
 /*     0 */    44,   44,   44,   44,   44,   44,   51,   77,   43,   12,
 /*    10 */    14,   83,   82,   14,   23,   23,   31,   31,   71,   74,
 /*    20 */    78,   81,   86,   91,    6,   53,   53,   60,   64,   68,
 /*    30 */    53,   87,   92,   53,   93,
};
/* Highest state index present in fts5yy_reduce_ofst[], and the bounds
** reported by the generator for its entries. */
#define fts5YY_REDUCE_COUNT (17)
#define fts5YY_REDUCE_MIN   (-17)
#define fts5YY_REDUCE_MAX   (67)
/* Per-state offset into fts5yy_action[] used when shifting a non-terminal
** after a reduce. */
static const signed char fts5yy_reduce_ofst[] = {
 /*     0 */   -16,   -8,    0,    9,   17,   25,   46,  -17,  -17,   37,
 /*    10 */    67,    4,    4,    8,    4,   20,   27,   38,
};
2083static const fts5YYACTIONTYPEunsigned char fts5yy_default[] = {
2084 /* 0 */ 80, 80, 80, 80, 80, 80, 95, 80, 80, 105,
2085 /* 10 */ 80, 110, 110, 80, 110, 110, 80, 80, 80, 80,
2086 /* 20 */ 80, 91, 80, 80, 80, 101, 100, 80, 80, 90,
2087 /* 30 */ 103, 80, 80, 104, 80,
2088};
2089/********** End of lemon-generated parsing tables *****************************/
2090
/* The next table maps tokens (terminal symbols) into fallback tokens.
** If a construct like the following:
**
**      %fallback ID X Y Z.
**
** appears in the grammar, then ID becomes a fallback token for X, Y,
** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
** but it does not parse, the type of the token is changed to ID and
** the parse is retried before an error is thrown.
**
** This feature can be used, for example, to cause some keywords in a language
** to revert to identifiers if the keyword does not apply in the context where
** it appears.
**
** The table here is empty and is only compiled when fts5YYFALLBACK is
** defined.
*/
#ifdef fts5YYFALLBACK
static const fts5YYCODETYPE fts5yyFallback[] = {
};
#endif /* fts5YYFALLBACK */
2109
2110/* The following structure represents a single element of the
2111** parser's stack. Information stored includes:
2112**
2113** + The state number for the parser at this level of the stack.
2114**
2115** + The value of the token stored at this level of the stack.
2116** (In other words, the "major" token.)
2117**
2118** + The semantic value stored at this level of the stack. This is
2119** the information used by the action routines in the grammar.
2120** It is sometimes called the "minor" token.
2121**
2122** After the "shift" half of a SHIFTREDUCE action, the stateno field
2123** actually contains the reduce action for the second half of the
2124** SHIFTREDUCE.
2125*/
2126struct fts5yyStackEntry {
2127 fts5YYACTIONTYPEunsigned char stateno; /* The state-number, or reduce action in SHIFTREDUCE */
2128 fts5YYCODETYPEunsigned char major; /* The major token value. This is the code
2129 ** number for the token at this stack level */
2130 fts5YYMINORTYPE minor; /* The user-supplied minor token value. This
2131 ** is the value of the token */
2132};
2133typedef struct fts5yyStackEntry fts5yyStackEntry;
2134
2135/* The state of the parser is completely contained in an instance of
2136** the following structure */
2137struct fts5yyParser {
2138 fts5yyStackEntry *fts5yytos; /* Pointer to top element of the stack */
2139#ifdef fts5YYTRACKMAXSTACKDEPTH
2140 int fts5yyhwm; /* High-water mark of the stack */
2141#endif
2142#ifndef fts5YYNOERRORRECOVERY1
2143 int fts5yyerrcnt; /* Shifts left before out of the error */
2144#endif
2145 sqlite3Fts5ParserARG_SDECLFts5Parse *pParse; /* A place to hold %extra_argument */
2146 sqlite3Fts5ParserCTX_SDECL /* A place to hold %extra_context */
2147 fts5yyStackEntry *fts5yystackEnd; /* Last entry in the stack */
2148 fts5yyStackEntry *fts5yystack; /* The parser stack */
2149 fts5yyStackEntry fts5yystk0[fts5YYSTACKDEPTH100]; /* Initial stack space */
2150};
2151typedef struct fts5yyParser fts5yyParser;
2152
2153#include <assert.h>
#ifndef NDEBUG
#include <stdio.h>
/* Destination stream and line prefix for parser trace output.  Both are
** NULL whenever tracing is disabled. */
static FILE *fts5yyTraceFILE = 0;
static char *fts5yyTracePrompt = 0;
#endif /* NDEBUG */

#ifndef NDEBUG
/*
** Turn parser tracing on by giving a stream to which to write the trace
** and a prompt to preface each trace message.  Tracing is turned off
** by making either argument NULL
**
** Inputs:
** <ul>
** <li> A FILE* to which trace output should be written.
**      If NULL, then tracing is turned off.
** <li> A prefix string written at the beginning of every
**      line of trace output.  If NULL, then tracing is
**      turned off.
** </ul>
**
** Outputs:
** None.
**
** The prompt pointer is stored, not copied, so the caller's string must
** remain valid for as long as tracing stays enabled.
*/
static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
  if( TraceFILE==0 || zTracePrompt==0 ){
    /* A missing stream or prompt disables tracing: clear both settings so
    ** the two globals are never left half-configured. */
    TraceFILE = 0;
    zTracePrompt = 0;
  }
  fts5yyTraceFILE = TraceFILE;
  fts5yyTracePrompt = zTracePrompt;
}
#endif /* NDEBUG */
2185
#if defined(fts5YYCOVERAGE) || !defined(NDEBUG)
/* For tracing shifts, the names of all terminals and nonterminals
** are required.  The following table supplies these names, indexed by
** symbol code: entries 1..15 are the FTS5_* terminals, entries 16..26 are
** the grammar's non-terminals. */
static const char *const fts5yyTokenName[] = {
  /*    0 */ "$",
  /*    1 */ "OR",
  /*    2 */ "AND",
  /*    3 */ "NOT",
  /*    4 */ "TERM",
  /*    5 */ "COLON",
  /*    6 */ "MINUS",
  /*    7 */ "LCP",
  /*    8 */ "RCP",
  /*    9 */ "STRING",
  /*   10 */ "LP",
  /*   11 */ "RP",
  /*   12 */ "CARET",
  /*   13 */ "COMMA",
  /*   14 */ "PLUS",
  /*   15 */ "STAR",
  /*   16 */ "input",
  /*   17 */ "expr",
  /*   18 */ "cnearset",
  /*   19 */ "exprlist",
  /*   20 */ "colset",
  /*   21 */ "colsetlist",
  /*   22 */ "nearset",
  /*   23 */ "nearphrases",
  /*   24 */ "phrase",
  /*   25 */ "neardist_opt",
  /*   26 */ "star_opt",
};
#endif /* defined(fts5YYCOVERAGE) || !defined(NDEBUG) */
2219
#ifndef NDEBUG
/* For tracing reduce actions, the names of all rules are required.
** The table is indexed by rule number.
*/
static const char *const fts5yyRuleName[] = {
 /*   0 */ "input ::= expr",
 /*   1 */ "colset ::= MINUS LCP colsetlist RCP",
 /*   2 */ "colset ::= LCP colsetlist RCP",
 /*   3 */ "colset ::= STRING",
 /*   4 */ "colset ::= MINUS STRING",
 /*   5 */ "colsetlist ::= colsetlist STRING",
 /*   6 */ "colsetlist ::= STRING",
 /*   7 */ "expr ::= expr AND expr",
 /*   8 */ "expr ::= expr OR expr",
 /*   9 */ "expr ::= expr NOT expr",
 /*  10 */ "expr ::= colset COLON LP expr RP",
 /*  11 */ "expr ::= LP expr RP",
 /*  12 */ "expr ::= exprlist",
 /*  13 */ "exprlist ::= cnearset",
 /*  14 */ "exprlist ::= exprlist cnearset",
 /*  15 */ "cnearset ::= nearset",
 /*  16 */ "cnearset ::= colset COLON nearset",
 /*  17 */ "nearset ::= phrase",
 /*  18 */ "nearset ::= CARET phrase",
 /*  19 */ "nearset ::= STRING LP nearphrases neardist_opt RP",
 /*  20 */ "nearphrases ::= phrase",
 /*  21 */ "nearphrases ::= nearphrases phrase",
 /*  22 */ "neardist_opt ::=",
 /*  23 */ "neardist_opt ::= COMMA STRING",
 /*  24 */ "phrase ::= phrase PLUS STRING star_opt",
 /*  25 */ "phrase ::= STRING star_opt",
 /*  26 */ "star_opt ::= STAR",
 /*  27 */ "star_opt ::=",
};
#endif /* NDEBUG */
2254
2255
2256#if fts5YYGROWABLESTACK0
2257/*
2258** Try to increase the size of the parser stack. Return the number
2259** of errors. Return 0 on success.
2260*/
2261static int fts5yyGrowStack(fts5yyParser *p)1{
2262 int oldSize = 1 + (int)(p->fts5yystackEnd - p->fts5yystack);
2263 int newSize;
2264 int idx;
2265 fts5yyStackEntry *pNew;
2266
2267 newSize = oldSize*2 + 100;
2268 idx = (int)(p->fts5yytos - p->fts5yystack);
2269 if( p->fts5yystack==p->fts5yystk0 ){
2270 pNew = fts5YYREALLOCrealloc(0, newSize*sizeof(pNew[0]));
2271 if( pNew==0 ) return 1;
2272 memcpy(pNew, p->fts5yystack, oldSize*sizeof(pNew[0]));
2273 }else{
2274 pNew = fts5YYREALLOCrealloc(p->fts5yystack, newSize*sizeof(pNew[0]));
2275 if( pNew==0 ) return 1;
2276 }
2277 p->fts5yystack = pNew;
2278 p->fts5yytos = &p->fts5yystack[idx];
2279#ifndef NDEBUG1
2280 if( fts5yyTraceFILE ){
2281 fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n",
2282 fts5yyTracePrompt, oldSize, newSize);
2283 }
2284#endif
2285 p->fts5yystackEnd = &p->fts5yystack[newSize-1];
2286 return 0;
2287}
2288#endif /* fts5YYGROWABLESTACK */
2289
#if !fts5YYGROWABLESTACK
/* For builds that do not have a growable stack, fts5yyGrowStack always
** returns an error.  The argument is not evaluated.
*/
# define fts5yyGrowStack(X) 1
#endif
2296
/* Datatype of the argument to the memory allocator passed as the
** second argument to sqlite3Fts5ParserAlloc() below.  This can be changed by
** putting an appropriate #define in the %include section of the input
** grammar.
*/
#ifndef fts5YYMALLOCARGTYPE
# define fts5YYMALLOCARGTYPE size_t
#endif
2305
2306/* Initialize a new parser that has already been allocated.
2307*/
2308static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){
2309 fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser;
2310 sqlite3Fts5ParserCTX_STORE
2311#ifdef fts5YYTRACKMAXSTACKDEPTH
2312 fts5yypParser->fts5yyhwm = 0;
2313#endif
2314 fts5yypParser->fts5yystack = fts5yypParser->fts5yystk0;
2315 fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH100-1];
2316#ifndef fts5YYNOERRORRECOVERY1
2317 fts5yypParser->fts5yyerrcnt = -1;
2318#endif
2319 fts5yypParser->fts5yytos = fts5yypParser->fts5yystack;
2320 fts5yypParser->fts5yystack[0].stateno = 0;
2321 fts5yypParser->fts5yystack[0].major = 0;
2322}
2323
2324#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
2325/*
2326** This function allocates a new parser.
2327** The only argument is a pointer to a function which works like
2328** malloc.
2329**
2330** Inputs:
2331** A pointer to the function used to allocate memory.
2332**
2333** Outputs:
2334** A pointer to a parser. This pointer is used in subsequent calls
2335** to sqlite3Fts5Parser and sqlite3Fts5ParserFree.
2336*/
2337static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPEu64) sqlite3Fts5ParserCTX_PDECL){
 /* Obtain sizeof(fts5yyParser) bytes from mallocProc and, if that succeeds,
 ** initialize them with sqlite3Fts5ParserInit(). When mallocProc returns a
 ** null pointer the initialization is skipped and that null pointer is
 ** returned, so callers must check the result before using it. */
2338 fts5yyParser *fts5yypParser;
2339 fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPEu64)sizeof(fts5yyParser) );
2340 if( fts5yypParser ){
2341 sqlite3Fts5ParserCTX_STORE
2342 sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM);
2343 }
2344 return (void*)fts5yypParser;
2345}
2346#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2347
2348
2349/* The following function deletes the "minor type" or semantic value
2350** associated with a symbol. The symbol can be either a terminal
2351** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is
2352** a pointer to the value to be deleted. The code used to do the
2353** deletions is derived from the %destructor and/or %token_destructor
2354** directives of the input grammar.
2355*/
2356static void fts5yy_destructor(
2357 fts5yyParser *fts5yypParser, /* The parser */
2358 fts5YYCODETYPEunsigned char fts5yymajor, /* Type code for object to destroy */
2359 fts5YYMINORTYPE *fts5yypminor /* The object to be destroyed */
2360){
 /* Dispatch on the symbol code and release the semantic value held in
 ** *fts5yypminor using the release routine that matches that symbol.
 ** Symbol codes without a case below fall through to "default" and are
 ** left untouched. */
2361 sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse;
2362 sqlite3Fts5ParserCTX_FETCH
2363 switch( fts5yymajor ){
2364 /* Here is inserted the actions which take place when a
2365 ** terminal or non-terminal is destroyed. This can happen
2366 ** when the symbol is popped from the stack during a
2367 ** reduce or during error processing or when a parser is
2368 ** being destroyed before it is finished parsing.
2369 **
2370 ** Note: during a reduce, the only symbols destroyed are those
2371 ** which appear on the RHS of the rule, but which are *not* used
2372 ** inside the C code.
2373 */
2374/********* Begin destructor definitions ***************************************/
2375 case 16: /* input */
2376{
2377#line 83 "fts5parse.y"
 /* Nothing is released for "input"; the statement only references pParse. */
2378 (void)pParse;
2379#line 606 "fts5parse.sql"
2380}
2381 break;
2382 case 17: /* expr */
2383 case 18: /* cnearset */
2384 case 19: /* exprlist */
2385{
2386#line 89 "fts5parse.y"
 /* These three symbols share the fts5yy24 member of the minor union. */
2387 sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24));
2388#line 615 "fts5parse.sql"
2389}
2390 break;
2391 case 20: /* colset */
2392 case 21: /* colsetlist */
2393{
2394#line 93 "fts5parse.y"
2395 sqlite3_freesqlite3_api->free((fts5yypminor->fts5yy11));
2396#line 623 "fts5parse.sql"
2397}
2398 break;
2399 case 22: /* nearset */
2400 case 23: /* nearphrases */
2401{
2402#line 148 "fts5parse.y"
2403 sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46));
2404#line 631 "fts5parse.sql"
2405}
2406 break;
2407 case 24: /* phrase */
2408{
2409#line 183 "fts5parse.y"
2410 sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53));
2411#line 638 "fts5parse.sql"
2412}
2413 break;
2414/********* End destructor definitions *****************************************/
2415 default: break; /* If no destructor action specified: do nothing */
2416 }
2417}
2418
2419/*
2420** Pop the parser's stack once.
2421**
2422** If there is a destructor routine associated with the token which
2423** is popped from the stack, then call it.
2424*/
2425static void fts5yy_pop_parser_stack(fts5yyParser *pParser){
 /* Remove the top stack entry and run the destructor for its symbol.
 ** The asserts document the precondition that the stack holds more than
 ** the base entry. */
2426 fts5yyStackEntry *fts5yytos;
2427 assert( pParser->fts5yytos!=0 )((void) (0));
2428 assert( pParser->fts5yytos > pParser->fts5yystack )((void) (0));
 /* Post-decrement: the local keeps the entry being popped while the
 ** parser's top-of-stack pointer already refers to the entry below it. */
2429 fts5yytos = pParser->fts5yytos--;
2430#ifndef NDEBUG1
2431 if( fts5yyTraceFILE ){
2432 fprintf(fts5yyTraceFILE,"%sPopping %s\n",
2433 fts5yyTracePrompt,
2434 fts5yyTokenName[fts5yytos->major]);
2435 }
2436#endif
2437 fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor);
2438}
2439
2440/*
2441** Clear all secondary memory allocations from the parser
2442*/
2443static void sqlite3Fts5ParserFinalize(void *p){
 /* Run destructors for every entry still on the stack above the base
 ** entry. The parser object itself is not released here. Note that the
 ** walk uses a local copy of the top-of-stack pointer; the parser's own
 ** fts5yytos field is not modified by this function. */
2444 fts5yyParser *pParser = (fts5yyParser*)p;
2445
2446 /* In-lined version of calling fts5yy_pop_parser_stack() for each
2447 ** element left in the stack */
2448 fts5yyStackEntry *fts5yytos = pParser->fts5yytos;
2449 while( fts5yytos>pParser->fts5yystack ){
2450#ifndef NDEBUG1
2451 if( fts5yyTraceFILE ){
2452 fprintf(fts5yyTraceFILE,"%sPopping %s\n",
2453 fts5yyTracePrompt,
2454 fts5yyTokenName[fts5yytos->major]);
2455 }
2456#endif
 /* Symbol codes below fts5YY_MIN_DSTRCTR skip the destructor call. */
2457 if( fts5yytos->major>=fts5YY_MIN_DSTRCTR16 ){
2458 fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor);
2459 }
2460 fts5yytos--;
2461 }
2462
 /* Only a stack that was moved off the built-in fts5yystk0 storage is
 ** freed, and only in builds where the growable-stack option is enabled. */
2463#if fts5YYGROWABLESTACK0
2464 if( pParser->fts5yystack!=pParser->fts5yystk0 ) fts5YYFREEfree(pParser->fts5yystack);
2465#endif
2466}
2467
2468#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
2469/*
2470** Deallocate and destroy a parser. Destructors are called for
2471** all stack elements before shutting the parser down.
2472**
2473** If the fts5YYPARSEFREENEVERNULL macro exists (for example because it
2474** is defined in a %include section of the input grammar) then it is
2475** assumed that the input pointer is never NULL.
2476*/
2477static void sqlite3Fts5ParserFree(
2478 void *p, /* The parser to be deleted */
2479 void (*freeProc)(void*) /* Function used to reclaim memory */
2480){
 /* Unless fts5YYPARSEFREENEVERNULL is defined, a null parser pointer is
 ** accepted and ignored. Otherwise the stack contents are finalized first
 ** and the parser memory is then handed to freeProc. */
2481#ifndef fts5YYPARSEFREENEVERNULL
2482 if( p==0 ) return;
2483#endif
2484 sqlite3Fts5ParserFinalize(p);
2485 (*freeProc)(p);
2486}
2487#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2488
2489/*
2490** Return the peak depth of the stack for a parser.
2491*/
2492#ifdef fts5YYTRACKMAXSTACKDEPTH
2493static int sqlite3Fts5ParserStackPeak(void *p){
 /* Report the fts5yyhwm counter of the parser passed in as an opaque
 ** pointer. Only compiled when fts5YYTRACKMAXSTACKDEPTH is defined. */
2494 fts5yyParser *pParser = (fts5yyParser*)p;
2495 return pParser->fts5yyhwm;
2496}
2497#endif
2498
2499/* This array of booleans keeps track of the parser statement
2500** coverage. The element fts5yycoverage[X][Y] is set when the parser
2501** is in state X and has a lookahead token Y. In a well-tested
2502** system, every element of this matrix should end up being set.
2503*/
2504#if defined(fts5YYCOVERAGE)
2505static unsigned char fts5yycoverage[fts5YYNSTATE35][fts5YYNFTS5TOKEN16];
2506#endif
2507
2508/*
2509** Write into out a description of every state/lookahead combination that
2510**
2511** (1) has not been used by the parser, and
2512** (2) is not a syntax error.
2513**
2514** Return the number of missed state/lookahead combinations.
2515*/
2516#if defined(fts5YYCOVERAGE)
2517static int sqlite3Fts5ParserCoverage(FILE *out){
 /* Scan every (state, lookahead) pair that has an entry in the lookahead
 ** table and count those never recorded in fts5yycoverage[][]. When "out"
 ** is non-null, one line is printed per valid pair, tagged "ok" or
 ** "missed". Returns the number of missed pairs. */
2518 int stateno, iLookAhead, i;
2519 int nMissed = 0;
2520 for(stateno=0; stateno<fts5YYNSTATE35; stateno++){
2521 i = fts5yy_shift_ofst[stateno];
2522 for(iLookAhead=0; iLookAhead<fts5YYNFTS5TOKEN16; iLookAhead++){
 /* A mismatch means this state has no table entry for the token, so
 ** the pair is not a coverage target and is skipped. */
2523 if( fts5yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
2524 if( fts5yycoverage[stateno][iLookAhead]==0 ) nMissed++;
2525 if( out ){
2526 fprintf(out,"State %d lookahead %s %s\n", stateno,
2527 fts5yyTokenName[iLookAhead],
2528 fts5yycoverage[stateno][iLookAhead] ? "ok" : "missed");
2529 }
2530 }
2531 }
2532 return nMissed;
2533}
2534#endif
2535
2536/*
2537** Find the appropriate action for a parser given the terminal
2538** look-ahead token iLookAhead.
2539*/
2540static fts5YYACTIONTYPEunsigned char fts5yy_find_shift_action(
2541 fts5YYCODETYPEunsigned char iLookAhead, /* The look-ahead token */
2542 fts5YYACTIONTYPEunsigned char stateno /* Current state number */
2543){
 /* Look up the action for terminal iLookAhead in state stateno.
 ** The result is taken from fts5yy_action[] when the lookahead table has a
 ** matching entry, and from fts5yy_default[stateno] otherwise (after the
 ** optional fallback and wildcard attempts). */
2544 int i;
2545
 /* Values above fts5YY_MAX_SHIFT are returned unchanged; no table lookup
 ** is performed for them. */
2546 if( stateno>fts5YY_MAX_SHIFT34 ) return stateno;
2547 assert( stateno <= fts5YY_SHIFT_COUNT )((void) (0));
2548#if defined(fts5YYCOVERAGE)
2549 fts5yycoverage[stateno][iLookAhead] = 1;
2550#endif
 /* The loop body only repeats through the "continue" in the fallback
 ** branch; every other path returns. */
2551 do{
2552 i = fts5yy_shift_ofst[stateno];
2553 assert( i>=0 )((void) (0));
2554 assert( i<=fts5YY_ACTTAB_COUNT )((void) (0));
2555 assert( i+fts5YYNFTS5TOKEN<=(int)fts5YY_NLOOKAHEAD )((void) (0));
2556 assert( iLookAhead!=fts5YYNOCODE )((void) (0));
2557 assert( iLookAhead < fts5YYNFTS5TOKEN )((void) (0));
2558 i += iLookAhead;
2559 assert( i<(int)fts5YY_NLOOKAHEAD )((void) (0));
2560 if( fts5yy_lookahead[i]!=iLookAhead ){
2561#ifdef fts5YYFALLBACK
2562 fts5YYCODETYPEunsigned char iFallback; /* Fallback token */
2563 assert( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) )((void) (0));
2564 iFallback = fts5yyFallback[iLookAhead];
2565 if( iFallback!=0 ){
2566#ifndef NDEBUG1
2567 if( fts5yyTraceFILE ){
2568 fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n",
2569 fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]);
2570 }
2571#endif
2572 assert( fts5yyFallback[iFallback]==0 )((void) (0)); /* Fallback loop must terminate */
2573 iLookAhead = iFallback;
2574 continue;
2575 }
2576#endif
2577#ifdef fts5YYWILDCARD
2578 {
 /* i - iLookAhead recovers the state's base offset before the
 ** wildcard token code is added. */
2579 int j = i - iLookAhead + fts5YYWILDCARD;
2580 assert( j<(int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])) )((void) (0));
2581 if( fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 ){
2582#ifndef NDEBUG1
2583 if( fts5yyTraceFILE ){
2584 fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n",
2585 fts5yyTracePrompt, fts5yyTokenName[iLookAhead],
2586 fts5yyTokenName[fts5YYWILDCARD]);
2587 }
2588#endif /* NDEBUG */
2589 return fts5yy_action[j];
2590 }
2591 }
2592#endif /* fts5YYWILDCARD */
2593 return fts5yy_default[stateno];
2594 }else{
2595 assert( i>=0 && i<(int)(sizeof(fts5yy_action)/sizeof(fts5yy_action[0])) )((void) (0));
2596 return fts5yy_action[i];
2597 }
2598 }while(1);
2599}
2600
2601/*
2602** Find the appropriate action for a parser given the non-terminal
2603** look-ahead token iLookAhead.
2604*/
2605static fts5YYACTIONTYPEunsigned char fts5yy_find_reduce_action(
2606 fts5YYACTIONTYPEunsigned char stateno, /* Current state number */
2607 fts5YYCODETYPEunsigned char iLookAhead /* The look-ahead token */
2608){
 /* Look up the action to take in state stateno once non-terminal
 ** iLookAhead has been produced by a reduce. The index into fts5yy_action[]
 ** is fts5yy_reduce_ofst[stateno] + iLookAhead.
 **
 ** When fts5YYERRORSYMBOL is defined, out-of-range or mismatching lookups
 ** return fts5yy_default[stateno]; otherwise the same conditions are only
 ** stated as asserts and the table entry is returned unconditionally. */
2609 int i;
2610#ifdef fts5YYERRORSYMBOL
2611 if( stateno>fts5YY_REDUCE_COUNT(17) ){
2612 return fts5yy_default[stateno];
2613 }
2614#else
2615 assert( stateno<=fts5YY_REDUCE_COUNT )((void) (0));
2616#endif
2617 i = fts5yy_reduce_ofst[stateno];
2618 assert( iLookAhead!=fts5YYNOCODE )((void) (0));
2619 i += iLookAhead;
2620#ifdef fts5YYERRORSYMBOL
2621 if( i<0 || i>=fts5YY_ACTTAB_COUNT(105) || fts5yy_lookahead[i]!=iLookAhead ){
2622 return fts5yy_default[stateno];
2623 }
2624#else
2625 assert( i>=0 && i<fts5YY_ACTTAB_COUNT )((void) (0));
2626 assert( fts5yy_lookahead[i]==iLookAhead )((void) (0));
2627#endif
2628 return fts5yy_action[i];
2629}
2630
2631/*
2632** The following routine is called if the stack overflows.
2633*/
2634static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){
 /* Unwind the whole parser stack (running destructors via
 ** fts5yy_pop_parser_stack) and then report the overflow through
 ** sqlite3Fts5ParseError() on the parse context. */
2635 sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse;
2636 sqlite3Fts5ParserCTX_FETCH
2637#ifndef NDEBUG1
2638 if( fts5yyTraceFILE ){
2639 fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt);
2640 }
2641#endif
2642 while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
2643 /* Here code is inserted which will execute if the parser
2644 ** stack ever overflows */
2645/******** Begin %stack_overflow code ******************************************/
2646#line 36 "fts5parse.y"
2647
2648 sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
2649#line 876 "fts5parse.sql"
2650/******** End %stack_overflow code ********************************************/
2651 sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument var */
2652 sqlite3Fts5ParserCTX_STORE
2653}
2654
2655/*
2656** Print tracing information for a SHIFT action
2657*/
2658#ifndef NDEBUG1
2659static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, const char *zTag){
 /* Debug-only trace helper: when a trace stream is set, print zTag, the
 ** name of the symbol on top of the stack, and either the destination
 ** state or the pending reduce number. Has no effect on parser state. */
2660 if( fts5yyTraceFILE ){
 /* Values below fts5YYNSTATE are plain states; anything else is printed
 ** as a rule number relative to fts5YY_MIN_REDUCE. */
2661 if( fts5yyNewState<fts5YYNSTATE35 ){
2662 fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n",
2663 fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major],
2664 fts5yyNewState);
2665 }else{
2666 fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n",
2667 fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major],
2668 fts5yyNewState - fts5YY_MIN_REDUCE83);
2669 }
2670 }
2671}
2672#else
2673# define fts5yyTraceShift(X,Y,Z)
2674#endif
2675
2676/*
2677** Perform a shift action.
2678*/
2679static void fts5yy_shift(
2680 fts5yyParser *fts5yypParser, /* The parser to be shifted */
2681 fts5YYACTIONTYPEunsigned char fts5yyNewState, /* The new state to shift in */
2682 fts5YYCODETYPEunsigned char fts5yyMajor, /* The major token to shift in */
2683 sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyMinor /* The minor token to shift in */
2684){
 /* Push one entry (state, major code, token value) onto the parser stack.
 ** If the stack has no room and cannot be grown, the push is undone and
 ** fts5yyStackOverflow() is invoked instead; nothing is pushed then. */
2685 fts5yyStackEntry *fts5yytos;
 /* The top-of-stack pointer is advanced first and range-checked after. */
2686 fts5yypParser->fts5yytos++;
2687#ifdef fts5YYTRACKMAXSTACKDEPTH
2688 if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
2689 fts5yypParser->fts5yyhwm++;
2690 assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) )((void) (0));
2691 }
2692#endif
2693 fts5yytos = fts5yypParser->fts5yytos;
2694 if( fts5yytos>fts5yypParser->fts5yystackEnd ){
 /* A non-zero result from fts5yyGrowStack() means the stack could not
 ** be enlarged. */
2695 if( fts5yyGrowStack(fts5yypParser)1 ){
2696 fts5yypParser->fts5yytos--;
2697 fts5yyStackOverflow(fts5yypParser);
2698 return;
2699 }
 /* Re-read the pointer after a successful grow. */
2700 fts5yytos = fts5yypParser->fts5yytos;
2701 assert( fts5yytos <= fts5yypParser->fts5yystackEnd )((void) (0));
2702 }
 /* Action codes above fts5YY_MAX_SHIFT are rebased from the SHIFTREDUCE
 ** range into the REDUCE range before being stored as the entry's state. */
2703 if( fts5yyNewState > fts5YY_MAX_SHIFT34 ){
2704 fts5yyNewState += fts5YY_MIN_REDUCE83 - fts5YY_MIN_SHIFTREDUCE52;
2705 }
2706 fts5yytos->stateno = fts5yyNewState;
2707 fts5yytos->major = fts5yyMajor;
2708 fts5yytos->minor.fts5yy0 = fts5yyMinor;
2709 fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift");
2710}
2711
2712/* For rule J, fts5yyRuleInfoLhs[J] contains the symbol on the left-hand side
2713** of that rule */
2714static const fts5YYCODETYPEunsigned char fts5yyRuleInfoLhs[] = {
 /* Indexed by rule number (28 entries, rules 0..27). Each value is the
 ** symbol code of the rule's left-hand side; per the row comments these
 ** are 16=input, 17=expr, 18=cnearset, 19=exprlist, 20=colset,
 ** 21=colsetlist, 22=nearset, 23=nearphrases, 24=phrase, 25=neardist_opt,
 ** 26=star_opt. */
2715 16, /* (0) input ::= expr */
2716 20, /* (1) colset ::= MINUS LCP colsetlist RCP */
2717 20, /* (2) colset ::= LCP colsetlist RCP */
2718 20, /* (3) colset ::= STRING */
2719 20, /* (4) colset ::= MINUS STRING */
2720 21, /* (5) colsetlist ::= colsetlist STRING */
2721 21, /* (6) colsetlist ::= STRING */
2722 17, /* (7) expr ::= expr AND expr */
2723 17, /* (8) expr ::= expr OR expr */
2724 17, /* (9) expr ::= expr NOT expr */
2725 17, /* (10) expr ::= colset COLON LP expr RP */
2726 17, /* (11) expr ::= LP expr RP */
2727 17, /* (12) expr ::= exprlist */
2728 19, /* (13) exprlist ::= cnearset */
2729 19, /* (14) exprlist ::= exprlist cnearset */
2730 18, /* (15) cnearset ::= nearset */
2731 18, /* (16) cnearset ::= colset COLON nearset */
2732 22, /* (17) nearset ::= phrase */
2733 22, /* (18) nearset ::= CARET phrase */
2734 22, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */
2735 23, /* (20) nearphrases ::= phrase */
2736 23, /* (21) nearphrases ::= nearphrases phrase */
2737 25, /* (22) neardist_opt ::= */
2738 25, /* (23) neardist_opt ::= COMMA STRING */
2739 24, /* (24) phrase ::= phrase PLUS STRING star_opt */
2740 24, /* (25) phrase ::= STRING star_opt */
2741 26, /* (26) star_opt ::= STAR */
2742 26, /* (27) star_opt ::= */
2743};
2744
2745/* For rule J, fts5yyRuleInfoNRhs[J] contains the negative of the number
2746** of symbols on the right-hand side of that rule. */
2747static const signed char fts5yyRuleInfoNRhs[] = {
 /* Indexed by rule number, parallel to fts5yyRuleInfoLhs[]. The count is
 ** stored negated so that fts5yy_reduce() can use it directly as an offset
 ** from the top-of-stack pointer. A value of 0 marks an empty right-hand
 ** side (rules 22 and 27). */
2748 -1, /* (0) input ::= expr */
2749 -4, /* (1) colset ::= MINUS LCP colsetlist RCP */
2750 -3, /* (2) colset ::= LCP colsetlist RCP */
2751 -1, /* (3) colset ::= STRING */
2752 -2, /* (4) colset ::= MINUS STRING */
2753 -2, /* (5) colsetlist ::= colsetlist STRING */
2754 -1, /* (6) colsetlist ::= STRING */
2755 -3, /* (7) expr ::= expr AND expr */
2756 -3, /* (8) expr ::= expr OR expr */
2757 -3, /* (9) expr ::= expr NOT expr */
2758 -5, /* (10) expr ::= colset COLON LP expr RP */
2759 -3, /* (11) expr ::= LP expr RP */
2760 -1, /* (12) expr ::= exprlist */
2761 -1, /* (13) exprlist ::= cnearset */
2762 -2, /* (14) exprlist ::= exprlist cnearset */
2763 -1, /* (15) cnearset ::= nearset */
2764 -3, /* (16) cnearset ::= colset COLON nearset */
2765 -1, /* (17) nearset ::= phrase */
2766 -2, /* (18) nearset ::= CARET phrase */
2767 -5, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */
2768 -1, /* (20) nearphrases ::= phrase */
2769 -2, /* (21) nearphrases ::= nearphrases phrase */
2770 0, /* (22) neardist_opt ::= */
2771 -2, /* (23) neardist_opt ::= COMMA STRING */
2772 -4, /* (24) phrase ::= phrase PLUS STRING star_opt */
2773 -2, /* (25) phrase ::= STRING star_opt */
2774 -1, /* (26) star_opt ::= STAR */
2775 0, /* (27) star_opt ::= */
2776};
2777
2778static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */
2779
2780/*
2781** Perform a reduce action and the shift that must immediately
2782** follow the reduce.
2783**
2784** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions
2785** access to the lookahead token (if any). The fts5yyLookahead will be fts5YYNOCODE
2786** if the lookahead token has already been consumed. As this procedure is
2787** only called from one place, optimizing compilers will in-line it, which
2788** means that the extra parameters have no performance impact.
2789*/
2790static fts5YYACTIONTYPEunsigned char fts5yy_reduce(
2791 fts5yyParser *fts5yypParser, /* The parser */
2792 unsigned int fts5yyruleno, /* Number of the rule by which to reduce */
2793 int fts5yyLookahead, /* Lookahead token, or fts5YYNOCODE if none */
2794 sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyLookaheadToken /* Value of the lookahead token */
2795 sqlite3Fts5ParserCTX_PDECL /* %extra_context */
2796){
 /* Run the grammar action for rule fts5yyruleno, then replace the rule's
 ** right-hand-side entries on the stack with a single entry for its
 ** left-hand-side symbol. Returns the action code stored as that new
 ** entry's state.
 **
 ** Inside the actions, fts5yymsp[0] is the rightmost RHS symbol and
 ** negative indices reach the symbols to its left. Each action leaves the
 ** result in the slot of the leftmost RHS symbol, which becomes the LHS
 ** entry after the pop below. */
2797 int fts5yygoto; /* The next state */
2798 fts5YYACTIONTYPEunsigned char fts5yyact; /* The next action */
2799 fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */
2800 int fts5yysize; /* Amount to pop the stack */
2801 sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse;
2802 (void)fts5yyLookahead;
2803 (void)fts5yyLookaheadToken;
2804 fts5yymsp = fts5yypParser->fts5yytos;
2805
2806 switch( fts5yyruleno ){
2807 /* Beginning here are the reduction cases. A typical example
2808 ** follows:
2809 ** case 0:
2810 ** #line <lineno> <grammarfile>
2811 ** { ... } // User supplied code
2812 ** #line <lineno> <thisfile>
2813 ** break;
2814 */
2815/********** Begin reduce actions **********************************************/
2816 fts5YYMINORTYPE fts5yylhsminor;
2817 case 0: /* input ::= expr */
2818#line 82 "fts5parse.y"
2819{ sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); }
2820#line 1047 "fts5parse.sql"
2821 break;
2822 case 1: /* colset ::= MINUS LCP colsetlist RCP */
2823#line 97 "fts5parse.y"
2824{
2825 fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
2826}
2827#line 1054 "fts5parse.sql"
2828 break;
2829 case 2: /* colset ::= LCP colsetlist RCP */
2830#line 100 "fts5parse.y"
2831{ fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; }
2832#line 1059 "fts5parse.sql"
2833 break;
2834 case 3: /* colset ::= STRING */
2835#line 101 "fts5parse.y"
2836{
2837 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
2838}
2839#line 1066 "fts5parse.sql"
2840 fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2841 break;
2842 case 4: /* colset ::= MINUS STRING */
2843#line 104 "fts5parse.y"
2844{
2845 fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
2846 fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
2847}
2848#line 1075 "fts5parse.sql"
2849 break;
2850 case 5: /* colsetlist ::= colsetlist STRING */
2851#line 109 "fts5parse.y"
2852{
2853 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy11, &fts5yymsp[0].minor.fts5yy0); }
2854#line 1081 "fts5parse.sql"
2855 fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2856 break;
2857 case 6: /* colsetlist ::= STRING */
2858#line 111 "fts5parse.y"
2859{
2860 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
2861}
2862#line 1089 "fts5parse.sql"
2863 fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2864 break;
2865 case 7: /* expr ::= expr AND expr */
2866#line 115 "fts5parse.y"
2867{
2868 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND2, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2869}
2870#line 1097 "fts5parse.sql"
2871 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2872 break;
2873 case 8: /* expr ::= expr OR expr */
2874#line 118 "fts5parse.y"
2875{
2876 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR1, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2877}
2878#line 1105 "fts5parse.sql"
2879 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2880 break;
2881 case 9: /* expr ::= expr NOT expr */
2882#line 121 "fts5parse.y"
2883{
2884 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT3, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2885}
2886#line 1113 "fts5parse.sql"
2887 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2888 break;
2889 case 10: /* expr ::= colset COLON LP expr RP */
2890#line 125 "fts5parse.y"
2891{
2892 sqlite3Fts5ParseSetColset(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[-4].minor.fts5yy11);
2893 fts5yylhsminor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;
2894}
2895#line 1122 "fts5parse.sql"
2896 fts5yymsp[-4].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2897 break;
2898 case 11: /* expr ::= LP expr RP */
2899#line 129 "fts5parse.y"
2900{fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;}
2901#line 1128 "fts5parse.sql"
2902 break;
2903 case 12: /* expr ::= exprlist */
2904 case 13: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==13);
2905#line 130 "fts5parse.y"
2906{fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;}
2907#line 1134 "fts5parse.sql"
2908 fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2909 break;
2910 case 14: /* exprlist ::= exprlist cnearset */
2911#line 133 "fts5parse.y"
2912{
2913 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24);
2914}
2915#line 1142 "fts5parse.sql"
2916 fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2917 break;
2918 case 15: /* cnearset ::= nearset */
2919#line 137 "fts5parse.y"
2920{
2921 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, 0, 0, fts5yymsp[0].minor.fts5yy46);
2922}
2923#line 1150 "fts5parse.sql"
2924 fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2925 break;
2926 case 16: /* cnearset ::= colset COLON nearset */
2927#line 140 "fts5parse.y"
2928{
2929 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, 0, 0, fts5yymsp[0].minor.fts5yy46);
2930 sqlite3Fts5ParseSetColset(pParse, fts5yylhsminor.fts5yy24, fts5yymsp[-2].minor.fts5yy11);
2931}
2932#line 1159 "fts5parse.sql"
2933 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2934 break;
2935 case 17: /* nearset ::= phrase */
2936#line 151 "fts5parse.y"
2937{ fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); }
2938#line 1165 "fts5parse.sql"
2939 fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2940 break;
2941 case 18: /* nearset ::= CARET phrase */
2942#line 152 "fts5parse.y"
2943{
2944 sqlite3Fts5ParseSetCaret(fts5yymsp[0].minor.fts5yy53);
2945 fts5yymsp[-1].minor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
2946}
2947#line 1174 "fts5parse.sql"
2948 break;
2949 case 19: /* nearset ::= STRING LP nearphrases neardist_opt RP */
2950#line 156 "fts5parse.y"
2951{
2952 sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0);
2953 sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-1].minor.fts5yy0);
2954 fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46;
2955}
2956#line 1183 "fts5parse.sql"
2957 fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2958 break;
2959 case 20: /* nearphrases ::= phrase */
2960#line 162 "fts5parse.y"
2961{
2962 fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
2963}
2964#line 1191 "fts5parse.sql"
2965 fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2966 break;
2967 case 21: /* nearphrases ::= nearphrases phrase */
2968#line 165 "fts5parse.y"
2969{
2970 fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy46, fts5yymsp[0].minor.fts5yy53);
2971}
2972#line 1199 "fts5parse.sql"
2973 fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2974 break;
 /* Rules 22 and 27 have an empty right-hand side, so their result goes
 ** into fts5yymsp[1], the slot one above the current top of stack. */
2975 case 22: /* neardist_opt ::= */
2976#line 172 "fts5parse.y"
2977{ fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; }
2978#line 1205 "fts5parse.sql"
2979 break;
2980 case 23: /* neardist_opt ::= COMMA STRING */
2981#line 173 "fts5parse.y"
2982{ fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; }
2983#line 1210 "fts5parse.sql"
2984 break;
2985 case 24: /* phrase ::= phrase PLUS STRING star_opt */
2986#line 185 "fts5parse.y"
2987{
2988 fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
2989}
2990#line 1217 "fts5parse.sql"
2991 fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
2992 break;
2993 case 25: /* phrase ::= STRING star_opt */
2994#line 188 "fts5parse.y"
2995{
2996 fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
2997}
2998#line 1225 "fts5parse.sql"
2999 fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
3000 break;
3001 case 26: /* star_opt ::= STAR */
3002#line 196 "fts5parse.y"
3003{ fts5yymsp[0].minor.fts5yy4 = 1; }
3004#line 1231 "fts5parse.sql"
3005 break;
3006 case 27: /* star_opt ::= */
3007#line 197 "fts5parse.y"
3008{ fts5yymsp[1].minor.fts5yy4 = 0; }
3009#line 1236 "fts5parse.sql"
3010 break;
3011 default:
3012 break;
3013/********** End reduce actions ************************************************/
3014 };
3015 assert( fts5yyruleno<sizeof(fts5yyRuleInfoLhs)/sizeof(fts5yyRuleInfoLhs[0]) )((void) (0));
3016 fts5yygoto = fts5yyRuleInfoLhs[fts5yyruleno];
 /* fts5yysize is zero or negative: it is minus the number of RHS symbols.
 ** fts5yymsp[fts5yysize] is therefore the entry just below the RHS, and
 ** its state selects the action for the LHS symbol. */
3017 fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
3018 fts5yyact = fts5yy_find_reduce_action(fts5yymsp[fts5yysize].stateno,(fts5YYCODETYPEunsigned char)fts5yygoto);
3019
3020 /* There are no SHIFTREDUCE actions on nonterminals because the table
3021 ** generator has simplified them to pure REDUCE actions. */
3022 assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) )((void) (0));
3023
3024 /* It is not possible for a REDUCE to be followed by an error */
3025 assert( fts5yyact!=fts5YY_ERROR_ACTION )((void) (0));
3026
 /* Move the top of stack to the leftmost RHS slot (or one slot up for an
 ** empty RHS) and relabel that entry as the LHS symbol. Only stateno and
 ** major are written here; the minor value was set by the action above. */
3027 fts5yymsp += fts5yysize+1;
3028 fts5yypParser->fts5yytos = fts5yymsp;
3029 fts5yymsp->stateno = (fts5YYACTIONTYPEunsigned char)fts5yyact;
3030 fts5yymsp->major = (fts5YYCODETYPEunsigned char)fts5yygoto;
3031 fts5yyTraceShift(fts5yypParser, fts5yyact, "... then shift");
3032 return fts5yyact;
3033}
3034
3035/*
3036** The following code executes when the parse fails
3037*/
3038#ifndef fts5YYNOERRORRECOVERY1
3039static void fts5yy_parse_failed(
3040 fts5yyParser *fts5yypParser /* The parser */
3041){
 /* Unwind the whole parser stack, running destructors for each popped
 ** entry. The grammar supplies no %parse_failure code, so nothing else
 ** happens beyond re-storing the extra argument. */
3042 sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse;
3043 sqlite3Fts5ParserCTX_FETCH
3044#ifndef NDEBUG1
3045 if( fts5yyTraceFILE ){
3046 fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt);
3047 }
3048#endif
3049 while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
3050 /* Here code is inserted which will be executed whenever the
3051 ** parser fails */
3052/************ Begin %parse_failure code ***************************************/
3053/************ End %parse_failure code *****************************************/
3054 sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */
3055 sqlite3Fts5ParserCTX_STORE
3056}
3057#endif /* fts5YYNOERRORRECOVERY */
3058
3059/*
3060** The following code executes when a syntax error first occurs.
3061*/
3062static void fts5yy_syntax_error(
3063 fts5yyParser *fts5yypParser, /* The parser */
3064 int fts5yymajor, /* The major type of the error token */
3065 sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyminor /* The minor type of the error token */
3066){
 /* Report a syntax error on the parse context through
 ** sqlite3Fts5ParseError(), quoting the offending token's text. The stack
 ** is not modified here. */
3067 sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse;
3068 sqlite3Fts5ParserCTX_FETCH
3069#define FTS5TOKENfts5yyminor fts5yyminor
3070/************ Begin %syntax_error code ****************************************/
3071#line 30 "fts5parse.y"
3072
3073 UNUSED_PARAM(fts5yymajor)(void)(fts5yymajor); /* Silence a compiler warning */
 /* "%.*s" takes the token length (.n) and then the token pointer (.p), so
 ** the token text is printed by explicit length. */
3074 sqlite3Fts5ParseError(
3075 pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKENfts5yyminor.n,FTS5TOKENfts5yyminor.p
3076 );
3077#line 1304 "fts5parse.sql"
3078/************ End %syntax_error code ******************************************/
3079 sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */
3080 sqlite3Fts5ParserCTX_STORE
3081}
3082
3083/*
3084** The following is executed when the parser accepts
3085*/
3086static void fts5yy_accept(
3087 fts5yyParser *fts5yypParser /* The parser */
3088){
 /* Called when the input has been accepted. The assert documents that the
 ** stack is expected to be back at its base entry. The grammar supplies
 ** no %parse_accept code, so only tracing and bookkeeping happen here. */
3089 sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse;
3090 sqlite3Fts5ParserCTX_FETCH
3091#ifndef NDEBUG1
3092 if( fts5yyTraceFILE ){
3093 fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt);
3094 }
3095#endif
3096#ifndef fts5YYNOERRORRECOVERY1
3097 fts5yypParser->fts5yyerrcnt = -1;
3098#endif
3099 assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack )((void) (0));
3100 /* Here code is inserted which will be executed whenever the
3101 ** parser accepts */
3102/*********** Begin %parse_accept code *****************************************/
3103/*********** End %parse_accept code *******************************************/
3104 sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */
3105 sqlite3Fts5ParserCTX_STORE
3106}
3107
3108/* The main parser program.
3109** The first argument is a pointer to a structure obtained from
3110** "sqlite3Fts5ParserAlloc" which describes the current state of the parser.
3111** The second argument is the major token number. The third is
3112** the minor token. The fourth optional argument is whatever the
3113** user wants (and specified in the grammar) and is available for
3114** use by the action routines.
3115**
3116** Inputs:
3117** <ul>
3118** <li> A pointer to the parser (an opaque structure.)
3119** <li> The major token number.
3120** <li> The minor token number.
3121** <li> An optional argument of a grammar-specified type.
3122** </ul>
3123**
3124** Outputs:
3125** None.
3126*/
/*
** Feed one token (fts5yymajor, fts5yyminor) to the parser fts5yyp. pParse is
** stored in the parser object before any action runs. The main loop looks up
** the action for the current state and token, then either reduces (and loops
** again with the state returned by the reduce), shifts (and leaves the loop),
** accepts (and returns), or runs the syntax-error handling selected by the
** fts5YYERRORSYMBOL / fts5YYNOERRORRECOVERY preprocessor configuration.
*/
3127static void sqlite3Fts5Parser(
3128 void *fts5yyp, /* The parser */
3129 int fts5yymajor, /* The major token code number */
3130 sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyminor /* The value for the token */
3131 sqlite3Fts5ParserARG_PDECL,Fts5Parse *pParse /* Optional %extra_argument parameter */
3132){
3133 fts5YYMINORTYPE fts5yyminorunion;
3134 fts5YYACTIONTYPEunsigned char fts5yyact; /* The parser action. */
3135#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY1)
3136 int fts5yyendofinput; /* True if we are at the end of input */
3137#endif
3138#ifdef fts5YYERRORSYMBOL
3139 int fts5yyerrorhit = 0; /* True if fts5yymajor has invoked an error */
3140#endif
3141 fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp; /* The parser */
3142 sqlite3Fts5ParserCTX_FETCH
3143 sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse;
3144
3145 assert( fts5yypParser->fts5yytos!=0 )((void) (0));
3146#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY1)
/* A major token code of 0 is treated as end-of-input. */
3147 fts5yyendofinput = (fts5yymajor==0);
3148#endif
3149
/* Start from the state number stored in the top-of-stack entry. */
3150 fts5yyact = fts5yypParser->fts5yytos->stateno;
3151#ifndef NDEBUG1
3152 if( fts5yyTraceFILE ){
3153 if( fts5yyact < fts5YY_MIN_REDUCE83 ){
3154 fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n",
3155 fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact);
3156 }else{
3157 fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n",
3158 fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE83);
3159 }
3160 }
3161#endif
3162
3163 while(1){ /* Exit by "break" */
3164 assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack )((void) (0));
3165 assert( fts5yyact==fts5yypParser->fts5yytos->stateno )((void) (0));
3166 fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPEunsigned char)fts5yymajor,fts5yyact);
/* Reduce: action codes at or above fts5YY_MIN_REDUCE encode a rule number.
** After the reduce the loop runs again with the action it returned. */
3167 if( fts5yyact >= fts5YY_MIN_REDUCE83 ){
3168 unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE83; /* Reduce by this rule */
3169#ifndef NDEBUG1
3170 assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) )((void) (0));
3171 if( fts5yyTraceFILE ){
3172 int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
3173 if( fts5yysize ){
3174 fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n",
3175 fts5yyTracePrompt,
3176 fts5yyruleno, fts5yyRuleName[fts5yyruleno],
3177 fts5yyruleno<fts5YYNRULE_WITH_ACTION28 ? "" : " without external action",
3178 fts5yypParser->fts5yytos[fts5yysize].stateno);
3179 }else{
3180 fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n",
3181 fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno],
3182 fts5yyruleno<fts5YYNRULE_WITH_ACTION28 ? "" : " without external action");
3183 }
3184 }
3185#endif /* NDEBUG */
3186
3187 /* Check that the stack is large enough to grow by a single entry
3188 ** if the RHS of the rule is empty. This ensures that there is room
3189 ** enough on the stack to push the LHS value */
3190 if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){
3191#ifdef fts5YYTRACKMAXSTACKDEPTH
3192 if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
3193 fts5yypParser->fts5yyhwm++;
3194 assert( fts5yypParser->fts5yyhwm ==((void) (0))
3195 (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack))((void) (0));
3196 }
3197#endif
/* Stack is full: try to grow it; if that fails, report a stack overflow
** and stop processing this token. */
3198 if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){
3199 if( fts5yyGrowStack(fts5yypParser)1 ){
3200 fts5yyStackOverflow(fts5yypParser);
3201 break;
3202 }
3203 }
3204 }
3205 fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM);
/* Shift: push the token with fts5yy_shift(), then leave the loop. */
3206 }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE79 ){
3207 fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPEunsigned char)fts5yymajor,fts5yyminor);
3208#ifndef fts5YYNOERRORRECOVERY1
3209 fts5yypParser->fts5yyerrcnt--;
3210#endif
3211 break;
/* Accept: drop the top stack entry, call fts5yy_accept() and return
** directly (the trace block at the end of the function is skipped). */
3212 }else if( fts5yyact==fts5YY_ACCEPT_ACTION81 ){
3213 fts5yypParser->fts5yytos--;
3214 fts5yy_accept(fts5yypParser);
3215 return;
/* Any other action code is expected to be the error action (see assert). */
3216 }else{
3217 assert( fts5yyact == fts5YY_ERROR_ACTION )((void) (0));
3218 fts5yyminorunion.fts5yy0 = fts5yyminor;
3219#ifdef fts5YYERRORSYMBOL
3220 int fts5yymx;
3221#endif
3222#ifndef NDEBUG1
3223 if( fts5yyTraceFILE ){
3224 fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt);
3225 }
3226#endif
3227#ifdef fts5YYERRORSYMBOL
3228 /* A syntax error has occurred.
3229 ** The response to an error depends upon whether or not the
3230 ** grammar defines an error token "ERROR".
3231 **
3232 ** This is what we do if the grammar does define ERROR:
3233 **
3234 ** * Call the %syntax_error function.
3235 **
3236 ** * Begin popping the stack until we enter a state where
3237 ** it is legal to shift the error symbol, then shift
3238 ** the error symbol.
3239 **
3240 ** * Set the error count to three.
3241 **
3242 ** * Begin accepting and shifting new tokens. No new error
3243 ** processing will occur until three tokens have been
3244 ** shifted successfully.
3245 **
3246 */
3247 if( fts5yypParser->fts5yyerrcnt<0 ){
3248 fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor);
3249 }
3250 fts5yymx = fts5yypParser->fts5yytos->major;
3251 if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){
3252#ifndef NDEBUG1
3253 if( fts5yyTraceFILE ){
3254 fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n",
3255 fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
3256 }
3257#endif
3258 fts5yy_destructor(fts5yypParser, (fts5YYCODETYPEunsigned char)fts5yymajor, &fts5yyminorunion);
3259 fts5yymajor = fts5YYNOCODE27;
3260 }else{
3261 while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){
3262 fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno,
3263 fts5YYERRORSYMBOL);
3264 if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE79 ) break;
3265 fts5yy_pop_parser_stack(fts5yypParser);
3266 }
3267 if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){
3268 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion);
3269 fts5yy_parse_failed(fts5yypParser);
3270#ifndef fts5YYNOERRORRECOVERY1
3271 fts5yypParser->fts5yyerrcnt = -1;
3272#endif
3273 fts5yymajor = fts5YYNOCODE27;
3274 }else if( fts5yymx!=fts5YYERRORSYMBOL ){
3275 fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor);
3276 }
3277 }
3278 fts5yypParser->fts5yyerrcnt = 3;
3279 fts5yyerrorhit = 1;
3280 if( fts5yymajor==fts5YYNOCODE27 ) break;
3281 fts5yyact = fts5yypParser->fts5yytos->stateno;
3282#elif defined(fts5YYNOERRORRECOVERY1)
3283 /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to
3284 ** do any kind of error recovery. Instead, simply invoke the syntax
3285 ** error routine and continue going as if nothing had happened.
3286 **
3287 ** Applications can set this macro (for example inside %include) if
3288 ** they intend to abandon the parse upon the first syntax error seen.
3289 */
3290 fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
3291 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion);
3292 break;
3293#else /* fts5YYERRORSYMBOL is not defined */
3294 /* This is what we do if the grammar does not define ERROR:
3295 **
3296 ** * Report an error message, and throw away the input token.
3297 **
3298 ** * If the input token is $, then fail the parse.
3299 **
3300 ** As before, subsequent error messages are suppressed until
3301 ** three input tokens have been successfully shifted.
3302 */
3303 if( fts5yypParser->fts5yyerrcnt<=0 ){
3304 fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
3305 }
3306 fts5yypParser->fts5yyerrcnt = 3;
3307 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion);
3308 if( fts5yyendofinput ){
3309 fts5yy_parse_failed(fts5yypParser);
3310#ifndef fts5YYNOERRORRECOVERY1
3311 fts5yypParser->fts5yyerrcnt = -1;
3312#endif
3313 }
3314 break;
3315#endif
3316 }
3317 }
3318#ifndef NDEBUG1
/* When tracing is enabled, print the token name of every stack entry from
** index 1 up to and including the current top of stack. */
3319 if( fts5yyTraceFILE ){
3320 fts5yyStackEntry *i;
3321 char cDiv = '[';
3322 fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt);
3323 for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){
3324 fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]);
3325 cDiv = ' ';
3326 }
3327 fprintf(fts5yyTraceFILE,"]\n");
3328 }
3329#endif
3330 return;
3331}
3332
/*
** Look up the fallback token for canonical token code iToken.
**
** Returns the fallback token code, or 0 if iToken has no fallback. In
** builds where fts5YYFALLBACK is not defined there is no fallback table
** and the result is always 0.
*/
static int sqlite3Fts5ParserFallback(int iToken){
#ifndef fts5YYFALLBACK
  /* No fallback table compiled in: the argument is intentionally unused. */
  (void)iToken;
  return 0;
#else
  assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) );
  return fts5yyFallback[iToken];
#endif
}
3346
3347#line 1 "fts5_aux.c"
3348/*
3349** 2014 May 31
3350**
3351** The author disclaims copyright to this source code. In place of
3352** a legal notice, here is a blessing:
3353**
3354** May you do good and not evil.
3355** May you find forgiveness for yourself and forgive others.
3356** May you share freely, never taking more than you give.
3357**
3358******************************************************************************
3359*/
3360
3361
3362/* #include "fts5Int.h" */
3363#include <math.h> /* amalgamator: keep */
3364
3365/*
3366** Object used to iterate through all "coalesced phrase instances" in
3367** a single column of the current row. If the phrase instances in the
3368** column being considered do not overlap, this object simply iterates
3369** through them. Or, if they do overlap (share one or more tokens in
3370** common), each set of overlapping instances is treated as a single
3371** match. See documentation for the highlight() auxiliary function for
3372** details.
3373**
3374** Usage is:
3375**
3376** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter);
3377** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter);
3378** rc = fts5CInstIterNext(&iter)
3379** ){
3380** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
3381** }
3382**
3383*/
3384typedef struct CInstIter CInstIter;
3385struct CInstIter {
3386 const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
3387 Fts5Context *pFts; /* First arg to pass to pApi functions */
3388 int iCol; /* Column to search */
3389 int iInst; /* Next phrase instance index */
3390 int nInst; /* Total number of phrase instances */
3391
3392 /* Output variables */
3393 int iStart; /* First token in coalesced phrase instance */
3394 int iEnd; /* Last token in coalesced phrase instance */
3395};
3396
3397/*
3398** Advance the iterator to the next coalesced phrase instance. Return
3399** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
3400*/
3401static int fts5CInstIterNext(CInstIter *pIter){
3402 int rc = SQLITE_OK0;
3403 pIter->iStart = -1;
3404 pIter->iEnd = -1;
3405
3406 while( rc==SQLITE_OK0 && pIter->iInst<pIter->nInst ){
3407 int ip; int ic; int io;
3408 rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io);
3409 if( rc==SQLITE_OK0 ){
3410 if( ic==pIter->iCol ){
3411 int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip);
3412 if( pIter->iStart<0 ){
3413 pIter->iStart = io;
3414 pIter->iEnd = iEnd;
3415 }else if( io<=pIter->iEnd ){
3416 if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd;
3417 }else{
3418 break;
3419 }
3420 }
3421 pIter->iInst++;
3422 }
3423 }
3424
3425 return rc;
3426}
3427
3428/*
3429** Initialize the iterator object indicated by the final parameter to
3430** iterate through coalesced phrase instances in column iCol.
3431*/
3432static int fts5CInstIterInit(
3433 const Fts5ExtensionApi *pApi,
3434 Fts5Context *pFts,
3435 int iCol,
3436 CInstIter *pIter
3437){
3438 int rc;
3439
3440 memset(pIter, 0, sizeof(CInstIter));
3441 pIter->pApi = pApi;
3442 pIter->pFts = pFts;
3443 pIter->iCol = iCol;
3444 rc = pApi->xInstCount(pFts, &pIter->nInst);
3445
3446 if( rc==SQLITE_OK0 ){
3447 rc = fts5CInstIterNext(pIter);
3448 }
3449
3450 return rc;
3451}
3452
3453
3454
3455/*************************************************************************
3456** Start of highlight() implementation.
3457*/
3458typedef struct HighlightContext HighlightContext;
3459struct HighlightContext {
3460 /* Constant parameters to fts5HighlightCb() */
3461 int iRangeStart; /* First token to include */
3462 int iRangeEnd; /* If non-zero, last token to include */
3463 const char *zOpen; /* Opening highlight */
3464 const char *zClose; /* Closing highlight */
3465 const char *zIn; /* Input text */
3466 int nIn; /* Size of input text in bytes */
3467
3468 /* Variables modified by fts5HighlightCb() */
3469 CInstIter iter; /* Coalesced Instance Iterator */
3470 int iPos; /* Current token offset in zIn[] */
3471 int iOff; /* Have copied up to this offset in zIn[] */
3472 int bOpen; /* True if highlight is open */
3473 char *zOut; /* Output value */
3474};
3475
3476/*
3477** Append text to the HighlightContext output string - p->zOut. Argument
3478** z points to a buffer containing n bytes of text to append. If n is
3479** negative, everything up until the first '\0' is appended to the output.
3480**
3481** If *pRc is set to any value other than SQLITE_OK when this function is
3482** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
3483** *pRc is set to an error code before returning.
3484*/
3485static void fts5HighlightAppend(
3486 int *pRc,
3487 HighlightContext *p,
3488 const char *z, int n
3489){
3490 if( *pRc==SQLITE_OK0 && z ){
3491 if( n<0 ) n = (int)strlen(z);
3492 p->zOut = sqlite3_mprintfsqlite3_api->mprintf("%z%.*s", p->zOut, n, z);
3493 if( p->zOut==0 ) *pRc = SQLITE_NOMEM7;
3494 }
3495}
3496
3497/*
3498** Tokenizer callback used by implementation of highlight() function.
3499*/
3500static int fts5HighlightCb(
3501 void *pContext, /* Pointer to HighlightContext object */
3502 int tflags, /* Mask of FTS5_TOKEN_* flags */
3503 const char *pToken, /* Buffer containing token */
3504 int nToken, /* Size of token in bytes */
3505 int iStartOff, /* Start byte offset of token */
3506 int iEndOff /* End byte offset of token */
3507){
3508 HighlightContext *p = (HighlightContext*)pContext;
3509 int rc = SQLITE_OK0;
3510 int iPos;
3511
3512 UNUSED_PARAM2(pToken, nToken)(void)(pToken), (void)(nToken);
3513
3514 if( tflags & FTS5_TOKEN_COLOCATED0x0001 ) return SQLITE_OK0;
3515 iPos = p->iPos++;
3516
3517 if( p->iRangeEnd>=0 ){
3518 if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK0;
3519 if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
3520 }
3521
3522 /* If the parenthesis is open, and this token is not part of the current
3523 ** phrase, and the starting byte offset of this token is past the point
3524 ** that has currently been copied into the output buffer, close the
3525 ** parenthesis. */
3526 if( p->bOpen
3527 && (iPos<=p->iter.iStart || p->iter.iStart<0)
3528 && iStartOff>p->iOff
3529 ){
3530 fts5HighlightAppend(&rc, p, p->zClose, -1);
3531 p->bOpen = 0;
3532 }
3533
3534 /* If this is the start of a new phrase, and the highlight is not open:
3535 **
3536 ** * copy text from the input up to the start of the phrase, and
3537 ** * open the highlight.
3538 */
3539 if( iPos==p->iter.iStart && p->bOpen==0 ){
3540 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
3541 fts5HighlightAppend(&rc, p, p->zOpen, -1);
3542 p->iOff = iStartOff;
3543 p->bOpen = 1;
3544 }
3545
3546 if( iPos==p->iter.iEnd ){
3547 if( p->bOpen==0 ){
3548 assert( p->iRangeEnd>=0 )((void) (0));
3549 fts5HighlightAppend(&rc, p, p->zOpen, -1);
3550 p->bOpen = 1;
3551 }
3552 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
3553 p->iOff = iEndOff;
3554
3555 if( rc==SQLITE_OK0 ){
3556 rc = fts5CInstIterNext(&p->iter);
3557 }
3558 }
3559
3560 if( iPos==p->iRangeEnd ){
3561 if( p->bOpen ){
3562 if( p->iter.iStart>=0 && iPos>=p->iter.iStart ){
3563 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
3564 p->iOff = iEndOff;
3565 }
3566 fts5HighlightAppend(&rc, p, p->zClose, -1);
3567 p->bOpen = 0;
3568 }
3569 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
3570 p->iOff = iEndOff;
3571 }
3572
3573 return rc;
3574}
3575
3576
3577/*
3578** Implementation of highlight() function.
3579*/
3580static void fts5HighlightFunction(
3581 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3582 Fts5Context *pFts, /* First arg to pass to pApi functions */
3583 sqlite3_context *pCtx, /* Context for returning result/error */
3584 int nVal, /* Number of values in apVal[] array */
3585 sqlite3_value **apVal /* Array of trailing arguments */
3586){
3587 HighlightContext ctx;
3588 int rc;
3589 int iCol;
3590
3591 if( nVal!=3 ){
3592 const char *zErr = "wrong number of arguments to function highlight()";
3593 sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1);
3594 return;
3595 }
3596
3597 iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]);
3598 memset(&ctx, 0, sizeof(HighlightContext));
3599 ctx.zOpen = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[1]);
3600 ctx.zClose = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[2]);
3601 ctx.iRangeEnd = -1;
3602 rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);
3603 if( rc==SQLITE_RANGE25 ){
3604 sqlite3_result_textsqlite3_api->result_text(pCtx, "", -1, SQLITE_STATIC((sqlite3_destructor_type)0));
3605 rc = SQLITE_OK0;
3606 }else if( ctx.zIn ){
3607 const char *pLoc = 0; /* Locale of column iCol */
3608 int nLoc = 0; /* Size of pLoc in bytes */
3609 if( rc==SQLITE_OK0 ){
3610 rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
3611 }
3612
3613 if( rc==SQLITE_OK0 ){
3614 rc = pApi->xColumnLocale(pFts, iCol, &pLoc, &nLoc);
3615 }
3616 if( rc==SQLITE_OK0 ){
3617 rc = pApi->xTokenize_v2(
3618 pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx, fts5HighlightCb
3619 );
3620 }
3621 if( ctx.bOpen ){
3622 fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1);
3623 }
3624 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
3625
3626 if( rc==SQLITE_OK0 ){
3627 sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
3628 }
3629 sqlite3_freesqlite3_api->free(ctx.zOut);
3630 }
3631 if( rc!=SQLITE_OK0 ){
3632 sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc);
3633 }
3634}
3635/*
3636** End of highlight() implementation.
3637**************************************************************************/
3638
/*
** Context object handed to fts5SentenceFinderCb(). It accumulates, in
** aFirst[], the token position of each token found to start a sentence.
*/
typedef struct Fts5SFinder Fts5SFinder;
struct Fts5SFinder {
  int iPos;                       /* Position of the current token */
  int nFirstAlloc;                /* Number of slots allocated in aFirst[] */
  int nFirst;                     /* Number of slots of aFirst[] in use */
  int *aFirst;                    /* First token of each sentence */
  const char *zDoc;               /* Text of the document being tokenized */
};
3650
3651/*
3652** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if
3653** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an
3654** error occurs.
3655*/
3656static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){
3657 if( p->nFirstAlloc==p->nFirst ){
3658 int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64;
3659 int *aNew;
3660
3661 aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(p->aFirst, nNew*sizeof(int));
3662 if( aNew==0 ) return SQLITE_NOMEM7;
3663 p->aFirst = aNew;
3664 p->nFirstAlloc = nNew;
3665 }
3666 p->aFirst[p->nFirst++] = iAdd;
3667 return SQLITE_OK0;
3668}
3669
3670/*
3671** This function is an xTokenize() callback used by the auxiliary snippet()
3672** function. Its job is to identify tokens that are the first in a sentence.
3673** For each such token, an entry is added to the SFinder.aFirst[] array.
3674*/
3675static int fts5SentenceFinderCb(
3676 void *pContext, /* Pointer to HighlightContext object */
3677 int tflags, /* Mask of FTS5_TOKEN_* flags */
3678 const char *pToken, /* Buffer containing token */
3679 int nToken, /* Size of token in bytes */
3680 int iStartOff, /* Start offset of token */
3681 int iEndOff /* End offset of token */
3682){
3683 int rc = SQLITE_OK0;
3684
3685 UNUSED_PARAM2(pToken, nToken)(void)(pToken), (void)(nToken);
3686 UNUSED_PARAM(iEndOff)(void)(iEndOff);
3687
3688 if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ){
3689 Fts5SFinder *p = (Fts5SFinder*)pContext;
3690 if( p->iPos>0 ){
3691 int i;
3692 char c = 0;
3693 for(i=iStartOff-1; i>=0; i--){
3694 c = p->zDoc[i];
3695 if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break;
3696 }
3697 if( i!=iStartOff-1 && (c=='.' || c==':') ){
3698 rc = fts5SentenceFinderAdd(p, p->iPos);
3699 }
3700 }else{
3701 rc = fts5SentenceFinderAdd(p, 0);
3702 }
3703 p->iPos++;
3704 }
3705 return rc;
3706}
3707
3708static int fts5SnippetScore(
3709 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3710 Fts5Context *pFts, /* First arg to pass to pApi functions */
3711 int nDocsize, /* Size of column in tokens */
3712 unsigned char *aSeen, /* Array with one element per query phrase */
3713 int iCol, /* Column to score */
3714 int iPos, /* Starting offset to score */
3715 int nToken, /* Max tokens per snippet */
3716 int *pnScore, /* OUT: Score */
3717 int *piPos /* OUT: Adjusted offset */
3718){
3719 int rc;
3720 int i;
3721 int ip = 0;
3722 int ic = 0;
3723 int iOff = 0;
3724 int iFirst = -1;
3725 int nInst;
3726 int nScore = 0;
3727 int iLast = 0;
3728 sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken;
3729
3730 rc = pApi->xInstCount(pFts, &nInst);
3731 for(i=0; i<nInst && rc==SQLITE_OK0; i++){
3732 rc = pApi->xInst(pFts, i, &ip, &ic, &iOff);
3733 if( rc==SQLITE_OK0 && ic==iCol && iOff>=iPos && iOff<iEnd ){
3734 nScore += (aSeen[ip] ? 1 : 1000);
3735 aSeen[ip] = 1;
3736 if( iFirst<0 ) iFirst = iOff;
3737 iLast = iOff + pApi->xPhraseSize(pFts, ip);
3738 }
3739 }
3740
3741 *pnScore = nScore;
3742 if( piPos ){
3743 sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2;
3744 if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken;
3745 if( iAdj<0 ) iAdj = 0;
3746 *piPos = (int)iAdj;
3747 }
3748
3749 return rc;
3750}
3751
3752/*
3753** Return the value in pVal interpreted as utf-8 text. Except, if pVal
3754** contains a NULL value, return a pointer to a static string zero
3755** bytes in length instead of a NULL pointer.
3756*/
3757static const char *fts5ValueToText(sqlite3_value *pVal){
3758 const char *zRet = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal);
3759 return zRet ? zRet : "";
3760}
3761
3762/*
3763** Implementation of snippet() function.
3764*/
3765static void fts5SnippetFunction(
3766 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3767 Fts5Context *pFts, /* First arg to pass to pApi functions */
3768 sqlite3_context *pCtx, /* Context for returning result/error */
3769 int nVal, /* Number of values in apVal[] array */
3770 sqlite3_value **apVal /* Array of trailing arguments */
3771){
3772 HighlightContext ctx;
3773 int rc = SQLITE_OK0; /* Return code */
3774 int iCol; /* 1st argument to snippet() */
3775 const char *zEllips; /* 4th argument to snippet() */
3776 int nToken; /* 5th argument to snippet() */
3777 int nInst = 0; /* Number of instance matches this row */
3778 int i; /* Used to iterate through instances */
3779 int nPhrase; /* Number of phrases in query */
3780 unsigned char *aSeen; /* Array of "seen instance" flags */
3781 int iBestCol; /* Column containing best snippet */
3782 int iBestStart = 0; /* First token of best snippet */
3783 int nBestScore = 0; /* Score of best snippet */
3784 int nColSize = 0; /* Total size of iBestCol in tokens */
3785 Fts5SFinder sFinder; /* Used to find the beginnings of sentences */
3786 int nCol;
3787
3788 if( nVal!=5 ){
3789 const char *zErr = "wrong number of arguments to function snippet()";
3790 sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1);
3791 return;
3792 }
3793
3794 nCol = pApi->xColumnCount(pFts);
3795 memset(&ctx, 0, sizeof(HighlightContext));
3796 iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]);
3797 ctx.zOpen = fts5ValueToText(apVal[1]);
3798 ctx.zClose = fts5ValueToText(apVal[2]);
3799 ctx.iRangeEnd = -1;
3800 zEllips = fts5ValueToText(apVal[3]);
3801 nToken = sqlite3_value_intsqlite3_api->value_int(apVal[4]);
3802
3803 iBestCol = (iCol>=0 ? iCol : 0);
3804 nPhrase = pApi->xPhraseCount(pFts);
3805 aSeen = sqlite3_mallocsqlite3_api->malloc(nPhrase);
3806 if( aSeen==0 ){
3807 rc = SQLITE_NOMEM7;
3808 }
3809 if( rc==SQLITE_OK0 ){
3810 rc = pApi->xInstCount(pFts, &nInst);
3811 }
3812
3813 memset(&sFinder, 0, sizeof(Fts5SFinder));
3814 for(i=0; i<nCol; i++){
3815 if( iCol<0 || iCol==i ){
3816 const char *pLoc = 0; /* Locale of column iCol */
3817 int nLoc = 0; /* Size of pLoc in bytes */
3818 int nDoc;
3819 int nDocsize;
3820 int ii;
3821 sFinder.iPos = 0;
3822 sFinder.nFirst = 0;
3823 rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
3824 if( rc!=SQLITE_OK0 ) break;
3825 rc = pApi->xColumnLocale(pFts, i, &pLoc, &nLoc);
3826 if( rc!=SQLITE_OK0 ) break;
3827 rc = pApi->xTokenize_v2(pFts,
3828 sFinder.zDoc, nDoc, pLoc, nLoc, (void*)&sFinder, fts5SentenceFinderCb
3829 );
3830 if( rc!=SQLITE_OK0 ) break;
3831 rc = pApi->xColumnSize(pFts, i, &nDocsize);
3832 if( rc!=SQLITE_OK0 ) break;
3833
3834 for(ii=0; rc==SQLITE_OK0 && ii<nInst; ii++){
3835 int ip, ic, io;
3836 int iAdj;
3837 int nScore;
3838 int jj;
3839
3840 rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
3841 if( ic!=i ) continue;
3842 if( io>nDocsize ) rc = FTS5_CORRUPT(11 | (1<<8));
3843 if( rc!=SQLITE_OK0 ) continue;
3844 memset(aSeen, 0, nPhrase);
3845 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
3846 io, nToken, &nScore, &iAdj
3847 );
3848 if( rc==SQLITE_OK0 && nScore>nBestScore ){
3849 nBestScore = nScore;
3850 iBestCol = i;
3851 iBestStart = iAdj;
3852 nColSize = nDocsize;
3853 }
3854
3855 if( rc==SQLITE_OK0 && sFinder.nFirst && nDocsize>nToken ){
3856 for(jj=0; jj<(sFinder.nFirst-1); jj++){
3857 if( sFinder.aFirst[jj+1]>io ) break;
3858 }
3859
3860 if( sFinder.aFirst[jj]<io ){
3861 memset(aSeen, 0, nPhrase);
3862 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
3863 sFinder.aFirst[jj], nToken, &nScore, 0
3864 );
3865
3866 nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
3867 if( rc==SQLITE_OK0 && nScore>nBestScore ){
3868 nBestScore = nScore;
3869 iBestCol = i;
3870 iBestStart = sFinder.aFirst[jj];
3871 nColSize = nDocsize;
3872 }
3873 }
3874 }
3875 }
3876 }
3877 }
3878
3879 if( rc==SQLITE_OK0 ){
3880 rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
3881 }
3882 if( rc==SQLITE_OK0 && nColSize==0 ){
3883 rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
3884 }
3885 if( ctx.zIn ){
3886 const char *pLoc = 0; /* Locale of column iBestCol */
3887 int nLoc = 0; /* Bytes in pLoc */
3888
3889 if( rc==SQLITE_OK0 ){
3890 rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
3891 }
3892
3893 ctx.iRangeStart = iBestStart;
3894 ctx.iRangeEnd = iBestStart + nToken - 1;
3895
3896 if( iBestStart>0 ){
3897 fts5HighlightAppend(&rc, &ctx, zEllips, -1);
3898 }
3899
3900 /* Advance iterator ctx.iter so that it points to the first coalesced
3901 ** phrase instance at or following position iBestStart. */
3902 while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK0 ){
3903 rc = fts5CInstIterNext(&ctx.iter);
3904 }
3905
3906 if( rc==SQLITE_OK0 ){
3907 rc = pApi->xColumnLocale(pFts, iBestCol, &pLoc, &nLoc);
3908 }
3909 if( rc==SQLITE_OK0 ){
3910 rc = pApi->xTokenize_v2(
3911 pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx,fts5HighlightCb
3912 );
3913 }
3914 if( ctx.bOpen ){
3915 fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1);
3916 }
3917 if( ctx.iRangeEnd>=(nColSize-1) ){
3918 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
3919 }else{
3920 fts5HighlightAppend(&rc, &ctx, zEllips, -1);
3921 }
3922 }
3923 if( rc==SQLITE_OK0 ){
3924 sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
3925 }else{
3926 sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc);
3927 }
3928 sqlite3_freesqlite3_api->free(ctx.zOut);
3929 sqlite3_freesqlite3_api->free(aSeen);
3930 sqlite3_freesqlite3_api->free(sFinder.aFirst);
3931}
3932
3933/************************************************************************/
3934
/*
** Per-query data used by the bm25() function. An instance is allocated and
** populated the first time bm25() is called for a query, then reused.
*/
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in the query */
  double avgdl;                   /* Average number of tokens per row */
  double *aIDF;                   /* One IDF value per phrase */
  double *aFreq;                  /* Scratch array of per-phrase frequencies */
};
3946
3947/*
3948** Callback used by fts5Bm25GetData() to count the number of rows in the
3949** table matched by each individual phrase within the query.
3950*/
3951static int fts5CountCb(
3952 const Fts5ExtensionApi *pApi,
3953 Fts5Context *pFts,
3954 void *pUserData /* Pointer to sqlite3_int64 variable */
3955){
3956 sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
3957 UNUSED_PARAM2(pApi, pFts)(void)(pApi), (void)(pFts);
3958 (*pn)++;
3959 return SQLITE_OK0;
3960}
3961
3962/*
3963** Set *ppData to point to the Fts5Bm25Data object for the current query.
3964** If the object has not already been allocated, allocate and populate it
3965** now.
3966*/
3967static int fts5Bm25GetData(
3968 const Fts5ExtensionApi *pApi,
3969 Fts5Context *pFts,
3970 Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */
3971){
3972 int rc = SQLITE_OK0; /* Return code */
3973 Fts5Bm25Data *p; /* Object to return */
3974
3975 p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0);
3976 if( p==0 ){
3977 int nPhrase; /* Number of phrases in query */
3978 sqlite3_int64 nRow = 0; /* Number of rows in table */
3979 sqlite3_int64 nToken = 0; /* Number of tokens in table */
3980 sqlite3_int64 nByte; /* Bytes of space to allocate */
3981 int i;
3982
3983 /* Allocate the Fts5Bm25Data object */
3984 nPhrase = pApi->xPhraseCount(pFts);
3985 nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
3986 p = (Fts5Bm25Data*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
3987 if( p==0 ){
3988 rc = SQLITE_NOMEM7;
3989 }else{
3990 memset(p, 0, (size_t)nByte);
3991 p->nPhrase = nPhrase;
3992 p->aIDF = (double*)&p[1];
3993 p->aFreq = &p->aIDF[nPhrase];
3994 }
3995
3996 /* Calculate the average document length for this FTS5 table */
3997 if( rc==SQLITE_OK0 ) rc = pApi->xRowCount(pFts, &nRow);
3998 assert( rc!=SQLITE_OK || nRow>0 )((void) (0));
3999 if( rc==SQLITE_OK0 ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken);
4000 if( rc==SQLITE_OK0 ) p->avgdl = (double)nToken / (double)nRow;
4001
4002 /* Calculate an IDF for each phrase in the query */
4003 for(i=0; rc==SQLITE_OK0 && i<nPhrase; i++){
4004 sqlite3_int64 nHit = 0;
4005 rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb);
4006 if( rc==SQLITE_OK0 ){
4007 /* Calculate the IDF (Inverse Document Frequency) for phrase i.
4008 ** This is done using the standard BM25 formula as found on wikipedia:
4009 **
4010 ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
4011 **
4012 ** where "N" is the total number of documents in the set and nHit
4013 ** is the number that contain at least one instance of the phrase
4014 ** under consideration.
4015 **
4016 ** The problem with this is that if (N < 2*nHit), the IDF is
4017 ** negative. Which is undesirable. So the minimum allowable IDF is
4018 ** (1e-6) - roughly the same as a term that appears in just over
4019 ** half of set of 5,000,000 documents. */
4020 double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) );
4021 if( idf<=0.0 ) idf = 1e-6;
4022 p->aIDF[i] = idf;
4023 }
4024 }
4025
4026 if( rc!=SQLITE_OK0 ){
4027 sqlite3_freesqlite3_api->free(p);
4028 }else{
4029 rc = pApi->xSetAuxdata(pFts, p, sqlite3_freesqlite3_api->free);
4030 }
4031 if( rc!=SQLITE_OK0 ) p = 0;
4032 }
4033 *ppData = p;
4034 return rc;
4035}
4036
4037/*
4038** Implementation of bm25() function.
4039*/
4040static void fts5Bm25Function(
4041 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
4042 Fts5Context *pFts, /* First arg to pass to pApi functions */
4043 sqlite3_context *pCtx, /* Context for returning result/error */
4044 int nVal, /* Number of values in apVal[] array */
4045 sqlite3_value **apVal /* Array of trailing arguments */
4046){
4047 const double k1 = 1.2; /* Constant "k1" from BM25 formula */
4048 const double b = 0.75; /* Constant "b" from BM25 formula */
4049 int rc; /* Error code */
4050 double score = 0.0; /* SQL function return value */
4051 Fts5Bm25Data *pData; /* Values allocated/calculated once only */
4052 int i; /* Iterator variable */
4053 int nInst = 0; /* Value returned by xInstCount() */
4054 double D = 0.0; /* Total number of tokens in row */
4055 double *aFreq = 0; /* Array of phrase freq. for current row */
4056
4057 /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
4058 ** for each phrase in the query for the current row. */
4059 rc = fts5Bm25GetData(pApi, pFts, &pData);
4060 if( rc==SQLITE_OK0 ){
4061 aFreq = pData->aFreq;
4062 memset(aFreq, 0, sizeof(double) * pData->nPhrase);
4063 rc = pApi->xInstCount(pFts, &nInst);
4064 }
4065 for(i=0; rc==SQLITE_OK0 && i<nInst; i++){
4066 int ip; int ic; int io;
4067 rc = pApi->xInst(pFts, i, &ip, &ic, &io);
4068 if( rc==SQLITE_OK0 ){
4069 double w = (nVal > ic) ? sqlite3_value_doublesqlite3_api->value_double(apVal[ic]) : 1.0;
4070 aFreq[ip] += w;
4071 }
4072 }
4073
4074 /* Figure out the total size of the current row in tokens. */
4075 if( rc==SQLITE_OK0 ){
4076 int nTok;
4077 rc = pApi->xColumnSize(pFts, -1, &nTok);
4078 D = (double)nTok;
4079 }
4080
4081 /* Determine and return the BM25 score for the current row. Or, if an
4082 ** error has occurred, throw an exception. */
4083 if( rc==SQLITE_OK0 ){
4084 for(i=0; i<pData->nPhrase; i++){
4085 score += pData->aIDF[i] * (
4086 ( aFreq[i] * (k1 + 1.0) ) /
4087 ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
4088 );
4089 }
4090 sqlite3_result_doublesqlite3_api->result_double(pCtx, -1.0 * score);
4091 }else{
4092 sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc);
4093 }
4094}
4095
4096/*
4097** Implementation of fts5_get_locale() function.
4098*/
4099static void fts5GetLocaleFunction(
4100 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
4101 Fts5Context *pFts, /* First arg to pass to pApi functions */
4102 sqlite3_context *pCtx, /* Context for returning result/error */
4103 int nVal, /* Number of values in apVal[] array */
4104 sqlite3_value **apVal /* Array of trailing arguments */
4105){
4106 int iCol = 0;
4107 int eType = 0;
4108 int rc = SQLITE_OK0;
4109 const char *zLocale = 0;
4110 int nLocale = 0;
4111
4112 /* xColumnLocale() must be available */
4113 assert( pApi->iVersion>=4 )((void) (0));
4114
4115 if( nVal!=1 ){
4116 const char *z = "wrong number of arguments to function fts5_get_locale()";
4117 sqlite3_result_errorsqlite3_api->result_error(pCtx, z, -1);
4118 return;
4119 }
4120
4121 eType = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(apVal[0]);
4122 if( eType!=SQLITE_INTEGER1 ){
4123 const char *z = "non-integer argument passed to function fts5_get_locale()";
4124 sqlite3_result_errorsqlite3_api->result_error(pCtx, z, -1);
4125 return;
4126 }
4127
4128 iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]);
4129 if( iCol<0 || iCol>=pApi->xColumnCount(pFts) ){
4130 sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, SQLITE_RANGE25);
4131 return;
4132 }
4133
4134 rc = pApi->xColumnLocale(pFts, iCol, &zLocale, &nLocale);
4135 if( rc!=SQLITE_OK0 ){
4136 sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc);
4137 return;
4138 }
4139
4140 sqlite3_result_textsqlite3_api->result_text(pCtx, zLocale, nLocale, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
4141}
4142
4143static int sqlite3Fts5AuxInit(fts5_api *pApi){
4144 struct Builtin {
4145 const char *zFunc; /* Function name (nul-terminated) */
4146 void *pUserData; /* User-data pointer */
4147 fts5_extension_function xFunc;/* Callback function */
4148 void (*xDestroy)(void*); /* Destructor function */
4149 } aBuiltin [] = {
4150 { "snippet", 0, fts5SnippetFunction, 0 },
4151 { "highlight", 0, fts5HighlightFunction, 0 },
4152 { "bm25", 0, fts5Bm25Function, 0 },
4153 { "fts5_get_locale", 0, fts5GetLocaleFunction, 0 },
4154 };
4155 int rc = SQLITE_OK0; /* Return code */
4156 int i; /* To iterate through builtin functions */
4157
4158 for(i=0; rc==SQLITE_OK0 && i<ArraySize(aBuiltin)((int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]))); i++){
4159 rc = pApi->xCreateFunction(pApi,
4160 aBuiltin[i].zFunc,
4161 aBuiltin[i].pUserData,
4162 aBuiltin[i].xFunc,
4163 aBuiltin[i].xDestroy
4164 );
4165 }
4166
4167 return rc;
4168}
4169
4170#line 1 "fts5_buffer.c"
4171/*
4172** 2014 May 31
4173**
4174** The author disclaims copyright to this source code. In place of
4175** a legal notice, here is a blessing:
4176**
4177** May you do good and not evil.
4178** May you find forgiveness for yourself and forgive others.
4179** May you share freely, never taking more than you give.
4180**
4181******************************************************************************
4182*/
4183
4184
4185
4186/* #include "fts5Int.h" */
4187
4188static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
4189 if( (u32)pBuf->nSpace<nByte ){
4190 u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64;
4191 u8 *pNew;
4192 while( nNew<nByte ){
4193 nNew = nNew * 2;
4194 }
4195 pNew = sqlite3_realloc64sqlite3_api->realloc64(pBuf->p, nNew);
4196 if( pNew==0 ){
4197 *pRc = SQLITE_NOMEM7;
4198 return 1;
4199 }else{
4200 pBuf->nSpace = (int)nNew;
4201 pBuf->p = pNew;
4202 }
4203 }
4204 return 0;
4205}
4206
4207
4208/*
4209** Encode value iVal as an SQLite varint and append it to the buffer object
4210** pBuf. If an OOM error occurs, set the error code in p.
4211*/
4212static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
4213 if( fts5BufferGrow(pRc, pBuf, 9)( (u32)((pBuf)->n) + (u32)(9) <= (u32)((pBuf)->nSpace
) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(9)+(pBuf)->n) )
) return;
4214 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
4215}
4216
4217static void sqlite3Fts5Put32(u8 *aBuf, int iVal){
4218 aBuf[0] = (iVal>>24) & 0x00FF;
4219 aBuf[1] = (iVal>>16) & 0x00FF;
4220 aBuf[2] = (iVal>> 8) & 0x00FF;
4221 aBuf[3] = (iVal>> 0) & 0x00FF;
4222}
4223
4224static int sqlite3Fts5Get32(const u8 *aBuf){
4225 return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]);
4226}
4227
4228/*
4229** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
4230** the error code in p. If an error has already occurred when this function
4231** is called, it is a no-op.
4232*/
4233static void sqlite3Fts5BufferAppendBlob(
4234 int *pRc,
4235 Fts5Buffer *pBuf,
4236 u32 nData,
4237 const u8 *pData
4238){
4239 if( nData ){
4240 if( fts5BufferGrow(pRc, pBuf, nData)( (u32)((pBuf)->n) + (u32)(nData) <= (u32)((pBuf)->nSpace
) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(nData)+(pBuf)->
n) )
) return;
4241 assert( pBuf->p!=0 )((void) (0));
4242 memcpy(&pBuf->p[pBuf->n], pData, nData);
4243 pBuf->n += nData;
4244 }
4245}
4246
4247/*
4248** Append the nul-terminated string zStr to the buffer pBuf. This function
4249** ensures that the byte following the buffer data is set to 0x00, even
4250** though this byte is not included in the pBuf->n count.
4251*/
4252static void sqlite3Fts5BufferAppendString(
4253 int *pRc,
4254 Fts5Buffer *pBuf,
4255 const char *zStr
4256){
4257 int nStr = (int)strlen(zStr);
4258 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
4259 pBuf->n--;
4260}
4261
4262/*
4263** Argument zFmt is a printf() style format string. This function performs
4264** the printf() style processing, then appends the results to buffer pBuf.
4265**
4266** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte
4267** following the buffer data is set to 0x00, even though this byte is not
4268** included in the pBuf->n count.
4269*/
4270static void sqlite3Fts5BufferAppendPrintf(
4271 int *pRc,
4272 Fts5Buffer *pBuf,
4273 char *zFmt, ...
4274){
4275 if( *pRc==SQLITE_OK0 ){
4276 char *zTmp;
4277 va_list ap;
4278 va_start(ap, zFmt)__builtin_va_start(ap, zFmt);
4279 zTmp = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap);
4280 va_end(ap)__builtin_va_end(ap);
4281
4282 if( zTmp==0 ){
4283 *pRc = SQLITE_NOMEM7;
4284 }else{
4285 sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
4286 sqlite3_freesqlite3_api->free(zTmp);
4287 }
4288 }
4289}
4290
4291static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
4292 char *zRet = 0;
4293 if( *pRc==SQLITE_OK0 ){
4294 va_list ap;
4295 va_start(ap, zFmt)__builtin_va_start(ap, zFmt);
4296 zRet = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap);
4297 va_end(ap)__builtin_va_end(ap);
4298 if( zRet==0 ){
4299 *pRc = SQLITE_NOMEM7;
4300 }
4301 }
4302 return zRet;
4303}
4304
4305
4306/*
4307** Free any buffer allocated by pBuf. Zero the structure before returning.
4308*/
4309static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
4310 sqlite3_freesqlite3_api->free(pBuf->p);
4311 memset(pBuf, 0, sizeof(Fts5Buffer));
4312}
4313
4314/*
4315** Zero the contents of the buffer object. But do not free the associated
4316** memory allocation.
4317*/
4318static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
4319 pBuf->n = 0;
4320}
4321
4322/*
4323** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
4324** the error code in p. If an error has already occurred when this function
4325** is called, it is a no-op.
4326*/
4327static void sqlite3Fts5BufferSet(
4328 int *pRc,
4329 Fts5Buffer *pBuf,
4330 int nData,
4331 const u8 *pData
4332){
4333 pBuf->n = 0;
4334 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
4335}
4336
4337static int sqlite3Fts5PoslistNext64(
4338 const u8 *a, int n, /* Buffer containing poslist */
4339 int *pi, /* IN/OUT: Offset within a[] */
4340 i64 *piOff /* IN/OUT: Current offset */
4341){
4342 int i = *pi;
4343 assert( a!=0 || i==0 )((void) (0));
4344 if( i>=n ){
4345 /* EOF */
4346 *piOff = -1;
4347 return 1;
4348 }else{
4349 i64 iOff = *piOff;
4350 u32 iVal;
4351 assert( a!=0 )((void) (0));
4352 fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32
(&(a)[i],(u32*)&(iVal)); } }
;
4353 if( iVal<=1 ){
4354 if( iVal==0 ){
4355 *pi = i;
4356 return 0;
4357 }
4358 fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32
(&(a)[i],(u32*)&(iVal)); } }
;
4359 iOff = ((i64)iVal) << 32;
4360 assert( iOff>=0 )((void) (0));
4361 fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32
(&(a)[i],(u32*)&(iVal)); } }
;
4362 if( iVal<2 ){
4363 /* This is a corrupt record. So stop parsing it here. */
4364 *piOff = -1;
4365 return 1;
4366 }
4367 *piOff = iOff + ((iVal-2) & 0x7FFFFFFF);
4368 }else{
4369 *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF);
4370 }
4371 *pi = i;
4372 assert_nc( *piOff>=iOff )((void) (0));
4373 return 0;
4374 }
4375}
4376
4377
4378/*
4379** Advance the iterator object passed as the only argument. Return true
4380** if the iterator reaches EOF, or false otherwise.
4381*/
4382static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
4383 if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
4384 pIter->bEof = 1;
4385 }
4386 return pIter->bEof;
4387}
4388
4389static int sqlite3Fts5PoslistReaderInit(
4390 const u8 *a, int n, /* Poslist buffer to iterate through */
4391 Fts5PoslistReader *pIter /* Iterator object to initialize */
4392){
4393 memset(pIter, 0, sizeof(*pIter));
4394 pIter->a = a;
4395 pIter->n = n;
4396 sqlite3Fts5PoslistReaderNext(pIter);
4397 return pIter->bEof;
4398}
4399
4400/*
4401** Append position iPos to the position list being accumulated in buffer
4402** pBuf, which must be already be large enough to hold the new data.
4403** The previous position written to this list is *piPrev. *piPrev is set
4404** to iPos before returning.
4405*/
4406static void sqlite3Fts5PoslistSafeAppend(
4407 Fts5Buffer *pBuf,
4408 i64 *piPrev,
4409 i64 iPos
4410){
4411 if( iPos>=*piPrev ){
4412 static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
4413 if( (iPos & colmask) != (*piPrev & colmask) ){
4414 pBuf->p[pBuf->n++] = 1;
4415 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
4416 *piPrev = (iPos & colmask);
4417 }
4418 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
4419 *piPrev = iPos;
4420 }
4421}
4422
4423static int sqlite3Fts5PoslistWriterAppend(
4424 Fts5Buffer *pBuf,
4425 Fts5PoslistWriter *pWriter,
4426 i64 iPos
4427){
4428 int rc = 0; /* Initialized only to suppress erroneous warning from Clang */
4429 if( fts5BufferGrow(&rc, pBuf, 5+5+5)( (u32)((pBuf)->n) + (u32)(5+5+5) <= (u32)((pBuf)->nSpace
) ? 0 : sqlite3Fts5BufferSize((&rc),(pBuf),(5+5+5)+(pBuf)
->n) )
) return rc;
4430 sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
4431 return SQLITE_OK0;
4432}
4433
4434static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){
4435 void *pRet = 0;
4436 if( *pRc==SQLITE_OK0 ){
4437 pRet = sqlite3_malloc64sqlite3_api->malloc64(nByte);
4438 if( pRet==0 ){
4439 if( nByte>0 ) *pRc = SQLITE_NOMEM7;
4440 }else{
4441 memset(pRet, 0, (size_t)nByte);
4442 }
4443 }
4444 return pRet;
4445}
4446
4447/*
4448** Return a nul-terminated copy of the string indicated by pIn. If nIn
4449** is non-negative, then it is the length of the string in bytes. Otherwise,
4450** the length of the string is determined using strlen().
4451**
4452** It is the responsibility of the caller to eventually free the returned
4453** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
4454*/
4455static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
4456 char *zRet = 0;
4457 if( *pRc==SQLITE_OK0 ){
4458 if( nIn<0 ){
4459 nIn = (int)strlen(pIn);
4460 }
4461 zRet = (char*)sqlite3_mallocsqlite3_api->malloc(nIn+1);
4462 if( zRet ){
4463 memcpy(zRet, pIn, nIn);
4464 zRet[nIn] = '\0';
4465 }else{
4466 *pRc = SQLITE_NOMEM7;
4467 }
4468 }
4469 return zRet;
4470}
4471
4472
/*
** Return true if character 't' may be part of an FTS5 bareword, or false
** otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters (any byte with the 0x80 bit set),
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).
**   * The unicode "substitute" character (0x1A).
**
** The lookup table is static const so that it is not rebuilt on the stack
** each time this function is called.
*/
static int sqlite3Fts5IsBareword(char t){
  static const unsigned char aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };

  /* The 0x80 test must come first: it guarantees that the table is only
  ** ever indexed with a value in the range 0..127. */
  return (t & 0x80) || aBareword[(int)t];
}
4497
4498
4499/*************************************************************************
4500*/
/*
** A single entry in an Fts5Termset hash table. Entries that hash to the
** same bucket are linked together through pNext.
*/
typedef struct Fts5TermsetEntry Fts5TermsetEntry;
struct Fts5TermsetEntry {
  char *pTerm;                    /* Term bytes (nTerm bytes, no terminator) */
  int nTerm;                      /* Size of pTerm in bytes */
  int iIdx;                       /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext;        /* Next entry in the same hash bucket */
};

/*
** A set of (index, term) pairs, stored as a fixed-size hash table of
** singly-linked lists.
*/
struct Fts5Termset {
  Fts5TermsetEntry *apHash[512];  /* Head of the list for each bucket */
};
4512
4513static int sqlite3Fts5TermsetNew(Fts5Termset **pp){
4514 int rc = SQLITE_OK0;
4515 *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
4516 return rc;
4517}
4518
4519static int sqlite3Fts5TermsetAdd(
4520 Fts5Termset *p,
4521 int iIdx,
4522 const char *pTerm, int nTerm,
4523 int *pbPresent
4524){
4525 int rc = SQLITE_OK0;
4526 *pbPresent = 0;
4527 if( p ){
4528 int i;
4529 u32 hash = 13;
4530 Fts5TermsetEntry *pEntry;
4531
4532 /* Calculate a hash value for this term. This is the same hash checksum
4533 ** used by the fts5_hash.c module. This is not important for correct
4534 ** operation of the module, but is necessary to ensure that some tests
4535 ** designed to produce hash table collisions really do work. */
4536 for(i=nTerm-1; i>=0; i--){
4537 hash = (hash << 3) ^ hash ^ pTerm[i];
4538 }
4539 hash = (hash << 3) ^ hash ^ iIdx;
4540 hash = hash % ArraySize(p->apHash)((int)(sizeof(p->apHash) / sizeof(p->apHash[0])));
4541
4542 for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
4543 if( pEntry->iIdx==iIdx
4544 && pEntry->nTerm==nTerm
4545 && memcmp(pEntry->pTerm, pTerm, nTerm)==0
4546 ){
4547 *pbPresent = 1;
4548 break;
4549 }
4550 }
4551
4552 if( pEntry==0 ){
4553 pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
4554 if( pEntry ){
4555 pEntry->pTerm = (char*)&pEntry[1];
4556 pEntry->nTerm = nTerm;
4557 pEntry->iIdx = iIdx;
4558 memcpy(pEntry->pTerm, pTerm, nTerm);
4559 pEntry->pNext = p->apHash[hash];
4560 p->apHash[hash] = pEntry;
4561 }
4562 }
4563 }
4564
4565 return rc;
4566}
4567
4568static void sqlite3Fts5TermsetFree(Fts5Termset *p){
4569 if( p ){
4570 u32 i;
4571 for(i=0; i<ArraySize(p->apHash)((int)(sizeof(p->apHash) / sizeof(p->apHash[0]))); i++){
4572 Fts5TermsetEntry *pEntry = p->apHash[i];
4573 while( pEntry ){
4574 Fts5TermsetEntry *pDel = pEntry;
4575 pEntry = pEntry->pNext;
4576 sqlite3_freesqlite3_api->free(pDel);
4577 }
4578 }
4579 sqlite3_freesqlite3_api->free(p);
4580 }
4581}
4582
4583#line 1 "fts5_config.c"
4584/*
4585** 2014 Jun 09
4586**
4587** The author disclaims copyright to this source code. In place of
4588** a legal notice, here is a blessing:
4589**
4590** May you do good and not evil.
4591** May you find forgiveness for yourself and forgive others.
4592** May you share freely, never taking more than you give.
4593**
4594******************************************************************************
4595**
4596** This is an SQLite module implementing full-text search.
4597*/
4598
4599
4600/* #include "fts5Int.h" */
4601
/* Default values for configuration settings */
#define FTS5_DEFAULT_PAGE_SIZE   4050
#define FTS5_DEFAULT_AUTOMERGE      4
#define FTS5_DEFAULT_USERMERGE      4
#define FTS5_DEFAULT_CRISISMERGE   16
#define FTS5_DEFAULT_HASHSIZE    (1024*1024)

#define FTS5_DEFAULT_DELETE_AUTOMERGE 10      /* default 10% */

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (64*1024)
4612
/*
** Return 1 if x is a space character (0x20), or 0 otherwise. No other
** character is treated as white-space.
*/
static int fts5_iswhitespace(char x){
  return x==' ' ? 1 : 0;
}
4616
/*
** Return 1 if x is one of the four characters that may open a quoted
** string - double-quote, single-quote, '[' or back-tick - or 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
4620
/*
** Argument pIn is either NULL or points to a character that is part of a
** nul-terminated string. Return a pointer to the first character at or
** following *pIn that is not a white-space character (as defined by
** fts5_iswhitespace()). If pIn is NULL, NULL is returned.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z;
  if( pIn==0 ) return 0;
  for(z=pIn; fts5_iswhitespace(*z); z++){
    /* no-op */
  }
  return z;
}
4633
/*
** Argument pIn points to a character that is part of a nul-terminated
** string. Return a pointer to the first character at or following *pIn
** that is not a "bareword" character (see sqlite3Fts5IsBareword()). If
** *pIn itself is not a bareword character - i.e. the bareword would be
** empty - NULL is returned instead.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *z;
  for(z=pIn; sqlite3Fts5IsBareword(*z); z++){
    /* no-op */
  }
  return (z==pIn) ? 0 : z;
}
4645
/*
** Return 1 if a is one of the ASCII digits '0'..'9', or 0 otherwise.
*/
static int fts5_isdigit(char a){
  if( a<'0' ) return 0;
  if( a>'9' ) return 0;
  return 1;
}
4649
4650
4651
/*
** Argument pIn points to the first character of what may be an SQL
** literal within a nul-terminated string. If a literal is recognized,
** return a pointer to the character immediately following it. Otherwise,
** return NULL. The forms recognized are:
**
**   * NULL (matched case-insensitively),
**   * a blob literal such as x'ABCD',
**   * a string literal in single quotes, within which two consecutive
**     single quotes stand for one quote character, and
**   * a number: an optional '+' or '-', then zero or more digits,
**     optionally followed by a '.' and one or more digits.
**
** This function never reads past the nul-terminator of the string.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* The offset of the closing quote must be even, which means the
        ** literal contains an even number of hex digits. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      for(;;){
        /* Test for the end of the string BEFORE advancing. Otherwise, if
        ** the input is a lone quote character, the scan steps over the
        ** nul-terminator and continues reading past the end of the
        ** buffer. */
        if( *p=='\0' ){
          p = 0;                  /* Unterminated string literal */
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;   /* That was the closing quote */
          /* Otherwise an escaped quote. Step over its second character. */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by further digits. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
4714
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function searches for the corresponding close-quote character within
** the string, dequoting the string in place as it goes and adding a new
** nul-terminator byte. Two consecutive close-quote characters within the
** string are an escape for a single one. The close-quote for '[' is ']';
** for the other open-quote characters it is the same character.
**
** If the close-quote is found, the value returned is the byte offset of
** the character immediately following it. If it is not found, the rest of
** the string is treated as quoted text and the value returned is the byte
** offset of the original nul-terminator.
*/
static int fts5Dequote(char *z){
  int iRead = 1;                  /* Offset of next byte to read */
  int iWrite = 0;                 /* Offset of next byte to write */
  char cClose = z[0];             /* The close-quote character */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  while( z[iRead]!='\0' ){
    char c = z[iRead];
    if( c!=cClose ){
      /* An ordinary character. Copy it to the output. */
      z[iWrite++] = c;
      iRead++;
      continue;
    }
    if( z[iRead+1]!=cClose ){
      /* Character iRead was the close quote. */
      iRead++;
      break;
    }
    /* Characters iRead and iRead+1 form an escaped quote character. Skip
    ** the input cursor past both and copy a single quote character
    ** to the output buffer. */
    z[iWrite++] = cClose;
    iRead += 2;
  }

  z[iWrite] = '\0';
  return iRead;
}
4759
/*
** Convert an SQL-style quoted string into a normal string by removing
** the quote characters. The conversion is done in-place. If the
** input does not begin with a quote character, then this routine
** is a no-op. The input must not begin with a white-space character.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
static void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      /* Not a quoted string. Leave it as it is. */
      break;
  }
}
4782
4783
/*
** One entry of a name/value table as used by fts5ConfigSetEnum(). A table
** is an array of these, terminated by an entry with zName set to NULL.
*/
struct Fts5Enum {
  const char *zName;              /* Name of the enumeration value */
  int eVal;                       /* Integer value that zName stands for */
};
typedef struct Fts5Enum Fts5Enum;
4789
4790static int fts5ConfigSetEnum(
4791 const Fts5Enum *aEnum,
4792 const char *zEnum,
4793 int *peVal
4794){
4795 int nEnum = (int)strlen(zEnum);
4796 int i;
4797 int iVal = -1;
4798
4799 for(i=0; aEnum[i].zName; i++){
4800 if( sqlite3_strnicmpsqlite3_api->strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){
4801 if( iVal>=0 ) return SQLITE_ERROR1;
4802 iVal = aEnum[i].eVal;
4803 }
4804 }
4805
4806 *peVal = iVal;
4807 return iVal<0 ? SQLITE_ERROR1 : SQLITE_OK0;
4808}
4809
4810/*
4811** Parse a "special" CREATE VIRTUAL TABLE directive and update
4812** configuration object pConfig as appropriate.
4813**
4814** If successful, object pConfig is updated and SQLITE_OK returned. If
4815** an error occurs, an SQLite error code is returned and an error message
4816** may be left in *pzErr. It is the responsibility of the caller to
4817** eventually free any such error message using sqlite3_free().
4818*/
4819static int fts5ConfigParseSpecial(
4820 Fts5Config *pConfig, /* Configuration object to update */
4821 const char *zCmd, /* Special command to parse */
4822 const char *zArg, /* Argument to parse */
4823 char **pzErr /* OUT: Error message */
4824){
4825 int rc = SQLITE_OK0;
4826 int nCmd = (int)strlen(zCmd);
4827
4828 if( sqlite3_strnicmpsqlite3_api->strnicmp("prefix", zCmd, nCmd)==0 ){
4829 const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES31;
4830 const char *p;
4831 int bFirst = 1;
4832 if( pConfig->aPrefix==0 ){
4833 pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
4834 if( rc ) return rc;
4835 }
4836
4837 p = zArg;
4838 while( 1 ){
4839 int nPre = 0;
4840
4841 while( p[0]==' ' ) p++;
4842 if( bFirst==0 && p[0]==',' ){
4843 p++;
4844 while( p[0]==' ' ) p++;
4845 }else if( p[0]=='\0' ){
4846 break;
4847 }
4848 if( p[0]<'0' || p[0]>'9' ){
4849 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed prefix=... directive");
4850 rc = SQLITE_ERROR1;
4851 break;
4852 }
4853
4854 if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES31 ){
4855 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
4856 "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES31
4857 );
4858 rc = SQLITE_ERROR1;
4859 break;
4860 }
4861
4862 while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
4863 nPre = nPre*10 + (p[0] - '0');
4864 p++;
4865 }
4866
4867 if( nPre<=0 || nPre>=1000 ){
4868 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("prefix length out of range (max 999)");
4869 rc = SQLITE_ERROR1;
4870 break;
4871 }
4872
4873 pConfig->aPrefix[pConfig->nPrefix] = nPre;
4874 pConfig->nPrefix++;
4875 bFirst = 0;
4876 }
4877 assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES )((void) (0));
4878 return rc;
4879 }
4880
4881 if( sqlite3_strnicmpsqlite3_api->strnicmp("tokenize", zCmd, nCmd)==0 ){
4882 const char *p = (const char*)zArg;
4883 sqlite3_int64 nArg = strlen(zArg) + 1;
4884 char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg);
4885
4886 if( azArg ){
4887 char *pSpace = (char*)&azArg[nArg];
4888 if( pConfig->t.azArg ){
4889 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple tokenize=... directives");
4890 rc = SQLITE_ERROR1;
4891 }else{
4892 for(nArg=0; p && *p; nArg++){
4893 const char *p2 = fts5ConfigSkipWhitespace(p);
4894 if( *p2=='\'' ){
4895 p = fts5ConfigSkipLiteral(p2);
4896 }else{
4897 p = fts5ConfigSkipBareword(p2);
4898 }
4899 if( p ){
4900 memcpy(pSpace, p2, p-p2);
4901 azArg[nArg] = pSpace;
4902 sqlite3Fts5Dequote(pSpace);
4903 pSpace += (p - p2) + 1;
4904 p = fts5ConfigSkipWhitespace(p);
4905 }
4906 }
4907 if( p==0 ){
4908 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("parse error in tokenize directive");
4909 rc = SQLITE_ERROR1;
4910 }else{
4911 pConfig->t.azArg = (const char**)azArg;
4912 pConfig->t.nArg = nArg;
4913 azArg = 0;
4914 }
4915 }
4916 }
4917 sqlite3_freesqlite3_api->free(azArg);
4918
4919 return rc;
4920 }
4921
4922 if( sqlite3_strnicmpsqlite3_api->strnicmp("content", zCmd, nCmd)==0 ){
4923 if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 ){
4924 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple content=... directives");
4925 rc = SQLITE_ERROR1;
4926 }else{
4927 if( zArg[0] ){
4928 pConfig->eContent = FTS5_CONTENT_EXTERNAL2;
4929 pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
4930 }else{
4931 pConfig->eContent = FTS5_CONTENT_NONE1;
4932 }
4933 }
4934 return rc;
4935 }
4936
4937 if( sqlite3_strnicmpsqlite3_api->strnicmp("contentless_delete", zCmd, nCmd)==0 ){
4938 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
4939 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed contentless_delete=... directive");
4940 rc = SQLITE_ERROR1;
4941 }else{
4942 pConfig->bContentlessDelete = (zArg[0]=='1');
4943 }
4944 return rc;
4945 }
4946
4947 if( sqlite3_strnicmpsqlite3_api->strnicmp("contentless_unindexed", zCmd, nCmd)==0 ){
4948 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
4949 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed contentless_delete=... directive");
4950 rc = SQLITE_ERROR1;
4951 }else{
4952 pConfig->bContentlessUnindexed = (zArg[0]=='1');
4953 }
4954 return rc;
4955 }
4956
4957 if( sqlite3_strnicmpsqlite3_api->strnicmp("content_rowid", zCmd, nCmd)==0 ){
4958 if( pConfig->zContentRowid ){
4959 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple content_rowid=... directives");
4960 rc = SQLITE_ERROR1;
4961 }else{
4962 pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
4963 }
4964 return rc;
4965 }
4966
4967 if( sqlite3_strnicmpsqlite3_api->strnicmp("columnsize", zCmd, nCmd)==0 ){
4968 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
4969 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed columnsize=... directive");
4970 rc = SQLITE_ERROR1;
4971 }else{
4972 pConfig->bColumnsize = (zArg[0]=='1');
4973 }
4974 return rc;
4975 }
4976
4977 if( sqlite3_strnicmpsqlite3_api->strnicmp("locale", zCmd, nCmd)==0 ){
4978 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
4979 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed locale=... directive");
4980 rc = SQLITE_ERROR1;
4981 }else{
4982 pConfig->bLocale = (zArg[0]=='1');
4983 }
4984 return rc;
4985 }
4986
4987 if( sqlite3_strnicmpsqlite3_api->strnicmp("detail", zCmd, nCmd)==0 ){
4988 const Fts5Enum aDetail[] = {
4989 { "none", FTS5_DETAIL_NONE1 },
4990 { "full", FTS5_DETAIL_FULL0 },
4991 { "columns", FTS5_DETAIL_COLUMNS2 },
4992 { 0, 0 }
4993 };
4994
4995 if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
4996 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed detail=... directive");
4997 }
4998 return rc;
4999 }
5000
5001 if( sqlite3_strnicmpsqlite3_api->strnicmp("tokendata", zCmd, nCmd)==0 ){
5002 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
5003 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed tokendata=... directive");
5004 rc = SQLITE_ERROR1;
5005 }else{
5006 pConfig->bTokendata = (zArg[0]=='1');
5007 }
5008 return rc;
5009 }
5010
5011 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
5012 return SQLITE_ERROR1;
5013}
5014
5015/*
5016** Gobble up the first bareword or quoted word from the input buffer zIn.
5017** Return a pointer to the character immediately following the last in
5018** the gobbled word if successful, or a NULL pointer otherwise (failed
5019** to find close-quote character).
5020**
5021** Before returning, set pzOut to point to a new buffer containing a
5022** nul-terminated, dequoted copy of the gobbled word. If the word was
5023** quoted, *pbQuoted is also set to 1 before returning.
5024**
5025** If *pRc is other than SQLITE_OK when this function is called, it is
5026** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
5027** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
5028** set if a parse error (failed to find close quote) occurs.
5029*/
5030static const char *fts5ConfigGobbleWord(
5031 int *pRc, /* IN/OUT: Error code */
5032 const char *zIn, /* Buffer to gobble string/bareword from */
5033 char **pzOut, /* OUT: malloc'd buffer containing str/bw */
5034 int *pbQuoted /* OUT: Set to true if dequoting required */
5035){
5036 const char *zRet = 0;
5037
5038 sqlite3_int64 nIn = strlen(zIn);
5039 char *zOut = sqlite3_malloc64sqlite3_api->malloc64(nIn+1);
5040
5041 assert( *pRc==SQLITE_OK )((void) (0));
5042 *pbQuoted = 0;
5043 *pzOut = 0;
5044
5045 if( zOut==0 ){
5046 *pRc = SQLITE_NOMEM7;
5047 }else{
5048 memcpy(zOut, zIn, (size_t)(nIn+1));
5049 if( fts5_isopenquote(zOut[0]) ){
5050 int ii = fts5Dequote(zOut);
5051 zRet = &zIn[ii];
5052 *pbQuoted = 1;
5053 }else{
5054 zRet = fts5ConfigSkipBareword(zIn);
5055 if( zRet ){
5056 zOut[zRet-zIn] = '\0';
5057 }
5058 }
5059 }
5060
5061 if( zRet==0 ){
5062 sqlite3_freesqlite3_api->free(zOut);
5063 }else{
5064 *pzOut = zOut;
5065 }
5066
5067 return zRet;
5068}
5069
5070static int fts5ConfigParseColumn(
5071 Fts5Config *p,
5072 char *zCol,
5073 char *zArg,
5074 char **pzErr,
5075 int *pbUnindexed
5076){
5077 int rc = SQLITE_OK0;
5078 if( 0==sqlite3_stricmpsqlite3_api->stricmp(zCol, FTS5_RANK_NAME"rank")
5079 || 0==sqlite3_stricmpsqlite3_api->stricmp(zCol, FTS5_ROWID_NAME"rowid")
5080 ){
5081 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("reserved fts5 column name: %s", zCol);
5082 rc = SQLITE_ERROR1;
5083 }else if( zArg ){
5084 if( 0==sqlite3_stricmpsqlite3_api->stricmp(zArg, "unindexed") ){
5085 p->abUnindexed[p->nCol] = 1;
5086 *pbUnindexed = 1;
5087 }else{
5088 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unrecognized column option: %s", zArg);
5089 rc = SQLITE_ERROR1;
5090 }
5091 }
5092
5093 p->azCol[p->nCol++] = zCol;
5094 return rc;
5095}
5096
5097/*
5098** Populate the Fts5Config.zContentExprlist string.
5099*/
5100static int fts5ConfigMakeExprlist(Fts5Config *p){
5101 int i;
5102 int rc = SQLITE_OK0;
5103 Fts5Buffer buf = {0, 0, 0};
5104
5105 sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
5106 if( p->eContent!=FTS5_CONTENT_NONE1 ){
5107 assert( p->eContent==FTS5_CONTENT_EXTERNAL((void) (0))
5108 || p->eContent==FTS5_CONTENT_NORMAL((void) (0))
5109 || p->eContent==FTS5_CONTENT_UNINDEXED((void) (0))
5110 )((void) (0));
5111 for(i=0; i<p->nCol; i++){
5112 if( p->eContent==FTS5_CONTENT_EXTERNAL2 ){
5113 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]);
5114 }else if( p->eContent==FTS5_CONTENT_NORMAL0 || p->abUnindexed[i] ){
5115 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i);
5116 }else{
5117 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL");
5118 }
5119 }
5120 }
5121 if( p->eContent==FTS5_CONTENT_NORMAL0 && p->bLocale ){
5122 for(i=0; i<p->nCol; i++){
5123 if( p->abUnindexed[i]==0 ){
5124 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.l%d", i);
5125 }else{
5126 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL");
5127 }
5128 }
5129 }
5130
5131 assert( p->zContentExprlist==0 )((void) (0));
5132 p->zContentExprlist = (char*)buf.p;
5133 return rc;
5134}
5135
5136/*
5137** Arguments nArg/azArg contain the string arguments passed to the xCreate
5138** or xConnect method of the virtual table. This function attempts to
5139** allocate an instance of Fts5Config containing the results of parsing
5140** those arguments.
5141**
5142** If successful, SQLITE_OK is returned and *ppOut is set to point to the
5143** new Fts5Config object. If an error occurs, an SQLite error code is
5144** returned, *ppOut is set to NULL and an error message may be left in
5145** *pzErr. It is the responsibility of the caller to eventually free any
5146** such error message using sqlite3_free().
5147*/
5148static int sqlite3Fts5ConfigParse(
5149 Fts5Global *pGlobal,
5150 sqlite3 *db,
5151 int nArg, /* Number of arguments */
5152 const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */
5153 Fts5Config **ppOut, /* OUT: Results of parse */
5154 char **pzErr /* OUT: Error message */
5155){
5156 int rc = SQLITE_OK0; /* Return code */
5157 Fts5Config *pRet; /* New object to return */
5158 int i;
5159 sqlite3_int64 nByte;
5160 int bUnindexed = 0; /* True if there are one or more UNINDEXED */
5161
5162 *ppOut = pRet = (Fts5Config*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Config));
5163 if( pRet==0 ) return SQLITE_NOMEM7;
5164 memset(pRet, 0, sizeof(Fts5Config));
5165 pRet->pGlobal = pGlobal;
5166 pRet->db = db;
5167 pRet->iCookie = -1;
5168
5169 nByte = nArg * (sizeof(char*) + sizeof(u8));
5170 pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
5171 pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
5172 pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
5173 pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
5174 pRet->bColumnsize = 1;
5175 pRet->eDetail = FTS5_DETAIL_FULL0;
5176#ifdef SQLITE_DEBUG
5177 pRet->bPrefixIndex = 1;
5178#endif
5179 if( rc==SQLITE_OK0 && sqlite3_stricmpsqlite3_api->stricmp(pRet->zName, FTS5_RANK_NAME"rank")==0 ){
5180 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("reserved fts5 table name: %s", pRet->zName);
5181 rc = SQLITE_ERROR1;
5182 }
5183
5184 assert( (pRet->abUnindexed && pRet->azCol) || rc!=SQLITE_OK )((void) (0));
5185 for(i=3; rc==SQLITE_OK0 && i<nArg; i++){
5186 const char *zOrig = azArg[i];
5187 const char *z;
5188 char *zOne = 0;
5189 char *zTwo = 0;
5190 int bOption = 0;
5191 int bMustBeCol = 0;
5192
5193 z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
5194 z = fts5ConfigSkipWhitespace(z);
5195 if( z && *z=='=' ){
5196 bOption = 1;
5197 assert( zOne!=0 )((void) (0));
5198 z++;
5199 if( bMustBeCol ) z = 0;
5200 }
5201 z = fts5ConfigSkipWhitespace(z);
5202 if( z && z[0] ){
5203 int bDummy;
5204 z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
5205 if( z && z[0] ) z = 0;
5206 }
5207
5208 if( rc==SQLITE_OK0 ){
5209 if( z==0 ){
5210 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("parse error in \"%s\"", zOrig);
5211 rc = SQLITE_ERROR1;
5212 }else{
5213 if( bOption ){
5214 rc = fts5ConfigParseSpecial(pRet,
5215 ALWAYS(zOne)(zOne)?zOne:"",
5216 zTwo?zTwo:"",
5217 pzErr
5218 );
5219 }else{
5220 rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr, &bUnindexed);
5221 zOne = 0;
5222 }
5223 }
5224 }
5225
5226 sqlite3_freesqlite3_api->free(zOne);
5227 sqlite3_freesqlite3_api->free(zTwo);
5228 }
5229
5230 /* We only allow contentless_delete=1 if the table is indeed contentless. */
5231 if( rc==SQLITE_OK0
5232 && pRet->bContentlessDelete
5233 && pRet->eContent!=FTS5_CONTENT_NONE1
5234 ){
5235 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
5236 "contentless_delete=1 requires a contentless table"
5237 );
5238 rc = SQLITE_ERROR1;
5239 }
5240
5241 /* We only allow contentless_delete=1 if columnsize=0 is not present.
5242 **
5243 ** This restriction may be removed at some point.
5244 */
5245 if( rc==SQLITE_OK0 && pRet->bContentlessDelete && pRet->bColumnsize==0 ){
5246 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
5247 "contentless_delete=1 is incompatible with columnsize=0"
5248 );
5249 rc = SQLITE_ERROR1;
5250 }
5251
5252 /* We only allow contentless_unindexed=1 if the table is actually a
5253 ** contentless one.
5254 */
5255 if( rc==SQLITE_OK0
5256 && pRet->bContentlessUnindexed
5257 && pRet->eContent!=FTS5_CONTENT_NONE1
5258 ){
5259 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
5260 "contentless_unindexed=1 requires a contentless table"
5261 );
5262 rc = SQLITE_ERROR1;
5263 }
5264
5265 /* If no zContent option was specified, fill in the default values. */
5266 if( rc==SQLITE_OK0 && pRet->zContent==0 ){
5267 const char *zTail = 0;
5268 assert( pRet->eContent==FTS5_CONTENT_NORMAL((void) (0))
5269 || pRet->eContent==FTS5_CONTENT_NONE((void) (0))
5270 )((void) (0));
5271 if( pRet->eContent==FTS5_CONTENT_NORMAL0 ){
5272 zTail = "content";
5273 }else if( bUnindexed && pRet->bContentlessUnindexed ){
5274 pRet->eContent = FTS5_CONTENT_UNINDEXED3;
5275 zTail = "content";
5276 }else if( pRet->bColumnsize ){
5277 zTail = "docsize";
5278 }
5279
5280 if( zTail ){
5281 pRet->zContent = sqlite3Fts5Mprintf(
5282 &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
5283 );
5284 }
5285 }
5286
5287 if( rc==SQLITE_OK0 && pRet->zContentRowid==0 ){
5288 pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
5289 }
5290
5291 /* Formulate the zContentExprlist text */
5292 if( rc==SQLITE_OK0 ){
5293 rc = fts5ConfigMakeExprlist(pRet);
5294 }
5295
5296 if( rc!=SQLITE_OK0 ){
5297 sqlite3Fts5ConfigFree(pRet);
5298 *ppOut = 0;
5299 }
5300 return rc;
5301}
5302
5303/*
5304** Free the configuration object passed as the only argument.
5305*/
5306static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
5307 if( pConfig ){
5308 int i;
5309 if( pConfig->t.pTok ){
5310 if( pConfig->t.pApi1 ){
5311 pConfig->t.pApi1->xDelete(pConfig->t.pTok);
5312 }else{
5313 pConfig->t.pApi2->xDelete(pConfig->t.pTok);
5314 }
5315 }
5316 sqlite3_freesqlite3_api->free((char*)pConfig->t.azArg);
5317 sqlite3_freesqlite3_api->free(pConfig->zDb);
5318 sqlite3_freesqlite3_api->free(pConfig->zName);
5319 for(i=0; i<pConfig->nCol; i++){
5320 sqlite3_freesqlite3_api->free(pConfig->azCol[i]);
5321 }
5322 sqlite3_freesqlite3_api->free(pConfig->azCol);
5323 sqlite3_freesqlite3_api->free(pConfig->aPrefix);
5324 sqlite3_freesqlite3_api->free(pConfig->zRank);
5325 sqlite3_freesqlite3_api->free(pConfig->zRankArgs);
5326 sqlite3_freesqlite3_api->free(pConfig->zContent);
5327 sqlite3_freesqlite3_api->free(pConfig->zContentRowid);
5328 sqlite3_freesqlite3_api->free(pConfig->zContentExprlist);
5329 sqlite3_freesqlite3_api->free(pConfig);
5330 }
5331}
5332
5333/*
5334** Call sqlite3_declare_vtab() based on the contents of the configuration
5335** object passed as the only argument. Return SQLITE_OK if successful, or
5336** an SQLite error code if an error occurs.
5337*/
5338static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
5339 int i;
5340 int rc = SQLITE_OK0;
5341 char *zSql;
5342
5343 zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
5344 for(i=0; zSql && i<pConfig->nCol; i++){
5345 const char *zSep = (i==0?"":", ");
5346 zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]);
5347 }
5348 zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
5349 zSql, pConfig->zName, FTS5_RANK_NAME"rank"
5350 );
5351
5352 assert( zSql || rc==SQLITE_NOMEM )((void) (0));
5353 if( zSql ){
5354 rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(pConfig->db, zSql);
5355 sqlite3_freesqlite3_api->free(zSql);
5356 }
5357
5358 return rc;
5359}
5360
5361/*
5362** Tokenize the text passed via the second and third arguments.
5363**
5364** The callback is invoked once for each token in the input text. The
5365** arguments passed to it are, in order:
5366**
5367** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize()
5368** const char *pToken // Pointer to buffer containing token
5369** int nToken // Size of token in bytes
5370** int iStart // Byte offset of start of token within input text
5371** int iEnd // Byte offset of end of token within input text
5372** int iPos // Position of token in input (first token is 0)
5373**
5374** If the callback returns a non-zero value the tokenization is abandoned
5375** and no further callbacks are issued.
5376**
5377** This function returns SQLITE_OK if successful or an SQLite error code
5378** if an error occurs. If the tokenization was abandoned early because
5379** the callback returned SQLITE_DONE, this is not an error and this function
5380** still returns SQLITE_OK. Or, if the tokenization was abandoned early
5381** because the callback returned another non-zero value, it is assumed
5382** to be an SQLite error code and returned to the caller.
5383*/
5384static int sqlite3Fts5Tokenize(
5385 Fts5Config *pConfig, /* FTS5 Configuration object */
5386 int flags, /* FTS5_TOKENIZE_* flags */
5387 const char *pText, int nText, /* Text to tokenize */
5388 void *pCtx, /* Context passed to xToken() */
5389 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
5390){
5391 int rc = SQLITE_OK0;
5392 if( pText ){
5393 if( pConfig->t.pTok==0 ){
5394 rc = sqlite3Fts5LoadTokenizer(pConfig);
5395 }
5396 if( rc==SQLITE_OK0 ){
5397 if( pConfig->t.pApi1 ){
5398 rc = pConfig->t.pApi1->xTokenize(
5399 pConfig->t.pTok, pCtx, flags, pText, nText, xToken
5400 );
5401 }else{
5402 rc = pConfig->t.pApi2->xTokenize(pConfig->t.pTok, pCtx, flags,
5403 pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken
5404 );
5405 }
5406 }
5407 }
5408 return rc;
5409}
5410
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *p = pIn;

  while( 1 ){
    p = fts5ConfigSkipWhitespace(p);
    p = fts5ConfigSkipLiteral(p);
    p = fts5ConfigSkipWhitespace(p);
    if( p==0 || *p==')' ) break;      /* Parse error, or end of list */
    if( *p!=',' ){
      p = 0;
      break;
    }
    p++;                              /* Step over the ',' */
  }

  return p;
}
5434
5435/*
5436** Parameter zIn contains a rank() function specification. The format of
5437** this is:
5438**
5439** + Bareword (function name)
5440** + Open parenthesis - "("
5441** + Zero or more SQL literals in a comma separated list
5442** + Close parenthesis - ")"
5443*/
5444static int sqlite3Fts5ConfigParseRank(
5445 const char *zIn, /* Input string */
5446 char **pzRank, /* OUT: Rank function name */
5447 char **pzRankArgs /* OUT: Rank function arguments */
5448){
5449 const char *p = zIn;
5450 const char *pRank;
5451 char *zRank = 0;
5452 char *zRankArgs = 0;
5453 int rc = SQLITE_OK0;
5454
5455 *pzRank = 0;
5456 *pzRankArgs = 0;
5457
5458 if( p==0 ){
5459 rc = SQLITE_ERROR1;
5460 }else{
5461 p = fts5ConfigSkipWhitespace(p);
5462 pRank = p;
5463 p = fts5ConfigSkipBareword(p);
5464
5465 if( p ){
5466 zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
5467 if( zRank ) memcpy(zRank, pRank, p-pRank);
5468 }else{
5469 rc = SQLITE_ERROR1;
5470 }
5471
5472 if( rc==SQLITE_OK0 ){
5473 p = fts5ConfigSkipWhitespace(p);
5474 if( *p!='(' ) rc = SQLITE_ERROR1;
5475 p++;
5476 }
5477 if( rc==SQLITE_OK0 ){
5478 const char *pArgs;
5479 p = fts5ConfigSkipWhitespace(p);
5480 pArgs = p;
5481 if( *p!=')' ){
5482 p = fts5ConfigSkipArgs(p);
5483 if( p==0 ){
5484 rc = SQLITE_ERROR1;
5485 }else{
5486 zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
5487 if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
5488 }
5489 }
5490 }
5491 }
5492
5493 if( rc!=SQLITE_OK0 ){
5494 sqlite3_freesqlite3_api->free(zRank);
5495 assert( zRankArgs==0 )((void) (0));
5496 }else{
5497 *pzRank = zRank;
5498 *pzRankArgs = zRankArgs;
5499 }
5500 return rc;
5501}
5502
5503static int sqlite3Fts5ConfigSetValue(
5504 Fts5Config *pConfig,
5505 const char *zKey,
5506 sqlite3_value *pVal,
5507 int *pbBadkey
5508){
5509 int rc = SQLITE_OK0;
5510
5511 if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "pgsz") ){
5512 int pgsz = 0;
5513 if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){
5514 pgsz = sqlite3_value_intsqlite3_api->value_int(pVal);
5515 }
5516 if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE(64*1024) ){
5517 *pbBadkey = 1;
5518 }else{
5519 pConfig->pgsz = pgsz;
5520 }
5521 }
5522
5523 else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "hashsize") ){
5524 int nHashSize = -1;
5525 if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){
5526 nHashSize = sqlite3_value_intsqlite3_api->value_int(pVal);
5527 }
5528 if( nHashSize<=0 ){
5529 *pbBadkey = 1;
5530 }else{
5531 pConfig->nHashSize = nHashSize;
5532 }
5533 }
5534
5535 else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "automerge") ){
5536 int nAutomerge = -1;
5537 if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){
5538 nAutomerge = sqlite3_value_intsqlite3_api->value_int(pVal);
5539 }
5540 if( nAutomerge<0 || nAutomerge>64 ){
5541 *pbBadkey = 1;
5542 }else{
5543 if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE4;
5544 pConfig->nAutomerge = nAutomerge;
5545 }
5546 }
5547
5548 else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "usermerge") ){
5549 int nUsermerge = -1;
5550 if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){
5551 nUsermerge = sqlite3_value_intsqlite3_api->value_int(pVal);
5552 }
5553 if( nUsermerge<2 || nUsermerge>16 ){
5554 *pbBadkey = 1;
5555 }else{
5556 pConfig->nUsermerge = nUsermerge;
5557 }
5558 }
5559
5560 else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "crisismerge") ){
5561 int nCrisisMerge = -1;
5562 if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){
5563 nCrisisMerge = sqlite3_value_intsqlite3_api->value_int(pVal);
5564 }
5565 if( nCrisisMerge<0 ){
5566 *pbBadkey = 1;
5567 }else{
5568 if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE16;
5569 if( nCrisisMerge>=FTS5_MAX_SEGMENT2000 ) nCrisisMerge = FTS5_MAX_SEGMENT2000-1;
5570 pConfig->nCrisisMerge = nCrisisMerge;
5571 }
5572 }
5573
5574 else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "deletemerge") ){
5575 int nVal = -1;
5576 if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){
5577 nVal = sqlite3_value_intsqlite3_api->value_int(pVal);
5578 }else{
5579 *pbBadkey = 1;
5580 }
5581 if( nVal<0 ) nVal = FTS5_DEFAULT_DELETE_AUTOMERGE10;
5582 if( nVal>100 ) nVal = 0;
5583 pConfig->nDeleteMerge = nVal;
5584 }
5585
5586 else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "rank") ){
5587 const char *zIn = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal);
5588 char *zRank;
5589 char *zRankArgs;
5590 rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
5591 if( rc==SQLITE_OK0 ){
5592 sqlite3_freesqlite3_api->free(pConfig->zRank);
5593 sqlite3_freesqlite3_api->free(pConfig->zRankArgs);
5594 pConfig->zRank = zRank;
5595 pConfig->zRankArgs = zRankArgs;
5596 }else if( rc==SQLITE_ERROR1 ){
5597 rc = SQLITE_OK0;
5598 *pbBadkey = 1;
5599 }
5600 }
5601
5602 else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "secure-delete") ){
5603 int bVal = -1;
5604 if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){
5605 bVal = sqlite3_value_intsqlite3_api->value_int(pVal);
5606 }
5607 if( bVal<0 ){
5608 *pbBadkey = 1;
5609 }else{
5610 pConfig->bSecureDelete = (bVal ? 1 : 0);
5611 }
5612 }
5613
5614 else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "insttoken") ){
5615 int bVal = -1;
5616 if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){
5617 bVal = sqlite3_value_intsqlite3_api->value_int(pVal);
5618 }
5619 if( bVal<0 ){
5620 *pbBadkey = 1;
5621 }else{
5622 pConfig->bPrefixInsttoken = (bVal ? 1 : 0);
5623 }
5624
5625 }else{
5626 *pbBadkey = 1;
5627 }
5628 return rc;
5629}
5630
5631/*
5632** Load the contents of the %_config table into memory.
5633*/
5634static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
5635 const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
5636 char *zSql;
5637 sqlite3_stmt *p = 0;
5638 int rc = SQLITE_OK0;
5639 int iVersion = 0;
5640
5641 /* Set default values */
5642 pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE4050;
5643 pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE4;
5644 pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE4;
5645 pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE16;
5646 pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE(1024*1024);
5647 pConfig->nDeleteMerge = FTS5_DEFAULT_DELETE_AUTOMERGE10;
5648
5649 zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
5650 if( zSql ){
5651 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pConfig->db, zSql, -1, &p, 0);
5652 sqlite3_freesqlite3_api->free(zSql);
5653 }
5654
5655 assert( rc==SQLITE_OK || p==0 )((void) (0));
5656 if( rc==SQLITE_OK0 ){
5657 while( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(p) ){
5658 const char *zK = (const char*)sqlite3_column_textsqlite3_api->column_text(p, 0);
5659 sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(p, 1);
5660 if( 0==sqlite3_stricmpsqlite3_api->stricmp(zK, "version") ){
5661 iVersion = sqlite3_value_intsqlite3_api->value_int(pVal);
5662 }else{
5663 int bDummy = 0;
5664 sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
5665 }
5666 }
5667 rc = sqlite3_finalizesqlite3_api->finalize(p);
5668 }
5669
5670 if( rc==SQLITE_OK0
5671 && iVersion!=FTS5_CURRENT_VERSION4
5672 && iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE5
5673 ){
5674 rc = SQLITE_ERROR1;
5675 sqlite3Fts5ConfigErrmsg(pConfig, "invalid fts5 file format "
5676 "(found %d, expected %d or %d) - run 'rebuild'",
5677 iVersion, FTS5_CURRENT_VERSION4, FTS5_CURRENT_VERSION_SECUREDELETE5
5678 );
5679 }else{
5680 pConfig->iVersion = iVersion;
5681 }
5682
5683 if( rc==SQLITE_OK0 ){
5684 pConfig->iCookie = iCookie;
5685 }
5686 return rc;
5687}
5688
5689/*
5690** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer
5691** containing the error message created using printf() style formatting
5692** string zFmt and its trailing arguments.
5693*/
5694static void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){
5695 va_list ap; /* ... printf arguments */
5696 char *zMsg = 0;
5697
5698 va_start(ap, zFmt)__builtin_va_start(ap, zFmt);
5699 zMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap);
5700 if( pConfig->pzErrmsg ){
5701 assert( *pConfig->pzErrmsg==0 )((void) (0));
5702 *pConfig->pzErrmsg = zMsg;
5703 }else{
5704 sqlite3_freesqlite3_api->free(zMsg);
5705 }
5706
5707 va_end(ap)__builtin_va_end(ap);
5708}
5709
5710
5711
5712#line 1 "fts5_expr.c"
5713/*
5714** 2014 May 31
5715**
5716** The author disclaims copyright to this source code. In place of
5717** a legal notice, here is a blessing:
5718**
5719** May you do good and not evil.
5720** May you find forgiveness for yourself and forgive others.
5721** May you share freely, never taking more than you give.
5722**
5723******************************************************************************
5724**
5725*/
5726
5727
5728
5729/* #include "fts5Int.h" */
5730/* #include "fts5parse.h" */
5731
5732#ifndef SQLITE_FTS5_MAX_EXPR_DEPTH256
5733# define SQLITE_FTS5_MAX_EXPR_DEPTH256 256
5734#endif
5735
5736/*
5737** All token types in the generated fts5parse.h file are greater than 0.
5738*/
5739#define FTS5_EOF0 0
5740
5741#define FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) (0xffffffff|(((i64)0x7fffffff)<<32))
5742
5743typedef struct Fts5ExprTerm Fts5ExprTerm;
5744
5745/*
5746** Functions generated by lemon from fts5parse.y.
5747*/
5748static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
5749static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
5750static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
5751#ifndef NDEBUG1
5752#include <stdio.h>
5753static void sqlite3Fts5ParserTrace(FILE*, char*);
5754#endif
5755static int sqlite3Fts5ParserFallback(int);
5756
5757
5758struct Fts5Expr {
5759 Fts5Index *pIndex;
5760 Fts5Config *pConfig;
5761 Fts5ExprNode *pRoot;
5762 int bDesc; /* Iterate in descending rowid order */
5763 int nPhrase; /* Number of phrases in expression */
5764 Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */
5765};
5766
5767/*
5768** eType:
5769** Expression node type. Usually one of:
5770**
5771** FTS5_AND (nChild, apChild valid)
5772** FTS5_OR (nChild, apChild valid)
5773** FTS5_NOT (nChild, apChild valid)
5774** FTS5_STRING (pNear valid)
5775** FTS5_TERM (pNear valid)
5776**
5777** An expression node with eType==0 may also exist. It always matches zero
5778** rows. This is created when a phrase containing no tokens is parsed.
5779** e.g. "".
5780**
5781** iHeight:
5782** Distance from this node to furthest leaf. This is always 0 for nodes
5783** of type FTS5_STRING and FTS5_TERM. For all other nodes it is one
5784** greater than the largest child value.
5785*/
5786struct Fts5ExprNode {
5787 int eType; /* Node type */
5788 int bEof; /* True at EOF */
5789 int bNomatch; /* True if entry is not a match */
5790 int iHeight; /* Distance to tree leaf nodes */
5791
5792 /* Next method for this node. */
5793 int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);
5794
5795 i64 iRowid; /* Current rowid */
5796 Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */
5797
5798 /* Child nodes. For a NOT node, this array always contains 2 entries. For
5799 ** AND or OR nodes, it contains 2 or more entries. */
5800 int nChild; /* Number of child nodes */
5801 Fts5ExprNode *apChild[FLEXARRAY]; /* Array of child nodes */
5802};
5803
/* Size (in bytes) of an Fts5ExprNode object that holds up to N children */
#define SZ_FTS5EXPRNODE(N) \
  (offsetof(Fts5ExprNode,apChild) + (N)*sizeof(Fts5ExprNode*))

/* True if node p is of type FTS5_TERM or FTS5_STRING. */
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)

/*
** Invoke the xNext method of an Fts5ExprNode object. This macro should be
** used as if it has the same signature as the xNext() methods themselves.
*/
#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
5815
5816/*
5817** An instance of the following structure represents a single search term
5818** or term prefix.
5819*/
5820struct Fts5ExprTerm {
5821 u8 bPrefix; /* True for a prefix term */
5822 u8 bFirst; /* True if token must be first in column */
5823 char *pTerm; /* Term data */
5824 int nQueryTerm; /* Effective size of term in bytes */
5825 int nFullTerm; /* Size of term in bytes incl. tokendata */
5826 Fts5IndexIter *pIter; /* Iterator for this term */
5827 Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
5828};
5829
5830/*
5831** A phrase. One or more terms that must appear in a contiguous sequence
5832** within a document for it to match.
5833*/
5834struct Fts5ExprPhrase {
5835 Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */
5836 Fts5Buffer poslist; /* Current position list */
5837 int nTerm; /* Number of entries in aTerm[] */
5838 Fts5ExprTerm aTerm[FLEXARRAY]; /* Terms that make up this phrase */
5839};
5840
5841/* Size (in bytes) of an Fts5ExprPhrase object that holds up to N terms */
5842#define SZ_FTS5EXPRPHRASE(N)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (N)*sizeof(Fts5ExprTerm
))
\
5843 (offsetof(Fts5ExprPhrase,aTerm)__builtin_offsetof(Fts5ExprPhrase, aTerm) + (N)*sizeof(Fts5ExprTerm))
5844
5845/*
5846** One or more phrases that must appear within a certain token distance of
5847** each other within each matching document.
5848*/
5849struct Fts5ExprNearset {
5850 int nNear; /* NEAR parameter */
5851 Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */
5852 int nPhrase; /* Number of entries in aPhrase[] array */
5853 Fts5ExprPhrase *apPhrase[FLEXARRAY]; /* Array of phrase pointers */
5854};
5855
5856/* Size (in bytes) of an Fts5ExprNearset object covering up to N phrases */
5857#define SZ_FTS5EXPRNEARSET(N)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(N)*sizeof(Fts5ExprPhrase
*))
\
5858 (offsetof(Fts5ExprNearset,apPhrase)__builtin_offsetof(Fts5ExprNearset, apPhrase)+(N)*sizeof(Fts5ExprPhrase*))
5859
5860/*
5861** Parse context.
5862*/
5863struct Fts5Parse {
5864 Fts5Config *pConfig;
5865 char *zErr;
5866 int rc;
5867 int nPhrase; /* Size of apPhrase array */
5868 Fts5ExprPhrase **apPhrase; /* Array of all phrases */
5869 Fts5ExprNode *pExpr; /* Result of a successful parse */
5870 int bPhraseToAnd; /* Convert "a+b" to "a AND b" */
5871};
5872
5873/*
5874** Check that the Fts5ExprNode.iHeight variables are set correctly in
5875** the expression tree passed as the only argument.
5876*/
5877#ifndef NDEBUG1
5878static void assert_expr_depth_ok(int rc, Fts5ExprNode *p){
5879 if( rc==SQLITE_OK0 ){
5880 if( p->eType==FTS5_TERM4 || p->eType==FTS5_STRING9 || p->eType==0 ){
5881 assert( p->iHeight==0 )((void) (0));
5882 }else{
5883 int ii;
5884 int iMaxChild = 0;
5885 for(ii=0; ii<p->nChild; ii++){
5886 Fts5ExprNode *pChild = p->apChild[ii];
5887 iMaxChild = MAX(iMaxChild, pChild->iHeight)(((iMaxChild) > (pChild->iHeight)) ? (iMaxChild) : (pChild
->iHeight))
;
5888 assert_expr_depth_ok(SQLITE_OK, pChild);
5889 }
5890 assert( p->iHeight==iMaxChild+1 )((void) (0));
5891 }
5892 }
5893}
5894#else
5895# define assert_expr_depth_ok(rc, p)
5896#endif
5897
5898static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
5899 va_list ap;
5900 va_start(ap, zFmt)__builtin_va_start(ap, zFmt);
5901 if( pParse->rc==SQLITE_OK0 ){
5902 assert( pParse->zErr==0 )((void) (0));
5903 pParse->zErr = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap);
5904 pParse->rc = SQLITE_ERROR1;
5905 }
5906 va_end(ap)__builtin_va_end(ap);
5907}
5908
/*
** Return true (1) if character t is a space, tab, newline or carriage
** return, or false (0) otherwise.
*/
static int fts5ExprIsspace(char t){
  return t==' ' || t=='\t' || t=='\n' || t=='\r';
}
5912
5913/*
5914** Read the first token from the nul-terminated string at *pz.
5915*/
5916static int fts5ExprGetToken(
5917 Fts5Parse *pParse,
5918 const char **pz, /* IN/OUT: Pointer into buffer */
5919 Fts5Token *pToken
5920){
5921 const char *z = *pz;
5922 int tok;
5923
5924 /* Skip past any whitespace */
5925 while( fts5ExprIsspace(*z) ) z++;
5926
5927 pToken->p = z;
5928 pToken->n = 1;
5929 switch( *z ){
5930 case '(': tok = FTS5_LP10; break;
5931 case ')': tok = FTS5_RP11; break;
5932 case '{': tok = FTS5_LCP7; break;
5933 case '}': tok = FTS5_RCP8; break;
5934 case ':': tok = FTS5_COLON5; break;
5935 case ',': tok = FTS5_COMMA13; break;
5936 case '+': tok = FTS5_PLUS14; break;
5937 case '*': tok = FTS5_STAR15; break;
5938 case '-': tok = FTS5_MINUS6; break;
5939 case '^': tok = FTS5_CARET12; break;
5940 case '\0': tok = FTS5_EOF0; break;
5941
5942 case '"': {
5943 const char *z2;
5944 tok = FTS5_STRING9;
5945
5946 for(z2=&z[1]; 1; z2++){
5947 if( z2[0]=='"' ){
5948 z2++;
5949 if( z2[0]!='"' ) break;
5950 }
5951 if( z2[0]=='\0' ){
5952 sqlite3Fts5ParseError(pParse, "unterminated string");
5953 return FTS5_EOF0;
5954 }
5955 }
5956 pToken->n = (z2 - z);
5957 break;
5958 }
5959
5960 default: {
5961 const char *z2;
5962 if( sqlite3Fts5IsBareword(z[0])==0 ){
5963 sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
5964 return FTS5_EOF0;
5965 }
5966 tok = FTS5_STRING9;
5967 for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
5968 pToken->n = (z2 - z);
5969 if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR1;
5970 if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT3;
5971 if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND2;
5972 break;
5973 }
5974 }
5975
5976 *pz = &pToken->p[pToken->n];
5977 return tok;
5978}
5979
5980static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)t);}
5981static void fts5ParseFree(void *p){ sqlite3_freesqlite3_api->free(p); }
5982
5983static int sqlite3Fts5ExprNew(
5984 Fts5Config *pConfig, /* FTS5 Configuration */
5985 int bPhraseToAnd,
5986 int iCol,
5987 const char *zExpr, /* Expression text */
5988 Fts5Expr **ppNew,
5989 char **pzErr
5990){
5991 Fts5Parse sParse;
5992 Fts5Token token;
5993 const char *z = zExpr;
5994 int t; /* Next token type */
5995 void *pEngine;
5996 Fts5Expr *pNew;
5997
5998 *ppNew = 0;
5999 *pzErr = 0;
6000 memset(&sParse, 0, sizeof(sParse));
6001 sParse.bPhraseToAnd = bPhraseToAnd;
6002 pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
6003 if( pEngine==0 ){ return SQLITE_NOMEM7; }
6004 sParse.pConfig = pConfig;
6005
6006 do {
6007 t = fts5ExprGetToken(&sParse, &z, &token);
6008 sqlite3Fts5Parser(pEngine, t, token, &sParse);
6009 }while( sParse.rc==SQLITE_OK0 && t!=FTS5_EOF0 );
6010 sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
6011
6012 assert( sParse.pExpr || sParse.rc!=SQLITE_OK )((void) (0));
6013 assert_expr_depth_ok(sParse.rc, sParse.pExpr);
6014
6015 /* If the LHS of the MATCH expression was a user column, apply the
6016 ** implicit column-filter. */
6017 if( sParse.rc==SQLITE_OK0 && iCol<pConfig->nCol ){
6018 int n = SZ_FTS5COLSET(1)(sizeof(i64)*((1 +2)/2));
6019 Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n);
6020 if( pColset ){
6021 pColset->nCol = 1;
6022 pColset->aiCol[0] = iCol;
6023 sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset);
6024 }
6025 }
6026
6027 assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 )((void) (0));
6028 if( sParse.rc==SQLITE_OK0 ){
6029 *ppNew = pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Expr));
6030 if( pNew==0 ){
6031 sParse.rc = SQLITE_NOMEM7;
6032 sqlite3Fts5ParseNodeFree(sParse.pExpr);
6033 }else{
6034 pNew->pRoot = sParse.pExpr;
6035 pNew->pIndex = 0;
6036 pNew->pConfig = pConfig;
6037 pNew->apExprPhrase = sParse.apPhrase;
6038 pNew->nPhrase = sParse.nPhrase;
6039 pNew->bDesc = 0;
6040 sParse.apPhrase = 0;
6041 }
6042 }else{
6043 sqlite3Fts5ParseNodeFree(sParse.pExpr);
6044 }
6045
6046 sqlite3_freesqlite3_api->free(sParse.apPhrase);
6047 if( 0==*pzErr ){
6048 *pzErr = sParse.zErr;
6049 }else{
6050 sqlite3_freesqlite3_api->free(sParse.zErr);
6051 }
6052 return sParse.rc;
6053}
6054
/*
** Assuming that buffer z is at least nByte bytes in size and contains a
** valid utf-8 string, return the number of characters in the string.
**
** Every byte that is not a utf-8 continuation byte (bit pattern 10xxxxxx)
** is counted as the start of one character.
*/
static int fts5ExprCountChar(const char *z, int nByte){
  int nRet = 0;
  int ii;
  for(ii=0; ii<nByte; ii++){
    if( (z[ii] & 0xC0)!=0x80 ) nRet++;
  }
  return nRet;
}
6067
6068/*
6069** This function is only called when using the special 'trigram' tokenizer.
6070** Argument zText contains the text of a LIKE or GLOB pattern matched
6071** against column iCol. This function creates and compiles an FTS5 MATCH
6072** expression that will match a superset of the rows matched by the LIKE or
6073** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error
6074** code.
6075*/
6076static int sqlite3Fts5ExprPattern(
6077 Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp
6078){
6079 i64 nText = strlen(zText);
6080 char *zExpr = (char*)sqlite3_malloc64sqlite3_api->malloc64(nText*4 + 1);
6081 int rc = SQLITE_OK0;
6082
6083 if( zExpr==0 ){
6084 rc = SQLITE_NOMEM7;
6085 }else{
6086 char aSpec[3];
6087 int iOut = 0;
6088 int i = 0;
6089 int iFirst = 0;
6090
6091 if( bGlob==0 ){
6092 aSpec[0] = '_';
6093 aSpec[1] = '%';
6094 aSpec[2] = 0;
6095 }else{
6096 aSpec[0] = '*';
6097 aSpec[1] = '?';
6098 aSpec[2] = '[';
6099 }
6100
6101 while( i<=nText ){
6102 if( i==nText
6103 || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2]
6104 ){
6105
6106 if( fts5ExprCountChar(&zText[iFirst], i-iFirst)>=3 ){
6107 int jj;
6108 zExpr[iOut++] = '"';
6109 for(jj=iFirst; jj<i; jj++){
6110 zExpr[iOut++] = zText[jj];
6111 if( zText[jj]=='"' ) zExpr[iOut++] = '"';
6112 }
6113 zExpr[iOut++] = '"';
6114 zExpr[iOut++] = ' ';
6115 }
6116 if( zText[i]==aSpec[2] ){
6117 i += 2;
6118 if( zText[i-1]=='^' ) i++;
6119 while( i<nText && zText[i]!=']' ) i++;
6120 }
6121 iFirst = i+1;
6122 }
6123 i++;
6124 }
6125 if( iOut>0 ){
6126 int bAnd = 0;
6127 if( pConfig->eDetail!=FTS5_DETAIL_FULL0 ){
6128 bAnd = 1;
6129 if( pConfig->eDetail==FTS5_DETAIL_NONE1 ){
6130 iCol = pConfig->nCol;
6131 }
6132 }
6133 zExpr[iOut] = '\0';
6134 rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg);
6135 }else{
6136 *pp = 0;
6137 }
6138 sqlite3_freesqlite3_api->free(zExpr);
6139 }
6140
6141 return rc;
6142}
6143
6144/*
6145** Free the expression node object passed as the only argument.
6146*/
6147static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
6148 if( p ){
6149 int i;
6150 for(i=0; i<p->nChild; i++){
6151 sqlite3Fts5ParseNodeFree(p->apChild[i]);
6152 }
6153 sqlite3Fts5ParseNearsetFree(p->pNear);
6154 sqlite3_freesqlite3_api->free(p);
6155 }
6156}
6157
6158/*
6159** Free the expression object passed as the only argument.
6160*/
6161static void sqlite3Fts5ExprFree(Fts5Expr *p){
6162 if( p ){
6163 sqlite3Fts5ParseNodeFree(p->pRoot);
6164 sqlite3_freesqlite3_api->free(p->apExprPhrase);
6165 sqlite3_freesqlite3_api->free(p);
6166 }
6167}
6168
6169static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){
6170 Fts5Parse sParse;
6171 memset(&sParse, 0, sizeof(sParse));
6172
6173 if( *pp1 && p2 ){
6174 Fts5Expr *p1 = *pp1;
6175 int nPhrase = p1->nPhrase + p2->nPhrase;
6176
6177 p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND2, p1->pRoot, p2->pRoot,0);
6178 p2->pRoot = 0;
6179
6180 if( sParse.rc==SQLITE_OK0 ){
6181 Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_reallocsqlite3_api->realloc(
6182 p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*)
6183 );
6184 if( ap==0 ){
6185 sParse.rc = SQLITE_NOMEM7;
6186 }else{
6187 int i;
6188 memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*));
6189 for(i=0; i<p2->nPhrase; i++){
6190 ap[i] = p2->apExprPhrase[i];
6191 }
6192 p1->nPhrase = nPhrase;
6193 p1->apExprPhrase = ap;
6194 }
6195 }
6196 sqlite3_freesqlite3_api->free(p2->apExprPhrase);
6197 sqlite3_freesqlite3_api->free(p2);
6198 }else if( p2 ){
6199 *pp1 = p2;
6200 }
6201
6202 return sParse.rc;
6203}
6204
6205/*
6206** Argument pTerm must be a synonym iterator. Return the current rowid
6207** that it points to.
6208*/
6209static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
6210 i64 iRet = 0;
6211 int bRetValid = 0;
6212 Fts5ExprTerm *p;
6213
6214 assert( pTerm )((void) (0));
6215 assert( pTerm->pSynonym )((void) (0));
6216 assert( bDesc==0 || bDesc==1 )((void) (0));
6217 for(p=pTerm; p; p=p->pSynonym){
6218 if( 0==sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof) ){
6219 i64 iRowid = p->pIter->iRowid;
6220 if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
6221 iRet = iRowid;
6222 bRetValid = 1;
6223 }
6224 }
6225 }
6226
6227 if( pbEof && bRetValid==0 ) *pbEof = 1;
6228 return iRet;
6229}
6230
6231/*
6232** Argument pTerm must be a synonym iterator.
6233*/
6234static int fts5ExprSynonymList(
6235 Fts5ExprTerm *pTerm,
6236 i64 iRowid,
6237 Fts5Buffer *pBuf, /* Use this buffer for space if required */
6238 u8 **pa, int *pn
6239){
6240 Fts5PoslistReader aStatic[4];
6241 Fts5PoslistReader *aIter = aStatic;
6242 int nIter = 0;
6243 int nAlloc = 4;
6244 int rc = SQLITE_OK0;
6245 Fts5ExprTerm *p;
6246
6247 assert( pTerm->pSynonym )((void) (0));
6248 for(p=pTerm; p; p=p->pSynonym){
6249 Fts5IndexIter *pIter = p->pIter;
6250 if( sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 && pIter->iRowid==iRowid ){
6251 if( pIter->nData==0 ) continue;
6252 if( nIter==nAlloc ){
6253 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
6254 Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
6255 if( aNew==0 ){
6256 rc = SQLITE_NOMEM7;
6257 goto synonym_poslist_out;
6258 }
6259 memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
6260 nAlloc = nAlloc*2;
6261 if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter);
6262 aIter = aNew;
6263 }
6264 sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]);
6265 assert( aIter[nIter].bEof==0 )((void) (0));
6266 nIter++;
6267 }
6268 }
6269
6270 if( nIter==1 ){
6271 *pa = (u8*)aIter[0].a;
6272 *pn = aIter[0].n;
6273 }else{
6274 Fts5PoslistWriter writer = {0};
6275 i64 iPrev = -1;
6276 fts5BufferZero(pBuf)sqlite3Fts5BufferZero(pBuf);
6277 while( 1 ){
6278 int i;
6279 i64 iMin = FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32));
6280 for(i=0; i<nIter; i++){
6281 if( aIter[i].bEof==0 ){
6282 if( aIter[i].iPos==iPrev ){
6283 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
6284 }
6285 if( aIter[i].iPos<iMin ){
6286 iMin = aIter[i].iPos;
6287 }
6288 }
6289 }
6290 if( iMin==FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) || rc!=SQLITE_OK0 ) break;
6291 rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin);
6292 iPrev = iMin;
6293 }
6294 if( rc==SQLITE_OK0 ){
6295 *pa = pBuf->p;
6296 *pn = pBuf->n;
6297 }
6298 }
6299
6300 synonym_poslist_out:
6301 if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter);
6302 return rc;
6303}
6304
6305
6306/*
6307** All individual term iterators in pPhrase are guaranteed to be valid and
6308** pointing to the same rowid when this function is called. This function
6309** checks if the current rowid really is a match, and if so populates
6310** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
6311** is set to true if this is really a match, or false otherwise.
6312**
6313** SQLITE_OK is returned if an error occurs, or an SQLite error code
6314** otherwise. It is not considered an error code if the current rowid is
6315** not a match.
6316*/
6317static int fts5ExprPhraseIsMatch(
6318 Fts5ExprNode *pNode, /* Node pPhrase belongs to */
6319 Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
6320 int *pbMatch /* OUT: Set to true if really a match */
6321){
6322 Fts5PoslistWriter writer = {0};
6323 Fts5PoslistReader aStatic[4];
6324 Fts5PoslistReader *aIter = aStatic;
6325 int i;
6326 int rc = SQLITE_OK0;
6327 int bFirst = pPhrase->aTerm[0].bFirst;
6328
6329 fts5BufferZero(&pPhrase->poslist)sqlite3Fts5BufferZero(&pPhrase->poslist);
6330
6331 /* If the aStatic[] array is not large enough, allocate a large array
6332 ** using sqlite3_malloc(). This approach could be improved upon. */
6333 if( pPhrase->nTerm>ArraySize(aStatic)((int)(sizeof(aStatic) / sizeof(aStatic[0]))) ){
6334 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
6335 aIter = (Fts5PoslistReader*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
6336 if( !aIter ) return SQLITE_NOMEM7;
6337 }
6338 memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
6339
6340 /* Initialize a term iterator for each term in the phrase */
6341 for(i=0; i<pPhrase->nTerm; i++){
6342 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
6343 int n = 0;
6344 int bFlag = 0;
6345 u8 *a = 0;
6346 if( pTerm->pSynonym ){
6347 Fts5Buffer buf = {0, 0, 0};
6348 rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n);
6349 if( rc ){
6350 sqlite3_freesqlite3_api->free(a);
6351 goto ismatch_out;
6352 }
6353 if( a==buf.p ) bFlag = 1;
6354 }else{
6355 a = (u8*)pTerm->pIter->pData;
6356 n = pTerm->pIter->nData;
6357 }
6358 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
6359 aIter[i].bFlag = (u8)bFlag;
6360 if( aIter[i].bEof ) goto ismatch_out;
6361 }
6362
6363 while( 1 ){
6364 int bMatch;
6365 i64 iPos = aIter[0].iPos;
6366 do {
6367 bMatch = 1;
6368 for(i=0; i<pPhrase->nTerm; i++){
6369 Fts5PoslistReader *pPos = &aIter[i];
6370 i64 iAdj = iPos + i;
6371 if( pPos->iPos!=iAdj ){
6372 bMatch = 0;
6373 while( pPos->iPos<iAdj ){
6374 if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
6375 }
6376 if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
6377 }
6378 }
6379 }while( bMatch==0 );
6380
6381 /* Append position iPos to the output */
6382 if( bFirst==0 || FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF)==0 ){
6383 rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
6384 if( rc!=SQLITE_OK0 ) goto ismatch_out;
6385 }
6386
6387 for(i=0; i<pPhrase->nTerm; i++){
6388 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
6389 }
6390 }
6391
6392 ismatch_out:
6393 *pbMatch = (pPhrase->poslist.n>0);
6394 for(i=0; i<pPhrase->nTerm; i++){
6395 if( aIter[i].bFlag ) sqlite3_freesqlite3_api->free((u8*)aIter[i].a);
6396 }
6397 if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter);
6398 return rc;
6399}
6400
6401typedef struct Fts5LookaheadReader Fts5LookaheadReader;
6402struct Fts5LookaheadReader {
6403 const u8 *a; /* Buffer containing position list */
6404 int n; /* Size of buffer a[] in bytes */
6405 int i; /* Current offset in position list */
6406 i64 iPos; /* Current position */
6407 i64 iLookahead; /* Next position */
6408};
6409
6410#define FTS5_LOOKAHEAD_EOF(((i64)1) << 62) (((i64)1) << 62)
6411
6412static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
6413 p->iPos = p->iLookahead;
6414 if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
6415 p->iLookahead = FTS5_LOOKAHEAD_EOF(((i64)1) << 62);
6416 }
6417 return (p->iPos==FTS5_LOOKAHEAD_EOF(((i64)1) << 62));
6418}
6419
6420static int fts5LookaheadReaderInit(
6421 const u8 *a, int n, /* Buffer to read position list from */
6422 Fts5LookaheadReader *p /* Iterator object to initialize */
6423){
6424 memset(p, 0, sizeof(Fts5LookaheadReader));
6425 p->a = a;
6426 p->n = n;
6427 fts5LookaheadReaderNext(p);
6428 return fts5LookaheadReaderNext(p);
6429}
6430
6431typedef struct Fts5NearTrimmer Fts5NearTrimmer;
6432struct Fts5NearTrimmer {
6433 Fts5LookaheadReader reader; /* Input iterator */
6434 Fts5PoslistWriter writer; /* Writer context */
6435 Fts5Buffer *pOut; /* Output poslist */
6436};
6437
6438/*
6439** The near-set object passed as the first argument contains more than
6440** one phrase. All phrases currently point to the same row. The
6441** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
6442** tests if the current row contains instances of each phrase sufficiently
6443** close together to meet the NEAR constraint. Non-zero is returned if it
6444** does, or zero otherwise.
6445**
6446** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
6447** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
6448** occurs within this function (*pRc) is set accordingly before returning.
6449** The return value is undefined in both these cases.
6450**
6451** If no error occurs and non-zero (a match) is returned, the position-list
6452** of each phrase object is edited to contain only those entries that
6453** meet the constraint before returning.
6454*/
6455static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
6456 Fts5NearTrimmer aStatic[4];
6457 Fts5NearTrimmer *a = aStatic;
6458 Fts5ExprPhrase **apPhrase = pNear->apPhrase;
6459
6460 int i;
6461 int rc = *pRc;
6462 int bMatch;
6463
6464 assert( pNear->nPhrase>1 )((void) (0));
6465
6466 /* If the aStatic[] array is not large enough, allocate a large array
6467 ** using sqlite3_malloc(). This approach could be improved upon. */
6468 if( pNear->nPhrase>ArraySize(aStatic)((int)(sizeof(aStatic) / sizeof(aStatic[0]))) ){
6469 sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
6470 a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
6471 }else{
6472 memset(aStatic, 0, sizeof(aStatic));
6473 }
6474 if( rc!=SQLITE_OK0 ){
6475 *pRc = rc;
6476 return 0;
6477 }
6478
6479 /* Initialize a lookahead iterator for each phrase. After passing the
6480 ** buffer and buffer size to the lookaside-reader init function, zero
6481 ** the phrase poslist buffer. The new poslist for the phrase (containing
6482 ** the same entries as the original with some entries removed on account
6483 ** of the NEAR constraint) is written over the original even as it is
6484 ** being read. This is safe as the entries for the new poslist are a
6485 ** subset of the old, so it is not possible for data yet to be read to
6486 ** be overwritten. */
6487 for(i=0; i<pNear->nPhrase; i++){
6488 Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
6489 fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
6490 pPoslist->n = 0;
6491 a[i].pOut = pPoslist;
6492 }
6493
6494 while( 1 ){
6495 int iAdv;
6496 i64 iMin;
6497 i64 iMax;
6498
6499 /* This block advances the phrase iterators until they point to a set of
6500 ** entries that together comprise a match. */
6501 iMax = a[0].reader.iPos;
6502 do {
6503 bMatch = 1;
6504 for(i=0; i<pNear->nPhrase; i++){
6505 Fts5LookaheadReader *pPos = &a[i].reader;
6506 iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
6507 if( pPos->iPos<iMin || pPos->iPos>iMax ){
6508 bMatch = 0;
6509 while( pPos->iPos<iMin ){
6510 if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
6511 }
6512 if( pPos->iPos>iMax ) iMax = pPos->iPos;
6513 }
6514 }
6515 }while( bMatch==0 );
6516
6517 /* Add an entry to each output position list */
6518 for(i=0; i<pNear->nPhrase; i++){
6519 i64 iPos = a[i].reader.iPos;
6520 Fts5PoslistWriter *pWriter = &a[i].writer;
6521 if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
6522 sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
6523 }
6524 }
6525
6526 iAdv = 0;
6527 iMin = a[0].reader.iLookahead;
6528 for(i=0; i<pNear->nPhrase; i++){
6529 if( a[i].reader.iLookahead < iMin ){
6530 iMin = a[i].reader.iLookahead;
6531 iAdv = i;
6532 }
6533 }
6534 if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
6535 }
6536
6537 ismatch_out: {
6538 int bRet = a[0].pOut->n>0;
6539 *pRc = rc;
6540 if( a!=aStatic ) sqlite3_freesqlite3_api->free(a);
6541 return bRet;
6542 }
6543}
6544
6545/*
6546** Advance iterator pIter until it points to a value equal to or laster
6547** than the initial value of *piLast. If this means the iterator points
6548** to a value laster than *piLast, update *piLast to the new lastest value.
6549**
6550** If the iterator reaches EOF, set *pbEof to true before returning. If
6551** an error occurs, set *pRc to an error code. If either *pbEof or *pRc
6552** are set, return a non-zero value. Otherwise, return zero.
6553*/
6554static int fts5ExprAdvanceto(
6555 Fts5IndexIter *pIter, /* Iterator to advance */
6556 int bDesc, /* True if iterator is "rowid DESC" */
6557 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
6558 int *pRc, /* OUT: Error code */
6559 int *pbEof /* OUT: Set to true if EOF */
6560){
6561 i64 iLast = *piLast;
6562 i64 iRowid;
6563
6564 iRowid = pIter->iRowid;
6565 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
6566 int rc = sqlite3Fts5IterNextFrom(pIter, iLast);
6567 if( rc || sqlite3Fts5IterEof(pIter)((pIter)->bEof) ){
6568 *pRc = rc;
6569 *pbEof = 1;
6570 return 1;
6571 }
6572 iRowid = pIter->iRowid;
6573 assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) )((void) (0));
6574 }
6575 *piLast = iRowid;
6576
6577 return 0;
6578}
6579
6580static int fts5ExprSynonymAdvanceto(
6581 Fts5ExprTerm *pTerm, /* Term iterator to advance */
6582 int bDesc, /* True if iterator is "rowid DESC" */
6583 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
6584 int *pRc /* OUT: Error code */
6585){
6586 int rc = SQLITE_OK0;
6587 i64 iLast = *piLast;
6588 Fts5ExprTerm *p;
6589 int bEof = 0;
6590
6591 for(p=pTerm; rc==SQLITE_OK0 && p; p=p->pSynonym){
6592 if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){
6593 i64 iRowid = p->pIter->iRowid;
6594 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
6595 rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
6596 }
6597 }
6598 }
6599
6600 if( rc!=SQLITE_OK0 ){
6601 *pRc = rc;
6602 bEof = 1;
6603 }else{
6604 *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
6605 }
6606 return bEof;
6607}
6608
6609
6610static int fts5ExprNearTest(
6611 int *pRc,
6612 Fts5Expr *pExpr, /* Expression that pNear is a part of */
6613 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */
6614){
6615 Fts5ExprNearset *pNear = pNode->pNear;
6616 int rc = *pRc;
6617
6618 if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL0 ){
6619 Fts5ExprTerm *pTerm;
6620 Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
6621 pPhrase->poslist.n = 0;
6622 for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
6623 Fts5IndexIter *pIter = pTerm->pIter;
6624 if( sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 ){
6625 if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){
6626 pPhrase->poslist.n = 1;
6627 }
6628 }
6629 }
6630 return pPhrase->poslist.n;
6631 }else{
6632 int i;
6633
6634 /* Check that each phrase in the nearset matches the current row.
6635 ** Populate the pPhrase->poslist buffers at the same time. If any
6636 ** phrase is not a match, break out of the loop early. */
6637 for(i=0; rc==SQLITE_OK0 && i<pNear->nPhrase; i++){
6638 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6639 if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym
6640 || pNear->pColset || pPhrase->aTerm[0].bFirst
6641 ){
6642 int bMatch = 0;
6643 rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch);
6644 if( bMatch==0 ) break;
6645 }else{
6646 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
6647 fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData)sqlite3Fts5BufferSet(&rc,&pPhrase->poslist,pIter->
nData,pIter->pData)
;
6648 }
6649 }
6650
6651 *pRc = rc;
6652 if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
6653 return 1;
6654 }
6655 return 0;
6656 }
6657}
6658
6659
6660/*
6661** Initialize all term iterators in the pNear object. If any term is found
6662** to match no documents at all, return immediately without initializing any
6663** further iterators.
6664**
6665** If an error occurs, return an SQLite error code. Otherwise, return
6666** SQLITE_OK. It is not considered an error if some term matches zero
6667** documents.
6668*/
6669static int fts5ExprNearInitAll(
6670 Fts5Expr *pExpr,
6671 Fts5ExprNode *pNode
6672){
6673 Fts5ExprNearset *pNear = pNode->pNear;
6674 int i;
6675
6676 assert( pNode->bNomatch==0 )((void) (0));
6677 for(i=0; i<pNear->nPhrase; i++){
6678 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6679 if( pPhrase->nTerm==0 ){
6680 pNode->bEof = 1;
6681 return SQLITE_OK0;
6682 }else{
6683 int j;
6684 for(j=0; j<pPhrase->nTerm; j++){
6685 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
6686 Fts5ExprTerm *p;
6687 int bHit = 0;
6688
6689 for(p=pTerm; p; p=p->pSynonym){
6690 int rc;
6691 if( p->pIter ){
6692 sqlite3Fts5IterClose(p->pIter);
6693 p->pIter = 0;
6694 }
6695 rc = sqlite3Fts5IndexQuery(
6696 pExpr->pIndex, p->pTerm, p->nQueryTerm,
6697 (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX0x0001 : 0) |
6698 (pExpr->bDesc ? FTS5INDEX_QUERY_DESC0x0002 : 0),
6699 pNear->pColset,
6700 &p->pIter
6701 );
6702 assert( (rc==SQLITE_OK)==(p->pIter!=0) )((void) (0));
6703 if( rc!=SQLITE_OK0 ) return rc;
6704 if( 0==sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof) ){
6705 bHit = 1;
6706 }
6707 }
6708
6709 if( bHit==0 ){
6710 pNode->bEof = 1;
6711 return SQLITE_OK0;
6712 }
6713 }
6714 }
6715 }
6716
6717 pNode->bEof = 0;
6718 return SQLITE_OK0;
6719}
6720
6721/*
6722** If pExpr is an ASC iterator, this function returns a value with the
6723** same sign as:
6724**
6725** (iLhs - iRhs)
6726**
6727** Otherwise, if this is a DESC iterator, the opposite is returned:
6728**
6729** (iRhs - iLhs)
6730*/
6731static int fts5RowidCmp(
6732 Fts5Expr *pExpr,
6733 i64 iLhs,
6734 i64 iRhs
6735){
6736 assert( pExpr->bDesc==0 || pExpr->bDesc==1 )((void) (0));
6737 if( pExpr->bDesc==0 ){
6738 if( iLhs<iRhs ) return -1;
6739 return (iLhs > iRhs);
6740 }else{
6741 if( iLhs>iRhs ) return -1;
6742 return (iLhs < iRhs);
6743 }
6744}
6745
6746static void fts5ExprSetEof(Fts5ExprNode *pNode){
6747 int i;
6748 pNode->bEof = 1;
6749 pNode->bNomatch = 0;
6750 for(i=0; i<pNode->nChild; i++){
6751 fts5ExprSetEof(pNode->apChild[i]);
6752 }
6753}
6754
6755static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
6756 if( pNode->eType==FTS5_STRING9 || pNode->eType==FTS5_TERM4 ){
6757 Fts5ExprNearset *pNear = pNode->pNear;
6758 int i;
6759 for(i=0; i<pNear->nPhrase; i++){
6760 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6761 pPhrase->poslist.n = 0;
6762 }
6763 }else{
6764 int i;
6765 for(i=0; i<pNode->nChild; i++){
6766 fts5ExprNodeZeroPoslist(pNode->apChild[i]);
6767 }
6768 }
6769}
6770
6771
6772
6773/*
6774** Compare the values currently indicated by the two nodes as follows:
6775**
6776** res = (*p1) - (*p2)
6777**
6778** Nodes that point to values that come later in the iteration order are
6779** considered to be larger. Nodes at EOF are the largest of all.
6780**
6781** This means that if the iteration order is ASC, then numerically larger
6782** rowids are considered larger. Or if it is the default DESC, numerically
6783** smaller rowids are larger.
6784*/
6785static int fts5NodeCompare(
6786 Fts5Expr *pExpr,
6787 Fts5ExprNode *p1,
6788 Fts5ExprNode *p2
6789){
6790 if( p2->bEof ) return -1;
6791 if( p1->bEof ) return +1;
6792 return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
6793}
6794
6795/*
6796** All individual term iterators in pNear are guaranteed to be valid when
6797** this function is called. This function checks if all term iterators
6798** point to the same rowid, and if not, advances them until they do.
6799** If an EOF is reached before this happens, *pbEof is set to true before
6800** returning.
6801**
6802** SQLITE_OK is returned if an error occurs, or an SQLite error code
6803** otherwise. It is not considered an error code if an iterator reaches
6804** EOF.
6805*/
6806static int fts5ExprNodeTest_STRING(
6807 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
6808 Fts5ExprNode *pNode
6809){
6810 Fts5ExprNearset *pNear = pNode->pNear;
6811 Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
6812 int rc = SQLITE_OK0;
6813 i64 iLast; /* Lastest rowid any iterator points to */
6814 int i, j; /* Phrase and token index, respectively */
6815 int bMatch; /* True if all terms are at the same rowid */
6816 const int bDesc = pExpr->bDesc;
6817
6818 /* Check that this node should not be FTS5_TERM */
6819 assert( pNear->nPhrase>1((void) (0))
6820 || pNear->apPhrase[0]->nTerm>1((void) (0))
6821 || pNear->apPhrase[0]->aTerm[0].pSynonym((void) (0))
6822 || pNear->apPhrase[0]->aTerm[0].bFirst((void) (0))
6823 )((void) (0));
6824
6825 /* Initialize iLast, the "lastest" rowid any iterator points to. If the
6826 ** iterator skips through rowids in the default ascending order, this means
6827 ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
6828 ** means the minimum rowid. */
6829 if( pLeft->aTerm[0].pSynonym ){
6830 iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
6831 }else{
6832 iLast = pLeft->aTerm[0].pIter->iRowid;
6833 }
6834
6835 do {
6836 bMatch = 1;
6837 for(i=0; i<pNear->nPhrase; i++){
6838 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6839 for(j=0; j<pPhrase->nTerm; j++){
6840 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
6841 if( pTerm->pSynonym ){
6842 i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
6843 if( iRowid==iLast ) continue;
6844 bMatch = 0;
6845 if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
6846 pNode->bNomatch = 0;
6847 pNode->bEof = 1;
6848 return rc;
6849 }
6850 }else{
6851 Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
6852 if( pIter->iRowid==iLast ) continue;
6853 bMatch = 0;
6854 if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
6855 return rc;
6856 }
6857 }
6858 }
6859 }
6860 }while( bMatch==0 );
6861
6862 pNode->iRowid = iLast;
6863 pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK0);
6864 assert( pNode->bEof==0 || pNode->bNomatch==0 )((void) (0));
6865
6866 return rc;
6867}
6868
6869/*
6870** Advance the first term iterator in the first phrase of pNear. Set output
6871** variable *pbEof to true if it reaches EOF or if an error occurs.
6872**
6873** Return SQLITE_OK if successful, or an SQLite error code if an error
6874** occurs.
6875*/
6876static int fts5ExprNodeNext_STRING(
6877 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
6878 Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */
6879 int bFromValid,
6880 i64 iFrom
6881){
6882 Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
6883 int rc = SQLITE_OK0;
6884
6885 pNode->bNomatch = 0;
6886 if( pTerm->pSynonym ){
6887 int bEof = 1;
6888 Fts5ExprTerm *p;
6889
6890 /* Find the firstest rowid any synonym points to. */
6891 i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
6892
6893 /* Advance each iterator that currently points to iRowid. Or, if iFrom
6894 ** is valid - each iterator that points to a rowid before iFrom. */
6895 for(p=pTerm; p; p=p->pSynonym){
6896 if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){
6897 i64 ii = p->pIter->iRowid;
6898 if( ii==iRowid
6899 || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
6900 ){
6901 if( bFromValid ){
6902 rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
6903 }else{
6904 rc = sqlite3Fts5IterNext(p->pIter);
6905 }
6906 if( rc!=SQLITE_OK0 ) break;
6907 if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){
6908 bEof = 0;
6909 }
6910 }else{
6911 bEof = 0;
6912 }
6913 }
6914 }
6915
6916 /* Set the EOF flag if either all synonym iterators are at EOF or an
6917 ** error has occurred. */
6918 pNode->bEof = (rc || bEof);
6919 }else{
6920 Fts5IndexIter *pIter = pTerm->pIter;
6921
6922 assert( Fts5NodeIsString(pNode) )((void) (0));
6923 if( bFromValid ){
6924 rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
6925 }else{
6926 rc = sqlite3Fts5IterNext(pIter);
6927 }
6928
6929 pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)((pIter)->bEof));
6930 }
6931
6932 if( pNode->bEof==0 ){
6933 assert( rc==SQLITE_OK )((void) (0));
6934 rc = fts5ExprNodeTest_STRING(pExpr, pNode);
6935 }
6936
6937 return rc;
6938}
6939
6940
6941static int fts5ExprNodeTest_TERM(
6942 Fts5Expr *pExpr, /* Expression that pNear is a part of */
6943 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */
6944){
6945 /* As this "NEAR" object is actually a single phrase that consists
6946 ** of a single term only, grab pointers into the poslist managed by the
6947 ** fts5_index.c iterator object. This is much faster than synthesizing
6948 ** a new poslist the way we have to for more complicated phrase or NEAR
6949 ** expressions. */
6950 Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0];
6951 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
6952
6953 assert( pNode->eType==FTS5_TERM )((void) (0));
6954 assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 )((void) (0));
6955 assert( pPhrase->aTerm[0].pSynonym==0 )((void) (0));
6956
6957 pPhrase->poslist.n = pIter->nData;
6958 if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL0 ){
6959 pPhrase->poslist.p = (u8*)pIter->pData;
6960 }
6961 pNode->iRowid = pIter->iRowid;
6962 pNode->bNomatch = (pPhrase->poslist.n==0);
6963 return SQLITE_OK0;
6964}
6965
6966/*
6967** xNext() method for a node of type FTS5_TERM.
6968*/
6969static int fts5ExprNodeNext_TERM(
6970 Fts5Expr *pExpr,
6971 Fts5ExprNode *pNode,
6972 int bFromValid,
6973 i64 iFrom
6974){
6975 int rc;
6976 Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
6977
6978 assert( pNode->bEof==0 )((void) (0));
6979 if( bFromValid ){
6980 rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
6981 }else{
6982 rc = sqlite3Fts5IterNext(pIter);
6983 }
6984 if( rc==SQLITE_OK0 && sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 ){
6985 rc = fts5ExprNodeTest_TERM(pExpr, pNode);
6986 }else{
6987 pNode->bEof = 1;
6988 pNode->bNomatch = 0;
6989 }
6990 return rc;
6991}
6992
6993static void fts5ExprNodeTest_OR(
6994 Fts5Expr *pExpr, /* Expression of which pNode is a part */
6995 Fts5ExprNode *pNode /* Expression node to test */
6996){
6997 Fts5ExprNode *pNext = pNode->apChild[0];
6998 int i;
6999
7000 for(i=1; i<pNode->nChild; i++){
7001 Fts5ExprNode *pChild = pNode->apChild[i];
7002 int cmp = fts5NodeCompare(pExpr, pNext, pChild);
7003 if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){
7004 pNext = pChild;
7005 }
7006 }
7007 pNode->iRowid = pNext->iRowid;
7008 pNode->bEof = pNext->bEof;
7009 pNode->bNomatch = pNext->bNomatch;
7010}
7011
7012static int fts5ExprNodeNext_OR(
7013 Fts5Expr *pExpr,
7014 Fts5ExprNode *pNode,
7015 int bFromValid,
7016 i64 iFrom
7017){
7018 int i;
7019 i64 iLast = pNode->iRowid;
7020
7021 for(i=0; i<pNode->nChild; i++){
7022 Fts5ExprNode *p1 = pNode->apChild[i];
7023 assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 )((void) (0));
7024 if( p1->bEof==0 ){
7025 if( (p1->iRowid==iLast)
7026 || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0)
7027 ){
7028 int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom)(p1)->xNext((pExpr), (p1), (bFromValid), (iFrom));
7029 if( rc!=SQLITE_OK0 ){
7030 pNode->bNomatch = 0;
7031 return rc;
7032 }
7033 }
7034 }
7035 }
7036
7037 fts5ExprNodeTest_OR(pExpr, pNode);
7038 return SQLITE_OK0;
7039}
7040
7041/*
7042** Argument pNode is an FTS5_AND node.
7043*/
7044static int fts5ExprNodeTest_AND(
7045 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
7046 Fts5ExprNode *pAnd /* FTS5_AND node to advance */
7047){
7048 int iChild;
7049 i64 iLast = pAnd->iRowid;
7050 int rc = SQLITE_OK0;
7051 int bMatch;
7052
7053 assert( pAnd->bEof==0 )((void) (0));
7054 do {
7055 pAnd->bNomatch = 0;
7056 bMatch = 1;
7057 for(iChild=0; iChild<pAnd->nChild; iChild++){
7058 Fts5ExprNode *pChild = pAnd->apChild[iChild];
7059 int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid);
7060 if( cmp>0 ){
7061 /* Advance pChild until it points to iLast or laster */
7062 rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast)(pChild)->xNext((pExpr), (pChild), (1), (iLast));
7063 if( rc!=SQLITE_OK0 ){
7064 pAnd->bNomatch = 0;
7065 return rc;
7066 }
7067 }
7068
7069 /* If the child node is now at EOF, so is the parent AND node. Otherwise,
7070 ** the child node is guaranteed to have advanced at least as far as
7071 ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
7072 ** new lastest rowid seen so far. */
7073 assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 )((void) (0));
7074 if( pChild->bEof ){
7075 fts5ExprSetEof(pAnd);
7076 bMatch = 1;
7077 break;
7078 }else if( iLast!=pChild->iRowid ){
7079 bMatch = 0;
7080 iLast = pChild->iRowid;
7081 }
7082
7083 if( pChild->bNomatch ){
7084 pAnd->bNomatch = 1;
7085 }
7086 }
7087 }while( bMatch==0 );
7088
7089 if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){
7090 fts5ExprNodeZeroPoslist(pAnd);
7091 }
7092 pAnd->iRowid = iLast;
7093 return SQLITE_OK0;
7094}
7095
7096static int fts5ExprNodeNext_AND(
7097 Fts5Expr *pExpr,
7098 Fts5ExprNode *pNode,
7099 int bFromValid,
7100 i64 iFrom
7101){
7102 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom)(pNode->apChild[0])->xNext((pExpr), (pNode->apChild[
0]), (bFromValid), (iFrom))
;
7103 if( rc==SQLITE_OK0 ){
7104 rc = fts5ExprNodeTest_AND(pExpr, pNode);
7105 }else{
7106 pNode->bNomatch = 0;
7107 }
7108 return rc;
7109}
7110
7111static int fts5ExprNodeTest_NOT(
7112 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
7113 Fts5ExprNode *pNode /* FTS5_NOT node to advance */
7114){
7115 int rc = SQLITE_OK0;
7116 Fts5ExprNode *p1 = pNode->apChild[0];
7117 Fts5ExprNode *p2 = pNode->apChild[1];
7118 assert( pNode->nChild==2 )((void) (0));
7119
7120 while( rc==SQLITE_OK0 && p1->bEof==0 ){
7121 int cmp = fts5NodeCompare(pExpr, p1, p2);
7122 if( cmp>0 ){
7123 rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid)(p2)->xNext((pExpr), (p2), (1), (p1->iRowid));
7124 cmp = fts5NodeCompare(pExpr, p1, p2);
7125 }
7126 assert( rc!=SQLITE_OK || cmp<=0 )((void) (0));
7127 if( cmp || p2->bNomatch ) break;
7128 rc = fts5ExprNodeNext(pExpr, p1, 0, 0)(p1)->xNext((pExpr), (p1), (0), (0));
7129 }
7130 pNode->bEof = p1->bEof;
7131 pNode->bNomatch = p1->bNomatch;
7132 pNode->iRowid = p1->iRowid;
7133 if( p1->bEof ){
7134 fts5ExprNodeZeroPoslist(p2);
7135 }
7136 return rc;
7137}
7138
7139static int fts5ExprNodeNext_NOT(
7140 Fts5Expr *pExpr,
7141 Fts5ExprNode *pNode,
7142 int bFromValid,
7143 i64 iFrom
7144){
7145 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom)(pNode->apChild[0])->xNext((pExpr), (pNode->apChild[
0]), (bFromValid), (iFrom))
;
7146 if( rc==SQLITE_OK0 ){
7147 rc = fts5ExprNodeTest_NOT(pExpr, pNode);
7148 }
7149 if( rc!=SQLITE_OK0 ){
7150 pNode->bNomatch = 0;
7151 }
7152 return rc;
7153}
7154
7155/*
7156** If pNode currently points to a match, this function returns SQLITE_OK
7157** without modifying it. Otherwise, pNode is advanced until it does point
7158** to a match or EOF is reached.
7159*/
7160static int fts5ExprNodeTest(
7161 Fts5Expr *pExpr, /* Expression of which pNode is a part */
7162 Fts5ExprNode *pNode /* Expression node to test */
7163){
7164 int rc = SQLITE_OK0;
7165 if( pNode->bEof==0 ){
7166 switch( pNode->eType ){
7167
7168 case FTS5_STRING9: {
7169 rc = fts5ExprNodeTest_STRING(pExpr, pNode);
7170 break;
7171 }
7172
7173 case FTS5_TERM4: {
7174 rc = fts5ExprNodeTest_TERM(pExpr, pNode);
7175 break;
7176 }
7177
7178 case FTS5_AND2: {
7179 rc = fts5ExprNodeTest_AND(pExpr, pNode);
7180 break;
7181 }
7182
7183 case FTS5_OR1: {
7184 fts5ExprNodeTest_OR(pExpr, pNode);
7185 break;
7186 }
7187
7188 default: assert( pNode->eType==FTS5_NOT )((void) (0)); {
7189 rc = fts5ExprNodeTest_NOT(pExpr, pNode);
7190 break;
7191 }
7192 }
7193 }
7194 return rc;
7195}
7196
7197
7198/*
7199** Set node pNode, which is part of expression pExpr, to point to the first
7200** match. If there are no matches, set the Node.bEof flag to indicate EOF.
7201**
7202** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
7203** It is not an error if there are no matches.
7204*/
7205static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
7206 int rc = SQLITE_OK0;
7207 pNode->bEof = 0;
7208 pNode->bNomatch = 0;
7209
7210 if( Fts5NodeIsString(pNode)((pNode)->eType==4 || (pNode)->eType==9) ){
7211 /* Initialize all term iterators in the NEAR object. */
7212 rc = fts5ExprNearInitAll(pExpr, pNode);
7213 }else if( pNode->xNext==0 ){
7214 pNode->bEof = 1;
7215 }else{
7216 int i;
7217 int nEof = 0;
7218 for(i=0; i<pNode->nChild && rc==SQLITE_OK0; i++){
7219 Fts5ExprNode *pChild = pNode->apChild[i];
7220 rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
7221 assert( pChild->bEof==0 || pChild->bEof==1 )((void) (0));
7222 nEof += pChild->bEof;
7223 }
7224 pNode->iRowid = pNode->apChild[0]->iRowid;
7225
7226 switch( pNode->eType ){
7227 case FTS5_AND2:
7228 if( nEof>0 ) fts5ExprSetEof(pNode);
7229 break;
7230
7231 case FTS5_OR1:
7232 if( pNode->nChild==nEof ) fts5ExprSetEof(pNode);
7233 break;
7234
7235 default:
7236 assert( pNode->eType==FTS5_NOT )((void) (0));
7237 pNode->bEof = pNode->apChild[0]->bEof;
7238 break;
7239 }
7240 }
7241
7242 if( rc==SQLITE_OK0 ){
7243 rc = fts5ExprNodeTest(pExpr, pNode);
7244 }
7245 return rc;
7246}
7247
7248
7249/*
7250** Begin iterating through the set of documents in index pIdx matched by
7251** the MATCH expression passed as the first argument. If the "bDesc"
7252** parameter is passed a non-zero value, iteration is in descending rowid
7253** order. Or, if it is zero, in ascending order.
7254**
7255** If iterating in ascending rowid order (bDesc==0), the first document
7256** visited is that with the smallest rowid that is larger than or equal
7257** to parameter iFirst. Or, if iterating in ascending order (bDesc==1),
7258** then the first document visited must have a rowid smaller than or
7259** equal to iFirst.
7260**
7261** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
7262** is not considered an error if the query does not match any documents.
7263*/
7264static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){
7265 Fts5ExprNode *pRoot = p->pRoot;
7266 int rc; /* Return code */
7267
7268 p->pIndex = pIdx;
7269 p->bDesc = bDesc;
7270 rc = fts5ExprNodeFirst(p, pRoot);
7271
7272 /* If not at EOF but the current rowid occurs earlier than iFirst in
7273 ** the iteration order, move to document iFirst or later. */
7274 if( rc==SQLITE_OK0
7275 && 0==pRoot->bEof
7276 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0
7277 ){
7278 rc = fts5ExprNodeNext(p, pRoot, 1, iFirst)(pRoot)->xNext((p), (pRoot), (1), (iFirst));
7279 }
7280
7281 /* If the iterator is not at a real match, skip forward until it is. */
7282 while( pRoot->bNomatch && rc==SQLITE_OK0 ){
7283 assert( pRoot->bEof==0 )((void) (0));
7284 rc = fts5ExprNodeNext(p, pRoot, 0, 0)(pRoot)->xNext((p), (pRoot), (0), (0));
7285 }
7286 return rc;
7287}
7288
7289/*
7290** Move to the next document
7291**
7292** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
7293** is not considered an error if the query does not match any documents.
7294*/
7295static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
7296 int rc;
7297 Fts5ExprNode *pRoot = p->pRoot;
7298 assert( pRoot->bEof==0 && pRoot->bNomatch==0 )((void) (0));
7299 do {
7300 rc = fts5ExprNodeNext(p, pRoot, 0, 0)(pRoot)->xNext((p), (pRoot), (0), (0));
7301 assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) )((void) (0));
7302 }while( pRoot->bNomatch );
7303 if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
7304 pRoot->bEof = 1;
7305 }
7306 return rc;
7307}
7308
7309static int sqlite3Fts5ExprEof(Fts5Expr *p){
7310 return p->pRoot->bEof;
7311}
7312
7313static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
7314 return p->pRoot->iRowid;
7315}
7316
7317static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
7318 int rc = SQLITE_OK0;
7319 *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n);
7320 return rc;
7321}
7322
7323/*
7324** Free the phrase object passed as the only argument.
7325*/
7326static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
7327 if( pPhrase ){
7328 int i;
7329 for(i=0; i<pPhrase->nTerm; i++){
7330 Fts5ExprTerm *pSyn;
7331 Fts5ExprTerm *pNext;
7332 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
7333 sqlite3_freesqlite3_api->free(pTerm->pTerm);
7334 sqlite3Fts5IterClose(pTerm->pIter);
7335 for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
7336 pNext = pSyn->pSynonym;
7337 sqlite3Fts5IterClose(pSyn->pIter);
7338 fts5BufferFree((Fts5Buffer*)&pSyn[1])sqlite3Fts5BufferFree((Fts5Buffer*)&pSyn[1]);
7339 sqlite3_freesqlite3_api->free(pSyn);
7340 }
7341 }
7342 if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist)sqlite3Fts5BufferFree(&pPhrase->poslist);
7343 sqlite3_freesqlite3_api->free(pPhrase);
7344 }
7345}
7346
7347/*
7348** Set the "bFirst" flag on the first token of the phrase passed as the
7349** only argument.
7350*/
7351static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){
7352 if( pPhrase && pPhrase->nTerm ){
7353 pPhrase->aTerm[0].bFirst = 1;
7354 }
7355}
7356
7357/*
7358** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
7359** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
7360** appended to it and the results returned.
7361**
7362** If an OOM error occurs, both the pNear and pPhrase objects are freed and
7363** NULL returned.
7364*/
7365static Fts5ExprNearset *sqlite3Fts5ParseNearset(
7366 Fts5Parse *pParse, /* Parse context */
7367 Fts5ExprNearset *pNear, /* Existing nearset, or NULL */
7368 Fts5ExprPhrase *pPhrase /* Recently parsed phrase */
7369){
7370 const int SZALLOC = 8;
7371 Fts5ExprNearset *pRet = 0;
7372
7373 if( pParse->rc==SQLITE_OK0 ){
7374 if( pNear==0 ){
7375 sqlite3_int64 nByte;
7376 nByte = SZ_FTS5EXPRNEARSET(SZALLOC+1)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(SZALLOC+1)*sizeof
(Fts5ExprPhrase*))
;
7377 pRet = sqlite3_malloc64sqlite3_api->malloc64(nByte);
7378 if( pRet==0 ){
7379 pParse->rc = SQLITE_NOMEM7;
7380 }else{
7381 memset(pRet, 0, (size_t)nByte);
7382 }
7383 }else if( (pNear->nPhrase % SZALLOC)==0 ){
7384 int nNew = pNear->nPhrase + SZALLOC;
7385 sqlite3_int64 nByte;
7386
7387 nByte = SZ_FTS5EXPRNEARSET(nNew+1)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(nNew+1)*sizeof
(Fts5ExprPhrase*))
;
7388 pRet = (Fts5ExprNearset*)sqlite3_realloc64sqlite3_api->realloc64(pNear, nByte);
7389 if( pRet==0 ){
7390 pParse->rc = SQLITE_NOMEM7;
7391 }
7392 }else{
7393 pRet = pNear;
7394 }
7395 }
7396
7397 if( pRet==0 ){
7398 assert( pParse->rc!=SQLITE_OK )((void) (0));
7399 sqlite3Fts5ParseNearsetFree(pNear);
7400 sqlite3Fts5ParsePhraseFree(pPhrase);
7401 }else{
7402 if( pRet->nPhrase>0 ){
7403 Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1];
7404 assert( pParse!=0 )((void) (0));
7405 assert( pParse->apPhrase!=0 )((void) (0));
7406 assert( pParse->nPhrase>=2 )((void) (0));
7407 assert( pLast==pParse->apPhrase[pParse->nPhrase-2] )((void) (0));
7408 if( pPhrase->nTerm==0 ){
7409 fts5ExprPhraseFree(pPhrase);
7410 pRet->nPhrase--;
7411 pParse->nPhrase--;
7412 pPhrase = pLast;
7413 }else if( pLast->nTerm==0 ){
7414 fts5ExprPhraseFree(pLast);
7415 pParse->apPhrase[pParse->nPhrase-2] = pPhrase;
7416 pParse->nPhrase--;
7417 pRet->nPhrase--;
7418 }
7419 }
7420 pRet->apPhrase[pRet->nPhrase++] = pPhrase;
7421 }
7422 return pRet;
7423}
7424
7425typedef struct TokenCtx TokenCtx;
7426struct TokenCtx {
7427 Fts5ExprPhrase *pPhrase;
7428 Fts5Config *pConfig;
7429 int rc;
7430};
7431
7432/*
7433** Callback for tokenizing terms used by ParseTerm().
7434*/
7435static int fts5ParseTokenize(
7436 void *pContext, /* Pointer to Fts5InsertCtx object */
7437 int tflags, /* Mask of FTS5_TOKEN_* flags */
7438 const char *pToken, /* Buffer containing token */
7439 int nToken, /* Size of token in bytes */
7440 int iUnused1, /* Start offset of token */
7441 int iUnused2 /* End offset of token */
7442){
7443 int rc = SQLITE_OK0;
7444 const int SZALLOC = 8;
7445 TokenCtx *pCtx = (TokenCtx*)pContext;
7446 Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
7447
7448 UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2);
7449
7450 /* If an error has already occurred, this is a no-op */
7451 if( pCtx->rc!=SQLITE_OK0 ) return pCtx->rc;
7452 if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768;
7453
7454 if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED0x0001) ){
7455 Fts5ExprTerm *pSyn;
7456 sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
7457 pSyn = (Fts5ExprTerm*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
7458 if( pSyn==0 ){
7459 rc = SQLITE_NOMEM7;
7460 }else{
7461 memset(pSyn, 0, (size_t)nByte);
7462 pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
7463 pSyn->nFullTerm = pSyn->nQueryTerm = nToken;
7464 if( pCtx->pConfig->bTokendata ){
7465 pSyn->nQueryTerm = (int)strlen(pSyn->pTerm);
7466 }
7467 memcpy(pSyn->pTerm, pToken, nToken);
7468 pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
7469 pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
7470 }
7471 }else{
7472 Fts5ExprTerm *pTerm;
7473 if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
7474 Fts5ExprPhrase *pNew;
7475 int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
7476
7477 pNew = (Fts5ExprPhrase*)sqlite3_realloc64sqlite3_api->realloc64(pPhrase,
7478 SZ_FTS5EXPRPHRASE(nNew+1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (nNew+1)*sizeof(
Fts5ExprTerm))
7479 );
7480 if( pNew==0 ){
7481 rc = SQLITE_NOMEM7;
7482 }else{
7483 if( pPhrase==0 ) memset(pNew, 0, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm
))
);
7484 pCtx->pPhrase = pPhrase = pNew;
7485 pNew->nTerm = nNew - SZALLOC;
7486 }
7487 }
7488
7489 if( rc==SQLITE_OK0 ){
7490 pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
7491 memset(pTerm, 0, sizeof(Fts5ExprTerm));
7492 pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
7493 pTerm->nFullTerm = pTerm->nQueryTerm = nToken;
7494 if( pCtx->pConfig->bTokendata && rc==SQLITE_OK0 ){
7495 pTerm->nQueryTerm = (int)strlen(pTerm->pTerm);
7496 }
7497 }
7498 }
7499
7500 pCtx->rc = rc;
7501 return rc;
7502}
7503
7504
7505/*
7506** Free the phrase object passed as the only argument.
7507*/
7508static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
7509 fts5ExprPhraseFree(pPhrase);
7510}
7511
7512/*
7513** Free the phrase object passed as the second argument.
7514*/
7515static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
7516 if( pNear ){
7517 int i;
7518 for(i=0; i<pNear->nPhrase; i++){
7519 fts5ExprPhraseFree(pNear->apPhrase[i]);
7520 }
7521 sqlite3_freesqlite3_api->free(pNear->pColset);
7522 sqlite3_freesqlite3_api->free(pNear);
7523 }
7524}
7525
7526static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
7527 assert( pParse->pExpr==0 )((void) (0));
7528 pParse->pExpr = p;
7529}
7530
7531static int parseGrowPhraseArray(Fts5Parse *pParse){
7532 if( (pParse->nPhrase % 8)==0 ){
7533 sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
7534 Fts5ExprPhrase **apNew;
7535 apNew = (Fts5ExprPhrase**)sqlite3_realloc64sqlite3_api->realloc64(pParse->apPhrase, nByte);
7536 if( apNew==0 ){
7537 pParse->rc = SQLITE_NOMEM7;
7538 return SQLITE_NOMEM7;
7539 }
7540 pParse->apPhrase = apNew;
7541 }
7542 return SQLITE_OK0;
7543}
7544
7545/*
7546** This function is called by the parser to process a string token. The
7547** string may or may not be quoted. In any case it is tokenized and a
7548** phrase object consisting of all tokens returned.
7549*/
7550static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
7551 Fts5Parse *pParse, /* Parse context */
7552 Fts5ExprPhrase *pAppend, /* Phrase to append to */
7553 Fts5Token *pToken, /* String to tokenize */
7554 int bPrefix /* True if there is a trailing "*" */
7555){
7556 Fts5Config *pConfig = pParse->pConfig;
7557 TokenCtx sCtx; /* Context object passed to callback */
7558 int rc; /* Tokenize return code */
7559 char *z = 0;
7560
7561 memset(&sCtx, 0, sizeof(TokenCtx));
7562 sCtx.pPhrase = pAppend;
7563 sCtx.pConfig = pConfig;
7564
7565 rc = fts5ParseStringFromToken(pToken, &z);
7566 if( rc==SQLITE_OK0 ){
7567 int flags = FTS5_TOKENIZE_QUERY0x0001 | (bPrefix ? FTS5_TOKENIZE_PREFIX0x0002 : 0);
7568 int n;
7569 sqlite3Fts5Dequote(z);
7570 n = (int)strlen(z);
7571 rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
7572 }
7573 sqlite3_freesqlite3_api->free(z);
7574 if( rc || (rc = sCtx.rc) ){
7575 pParse->rc = rc;
7576 fts5ExprPhraseFree(sCtx.pPhrase);
7577 sCtx.pPhrase = 0;
7578 }else{
7579
7580 if( pAppend==0 ){
7581 if( parseGrowPhraseArray(pParse) ){
7582 fts5ExprPhraseFree(sCtx.pPhrase);
7583 return 0;
7584 }
7585 pParse->nPhrase++;
7586 }
7587
7588 if( sCtx.pPhrase==0 ){
7589 /* This happens when parsing a token or quoted phrase that contains
7590 ** no token characters at all. (e.g ... MATCH '""'). */
7591 sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm
))
);
7592 }else if( sCtx.pPhrase->nTerm ){
7593 sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix;
7594 }
7595 assert( pParse->apPhrase!=0 )((void) (0));
7596 pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
7597 }
7598
7599 return sCtx.pPhrase;
7600}
7601
7602/*
7603** Create a new FTS5 expression by cloning phrase iPhrase of the
7604** expression passed as the second argument.
7605*/
7606static int sqlite3Fts5ExprClonePhrase(
7607 Fts5Expr *pExpr,
7608 int iPhrase,
7609 Fts5Expr **ppNew
7610){
7611 int rc = SQLITE_OK0; /* Return code */
7612 Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */
7613 Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
7614 TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */
7615 if( !pExpr || iPhrase<0 || iPhrase>=pExpr->nPhrase ){
7616 rc = SQLITE_RANGE25;
7617 }else{
7618 pOrig = pExpr->apExprPhrase[iPhrase];
7619 pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
7620 }
7621 if( rc==SQLITE_OK0 ){
7622 pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
7623 sizeof(Fts5ExprPhrase*));
7624 }
7625 if( rc==SQLITE_OK0 ){
7626 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, SZ_FTS5EXPRNODE(1)(__builtin_offsetof(Fts5ExprNode, apChild) + (1)*sizeof(Fts5ExprNode
*))
);
7627 }
7628 if( rc==SQLITE_OK0 ){
7629 pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
7630 SZ_FTS5EXPRNEARSET(2)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(2)*sizeof(Fts5ExprPhrase
*))
);
7631 }
7632 if( rc==SQLITE_OK0 && ALWAYS(pOrig!=0)(pOrig!=0) ){
7633 Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset;
7634 if( pColsetOrig ){
7635 sqlite3_int64 nByte;
7636 Fts5Colset *pColset;
7637 nByte = SZ_FTS5COLSET(pColsetOrig->nCol)(sizeof(i64)*((pColsetOrig->nCol+2)/2));
7638 pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte);
7639 if( pColset ){
7640 memcpy(pColset, pColsetOrig, (size_t)nByte);
7641 }
7642 pNew->pRoot->pNear->pColset = pColset;
7643 }
7644 }
7645
7646 if( rc==SQLITE_OK0 ){
7647 if( pOrig->nTerm ){
7648 int i; /* Used to iterate through phrase terms */
7649 sCtx.pConfig = pExpr->pConfig;
7650 for(i=0; rc==SQLITE_OK0 && i<pOrig->nTerm; i++){
7651 int tflags = 0;
7652 Fts5ExprTerm *p;
7653 for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK0; p=p->pSynonym){
7654 rc = fts5ParseTokenize((void*)&sCtx,tflags,p->pTerm,p->nFullTerm,0,0);
7655 tflags = FTS5_TOKEN_COLOCATED0x0001;
7656 }
7657 if( rc==SQLITE_OK0 ){
7658 sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
7659 sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst;
7660 }
7661 }
7662 }else{
7663 /* This happens when parsing a token or quoted phrase that contains
7664 ** no token characters at all. (e.g ... MATCH '""'). */
7665 sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm
))
);
7666 }
7667 }
7668
7669 if( rc==SQLITE_OK0 && ALWAYS(sCtx.pPhrase)(sCtx.pPhrase) ){
7670 /* All the allocations succeeded. Put the expression object together. */
7671 pNew->pIndex = pExpr->pIndex;
7672 pNew->pConfig = pExpr->pConfig;
7673 pNew->nPhrase = 1;
7674 pNew->apExprPhrase[0] = sCtx.pPhrase;
7675 pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
7676 pNew->pRoot->pNear->nPhrase = 1;
7677 sCtx.pPhrase->pNode = pNew->pRoot;
7678
7679 if( pOrig->nTerm==1
7680 && pOrig->aTerm[0].pSynonym==0
7681 && pOrig->aTerm[0].bFirst==0
7682 ){
7683 pNew->pRoot->eType = FTS5_TERM4;
7684 pNew->pRoot->xNext = fts5ExprNodeNext_TERM;
7685 }else{
7686 pNew->pRoot->eType = FTS5_STRING9;
7687 pNew->pRoot->xNext = fts5ExprNodeNext_STRING;
7688 }
7689 }else{
7690 sqlite3Fts5ExprFree(pNew);
7691 fts5ExprPhraseFree(sCtx.pPhrase);
7692 pNew = 0;
7693 }
7694
7695 *ppNew = pNew;
7696 return rc;
7697}
7698
7699
7700/*
7701** Token pTok has appeared in a MATCH expression where the NEAR operator
7702** is expected. If token pTok does not contain "NEAR", store an error
7703** in the pParse object.
7704*/
7705static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
7706 if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
7707 sqlite3Fts5ParseError(
7708 pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
7709 );
7710 }
7711}
7712
7713static void sqlite3Fts5ParseSetDistance(
7714 Fts5Parse *pParse,
7715 Fts5ExprNearset *pNear,
7716 Fts5Token *p
7717){
7718 if( pNear ){
7719 int nNear = 0;
7720 int i;
7721 if( p->n ){
7722 for(i=0; i<p->n; i++){
7723 char c = (char)p->p[i];
7724 if( c<'0' || c>'9' ){
7725 sqlite3Fts5ParseError(
7726 pParse, "expected integer, got \"%.*s\"", p->n, p->p
7727 );
7728 return;
7729 }
7730 if( nNear<214748363 ) nNear = nNear * 10 + (p->p[i] - '0');
7731 /* ^^^^^^^^^^^^^^^--- Prevent integer overflow */
7732 }
7733 }else{
7734 nNear = FTS5_DEFAULT_NEARDIST10;
7735 }
7736 pNear->nNear = nNear;
7737 }
7738}
7739
7740/*
7741** The second argument passed to this function may be NULL, or it may be
7742** an existing Fts5Colset object. This function returns a pointer to
7743** a new colset object containing the contents of (p) with new value column
7744** number iCol appended.
7745**
7746** If an OOM error occurs, store an error code in pParse and return NULL.
7747** The old colset object (if any) is not freed in this case.
7748*/
7749static Fts5Colset *fts5ParseColset(
7750 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
7751 Fts5Colset *p, /* Existing colset object */
7752 int iCol /* New column to add to colset object */
7753){
7754 int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */
7755 Fts5Colset *pNew; /* New colset object to return */
7756
7757 assert( pParse->rc==SQLITE_OK )((void) (0));
7758 assert( iCol>=0 && iCol<pParse->pConfig->nCol )((void) (0));
7759
7760 pNew = sqlite3_realloc64sqlite3_api->realloc64(p, SZ_FTS5COLSET(nCol+1)(sizeof(i64)*((nCol+1 +2)/2)));
7761 if( pNew==0 ){
7762 pParse->rc = SQLITE_NOMEM7;
7763 }else{
7764 int *aiCol = pNew->aiCol;
7765 int i, j;
7766 for(i=0; i<nCol; i++){
7767 if( aiCol[i]==iCol ) return pNew;
7768 if( aiCol[i]>iCol ) break;
7769 }
7770 for(j=nCol; j>i; j--){
7771 aiCol[j] = aiCol[j-1];
7772 }
7773 aiCol[i] = iCol;
7774 pNew->nCol = nCol+1;
7775
7776#ifndef NDEBUG1
7777 /* Check that the array is in order and contains no duplicate entries. */
7778 for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] )((void) (0));
7779#endif
7780 }
7781
7782 return pNew;
7783}
7784
7785/*
7786** Allocate and return an Fts5Colset object specifying the inverse of
7787** the colset passed as the second argument. Free the colset passed
7788** as the second argument before returning.
7789*/
7790static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){
7791 Fts5Colset *pRet;
7792 int nCol = pParse->pConfig->nCol;
7793
7794 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc,
7795 SZ_FTS5COLSET(nCol+1)(sizeof(i64)*((nCol+1 +2)/2))
7796 );
7797 if( pRet ){
7798 int i;
7799 int iOld = 0;
7800 for(i=0; i<nCol; i++){
7801 if( iOld>=p->nCol || p->aiCol[iOld]!=i ){
7802 pRet->aiCol[pRet->nCol++] = i;
7803 }else{
7804 iOld++;
7805 }
7806 }
7807 }
7808
7809 sqlite3_freesqlite3_api->free(p);
7810 return pRet;
7811}
7812
7813static Fts5Colset *sqlite3Fts5ParseColset(
7814 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
7815 Fts5Colset *pColset, /* Existing colset object */
7816 Fts5Token *p
7817){
7818 Fts5Colset *pRet = 0;
7819 int iCol;
7820 char *z; /* Dequoted copy of token p */
7821
7822 z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
7823 if( pParse->rc==SQLITE_OK0 ){
7824 Fts5Config *pConfig = pParse->pConfig;
7825 sqlite3Fts5Dequote(z);
7826 for(iCol=0; iCol<pConfig->nCol; iCol++){
7827 if( 0==sqlite3_stricmpsqlite3_api->stricmp(pConfig->azCol[iCol], z) ) break;
7828 }
7829 if( iCol==pConfig->nCol ){
7830 sqlite3Fts5ParseError(pParse, "no such column: %s", z);
7831 }else{
7832 pRet = fts5ParseColset(pParse, pColset, iCol);
7833 }
7834 sqlite3_freesqlite3_api->free(z);
7835 }
7836
7837 if( pRet==0 ){
7838 assert( pParse->rc!=SQLITE_OK )((void) (0));
7839 sqlite3_freesqlite3_api->free(pColset);
7840 }
7841
7842 return pRet;
7843}
7844
7845/*
7846** If argument pOrig is NULL, or if (*pRc) is set to anything other than
7847** SQLITE_OK when this function is called, NULL is returned.
7848**
7849** Otherwise, a copy of (*pOrig) is made into memory obtained from
7850** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation
7851** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned.
7852*/
7853static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){
7854 Fts5Colset *pRet;
7855 if( pOrig ){
7856 sqlite3_int64 nByte = SZ_FTS5COLSET(pOrig->nCol)(sizeof(i64)*((pOrig->nCol+2)/2));
7857 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte);
7858 if( pRet ){
7859 memcpy(pRet, pOrig, (size_t)nByte);
7860 }
7861 }else{
7862 pRet = 0;
7863 }
7864 return pRet;
7865}
7866
7867/*
7868** Remove from colset pColset any columns that are not also in colset pMerge.
7869*/
7870static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){
7871 int iIn = 0; /* Next input in pColset */
7872 int iMerge = 0; /* Next input in pMerge */
7873 int iOut = 0; /* Next output slot in pColset */
7874
7875 while( iIn<pColset->nCol && iMerge<pMerge->nCol ){
7876 int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge];
7877 if( iDiff==0 ){
7878 pColset->aiCol[iOut++] = pMerge->aiCol[iMerge];
7879 iMerge++;
7880 iIn++;
7881 }else if( iDiff>0 ){
7882 iMerge++;
7883 }else{
7884 iIn++;
7885 }
7886 }
7887 pColset->nCol = iOut;
7888}
7889
7890/*
7891** Recursively apply colset pColset to expression node pNode and all of
7892** its decendents. If (*ppFree) is not NULL, it contains a spare copy
7893** of pColset. This function may use the spare copy and set (*ppFree) to
7894** zero, or it may create copies of pColset using fts5CloneColset().
7895*/
7896static void fts5ParseSetColset(
7897 Fts5Parse *pParse,
7898 Fts5ExprNode *pNode,
7899 Fts5Colset *pColset,
7900 Fts5Colset **ppFree
7901){
7902 if( pParse->rc==SQLITE_OK0 ){
7903 assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING((void) (0))
7904 || pNode->eType==FTS5_AND || pNode->eType==FTS5_OR((void) (0))
7905 || pNode->eType==FTS5_NOT || pNode->eType==FTS5_EOF((void) (0))
7906 )((void) (0));
7907 if( pNode->eType==FTS5_STRING9 || pNode->eType==FTS5_TERM4 ){
7908 Fts5ExprNearset *pNear = pNode->pNear;
7909 if( pNear->pColset ){
7910 fts5MergeColset(pNear->pColset, pColset);
7911 if( pNear->pColset->nCol==0 ){
7912 pNode->eType = FTS5_EOF0;
7913 pNode->xNext = 0;
7914 }
7915 }else if( *ppFree ){
7916 pNear->pColset = pColset;
7917 *ppFree = 0;
7918 }else{
7919 pNear->pColset = fts5CloneColset(&pParse->rc, pColset);
7920 }
7921 }else{
7922 int i;
7923 assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 )((void) (0));
7924 for(i=0; i<pNode->nChild; i++){
7925 fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree);
7926 }
7927 }
7928 }
7929}
7930
7931/*
7932** Apply colset pColset to expression node pExpr and all of its descendents.
7933*/
7934static void sqlite3Fts5ParseSetColset(
7935 Fts5Parse *pParse,
7936 Fts5ExprNode *pExpr,
7937 Fts5Colset *pColset
7938){
7939 Fts5Colset *pFree = pColset;
7940 if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE1 ){
7941 sqlite3Fts5ParseError(pParse,
7942 "fts5: column queries are not supported (detail=none)"
7943 );
7944 }else{
7945 fts5ParseSetColset(pParse, pExpr, pColset, &pFree);
7946 }
7947 sqlite3_freesqlite3_api->free(pFree);
7948}
7949
7950static void fts5ExprAssignXNext(Fts5ExprNode *pNode){
7951 switch( pNode->eType ){
7952 case FTS5_STRING9: {
7953 Fts5ExprNearset *pNear = pNode->pNear;
7954 if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1
7955 && pNear->apPhrase[0]->aTerm[0].pSynonym==0
7956 && pNear->apPhrase[0]->aTerm[0].bFirst==0
7957 ){
7958 pNode->eType = FTS5_TERM4;
7959 pNode->xNext = fts5ExprNodeNext_TERM;
7960 }else{
7961 pNode->xNext = fts5ExprNodeNext_STRING;
7962 }
7963 break;
7964 };
7965
7966 case FTS5_OR1: {
7967 pNode->xNext = fts5ExprNodeNext_OR;
7968 break;
7969 };
7970
7971 case FTS5_AND2: {
7972 pNode->xNext = fts5ExprNodeNext_AND;
7973 break;
7974 };
7975
7976 default: assert( pNode->eType==FTS5_NOT )((void) (0)); {
7977 pNode->xNext = fts5ExprNodeNext_NOT;
7978 break;
7979 };
7980 }
7981}
7982
7983/*
7984** Add pSub as a child of p.
7985*/
7986static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
7987 int ii = p->nChild;
7988 if( p->eType!=FTS5_NOT3 && pSub->eType==p->eType ){
7989 int nByte = sizeof(Fts5ExprNode*) * pSub->nChild;
7990 memcpy(&p->apChild[p->nChild], pSub->apChild, nByte);
7991 p->nChild += pSub->nChild;
7992 sqlite3_freesqlite3_api->free(pSub);
7993 }else{
7994 p->apChild[p->nChild++] = pSub;
7995 }
7996 for( ; ii<p->nChild; ii++){
7997 p->iHeight = MAX(p->iHeight, p->apChild[ii]->iHeight + 1)(((p->iHeight) > (p->apChild[ii]->iHeight + 1)) ?
(p->iHeight) : (p->apChild[ii]->iHeight + 1))
;
7998 }
7999}
8000
8001/*
8002** This function is used when parsing LIKE or GLOB patterns against
8003** trigram indexes that specify either detail=column or detail=none.
8004** It converts a phrase:
8005**
8006** abc + def + ghi
8007**
8008** into an AND tree:
8009**
8010** abc AND def AND ghi
8011*/
8012static Fts5ExprNode *fts5ParsePhraseToAnd(
8013 Fts5Parse *pParse,
8014 Fts5ExprNearset *pNear
8015){
8016 int nTerm = pNear->apPhrase[0]->nTerm;
8017 int ii;
8018 int nByte;
8019 Fts5ExprNode *pRet;
8020
8021 assert( pNear->nPhrase==1 )((void) (0));
8022 assert( pParse->bPhraseToAnd )((void) (0));
8023
8024 nByte = SZ_FTS5EXPRNODE(nTerm+1)(__builtin_offsetof(Fts5ExprNode, apChild) + (nTerm+1)*sizeof
(Fts5ExprNode*))
;
8025 pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
8026 if( pRet ){
8027 pRet->eType = FTS5_AND2;
8028 pRet->nChild = nTerm;
8029 pRet->iHeight = 1;
8030 fts5ExprAssignXNext(pRet);
8031 pParse->nPhrase--;
8032 for(ii=0; ii<nTerm; ii++){
8033 Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(
8034 &pParse->rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm
))
8035 );
8036 if( pPhrase ){
8037 if( parseGrowPhraseArray(pParse) ){
8038 fts5ExprPhraseFree(pPhrase);
8039 }else{
8040 Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii];
8041 Fts5ExprTerm *pTo = &pPhrase->aTerm[0];
8042 pParse->apPhrase[pParse->nPhrase++] = pPhrase;
8043 pPhrase->nTerm = 1;
8044 pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm);
8045 pTo->nQueryTerm = p->nQueryTerm;
8046 pTo->nFullTerm = p->nFullTerm;
8047 pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING9,
8048 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
8049 );
8050 }
8051 }
8052 }
8053
8054 if( pParse->rc ){
8055 sqlite3Fts5ParseNodeFree(pRet);
8056 pRet = 0;
8057 }else{
8058 sqlite3Fts5ParseNearsetFree(pNear);
8059 }
8060 }
8061
8062 return pRet;
8063}
8064
8065/*
8066** Allocate and return a new expression object. If anything goes wrong (i.e.
8067** OOM error), leave an error code in pParse and return NULL.
8068*/
8069static Fts5ExprNode *sqlite3Fts5ParseNode(
8070 Fts5Parse *pParse, /* Parse context */
8071 int eType, /* FTS5_STRING, AND, OR or NOT */
8072 Fts5ExprNode *pLeft, /* Left hand child expression */
8073 Fts5ExprNode *pRight, /* Right hand child expression */
8074 Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */
8075){
8076 Fts5ExprNode *pRet = 0;
8077
8078 if( pParse->rc==SQLITE_OK0 ){
8079 int nChild = 0; /* Number of children of returned node */
8080 sqlite3_int64 nByte; /* Bytes of space to allocate for this node */
8081
8082 assert( (eType!=FTS5_STRING && !pNear)((void) (0))
8083 || (eType==FTS5_STRING && !pLeft && !pRight)((void) (0))
8084 )((void) (0));
8085 if( eType==FTS5_STRING9 && pNear==0 ) return 0;
8086 if( eType!=FTS5_STRING9 && pLeft==0 ) return pRight;
8087 if( eType!=FTS5_STRING9 && pRight==0 ) return pLeft;
8088
8089 if( eType==FTS5_STRING9
8090 && pParse->bPhraseToAnd
8091 && pNear->apPhrase[0]->nTerm>1
8092 ){
8093 pRet = fts5ParsePhraseToAnd(pParse, pNear);
8094 }else{
8095 if( eType==FTS5_NOT3 ){
8096 nChild = 2;
8097 }else if( eType==FTS5_AND2 || eType==FTS5_OR1 ){
8098 nChild = 2;
8099 if( pLeft->eType==eType ) nChild += pLeft->nChild-1;
8100 if( pRight->eType==eType ) nChild += pRight->nChild-1;
8101 }
8102
8103 nByte = SZ_FTS5EXPRNODE(nChild)(__builtin_offsetof(Fts5ExprNode, apChild) + (nChild)*sizeof(
Fts5ExprNode*))
;
8104 pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
8105
8106 if( pRet ){
8107 pRet->eType = eType;
8108 pRet->pNear = pNear;
8109 fts5ExprAssignXNext(pRet);
8110 if( eType==FTS5_STRING9 ){
8111 int iPhrase;
8112 for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
8113 pNear->apPhrase[iPhrase]->pNode = pRet;
8114 if( pNear->apPhrase[iPhrase]->nTerm==0 ){
8115 pRet->xNext = 0;
8116 pRet->eType = FTS5_EOF0;
8117 }
8118 }
8119
8120 if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL0 ){
8121 Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
8122 if( pNear->nPhrase!=1
8123 || pPhrase->nTerm>1
8124 || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst)
8125 ){
8126 sqlite3Fts5ParseError(pParse,
8127 "fts5: %s queries are not supported (detail!=full)",
8128 pNear->nPhrase==1 ? "phrase": "NEAR"
8129 );
8130 sqlite3Fts5ParseNodeFree(pRet);
8131 pRet = 0;
8132 pNear = 0;
8133 assert( pLeft==0 && pRight==0 )((void) (0));
8134 }
8135 }
8136 }else{
8137 assert( pNear==0 )((void) (0));
8138 fts5ExprAddChildren(pRet, pLeft);
8139 fts5ExprAddChildren(pRet, pRight);
8140 pLeft = pRight = 0;
8141 if( pRet->iHeight>SQLITE_FTS5_MAX_EXPR_DEPTH256 ){
8142 sqlite3Fts5ParseError(pParse,
8143 "fts5 expression tree is too large (maximum depth %d)",
8144 SQLITE_FTS5_MAX_EXPR_DEPTH256
8145 );
8146 sqlite3Fts5ParseNodeFree(pRet);
8147 pRet = 0;
8148 }
8149 }
8150 }
8151 }
8152 }
8153
8154 if( pRet==0 ){
8155 assert( pParse->rc!=SQLITE_OK )((void) (0));
8156 sqlite3Fts5ParseNodeFree(pLeft);
8157 sqlite3Fts5ParseNodeFree(pRight);
8158 sqlite3Fts5ParseNearsetFree(pNear);
8159 }
8160 return pRet;
8161}
8162
8163static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
8164 Fts5Parse *pParse, /* Parse context */
8165 Fts5ExprNode *pLeft, /* Left hand child expression */
8166 Fts5ExprNode *pRight /* Right hand child expression */
8167){
8168 Fts5ExprNode *pRet = 0;
8169 Fts5ExprNode *pPrev;
8170
8171 if( pParse->rc ){
8172 sqlite3Fts5ParseNodeFree(pLeft);
8173 sqlite3Fts5ParseNodeFree(pRight);
8174 }else{
8175
8176 assert( pLeft->eType==FTS5_STRING((void) (0))
8177 || pLeft->eType==FTS5_TERM((void) (0))
8178 || pLeft->eType==FTS5_EOF((void) (0))
8179 || pLeft->eType==FTS5_AND((void) (0))
8180 )((void) (0));
8181 assert( pRight->eType==FTS5_STRING((void) (0))
8182 || pRight->eType==FTS5_TERM((void) (0))
8183 || pRight->eType==FTS5_EOF((void) (0))
8184 || (pRight->eType==FTS5_AND && pParse->bPhraseToAnd)((void) (0))
8185 )((void) (0));
8186
8187 if( pLeft->eType==FTS5_AND2 ){
8188 pPrev = pLeft->apChild[pLeft->nChild-1];
8189 }else{
8190 pPrev = pLeft;
8191 }
8192 assert( pPrev->eType==FTS5_STRING((void) (0))
8193 || pPrev->eType==FTS5_TERM((void) (0))
8194 || pPrev->eType==FTS5_EOF((void) (0))
8195 )((void) (0));
8196
8197 if( pRight->eType==FTS5_EOF0 ){
8198 assert( pParse->apPhrase!=0 )((void) (0));
8199 assert( pParse->nPhrase>0 )((void) (0));
8200 assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] )((void) (0));
8201 sqlite3Fts5ParseNodeFree(pRight);
8202 pRet = pLeft;
8203 pParse->nPhrase--;
8204 }
8205 else if( pPrev->eType==FTS5_EOF0 ){
8206 Fts5ExprPhrase **ap;
8207
8208 if( pPrev==pLeft ){
8209 pRet = pRight;
8210 }else{
8211 pLeft->apChild[pLeft->nChild-1] = pRight;
8212 pRet = pLeft;
8213 }
8214
8215 ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase];
8216 assert( ap[0]==pPrev->pNear->apPhrase[0] )((void) (0));
8217 memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase);
8218 pParse->nPhrase--;
8219
8220 sqlite3Fts5ParseNodeFree(pPrev);
8221 }
8222 else{
8223 pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND2, pLeft, pRight, 0);
8224 }
8225 }
8226
8227 return pRet;
8228}
8229
8230#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8231static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
8232 sqlite3_int64 nByte = 0;
8233 Fts5ExprTerm *p;
8234 char *zQuoted;
8235
8236 /* Determine the maximum amount of space required. */
8237 for(p=pTerm; p; p=p->pSynonym){
8238 nByte += pTerm->nQueryTerm * 2 + 3 + 2;
8239 }
8240 zQuoted = sqlite3_malloc64sqlite3_api->malloc64(nByte);
8241
8242 if( zQuoted ){
8243 int i = 0;
8244 for(p=pTerm; p; p=p->pSynonym){
8245 char *zIn = p->pTerm;
8246 char *zEnd = &zIn[p->nQueryTerm];
8247 zQuoted[i++] = '"';
8248 while( zIn<zEnd ){
8249 if( *zIn=='"' ) zQuoted[i++] = '"';
8250 zQuoted[i++] = *zIn++;
8251 }
8252 zQuoted[i++] = '"';
8253 if( p->pSynonym ) zQuoted[i++] = '|';
8254 }
8255 if( pTerm->bPrefix ){
8256 zQuoted[i++] = ' ';
8257 zQuoted[i++] = '*';
8258 }
8259 zQuoted[i++] = '\0';
8260 }
8261 return zQuoted;
8262}
8263
/*
** Format (zFmt, ...) using sqlite3_vmprintf() and append the result to
** string zApp. zApp may be NULL, in which case the formatted text alone is
** returned. zApp is always freed. The return value is a new buffer that
** the caller must eventually free, or NULL if an allocation failed.
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  va_list args;
  char *zTail;                    /* Newly formatted text */
  char *zOut;                     /* Value to return */

  va_start(args, zFmt);
  zTail = sqlite3_vmprintf(zFmt, args);
  va_end(args);

  if( zApp==0 || zTail==0 ){
    /* Nothing to concatenate - either there is no prefix or the
    ** formatting step failed. */
    zOut = zTail;
  }else{
    zOut = sqlite3_mprintf("%s%s", zApp, zTail);
    sqlite3_free(zTail);
  }
  sqlite3_free(zApp);
  return zOut;
}
8278
8279/*
8280** Compose a tcl-readable representation of expression pExpr. Return a
8281** pointer to a buffer containing that representation. It is the
8282** responsibility of the caller to at some point free the buffer using
8283** sqlite3_free().
8284*/
8285static char *fts5ExprPrintTcl(
8286 Fts5Config *pConfig,
8287 const char *zNearsetCmd,
8288 Fts5ExprNode *pExpr
8289){
8290 char *zRet = 0;
8291 if( pExpr->eType==FTS5_STRING9 || pExpr->eType==FTS5_TERM4 ){
8292 Fts5ExprNearset *pNear = pExpr->pNear;
8293 int i;
8294 int iTerm;
8295
8296 zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd);
8297 if( zRet==0 ) return 0;
8298 if( pNear->pColset ){
8299 int *aiCol = pNear->pColset->aiCol;
8300 int nCol = pNear->pColset->nCol;
8301 if( nCol==1 ){
8302 zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]);
8303 }else{
8304 zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]);
8305 for(i=1; i<pNear->pColset->nCol; i++){
8306 zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]);
8307 }
8308 zRet = fts5PrintfAppend(zRet, "} ");
8309 }
8310 if( zRet==0 ) return 0;
8311 }
8312
8313 if( pNear->nPhrase>1 ){
8314 zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
8315 if( zRet==0 ) return 0;
8316 }
8317
8318 zRet = fts5PrintfAppend(zRet, "--");
8319 if( zRet==0 ) return 0;
8320
8321 for(i=0; i<pNear->nPhrase; i++){
8322 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
8323
8324 zRet = fts5PrintfAppend(zRet, " {");
8325 for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
8326 Fts5ExprTerm *p = &pPhrase->aTerm[iTerm];
8327 zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ",
8328 p->nQueryTerm, p->pTerm
8329 );
8330 if( pPhrase->aTerm[iTerm].bPrefix ){
8331 zRet = fts5PrintfAppend(zRet, "*");
8332 }
8333 }
8334
8335 if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
8336 if( zRet==0 ) return 0;
8337 }
8338
8339 }else if( pExpr->eType==0 ){
8340 zRet = sqlite3_mprintfsqlite3_api->mprintf("{}");
8341 }else{
8342 char const *zOp = 0;
8343 int i;
8344 switch( pExpr->eType ){
8345 case FTS5_AND2: zOp = "AND"; break;
8346 case FTS5_NOT3: zOp = "NOT"; break;
8347 default:
8348 assert( pExpr->eType==FTS5_OR )((void) (0));
8349 zOp = "OR";
8350 break;
8351 }
8352
8353 zRet = sqlite3_mprintfsqlite3_api->mprintf("%s", zOp);
8354 for(i=0; zRet && i<pExpr->nChild; i++){
8355 char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]);
8356 if( !z ){
8357 sqlite3_freesqlite3_api->free(zRet);
8358 zRet = 0;
8359 }else{
8360 zRet = fts5PrintfAppend(zRet, " [%z]", z);
8361 }
8362 }
8363 }
8364
8365 return zRet;
8366}
8367
8368static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
8369 char *zRet = 0;
8370 if( pExpr->eType==0 ){
8371 return sqlite3_mprintfsqlite3_api->mprintf("\"\"");
8372 }else
8373 if( pExpr->eType==FTS5_STRING9 || pExpr->eType==FTS5_TERM4 ){
8374 Fts5ExprNearset *pNear = pExpr->pNear;
8375 int i;
8376 int iTerm;
8377
8378 if( pNear->pColset ){
8379 int ii;
8380 Fts5Colset *pColset = pNear->pColset;
8381 if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{");
8382 for(ii=0; ii<pColset->nCol; ii++){
8383 zRet = fts5PrintfAppend(zRet, "%s%s",
8384 pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " "
8385 );
8386 }
8387 if( zRet ){
8388 zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : "");
8389 }
8390 if( zRet==0 ) return 0;
8391 }
8392
8393 if( pNear->nPhrase>1 ){
8394 zRet = fts5PrintfAppend(zRet, "NEAR(");
8395 if( zRet==0 ) return 0;
8396 }
8397
8398 for(i=0; i<pNear->nPhrase; i++){
8399 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
8400 if( i!=0 ){
8401 zRet = fts5PrintfAppend(zRet, " ");
8402 if( zRet==0 ) return 0;
8403 }
8404 for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
8405 char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
8406 if( zTerm ){
8407 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
8408 sqlite3_freesqlite3_api->free(zTerm);
8409 }
8410 if( zTerm==0 || zRet==0 ){
8411 sqlite3_freesqlite3_api->free(zRet);
8412 return 0;
8413 }
8414 }
8415 }
8416
8417 if( pNear->nPhrase>1 ){
8418 zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
8419 if( zRet==0 ) return 0;
8420 }
8421
8422 }else{
8423 char const *zOp = 0;
8424 int i;
8425
8426 switch( pExpr->eType ){
8427 case FTS5_AND2: zOp = " AND "; break;
8428 case FTS5_NOT3: zOp = " NOT "; break;
8429 default:
8430 assert( pExpr->eType==FTS5_OR )((void) (0));
8431 zOp = " OR ";
8432 break;
8433 }
8434
8435 for(i=0; i<pExpr->nChild; i++){
8436 char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
8437 if( z==0 ){
8438 sqlite3_freesqlite3_api->free(zRet);
8439 zRet = 0;
8440 }else{
8441 int e = pExpr->apChild[i]->eType;
8442 int b = (e!=FTS5_STRING9 && e!=FTS5_TERM4 && e!=FTS5_EOF0);
8443 zRet = fts5PrintfAppend(zRet, "%s%s%z%s",
8444 (i==0 ? "" : zOp),
8445 (b?"(":""), z, (b?")":"")
8446 );
8447 }
8448 if( zRet==0 ) break;
8449 }
8450 }
8451
8452 return zRet;
8453}
8454
8455/*
8456** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
8457** and fts5_expr_tcl() (bTcl!=0).
8458*/
8459static void fts5ExprFunction(
8460 sqlite3_context *pCtx, /* Function call context */
8461 int nArg, /* Number of args */
8462 sqlite3_value **apVal, /* Function arguments */
8463 int bTcl
8464){
8465 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx);
8466 sqlite3 *db = sqlite3_context_db_handlesqlite3_api->context_db_handle(pCtx);
8467 const char *zExpr = 0;
8468 char *zErr = 0;
8469 Fts5Expr *pExpr = 0;
8470 int rc;
8471 int i;
8472
8473 const char **azConfig; /* Array of arguments for Fts5Config */
8474 const char *zNearsetCmd = "nearset";
8475 int nConfig; /* Size of azConfig[] */
8476 Fts5Config *pConfig = 0;
8477 int iArg = 1;
8478
8479 if( nArg<1 ){
8480 zErr = sqlite3_mprintfsqlite3_api->mprintf("wrong number of arguments to function %s",
8481 bTcl ? "fts5_expr_tcl" : "fts5_expr"
8482 );
8483 sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1);
8484 sqlite3_freesqlite3_api->free(zErr);
8485 return;
8486 }
8487
8488 if( bTcl && nArg>1 ){
8489 zNearsetCmd = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[1]);
8490 iArg = 2;
8491 }
8492
8493 nConfig = 3 + (nArg-iArg);
8494 azConfig = (const char**)sqlite3_malloc64sqlite3_api->malloc64(sizeof(char*) * nConfig);
8495 if( azConfig==0 ){
8496 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(pCtx);
8497 return;
8498 }
8499 azConfig[0] = 0;
8500 azConfig[1] = "main";
8501 azConfig[2] = "tbl";
8502 for(i=3; iArg<nArg; iArg++){
8503 const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[iArg]);
8504 azConfig[i++] = (z ? z : "");
8505 }
8506
8507 zExpr = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[0]);
8508 if( zExpr==0 ) zExpr = "";
8509
8510 rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
8511 if( rc==SQLITE_OK0 ){
8512 rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr);
8513 }
8514 if( rc==SQLITE_OK0 ){
8515 char *zText;
8516 if( pExpr->pRoot->xNext==0 ){
8517 zText = sqlite3_mprintfsqlite3_api->mprintf("");
8518 }else if( bTcl ){
8519 zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
8520 }else{
8521 zText = fts5ExprPrint(pConfig, pExpr->pRoot);
8522 }
8523 if( zText==0 ){
8524 rc = SQLITE_NOMEM7;
8525 }else{
8526 sqlite3_result_textsqlite3_api->result_text(pCtx, zText, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
8527 sqlite3_freesqlite3_api->free(zText);
8528 }
8529 }
8530
8531 if( rc!=SQLITE_OK0 ){
8532 if( zErr ){
8533 sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1);
8534 sqlite3_freesqlite3_api->free(zErr);
8535 }else{
8536 sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc);
8537 }
8538 }
8539 sqlite3_freesqlite3_api->free((void *)azConfig);
8540 sqlite3Fts5ConfigFree(pConfig);
8541 sqlite3Fts5ExprFree(pExpr);
8542}
8543
8544static void fts5ExprFunctionHr(
8545 sqlite3_context *pCtx, /* Function call context */
8546 int nArg, /* Number of args */
8547 sqlite3_value **apVal /* Function arguments */
8548){
8549 fts5ExprFunction(pCtx, nArg, apVal, 0);
8550}
8551static void fts5ExprFunctionTcl(
8552 sqlite3_context *pCtx, /* Function call context */
8553 int nArg, /* Number of args */
8554 sqlite3_value **apVal /* Function arguments */
8555){
8556 fts5ExprFunction(pCtx, nArg, apVal, 1);
8557}
8558
8559/*
8560** The implementation of an SQLite user-defined-function that accepts a
8561** single integer as an argument. If the integer is an alpha-numeric
8562** unicode code point, 1 is returned. Otherwise 0.
8563*/
8564static void fts5ExprIsAlnum(
8565 sqlite3_context *pCtx, /* Function call context */
8566 int nArg, /* Number of args */
8567 sqlite3_value **apVal /* Function arguments */
8568){
8569 int iCode;
8570 u8 aArr[32];
8571 if( nArg!=1 ){
8572 sqlite3_result_errorsqlite3_api->result_error(pCtx,
8573 "wrong number of arguments to function fts5_isalnum", -1
8574 );
8575 return;
8576 }
8577 memset(aArr, 0, sizeof(aArr));
8578 sqlite3Fts5UnicodeCatParse("L*", aArr);
8579 sqlite3Fts5UnicodeCatParse("N*", aArr);
8580 sqlite3Fts5UnicodeCatParse("Co", aArr);
8581 iCode = sqlite3_value_intsqlite3_api->value_int(apVal[0]);
8582 sqlite3_result_intsqlite3_api->result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]);
8583}
8584
8585static void fts5ExprFold(
8586 sqlite3_context *pCtx, /* Function call context */
8587 int nArg, /* Number of args */
8588 sqlite3_value **apVal /* Function arguments */
8589){
8590 if( nArg!=1 && nArg!=2 ){
8591 sqlite3_result_errorsqlite3_api->result_error(pCtx,
8592 "wrong number of arguments to function fts5_fold", -1
8593 );
8594 }else{
8595 int iCode;
8596 int bRemoveDiacritics = 0;
8597 iCode = sqlite3_value_intsqlite3_api->value_int(apVal[0]);
8598 if( nArg==2 ) bRemoveDiacritics = sqlite3_value_intsqlite3_api->value_int(apVal[1]);
8599 sqlite3_result_intsqlite3_api->result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
8600 }
8601}
8602#endif /* if SQLITE_TEST || SQLITE_FTS5_DEBUG */
8603
8604/*
8605** This is called during initialization to register the fts5_expr() scalar
8606** UDF with the SQLite handle passed as the only argument.
8607*/
8608static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
8609#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8610 struct Fts5ExprFunc {
8611 const char *z;
8612 void (*x)(sqlite3_context*,int,sqlite3_value**);
8613 } aFunc[] = {
8614 { "fts5_expr", fts5ExprFunctionHr },
8615 { "fts5_expr_tcl", fts5ExprFunctionTcl },
8616 { "fts5_isalnum", fts5ExprIsAlnum },
8617 { "fts5_fold", fts5ExprFold },
8618 };
8619 int i;
8620 int rc = SQLITE_OK0;
8621 void *pCtx = (void*)pGlobal;
8622
8623 for(i=0; rc==SQLITE_OK0 && i<ArraySize(aFunc)((int)(sizeof(aFunc) / sizeof(aFunc[0]))); i++){
8624 struct Fts5ExprFunc *p = &aFunc[i];
8625 rc = sqlite3_create_functionsqlite3_api->create_function(db, p->z, -1, SQLITE_UTF81, pCtx, p->x, 0, 0);
8626 }
8627#else
8628 int rc = SQLITE_OK0;
8629 UNUSED_PARAM2(pGlobal,db)(void)(pGlobal), (void)(db);
8630#endif
8631
8632 /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and
8633 ** sqlite3Fts5ParserFallback() are unused */
8634#ifndef NDEBUG1
8635 (void)sqlite3Fts5ParserTrace;
8636#endif
8637 (void)sqlite3Fts5ParserFallback;
8638
8639 return rc;
8640}
8641
8642/*
8643** Return the number of phrases in expression pExpr.
8644*/
8645static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
8646 return (pExpr ? pExpr->nPhrase : 0);
8647}
8648
8649/*
8650** Return the number of terms in the iPhrase'th phrase in pExpr.
8651*/
8652static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
8653 if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
8654 return pExpr->apExprPhrase[iPhrase]->nTerm;
8655}
8656
8657/*
8658** This function is used to access the current position list for phrase
8659** iPhrase.
8660*/
8661static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
8662 int nRet;
8663 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
8664 Fts5ExprNode *pNode = pPhrase->pNode;
8665 if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
8666 *pa = pPhrase->poslist.p;
8667 nRet = pPhrase->poslist.n;
8668 }else{
8669 *pa = 0;
8670 nRet = 0;
8671 }
8672 return nRet;
8673}
8674
8675struct Fts5PoslistPopulator {
8676 Fts5PoslistWriter writer;
8677 int bOk; /* True if ok to populate */
8678 int bMiss;
8679};
8680
8681/*
8682** Clear the position lists associated with all phrases in the expression
8683** passed as the first argument. Argument bLive is true if the expression
8684** might be pointing to a real entry, otherwise it has just been reset.
8685**
8686** At present this function is only used for detail=col and detail=none
8687** fts5 tables. This implies that all phrases must be at most 1 token
8688** in size, as phrase matches are not supported without detail=full.
8689*/
8690static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){
8691 Fts5PoslistPopulator *pRet;
8692 pRet = sqlite3_malloc64sqlite3_api->malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
8693 if( pRet ){
8694 int i;
8695 memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
8696 for(i=0; i<pExpr->nPhrase; i++){
8697 Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist;
8698 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
8699 assert( pExpr->apExprPhrase[i]->nTerm<=1 )((void) (0));
8700 if( bLive &&
8701 (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof)
8702 ){
8703 pRet[i].bMiss = 1;
8704 }else{
8705 pBuf->n = 0;
8706 }
8707 }
8708 }
8709 return pRet;
8710}
8711
8712struct Fts5ExprCtx {
8713 Fts5Expr *pExpr;
8714 Fts5PoslistPopulator *aPopulator;
8715 i64 iOff;
8716};
8717typedef struct Fts5ExprCtx Fts5ExprCtx;
8718
8719/*
8720** TODO: Make this more efficient!
8721*/
8722static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
8723 int i;
8724 for(i=0; i<pColset->nCol; i++){
8725 if( pColset->aiCol[i]==iCol ) return 1;
8726 }
8727 return 0;
8728}
8729
/*
** pToken is a buffer nToken bytes in size that may or may not contain
** an embedded 0x00 byte. If it does, return the number of bytes in
** the buffer before the 0x00. If it does not, return nToken.
*/
static int fts5QueryTerm(const char *pToken, int nToken){
  int nPrefix = 0;
  while( nPrefix<nToken && pToken[nPrefix]!='\0' ){
    nPrefix++;
  }
  return nPrefix;
}
8740
8741static int fts5ExprPopulatePoslistsCb(
8742 void *pCtx, /* Copy of 2nd argument to xTokenize() */
8743 int tflags, /* Mask of FTS5_TOKEN_* flags */
8744 const char *pToken, /* Pointer to buffer containing token */
8745 int nToken, /* Size of token in bytes */
8746 int iUnused1, /* Byte offset of token within input text */
8747 int iUnused2 /* Byte offset of end of token within input text */
8748){
8749 Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
8750 Fts5Expr *pExpr = p->pExpr;
8751 int i;
8752 int nQuery = nToken;
8753 i64 iRowid = pExpr->pRoot->iRowid;
8754
8755 UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2);
8756
8757 if( nQuery>FTS5_MAX_TOKEN_SIZE32768 ) nQuery = FTS5_MAX_TOKEN_SIZE32768;
8758 if( pExpr->pConfig->bTokendata ){
8759 nQuery = fts5QueryTerm(pToken, nQuery);
8760 }
8761 if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ) p->iOff++;
8762 for(i=0; i<pExpr->nPhrase; i++){
8763 Fts5ExprTerm *pT;
8764 if( p->aPopulator[i].bOk==0 ) continue;
8765 for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){
8766 if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix))
8767 && memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0
8768 ){
8769 int rc = sqlite3Fts5PoslistWriterAppend(
8770 &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
8771 );
8772 if( rc==SQLITE_OK0 && (pExpr->pConfig->bTokendata || pT->bPrefix) ){
8773 int iCol = p->iOff>>32;
8774 int iTokOff = p->iOff & 0x7FFFFFFF;
8775 rc = sqlite3Fts5IndexIterWriteTokendata(
8776 pT->pIter, pToken, nToken, iRowid, iCol, iTokOff
8777 );
8778 }
8779 if( rc ) return rc;
8780 break;
8781 }
8782 }
8783 }
8784 return SQLITE_OK0;
8785}
8786
8787static int sqlite3Fts5ExprPopulatePoslists(
8788 Fts5Config *pConfig,
8789 Fts5Expr *pExpr,
8790 Fts5PoslistPopulator *aPopulator,
8791 int iCol,
8792 const char *z, int n
8793){
8794 int i;
8795 Fts5ExprCtx sCtx;
8796 sCtx.pExpr = pExpr;
8797 sCtx.aPopulator = aPopulator;
8798 sCtx.iOff = (((i64)iCol) << 32) - 1;
8799
8800 for(i=0; i<pExpr->nPhrase; i++){
8801 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
8802 Fts5Colset *pColset = pNode->pNear->pColset;
8803 if( (pColset && 0==fts5ExprColsetTest(pColset, iCol))
8804 || aPopulator[i].bMiss
8805 ){
8806 aPopulator[i].bOk = 0;
8807 }else{
8808 aPopulator[i].bOk = 1;
8809 }
8810 }
8811
8812 return sqlite3Fts5Tokenize(pConfig,
8813 FTS5_TOKENIZE_DOCUMENT0x0004, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
8814 );
8815}
8816
8817static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
8818 if( pNode->eType==FTS5_TERM4 || pNode->eType==FTS5_STRING9 ){
8819 pNode->pNear->apPhrase[0]->poslist.n = 0;
8820 }else{
8821 int i;
8822 for(i=0; i<pNode->nChild; i++){
8823 fts5ExprClearPoslists(pNode->apChild[i]);
8824 }
8825 }
8826}
8827
8828static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){
8829 pNode->iRowid = iRowid;
8830 pNode->bEof = 0;
8831 switch( pNode->eType ){
8832 case 0:
8833 case FTS5_TERM4:
8834 case FTS5_STRING9:
8835 return (pNode->pNear->apPhrase[0]->poslist.n>0);
8836
8837 case FTS5_AND2: {
8838 int i;
8839 for(i=0; i<pNode->nChild; i++){
8840 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
8841 fts5ExprClearPoslists(pNode);
8842 return 0;
8843 }
8844 }
8845 break;
8846 }
8847
8848 case FTS5_OR1: {
8849 int i;
8850 int bRet = 0;
8851 for(i=0; i<pNode->nChild; i++){
8852 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
8853 bRet = 1;
8854 }
8855 }
8856 return bRet;
8857 }
8858
8859 default: {
8860 assert( pNode->eType==FTS5_NOT )((void) (0));
8861 if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
8862 || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
8863 ){
8864 fts5ExprClearPoslists(pNode);
8865 return 0;
8866 }
8867 break;
8868 }
8869 }
8870 return 1;
8871}
8872
8873static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){
8874 fts5ExprCheckPoslists(pExpr->pRoot, iRowid);
8875}
8876
8877/*
8878** This function is only called for detail=columns tables.
8879*/
8880static int sqlite3Fts5ExprPhraseCollist(
8881 Fts5Expr *pExpr,
8882 int iPhrase,
8883 const u8 **ppCollist,
8884 int *pnCollist
8885){
8886 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
8887 Fts5ExprNode *pNode = pPhrase->pNode;
8888 int rc = SQLITE_OK0;
8889
8890 assert( iPhrase>=0 && iPhrase<pExpr->nPhrase )((void) (0));
8891 assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0));
8892
8893 if( pNode->bEof==0
8894 && pNode->iRowid==pExpr->pRoot->iRowid
8895 && pPhrase->poslist.n>0
8896 ){
8897 Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
8898 if( pTerm->pSynonym ){
8899 Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1];
8900 rc = fts5ExprSynonymList(
8901 pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist
8902 );
8903 }else{
8904 *ppCollist = pPhrase->aTerm[0].pIter->pData;
8905 *pnCollist = pPhrase->aTerm[0].pIter->nData;
8906 }
8907 }else{
8908 *ppCollist = 0;
8909 *pnCollist = 0;
8910 }
8911
8912 return rc;
8913}
8914
8915/*
8916** Does the work of the fts5_api.xQueryToken() API method.
8917*/
8918static int sqlite3Fts5ExprQueryToken(
8919 Fts5Expr *pExpr,
8920 int iPhrase,
8921 int iToken,
8922 const char **ppOut,
8923 int *pnOut
8924){
8925 Fts5ExprPhrase *pPhrase = 0;
8926
8927 if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
8928 return SQLITE_RANGE25;
8929 }
8930 pPhrase = pExpr->apExprPhrase[iPhrase];
8931 if( iToken<0 || iToken>=pPhrase->nTerm ){
8932 return SQLITE_RANGE25;
8933 }
8934
8935 *ppOut = pPhrase->aTerm[iToken].pTerm;
8936 *pnOut = pPhrase->aTerm[iToken].nFullTerm;
8937 return SQLITE_OK0;
8938}
8939
8940/*
8941** Does the work of the fts5_api.xInstToken() API method.
8942*/
8943static int sqlite3Fts5ExprInstToken(
8944 Fts5Expr *pExpr,
8945 i64 iRowid,
8946 int iPhrase,
8947 int iCol,
8948 int iOff,
8949 int iToken,
8950 const char **ppOut,
8951 int *pnOut
8952){
8953 Fts5ExprPhrase *pPhrase = 0;
8954 Fts5ExprTerm *pTerm = 0;
8955 int rc = SQLITE_OK0;
8956
8957 if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
8958 return SQLITE_RANGE25;
8959 }
8960 pPhrase = pExpr->apExprPhrase[iPhrase];
8961 if( iToken<0 || iToken>=pPhrase->nTerm ){
8962 return SQLITE_RANGE25;
8963 }
8964 pTerm = &pPhrase->aTerm[iToken];
8965 if( pExpr->pConfig->bTokendata || pTerm->bPrefix ){
8966 rc = sqlite3Fts5IterToken(
8967 pTerm->pIter, pTerm->pTerm, pTerm->nQueryTerm,
8968 iRowid, iCol, iOff+iToken, ppOut, pnOut
8969 );
8970 }else{
8971 *ppOut = pTerm->pTerm;
8972 *pnOut = pTerm->nFullTerm;
8973 }
8974 return rc;
8975}
8976
8977/*
8978** Clear the token mappings for all Fts5IndexIter objects managed by
8979** the expression passed as the only argument.
8980*/
8981static void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){
8982 int ii;
8983 for(ii=0; ii<pExpr->nPhrase; ii++){
8984 Fts5ExprTerm *pT;
8985 for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){
8986 sqlite3Fts5IndexIterClearTokendata(pT->pIter);
8987 }
8988 }
8989}
8990
8991#line 1 "fts5_hash.c"
8992/*
8993** 2014 August 11
8994**
8995** The author disclaims copyright to this source code. In place of
8996** a legal notice, here is a blessing:
8997**
8998** May you do good and not evil.
8999** May you find forgiveness for yourself and forgive others.
9000** May you share freely, never taking more than you give.
9001**
9002******************************************************************************
9003**
9004*/
9005
9006
9007
9008/* #include "fts5Int.h" */
9009
9010typedef struct Fts5HashEntry Fts5HashEntry;
9011
9012/*
9013** This file contains the implementation of an in-memory hash table used
9014** to accumulate "term -> doclist" content before it is flushed to a level-0
9015** segment.
9016*/
9017
9018
9019struct Fts5Hash {
9020 int eDetail; /* Copy of Fts5Config.eDetail */
9021 int *pnByte; /* Pointer to bytes counter */
9022 int nEntry; /* Number of entries currently in hash */
9023 int nSlot; /* Size of aSlot[] array */
9024 Fts5HashEntry *pScan; /* Current ordered scan item */
9025 Fts5HashEntry **aSlot; /* Array of hash slots */
9026};
9027
9028/*
9029** Each entry in the hash table is represented by an object of the
9030** following type. Each object, its key, and its current data are stored
9031** in a single memory allocation. The key immediately follows the object
9032** in memory. The position list data immediately follows the key data
9033** in memory.
9034**
9035** The key is Fts5HashEntry.nKey bytes in size. It consists of a single
9036** byte identifying the index (either the main term index or a prefix-index),
9037** followed by the term data. For example: "0token". There is no
9038** nul-terminator - in this case nKey=6.
9039**
9040** The data that follows the key is in a similar, but not identical format
9041** to the doclist data stored in the database. It is:
9042**
9043** * Rowid, as a varint
9044** * Position list, without 0x00 terminator.
9045** * Size of previous position list and rowid, as a 4 byte
9046** big-endian integer.
9047**
9048** iRowidOff:
9049** Offset of last rowid written to data area. Relative to first byte of
9050** structure.
9051**
9052** nData:
9053** Bytes of data written since iRowidOff.
9054*/
9055struct Fts5HashEntry {
9056 Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */
9057 Fts5HashEntry *pScanNext; /* Next entry in sorted order */
9058
9059 int nAlloc; /* Total size of allocation */
9060 int iSzPoslist; /* Offset of space for 4-byte poslist size */
9061 int nData; /* Total bytes of data (incl. structure) */
9062 int nKey; /* Length of key in bytes */
9063 u8 bDel; /* Set delete-flag @ iSzPoslist */
9064 u8 bContent; /* Set content-flag (detail=none mode) */
9065 i16 iCol; /* Column of last value written */
9066 int iPos; /* Position of last value written */
9067 i64 iRowid; /* Rowid of last value written */
9068};
9069
/*
** Evaluate to a (char*) pointer to the first byte following the object
** that p points to - i.e. to the key of hash entry p. Equivalent to:
**
**   char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
*/
#define fts5EntryKey(p) ( ((char *)(&(p)[1])) )
9076
9077
9078/*
9079** Allocate a new hash table.
9080*/
9081static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){
9082 int rc = SQLITE_OK0;
9083 Fts5Hash *pNew;
9084
9085 *ppNew = pNew = (Fts5Hash*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Hash));
9086 if( pNew==0 ){
9087 rc = SQLITE_NOMEM7;
9088 }else{
9089 sqlite3_int64 nByte;
9090 memset(pNew, 0, sizeof(Fts5Hash));
9091 pNew->pnByte = pnByte;
9092 pNew->eDetail = pConfig->eDetail;
9093
9094 pNew->nSlot = 1024;
9095 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
9096 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64sqlite3_api->malloc64(nByte);
9097 if( pNew->aSlot==0 ){
9098 sqlite3_freesqlite3_api->free(pNew);
9099 *ppNew = 0;
9100 rc = SQLITE_NOMEM7;
9101 }else{
9102 memset(pNew->aSlot, 0, (size_t)nByte);
9103 }
9104 }
9105 return rc;
9106}
9107
9108/*
9109** Free a hash table object.
9110*/
9111static void sqlite3Fts5HashFree(Fts5Hash *pHash){
9112 if( pHash ){
9113 sqlite3Fts5HashClear(pHash);
9114 sqlite3_freesqlite3_api->free(pHash->aSlot);
9115 sqlite3_freesqlite3_api->free(pHash);
9116 }
9117}
9118
9119/*
9120** Empty (but do not delete) a hash table.
9121*/
9122static void sqlite3Fts5HashClear(Fts5Hash *pHash){
9123 int i;
9124 for(i=0; i<pHash->nSlot; i++){
9125 Fts5HashEntry *pNext;
9126 Fts5HashEntry *pSlot;
9127 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
9128 pNext = pSlot->pHashNext;
9129 sqlite3_freesqlite3_api->free(pSlot);
9130 }
9131 }
9132 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
9133 pHash->nEntry = 0;
9134}
9135
9136static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
9137 int i;
9138 unsigned int h = 13;
9139 for(i=n-1; i>=0; i--){
9140 h = (h << 3) ^ h ^ p[i];
9141 }
9142 return (h % nSlot);
9143}
9144
9145static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
9146 int i;
9147 unsigned int h = 13;
9148 for(i=n-1; i>=0; i--){
9149 h = (h << 3) ^ h ^ p[i];
9150 }
9151 h = (h << 3) ^ h ^ b;
9152 return (h % nSlot);
9153}
9154
9155/*
9156** Resize the hash table by doubling the number of slots.
9157*/
9158static int fts5HashResize(Fts5Hash *pHash){
9159 int nNew = pHash->nSlot*2;
9160 int i;
9161 Fts5HashEntry **apNew;
9162 Fts5HashEntry **apOld = pHash->aSlot;
9163
9164 apNew = (Fts5HashEntry**)sqlite3_malloc64sqlite3_api->malloc64(nNew*sizeof(Fts5HashEntry*));
9165 if( !apNew ) return SQLITE_NOMEM7;
9166 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
9167
9168 for(i=0; i<pHash->nSlot; i++){
9169 while( apOld[i] ){
9170 unsigned int iHash;
9171 Fts5HashEntry *p = apOld[i];
9172 apOld[i] = p->pHashNext;
9173 iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p)( ((char *)(&(p)[1])) ), p->nKey);
9174 p->pHashNext = apNew[iHash];
9175 apNew[iHash] = p;
9176 }
9177 }
9178
9179 sqlite3_freesqlite3_api->free(apOld);
9180 pHash->nSlot = nNew;
9181 pHash->aSlot = apNew;
9182 return SQLITE_OK0;
9183}
9184
/*
** If entry p has a pending poslist-size field (p->iSzPoslist!=0), fill it
** in and return the number of bytes by which the entry data grew. If
** p->iSzPoslist is zero, nothing is written and 0 is returned.
**
** Bytes are written relative to p2 when p2 is not NULL, otherwise relative
** to p itself. The header fields of p (iSzPoslist, bDel, bContent, nData)
** are only updated when p2 is NULL.
*/
9185static int fts5HashAddPoslistSize(
9186 Fts5Hash *pHash,
9187 Fts5HashEntry *p,
9188 Fts5HashEntry *p2
9189){
9190 int nRet = 0;
9191 if( p->iSzPoslist ){
9192 u8 *pPtr = p2 ? (u8*)p2 : (u8*)p;
9193 int nData = p->nData;
9194 if( pHash->eDetail==FTS5_DETAIL_NONE1 ){
/* detail=none: no size field. A delete appends one 0x00 byte, and a
** second 0x00 byte if the entry also has the bContent flag set. */
9195 assert( nData==p->iSzPoslist )((void) (0));
9196 if( p->bDel ){
9197 pPtr[nData++] = 0x00;
9198 if( p->bContent ){
9199 pPtr[nData++] = 0x00;
9200 }
9201 }
9202 }else{
9203 int nSz = (nData - p->iSzPoslist - 1); /* Size in bytes */
9204 int nPos = nSz*2 + p->bDel; /* Value of nPos field */
9205
9206 assert( p->bDel==0 || p->bDel==1 )((void) (0));
9207 if( nPos<=127 ){
/* Value fits in the single byte reserved at offset iSzPoslist. */
9208 pPtr[p->iSzPoslist] = (u8)nPos;
9209 }else{
/* A multi-byte varint is required: shift the nSz bytes that follow the
** reserved byte up by (nByte-1) to make room, then write the varint. */
9210 int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
9211 memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
9212 sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
9213 nData += (nByte-1);
9214 }
9215 }
9216
9217 nRet = nData - p->nData;
9218 if( p2==0 ){
/* Writing in place: mark the size field as no longer pending. */
9219 p->iSzPoslist = 0;
9220 p->bDel = 0;
9221 p->bContent = 0;
9222 p->nData = nData;
9223 }
9224 }
9225 return nRet;
9226}
9227
9228/*
9229** Add an entry to the in-memory hash table. The key is the concatenation
9230** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
9231**
9232** (bByte || pToken) -> (iRowid,iCol,iPos)
9233**
9234** Or, if iCol is negative, then the value is a delete marker.
9235*/
9236static int sqlite3Fts5HashWrite(
9237 Fts5Hash *pHash,
9238 i64 iRowid, /* Rowid for this entry */
9239 int iCol, /* Column token appears in (-ve -> delete) */
9240 int iPos, /* Position of token within column */
9241 char bByte, /* First byte of token */
9242 const char *pToken, int nToken /* Token to add or remove to or from index */
9243){
9244 unsigned int iHash;
9245 Fts5HashEntry *p;
9246 u8 *pPtr;
9247 int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
9248 int bNew; /* If non-delete entry should be written */
9249
9250 bNew = (pHash->eDetail==FTS5_DETAIL_FULL0);
9251
9252 /* Attempt to locate an existing hash entry */
9253 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
9254 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
9255 char *zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) );
9256 if( zKey[0]==bByte
9257 && p->nKey==nToken+1
9258 && memcmp(&zKey[1], pToken, nToken)==0
9259 ){
9260 break;
9261 }
9262 }
9263
9264 /* If an existing hash entry cannot be found, create a new one. */
9265 if( p==0 ){
9266 /* Figure out how much space to allocate */
9267 char *zKey;
9268 sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64;
9269 if( nByte<128 ) nByte = 128;
9270
9271 /* Grow the Fts5Hash.aSlot[] array if necessary. */
9272 if( (pHash->nEntry*2)>=pHash->nSlot ){
9273 int rc = fts5HashResize(pHash);
9274 if( rc!=SQLITE_OK0 ) return rc;
9275 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
9276 }
9277
9278 /* Allocate new Fts5HashEntry and add it to the hash table. */
9279 p = (Fts5HashEntry*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
9280 if( !p ) return SQLITE_NOMEM7;
9281 memset(p, 0, sizeof(Fts5HashEntry));
9282 p->nAlloc = (int)nByte;
9283 zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) );
9284 zKey[0] = bByte;
9285 memcpy(&zKey[1], pToken, nToken);
9286 assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) )((void) (0));
9287 p->nKey = nToken+1;
9288 zKey[nToken+1] = '\0';
9289 p->nData = nToken+1 + sizeof(Fts5HashEntry);
9290 p->pHashNext = pHash->aSlot[iHash];
9291 pHash->aSlot[iHash] = p;
9292 pHash->nEntry++;
9293
9294 /* Add the first rowid field to the hash-entry */
9295 p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
9296 p->iRowid = iRowid;
9297
9298 p->iSzPoslist = p->nData;
9299 if( pHash->eDetail!=FTS5_DETAIL_NONE1 ){
9300 p->nData += 1;
9301 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL0 ? 0 : -1);
9302 }
9303
9304 }else{
9305
9306 /* Appending to an existing hash-entry. Check that there is enough
9307 ** space to append the largest possible new entry. Worst case scenario
9308 ** is:
9309 **
9310 ** + 9 bytes for a new rowid,
9311 ** + 4 byte reserved for the "poslist size" varint.
9312 ** + 1 byte for a "new column" byte,
9313 ** + 3 bytes for a new column number (16-bit max) as a varint,
9314 ** + 5 bytes for the new position offset (32-bit max).
9315 */
9316 if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
9317 sqlite3_int64 nNew = p->nAlloc * 2;
9318 Fts5HashEntry *pNew;
9319 Fts5HashEntry **pp;
9320 pNew = (Fts5HashEntry*)sqlite3_realloc64sqlite3_api->realloc64(p, nNew);
9321 if( pNew==0 ) return SQLITE_NOMEM7;
9322 pNew->nAlloc = (int)nNew;
9323 for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
9324 *pp = pNew;
9325 p = pNew;
9326 }
9327 nIncr -= p->nData;
9328 }
9329 assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) )((void) (0));
9330
9331 pPtr = (u8*)p;
9332
9333 /* If this is a new rowid, append the 4-byte size field for the previous
9334 ** entry, and the new rowid for this entry. */
9335 if( iRowid!=p->iRowid ){
9336 u64 iDiff = (u64)iRowid - (u64)p->iRowid;
9337 fts5HashAddPoslistSize(pHash, p, 0);
9338 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff);
9339 p->iRowid = iRowid;
9340 bNew = 1;
9341 p->iSzPoslist = p->nData;
9342 if( pHash->eDetail!=FTS5_DETAIL_NONE1 ){
9343 p->nData += 1;
9344 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL0 ? 0 : -1);
9345 p->iPos = 0;
9346 }
9347 }
9348
9349 if( iCol>=0 ){
9350 if( pHash->eDetail==FTS5_DETAIL_NONE1 ){
9351 p->bContent = 1;
9352 }else{
9353 /* Append a new column value, if necessary */
9354 assert_nc( iCol>=p->iCol )((void) (0));
9355 if( iCol!=p->iCol ){
9356 if( pHash->eDetail==FTS5_DETAIL_FULL0 ){
9357 pPtr[p->nData++] = 0x01;
9358 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
9359 p->iCol = (i16)iCol;
9360 p->iPos = 0;
9361 }else{
9362 bNew = 1;
9363 p->iCol = (i16)(iPos = iCol);
9364 }
9365 }
9366
9367 /* Append the new position offset, if necessary */
9368 if( bNew ){
9369 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
9370 p->iPos = iPos;
9371 }
9372 }
9373 }else{
9374 /* This is a delete. Set the delete flag. */
9375 p->bDel = 1;
9376 }
9377
9378 nIncr += p->nData;
9379 *pHash->pnByte += nIncr;
9380 return SQLITE_OK0;
9381}
9382
9383
9384/*
9385** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
9386** each sorted in key order. This function merges the two lists into a
9387** single list and returns a pointer to its first element.
9388*/
9389static Fts5HashEntry *fts5HashEntryMerge(
9390 Fts5HashEntry *pLeft,
9391 Fts5HashEntry *pRight
9392){
9393 Fts5HashEntry *p1 = pLeft;
9394 Fts5HashEntry *p2 = pRight;
9395 Fts5HashEntry *pRet = 0;
9396 Fts5HashEntry **ppOut = &pRet;
9397
9398 while( p1 || p2 ){
9399 if( p1==0 ){
9400 *ppOut = p2;
9401 p2 = 0;
9402 }else if( p2==0 ){
9403 *ppOut = p1;
9404 p1 = 0;
9405 }else{
9406 char *zKey1 = fts5EntryKey(p1)( ((char *)(&(p1)[1])) );
9407 char *zKey2 = fts5EntryKey(p2)( ((char *)(&(p2)[1])) );
9408 int nMin = MIN(p1->nKey, p2->nKey)(((p1->nKey) < (p2->nKey)) ? (p1->nKey) : (p2->
nKey))
;
9409
9410 int cmp = memcmp(zKey1, zKey2, nMin);
9411 if( cmp==0 ){
9412 cmp = p1->nKey - p2->nKey;
9413 }
9414 assert( cmp!=0 )((void) (0));
9415
9416 if( cmp>0 ){
9417 /* p2 is smaller */
9418 *ppOut = p2;
9419 ppOut = &p2->pScanNext;
9420 p2 = p2->pScanNext;
9421 }else{
9422 /* p1 is smaller */
9423 *ppOut = p1;
9424 ppOut = &p1->pScanNext;
9425 p1 = p1->pScanNext;
9426 }
9427 *ppOut = 0;
9428 }
9429 }
9430
9431 return pRet;
9432}
9433
9434/*
9435** Link all tokens from hash table iHash into a list in sorted order. The
9436** tokens are not removed from the hash table.
9437*/
9438static int fts5HashEntrySort(
9439 Fts5Hash *pHash,
9440 const char *pTerm, int nTerm, /* Query prefix, if any */
9441 Fts5HashEntry **ppSorted
9442){
9443 const int nMergeSlot = 32;
9444 Fts5HashEntry **ap;
9445 Fts5HashEntry *pList;
9446 int iSlot;
9447 int i;
9448
9449 *ppSorted = 0;
9450 ap = sqlite3_malloc64sqlite3_api->malloc64(sizeof(Fts5HashEntry*) * nMergeSlot);
9451 if( !ap ) return SQLITE_NOMEM7;
9452 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
9453
9454 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
9455 Fts5HashEntry *pIter;
9456 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
9457 if( pTerm==0
9458 || (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter)( ((char *)(&(pIter)[1])) ), pTerm, nTerm))
9459 ){
9460 Fts5HashEntry *pEntry = pIter;
9461 pEntry->pScanNext = 0;
9462 for(i=0; ap[i]; i++){
9463 pEntry = fts5HashEntryMerge(pEntry, ap[i]);
9464 ap[i] = 0;
9465 }
9466 ap[i] = pEntry;
9467 }
9468 }
9469 }
9470
9471 pList = 0;
9472 for(i=0; i<nMergeSlot; i++){
9473 pList = fts5HashEntryMerge(pList, ap[i]);
9474 }
9475
9476 sqlite3_freesqlite3_api->free(ap);
9477 *ppSorted = pList;
9478 return SQLITE_OK0;
9479}
9480
9481/*
9482** Query the hash table for a doclist associated with term pTerm/nTerm.
9483*/
9484static int sqlite3Fts5HashQuery(
9485 Fts5Hash *pHash, /* Hash table to query */
9486 int nPre,
9487 const char *pTerm, int nTerm, /* Query term */
9488 void **ppOut, /* OUT: Pointer to new object */
9489 int *pnDoclist /* OUT: Size of doclist in bytes */
9490){
9491 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
9492 char *zKey = 0;
9493 Fts5HashEntry *p;
9494
9495 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
9496 zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) );
9497 if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break;
9498 }
9499
9500 if( p ){
9501 int nHashPre = sizeof(Fts5HashEntry) + nTerm;
9502 int nList = p->nData - nHashPre;
9503 u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64sqlite3_api->malloc64(nPre + nList + 10));
9504 if( pRet ){
9505 Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre];
9506 memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList);
9507 nList += fts5HashAddPoslistSize(pHash, p, pFaux);
9508 *pnDoclist = nList;
9509 }else{
9510 *pnDoclist = 0;
9511 return SQLITE_NOMEM7;
9512 }
9513 }else{
9514 *ppOut = 0;
9515 *pnDoclist = 0;
9516 }
9517
9518 return SQLITE_OK0;
9519}
9520
9521static int sqlite3Fts5HashScanInit(
9522 Fts5Hash *p, /* Hash table to query */
9523 const char *pTerm, int nTerm /* Query prefix */
9524){
9525 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
9526}
9527
#ifdef SQLITE_DEBUG
/*
** Walk every chain of the hash table and return the total number of
** entries found. Compiled only when SQLITE_DEBUG is defined.
*/
static int fts5HashCount(Fts5Hash *pHash){
  int nTotal = 0;
  int iSlot = 0;
  while( iSlot<pHash->nSlot ){
    Fts5HashEntry *pEntry = pHash->aSlot[iSlot];
    while( pEntry ){
      nTotal++;
      pEntry = pEntry->pHashNext;
    }
    iSlot++;
  }
  return nTotal;
}
#endif
9541
9542/*
9543** Return true if the hash table is empty, false otherwise.
9544*/
9545static int sqlite3Fts5HashIsEmpty(Fts5Hash *pHash){
9546 assert( pHash->nEntry==fts5HashCount(pHash) )((void) (0));
9547 return pHash->nEntry==0;
9548}
9549
9550static void sqlite3Fts5HashScanNext(Fts5Hash *p){
9551 assert( !sqlite3Fts5HashScanEof(p) )((void) (0));
9552 p->pScan = p->pScan->pScanNext;
9553}
9554
9555static int sqlite3Fts5HashScanEof(Fts5Hash *p){
9556 return (p->pScan==0);
9557}
9558
9559static void sqlite3Fts5HashScanEntry(
9560 Fts5Hash *pHash,
9561 const char **pzTerm, /* OUT: term (nul-terminated) */
9562 int *pnTerm, /* OUT: Size of term in bytes */
9563 const u8 **ppDoclist, /* OUT: pointer to doclist */
9564 int *pnDoclist /* OUT: size of doclist in bytes */
9565){
9566 Fts5HashEntry *p;
9567 if( (p = pHash->pScan) ){
9568 char *zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) );
9569 int nTerm = p->nKey;
9570 fts5HashAddPoslistSize(pHash, p, 0);
9571 *pzTerm = zKey;
9572 *pnTerm = nTerm;
9573 *ppDoclist = (const u8*)&zKey[nTerm];
9574 *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm);
9575 }else{
9576 *pzTerm = 0;
9577 *pnTerm = 0;
9578 *ppDoclist = 0;
9579 *pnDoclist = 0;
9580 }
9581}
9582
9583#line 1 "fts5_index.c"
9584/*
9585** 2014 May 31
9586**
9587** The author disclaims copyright to this source code. In place of
9588** a legal notice, here is a blessing:
9589**
9590** May you do good and not evil.
9591** May you find forgiveness for yourself and forgive others.
9592** May you share freely, never taking more than you give.
9593**
9594******************************************************************************
9595**
9596** Low level access to the FTS index stored in the database file. The
9597** routines in this file implement all read and write access to the
9598** %_data table. Other parts of the system access this functionality via
9599** the interface defined in fts5Int.h.
9600*/
9601
9602
9603/* #include "fts5Int.h" */
9604
9605/*
9606** Overview:
9607**
9608** The %_data table contains all the FTS indexes for an FTS5 virtual table.
9609** As well as the main term index, there may be up to 31 prefix indexes.
9610** The format is similar to FTS3/4, except that:
9611**
9612** * all segment b-tree leaf data is stored in fixed size page records
9613** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
9614** taken to ensure it is possible to iterate in either direction through
9615** the entries in a doclist, or to seek to a specific entry within a
9616** doclist, without loading it into memory.
9617**
9618** * large doclists that span many pages have associated "doclist index"
9619** records that contain a copy of the first rowid on each page spanned by
9620** the doclist. This is used to speed up seek operations, and merges of
9621** large doclists with very small doclists.
9622**
9623** * extra fields in the "structure record" record the state of ongoing
9624** incremental merge operations.
9625**
9626*/
9627
9628
9629#define FTS5_OPT_WORK_UNIT1000 1000 /* Number of leaf pages per optimize step */
9630#define FTS5_WORK_UNIT64 64 /* Number of leaf pages in unit of work */
9631
9632#define FTS5_MIN_DLIDX_SIZE4 4 /* Add dlidx if this many empty pages */
9633
9634#define FTS5_MAIN_PREFIX'0' '0' /* NOTE(review): presumably the leading key byte for main-index terms - confirm */
9635
9636#if FTS5_MAX_PREFIX_INDEXES31 > 31
9637# error "FTS5_MAX_PREFIX_INDEXES is too large"
9638#endif
9639
9640#define FTS5_MAX_LEVEL64 64 /* Default maximum number of levels in a structure */
9641
9642/*
9643** There are two versions of the format used for the structure record:
9644**
9645** 1. the legacy format, that may be read by all fts5 versions, and
9646**
9647** 2. the V2 format, which is used by contentless_delete=1 databases.
9648**
9649** Both begin with a 4-byte "configuration cookie" value. Then, a legacy
9650** format structure record contains a varint - the number of levels in
9651** the structure. Whereas a V2 structure record contains the constant
9652** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a
9653** varint has to be at least 16256 to begin with "0xFF". And the default
9654** maximum number of levels is 64.
9655**
9656** See below for more on structure record formats.
9657*/
9658#define FTS5_STRUCTURE_V2"\xFF\x00\x00\x01" "\xFF\x00\x00\x01"
9659
9660/*
9661** Details:
9662**
9663** The %_data table managed by this module,
9664**
9665** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
9666**
9667** , contains the following 6 types of records. See the comments surrounding
9668** the FTS5_*_ROWID macros below for a description of how %_data rowids are
9669** assigned to each of them.
9670**
9671** 1. Structure Records:
9672**
9673** The set of segments that make up an index - the index structure - are
9674** recorded in a single record within the %_data table. The record consists
9675** of a single 32-bit configuration cookie value followed by a list of
9676** SQLite varints.
9677**
9678** If the structure record is a V2 record, the configuration cookie is
9679** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01].
9680**
9681** Next, the record continues with three varints:
9682**
9683** + number of levels,
9684** + total number of segments on all levels,
9685** + value of write counter.
9686**
9687** Then, for each level from 0 to nMax:
9688**
9689** + number of input segments in ongoing merge.
9690** + total number of segments in level.
9691** + for each segment from oldest to newest:
9692** + segment id (always > 0)
9693** + first leaf page number (often 1, always greater than 0)
9694** + final leaf page number
9695**
9696** Then, for V2 structures only:
9697**
9698** + lower origin counter value,
9699** + upper origin counter value,
9700** + the number of tombstone hash pages.
9701**
9702** 2. The Averages Record:
9703**
9704** A single record within the %_data table. The data is a list of varints.
9705** The first value is the number of rows in the index. Then, for each column
9706** from left to right, the total number of tokens in the column for all
9707** rows of the table.
9708**
9709** 3. Segment leaves:
9710**
9711** TERM/DOCLIST FORMAT:
9712**
9713** Most of each segment leaf is taken up by term/doclist data. The
9714** general format of term/doclist, starting with the first term
9715** on the leaf page, is:
9716**
9717** varint : size of first term
9718** blob: first term data
9719** doclist: first doclist
9720** zero-or-more {
9721** varint: number of bytes in common with previous term
9722** varint: number of bytes of new term data (nNew)
9723** blob: nNew bytes of new term data
9724** doclist: next doclist
9725** }
9726**
9727** doclist format:
9728**
9729** varint: first rowid
9730** poslist: first poslist
9731** zero-or-more {
9732** varint: rowid delta (always > 0)
9733** poslist: next poslist
9734** }
9735**
9736** poslist format:
9737**
9738** varint: size of poslist in bytes multiplied by 2, not including
9739** this field. Plus 1 if this entry carries the "delete" flag.
9740** collist: collist for column 0
9741** zero-or-more {
9742** 0x01 byte
9743** varint: column number (I)
9744** collist: collist for column I
9745** }
9746**
9747** collist format:
9748**
9749** varint: first offset + 2
9750** zero-or-more {
9751** varint: offset delta + 2
9752** }
9753**
9754** PAGE FORMAT
9755**
9756** Each leaf page begins with a 4-byte header containing 2 16-bit
9757** unsigned integer fields in big-endian format. They are:
9758**
9759** * The byte offset of the first rowid on the page, if it exists
9760** and occurs before the first term (otherwise 0).
9761**
9762** * The byte offset of the start of the page footer. If the page
9763** footer is 0 bytes in size, then this field is the same as the
9764** size of the leaf page in bytes.
9765**
9766** The page footer consists of a single varint for each term located
9767** on the page. Each varint is the byte offset of the current term
9768** within the page, delta-compressed against the previous value. In
9769** other words, the first varint in the footer is the byte offset of
9770** the first term, the second is the byte offset of the second less that
9771** of the first, and so on.
9772**
9773** The term/doclist format described above is accurate if the entire
9774** term/doclist data fits on a single leaf page. If this is not the case,
9775** the format is changed in two ways:
9776**
9777** + if the first rowid on a page occurs before the first term, it
9778** is stored as a literal value:
9779**
9780** varint: first rowid
9781**
9782** + the first term on each page is stored in the same way as the
9783** very first term of the segment:
9784**
9785** varint : size of first term
9786** blob: first term data
9787**
9788** 5. Segment doclist indexes:
9789**
9790** Doclist indexes are themselves b-trees, however they usually consist of
9791** a single leaf record only. The format of each doclist index leaf page
9792** is:
9793**
9794** * Flags byte. Bits are:
9795** 0x01: Clear if leaf is also the root page, otherwise set.
9796**
9797** * Page number of fts index leaf page. As a varint.
9798**
9799** * First rowid on page indicated by previous field. As a varint.
9800**
9801** * A list of varints, one for each subsequent termless page. A
9802** positive delta if the termless page contains at least one rowid,
9803** or an 0x00 byte otherwise.
9804**
9805** Internal doclist index nodes are:
9806**
9807** * Flags byte. Bits are:
9808** 0x01: Clear for root page, otherwise set.
9809**
9810** * Page number of first child page. As a varint.
9811**
9812** * Copy of first rowid on page indicated by previous field. As a varint.
9813**
9814** * A list of delta-encoded varints - the first rowid on each subsequent
9815** child page.
9816**
9817** 6. Tombstone Hash Page
9818**
9819** These records are only ever present in contentless_delete=1 tables.
9820** There are zero or more of these associated with each segment. They
9821** are used to store the tombstone rowids for rows contained in the
9822** associated segments.
9823**
9824** The set of nHashPg tombstone hash pages associated with a single
9825** segment together form a single hash table containing tombstone rowids.
9826** To find the page of the hash on which a key might be stored:
9827**
9828** iPg = (rowid % nHashPg)
9829**
9830** Then, within page iPg, which has nSlot slots:
9831**
9832** iSlot = (rowid / nHashPg) % nSlot
9833**
9834** Each tombstone hash page begins with an 8 byte header:
9835**
9836** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8.
9837** 1-byte: rowid-0-tombstone flag. This flag is only valid on the
9838** first tombstone hash page for each segment (iPg=0). If set,
9839** the hash table contains rowid 0. If clear, it does not.
9840** Rowid 0 is handled specially.
9841** 2-bytes: unused.
9842** 4-bytes: Big-endian integer containing number of entries on page.
9843**
9844** Following this are nSlot 4 or 8 byte slots (depending on the key-size
9845** in the first byte of the page header). The number of slots may be
9846** determined based on the size of the page record and the key-size:
9847**
9848** nSlot = (nByte - 8) / key-size
9849*/
9850
9851/*
9852** Rowids for the averages and structure records in the %_data table.
9853*/
9854#define FTS5_AVERAGES_ROWID1 1 /* Rowid used for the averages record */
9855#define FTS5_STRUCTURE_ROWID10 10 /* The structure record */
9856
9857/*
9858** Macros determining the rowids used by segment leaves and dlidx leaves
9859** and nodes. All nodes and leaves are stored in the %_data table with large
9860** positive rowids.
9861**
9862** Each segment has a unique non-zero 16-bit id.
9863**
9864** The rowid for each segment leaf is found by passing the segment id and
9865** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
9866** sequentially starting from 1.
9867*/
/*
** Bit layout of a %_data rowid as assembled by fts5_dri(), from the most
** significant field to the least:
**
**     segid   shifted left by (PAGE_B + HEIGHT_B + DLI_B) = 37 bits
**     dlidx   shifted left by (PAGE_B + HEIGHT_B)         = 36 bits
**     height  shifted left by PAGE_B                      = 31 bits
**     pgno    occupying the low-order bits
**
** FTS5_SEGMENT_ROWID() passes dlidx=0 and height=0. FTS5_DLIDX_ROWID()
** passes dlidx=1. FTS5_TOMBSTONE_ROWID() passes dlidx=0 and height=0 and
** adds (1<<16) to the segment id.
*/
9868#define FTS5_DATA_ID_B16 16 /* Max seg id number 65535 */
9869#define FTS5_DATA_DLI_B1 1 /* Doclist-index flag (1 bit) */
9870#define FTS5_DATA_HEIGHT_B5 5 /* Max dlidx tree height of 32 */
9871#define FTS5_DATA_PAGE_B31 31 /* Max page number of 2147483648 */
9872
9873#define fts5_dri(segid, dlidx, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(dlidx) <<
(31 + 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) )
( \
9874 ((i64)(segid) << (FTS5_DATA_PAGE_B31+FTS5_DATA_HEIGHT_B5+FTS5_DATA_DLI_B1)) + \
9875 ((i64)(dlidx) << (FTS5_DATA_PAGE_B31 + FTS5_DATA_HEIGHT_B5)) + \
9876 ((i64)(height) << (FTS5_DATA_PAGE_B31)) + \
9877 ((i64)(pgno)) \
9878)
9879
9880#define FTS5_SEGMENT_ROWID(segid, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) )
fts5_dri(segid, 0, 0, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) )
9881#define FTS5_DLIDX_ROWID(segid, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(1) << (31
+ 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) )
fts5_dri(segid, 1, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(1) << (31
+ 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) )
9882#define FTS5_TOMBSTONE_ROWID(segid,ipg)( ((i64)(segid+(1<<16)) << (31 +5 +1)) + ((i64)(0
) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(ipg)
) )
fts5_dri(segid+(1<<16), 0, 0, ipg)( ((i64)(segid+(1<<16)) << (31 +5 +1)) + ((i64)(0
) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(ipg)
) )
9883
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. Compiled only when SQLITE_DEBUG is defined.
** NOTE(review): presumably exists so that a debugger breakpoint can be set
** where corruption is first reported - confirm against the callers.
**
** Declared with (void) so that the definition is a proper prototype and
** not an old-style declaration with an unspecified parameter list.
*/
static int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
9887
9888
9889/*
9890** Each time a blob is read from the %_data table, it is padded with this
9891** many zero bytes. This makes it easier to decode the various record formats
9892** without overreading if the records are corrupt.
9893*/
9894#define FTS5_DATA_ZERO_PADDING8 8
9895#define FTS5_DATA_PADDING20 20
9896
9897typedef struct Fts5Data Fts5Data;
9898typedef struct Fts5DlidxIter Fts5DlidxIter;
9899typedef struct Fts5DlidxLvl Fts5DlidxLvl;
9900typedef struct Fts5DlidxWriter Fts5DlidxWriter;
9901typedef struct Fts5Iter Fts5Iter;
9902typedef struct Fts5PageWriter Fts5PageWriter;
9903typedef struct Fts5SegIter Fts5SegIter;
9904typedef struct Fts5DoclistIter Fts5DoclistIter;
9905typedef struct Fts5SegWriter Fts5SegWriter;
9906typedef struct Fts5Structure Fts5Structure;
9907typedef struct Fts5StructureLevel Fts5StructureLevel;
9908typedef struct Fts5StructureSegment Fts5StructureSegment;
9909typedef struct Fts5TokenDataIter Fts5TokenDataIter;
9910typedef struct Fts5TokenDataMap Fts5TokenDataMap;
9911typedef struct Fts5TombstoneArray Fts5TombstoneArray;
9912
/*
** A buffer holding one record, together with its total size and the size
** of the leaf portion that excludes the page-index.
*/
9913struct Fts5Data {
9914 u8 *p; /* Pointer to buffer containing record */
9915 int nn; /* Size of record in bytes */
9916 int szLeaf; /* Size of leaf without page-index */
9917};
9918
9919/*
9920** One object per %_data table.
9921**
9922** nContentlessDelete:
9923** The number of contentless delete operations since the most recent
9924** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked
9925** so that extra auto-merge work can be done by fts5IndexFlush() to
9926** account for the delete operations.
9927*/
9928struct Fts5Index {
9929 Fts5Config *pConfig; /* Virtual table configuration */
9930 char *zDataTbl; /* Name of %_data table */
9931 int nWorkUnit; /* Leaf pages in a "unit" of work */
9932
9933 /*
9934 ** Variables related to the accumulation of tokens and doclists within the
9935 ** in-memory hash tables before they are flushed to disk.
9936 */
9937 Fts5Hash *pHash; /* Hash table for in-memory data */
9938 int nPendingData; /* Current bytes of pending data */
9939 i64 iWriteRowid; /* Rowid for current doc being written */
9940 int bDelete; /* Current write is a delete */
9941 int nContentlessDelete; /* Number of contentless delete ops */
9942 int nPendingRow; /* Number of INSERT in hash table */
9943
9944 /* Error state. */
9945 int rc; /* Current error code */
9946 int flushRc; /* NOTE(review): presumably the error code of a flush - confirm */
9947
9948 /* State used by the fts5DataXXX() functions. */
9949 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
9950 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
9951 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
9952 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
9953 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
9954 sqlite3_stmt *pIdxSelect; /* NOTE(review): presumably a SELECT on %_idx - confirm */
9955 sqlite3_stmt *pIdxNextSelect; /* NOTE(review): presumably a SELECT on %_idx - confirm */
9956 int nRead; /* Total number of blocks read */
9957
9958 sqlite3_stmt *pDeleteFromIdx; /* NOTE(review): presumably a DELETE on %_idx - confirm */
9959
9960 sqlite3_stmt *pDataVersion; /* NOTE(review): presumably reads the data_version kept in iStructVersion - confirm */
9961 i64 iStructVersion; /* data_version when pStruct read */
9962 Fts5Structure *pStruct; /* Current db structure (or NULL) */
9963};
9964
/*
** Cursor over a doclist held in memory. aEof bounds the doclist. The
** remaining members are outputs describing the current entry.
*/
9965struct Fts5DoclistIter {
9966 u8 *aEof; /* Pointer to 1 byte past end of doclist */
9967
9968 /* Output variables. aPoslist==0 at EOF */
9969 i64 iRowid; /* NOTE(review): presumably the rowid of the current entry - confirm */
9970 u8 *aPoslist; /* NULL at EOF (see above) */
9971 int nPoslist; /* NOTE(review): presumably the size of aPoslist in bytes - confirm */
9972 int nSize; /* NOTE(review): meaning not evident here - confirm against users */
9973};
9974
9975/*
9976** The contents of the "structure" record for each index are represented
9977** using an Fts5Structure record in memory. Which uses instances of the
9978** other Fts5StructureXXX types as components.
9979**
9980** nOriginCntr:
9981** This value is set to non-zero for structure records created for
9982** contentlessdelete=1 tables only. In that case it represents the
9983** origin value to apply to the next top-level segment created.
9984*/
/* Description of one segment: its id and its range of leaf page numbers. */
9985struct Fts5StructureSegment {
9986 int iSegid; /* Segment id */
9987 int pgnoFirst; /* First leaf page number in segment */
9988 int pgnoLast; /* Last leaf page number in segment */
9989
9990 /* contentlessdelete=1 tables only: */
9991 u64 iOrigin1; /* NOTE(review): presumably the lower origin counter value - confirm */
9992 u64 iOrigin2; /* NOTE(review): presumably the upper origin counter value - confirm */
9993 int nPgTombstone; /* Number of tombstone hash table pages */
9994 u64 nEntryTombstone; /* Number of tombstone entries that "count" */
9995 u64 nEntry; /* Number of rows in this segment */
9996};
/* One level of the structure: an array of nSeg segments, oldest first. */
9997struct Fts5StructureLevel {
9998 int nMerge; /* Number of segments in incr-merge */
9999 int nSeg; /* Total number of segments on level */
10000 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
10001};
/*
** Reference-counted header followed by a flexible array of nLevel level
** objects.
*/
10002struct Fts5Structure {
10003 int nRef; /* Object reference count */
10004 u64 nWriteCounter; /* Total leaves written to level 0 */
10005 u64 nOriginCntr; /* Origin value for next top-level segment */
10006 int nSegment; /* Total segments in this structure */
10007 int nLevel; /* Number of levels in this index */
10008 Fts5StructureLevel aLevel[FLEXARRAY]; /* Array of nLevel level objects */
10009};
10010
10011/* Size (in bytes) of an Fts5Structure object holding up to N levels */
10012#define SZ_FTS5STRUCTURE(N)(__builtin_offsetof(Fts5Structure, aLevel) + (N)*sizeof(Fts5StructureLevel
))
\
10013 (offsetof(Fts5Structure,aLevel)__builtin_offsetof(Fts5Structure, aLevel) + (N)*sizeof(Fts5StructureLevel))
10014
10015/*
10016** An object of type Fts5SegWriter is used to write to segments.
10017*/
/*
** State for a single page being assembled: its page number, the leaf data
** and page-index buffers, and the previous term written to the page.
*/
10018struct Fts5PageWriter {
10019 int pgno; /* Page number for this page */
10020 int iPrevPgidx; /* Previous value written into pgidx */
10021 Fts5Buffer buf; /* Buffer containing leaf data */
10022 Fts5Buffer pgidx; /* Buffer containing page-index */
10023 Fts5Buffer term; /* Buffer containing previous term on page */
10024};
/*
** State for a single doclist-index page being assembled. Fts5SegWriter
** holds an array of these (aDlidx[]).
*/
10025struct Fts5DlidxWriter {
10026 int pgno; /* Page number for this page */
10027 int bPrevValid; /* True if iPrev is valid */
10028 i64 iPrev; /* Previous rowid value written to page */
10029 Fts5Buffer buf; /* Buffer containing page data */
10030};
/*
** State for writing one segment: the current page writer, bookkeeping for
** the first rowid/term on a page, the array of doclist-index writers, and
** the pending values for the %_idx table.
*/
10031struct Fts5SegWriter {
10032 int iSegid; /* Segid to write to */
10033 Fts5PageWriter writer; /* PageWriter object */
10034 i64 iPrevRowid; /* Previous rowid written to current leaf */
10035 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
10036 u8 bFirstRowidInPage; /* True if next rowid is first in page */
10037 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
10038 u8 bFirstTermInPage; /* True if next term will be first in leaf */
10039 int nLeafWritten; /* Number of leaf pages written */
10040 int nEmpty; /* Number of contiguous term-less nodes */
10041
10042 int nDlidx; /* Allocated size of aDlidx[] array */
10043 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
10044
10045 /* Values to insert into the %_idx table */
10046 Fts5Buffer btterm; /* Next term to insert into %_idx table */
10047 int iBtPage; /* Page number corresponding to btterm */
10048};
10049
/*
** Result of one comparison between iterators.
** NOTE(review): presumably one of these is kept per node of a merge tree
** over segment iterators - confirm against the code that fills it in.
*/
10050typedef struct Fts5CResult Fts5CResult;
10051struct Fts5CResult {
10052 u16 iFirst; /* aSeg[] index of firstest iterator */
10053 u8 bTermEq; /* True if the terms are equal */
10054};
10055
10056/*
10057** Object for iterating through a single segment, visiting each term/rowid
10058** pair in the segment.
10059**
10060** pSeg:
10061** The segment to iterate through.
10062**
10063** iLeafPgno:
10064** Current leaf page number within segment.
10065**
10066** iLeafOffset:
10067** Byte offset within the current leaf that is the first byte of the
10068** position list data (one byte past the position-list size field).
10069**
10070** pLeaf:
10071** Buffer containing current leaf page data. Set to NULL at EOF.
10072**
10073** iTermLeafPgno, iTermLeafOffset:
10074** Leaf page number containing the last term read from the segment. And
10075** the offset immediately following the term data.
10076**
10077** flags:
10078** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
10079**
10080** FTS5_SEGITER_ONETERM:
10081** If set, set the iterator to point to EOF after the current doclist
10082** has been exhausted. Do not proceed to the next term in the segment.
10083**
10084** FTS5_SEGITER_REVERSE:
10085** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
10086** it is set, iterate through rowid in descending order instead of the
10087** default ascending order.
10088**
10089** iRowidOffset/nRowidOffset/aRowidOffset:
10090** These are used if the FTS5_SEGITER_REVERSE flag is set.
10091**
10092** For each rowid on the page corresponding to the current term, the
10093** corresponding aRowidOffset[] entry is set to the byte offset of the
10094** start of the "position-list-size" field within the page.
10095**
10096** iTermIdx:
10097** Index of current term on iTermLeafPgno.
10098**
10099** apTombstone/nTombstone:
10100** These are used for contentless_delete=1 tables only. When the cursor
10101** is first allocated, the apTombstone[] array is allocated so that it
10102** is large enough for all tombstones hash pages associated with the
10103** segment. The pages themselves are loaded lazily from the database as
10104** they are required.
10105*/
/* Iterator over the term/rowid pairs of a single segment. */
10106struct Fts5SegIter {
10107 Fts5StructureSegment *pSeg; /* Segment to iterate through */
10108 int flags; /* Mask of FTS5_SEGITER_XXX configuration flags */
10109 int iLeafPgno; /* Current leaf page number */
10110 Fts5Data *pLeaf; /* Current leaf data */
10111 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
10112 i64 iLeafOffset; /* Byte offset within current leaf */
10113 Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */
10114
10115 /* Next method */
10116 void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
10117
10118 /* The page and offset from which the current term was read. The offset
10119 ** is the offset of the first rowid in the current doclist. */
10120 int iTermLeafPgno;
10121 int iTermLeafOffset;
10122
10123 int iPgidxOff; /* Next offset in pgidx */
/* NOTE(review): presumably the offset at which the current doclist ends
** within the leaf - confirm against the code that sets it. */
10124 int iEndofDoclist;
10125
10126 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
10127 int iRowidOffset; /* Current entry in aRowidOffset[] */
10128 int nRowidOffset; /* Allocated size of aRowidOffset[] array */
10129 int *aRowidOffset; /* Array of offset to rowid fields */
10130
10131 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
10132
10133 /* Variables populated based on current entry. */
10134 Fts5Buffer term; /* Current term */
10135 i64 iRowid; /* Current rowid */
10136 int nPos; /* Number of bytes in current position list */
10137 u8 bDel; /* True if the delete flag is set */
10138};
10139
10140/*
10141** Array of tombstone pages. Reference counted.
10142*/
/* Sized with SZ_FTS5TOMBSTONEARRAY(N); apTombstone[] is a flexible array. */
10143struct Fts5TombstoneArray {
10144 int nRef; /* Number of pointers to this object */
10145 int nTombstone; /* NOTE(review): presumably entries in apTombstone[] - confirm */
10146 Fts5Data *apTombstone[FLEXARRAY]; /* Array of tombstone pages */
10147};
10148
/* Number of bytes occupied by an Fts5TombstoneArray whose apTombstone[]
** array has room for N page pointers. */
#define SZ_FTS5TOMBSTONEARRAY(N) \
    (offsetof(Fts5TombstoneArray,apTombstone) + (N)*sizeof(Fts5Data*))
10152
/*
** The argument is a pointer to an Fts5Data structure holding a leaf page.
** Assert that its szLeaf field is either equal to the total record size
** (nn) or to the 16-bit value stored at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) \
  assert( (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) )
10160
/* Bits for Fts5SegIter.flags */
#define FTS5_SEGITER_ONETERM 0x01   /* Go to EOF once current doclist is done */
#define FTS5_SEGITER_REVERSE 0x02   /* Visit rowids in descending order */
10163
/*
** For each macro below the argument x is a pointer to an Fts5Data
** structure that contains a leaf page.
**
** fts5LeafIsTermless(x):
**   True if the leaf contains no terms, false if it contains at least one.
**
** fts5LeafTermOff(x, i):
**   The 16-bit big-endian value stored at byte offset (szLeaf + i*2).
**
** fts5LeafFirstRowidOff(x):
**   The 16-bit big-endian value stored in the first two bytes of the page.
*/
#define fts5LeafIsTermless(x)    ((x)->szLeaf >= (x)->nn)

#define fts5LeafTermOff(x, i)    (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
10174
10175/*
10176** Object for iterating through the merged results of one or more segments,
10177** visiting each term/rowid pair in the merged data.
10178**
10179** nSeg is always a power of two greater than or equal to the number of
10180** segments that this object is merging data from. Both the aSeg[] and
10181** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
10182** with zeroed objects - these are handled as if they were iterators opened
10183** on empty segments.
10184**
10185** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
10186** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
10187** comparison in this context is the index of the iterator that currently
10188** points to the smaller term/rowid combination. Iterators at EOF are
10189** considered to be greater than all other iterators.
10190**
10191** aFirst[1] contains the index in aSeg[] of the iterator that points to
10192** the smallest key overall. aFirst[0] is unused.
10193**
10194** poslist:
10195** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
10196** There is no way to tell if this is populated or not.
10197**
10198** pColset:
10199** If not NULL, points to an object containing a set of column indices.
10200** Only matches that occur in one of these columns will be returned.
10201** The Fts5Iter does not own the Fts5Colset object, and so it is not
10202** freed when the iterator is closed - it is owned by the upper layer.
10203*/
/* Merging iterator over one or more segments; sized with SZ_FTS5ITER(N). */
10204struct Fts5Iter {
10205 Fts5IndexIter base; /* Base class containing output vars */
/* NOTE(review): pTokenDataIter is not described by the comment above this
** struct; presumably extra state for tokendata queries - confirm. */
10206 Fts5TokenDataIter *pTokenDataIter;
10207
10208 Fts5Index *pIndex; /* Index that owns this iterator */
10209 Fts5Buffer poslist; /* Buffer containing current poslist */
10210 Fts5Colset *pColset; /* Restrict matches to these columns */
10211
10212 /* Invoked to set output variables. */
10213 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
10214
10215 int nSeg; /* Size of aSeg[] array */
10216 int bRev; /* True to iterate in reverse order */
10217 u8 bSkipEmpty; /* True to skip deleted entries */
10218
10219 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
10220 Fts5CResult *aFirst; /* Current merge state (see above) */
10221 Fts5SegIter aSeg[FLEXARRAY]; /* Array of segment iterators */
10222};
10223
/* Number of bytes occupied by an Fts5Iter object whose aSeg[] array has
** room for N segment iterators. */
#define SZ_FTS5ITER(N) \
    (offsetof(Fts5Iter,aSeg) + (N)*sizeof(Fts5SegIter))
10226
10227/*
10228** An instance of the following type is used to iterate through the contents
10229** of a doclist-index record.
10230**
10231** pData:
10232** Record containing the doclist-index data.
10233**
10234** bEof:
10235** Set to true once iterator has reached EOF.
10236**
10237** iOff:
10238** Set to the current offset within record pData.
10239*/
/* Iteration state for one element of the Fts5DlidxIter.aLvl[] array. */
10240struct Fts5DlidxLvl {
10241 Fts5Data *pData; /* Data for current page of this level */
10242 int iOff; /* Current offset into pData */
10243 int bEof; /* At EOF already */
10244 int iFirstOff; /* Used by reverse iterators */
10245
10246 /* Output variables */
10247 int iLeafPgno; /* Page number of current leaf page */
10248 i64 iRowid; /* First rowid on leaf iLeafPgno */
10249};
/* Doclist-index iterator; sized with SZ_FTS5DLIDXITER(N). */
10250struct Fts5DlidxIter {
10251 int nLvl; /* NOTE(review): presumably number of entries used in aLvl[] - confirm */
10252 int iSegid; /* NOTE(review): presumably id of the segment being read - confirm */
10253 Fts5DlidxLvl aLvl[FLEXARRAY]; /* Per-level state (flexible array) */
10254};
10255
/* Number of bytes occupied by an Fts5DlidxIter object whose aLvl[] array
** has room for N levels. */
#define SZ_FTS5DLIDXITER(N) \
    (offsetof(Fts5DlidxIter,aLvl) + (N)*sizeof(Fts5DlidxLvl))
10259
10260static void fts5PutU16(u8 *aOut, u16 iVal){
10261 aOut[0] = (iVal>>8);
10262 aOut[1] = (iVal&0xFF);
10263}
10264
10265static u16 fts5GetU16(const u8 *aIn){
10266 return ((u16)aIn[0] << 8) + aIn[1];
10267}
10268
10269/*
10270** The only argument points to a buffer at least 8 bytes in size. This
10271** function interprets the first 8 bytes of the buffer as a 64-bit big-endian
10272** unsigned integer and returns the result.
10273*/
10274static u64 fts5GetU64(u8 *a){
10275 return ((u64)a[0] << 56)
10276 + ((u64)a[1] << 48)
10277 + ((u64)a[2] << 40)
10278 + ((u64)a[3] << 32)
10279 + ((u64)a[4] << 24)
10280 + ((u64)a[5] << 16)
10281 + ((u64)a[6] << 8)
10282 + ((u64)a[7] << 0);
10283}
10284
10285/*
10286** The only argument points to a buffer at least 4 bytes in size. This
10287** function interprets the first 4 bytes of the buffer as a 32-bit big-endian
10288** unsigned integer and returns the result.
10289*/
10290static u32 fts5GetU32(const u8 *a){
10291 return ((u32)a[0] << 24)
10292 + ((u32)a[1] << 16)
10293 + ((u32)a[2] << 8)
10294 + ((u32)a[3] << 0);
10295}
10296
10297/*
10298** Write iVal, formated as a 64-bit big-endian unsigned integer, to the
10299** buffer indicated by the first argument.
10300*/
10301static void fts5PutU64(u8 *a, u64 iVal){
10302 a[0] = ((iVal >> 56) & 0xFF);
10303 a[1] = ((iVal >> 48) & 0xFF);
10304 a[2] = ((iVal >> 40) & 0xFF);
10305 a[3] = ((iVal >> 32) & 0xFF);
10306 a[4] = ((iVal >> 24) & 0xFF);
10307 a[5] = ((iVal >> 16) & 0xFF);
10308 a[6] = ((iVal >> 8) & 0xFF);
10309 a[7] = ((iVal >> 0) & 0xFF);
10310}
10311
10312/*
10313** Write iVal, formated as a 32-bit big-endian unsigned integer, to the
10314** buffer indicated by the first argument.
10315*/
10316static void fts5PutU32(u8 *a, u32 iVal){
10317 a[0] = ((iVal >> 24) & 0xFF);
10318 a[1] = ((iVal >> 16) & 0xFF);
10319 a[2] = ((iVal >> 8) & 0xFF);
10320 a[3] = ((iVal >> 0) & 0xFF);
10321}
10322
10323/*
10324** Allocate and return a buffer at least nByte bytes in size.
10325**
10326** If an OOM error is encountered, return NULL and set the error code in
10327** the Fts5Index handle passed as the first argument.
10328*/
10329static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
10330 return sqlite3Fts5MallocZero(&p->rc, nByte);
10331}
10332
/*
** Compare the contents of buffer pLeft with the blob pRight/nRight using
** memcmp() over the shorter of the two lengths. If that prefix is equal,
** the shorter operand is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** Only compiled when SQLITE_DEBUG is defined.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nLeft = pLeft->n;
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft->p, pRight, nCmp);
  if( res!=0 ) return res;
  return nLeft - nRight;
}
#endif
10351
10352/*
10353** Compare the contents of the two buffers using memcmp(). If one buffer
10354** is a prefix of the other, it is considered the lesser.
10355**
10356** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
10357** +ve if pRight is smaller than pLeft. In other words:
10358**
10359** res = *pLeft - *pRight
10360*/
10361static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
10362 int nCmp, res;
10363 nCmp = MIN(pLeft->n, pRight->n)(((pLeft->n) < (pRight->n)) ? (pLeft->n) : (pRight
->n))
;
10364 assert( nCmp<=0 || pLeft->p!=0 )((void) (0));
10365 assert( nCmp<=0 || pRight->p!=0 )((void) (0));
10366 res = fts5Memcmp(pLeft->p, pRight->p, nCmp)((nCmp)<=0 ? 0 : memcmp((pLeft->p), (pRight->p), (nCmp
)))
;
10367 return (res==0 ? (pLeft->n - pRight->n) : res);
10368}
10369
10370static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
10371 int ret;
10372 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret)sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32
*)&(ret))
;
10373 return ret;
10374}
10375
10376/*
10377** Close the read-only blob handle, if it is open.
10378*/
10379static void fts5IndexCloseReader(Fts5Index *p){
10380 if( p->pReader ){
10381 int rc;
10382 sqlite3_blob *pReader = p->pReader;
10383 p->pReader = 0;
10384 rc = sqlite3_blob_closesqlite3_api->blob_close(pReader);
10385 if( p->rc==SQLITE_OK0 ) p->rc = rc;
10386 }
10387}
10388
10389/*
10390** Retrieve a record from the %_data table.
10391**
10392** If an error occurs, NULL is returned and an error left in the
10393** Fts5Index object.
10394*/
/* Returns a buffer obtained from sqlite3_malloc64() that holds an Fts5Data
** header followed by the record; this is a no-op returning NULL if p->rc is
** already set on entry. The blob handle p->pReader is reused across calls. */
10395static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
10396 Fts5Data *pRet = 0;
10397 if( p->rc==SQLITE_OK0 ){
10398 int rc = SQLITE_OK0;
10399
10400 if( p->pReader ){
10401 /* This call may return SQLITE_ABORT if there has been a savepoint
10402 ** rollback since it was last used. In this case a new blob handle
10403 ** is required. */
10404 sqlite3_blob *pBlob = p->pReader;
10405 p->pReader = 0;
10406 rc = sqlite3_blob_reopensqlite3_api->blob_reopen(pBlob, iRowid);
10407 assert( p->pReader==0 )((void) (0));
10408 p->pReader = pBlob;
/* Any failure to reposition closes the handle. Only SQLITE_ABORT is then
** forgiven, so that the blob_open() below gets a chance to run. */
10409 if( rc!=SQLITE_OK0 ){
10410 fts5IndexCloseReader(p);
10411 }
10412 if( rc==SQLITE_ABORT4 ) rc = SQLITE_OK0;
10413 }
10414
10415 /* If the blob handle is not open at this point, open it and seek
10416 ** to the requested entry. */
10417 if( p->pReader==0 && rc==SQLITE_OK0 ){
10418 Fts5Config *pConfig = p->pConfig;
10419 rc = sqlite3_blob_opensqlite3_api->blob_open(pConfig->db,
10420 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
10421 );
10422 }
10423
10424 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
10425 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
10426 ** All the reasons those functions might return SQLITE_ERROR - missing
10427 ** table, missing row, non-blob/text in block column - indicate
10428 ** backing store corruption. */
10429 if( rc==SQLITE_ERROR1 ) rc = FTS5_CORRUPT(11 | (1<<8));
10430
10431 if( rc==SQLITE_OK0 ){
10432 u8 *aOut = 0; /* Read blob data into this buffer */
10433 int nByte = sqlite3_blob_bytessqlite3_api->blob_bytes(p->pReader);
/* szData is sizeof(Fts5Data) rounded up to a multiple of 8. The record
** bytes are stored that many bytes after the start of the allocation, with
** FTS5_DATA_PADDING extra bytes allocated after them. */
10434 int szData = (sizeof(Fts5Data) + 7) & ~7;
10435 sqlite3_int64 nAlloc = szData + nByte + FTS5_DATA_PADDING20;
10436 pRet = (Fts5Data*)sqlite3_malloc64sqlite3_api->malloc64(nAlloc);
10437 if( pRet ){
10438 pRet->nn = nByte;
10439 aOut = pRet->p = (u8*)pRet + szData;
10440 }else{
10441 rc = SQLITE_NOMEM7;
10442 }
10443
10444 if( rc==SQLITE_OK0 ){
10445 rc = sqlite3_blob_readsqlite3_api->blob_read(p->pReader, aOut, nByte, 0);
10446 }
10447 if( rc!=SQLITE_OK0 ){
10448 sqlite3_freesqlite3_api->free(pRet);
10449 pRet = 0;
10450 }else{
10451 /* TODO1: Fix this */
/* Only the first two padding bytes are zeroed here. szLeaf is always taken
** from bytes 2 and 3, whatever the record length.
** NOTE(review): for records shorter than 2 bytes those bytes appear to be
** uninitialized padding - confirm whether callers can rely on szLeaf then. */
10452 pRet->p[nByte] = 0x00;
10453 pRet->p[nByte+1] = 0x00;
10454 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
10455 }
10456 }
/* Unconditional store: this replaces any code that fts5IndexCloseReader()
** may have written to p->rc above. */
10457 p->rc = rc;
10458 p->nRead++;
10459 }
10460
10461 assert( (pRet==0)==(p->rc!=SQLITE_OK) )((void) (0));
10462 assert( pRet==0 || EIGHT_BYTE_ALIGNMENT( pRet->p ) )((void) (0));
10463 return pRet;
10464}
10465
10466
10467/*
10468** Release a reference to data record returned by an earlier call to
10469** fts5DataRead().
10470*/
10471static void fts5DataRelease(Fts5Data *pData){
10472 sqlite3_freesqlite3_api->free(pData);
10473}
10474
10475static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
10476 Fts5Data *pRet = fts5DataRead(p, iRowid);
10477 if( pRet ){
10478 if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
10479 p->rc = FTS5_CORRUPT(11 | (1<<8));
10480 fts5DataRelease(pRet);
10481 pRet = 0;
10482 }
10483 }
10484 return pRet;
10485}
10486
10487static int fts5IndexPrepareStmt(
10488 Fts5Index *p,
10489 sqlite3_stmt **ppStmt,
10490 char *zSql
10491){
10492 if( p->rc==SQLITE_OK0 ){
10493 if( zSql ){
10494 int rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(p->pConfig->db, zSql, -1,
10495 SQLITE_PREPARE_PERSISTENT0x01|SQLITE_PREPARE_NO_VTAB0x04,
10496 ppStmt, 0);
10497 /* If this prepare() call fails with SQLITE_ERROR, then one of the
10498 ** %_idx or %_data tables has been removed or modified. Call this
10499 ** corruption. */
10500 p->rc = (rc==SQLITE_ERROR1 ? SQLITE_CORRUPT11 : rc);
10501 }else{
10502 p->rc = SQLITE_NOMEM7;
10503 }
10504 }
10505 sqlite3_freesqlite3_api->free(zSql);
10506 return p->rc;
10507}
10508
10509
10510/*
10511** INSERT OR REPLACE a record into the %_data table.
10512*/
10513static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
10514 if( p->rc!=SQLITE_OK0 ) return;
10515
10516 if( p->pWriter==0 ){
10517 Fts5Config *pConfig = p->pConfig;
10518 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintfsqlite3_api->mprintf(
10519 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
10520 pConfig->zDb, pConfig->zName
10521 ));
10522 if( p->rc ) return;
10523 }
10524
10525 sqlite3_bind_int64sqlite3_api->bind_int64(p->pWriter, 1, iRowid);
10526 sqlite3_bind_blobsqlite3_api->bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC((sqlite3_destructor_type)0));
10527 sqlite3_stepsqlite3_api->step(p->pWriter);
10528 p->rc = sqlite3_resetsqlite3_api->reset(p->pWriter);
10529 sqlite3_bind_nullsqlite3_api->bind_null(p->pWriter, 2);
10530}
10531
10532/*
10533** Execute the following SQL:
10534**
10535** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
10536*/
10537static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
10538 if( p->rc!=SQLITE_OK0 ) return;
10539
10540 if( p->pDeleter==0 ){
10541 Fts5Config *pConfig = p->pConfig;
10542 char *zSql = sqlite3_mprintfsqlite3_api->mprintf(
10543 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
10544 pConfig->zDb, pConfig->zName
10545 );
10546 if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
10547 }
10548
10549 sqlite3_bind_int64sqlite3_api->bind_int64(p->pDeleter, 1, iFirst);
10550 sqlite3_bind_int64sqlite3_api->bind_int64(p->pDeleter, 2, iLast);
10551 sqlite3_stepsqlite3_api->step(p->pDeleter);
10552 p->rc = sqlite3_resetsqlite3_api->reset(p->pDeleter);
10553}
10554
10555/*
10556** Remove all records associated with segment iSegid.
10557*/
10558static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){
10559 int iSegid = pSeg->iSegid;
10560 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(0)) )
;
10561 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)( ((i64)(iSegid+1) << (31 +5 +1)) + ((i64)(0) << (
31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) )
-1;
10562 fts5DataDelete(p, iFirst, iLast);
10563
10564 if( pSeg->nPgTombstone ){
10565 i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0)( ((i64)(iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(
0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(0))
)
;
10566 i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1)( ((i64)(iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(
0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg
->nPgTombstone-1)) )
;
10567 fts5DataDelete(p, iTomb1, iTomb2);
10568 }
10569 if( p->pIdxDeleter==0 ){
10570 Fts5Config *pConfig = p->pConfig;
10571 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintfsqlite3_api->mprintf(
10572 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
10573 pConfig->zDb, pConfig->zName
10574 ));
10575 }
10576 if( p->rc==SQLITE_OK0 ){
10577 sqlite3_bind_intsqlite3_api->bind_int(p->pIdxDeleter, 1, iSegid);
10578 sqlite3_stepsqlite3_api->step(p->pIdxDeleter);
10579 p->rc = sqlite3_resetsqlite3_api->reset(p->pIdxDeleter);
10580 }
10581}
10582
10583/*
10584** Release a reference to an Fts5Structure object returned by an earlier
10585** call to fts5StructureRead() or fts5StructureDecode().
10586*/
10587static void fts5StructureRelease(Fts5Structure *pStruct){
10588 if( pStruct && 0>=(--pStruct->nRef) ){
10589 int i;
10590 assert( pStruct->nRef==0 )((void) (0));
10591 for(i=0; i<pStruct->nLevel; i++){
10592 sqlite3_freesqlite3_api->free(pStruct->aLevel[i].aSeg);
10593 }
10594 sqlite3_freesqlite3_api->free(pStruct);
10595 }
10596}
10597
10598static void fts5StructureRef(Fts5Structure *pStruct){
10599 pStruct->nRef++;
10600}
10601
10602static void *sqlite3Fts5StructureRef(Fts5Index *p){
10603 fts5StructureRef(p->pStruct);
10604 return (void*)p->pStruct;
10605}
10606static void sqlite3Fts5StructureRelease(void *p){
10607 if( p ){
10608 fts5StructureRelease((Fts5Structure*)p);
10609 }
10610}
10611static int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
10612 if( p->pStruct!=(Fts5Structure*)pStruct ){
10613 return SQLITE_ABORT4;
10614 }
10615 return SQLITE_OK0;
10616}
10617
10618/*
10619** Ensure that structure object (*pp) is writable.
10620**
10621** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
10622** an error occurs, (*pRc) is set to an SQLite error code before returning.
10623*/
10624static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
10625 Fts5Structure *p = *pp;
10626 if( *pRc==SQLITE_OK0 && p->nRef>1 ){
10627 i64 nByte = SZ_FTS5STRUCTURE(p->nLevel)(__builtin_offsetof(Fts5Structure, aLevel) + (p->nLevel)*sizeof
(Fts5StructureLevel))
;
10628 Fts5Structure *pNew;
10629 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
10630 if( pNew ){
10631 int i;
10632 memcpy(pNew, p, nByte);
10633 for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
10634 for(i=0; i<p->nLevel; i++){
10635 Fts5StructureLevel *pLvl = &pNew->aLevel[i];
10636 nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
10637 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
10638 if( pLvl->aSeg==0 ){
10639 for(i=0; i<p->nLevel; i++){
10640 sqlite3_freesqlite3_api->free(pNew->aLevel[i].aSeg);
10641 }
10642 sqlite3_freesqlite3_api->free(pNew);
10643 return;
10644 }
10645 memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
10646 }
10647 p->nRef--;
10648 pNew->nRef = 1;
10649 }
10650 *pp = pNew;
10651 }
10652}
10653
10654/*
10655** Deserialize and return the structure record currently stored in serialized
10656** form within buffer pData/nData.
10657**
10658** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
10659** are over-allocated by one slot. This allows the structure contents
10660** to be more easily edited.
10661**
10662** If an error occurs, *ppOut is set to NULL and an SQLite error code
10663** returned. Otherwise, *ppOut is set to point to the new object and
10664** SQLITE_OK returned.
10665*/
10666static int fts5StructureDecode(
10667 const u8 *pData, /* Buffer containing serialized structure */
10668 int nData, /* Size of buffer pData in bytes */
10669 int *piCookie, /* Configuration cookie value */
10670 Fts5Structure **ppOut /* OUT: Deserialized object */
10671){
10672 int rc = SQLITE_OK0;
10673 int i = 0;
10674 int iLvl;
10675 int nLevel = 0;
10676 int nSegment = 0;
10677 sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */
10678 Fts5Structure *pRet = 0; /* Structure object to return */
10679 int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */
10680 u64 nOriginCntr = 0; /* Largest origin value seen so far */
10681
10682 /* Grab the cookie value */
10683 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
10684 i = 4;
10685
10686 /* Check if this is a V2 structure record. Set bStructureV2 if it is. */
10687 if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2"\xFF\x00\x00\x01", 4) ){
10688 i += 4;
10689 bStructureV2 = 1;
10690 }
10691
10692 /* Read the total number of levels and segments from the start of the
10693 ** structure record. */
10694 i += fts5GetVarint32(&pData[i], nLevel)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nLevel));
10695 i += fts5GetVarint32(&pData[i], nSegment)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nSegment));
10696 if( nLevel>FTS5_MAX_SEGMENT2000 || nLevel<0
10697 || nSegment>FTS5_MAX_SEGMENT2000 || nSegment<0
10698 ){
10699 return FTS5_CORRUPT(11 | (1<<8));
10700 }
10701 nByte = SZ_FTS5STRUCTURE(nLevel)(__builtin_offsetof(Fts5Structure, aLevel) + (nLevel)*sizeof(
Fts5StructureLevel))
;
10702 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
10703
10704 if( pRet ){
10705 pRet->nRef = 1;
10706 pRet->nLevel = nLevel;
10707 pRet->nSegment = nSegment;
10708 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
10709
10710 for(iLvl=0; rc==SQLITE_OK0 && iLvl<nLevel; iLvl++){
10711 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
10712 int nTotal = 0;
10713 int iSeg;
10714
10715 if( i>=nData ){
10716 rc = FTS5_CORRUPT(11 | (1<<8));
10717 }else{
10718 i += fts5GetVarint32(&pData[i], pLvl->nMerge)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pLvl->nMerge
))
;
10719 i += fts5GetVarint32(&pData[i], nTotal)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nTotal));
10720 if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT(11 | (1<<8));
10721 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
10722 nTotal * sizeof(Fts5StructureSegment)
10723 );
10724 nSegment -= nTotal;
10725 }
10726
10727 if( rc==SQLITE_OK0 ){
10728 pLvl->nSeg = nTotal;
10729 for(iSeg=0; iSeg<nTotal; iSeg++){
10730 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
10731 if( i>=nData ){
10732 rc = FTS5_CORRUPT(11 | (1<<8));
10733 break;
10734 }
10735 assert( pSeg!=0 )((void) (0));
10736 i += fts5GetVarint32(&pData[i], pSeg->iSegid)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->iSegid
))
;
10737 i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->pgnoFirst
))
;
10738 i += fts5GetVarint32(&pData[i], pSeg->pgnoLast)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->pgnoLast
))
;
10739 if( bStructureV2 ){
10740 i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->iOrigin1);
10741 i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->iOrigin2);
10742 i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->nPgTombstone
))
;
10743 i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->nEntryTombstone);
10744 i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->nEntry);
10745 nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2)(((nOriginCntr) > (pSeg->iOrigin2)) ? (nOriginCntr) : (
pSeg->iOrigin2))
;
10746 }
10747 if( pSeg->pgnoLast<pSeg->pgnoFirst ){
10748 rc = FTS5_CORRUPT(11 | (1<<8));
10749 break;
10750 }
10751 }
10752 if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT(11 | (1<<8));
10753 if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT(11 | (1<<8));
10754 }
10755 }
10756 if( nSegment!=0 && rc==SQLITE_OK0 ) rc = FTS5_CORRUPT(11 | (1<<8));
10757 if( bStructureV2 ){
10758 pRet->nOriginCntr = nOriginCntr+1;
10759 }
10760
10761 if( rc!=SQLITE_OK0 ){
10762 fts5StructureRelease(pRet);
10763 pRet = 0;
10764 }
10765 }
10766
10767 *ppOut = pRet;
10768 return rc;
10769}
10770
10771/*
10772** Add a level to the Fts5Structure.aLevel[] array of structure object
10773** (*ppStruct).
10774*/
10775static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
10776 fts5StructureMakeWritable(pRc, ppStruct);
10777 assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK )((void) (0));
10778 if( *pRc==SQLITE_OK0 ){
10779 Fts5Structure *pStruct = *ppStruct;
10780 int nLevel = pStruct->nLevel;
10781 sqlite3_int64 nByte = SZ_FTS5STRUCTURE(nLevel+2)(__builtin_offsetof(Fts5Structure, aLevel) + (nLevel+2)*sizeof
(Fts5StructureLevel))
;
10782
10783 pStruct = sqlite3_realloc64sqlite3_api->realloc64(pStruct, nByte);
10784 if( pStruct ){
10785 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
10786 pStruct->nLevel++;
10787 *ppStruct = pStruct;
10788 }else{
10789 *pRc = SQLITE_NOMEM7;
10790 }
10791 }
10792}
10793
10794/*
10795** Extend level iLvl so that there is room for at least nExtra more
10796** segments.
10797*/
10798static void fts5StructureExtendLevel(
10799 int *pRc,
10800 Fts5Structure *pStruct,
10801 int iLvl,
10802 int nExtra,
10803 int bInsert
10804){
10805 if( *pRc==SQLITE_OK0 ){
10806 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
10807 Fts5StructureSegment *aNew;
10808 sqlite3_int64 nByte;
10809
10810 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
10811 aNew = sqlite3_realloc64sqlite3_api->realloc64(pLvl->aSeg, nByte);
10812 if( aNew ){
10813 if( bInsert==0 ){
10814 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
10815 }else{
10816 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
10817 memmove(&aNew[nExtra], aNew, nMove);
10818 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
10819 }
10820 pLvl->aSeg = aNew;
10821 }else{
10822 *pRc = SQLITE_NOMEM7;
10823 }
10824 }
10825}
10826
10827static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
10828 Fts5Structure *pRet = 0;
10829 Fts5Config *pConfig = p->pConfig;
10830 int iCookie; /* Configuration cookie */
10831 Fts5Data *pData;
10832
10833 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID10);
10834 if( p->rc==SQLITE_OK0 ){
10835 /* TODO: Do we need this if the leaf-index is appended? Probably... */
10836 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING20);
10837 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
10838 if( p->rc==SQLITE_OK0 && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
10839 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
10840 }
10841 fts5DataRelease(pData);
10842 if( p->rc!=SQLITE_OK0 ){
10843 fts5StructureRelease(pRet);
10844 pRet = 0;
10845 }
10846 }
10847
10848 return pRet;
10849}
10850
10851static i64 fts5IndexDataVersion(Fts5Index *p){
10852 i64 iVersion = 0;
10853
10854 if( p->rc==SQLITE_OK0 ){
10855 if( p->pDataVersion==0 ){
10856 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
10857 sqlite3_mprintfsqlite3_api->mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
10858 );
10859 if( p->rc ) return 0;
10860 }
10861
10862 if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(p->pDataVersion) ){
10863 iVersion = sqlite3_column_int64sqlite3_api->column_int64(p->pDataVersion, 0);
10864 }
10865 p->rc = sqlite3_resetsqlite3_api->reset(p->pDataVersion);
10866 }
10867
10868 return iVersion;
10869}
10870
10871/*
10872** Read, deserialize and return the structure record.
10873**
10874** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
10875** are over-allocated as described for function fts5StructureDecode()
10876** above.
10877**
10878** If an error occurs, NULL is returned and an error code left in the
10879** Fts5Index handle. If an error has already occurred when this function
10880** is called, it is a no-op.
10881*/
10882static Fts5Structure *fts5StructureRead(Fts5Index *p){
10883
10884 if( p->pStruct==0 ){
10885 p->iStructVersion = fts5IndexDataVersion(p);
10886 if( p->rc==SQLITE_OK0 ){
10887 p->pStruct = fts5StructureReadUncached(p);
10888 }
10889 }
10890
10891#if 0
10892 else{
10893 Fts5Structure *pTest = fts5StructureReadUncached(p);
10894 if( pTest ){
10895 int i, j;
10896 assert_nc( p->pStruct->nSegment==pTest->nSegment )((void) (0));
10897 assert_nc( p->pStruct->nLevel==pTest->nLevel )((void) (0));
10898 for(i=0; i<pTest->nLevel; i++){
10899 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge )((void) (0));
10900 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg )((void) (0));
10901 for(j=0; j<pTest->aLevel[i].nSeg; j++){
10902 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
10903 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
10904 assert_nc( p1->iSegid==p2->iSegid )((void) (0));
10905 assert_nc( p1->pgnoFirst==p2->pgnoFirst )((void) (0));
10906 assert_nc( p1->pgnoLast==p2->pgnoLast )((void) (0));
10907 }
10908 }
10909 fts5StructureRelease(pTest);
10910 }
10911 }
10912#endif
10913
10914 if( p->rc!=SQLITE_OK0 ) return 0;
10915 assert( p->iStructVersion!=0 )((void) (0));
10916 assert( p->pStruct!=0 )((void) (0));
10917 fts5StructureRef(p->pStruct);
10918 return p->pStruct;
10919}
10920
10921static void fts5StructureInvalidate(Fts5Index *p){
10922 if( p->pStruct ){
10923 fts5StructureRelease(p->pStruct);
10924 p->pStruct = 0;
10925 }
10926}
10927
/*
** Return the total number of segments in index structure pStruct, found
** by summing the nSeg value of every level. Returns 0 if pStruct is NULL.
** This function is only ever used as part of assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Total number of segments */
  int i;                          /* Used to iterate through levels */
  if( pStruct==0 ) return 0;
  for(i=0; i<pStruct->nLevel; i++){
    nTotal += pStruct->aLevel[i].nSeg;
  }
  return nTotal;
}
#endif
10945
/*
** Append the nBlob bytes at pBlob to buffer pBuf. The caller must already
** have ensured that the buffer has enough space - this is only checked by
** an assert().
**
** Every use of a macro argument is parenthesized so that an argument such
** as (a ? b : c) is evaluated as a unit, including inside the assert().
** Arguments are evaluated more than once and must be free of side effects.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {          \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );                \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));              \
  (pBuf)->n += (nBlob);                                         \
}
10951
/*
** Append iVal to buffer pBuf as a varint written by sqlite3Fts5PutVarint().
** The caller must already have ensured that the buffer has enough space -
** this is only checked, after the write, by an assert().
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                      \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));   \
  assert( (pBuf)->nSpace>=(pBuf)->n );                                \
}
10956
10957
10958/*
10959** Serialize and store the "structure" record.
10960**
10961** If an error occurs, leave an error code in the Fts5Index object. If an
10962** error has already occurred, this function is a no-op.
10963*/
10964static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
10965 if( p->rc==SQLITE_OK0 ){
10966 Fts5Buffer buf; /* Buffer to serialize record into */
10967 int iLvl; /* Used to iterate through levels */
10968 int iCookie; /* Cookie value to store */
10969 int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9));
10970
10971 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) )((void) (0));
10972 memset(&buf, 0, sizeof(Fts5Buffer));
10973
10974 /* Append the current configuration cookie */
10975 iCookie = p->pConfig->iCookie;
10976 if( iCookie<0 ) iCookie = 0;
10977
10978 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){
10979 sqlite3Fts5Put32(buf.p, iCookie);
10980 buf.n = 4;
10981 if( pStruct->nOriginCntr>0 ){
10982 fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4){ ((void) (0)); memcpy(&(&buf)->p[(&buf)->n
], "\xFF\x00\x00\x01", 4); (&buf)->n += 4; }
;
10983 }
10984 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)->
p[(&buf)->n], (pStruct->nLevel)); ((void) (0)); }
;
10985 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)->
p[(&buf)->n], (pStruct->nSegment)); ((void) (0)); }
;
10986 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)->
p[(&buf)->n], ((i64)pStruct->nWriteCounter)); ((void
) (0)); }
;
10987 }
10988
10989 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
10990 int iSeg; /* Used to iterate through segments */
10991 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
10992 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pLvl
->nMerge)
;
10993 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pLvl
->nSeg)
;
10994 assert( pLvl->nMerge<=pLvl->nSeg )((void) (0));
10995
10996 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
10997 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
10998 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->iSegid)
;
10999 fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->pgnoFirst)
;
11000 fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->pgnoLast)
;
11001 if( pStruct->nOriginCntr>0 ){
11002 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->iOrigin1)
;
11003 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->iOrigin2)
;
11004 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->nPgTombstone)
;
11005 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->nEntryTombstone)
;
11006 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->nEntry)
;
11007 }
11008 }
11009 }
11010
11011 fts5DataWrite(p, FTS5_STRUCTURE_ROWID10, buf.p, buf.n);
11012 fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf);
11013 }
11014}
11015
/*
** Debugging aid. The real implementation is compiled out ("#if 0"), in
** which case fts5PrintStructure(x,y) expands to nothing. When enabled, it
** renders pStruct with fts5DebugStructure() and prints the result to
** stdout, prefixed by zCaption.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  Fts5Buffer text;
  int rcLocal = SQLITE_OK;

  memset(&text, 0, sizeof(text));
  fts5DebugStructure(&rcLocal, &text, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, text.p);
  fflush(stdout);
  fts5BufferFree(&text);
}
#else
# define fts5PrintStructure(x,y)
#endif
11030
11031static int fts5SegmentSize(Fts5StructureSegment *pSeg){
11032 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
11033}
11034
11035/*
11036** Return a copy of index structure pStruct. Except, promote as many
11037** segments as possible to level iPromote. If an OOM occurs, NULL is
11038** returned.
11039*/
11040static void fts5StructurePromoteTo(
11041 Fts5Index *p,
11042 int iPromote,
11043 int szPromote,
11044 Fts5Structure *pStruct
11045){
11046 int il, is;
11047 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
11048
11049 if( pOut->nMerge==0 ){
11050 for(il=iPromote+1; il<pStruct->nLevel; il++){
11051 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
11052 if( pLvl->nMerge ) return;
11053 for(is=pLvl->nSeg-1; is>=0; is--){
11054 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
11055 if( sz>szPromote ) return;
11056 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
11057 if( p->rc ) return;
11058 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
11059 pOut->nSeg++;
11060 pLvl->nSeg--;
11061 }
11062 }
11063 }
11064}
11065
11066/*
11067** A new segment has just been written to level iLvl of index structure
11068** pStruct. This function determines if any segments should be promoted
11069** as a result. Segments are promoted in two scenarios:
11070**
11071** a) If the segment just written is smaller than one or more segments
11072** within the previous populated level, it is promoted to the previous
11073** populated level.
11074**
11075** b) If the segment just written is larger than the newest segment on
11076** the next populated level, then that segment, and any other adjacent
11077** segments that are also smaller than the one just written, are
11078** promoted.
11079**
11080** If one or more segments are promoted, the structure object is updated
11081** to reflect this.
11082*/
11083static void fts5StructurePromote(
11084 Fts5Index *p, /* FTS5 backend object */
11085 int iLvl, /* Index level just updated */
11086 Fts5Structure *pStruct /* Index structure */
11087){
11088 if( p->rc==SQLITE_OK0 ){
11089 int iTst;
11090 int iPromote = -1;
11091 int szPromote = 0; /* Promote anything this size or smaller */
11092 Fts5StructureSegment *pSeg; /* Segment just written */
11093 int szSeg; /* Size of segment just written */
11094 int nSeg = pStruct->aLevel[iLvl].nSeg;
11095
11096 if( nSeg==0 ) return;
11097 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
11098 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
11099
11100 /* Check for condition (a) */
11101 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
11102 if( iTst>=0 ){
11103 int i;
11104 int szMax = 0;
11105 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
11106 assert( pTst->nMerge==0 )((void) (0));
11107 for(i=0; i<pTst->nSeg; i++){
11108 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
11109 if( sz>szMax ) szMax = sz;
11110 }
11111 if( szMax>=szSeg ){
11112 /* Condition (a) is true. Promote the newest segment on level
11113 ** iLvl to level iTst. */
11114 iPromote = iTst;
11115 szPromote = szMax;
11116 }
11117 }
11118
11119 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
11120 ** is a no-op if it is not. */
11121 if( iPromote<0 ){
11122 iPromote = iLvl;
11123 szPromote = szSeg;
11124 }
11125 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
11126 }
11127}
11128
11129
11130/*
11131** Advance the iterator passed as the only argument. If the end of the
11132** doclist-index page is reached, return non-zero.
11133*/
11134static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
11135 Fts5Data *pData = pLvl->pData;
11136
11137 if( pLvl->iOff==0 ){
11138 assert( pLvl->bEof==0 )((void) (0));
11139 pLvl->iOff = 1;
11140 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno)sqlite3Fts5GetVarint32(&pData->p[1],(u32*)&(pLvl->
iLeafPgno))
;
11141 pLvl->iOff += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
11142 pLvl->iFirstOff = pLvl->iOff;
11143 }else{
11144 int iOff;
11145 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
11146 if( pData->p[iOff] ) break;
11147 }
11148
11149 if( iOff<pData->nn ){
11150 u64 iVal;
11151 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
11152 iOff += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[iOff], &iVal);
11153 pLvl->iRowid += iVal;
11154 pLvl->iOff = iOff;
11155 }else{
11156 pLvl->bEof = 1;
11157 }
11158 }
11159
11160 return pLvl->bEof;
11161}
11162
11163/*
11164** Advance the iterator passed as the only argument.
11165*/
11166static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
11167 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
11168
11169 assert( iLvl<pIter->nLvl )((void) (0));
11170 if( fts5DlidxLvlNext(pLvl) ){
11171 if( (iLvl+1) < pIter->nLvl ){
11172 fts5DlidxIterNextR(p, pIter, iLvl+1);
11173 if( pLvl[1].bEof==0 ){
11174 fts5DataRelease(pLvl->pData);
11175 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
11176 pLvl->pData = fts5DataRead(p,
11177 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) <<
(31 + 5)) + ((i64)(iLvl) << (31)) + ((i64)(pLvl[1].iLeafPgno
)) )
11178 );
11179 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
11180 }
11181 }
11182 }
11183
11184 return pIter->aLvl[0].bEof;
11185}
11186static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
11187 return fts5DlidxIterNextR(p, pIter, 0);
11188}
11189
11190/*
11191** The iterator passed as the first argument has the following fields set
11192** as follows. This function sets up the rest of the iterator so that it
11193** points to the first rowid in the doclist-index.
11194**
11195** pData:
11196** pointer to doclist-index record,
11197**
11198** When this function is called pIter->iLeafPgno is the page number the
11199** doclist is associated with (the one featuring the term).
11200*/
11201static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
11202 int i;
11203 for(i=0; i<pIter->nLvl; i++){
11204 fts5DlidxLvlNext(&pIter->aLvl[i]);
11205 }
11206 return pIter->aLvl[0].bEof;
11207}
11208
11209
11210static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
11211 return p->rc!=SQLITE_OK0 || pIter->aLvl[0].bEof;
11212}
11213
11214static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
11215 int i;
11216
11217 /* Advance each level to the last entry on the last page */
11218 for(i=pIter->nLvl-1; p->rc==SQLITE_OK0 && i>=0; i--){
11219 Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
11220 while( fts5DlidxLvlNext(pLvl)==0 );
11221 pLvl->bEof = 0;
11222
11223 if( i>0 ){
11224 Fts5DlidxLvl *pChild = &pLvl[-1];
11225 fts5DataRelease(pChild->pData);
11226 memset(pChild, 0, sizeof(Fts5DlidxLvl));
11227 pChild->pData = fts5DataRead(p,
11228 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) <<
(31 + 5)) + ((i64)(i-1) << (31)) + ((i64)(pLvl->iLeafPgno
)) )
11229 );
11230 }
11231 }
11232}
11233
11234/*
11235** Move the iterator passed as the only argument to the previous entry.
11236*/
11237static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
11238 int iOff = pLvl->iOff;
11239
11240 assert( pLvl->bEof==0 )((void) (0));
11241 if( iOff<=pLvl->iFirstOff ){
11242 pLvl->bEof = 1;
11243 }else{
11244 u8 *a = pLvl->pData->p;
11245
11246 pLvl->iOff = 0;
11247 fts5DlidxLvlNext(pLvl);
11248 while( 1 ){
11249 int nZero = 0;
11250 int ii = pLvl->iOff;
11251 u64 delta = 0;
11252
11253 while( a[ii]==0 ){
11254 nZero++;
11255 ii++;
11256 }
11257 ii += sqlite3Fts5GetVarint(&a[ii], &delta);
11258
11259 if( ii>=iOff ) break;
11260 pLvl->iLeafPgno += nZero+1;
11261 pLvl->iRowid += delta;
11262 pLvl->iOff = ii;
11263 }
11264 }
11265
11266 return pLvl->bEof;
11267}
11268
11269static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
11270 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
11271
11272 assert( iLvl<pIter->nLvl )((void) (0));
11273 if( fts5DlidxLvlPrev(pLvl) ){
11274 if( (iLvl+1) < pIter->nLvl ){
11275 fts5DlidxIterPrevR(p, pIter, iLvl+1);
11276 if( pLvl[1].bEof==0 ){
11277 fts5DataRelease(pLvl->pData);
11278 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
11279 pLvl->pData = fts5DataRead(p,
11280 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) <<
(31 + 5)) + ((i64)(iLvl) << (31)) + ((i64)(pLvl[1].iLeafPgno
)) )
11281 );
11282 if( pLvl->pData ){
11283 while( fts5DlidxLvlNext(pLvl)==0 );
11284 pLvl->bEof = 0;
11285 }
11286 }
11287 }
11288 }
11289
11290 return pIter->aLvl[0].bEof;
11291}
11292static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
11293 return fts5DlidxIterPrevR(p, pIter, 0);
11294}
11295
11296/*
11297** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
11298*/
11299static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
11300 if( pIter ){
11301 int i;
11302 for(i=0; i<pIter->nLvl; i++){
11303 fts5DataRelease(pIter->aLvl[i].pData);
11304 }
11305 sqlite3_freesqlite3_api->free(pIter);
11306 }
11307}
11308
11309static Fts5DlidxIter *fts5DlidxIterInit(
11310 Fts5Index *p, /* Fts5 Backend to iterate within */
11311 int bRev, /* True for ORDER BY ASC */
11312 int iSegid, /* Segment id */
11313 int iLeafPg /* Leaf page number to load dlidx for */
11314){
11315 Fts5DlidxIter *pIter = 0;
11316 int i;
11317 int bDone = 0;
11318
11319 for(i=0; p->rc==SQLITE_OK0 && bDone==0; i++){
11320 sqlite3_int64 nByte = SZ_FTS5DLIDXITER(i+1)(__builtin_offsetof(Fts5DlidxIter, aLvl)+(i+1)*sizeof(Fts5DlidxLvl
))
;
11321 Fts5DlidxIter *pNew;
11322
11323 pNew = (Fts5DlidxIter*)sqlite3_realloc64sqlite3_api->realloc64(pIter, nByte);
11324 if( pNew==0 ){
11325 p->rc = SQLITE_NOMEM7;
11326 }else{
11327 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(1) << (31
+ 5)) + ((i64)(i) << (31)) + ((i64)(iLeafPg)) )
;
11328 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
11329 pIter = pNew;
11330 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
11331 pLvl->pData = fts5DataRead(p, iRowid);
11332 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
11333 bDone = 1;
11334 }
11335 pIter->nLvl = i+1;
11336 }
11337 }
11338
11339 if( p->rc==SQLITE_OK0 ){
11340 pIter->iSegid = iSegid;
11341 if( bRev==0 ){
11342 fts5DlidxIterFirst(pIter);
11343 }else{
11344 fts5DlidxIterLast(p, pIter);
11345 }
11346 }
11347
11348 if( p->rc!=SQLITE_OK0 ){
11349 fts5DlidxIterFree(pIter);
11350 pIter = 0;
11351 }
11352
11353 return pIter;
11354}
11355
11356static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
11357 return pIter->aLvl[0].iRowid;
11358}
11359static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
11360 return pIter->aLvl[0].iLeafPgno;
11361}
11362
11363/*
11364** Load the next leaf page into the segment iterator.
11365*/
11366static void fts5SegIterNextPage(
11367 Fts5Index *p, /* FTS5 backend object */
11368 Fts5SegIter *pIter /* Iterator to advance to next page */
11369){
11370 Fts5Data *pLeaf;
11371 Fts5StructureSegment *pSeg = pIter->pSeg;
11372 fts5DataRelease(pIter->pLeaf);
11373 pIter->iLeafPgno++;
11374 if( pIter->pNextLeaf ){
11375 pIter->pLeaf = pIter->pNextLeaf;
11376 pIter->pNextLeaf = 0;
11377 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
11378 pIter->pLeaf = fts5LeafRead(p,
11379 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) <<
(31 + 5)) + ((i64)(0) << (31)) + ((i64)(pIter->iLeafPgno
)) )
11380 );
11381 }else{
11382 pIter->pLeaf = 0;
11383 }
11384 pLeaf = pIter->pLeaf;
11385
11386 if( pLeaf ){
11387 pIter->iPgidxOff = pLeaf->szLeaf;
11388 if( fts5LeafIsTermless(pLeaf)((pLeaf)->szLeaf >= (pLeaf)->nn) ){
11389 pIter->iEndofDoclist = pLeaf->nn+1;
11390 }else{
11391 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],sqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
(u32*)&(pIter->iEndofDoclist))
11392 pIter->iEndofDoclistsqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
(u32*)&(pIter->iEndofDoclist))
11393 )sqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
(u32*)&(pIter->iEndofDoclist))
;
11394 }
11395 }
11396}
11397
11398/*
11399** Argument p points to a buffer containing a varint to be interpreted as a
11400** position list size field. Read the varint and return the number of bytes
11401** read. Before returning, set *pnSz to the number of bytes in the position
11402** list, and *pbDel to true if the delete flag is set, or false otherwise.
11403*/
11404static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
11405 int nSz;
11406 int n = 0;
11407 fts5FastGetVarint32(p, n, nSz){ nSz = (p)[n++]; if( nSz & 0x80 ){ n--; n += sqlite3Fts5GetVarint32
(&(p)[n],(u32*)&(nSz)); } }
;
11408 assert_nc( nSz>=0 )((void) (0));
11409 *pnSz = nSz/2;
11410 *pbDel = nSz & 0x0001;
11411 return n;
11412}
11413
11414/*
11415** Fts5SegIter.iLeafOffset currently points to the first byte of a
11416** position-list size field. Read the value of the field and store it
11417** in the following variables:
11418**
11419** Fts5SegIter.nPos
11420** Fts5SegIter.bDel
11421**
11422** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
11423** position list content (if any).
11424*/
11425static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
11426 if( p->rc==SQLITE_OK0 ){
11427 int iOff = pIter->iLeafOffset; /* Offset to read at */
11428 ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0));
11429 if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){
11430 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf)(((pIter->iEndofDoclist) < (pIter->pLeaf->szLeaf)
) ? (pIter->iEndofDoclist) : (pIter->pLeaf->szLeaf))
;
11431 pIter->bDel = 0;
11432 pIter->nPos = 1;
11433 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
11434 pIter->bDel = 1;
11435 iOff++;
11436 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
11437 pIter->nPos = 1;
11438 iOff++;
11439 }else{
11440 pIter->nPos = 0;
11441 }
11442 }
11443 }else{
11444 int nSz;
11445 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz){ nSz = (pIter->pLeaf->p)[iOff++]; if( nSz & 0x80 )
{ iOff--; iOff += sqlite3Fts5GetVarint32(&(pIter->pLeaf
->p)[iOff],(u32*)&(nSz)); } }
;
11446 pIter->bDel = (nSz & 0x0001);
11447 pIter->nPos = nSz>>1;
11448 assert_nc( pIter->nPos>=0 )((void) (0));
11449 }
11450 pIter->iLeafOffset = iOff;
11451 }
11452}
11453
11454static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
11455 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
11456 i64 iOff = pIter->iLeafOffset;
11457
11458 ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0));
11459 while( iOff>=pIter->pLeaf->szLeaf ){
11460 fts5SegIterNextPage(p, pIter);
11461 if( pIter->pLeaf==0 ){
11462 if( p->rc==SQLITE_OK0 ) p->rc = FTS5_CORRUPT(11 | (1<<8));
11463 return;
11464 }
11465 iOff = 4;
11466 a = pIter->pLeaf->p;
11467 }
11468 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
11469 pIter->iLeafOffset = iOff;
11470}
11471
11472/*
11473** Fts5SegIter.iLeafOffset currently points to the first byte of the
11474** "nSuffix" field of a term. Function parameter nKeep contains the value
11475** of the "nPrefix" field (if there was one - it is passed 0 if this is
11476** the first term in the segment).
11477**
11478** This function populates:
11479**
11480** Fts5SegIter.term
11481** Fts5SegIter.rowid
11482**
11483** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
11484** the first position list. The position list belonging to document
11485** (Fts5SegIter.iRowid).
11486*/
11487static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
11488 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
11489 i64 iOff = pIter->iLeafOffset; /* Offset to read at */
11490 int nNew; /* Bytes of new data */
11491
11492 iOff += fts5GetVarint32(&a[iOff], nNew)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nNew));
11493 if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
11494 p->rc = FTS5_CORRUPT(11 | (1<<8));
11495 return;
11496 }
11497 pIter->term.n = nKeep;
11498 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&pIter->term
,nNew,&a[iOff])
;
11499 assert( pIter->term.n<=pIter->term.nSpace )((void) (0));
11500 iOff += nNew;
11501 pIter->iTermLeafOffset = iOff;
11502 pIter->iTermLeafPgno = pIter->iLeafPgno;
11503 pIter->iLeafOffset = iOff;
11504
11505 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
11506 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
11507 }else{
11508 int nExtra;
11509 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra)sqlite3Fts5GetVarint32(&a[pIter->iPgidxOff],(u32*)&
(nExtra))
;
11510 pIter->iEndofDoclist += nExtra;
11511 }
11512
11513 fts5SegIterLoadRowid(p, pIter);
11514}
11515
11516static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
11517static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
11518static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
11519
11520static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
11521 if( pIter->flags & FTS5_SEGITER_REVERSE0x02 ){
11522 pIter->xNext = fts5SegIterNext_Reverse;
11523 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){
11524 pIter->xNext = fts5SegIterNext_None;
11525 }else{
11526 pIter->xNext = fts5SegIterNext;
11527 }
11528}
11529
11530/*
11531** Allocate a tombstone hash page array object (pIter->pTombArray) for
11532** the iterator passed as the second argument. If an OOM error occurs,
11533** leave an error in the Fts5Index object.
11534*/
11535static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){
11536 const int nTomb = pIter->pSeg->nPgTombstone;
11537 if( nTomb>0 ){
11538 int nByte = SZ_FTS5TOMBSTONEARRAY(nTomb+1)(__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(nTomb+1
)*sizeof(Fts5Data*))
;
11539 Fts5TombstoneArray *pNew;
11540 pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte);
11541 if( pNew ){
11542 pNew->nTombstone = nTomb;
11543 pNew->nRef = 1;
11544 pIter->pTombArray = pNew;
11545 }
11546 }
11547}
11548
11549/*
11550** Initialize the iterator object pIter to iterate through the entries in
11551** segment pSeg. The iterator is left pointing to the first entry when
11552** this function returns.
11553**
11554** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
11555** an error has already occurred when this function is called, it is a no-op.
11556*/
11557static void fts5SegIterInit(
11558 Fts5Index *p, /* FTS index object */
11559 Fts5StructureSegment *pSeg, /* Description of segment */
11560 Fts5SegIter *pIter /* Object to populate */
11561){
11562 if( pSeg->pgnoFirst==0 ){
11563 /* This happens if the segment is being used as an input to an incremental
11564 ** merge and all data has already been "trimmed". See function
11565 ** fts5TrimSegments() for details. In this case leave the iterator empty.
11566 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
11567 ** at EOF already. */
11568 assert( pIter->pLeaf==0 )((void) (0));
11569 return;
11570 }
11571
11572 if( p->rc==SQLITE_OK0 ){
11573 memset(pIter, 0, sizeof(*pIter));
11574 fts5SegIterSetNext(p, pIter);
11575 pIter->pSeg = pSeg;
11576 pIter->iLeafPgno = pSeg->pgnoFirst-1;
11577 do {
11578 fts5SegIterNextPage(p, pIter);
11579 }while( p->rc==SQLITE_OK0 && pIter->pLeaf && pIter->pLeaf->nn==4 );
11580 }
11581
11582 if( p->rc==SQLITE_OK0 && pIter->pLeaf ){
11583 pIter->iLeafOffset = 4;
11584 assert( pIter->pLeaf!=0 )((void) (0));
11585 assert_nc( pIter->pLeaf->nn>4 )((void) (0));
11586 assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 )((void) (0));
11587 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
11588 fts5SegIterLoadTerm(p, pIter, 0);
11589 fts5SegIterLoadNPos(p, pIter);
11590 fts5SegIterAllocTombstone(p, pIter);
11591 }
11592}
11593
11594/*
11595** This function is only ever called on iterators created by calls to
11596** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
11597**
11598** The iterator is in an unusual state when this function is called: the
11599** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
11600** the position-list size field for the first relevant rowid on the page.
11601** Fts5SegIter.rowid is set, but nPos and bDel are not.
11602**
11603** This function advances the iterator so that it points to the last
11604** relevant rowid on the page and, if necessary, initializes the
11605** aRowidOffset[] and iRowidOffset variables. At this point the iterator
11606** is in its regular state - Fts5SegIter.iLeafOffset points to the first
11607** byte of the position list content associated with said rowid.
11608*/
11609static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
11610 int eDetail = p->pConfig->eDetail;
11611 int n = pIter->pLeaf->szLeaf;
11612 int i = pIter->iLeafOffset;
11613 u8 *a = pIter->pLeaf->p;
11614 int iRowidOffset = 0;
11615
11616 if( n>pIter->iEndofDoclist ){
11617 n = pIter->iEndofDoclist;
11618 }
11619
11620 ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0));
11621 while( 1 ){
11622 u64 iDelta = 0;
11623
11624 if( eDetail==FTS5_DETAIL_NONE1 ){
11625 /* todo */
11626 if( i<n && a[i]==0 ){
11627 i++;
11628 if( i<n && a[i]==0 ) i++;
11629 }
11630 }else{
11631 int nPos;
11632 int bDummy;
11633 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
11634 i += nPos;
11635 }
11636 if( i>=n ) break;
11637 i += fts5GetVarintsqlite3Fts5GetVarint(&a[i], &iDelta);
11638 pIter->iRowid += iDelta;
11639
11640 /* If necessary, grow the pIter->aRowidOffset[] array. */
11641 if( iRowidOffset>=pIter->nRowidOffset ){
11642 int nNew = pIter->nRowidOffset + 8;
11643 int *aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(pIter->aRowidOffset,nNew*sizeof(int));
11644 if( aNew==0 ){
11645 p->rc = SQLITE_NOMEM7;
11646 break;
11647 }
11648 pIter->aRowidOffset = aNew;
11649 pIter->nRowidOffset = nNew;
11650 }
11651
11652 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
11653 pIter->iLeafOffset = i;
11654 }
11655 pIter->iRowidOffset = iRowidOffset;
11656 fts5SegIterLoadNPos(p, pIter);
11657}
11658
11659/*
11660**
11661*/
11662static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
11663 assert( pIter->flags & FTS5_SEGITER_REVERSE )((void) (0));
11664 assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0));
11665
11666 fts5DataRelease(pIter->pLeaf);
11667 pIter->pLeaf = 0;
11668 while( p->rc==SQLITE_OK0 && pIter->iLeafPgno>pIter->iTermLeafPgno ){
11669 Fts5Data *pNew;
11670 pIter->iLeafPgno--;
11671 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + ((
i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64
)(pIter->iLeafPgno)) )
11672 pIter->pSeg->iSegid, pIter->iLeafPgno( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + ((
i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64
)(pIter->iLeafPgno)) )
11673 )( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + ((
i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64
)(pIter->iLeafPgno)) )
);
11674 if( pNew ){
11675 /* iTermLeafOffset may be equal to szLeaf if the term is the last
11676 ** thing on the page - i.e. the first rowid is on the following page.
11677 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
11678 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
11679 assert( pIter->pLeaf==0 )((void) (0));
11680 if( pIter->iTermLeafOffset<pNew->szLeaf ){
11681 pIter->pLeaf = pNew;
11682 pIter->iLeafOffset = pIter->iTermLeafOffset;
11683 }
11684 }else{
11685 int iRowidOff;
11686 iRowidOff = fts5LeafFirstRowidOff(pNew)(fts5GetU16((pNew)->p));
11687 if( iRowidOff ){
11688 if( iRowidOff>=pNew->szLeaf ){
11689 p->rc = FTS5_CORRUPT(11 | (1<<8));
11690 }else{
11691 pIter->pLeaf = pNew;
11692 pIter->iLeafOffset = iRowidOff;
11693 }
11694 }
11695 }
11696
11697 if( pIter->pLeaf ){
11698 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
11699 pIter->iLeafOffset += fts5GetVarintsqlite3Fts5GetVarint(a, (u64*)&pIter->iRowid);
11700 break;
11701 }else{
11702 fts5DataRelease(pNew);
11703 }
11704 }
11705 }
11706
11707 if( pIter->pLeaf ){
11708 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
11709 fts5SegIterReverseInitPage(p, pIter);
11710 }
11711}
11712
11713/*
11714** Return true if the iterator passed as the second argument currently
11715** points to a delete marker. A delete marker is an entry with a 0 byte
11716** position-list.
11717*/
11718static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
11719 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
11720 return (p->rc==SQLITE_OK0 && pSeg->pLeaf && pSeg->nPos==0);
11721}
11722
11723/*
11724** Advance iterator pIter to the next entry.
11725**
11726** This version of fts5SegIterNext() is only used by reverse iterators.
11727*/
11728static void fts5SegIterNext_Reverse(
11729 Fts5Index *p, /* FTS5 backend object */
11730 Fts5SegIter *pIter, /* Iterator to advance */
11731 int *pbUnused /* Unused */
11732){
11733 assert( pIter->flags & FTS5_SEGITER_REVERSE )((void) (0));
11734 assert( pIter->pNextLeaf==0 )((void) (0));
11735 UNUSED_PARAM(pbUnused)(void)(pbUnused);
11736
11737 if( pIter->iRowidOffset>0 ){
11738 u8 *a = pIter->pLeaf->p;
11739 int iOff;
11740 u64 iDelta;
11741
11742 pIter->iRowidOffset--;
11743 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
11744 fts5SegIterLoadNPos(p, pIter);
11745 iOff = pIter->iLeafOffset;
11746 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE1 ){
11747 iOff += pIter->nPos;
11748 }
11749 fts5GetVarintsqlite3Fts5GetVarint(&a[iOff], &iDelta);
11750 pIter->iRowid -= iDelta;
11751 }else{
11752 fts5SegIterReverseNewPage(p, pIter);
11753 }
11754}
11755
11756/*
11757** Advance iterator pIter to the next entry.
11758**
11759** This version of fts5SegIterNext() is only used if detail=none and the
11760** iterator is not a reverse direction iterator.
11761*/
11762static void fts5SegIterNext_None(
11763 Fts5Index *p, /* FTS5 backend object */
11764 Fts5SegIter *pIter, /* Iterator to advance */
11765 int *pbNewTerm /* OUT: Set for new term */
11766){
11767 int iOff;
11768
11769 assert( p->rc==SQLITE_OK )((void) (0));
11770 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 )((void) (0));
11771 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0));
11772
11773 ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0));
11774 iOff = pIter->iLeafOffset;
11775
11776 /* Next entry is on the next page */
11777 while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
11778 fts5SegIterNextPage(p, pIter);
11779 if( p->rc || pIter->pLeaf==0 ) return;
11780 pIter->iRowid = 0;
11781 iOff = 4;
11782 }
11783
11784 if( iOff<pIter->iEndofDoclist ){
11785 /* Next entry is on the current page */
11786 u64 iDelta;
11787 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
11788 pIter->iLeafOffset = iOff;
11789 pIter->iRowid += iDelta;
11790 }else if( (pIter->flags & FTS5_SEGITER_ONETERM0x01)==0 ){
11791 if( pIter->pSeg ){
11792 int nKeep = 0;
11793 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
11794 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep)sqlite3Fts5GetVarint32(&pIter->pLeaf->p[iOff],(u32*
)&(nKeep))
;
11795 }
11796 pIter->iLeafOffset = iOff;
11797 fts5SegIterLoadTerm(p, pIter, nKeep);
11798 }else{
11799 const u8 *pList = 0;
11800 const char *zTerm = 0;
11801 int nTerm = 0;
11802 int nList;
11803 sqlite3Fts5HashScanNext(p->pHash);
11804 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
11805 if( pList==0 ) goto next_none_eof;
11806 pIter->pLeaf->p = (u8*)pList;
11807 pIter->pLeaf->nn = nList;
11808 pIter->pLeaf->szLeaf = nList;
11809 pIter->iEndofDoclist = nList;
11810 sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm);
11811 pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pList, (u64*)&pIter->iRowid);
11812 }
11813
11814 if( pbNewTerm ) *pbNewTerm = 1;
11815 }else{
11816 goto next_none_eof;
11817 }
11818
11819 fts5SegIterLoadNPos(p, pIter);
11820
11821 return;
11822 next_none_eof:
11823 fts5DataRelease(pIter->pLeaf);
11824 pIter->pLeaf = 0;
11825}
11826
11827
11828/*
11829** Advance iterator pIter to the next entry.
11830**
11831** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
11832** is not considered an error if the iterator reaches EOF. If an error has
11833** already occurred when this function is called, it is a no-op.
11834*/
11835static void fts5SegIterNext(
11836 Fts5Index *p, /* FTS5 backend object */
11837 Fts5SegIter *pIter, /* Iterator to advance */
11838 int *pbNewTerm /* OUT: Set for new term */
11839){
11840 Fts5Data *pLeaf = pIter->pLeaf;
11841 int iOff;
11842 int bNewTerm = 0;
11843 int nKeep = 0;
11844 u8 *a;
11845 int n;
11846
11847 assert( pbNewTerm==0 || *pbNewTerm==0 )((void) (0));
11848 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0));
11849
11850 /* Search for the end of the position list within the current page. */
11851 a = pLeaf->p;
11852 n = pLeaf->szLeaf;
11853
11854 ASSERT_SZLEAF_OK(pLeaf)((void) (0));
11855 iOff = pIter->iLeafOffset + pIter->nPos;
11856
11857 if( iOff<n ){
11858 /* The next entry is on the current page. */
11859 assert_nc( iOff<=pIter->iEndofDoclist )((void) (0));
11860 if( iOff>=pIter->iEndofDoclist ){
11861 bNewTerm = 1;
11862 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
11863 iOff += fts5GetVarint32(&a[iOff], nKeep)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nKeep));
11864 }
11865 }else{
11866 u64 iDelta;
11867 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
11868 pIter->iRowid += iDelta;
11869 assert_nc( iDelta>0 )((void) (0));
11870 }
11871 pIter->iLeafOffset = iOff;
11872
11873 }else if( pIter->pSeg==0 ){
11874 const u8 *pList = 0;
11875 const char *zTerm = 0;
11876 int nTerm = 0;
11877 int nList = 0;
11878 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm )((void) (0));
11879 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM0x01) ){
11880 sqlite3Fts5HashScanNext(p->pHash);
11881 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
11882 }
11883 if( pList==0 ){
11884 fts5DataRelease(pIter->pLeaf);
11885 pIter->pLeaf = 0;
11886 }else{
11887 pIter->pLeaf->p = (u8*)pList;
11888 pIter->pLeaf->nn = nList;
11889 pIter->pLeaf->szLeaf = nList;
11890 pIter->iEndofDoclist = nList+1;
11891 sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm);
11892 pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pList, (u64*)&pIter->iRowid);
11893 *pbNewTerm = 1;
11894 }
11895 }else{
11896 iOff = 0;
11897 /* Next entry is not on the current page */
11898 while( iOff==0 ){
11899 fts5SegIterNextPage(p, pIter);
11900 pLeaf = pIter->pLeaf;
11901 if( pLeaf==0 ) break;
11902 ASSERT_SZLEAF_OK(pLeaf)((void) (0));
11903 if( (iOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p))) && iOff<pLeaf->szLeaf ){
11904 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
11905 pIter->iLeafOffset = iOff;
11906
11907 if( pLeaf->nn>pLeaf->szLeaf ){
11908 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32
*)&(pIter->iEndofDoclist))
11909 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclistsqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32
*)&(pIter->iEndofDoclist))
11910 )sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32
*)&(pIter->iEndofDoclist))
;
11911 }
11912 }
11913 else if( pLeaf->nn>pLeaf->szLeaf ){
11914 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32
*)&(iOff))
11915 &pLeaf->p[pLeaf->szLeaf], iOffsqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32
*)&(iOff))
11916 )sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32
*)&(iOff))
;
11917 pIter->iLeafOffset = iOff;
11918 pIter->iEndofDoclist = iOff;
11919 bNewTerm = 1;
11920 }
11921 assert_nc( iOff<pLeaf->szLeaf )((void) (0));
11922 if( iOff>pLeaf->szLeaf ){
11923 p->rc = FTS5_CORRUPT(11 | (1<<8));
11924 return;
11925 }
11926 }
11927 }
11928
11929 /* Check if the iterator is now at EOF. If so, return early. */
11930 if( pIter->pLeaf ){
11931 if( bNewTerm ){
11932 if( pIter->flags & FTS5_SEGITER_ONETERM0x01 ){
11933 fts5DataRelease(pIter->pLeaf);
11934 pIter->pLeaf = 0;
11935 }else{
11936 fts5SegIterLoadTerm(p, pIter, nKeep);
11937 fts5SegIterLoadNPos(p, pIter);
11938 if( pbNewTerm ) *pbNewTerm = 1;
11939 }
11940 }else{
11941 /* The following could be done by calling fts5SegIterLoadNPos(). But
11942 ** this block is particularly performance critical, so equivalent
11943 ** code is inlined. */
11944 int nSz;
11945 assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn )((void) (0));
11946 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz){ nSz = (pIter->pLeaf->p)[pIter->iLeafOffset++]; if(
nSz & 0x80 ){ pIter->iLeafOffset--; pIter->iLeafOffset
+= sqlite3Fts5GetVarint32(&(pIter->pLeaf->p)[pIter
->iLeafOffset],(u32*)&(nSz)); } }
;
11947 pIter->bDel = (nSz & 0x0001);
11948 pIter->nPos = nSz>>1;
11949 assert_nc( pIter->nPos>=0 )((void) (0));
11950 }
11951 }
11952}
11953
/*
** Exchange the values of lvalues a and b, both of type T. Expands to a
** brace-enclosed block, so it is used as a statement.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=(a); (a)=(b); (b)=tmp; }

/*
** Advance integer lvalue iOff past one varint stored in byte array a[].
** Bytes are consumed while the 0x80 continuation bit is set, but never
** more than 9 bytes in total.
*/
#define fts5IndexSkipVarint(a, iOff) {                  \
  int iEnd = (iOff)+9;                                  \
  while( ((a)[(iOff)++] & 0x80) && (iOff)<iEnd );       \
}
11960
11961/*
11962** Iterator pIter currently points to the first rowid in a doclist. This
11963** function sets the iterator up so that iterates in reverse order through
11964** the doclist.
11965*/
11966static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
11967 Fts5DlidxIter *pDlidx = pIter->pDlidx;
11968 Fts5Data *pLast = 0;
11969 int pgnoLast = 0;
11970
11971 if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION4 ){
11972 int iSegid = pIter->pSeg->iSegid;
11973 pgnoLast = fts5DlidxIterPgno(pDlidx);
11974 pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(pgnoLast)) )
);
11975 }else{
11976 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
11977
11978 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
11979 ** position-list content for the current rowid. Back it up so that it
11980 ** points to the start of the position-list size field. */
11981 int iPoslist;
11982 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
11983 iPoslist = pIter->iTermLeafOffset;
11984 }else{
11985 iPoslist = 4;
11986 }
11987 fts5IndexSkipVarint(pLeaf->p, iPoslist){ int iEnd = iPoslist+9; while( (pLeaf->p[iPoslist++] &
0x80) && iPoslist<iEnd ); }
;
11988 pIter->iLeafOffset = iPoslist;
11989
11990 /* If this condition is true then the largest rowid for the current
11991 ** term may not be stored on the current page. So search forward to
11992 ** see where said rowid really is. */
11993 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
11994 int pgno;
11995 Fts5StructureSegment *pSeg = pIter->pSeg;
11996
11997 /* The last rowid in the doclist may not be on the current page. Search
11998 ** forward to find the page containing the last rowid. */
11999 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
12000 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) <<
(31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) )
;
12001 Fts5Data *pNew = fts5LeafRead(p, iAbs);
12002 if( pNew ){
12003 int iRowid, bTermless;
12004 iRowid = fts5LeafFirstRowidOff(pNew)(fts5GetU16((pNew)->p));
12005 bTermless = fts5LeafIsTermless(pNew)((pNew)->szLeaf >= (pNew)->nn);
12006 if( iRowid ){
12007 SWAPVAL(Fts5Data*, pNew, pLast){ Fts5Data* tmp; tmp=pNew; pNew=pLast; pLast=tmp; };
12008 pgnoLast = pgno;
12009 }
12010 fts5DataRelease(pNew);
12011 if( bTermless==0 ) break;
12012 }
12013 }
12014 }
12015 }
12016
12017 /* If pLast is NULL at this point, then the last rowid for this doclist
12018 ** lies on the page currently indicated by the iterator. In this case
12019 ** pIter->iLeafOffset is already set to point to the position-list size
12020 ** field associated with the first relevant rowid on the page.
12021 **
12022 ** Or, if pLast is non-NULL, then it is the page that contains the last
12023 ** rowid. In this case configure the iterator so that it points to the
12024 ** first rowid on this page.
12025 */
12026 if( pLast ){
12027 int iOff;
12028 fts5DataRelease(pIter->pLeaf);
12029 pIter->pLeaf = pLast;
12030 pIter->iLeafPgno = pgnoLast;
12031 iOff = fts5LeafFirstRowidOff(pLast)(fts5GetU16((pLast)->p));
12032 if( iOff>pLast->szLeaf ){
12033 p->rc = FTS5_CORRUPT(11 | (1<<8));
12034 return;
12035 }
12036 iOff += fts5GetVarintsqlite3Fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
12037 pIter->iLeafOffset = iOff;
12038
12039 if( fts5LeafIsTermless(pLast)((pLast)->szLeaf >= (pLast)->nn) ){
12040 pIter->iEndofDoclist = pLast->nn+1;
12041 }else{
12042 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
12043 }
12044 }
12045
12046 fts5SegIterReverseInitPage(p, pIter);
12047}
12048
12049/*
12050** Iterator pIter currently points to the first rowid of a doclist.
12051** There is a doclist-index associated with the final term on the current
12052** page. If the current term is the last term on the page, load the
12053** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
12054*/
12055static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
12056 int iSeg = pIter->pSeg->iSegid;
12057 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE0x02);
12058 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
12059
12060 assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0));
12061 assert( pIter->pDlidx==0 )((void) (0));
12062
12063 /* Check if the current doclist ends on this page. If it does, return
12064 ** early without loading the doclist-index (as it belongs to a different
12065 ** term. */
12066 if( pIter->iTermLeafPgno==pIter->iLeafPgno
12067 && pIter->iEndofDoclist<pLeaf->szLeaf
12068 ){
12069 return;
12070 }
12071
12072 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
12073}
12074
12075/*
12076** The iterator object passed as the second argument currently contains
12077** no valid values except for the Fts5SegIter.pLeaf member variable. This
12078** function searches the leaf page for a term matching (pTerm/nTerm).
12079**
12080** If the specified term is found on the page, then the iterator is left
12081** pointing to it. If argument bGe is zero and the term is not found,
12082** the iterator is left pointing at EOF.
12083**
12084** If bGe is non-zero and the specified term is not found, then the
12085** iterator is left pointing to the smallest term in the segment that
12086** is larger than the specified term, even if this term is not on the
12087** current page.
12088*/
12089static void fts5LeafSeek(
12090 Fts5Index *p, /* Leave any error code here */
12091 int bGe, /* True for a >= search */
12092 Fts5SegIter *pIter, /* Iterator to seek */
12093 const u8 *pTerm, int nTerm /* Term to search for */
12094){
12095 u32 iOff;
12096 const u8 *a = pIter->pLeaf->p;
12097 u32 n = (u32)pIter->pLeaf->nn;
12098
12099 u32 nMatch = 0;
12100 u32 nKeep = 0;
12101 u32 nNew = 0;
12102 u32 iTermOff;
12103 u32 iPgidx; /* Current offset in pgidx */
12104 int bEndOfPage = 0;
12105
12106 assert( p->rc==SQLITE_OK )((void) (0));
12107
12108 iPgidx = (u32)pIter->pLeaf->szLeaf;
12109 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(iTermOff));
12110 iOff = iTermOff;
12111 if( iOff>n ){
12112 p->rc = FTS5_CORRUPT(11 | (1<<8));
12113 return;
12114 }
12115
12116 while( 1 ){
12117
12118 /* Figure out how many new bytes are in this term */
12119 fts5FastGetVarint32(a, iOff, nNew){ nNew = (a)[iOff++]; if( nNew & 0x80 ){ iOff--; iOff += sqlite3Fts5GetVarint32
(&(a)[iOff],(u32*)&(nNew)); } }
;
12120 if( nKeep<nMatch ){
12121 goto search_failed;
12122 }
12123
12124 assert( nKeep>=nMatch )((void) (0));
12125 if( nKeep==nMatch ){
12126 u32 nCmp;
12127 u32 i;
12128 nCmp = (u32)MIN(nNew, nTerm-nMatch)(((nNew) < (nTerm-nMatch)) ? (nNew) : (nTerm-nMatch));
12129 for(i=0; i<nCmp; i++){
12130 if( a[iOff+i]!=pTerm[nMatch+i] ) break;
12131 }
12132 nMatch += i;
12133
12134 if( (u32)nTerm==nMatch ){
12135 if( i==nNew ){
12136 goto search_success;
12137 }else{
12138 goto search_failed;
12139 }
12140 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
12141 goto search_failed;
12142 }
12143 }
12144
12145 if( iPgidx>=n ){
12146 bEndOfPage = 1;
12147 break;
12148 }
12149
12150 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(nKeep));
12151 iTermOff += nKeep;
12152 iOff = iTermOff;
12153
12154 if( iOff>=n ){
12155 p->rc = FTS5_CORRUPT(11 | (1<<8));
12156 return;
12157 }
12158
12159 /* Read the nKeep field of the next term. */
12160 fts5FastGetVarint32(a, iOff, nKeep){ nKeep = (a)[iOff++]; if( nKeep & 0x80 ){ iOff--; iOff +=
sqlite3Fts5GetVarint32(&(a)[iOff],(u32*)&(nKeep)); }
}
;
12161 }
12162
12163 search_failed:
12164 if( bGe==0 ){
12165 fts5DataRelease(pIter->pLeaf);
12166 pIter->pLeaf = 0;
12167 return;
12168 }else if( bEndOfPage ){
12169 do {
12170 fts5SegIterNextPage(p, pIter);
12171 if( pIter->pLeaf==0 ) return;
12172 a = pIter->pLeaf->p;
12173 if( fts5LeafIsTermless(pIter->pLeaf)((pIter->pLeaf)->szLeaf >= (pIter->pLeaf)->nn)==0 ){
12174 iPgidx = (u32)pIter->pLeaf->szLeaf;
12175 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff)sqlite3Fts5GetVarint32(&pIter->pLeaf->p[iPgidx],(u32
*)&(iOff))
;
12176 if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
12177 p->rc = FTS5_CORRUPT(11 | (1<<8));
12178 return;
12179 }else{
12180 nKeep = 0;
12181 iTermOff = iOff;
12182 n = (u32)pIter->pLeaf->nn;
12183 iOff += fts5GetVarint32(&a[iOff], nNew)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nNew));
12184 break;
12185 }
12186 }
12187 }while( 1 );
12188 }
12189
12190 search_success:
12191 if( (i64)iOff+nNew>n || nNew<1 ){
12192 p->rc = FTS5_CORRUPT(11 | (1<<8));
12193 return;
12194 }
12195 pIter->iLeafOffset = iOff + nNew;
12196 pIter->iTermLeafOffset = pIter->iLeafOffset;
12197 pIter->iTermLeafPgno = pIter->iLeafPgno;
12198
12199 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm)sqlite3Fts5BufferSet(&p->rc,&pIter->term,nKeep,
pTerm)
;
12200 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&pIter->term
,nNew,&a[iOff])
;
12201
12202 if( iPgidx>=n ){
12203 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
12204 }else{
12205 int nExtra;
12206 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(nExtra));
12207 pIter->iEndofDoclist = iTermOff + nExtra;
12208 }
12209 pIter->iPgidxOff = iPgidx;
12210
12211 fts5SegIterLoadRowid(p, pIter);
12212 fts5SegIterLoadNPos(p, pIter);
12213}
12214
12215static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
12216 if( p->pIdxSelect==0 ){
12217 Fts5Config *pConfig = p->pConfig;
12218 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintfsqlite3_api->mprintf(
12219 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
12220 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
12221 pConfig->zDb, pConfig->zName
12222 ));
12223 }
12224 return p->pIdxSelect;
12225}
12226
12227/*
12228** Initialize the object pIter to point to term pTerm/nTerm within segment
12229** pSeg. If there is no such term in the index, the iterator is set to EOF.
12230**
12231** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
12232** an error has already occurred when this function is called, it is a no-op.
12233*/
12234static void fts5SegIterSeekInit(
12235 Fts5Index *p, /* FTS5 backend */
12236 const u8 *pTerm, int nTerm, /* Term to seek to */
12237 int flags, /* Mask of FTS5INDEX_XXX flags */
12238 Fts5StructureSegment *pSeg, /* Description of segment */
12239 Fts5SegIter *pIter /* Object to populate */
12240){
12241 int iPg = 1;
12242 int bGe = (flags & FTS5INDEX_QUERY_SCAN0x0008);
12243 int bDlidx = 0; /* True if there is a doclist-index */
12244 sqlite3_stmt *pIdxSelect = 0;
12245
12246 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 )((void) (0));
12247 assert( pTerm && nTerm )((void) (0));
12248 memset(pIter, 0, sizeof(*pIter));
12249 pIter->pSeg = pSeg;
12250
12251 /* This block sets stack variable iPg to the leaf page number that may
12252 ** contain term (pTerm/nTerm), if it is present in the segment. */
12253 pIdxSelect = fts5IdxSelectStmt(p);
12254 if( p->rc ) return;
12255 sqlite3_bind_intsqlite3_api->bind_int(pIdxSelect, 1, pSeg->iSegid);
12256 sqlite3_bind_blobsqlite3_api->bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC((sqlite3_destructor_type)0));
12257 if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pIdxSelect) ){
12258 i64 val = sqlite3_column_intsqlite3_api->column_int(pIdxSelect, 0);
12259 iPg = (int)(val>>1);
12260 bDlidx = (val & 0x0001);
12261 }
12262 p->rc = sqlite3_resetsqlite3_api->reset(pIdxSelect);
12263 sqlite3_bind_nullsqlite3_api->bind_null(pIdxSelect, 2);
12264
12265 if( iPg<pSeg->pgnoFirst ){
12266 iPg = pSeg->pgnoFirst;
12267 bDlidx = 0;
12268 }
12269
12270 pIter->iLeafPgno = iPg - 1;
12271 fts5SegIterNextPage(p, pIter);
12272
12273 if( pIter->pLeaf ){
12274 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
12275 }
12276
12277 if( p->rc==SQLITE_OK0 && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM0x0100)) ){
12278 pIter->flags |= FTS5_SEGITER_ONETERM0x01;
12279 if( pIter->pLeaf ){
12280 if( flags & FTS5INDEX_QUERY_DESC0x0002 ){
12281 pIter->flags |= FTS5_SEGITER_REVERSE0x02;
12282 }
12283 if( bDlidx ){
12284 fts5SegIterLoadDlidx(p, pIter);
12285 }
12286 if( flags & FTS5INDEX_QUERY_DESC0x0002 ){
12287 fts5SegIterReverse(p, pIter);
12288 }
12289 }
12290 }
12291
12292 fts5SegIterSetNext(p, pIter);
12293 if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM0x0100) ){
12294 fts5SegIterAllocTombstone(p, pIter);
12295 }
12296
12297 /* Either:
12298 **
12299 ** 1) an error has occurred, or
12300 ** 2) the iterator points to EOF, or
12301 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
12302 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
12303 ** to an entry with a term greater than or equal to (pTerm/nTerm).
12304 */
12305 assert_nc( p->rc!=SQLITE_OK /* 1 */((void) (0))
12306 || pIter->pLeaf==0 /* 2 */((void) (0))
12307 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */((void) (0))
12308 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */((void) (0))
12309 )((void) (0));
12310}
12311
12312
12313/*
12314** SQL used by fts5SegIterNextInit() to find the page to open.
12315*/
12316static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){
12317 if( p->pIdxNextSelect==0 ){
12318 Fts5Config *pConfig = p->pConfig;
12319 fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintfsqlite3_api->mprintf(
12320 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
12321 "segid=? AND term>? ORDER BY term ASC LIMIT 1",
12322 pConfig->zDb, pConfig->zName
12323 ));
12324
12325 }
12326 return p->pIdxNextSelect;
12327}
12328
12329/*
12330** This is similar to fts5SegIterSeekInit(), except that it initializes
12331** the segment iterator to point to the first term following the page
12332** with pToken/nToken on it.
12333*/
12334static void fts5SegIterNextInit(
12335 Fts5Index *p,
12336 const char *pTerm, int nTerm,
12337 Fts5StructureSegment *pSeg, /* Description of segment */
12338 Fts5SegIter *pIter /* Object to populate */
12339){
12340 int iPg = -1; /* Page of segment to open */
12341 int bDlidx = 0;
12342 sqlite3_stmt *pSel = 0; /* SELECT to find iPg */
12343
12344 pSel = fts5IdxNextStmt(p);
12345 if( pSel ){
12346 assert( p->rc==SQLITE_OK )((void) (0));
12347 sqlite3_bind_intsqlite3_api->bind_int(pSel, 1, pSeg->iSegid);
12348 sqlite3_bind_blobsqlite3_api->bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC((sqlite3_destructor_type)0));
12349
12350 if( sqlite3_stepsqlite3_api->step(pSel)==SQLITE_ROW100 ){
12351 i64 val = sqlite3_column_int64sqlite3_api->column_int64(pSel, 0);
12352 iPg = (int)(val>>1);
12353 bDlidx = (val & 0x0001);
12354 }
12355 p->rc = sqlite3_resetsqlite3_api->reset(pSel);
12356 sqlite3_bind_nullsqlite3_api->bind_null(pSel, 2);
12357 if( p->rc ) return;
12358 }
12359
12360 memset(pIter, 0, sizeof(*pIter));
12361 pIter->pSeg = pSeg;
12362 pIter->flags |= FTS5_SEGITER_ONETERM0x01;
12363 if( iPg>=0 ){
12364 pIter->iLeafPgno = iPg - 1;
12365 fts5SegIterNextPage(p, pIter);
12366 fts5SegIterSetNext(p, pIter);
12367 }
12368 if( pIter->pLeaf ){
12369 const u8 *a = pIter->pLeaf->p;
12370 int iTermOff = 0;
12371
12372 pIter->iPgidxOff = pIter->pLeaf->szLeaf;
12373 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[pIter->iPgidxOff],(u32*)&
(iTermOff))
;
12374 pIter->iLeafOffset = iTermOff;
12375 fts5SegIterLoadTerm(p, pIter, 0);
12376 fts5SegIterLoadNPos(p, pIter);
12377 if( bDlidx ) fts5SegIterLoadDlidx(p, pIter);
12378
12379 assert( p->rc!=SQLITE_OK ||((void) (0))
12380 fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0((void) (0))
12381 )((void) (0));
12382 }
12383}
12384
12385/*
12386** Initialize the object pIter to point to term pTerm/nTerm within the
12387** in-memory hash table. If there is no such term in the hash-table, the
12388** iterator is set to EOF.
12389**
12390** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
12391** an error has already occurred when this function is called, it is a no-op.
12392*/
12393static void fts5SegIterHashInit(
12394 Fts5Index *p, /* FTS5 backend */
12395 const u8 *pTerm, int nTerm, /* Term to seek to */
12396 int flags, /* Mask of FTS5INDEX_XXX flags */
12397 Fts5SegIter *pIter /* Object to populate */
12398){
12399 int nList = 0;
12400 const u8 *z = 0;
12401 int n = 0;
12402 Fts5Data *pLeaf = 0;
12403
12404 assert( p->pHash )((void) (0));
12405 assert( p->rc==SQLITE_OK )((void) (0));
12406
12407 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN0x0008) ){
12408 const u8 *pList = 0;
12409
12410 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
12411 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList);
12412 if( pList ){
12413 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
12414 if( pLeaf ){
12415 pLeaf->p = (u8*)pList;
12416 }
12417 }
12418
12419 /* The call to sqlite3Fts5HashScanInit() causes the hash table to
12420 ** fill the size field of all existing position lists. This means they
12421 ** can no longer be appended to. Since the only scenario in which they
12422 ** can be appended to is if the previous operation on this table was
12423 ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this
12424 ** possibility altogether. */
12425 p->bDelete = 0;
12426 }else{
12427 p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
12428 (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
12429 );
12430 if( pLeaf ){
12431 pLeaf->p = (u8*)&pLeaf[1];
12432 }
12433 z = pTerm;
12434 n = nTerm;
12435 pIter->flags |= FTS5_SEGITER_ONETERM0x01;
12436 }
12437
12438 if( pLeaf ){
12439 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
12440 pLeaf->nn = pLeaf->szLeaf = nList;
12441 pIter->pLeaf = pLeaf;
12442 pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
12443 pIter->iEndofDoclist = pLeaf->nn;
12444
12445 if( flags & FTS5INDEX_QUERY_DESC0x0002 ){
12446 pIter->flags |= FTS5_SEGITER_REVERSE0x02;
12447 fts5SegIterReverseInitPage(p, pIter);
12448 }else{
12449 fts5SegIterLoadNPos(p, pIter);
12450 }
12451 }
12452
12453 fts5SegIterSetNext(p, pIter);
12454}
12455
12456/*
12457** Array ap[] contains n elements. Release each of these elements using
12458** fts5DataRelease(). Then free the array itself using sqlite3_free().
12459*/
12460static void fts5IndexFreeArray(Fts5Data **ap, int n){
12461 if( ap ){
12462 int ii;
12463 for(ii=0; ii<n; ii++){
12464 fts5DataRelease(ap[ii]);
12465 }
12466 sqlite3_freesqlite3_api->free(ap);
12467 }
12468}
12469
12470/*
12471** Decrement the ref-count of the object passed as the only argument. If it
12472** reaches 0, free it and its contents.
12473*/
12474static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){
12475 if( p ){
12476 p->nRef--;
12477 if( p->nRef<=0 ){
12478 int ii;
12479 for(ii=0; ii<p->nTombstone; ii++){
12480 fts5DataRelease(p->apTombstone[ii]);
12481 }
12482 sqlite3_freesqlite3_api->free(p);
12483 }
12484 }
12485}
12486
12487/*
12488** Zero the iterator passed as the only argument.
12489*/
12490static void fts5SegIterClear(Fts5SegIter *pIter){
12491 fts5BufferFree(&pIter->term)sqlite3Fts5BufferFree(&pIter->term);
12492 fts5DataRelease(pIter->pLeaf);
12493 fts5DataRelease(pIter->pNextLeaf);
12494 fts5TombstoneArrayDelete(pIter->pTombArray);
12495 fts5DlidxIterFree(pIter->pDlidx);
12496 sqlite3_freesqlite3_api->free(pIter->aRowidOffset);
12497 memset(pIter, 0, sizeof(Fts5SegIter));
12498}
12499
12500#ifdef SQLITE_DEBUG
12501
12502/*
12503** This function is used as part of the big assert() procedure implemented by
12504** fts5AssertMultiIterSetup(). It ensures that the result currently stored
12505** in *pRes is the correct result of comparing the current positions of the
12506** two iterators.
12507*/
12508static void fts5AssertComparisonResult(
12509 Fts5Iter *pIter,
12510 Fts5SegIter *p1,
12511 Fts5SegIter *p2,
12512 Fts5CResult *pRes
12513){
12514 int i1 = p1 - pIter->aSeg;
12515 int i2 = p2 - pIter->aSeg;
12516
12517 if( p1->pLeaf || p2->pLeaf ){
12518 if( p1->pLeaf==0 ){
12519 assert( pRes->iFirst==i2 )((void) (0));
12520 }else if( p2->pLeaf==0 ){
12521 assert( pRes->iFirst==i1 )((void) (0));
12522 }else{
12523 int nMin = MIN(p1->term.n, p2->term.n)(((p1->term.n) < (p2->term.n)) ? (p1->term.n) : (
p2->term.n))
;
12524 int res = fts5Memcmp(p1->term.p, p2->term.p, nMin)((nMin)<=0 ? 0 : memcmp((p1->term.p), (p2->term.p), (
nMin)))
;
12525 if( res==0 ) res = p1->term.n - p2->term.n;
12526
12527 if( res==0 ){
12528 assert( pRes->bTermEq==1 )((void) (0));
12529 assert( p1->iRowid!=p2->iRowid )((void) (0));
12530 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
12531 }else{
12532 assert( pRes->bTermEq==0 )((void) (0));
12533 }
12534
12535 if( res<0 ){
12536 assert( pRes->iFirst==i1 )((void) (0));
12537 }else{
12538 assert( pRes->iFirst==i2 )((void) (0));
12539 }
12540 }
12541 }
12542}
12543
12544/*
12545** This function is a no-op unless SQLITE_DEBUG is defined when this module
12546** is compiled. In that case, this function is essentially an assert()
12547** statement used to verify that the contents of the pIter->aFirst[] array
12548** are correct.
12549*/
12550static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
12551 if( p->rc==SQLITE_OK0 ){
12552 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
12553 int i;
12554
12555 assert( (pFirst->pLeaf==0)==pIter->base.bEof )((void) (0));
12556
12557 /* Check that pIter->iSwitchRowid is set correctly. */
12558 for(i=0; i<pIter->nSeg; i++){
12559 Fts5SegIter *p1 = &pIter->aSeg[i];
12560 assert( p1==pFirst((void) (0))
12561 || p1->pLeaf==0((void) (0))
12562 || fts5BufferCompare(&pFirst->term, &p1->term)((void) (0))
12563 || p1->iRowid==pIter->iSwitchRowid((void) (0))
12564 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev((void) (0))
12565 )((void) (0));
12566 }
12567
12568 for(i=0; i<pIter->nSeg; i+=2){
12569 Fts5SegIter *p1 = &pIter->aSeg[i];
12570 Fts5SegIter *p2 = &pIter->aSeg[i+1];
12571 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
12572 fts5AssertComparisonResult(pIter, p1, p2, pRes);
12573 }
12574
12575 for(i=1; i<(pIter->nSeg / 2); i+=2){
12576 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
12577 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
12578 Fts5CResult *pRes = &pIter->aFirst[i];
12579 fts5AssertComparisonResult(pIter, p1, p2, pRes);
12580 }
12581 }
12582}
12583#else
12584# define fts5AssertMultiIterSetup(x,y)
12585#endif
12586
12587/*
12588** Do the comparison necessary to populate pIter->aFirst[iOut].
12589**
12590** If the returned value is non-zero, then it is the index of an entry
12591** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
12592** to a key that is a duplicate of another, higher priority,
12593** segment-iterator in the pSeg->aSeg[] array.
12594*/
12595static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
12596 int i1; /* Index of left-hand Fts5SegIter */
12597 int i2; /* Index of right-hand Fts5SegIter */
12598 int iRes;
12599 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
12600 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
12601 Fts5CResult *pRes = &pIter->aFirst[iOut];
12602
12603 assert( iOut<pIter->nSeg && iOut>0 )((void) (0));
12604 assert( pIter->bRev==0 || pIter->bRev==1 )((void) (0));
12605
12606 if( iOut>=(pIter->nSeg/2) ){
12607 i1 = (iOut - pIter->nSeg/2) * 2;
12608 i2 = i1 + 1;
12609 }else{
12610 i1 = pIter->aFirst[iOut*2].iFirst;
12611 i2 = pIter->aFirst[iOut*2+1].iFirst;
12612 }
12613 p1 = &pIter->aSeg[i1];
12614 p2 = &pIter->aSeg[i2];
12615
12616 pRes->bTermEq = 0;
12617 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
12618 iRes = i2;
12619 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
12620 iRes = i1;
12621 }else{
12622 int res = fts5BufferCompare(&p1->term, &p2->term);
12623 if( res==0 ){
12624 assert_nc( i2>i1 )((void) (0));
12625 assert_nc( i2!=0 )((void) (0));
12626 pRes->bTermEq = 1;
12627 if( p1->iRowid==p2->iRowid ){
12628 return i2;
12629 }
12630 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
12631 }
12632 assert( res!=0 )((void) (0));
12633 if( res<0 ){
12634 iRes = i1;
12635 }else{
12636 iRes = i2;
12637 }
12638 }
12639
12640 pRes->iFirst = (u16)iRes;
12641 return 0;
12642}
12643
12644/*
12645** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
12646** It is an error if leaf iLeafPgno does not exist. Unless the db is
12647** a 'secure-delete' db, if it contains no rowids then this is also an error.
12648*/
12649static void fts5SegIterGotoPage(
12650 Fts5Index *p, /* FTS5 backend object */
12651 Fts5SegIter *pIter, /* Iterator to advance */
12652 int iLeafPgno
12653){
12654 assert( iLeafPgno>pIter->iLeafPgno )((void) (0));
12655
12656 if( iLeafPgno>pIter->pSeg->pgnoLast ){
12657 p->rc = FTS5_CORRUPT(11 | (1<<8));
12658 }else{
12659 fts5DataRelease(pIter->pNextLeaf);
12660 pIter->pNextLeaf = 0;
12661 pIter->iLeafPgno = iLeafPgno-1;
12662
12663 while( p->rc==SQLITE_OK0 ){
12664 int iOff;
12665 fts5SegIterNextPage(p, pIter);
12666 if( pIter->pLeaf==0 ) break;
12667 iOff = fts5LeafFirstRowidOff(pIter->pLeaf)(fts5GetU16((pIter->pLeaf)->p));
12668 if( iOff>0 ){
12669 u8 *a = pIter->pLeaf->p;
12670 int n = pIter->pLeaf->szLeaf;
12671 if( iOff<4 || iOff>=n ){
12672 p->rc = FTS5_CORRUPT(11 | (1<<8));
12673 }else{
12674 iOff += fts5GetVarintsqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
12675 pIter->iLeafOffset = iOff;
12676 fts5SegIterLoadNPos(p, pIter);
12677 }
12678 break;
12679 }
12680 }
12681 }
12682}
12683
12684/*
12685** Advance the iterator passed as the second argument until it is at or
12686** past rowid iFrom. Regardless of the value of iFrom, the iterator is
12687** always advanced at least once.
12688*/
12689static void fts5SegIterNextFrom(
12690 Fts5Index *p, /* FTS5 backend object */
12691 Fts5SegIter *pIter, /* Iterator to advance */
12692 i64 iMatch /* Advance iterator at least this far */
12693){
12694 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE0x02);
12695 Fts5DlidxIter *pDlidx = pIter->pDlidx;
12696 int iLeafPgno = pIter->iLeafPgno;
12697 int bMove = 1;
12698
12699 assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0));
12700 assert( pIter->pDlidx )((void) (0));
12701 assert( pIter->pLeaf )((void) (0));
12702
12703 if( bRev==0 ){
12704 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
12705 iLeafPgno = fts5DlidxIterPgno(pDlidx);
12706 fts5DlidxIterNext(p, pDlidx);
12707 }
12708 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc )((void) (0));
12709 if( iLeafPgno>pIter->iLeafPgno ){
12710 fts5SegIterGotoPage(p, pIter, iLeafPgno);
12711 bMove = 0;
12712 }
12713 }else{
12714 assert( pIter->pNextLeaf==0 )((void) (0));
12715 assert( iMatch<pIter->iRowid )((void) (0));
12716 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
12717 fts5DlidxIterPrev(p, pDlidx);
12718 }
12719 iLeafPgno = fts5DlidxIterPgno(pDlidx);
12720
12721 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno )((void) (0));
12722
12723 if( iLeafPgno<pIter->iLeafPgno ){
12724 pIter->iLeafPgno = iLeafPgno+1;
12725 fts5SegIterReverseNewPage(p, pIter);
12726 bMove = 0;
12727 }
12728 }
12729
12730 do{
12731 if( bMove && p->rc==SQLITE_OK0 ) pIter->xNext(p, pIter, 0);
12732 if( pIter->pLeaf==0 ) break;
12733 if( bRev==0 && pIter->iRowid>=iMatch ) break;
12734 if( bRev!=0 && pIter->iRowid<=iMatch ) break;
12735 bMove = 1;
12736 }while( p->rc==SQLITE_OK0 );
12737}
12738
12739/*
12740** Free the iterator object passed as the second argument.
12741*/
12742static void fts5MultiIterFree(Fts5Iter *pIter){
12743 if( pIter ){
12744 int i;
12745 for(i=0; i<pIter->nSeg; i++){
12746 fts5SegIterClear(&pIter->aSeg[i]);
12747 }
12748 fts5BufferFree(&pIter->poslist)sqlite3Fts5BufferFree(&pIter->poslist);
12749 sqlite3_freesqlite3_api->free(pIter);
12750 }
12751}
12752
12753static void fts5MultiIterAdvanced(
12754 Fts5Index *p, /* FTS5 backend to iterate within */
12755 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
12756 int iChanged, /* Index of sub-iterator just advanced */
12757 int iMinset /* Minimum entry in aFirst[] to set */
12758){
12759 int i;
12760 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK0; i=i/2){
12761 int iEq;
12762 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
12763 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
12764 assert( p->rc==SQLITE_OK )((void) (0));
12765 pSeg->xNext(p, pSeg, 0);
12766 i = pIter->nSeg + iEq;
12767 }
12768 }
12769}
12770
12771/*
12772** Sub-iterator iChanged of iterator pIter has just been advanced. It still
12773** points to the same term though - just a different rowid. This function
12774** attempts to update the contents of the pIter->aFirst[] accordingly.
12775** If it does so successfully, 0 is returned. Otherwise 1.
12776**
12777** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
12778** on the iterator instead. That function does the same as this one, except
12779** that it deals with more complicated cases as well.
**
** When 0 is returned, *ppFirst is set to the sub-iterator this function
** settled on (pNew). When 1 is returned *ppFirst is not written.
**
** The aFirst[] path is only re-walked if the new rowid has reached or
** crossed pIter->iSwitchRowid in the direction of iteration; otherwise
** the sub-iterator just advanced is reported unchanged.
12780*/
12781static int fts5MultiIterAdvanceRowid(
12782 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
12783 int iChanged, /* Index of sub-iterator just advanced */
12784 Fts5SegIter **ppFirst /* OUT: sub-iterator selected (only on return 0) */
12785){
12786 Fts5SegIter *pNew = &pIter->aSeg[iChanged];
12787
 /* The comparison result (0 or 1) is tested against bRev, so bRev is
 ** expected to hold exactly 0 or 1. */
12788 if( pNew->iRowid==pIter->iSwitchRowid
12789 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
12790 ){
12791 int i;
 /* Sibling of iChanged at the leaf level of the tree. */
12792 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
 /* Reset the switch point to the extreme value for this direction; it
 ** is tightened again while walking towards the root. */
12793 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))) : LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32));
12794 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
12795 Fts5CResult *pRes = &pIter->aFirst[i];
12796
12797 assert( pNew->pLeaf )((void) (0));
12798 assert( pRes->bTermEq==0 || pOther->pLeaf )((void) (0));
12799
 /* Rowids are only compared when this slot is flagged bTermEq. */
12800 if( pRes->bTermEq ){
12801 if( pNew->iRowid==pOther->iRowid ){
 /* Identical rowids: leave it to fts5MultiIterAdvanced(). */
12802 return 1;
12803 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
 /* pOther sorts ahead of pNew in iteration order: it takes over. */
12804 pIter->iSwitchRowid = pOther->iRowid;
12805 pNew = pOther;
12806 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
 /* pNew stays selected, but pOther's rowid is a nearer switch point. */
12807 pIter->iSwitchRowid = pOther->iRowid;
12808 }
12809 }
12810 pRes->iFirst = (u16)(pNew - pIter->aSeg);
12811 if( i==1 ) break;
12812
 /* Next competitor: whatever the sibling slot currently selects. */
12813 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
12814 }
12815 }
12816
12817 *ppFirst = pNew;
12818 return 0;
12819}
12820
12821/*
12822** Set the pIter->bEof variable based on the state of the sub-iterators.
12823*/
12824static void fts5MultiIterSetEof(Fts5Iter *pIter){
12825 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
12826 pIter->base.bEof = pSeg->pLeaf==0;
12827 pIter->iSwitchRowid = pSeg->iRowid;
12828}
12829
/*
** The argument to this macro must be a pointer to an Fts5Data structure
** containing a tombstone hash page. This macro returns the key-size of
** the hash-page in bytes: 4 if the first byte of the page is 4, otherwise
** 8.
**
** The argument is parenthesised so that any pointer-valued expression
** (for example "&sPage") may be passed. It is evaluated more than once,
** so it must not have side effects.
*/
#define TOMBSTONE_KEYSIZE(pPg) ((pPg)->p[0]==4 ? 4 : 8)

/*
** Return the number of key slots on tombstone hash page pPg. For a page
** of more than 16 bytes this is the space remaining after the first 8
** bytes divided by the key size; otherwise it is 1.
*/
#define TOMBSTONE_NSLOT(pPg)   \
  (((pPg)->nn > 16) ? (((pPg)->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1)
12838
12839/*
12840** Query a single tombstone hash table for rowid iRowid. Return true if
12841** it is found or false otherwise. The tombstone hash table is one of
12842** nHashTable tables.
12843*/
12844static int fts5IndexTombstoneQuery(
12845 Fts5Data *pHash, /* Hash table page to query */
12846 int nHashTable, /* Number of pages attached to segment */
12847 u64 iRowid /* Rowid to query hash for */
12848){
12849 const int szKey = TOMBSTONE_KEYSIZE(pHash)(pHash->p[0]==4 ? 4 : 8);
12850 const int nSlot = TOMBSTONE_NSLOT(pHash)((pHash->nn > 16) ? ((pHash->nn-8) / (pHash->p[0]
==4 ? 4 : 8)) : 1)
;
12851 int iSlot = (iRowid / nHashTable) % nSlot;
12852 int nCollide = nSlot;
12853
12854 if( iRowid==0 ){
12855 return pHash->p[1];
12856 }else if( szKey==4 ){
12857 u32 *aSlot = (u32*)&pHash->p[8];
12858 while( aSlot[iSlot] ){
12859 if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1;
12860 if( nCollide--==0 ) break;
12861 iSlot = (iSlot+1)%nSlot;
12862 }
12863 }else{
12864 u64 *aSlot = (u64*)&pHash->p[8];
12865 while( aSlot[iSlot] ){
12866 if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1;
12867 if( nCollide--==0 ) break;
12868 iSlot = (iSlot+1)%nSlot;
12869 }
12870 }
12871
12872 return 0;
12873}
12874
12875/*
12876** Return true if the iterator passed as the only argument points
12877** to an segment entry for which there is a tombstone. Return false
12878** if there is no tombstone or if the iterator is already at EOF.
12879*/
12880static int fts5MultiIterIsDeleted(Fts5Iter *pIter){
12881 int iFirst = pIter->aFirst[1].iFirst;
12882 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
12883 Fts5TombstoneArray *pArray = pSeg->pTombArray;
12884
12885 if( pSeg->pLeaf && pArray ){
12886 /* Figure out which page the rowid might be present on. */
12887 int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone;
12888 assert( iPg>=0 )((void) (0));
12889
12890 /* If tombstone hash page iPg has not yet been loaded from the
12891 ** database, load it now. */
12892 if( pArray->apTombstone[iPg]==0 ){
12893 pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
12894 FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)( ((i64)(pSeg->pSeg->iSegid+(1<<16)) << (31
+5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (
31)) + ((i64)(iPg)) )
12895 );
12896 if( pArray->apTombstone[iPg]==0 ) return 0;
12897 }
12898
12899 return fts5IndexTombstoneQuery(
12900 pArray->apTombstone[iPg],
12901 pArray->nTombstone,
12902 pSeg->iRowid
12903 );
12904 }
12905
12906 return 0;
12907}
12908
12909/*
12910** Move the iterator to the next entry.
12911**
12912** If an error occurs, an error code is left in Fts5Index.rc. It is not
12913** considered an error if the iterator reaches EOF, or if it is already at
12914** EOF when this function is called.
**
** If bFrom is true, the first step uses fts5SegIterNextFrom() with iFrom
** when the selected sub-iterator has a doclist-index; all later steps (and
** all steps when bFrom is false) use the sub-iterator's xNext() method.
**
** Entries are skipped while they are empty (only if pIter->bSkipEmpty is
** set) or are reported deleted by fts5MultiIterIsDeleted(). On stopping
** at a usable entry, pIter->xSetOutputs() is invoked for it.
12915*/
12916static void fts5MultiIterNext(
12917 Fts5Index *p,
12918 Fts5Iter *pIter,
12919 int bFrom, /* True if argument iFrom is valid */
12920 i64 iFrom /* Advance at least as far as this */
12921){
12922 int bUseFrom = bFrom;
12923 assert( pIter->base.bEof==0 )((void) (0));
12924 while( p->rc==SQLITE_OK0 ){
12925 int iFirst = pIter->aFirst[1].iFirst;
12926 int bNewTerm = 0;
12927 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
12928 assert( p->rc==SQLITE_OK )((void) (0));
12929 if( bUseFrom && pSeg->pDlidx ){
12930 fts5SegIterNextFrom(p, pSeg, iFrom);
12931 }else{
12932 pSeg->xNext(p, pSeg, &bNewTerm);
12933 }
12934
 /* Try the cheap same-term update first; fall back to the general
 ** fts5MultiIterAdvanced() if the sub-iterator hit EOF, moved to a new
 ** term, or fts5MultiIterAdvanceRowid() returned non-zero. */
12935 if( pSeg->pLeaf==0 || bNewTerm
12936 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
12937 ){
12938 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
12939 fts5MultiIterSetEof(pIter);
12940 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
 /* The newly selected sub-iterator has no data: iterator is at EOF. */
12941 if( pSeg->pLeaf==0 ) return;
12942 }
12943
12944 fts5AssertMultiIterSetup(p, pIter);
12945 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf )((void) (0));
12946 if( (pIter->bSkipEmpty==0 || pSeg->nPos)
12947 && 0==fts5MultiIterIsDeleted(pIter)
12948 ){
12949 pIter->xSetOutputs(pIter, pSeg);
12950 return;
12951 }
 /* iFrom applies to the first step only. */
12952 bUseFrom = 0;
12953 }
12954}
12955
12956static void fts5MultiIterNext2(
12957 Fts5Index *p,
12958 Fts5Iter *pIter,
12959 int *pbNewTerm /* OUT: True if *might* be new term */
12960){
12961 assert( pIter->bSkipEmpty )((void) (0));
12962 if( p->rc==SQLITE_OK0 ){
12963 *pbNewTerm = 0;
12964 do{
12965 int iFirst = pIter->aFirst[1].iFirst;
12966 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
12967 int bNewTerm = 0;
12968
12969 assert( p->rc==SQLITE_OK )((void) (0));
12970 pSeg->xNext(p, pSeg, &bNewTerm);
12971 if( pSeg->pLeaf==0 || bNewTerm
12972 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
12973 ){
12974 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
12975 fts5MultiIterSetEof(pIter);
12976 *pbNewTerm = 1;
12977 }
12978 fts5AssertMultiIterSetup(p, pIter);
12979
12980 }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter))
12981 && (p->rc==SQLITE_OK0)
12982 );
12983 }
12984}
12985
12986static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
12987 UNUSED_PARAM2(pUnused1, pUnused2)(void)(pUnused1), (void)(pUnused2);
12988}
12989
12990static Fts5Iter *fts5MultiIterAlloc(
12991 Fts5Index *p, /* FTS5 backend to iterate within */
12992 int nSeg
12993){
12994 Fts5Iter *pNew;
12995 i64 nSlot; /* Power of two >= nSeg */
12996
12997 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
12998 pNew = fts5IdxMalloc(p,
12999 SZ_FTS5ITER(nSlot)(__builtin_offsetof(Fts5Iter, aSeg)+(nSlot)*sizeof(Fts5SegIter
))
+ /* pNew + pNew->aSeg[] */
13000 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
13001 );
13002 if( pNew ){
13003 pNew->nSeg = nSlot;
13004 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
13005 pNew->pIndex = p;
13006 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
13007 }
13008 return pNew;
13009}
13010
13011static void fts5PoslistCallback(
13012 Fts5Index *pUnused,
13013 void *pContext,
13014 const u8 *pChunk, int nChunk
13015){
13016 UNUSED_PARAM(pUnused)(void)(pUnused);
13017 assert_nc( nChunk>=0 )((void) (0));
13018 if( nChunk>0 ){
13019 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk){ ((void) (0)); memcpy(&((Fts5Buffer*)pContext)->p[((Fts5Buffer
*)pContext)->n], pChunk, nChunk); ((Fts5Buffer*)pContext)->
n += nChunk; }
;
13020 }
13021}
13022
/*
** Context object passed to fts5PoslistFilterCallback(). As used by that
** callback, eState takes one of three values:
**
**   0: data for the current column is being discarded,
**   1: data for the current column is being copied to pBuf,
**   2: the previous chunk ended immediately after a 0x01 column marker,
**      so the column number is the first varint of the next chunk.
*/
13023typedef struct PoslistCallbackCtx PoslistCallbackCtx;
13024struct PoslistCallbackCtx {
13025 Fts5Buffer *pBuf; /* Append to this buffer */
13026 Fts5Colset *pColset; /* Restrict matches to this column */
13027 int eState; /* 0, 1 or 2 - see the comment above this struct */
13028};
13029
/*
** Context object passed to fts5PoslistOffsetsCallback(). iRead is the
** running value decoded from the delta-encoded input; iWrite is the last
** value that was written to pBuf, used to re-encode the output deltas.
*/
13030typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
13031struct PoslistOffsetsCtx {
13032 Fts5Buffer *pBuf; /* Append to this buffer */
13033 Fts5Colset *pColset; /* Restrict matches to this column */
13034 int iRead; /* Last value read from the input */
13035 int iWrite; /* Last value written to pBuf */
13036};
13037
13038/*
13039** TODO: Make this more efficient!
13040*/
13041static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
13042 int i;
13043 for(i=0; i<pColset->nCol; i++){
13044 if( pColset->aiCol[i]==iCol ) return 1;
13045 }
13046 return 0;
13047}
13048
13049static void fts5PoslistOffsetsCallback(
13050 Fts5Index *pUnused,
13051 void *pContext,
13052 const u8 *pChunk, int nChunk
13053){
13054 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
13055 UNUSED_PARAM(pUnused)(void)(pUnused);
13056 assert_nc( nChunk>=0 )((void) (0));
13057 if( nChunk>0 ){
13058 int i = 0;
13059 while( i<nChunk ){
13060 int iVal;
13061 i += fts5GetVarint32(&pChunk[i], iVal)sqlite3Fts5GetVarint32(&pChunk[i],(u32*)&(iVal));
13062 iVal += pCtx->iRead - 2;
13063 pCtx->iRead = iVal;
13064 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
13065 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite){ (pCtx->pBuf)->n += sqlite3Fts5PutVarint(&(pCtx->
pBuf)->p[(pCtx->pBuf)->n], (iVal + 2 - pCtx->iWrite
)); ((void) (0)); }
;
13066 pCtx->iWrite = iVal;
13067 }
13068 }
13069 }
13070}
13071
/*
** xChunk callback that filters position-list data by column. pContext
** must point to a PoslistCallbackCtx. A 0x01 byte that is not part of a
** varint marks the start of a new column and is followed by the column
** number as a varint. Data belonging to columns in pCtx->pColset is
** appended to pCtx->pBuf; everything else is dropped.
**
** pCtx->eState carries the filter state from one chunk to the next:
** 0 = discarding, 1 = copying, 2 = the previous chunk ended right after
** a 0x01 marker, so this chunk begins with the column number.
**
** The appends use the "safe" buffer macros, which do not grow the
** buffer; the caller must have reserved enough space beforehand.
*/
13072static void fts5PoslistFilterCallback(
13073 Fts5Index *pUnused,
13074 void *pContext,
13075 const u8 *pChunk, int nChunk
13076){
13077 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
13078 UNUSED_PARAM(pUnused)(void)(pUnused);
13079 assert_nc( nChunk>=0 )((void) (0));
13080 if( nChunk>0 ){
13081 /* Search through to find the first varint with value 1. This is the
13082 ** start of the next columns hits. */
13083 int i = 0;
13084 int iStart = 0;
13085
 /* Finish a column header that was split across two chunks. If the
 ** column is wanted, emit the 0x01 marker that was held back when the
 ** previous chunk ended. */
13086 if( pCtx->eState==2 ){
13087 int iCol;
13088 fts5FastGetVarint32(pChunk, i, iCol){ iCol = (pChunk)[i++]; if( iCol & 0x80 ){ i--; i += sqlite3Fts5GetVarint32
(&(pChunk)[i],(u32*)&(iCol)); } }
;
13089 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
13090 pCtx->eState = 1;
13091 fts5BufferSafeAppendVarint(pCtx->pBuf, 1){ (pCtx->pBuf)->n += sqlite3Fts5PutVarint(&(pCtx->
pBuf)->p[(pCtx->pBuf)->n], (1)); ((void) (0)); }
;
13092 }else{
13093 pCtx->eState = 0;
13094 }
13095 }
13096
13097 do {
 /* Skip whole varints until a 0x01 marker or the end of the chunk. */
13098 while( i<nChunk && pChunk[i]!=0x01 ){
13099 while( pChunk[i] & 0x80 ) i++;
13100 i++;
13101 }
 /* Copy the span just scanned if the current column is wanted. */
13102 if( pCtx->eState ){
13103 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart){ ((void) (0)); memcpy(&(pCtx->pBuf)->p[(pCtx->pBuf
)->n], &pChunk[iStart], i-iStart); (pCtx->pBuf)->
n += i-iStart; }
;
13104 }
13105 if( i<nChunk ){
 /* pChunk[i] is a 0x01 column marker. */
13106 int iCol;
13107 iStart = i;
13108 i++;
13109 if( i>=nChunk ){
 /* The column number lies in the next chunk. */
13110 pCtx->eState = 2;
13111 }else{
13112 fts5FastGetVarint32(pChunk, i, iCol){ iCol = (pChunk)[i++]; if( iCol & 0x80 ){ i--; i += sqlite3Fts5GetVarint32
(&(pChunk)[i],(u32*)&(iCol)); } }
;
13113 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
13114 if( pCtx->eState ){
 /* Copy the marker and column number for a wanted column. */
13115 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart){ ((void) (0)); memcpy(&(pCtx->pBuf)->p[(pCtx->pBuf
)->n], &pChunk[iStart], i-iStart); (pCtx->pBuf)->
n += i-iStart; }
;
13116 iStart = i;
13117 }
13118 }
13119 }
13120 }while( i<nChunk );
13121 }
13122}
13123
/*
** Invoke callback xChunk once for each contiguous piece of the position
** list that segment iterator pSeg currently points at. The first piece
** lies on pSeg->pLeaf starting at pSeg->iLeafOffset. If the pSeg->nPos
** bytes of the list do not all fit there, the following leaf pages are
** read in turn and the data after the first 4 bytes of each is passed to
** the callback.
**
** For an iterator without FTS5_SEGITER_REVERSE, the page that follows
** the current leaf is not released after use; it is stored in
** pSeg->pNextLeaf instead.
**
** If more data is expected but pSeg->pSeg is NULL, p->rc is set to
** FTS5_CORRUPT. The loop also stops early if fts5LeafRead() returns
** NULL.
*/
13124static void fts5ChunkIterate(
13125 Fts5Index *p, /* Index object */
13126 Fts5SegIter *pSeg, /* Poslist of this iterator */
13127 void *pCtx, /* Context pointer for xChunk callback */
13128 void (*xChunk)(Fts5Index*, void*, const u8*, int)
13129){
13130 int nRem = pSeg->nPos; /* Number of bytes still to come */
13131 Fts5Data *pData = 0;
13132 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
13133 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset)(((nRem) < (pSeg->pLeaf->szLeaf - pSeg->iLeafOffset
)) ? (nRem) : (pSeg->pLeaf->szLeaf - pSeg->iLeafOffset
))
;
13134 int pgno = pSeg->iLeafPgno;
 /* Page number to keep in pSeg->pNextLeaf; 0 means keep none. */
13135 int pgnoSave = 0;
13136
13137 /* This function does not work with detail=none databases. */
13138 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0));
13139
13140 if( (pSeg->flags & FTS5_SEGITER_REVERSE0x02)==0 ){
13141 pgnoSave = pgno+1;
13142 }
13143
13144 while( 1 ){
13145 xChunk(p, pCtx, pChunk, nChunk);
13146 nRem -= nChunk;
 /* Release the page just consumed. pData is NULL on the first pass and
 ** after a page has been handed over to pSeg->pNextLeaf. */
13147 fts5DataRelease(pData);
13148 if( nRem<=0 ){
13149 break;
13150 }else if( pSeg->pSeg==0 ){
13151 p->rc = FTS5_CORRUPT(11 | (1<<8));
13152 return;
13153 }else{
13154 pgno++;
13155 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)( ((i64)(pSeg->pSeg->iSegid) << (31 +5 +1)) + ((i64
)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno
)) )
);
13156 if( pData==0 ) break;
 /* Data on a continuation page starts after its first 4 bytes. */
13157 pChunk = &pData->p[4];
13158 nChunk = MIN(nRem, pData->szLeaf - 4)(((nRem) < (pData->szLeaf - 4)) ? (nRem) : (pData->szLeaf
- 4))
;
13159 if( pgno==pgnoSave ){
 /* Hand ownership to the iterator so the release above skips it. */
13160 assert( pSeg->pNextLeaf==0 )((void) (0));
13161 pSeg->pNextLeaf = pData;
13162 pData = 0;
13163 }
13164 }
13165 }
13166}
13167
13168/*
13169** Iterator pIter currently points to a valid entry (not EOF). This
13170** function appends the position list data for the current entry to
13171** buffer pBuf. It does not make a copy of the position-list size
13172** field.
13173*/
13174static void fts5SegiterPoslist(
13175 Fts5Index *p,
13176 Fts5SegIter *pSeg,
13177 Fts5Colset *pColset,
13178 Fts5Buffer *pBuf
13179){
13180 assert( pBuf!=0 )((void) (0));
13181 assert( pSeg!=0 )((void) (0));
13182 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING)( (u32)((pBuf)->n) + (u32)(pSeg->nPos+8) <= (u32)((pBuf
)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->rc),(pBuf
),(pSeg->nPos+8)+(pBuf)->n) )
){
19
Assuming the condition is true
20
'?' condition is true
21
Taking true branch
13183 assert( pBuf->p!=0 )((void) (0));
13184 assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING )((void) (0));
13185 memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING8);
22
Null pointer passed to 1st parameter expecting 'nonnull'
13186 if( pColset==0 ){
13187 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
13188 }else{
13189 if( p->pConfig->eDetail==FTS5_DETAIL_FULL0 ){
13190 PoslistCallbackCtx sCtx;
13191 sCtx.pBuf = pBuf;
13192 sCtx.pColset = pColset;
13193 sCtx.eState = fts5IndexColsetTest(pColset, 0);
13194 assert( sCtx.eState==0 || sCtx.eState==1 )((void) (0));
13195 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
13196 }else{
13197 PoslistOffsetsCtx sCtx;
13198 memset(&sCtx, 0, sizeof(sCtx));
13199 sCtx.pBuf = pBuf;
13200 sCtx.pColset = pColset;
13201 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
13202 }
13203 }
13204 }
13205}
13206
13207/*
13208** Parameter pPos points to a buffer containing a position list, size nPos.
13209** This function filters it according to pColset (which must be non-NULL)
13210** and sets pIter->base.pData/nData to point to the new position list.
13211** If memory is required for the new position list, use buffer pIter->poslist.
13212** Or, if the new position list is a contiguous subset of the input, set
13213** pIter->base.pData/nData to point directly to it.
13214**
13215** This function is a no-op if *pRc is other than SQLITE_OK when it is
13216** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
13217** before returning.
**
** The scan walks pColset->aiCol[] and the columns of the position list
** in step, which presumably relies on aiCol[] being sorted in ascending
** order - confirm against the code that builds Fts5Colset objects.
**
** Data is appended to pIter->poslist without resetting it first; the
** caller visible in this file (fts5IterSetOutputs_Full) zeroes the
** buffer before calling.
13218*/
13219static void fts5IndexExtractColset(
13220 int *pRc,
13221 Fts5Colset *pColset, /* Colset to filter on */
13222 const u8 *pPos, int nPos, /* Position list */
13223 Fts5Iter *pIter
13224){
13225 if( *pRc==SQLITE_OK0 ){
13226 const u8 *p = pPos;
 /* Start of the data for the column currently being scanned. */
13227 const u8 *aCopy = p;
13228 const u8 *pEnd = &p[nPos]; /* One byte past end of position list */
 /* Index into pColset->aiCol[] */
13229 int i = 0;
 /* Column number of the data that starts at aCopy. */
13230 int iCurrent = 0;
13231
 /* With more than one column the output may need to be assembled from
 ** several pieces, so reserve nPos bytes up front. A single column is
 ** returned as a pointer into the input instead. */
13232 if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
13233 return;
13234 }
13235
13236 while( 1 ){
 /* Skip colset entries smaller than the current column. If the colset
 ** is exhausted, whatever has been accumulated is the result. */
13237 while( pColset->aiCol[i]<iCurrent ){
13238 i++;
13239 if( i==pColset->nCol ){
13240 pIter->base.pData = pIter->poslist.p;
13241 pIter->base.nData = pIter->poslist.n;
13242 return;
13243 }
13244 }
13245
13246 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
13247 ** not part of a varint */
13248 while( p<pEnd && *p!=0x01 ){
13249 while( *p++ & 0x80 );
13250 }
13251
13252 if( pColset->aiCol[i]==iCurrent ){
13253 if( pColset->nCol==1 ){
 /* Single wanted column found: point straight into the input. */
13254 pIter->base.pData = aCopy;
13255 pIter->base.nData = p-aCopy;
13256 return;
13257 }
13258 fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy){ ((void) (0)); memcpy(&(&pIter->poslist)->p[(&
pIter->poslist)->n], aCopy, p-aCopy); (&pIter->poslist
)->n += p-aCopy; }
;
13259 }
13260 if( p>=pEnd ){
13261 pIter->base.pData = pIter->poslist.p;
13262 pIter->base.nData = pIter->poslist.n;
13263 return;
13264 }
 /* p is at a 0x01 marker. The next column's data starts here (marker
 ** included); read the column number that follows the marker. */
13265 aCopy = p++;
13266 iCurrent = *p++;
13267 if( iCurrent & 0x80 ){
13268 p--;
13269 p += fts5GetVarint32(p, iCurrent)sqlite3Fts5GetVarint32(p,(u32*)&(iCurrent));
13270 }
13271 }
13272 }
13273
13274}
13275
13276/*
13277** xSetOutputs callback used by detail=none tables.
13278*/
13279static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
13280 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0));
13281 pIter->base.iRowid = pSeg->iRowid;
13282 pIter->base.nData = pSeg->nPos;
13283}
13284
13285/*
13286** xSetOutputs callback used by detail=full and detail=col tables when no
13287** column filters are specified.
13288*/
13289static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
13290 pIter->base.iRowid = pSeg->iRowid;
13291 pIter->base.nData = pSeg->nPos;
13292
13293 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0));
13294 assert( pIter->pColset==0 )((void) (0));
13295
13296 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
13297 /* All data is stored on the current page. Populate the output
13298 ** variables to point into the body of the page object. */
13299 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
13300 }else{
13301 /* The data is distributed over two or more pages. Copy it into the
13302 ** Fts5Iter.poslist buffer and then set the output pointer to point
13303 ** to this buffer. */
13304 fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist);
13305 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
13306 pIter->base.pData = pIter->poslist.p;
13307 }
13308}
13309
13310/*
13311** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
13312** against no columns at all).
13313*/
13314static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
13315 UNUSED_PARAM(pSeg)(void)(pSeg);
13316 pIter->base.nData = 0;
13317}
13318
13319/*
13320** xSetOutputs callback used by detail=col when there is a column filter
13321** and there are 100 or more columns. Also called as a fallback from
13322** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
13323*/
13324static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
13325 fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist);
13326 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
13327 pIter->base.iRowid = pSeg->iRowid;
13328 pIter->base.pData = pIter->poslist.p;
13329 pIter->base.nData = pIter->poslist.n;
13330}
13331
13332/*
13333** xSetOutputs callback used when:
13334**
13335** * detail=col,
13336** * there is a column filter, and
13337** * the table contains 100 or fewer columns.
13338**
13339** The last point is to ensure all column numbers are stored as
13340** single-byte varints.
**
** Output is written straight into pIter->poslist.p without growing the
** buffer. fts5IterSetOutputCb() sizes that buffer to pConfig->nCol bytes
** when it installs this callback.
**
** If the column list does not fit on the current leaf page, the work is
** delegated to fts5IterSetOutputs_Col().
13341*/
13342static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
13343
13344 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0));
13345 assert( pIter->pColset )((void) (0));
13346
13347 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
13348 fts5IterSetOutputs_Col(pIter, pSeg);
13349 }else{
13350 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
13351 u8 *pEnd = (u8*)&a[pSeg->nPos];
 /* Column number decoded so far; each input byte is (delta + 2). */
13352 int iPrev = 0;
13353 int *aiCol = pIter->pColset->aiCol;
13354 int *aiColEnd = &aiCol[pIter->pColset->nCol];
13355
13356 u8 *aOut = pIter->poslist.p;
 /* Last column number written, for re-encoding the output deltas. */
13357 int iPrevOut = 0;
13358
13359 pIter->base.iRowid = pSeg->iRowid;
13360
13361 while( a<pEnd ){
13362 iPrev += (int)a++[0] - 2;
 /* Skip colset entries below the current column; stop altogether once
 ** the colset is exhausted. */
13363 while( *aiCol<iPrev ){
13364 aiCol++;
13365 if( aiCol==aiColEnd ) goto setoutputs_col_out;
13366 }
13367 if( *aiCol==iPrev ){
13368 *aOut++ = (u8)((iPrev - iPrevOut) + 2);
13369 iPrevOut = iPrev;
13370 }
13371 }
13372
13373setoutputs_col_out:
13374 pIter->base.pData = pIter->poslist.p;
13375 pIter->base.nData = aOut - pIter->poslist.p;
13376 }
13377}
13378
13379/*
13380** xSetOutputs callback used by detail=full when there is a column filter.
13381*/
13382static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
13383 Fts5Colset *pColset = pIter->pColset;
13384 pIter->base.iRowid = pSeg->iRowid;
13385
13386 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL )((void) (0));
13387 assert( pColset )((void) (0));
13388
13389 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
13390 /* All data is stored on the current page. Populate the output
13391 ** variables to point into the body of the page object. */
13392 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
13393 int *pRc = &pIter->pIndex->rc;
13394 fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist);
13395 fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
13396 }else{
13397 /* The data is distributed over two or more pages. Copy it into the
13398 ** Fts5Iter.poslist buffer and then set the output pointer to point
13399 ** to this buffer. */
13400 fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist);
13401 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
13402 pIter->base.pData = pIter->poslist.p;
13403 pIter->base.nData = pIter->poslist.n;
13404 }
13405}
13406
13407static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
13408 assert( pIter!=0 || (*pRc)!=SQLITE_OK )((void) (0));
13409 if( *pRc==SQLITE_OK0 ){
13410 Fts5Config *pConfig = pIter->pIndex->pConfig;
13411 if( pConfig->eDetail==FTS5_DETAIL_NONE1 ){
13412 pIter->xSetOutputs = fts5IterSetOutputs_None;
13413 }
13414
13415 else if( pIter->pColset==0 ){
13416 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
13417 }
13418
13419 else if( pIter->pColset->nCol==0 ){
13420 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
13421 }
13422
13423 else if( pConfig->eDetail==FTS5_DETAIL_FULL0 ){
13424 pIter->xSetOutputs = fts5IterSetOutputs_Full;
13425 }
13426
13427 else{
13428 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0));
13429 if( pConfig->nCol<=100 ){
13430 pIter->xSetOutputs = fts5IterSetOutputs_Col100;
13431 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
13432 }else{
13433 pIter->xSetOutputs = fts5IterSetOutputs_Col;
13434 }
13435 }
13436 }
13437}
13438
13439/*
13440** All the component segment-iterators of pIter have been set up. This
13441** functions finishes setup for iterator pIter itself.
13442*/
13443static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){
13444 int iIter;
13445 for(iIter=pIter->nSeg-1; iIter>0; iIter--){
13446 int iEq;
13447 if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){
13448 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
13449 if( p->rc==SQLITE_OK0 ) pSeg->xNext(p, pSeg, 0);
13450 fts5MultiIterAdvanced(p, pIter, iEq, iIter);
13451 }
13452 }
13453 fts5MultiIterSetEof(pIter);
13454 fts5AssertMultiIterSetup(p, pIter);
13455
13456 if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter))
13457 || fts5MultiIterIsDeleted(pIter)
13458 ){
13459 fts5MultiIterNext(p, pIter, 0, 0);
13460 }else if( pIter->base.bEof==0 ){
13461 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
13462 pIter->xSetOutputs(pIter, pSeg);
13463 }
13464}
13465
13466/*
13467** Allocate a new Fts5Iter object.
13468**
13469** The new object will be used to iterate through data in structure pStruct.
13470** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
13471** is zero or greater, data from the first nSegment segments on level iLevel
13472** is merged.
13473**
13474** The iterator initially points to the first term/rowid entry in the
13475** iterated data.
**
** When iLevel is negative and p->pHash is set, the in-memory hash table
** contributes one extra segment iterator unless FTS5INDEX_QUERY_SKIPHASH
** is set in flags. A seek term (pTerm/nTerm) may only be supplied when
** iLevel is negative.
**
** On return *ppOut holds the new iterator, or NULL if an error occurred,
** in which case p->rc is set. If p->rc is already set on entry no
** segment iterators are initialised and *ppOut ends up NULL.
13476*/
13477static void fts5MultiIterNew(
13478 Fts5Index *p, /* FTS5 backend to iterate within */
13479 Fts5Structure *pStruct, /* Structure of specific index */
13480 int flags, /* FTS5INDEX_QUERY_XXX flags */
13481 Fts5Colset *pColset, /* Colset to filter on (or NULL) */
13482 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
13483 int iLevel, /* Level to iterate (-1 for all) */
13484 int nSegment, /* Number of segments to merge (iLevel>=0) */
13485 Fts5Iter **ppOut /* New object */
13486){
13487 int nSeg = 0; /* Number of segment-iters in use */
13488 int iIter = 0; /* Next free slot in pNew->aSeg[] */
13489 int iSeg; /* Used to iterate through segments */
13490 Fts5StructureLevel *pLvl;
13491 Fts5Iter *pNew;
13492
13493 assert( (pTerm==0 && nTerm==0) || iLevel<0 )((void) (0));
13494
13495 /* Allocate space for the new multi-seg-iterator. */
13496 if( p->rc==SQLITE_OK0 ){
13497 if( iLevel<0 ){
13498 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) )((void) (0));
13499 nSeg = pStruct->nSegment;
 /* One more slot for the hash table, when it takes part. */
13500 nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH0x0040));
13501 }else{
13502 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment)(((pStruct->aLevel[iLevel].nSeg) < (nSegment)) ? (pStruct
->aLevel[iLevel].nSeg) : (nSegment))
;
13503 }
13504 }
13505 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
13506 if( pNew==0 ){
13507 assert( p->rc!=SQLITE_OK )((void) (0));
13508 goto fts5MultiIterNew_post_check;
13509 }
13510 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC0x0002));
13511 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY0x0010));
13512 pNew->pColset = pColset;
 /* With FTS5INDEX_QUERY_NOOUTPUT the no-op xSetOutputs callback
 ** installed by fts5MultiIterAlloc() stays in place. */
13513 if( (flags & FTS5INDEX_QUERY_NOOUTPUT0x0020)==0 ){
13514 fts5IterSetOutputCb(&p->rc, pNew);
13515 }
13516
13517 /* Initialize each of the component segment iterators. */
13518 if( p->rc==SQLITE_OK0 ){
13519 if( iLevel<0 ){
13520 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
13521 if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH0x0040) ){
13522 /* Add a segment iterator for the current contents of the hash table. */
13523 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
13524 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
13525 }
 /* Levels in ascending order; within a level, segments from the
 ** highest index down to 0. */
13526 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
13527 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
13528 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
13529 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
13530 if( pTerm==0 ){
13531 fts5SegIterInit(p, pSeg, pIter);
13532 }else{
13533 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
13534 }
13535 }
13536 }
13537 }else{
13538 pLvl = &pStruct->aLevel[iLevel];
13539 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
13540 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
13541 }
13542 }
13543 assert( iIter==nSeg )((void) (0));
13544 }
13545
13546 /* If the above was successful, each component iterator now points
13547 ** to the first entry in its segment. In this case initialize the
13548 ** aFirst[] array. Or, if an error has occurred, free the iterator
13549 ** object and set the output variable to NULL. */
13550 if( p->rc==SQLITE_OK0 ){
13551 fts5MultiIterFinishSetup(p, pNew);
13552 }else{
13553 fts5MultiIterFree(pNew);
13554 *ppOut = 0;
13555 }
13556
13557fts5MultiIterNew_post_check:
13558 assert( (*ppOut)!=0 || p->rc!=SQLITE_OK )((void) (0));
13559 return;
13560}
13561
13562/*
13563** Create an Fts5Iter that iterates through the doclist provided
13564** as the second argument.
13565*/
13566static void fts5MultiIterNew2(
13567 Fts5Index *p, /* FTS5 backend to iterate within */
13568 Fts5Data *pData, /* Doclist to iterate through */
13569 int bDesc, /* True for descending rowid order */
13570 Fts5Iter **ppOut /* New object */
13571){
13572 Fts5Iter *pNew;
13573 pNew = fts5MultiIterAlloc(p, 2);
13574 if( pNew ){
13575 Fts5SegIter *pIter = &pNew->aSeg[1];
13576 pIter->flags = FTS5_SEGITER_ONETERM0x01;
13577 if( pData->szLeaf>0 ){
13578 pIter->pLeaf = pData;
13579 pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
13580 pIter->iEndofDoclist = pData->nn;
13581 pNew->aFirst[1].iFirst = 1;
13582 if( bDesc ){
13583 pNew->bRev = 1;
13584 pIter->flags |= FTS5_SEGITER_REVERSE0x02;
13585 fts5SegIterReverseInitPage(p, pIter);
13586 }else{
13587 fts5SegIterLoadNPos(p, pIter);
13588 }
13589 pData = 0;
13590 }else{
13591 pNew->base.bEof = 1;
13592 }
13593 fts5SegIterSetNext(p, pIter);
13594
13595 *ppOut = pNew;
13596 }
13597
13598 fts5DataRelease(pData);
13599}
13600
13601/*
13602** Return true if the iterator is at EOF or if an error has occurred.
13603** False otherwise.
13604*/
13605static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
13606 assert( pIter!=0 || p->rc!=SQLITE_OK )((void) (0));
13607 assert( p->rc!=SQLITE_OK((void) (0))
13608 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof((void) (0))
13609 )((void) (0));
13610 return (p->rc || pIter->base.bEof);
13611}
13612
13613/*
13614** Return the rowid of the entry that the iterator currently points
13615** to. If the iterator points to EOF when this function is called the
13616** results are undefined.
13617*/
13618static i64 fts5MultiIterRowid(Fts5Iter *pIter){
13619 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf )((void) (0));
13620 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
13621}
13622
13623/*
13624** Move the iterator to the next entry at or following iMatch.
13625*/
13626static void fts5MultiIterNextFrom(
13627 Fts5Index *p,
13628 Fts5Iter *pIter,
13629 i64 iMatch
13630){
13631 while( 1 ){
13632 i64 iRowid;
13633 fts5MultiIterNext(p, pIter, 1, iMatch);
13634 if( fts5MultiIterEof(p, pIter) ) break;
13635 iRowid = fts5MultiIterRowid(pIter);
13636 if( pIter->bRev==0 && iRowid>=iMatch ) break;
13637 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
13638 }
13639}
13640
13641/*
13642** Return a pointer to a buffer containing the term associated with the
13643** entry that the iterator currently points to.
13644*/
13645static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
13646 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
13647 *pn = p->term.n;
13648 return p->term.p;
13649}
13650
13651/*
13652** Allocate a new segment-id for the structure pStruct. The new segment
13653** id must be between 1 and 65335 inclusive, and must not be used by
13654** any currently existing segment. If a free segment id cannot be found,
13655** SQLITE_FULL is returned.
13656**
13657** If an error has already occurred, this function is a no-op. 0 is
13658** returned in this case.
13659*/
13660static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
13661 int iSegid = 0;
13662
13663 if( p->rc==SQLITE_OK0 ){
13664 if( pStruct->nSegment>=FTS5_MAX_SEGMENT2000 ){
13665 p->rc = SQLITE_FULL13;
13666 }else{
13667 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
13668 ** array is 63 elements, or 252 bytes, in size. */
13669 u32 aUsed[(FTS5_MAX_SEGMENT2000+31) / 32];
13670 int iLvl, iSeg;
13671 int i;
13672 u32 mask;
13673 memset(aUsed, 0, sizeof(aUsed));
13674 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
13675 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
13676 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
13677 if( iId<=FTS5_MAX_SEGMENT2000 && iId>0 ){
13678 aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
13679 }
13680 }
13681 }
13682
13683 for(i=0; aUsed[i]==0xFFFFFFFF; i++);
13684 mask = aUsed[i];
13685 for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
13686 iSegid += 1 + i*32;
13687
13688#ifdef SQLITE_DEBUG
13689 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
13690 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
13691 assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid )((void) (0));
13692 }
13693 }
13694 assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT )((void) (0));
13695
13696 {
13697 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
13698 if( p->rc==SQLITE_OK0 ){
13699 u8 aBlob[2] = {0xff, 0xff};
13700 sqlite3_bind_intsqlite3_api->bind_int(pIdxSelect, 1, iSegid);
13701 sqlite3_bind_blobsqlite3_api->bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC((sqlite3_destructor_type)0));
13702 assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW )((void) (0));
13703 p->rc = sqlite3_resetsqlite3_api->reset(pIdxSelect);
13704 sqlite3_bind_nullsqlite3_api->bind_null(pIdxSelect, 2);
13705 }
13706 }
13707#endif
13708 }
13709 }
13710
13711 return iSegid;
13712}
13713
13714/*
13715** Discard all data currently cached in the hash-tables.
13716*/
13717static void fts5IndexDiscardData(Fts5Index *p){
13718 assert( p->pHash || p->nPendingData==0 )((void) (0));
13719 if( p->pHash ){
13720 sqlite3Fts5HashClear(p->pHash);
13721 p->nPendingData = 0;
13722 p->nPendingRow = 0;
13723 p->flushRc = SQLITE_OK0;
13724 }
13725 p->nContentlessDelete = 0;
13726}
13727
/*
** Return the number of leading bytes that buffer (pOld/nOld) has in common
** with buffer pNew. At most nOld bytes are compared; zero is returned if
** nOld is zero or negative.
**
** The length of pNew is not passed in. The caller must guarantee that
** pNew is greater than (pOld/nOld), so that the comparison stops before
** running off the end of pNew.
*/
static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
  int nShared = 0;
  while( nShared<nOld && pOld[nShared]==pNew[nShared] ){
    nShared++;
  }
  return nShared;
}
13742
13743static void fts5WriteDlidxClear(
13744 Fts5Index *p,
13745 Fts5SegWriter *pWriter,
13746 int bFlush /* If true, write dlidx to disk */
13747){
13748 int i;
13749 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) )((void) (0));
13750 for(i=0; i<pWriter->nDlidx; i++){
13751 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
13752 if( pDlidx->buf.n==0 ) break;
13753 if( bFlush ){
13754 assert( pDlidx->pgno!=0 )((void) (0));
13755 fts5DataWrite(p,
13756 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(1)
<< (31 + 5)) + ((i64)(i) << (31)) + ((i64)(pDlidx
->pgno)) )
,
13757 pDlidx->buf.p, pDlidx->buf.n
13758 );
13759 }
13760 sqlite3Fts5BufferZero(&pDlidx->buf);
13761 pDlidx->bPrevValid = 0;
13762 }
13763}
13764
13765/*
13766** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
13767** Any new array elements are zeroed before returning.
13768*/
13769static int fts5WriteDlidxGrow(
13770 Fts5Index *p,
13771 Fts5SegWriter *pWriter,
13772 int nLvl
13773){
13774 if( p->rc==SQLITE_OK0 && nLvl>=pWriter->nDlidx ){
13775 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64sqlite3_api->realloc64(
13776 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
13777 );
13778 if( aDlidx==0 ){
13779 p->rc = SQLITE_NOMEM7;
13780 }else{
13781 size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
13782 memset(&aDlidx[pWriter->nDlidx], 0, nByte);
13783 pWriter->aDlidx = aDlidx;
13784 pWriter->nDlidx = nLvl;
13785 }
13786 }
13787 return p->rc;
13788}
13789
13790/*
13791** If the current doclist-index accumulating in pWriter->aDlidx[] is large
13792** enough, flush it to disk and return 1. Otherwise discard it and return
13793** zero.
13794*/
13795static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
13796 int bFlag = 0;
13797
13798 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
13799 ** to the database, also write the doclist-index to disk. */
13800 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE4 ){
13801 bFlag = 1;
13802 }
13803 fts5WriteDlidxClear(p, pWriter, bFlag);
13804 pWriter->nEmpty = 0;
13805 return bFlag;
13806}
13807
13808/*
13809** This function is called whenever processing of the doclist for the
13810** last term on leaf page (pWriter->iBtPage) is completed.
13811**
13812** The doclist-index for that term is currently stored in-memory within the
13813** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
13814** writes it out to disk. Or, if it is too small to bother with, discards
13815** it.
13816**
13817** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
13818*/
13819static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
13820 int bFlag;
13821
13822 assert( pWriter->iBtPage || pWriter->nEmpty==0 )((void) (0));
13823 if( pWriter->iBtPage==0 ) return;
13824 bFlag = fts5WriteFlushDlidx(p, pWriter);
13825
13826 if( p->rc==SQLITE_OK0 ){
13827 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
13828 /* The following was already done in fts5WriteInit(): */
13829 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
13830 sqlite3_bind_blobsqlite3_api->bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC((sqlite3_destructor_type)0));
13831 sqlite3_bind_int64sqlite3_api->bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
13832 sqlite3_stepsqlite3_api->step(p->pIdxWriter);
13833 p->rc = sqlite3_resetsqlite3_api->reset(p->pIdxWriter);
13834 sqlite3_bind_nullsqlite3_api->bind_null(p->pIdxWriter, 2);
13835 }
13836 pWriter->iBtPage = 0;
13837}
13838
13839/*
13840** This is called once for each leaf page except the first that contains
13841** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
13842** is larger than all terms written to earlier leaves, and equal to or
13843** smaller than the first term on the new leaf.
13844**
13845** If an error occurs, an error code is left in Fts5Index.rc. If an error
13846** has already occurred when this function is called, it is a no-op.
13847*/
13848static void fts5WriteBtreeTerm(
13849 Fts5Index *p, /* FTS5 backend object */
13850 Fts5SegWriter *pWriter, /* Writer object */
13851 int nTerm, const u8 *pTerm /* First term on new page */
13852){
13853 fts5WriteFlushBtree(p, pWriter);
13854 if( p->rc==SQLITE_OK0 ){
13855 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&pWriter->btterm,nTerm
,pTerm)
;
13856 pWriter->iBtPage = pWriter->writer.pgno;
13857 }
13858}
13859
13860/*
13861** This function is called when flushing a leaf page that contains no
13862** terms at all to disk.
13863*/
13864static void fts5WriteBtreeNoTerm(
13865 Fts5Index *p, /* FTS5 backend object */
13866 Fts5SegWriter *pWriter /* Writer object */
13867){
13868 /* If there were no rowids on the leaf page either and the doclist-index
13869 ** has already been started, append an 0x00 byte to it. */
13870 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
13871 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
13872 assert( pDlidx->bPrevValid )((void) (0));
13873 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
13874 }
13875
13876 /* Increment the "number of sequential leaves without a term" counter. */
13877 pWriter->nEmpty++;
13878}
13879
13880static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
13881 i64 iRowid;
13882 int iOff;
13883
13884 iOff = 1 + fts5GetVarintsqlite3Fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
13885 fts5GetVarintsqlite3Fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
13886 return iRowid;
13887}
13888
13889/*
13890** Rowid iRowid has just been appended to the current leaf page. It is the
13891** first on the page. This function appends an appropriate entry to the current
13892** doclist-index.
13893*/
13894static void fts5WriteDlidxAppend(
13895 Fts5Index *p,
13896 Fts5SegWriter *pWriter,
13897 i64 iRowid
13898){
13899 int i;
13900 int bDone = 0;
13901
13902 for(i=0; p->rc==SQLITE_OK0 && bDone==0; i++){
13903 i64 iVal;
13904 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
13905
13906 if( pDlidx->buf.n>=p->pConfig->pgsz ){
13907 /* The current doclist-index page is full. Write it to disk and push
13908 ** a copy of iRowid (which will become the first rowid on the next
13909 ** doclist-index leaf page) up into the next level of the b-tree
13910 ** hierarchy. If the node being flushed is currently the root node,
13911 ** also push its first rowid upwards. */
13912 pDlidx->buf.p[0] = 0x01; /* Not the root node */
13913 fts5DataWrite(p,
13914 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(1)
<< (31 + 5)) + ((i64)(i) << (31)) + ((i64)(pDlidx
->pgno)) )
,
13915 pDlidx->buf.p, pDlidx->buf.n
13916 );
13917 fts5WriteDlidxGrow(p, pWriter, i+2);
13918 pDlidx = &pWriter->aDlidx[i];
13919 if( p->rc==SQLITE_OK0 && pDlidx[1].buf.n==0 ){
13920 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
13921
13922 /* This was the root node. Push its first rowid up to the new root. */
13923 pDlidx[1].pgno = pDlidx->pgno;
13924 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
13925 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
13926 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
13927 pDlidx[1].bPrevValid = 1;
13928 pDlidx[1].iPrev = iFirst;
13929 }
13930
13931 sqlite3Fts5BufferZero(&pDlidx->buf);
13932 pDlidx->bPrevValid = 0;
13933 pDlidx->pgno++;
13934 }else{
13935 bDone = 1;
13936 }
13937
13938 if( pDlidx->bPrevValid ){
13939 iVal = (u64)iRowid - (u64)pDlidx->iPrev;
13940 }else{
13941 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
13942 assert( pDlidx->buf.n==0 )((void) (0));
13943 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
13944 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
13945 iVal = iRowid;
13946 }
13947
13948 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
13949 pDlidx->bPrevValid = 1;
13950 pDlidx->iPrev = iRowid;
13951 }
13952}
13953
13954static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
13955 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
13956 Fts5PageWriter *pPage = &pWriter->writer;
13957 i64 iRowid;
13958
13959 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) )((void) (0));
13960
13961 /* Set the szLeaf header field. */
13962 assert( 0==fts5GetU16(&pPage->buf.p[2]) )((void) (0));
13963 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
13964
13965 if( pWriter->bFirstTermInPage ){
13966 /* No term was written to this page. */
13967 assert( pPage->pgidx.n==0 )((void) (0));
13968 fts5WriteBtreeNoTerm(p, pWriter);
13969 }else{
13970 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
13971 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf,
pPage->pgidx.n,pPage->pgidx.p)
;
13972 }
13973
13974 /* Write the page out to disk */
13975 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(0)
<< (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pPage
->pgno)) )
;
13976 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
13977
13978 /* Initialize the next page. */
13979 fts5BufferZero(&pPage->buf)sqlite3Fts5BufferZero(&pPage->buf);
13980 fts5BufferZero(&pPage->pgidx)sqlite3Fts5BufferZero(&pPage->pgidx);
13981 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf,
4,zero)
;
13982 pPage->iPrevPgidx = 0;
13983 pPage->pgno++;
13984
13985 /* Increase the leaves written counter */
13986 pWriter->nLeafWritten++;
13987
13988 /* The new leaf holds no terms or rowids */
13989 pWriter->bFirstTermInPage = 1;
13990 pWriter->bFirstRowidInPage = 1;
13991}
13992
13993/*
13994** Append term pTerm/nTerm to the segment being written by the writer passed
13995** as the second argument.
13996**
13997** If an error occurs, set the Fts5Index.rc error code. If an error has
13998** already occurred, this function is a no-op.
13999*/
14000static void fts5WriteAppendTerm(
14001 Fts5Index *p,
14002 Fts5SegWriter *pWriter,
14003 int nTerm, const u8 *pTerm
14004){
14005 int nPrefix; /* Bytes of prefix compression for term */
14006 Fts5PageWriter *pPage = &pWriter->writer;
14007 Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
14008 int nMin = MIN(pPage->term.n, nTerm)(((pPage->term.n) < (nTerm)) ? (pPage->term.n) : (nTerm
))
;
14009
14010 assert( p->rc==SQLITE_OK )((void) (0));
14011 assert( pPage->buf.n>=4 )((void) (0));
14012 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage )((void) (0));
14013
14014 /* If the current leaf page is full, flush it to disk. */
14015 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
14016 if( pPage->buf.n>4 ){
14017 fts5WriteFlushLeaf(p, pWriter);
14018 if( p->rc!=SQLITE_OK0 ) return;
14019 }
14020 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING)( (u32)((&pPage->buf)->n) + (u32)(nTerm+20) <= (
u32)((&pPage->buf)->nSpace) ? 0 : sqlite3Fts5BufferSize
((&p->rc),(&pPage->buf),(nTerm+20)+(&pPage->
buf)->n) )
;
14021 }
14022
14023 /* TODO1: Updating pgidx here. */
14024 pPgidx->n += sqlite3Fts5PutVarint(
14025 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
14026 );
14027 pPage->iPrevPgidx = pPage->buf.n;
14028#if 0
14029 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
14030 pPgidx->n += 2;
14031#endif
14032
14033 if( pWriter->bFirstTermInPage ){
14034 nPrefix = 0;
14035 if( pPage->pgno!=1 ){
14036 /* This is the first term on a leaf that is not the leftmost leaf in
14037 ** the segment b-tree. In this case it is necessary to add a term to
14038 ** the b-tree hierarchy that is (a) larger than the largest term
14039 ** already written to the segment and (b) smaller than or equal to
14040 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
14041 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
14042 ** previous term.
14043 **
14044 ** Usually, the previous term is available in pPage->term. The exception
14045 ** is if this is the first term written in an incremental-merge step.
14046 ** In this case the previous term is not available, so just write a
14047 ** copy of (pTerm/nTerm) into the parent node. This is slightly
14048 ** inefficient, but still correct. */
14049 int n = nTerm;
14050 if( pPage->term.n ){
14051 n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
14052 }
14053 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
14054 if( p->rc!=SQLITE_OK0 ) return;
14055 pPage = &pWriter->writer;
14056 }
14057 }else{
14058 nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
14059 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf
,(i64)nPrefix)
;
14060 }
14061
14062 /* Append the number of bytes of new data, then the term data itself
14063 ** to the page. */
14064 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf
,(i64)nTerm - nPrefix)
;
14065 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix])sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf,
nTerm - nPrefix,&pTerm[nPrefix])
;
14066
14067 /* Update the Fts5PageWriter.term field. */
14068 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&pPage->term,nTerm,
pTerm)
;
14069 pWriter->bFirstTermInPage = 0;
14070
14071 pWriter->bFirstRowidInPage = 0;
14072 pWriter->bFirstRowidInDoclist = 1;
14073
14074 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) )((void) (0));
14075 pWriter->aDlidx[0].pgno = pPage->pgno;
14076}
14077
14078/*
14079** Append a rowid and position-list size field to the writers output.
14080*/
14081static void fts5WriteAppendRowid(
14082 Fts5Index *p,
14083 Fts5SegWriter *pWriter,
14084 i64 iRowid
14085){
14086 if( p->rc==SQLITE_OK0 ){
14087 Fts5PageWriter *pPage = &pWriter->writer;
14088
14089 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
14090 fts5WriteFlushLeaf(p, pWriter);
14091 }
14092
14093 /* If this is to be the first rowid written to the page, set the
14094 ** rowid-pointer in the page-header. Also append a value to the dlidx
14095 ** buffer, in case a doclist-index is required. */
14096 if( pWriter->bFirstRowidInPage ){
14097 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
14098 fts5WriteDlidxAppend(p, pWriter, iRowid);
14099 }
14100
14101 /* Write the rowid. */
14102 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
14103 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf
,(i64)iRowid)
;
14104 }else{
14105 assert_nc( p->rc || iRowid>pWriter->iPrevRowid )((void) (0));
14106 fts5BufferAppendVarint(&p->rc, &pPage->buf,sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf
,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid)
14107 (u64)iRowid - (u64)pWriter->iPrevRowidsqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf
,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid)
14108 )sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf
,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid)
;
14109 }
14110 pWriter->iPrevRowid = iRowid;
14111 pWriter->bFirstRowidInDoclist = 0;
14112 pWriter->bFirstRowidInPage = 0;
14113 }
14114}
14115
14116static void fts5WriteAppendPoslistData(
14117 Fts5Index *p,
14118 Fts5SegWriter *pWriter,
14119 const u8 *aData,
14120 int nData
14121){
14122 Fts5PageWriter *pPage = &pWriter->writer;
14123 const u8 *a = aData;
14124 int n = nData;
14125
14126 assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK )((void) (0));
14127 while( p->rc==SQLITE_OK0
14128 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
14129 ){
14130 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
14131 int nCopy = 0;
14132 while( nCopy<nReq ){
14133 i64 dummy;
14134 nCopy += fts5GetVarintsqlite3Fts5GetVarint(&a[nCopy], (u64*)&dummy);
14135 }
14136 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf,
nCopy,a)
;
14137 a += nCopy;
14138 n -= nCopy;
14139 fts5WriteFlushLeaf(p, pWriter);
14140 }
14141 if( n>0 ){
14142 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf,
n,a)
;
14143 }
14144}
14145
14146/*
14147** Flush any data cached by the writer object to the database. Free any
14148** allocations associated with the writer.
14149*/
14150static void fts5WriteFinish(
14151 Fts5Index *p,
14152 Fts5SegWriter *pWriter, /* Writer object */
14153 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
14154){
14155 int i;
14156 Fts5PageWriter *pLeaf = &pWriter->writer;
14157 if( p->rc==SQLITE_OK0 ){
14158 assert( pLeaf->pgno>=1 )((void) (0));
14159 if( pLeaf->buf.n>4 ){
14160 fts5WriteFlushLeaf(p, pWriter);
14161 }
14162 *pnLeaf = pLeaf->pgno-1;
14163 if( pLeaf->pgno>1 ){
14164 fts5WriteFlushBtree(p, pWriter);
14165 }
14166 }
14167 fts5BufferFree(&pLeaf->term)sqlite3Fts5BufferFree(&pLeaf->term);
14168 fts5BufferFree(&pLeaf->buf)sqlite3Fts5BufferFree(&pLeaf->buf);
14169 fts5BufferFree(&pLeaf->pgidx)sqlite3Fts5BufferFree(&pLeaf->pgidx);
14170 fts5BufferFree(&pWriter->btterm)sqlite3Fts5BufferFree(&pWriter->btterm);
14171
14172 for(i=0; i<pWriter->nDlidx; i++){
14173 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
14174 }
14175 sqlite3_freesqlite3_api->free(pWriter->aDlidx);
14176}
14177
14178static void fts5WriteInit(
14179 Fts5Index *p,
14180 Fts5SegWriter *pWriter,
14181 int iSegid
14182){
14183 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING20;
14184
14185 memset(pWriter, 0, sizeof(Fts5SegWriter));
14186 pWriter->iSegid = iSegid;
14187
14188 fts5WriteDlidxGrow(p, pWriter, 1);
14189 pWriter->writer.pgno = 1;
14190 pWriter->bFirstTermInPage = 1;
14191 pWriter->iBtPage = 1;
14192
14193 assert( pWriter->writer.buf.n==0 )((void) (0));
14194 assert( pWriter->writer.pgidx.n==0 )((void) (0));
14195
14196 /* Grow the two buffers to pgsz + padding bytes in size. */
14197 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
14198 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
14199
14200 if( p->pIdxWriter==0 ){
14201 Fts5Config *pConfig = p->pConfig;
14202 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintfsqlite3_api->mprintf(
14203 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
14204 pConfig->zDb, pConfig->zName
14205 ));
14206 }
14207
14208 if( p->rc==SQLITE_OK0 ){
14209 /* Initialize the 4-byte leaf-page header to 0x00. */
14210 memset(pWriter->writer.buf.p, 0, 4);
14211 pWriter->writer.buf.n = 4;
14212
14213 /* Bind the current output segment id to the index-writer. This is an
14214 ** optimization over binding the same value over and over as rows are
14215 ** inserted into %_idx by the current writer. */
14216 sqlite3_bind_intsqlite3_api->bind_int(p->pIdxWriter, 1, pWriter->iSegid);
14217 }
14218}
14219
14220/*
14221** Iterator pIter was used to iterate through the input segments of on an
14222** incremental merge operation. This function is called if the incremental
14223** merge step has finished but the input has not been completely exhausted.
14224*/
14225static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
14226 int i;
14227 Fts5Buffer buf;
14228 memset(&buf, 0, sizeof(Fts5Buffer));
14229 for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK0; i++){
14230 Fts5SegIter *pSeg = &pIter->aSeg[i];
14231 if( pSeg->pSeg==0 ){
14232 /* no-op */
14233 }else if( pSeg->pLeaf==0 ){
14234 /* All keys from this input segment have been transfered to the output.
14235 ** Set both the first and last page-numbers to 0 to indicate that the
14236 ** segment is now empty. */
14237 pSeg->pSeg->pgnoLast = 0;
14238 pSeg->pSeg->pgnoFirst = 0;
14239 }else{
14240 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
14241 i64 iLeafRowid;
14242 Fts5Data *pData;
14243 int iId = pSeg->pSeg->iSegid;
14244 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
14245
14246 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno)( ((i64)(iId) << (31 +5 +1)) + ((i64)(0) << (31 +
5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iTermLeafPgno
)) )
;
14247 pData = fts5LeafRead(p, iLeafRowid);
14248 if( pData ){
14249 if( iOff>pData->szLeaf ){
14250 /* This can occur if the pages that the segments occupy overlap - if
14251 ** a single page has been assigned to more than one segment. In
14252 ** this case a prior iteration of this loop may have corrupted the
14253 ** segment currently being trimmed. */
14254 p->rc = FTS5_CORRUPT(11 | (1<<8));
14255 }else{
14256 fts5BufferZero(&buf)sqlite3Fts5BufferZero(&buf);
14257 fts5BufferGrow(&p->rc, &buf, pData->nn)( (u32)((&buf)->n) + (u32)(pData->nn) <= (u32)((
&buf)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->
rc),(&buf),(pData->nn)+(&buf)->n) )
;
14258 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr)sqlite3Fts5BufferAppendBlob(&p->rc,&buf,sizeof(aHdr
),aHdr)
;
14259 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg
->term.n)
;
14260 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p)sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pSeg->term
.n,pSeg->term.p)
;
14261 fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData->
szLeaf-iOff,&pData->p[iOff])
;
14262 if( p->rc==SQLITE_OK0 ){
14263 /* Set the szLeaf field */
14264 fts5PutU16(&buf.p[2], (u16)buf.n);
14265 }
14266
14267 /* Set up the new page-index array */
14268 fts5BufferAppendVarint(&p->rc, &buf, 4)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)4);
14269 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
14270 && pSeg->iEndofDoclist<pData->szLeaf
14271 && pSeg->iPgidxOff<=pData->nn
14272 ){
14273 int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
14274 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)buf
.n - 1 - nDiff - 4)
;
14275 fts5BufferAppendBlob(&p->rc, &buf,sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData->
nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff])
14276 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData->
nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff])
14277 )sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData->
nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff])
;
14278 }
14279
14280 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
14281 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1)( ((i64)(iId) << (31 +5 +1)) + ((i64)(0) << (31 +
5)) + ((i64)(0) << (31)) + ((i64)(1)) )
, iLeafRowid);
14282 fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
14283 }
14284 fts5DataRelease(pData);
14285 }
14286 }
14287 }
14288 fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf);
14289}
14290
14291static void fts5MergeChunkCallback(
14292 Fts5Index *p,
14293 void *pCtx,
14294 const u8 *pChunk, int nChunk
14295){
14296 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
14297 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
14298}
14299
14300/*
14301**
14302*/
14303static void fts5IndexMergeLevel(
14304 Fts5Index *p, /* FTS5 backend object */
14305 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
14306 int iLvl, /* Level to read input from */
14307 int *pnRem /* Write up to this many output leaves */
14308){
14309 Fts5Structure *pStruct = *ppStruct;
14310 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
14311 Fts5StructureLevel *pLvlOut;
14312 Fts5Iter *pIter = 0; /* Iterator to read input data */
14313 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
14314 int nInput; /* Number of input segments */
14315 Fts5SegWriter writer; /* Writer object */
14316 Fts5StructureSegment *pSeg; /* Output segment */
14317 Fts5Buffer term;
14318 int bOldest; /* True if the output segment is the oldest */
14319 int eDetail = p->pConfig->eDetail;
14320 const int flags = FTS5INDEX_QUERY_NOOUTPUT0x0020;
14321 int bTermWritten = 0; /* True if current term already output */
14322
14323 assert( iLvl<pStruct->nLevel )((void) (0));
14324 assert( pLvl->nMerge<=pLvl->nSeg )((void) (0));
14325
14326 memset(&writer, 0, sizeof(Fts5SegWriter));
14327 memset(&term, 0, sizeof(Fts5Buffer));
14328 if( pLvl->nMerge ){
14329 pLvlOut = &pStruct->aLevel[iLvl+1];
14330 assert( pLvlOut->nSeg>0 )((void) (0));
14331 nInput = pLvl->nMerge;
14332 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
14333
14334 fts5WriteInit(p, &writer, pSeg->iSegid);
14335 writer.writer.pgno = pSeg->pgnoLast+1;
14336 writer.iBtPage = 0;
14337 }else{
14338 int iSegid = fts5AllocateSegid(p, pStruct);
14339
14340 /* Extend the Fts5Structure object as required to ensure the output
14341 ** segment exists. */
14342 if( iLvl==pStruct->nLevel-1 ){
14343 fts5StructureAddLevel(&p->rc, ppStruct);
14344 pStruct = *ppStruct;
14345 }
14346 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
14347 if( p->rc ) return;
14348 pLvl = &pStruct->aLevel[iLvl];
14349 pLvlOut = &pStruct->aLevel[iLvl+1];
14350
14351 fts5WriteInit(p, &writer, iSegid);
14352
14353 /* Add the new segment to the output level */
14354 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
14355 pLvlOut->nSeg++;
14356 pSeg->pgnoFirst = 1;
14357 pSeg->iSegid = iSegid;
14358 pStruct->nSegment++;
14359
14360 /* Read input from all segments in the input level */
14361 nInput = pLvl->nSeg;
14362
14363 /* Set the range of origins that will go into the output segment. */
14364 if( pStruct->nOriginCntr>0 ){
14365 pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1;
14366 pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2;
14367 }
14368 }
14369 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
14370
14371 assert( iLvl>=0 )((void) (0));
14372 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
14373 fts5MultiIterEof(p, pIter)==0;
14374 fts5MultiIterNext(p, pIter, 0, 0)
14375 ){
14376 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
14377 int nPos; /* position-list size field value */
14378 int nTerm;
14379 const u8 *pTerm;
14380
14381 pTerm = fts5MultiIterTerm(pIter, &nTerm);
14382 if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm)((nTerm)<=0 ? 0 : memcmp((pTerm), (term.p), (nTerm))) ){
14383 if( pnRem && writer.nLeafWritten>nRem ){
14384 break;
14385 }
14386 fts5BufferSet(&p->rc, &term, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&term,nTerm,pTerm);
14387 bTermWritten =0;
14388 }
14389
14390 /* Check for key annihilation. */
14391 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
14392
14393 if( p->rc==SQLITE_OK0 && bTermWritten==0 ){
14394 /* This is a new term. Append a term to the output segment. */
14395 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
14396 bTermWritten = 1;
14397 }
14398
14399 /* Append the rowid to the output */
14400 /* WRITEPOSLISTSIZE */
14401 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
14402
14403 if( eDetail==FTS5_DETAIL_NONE1 ){
14404 if( pSegIter->bDel ){
14405 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer
.buf,(i64)0)
;
14406 if( pSegIter->nPos>0 ){
14407 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer
.buf,(i64)0)
;
14408 }
14409 }
14410 }else{
14411 /* Append the position-list data to the output */
14412 nPos = pSegIter->nPos*2 + pSegIter->bDel;
14413 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer
.buf,(i64)nPos)
;
14414 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
14415 }
14416 }
14417
14418 /* Flush the last leaf page to disk. Set the output segment b-tree height
14419 ** and last leaf page number at the same time. */
14420 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
14421
14422 assert( pIter!=0 || p->rc!=SQLITE_OK )((void) (0));
14423 if( fts5MultiIterEof(p, pIter) ){
14424 int i;
14425
14426 /* Remove the redundant segments from the %_data table */
14427 assert( pSeg->nEntry==0 )((void) (0));
14428 for(i=0; i<nInput; i++){
14429 Fts5StructureSegment *pOld = &pLvl->aSeg[i];
14430 pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone);
14431 fts5DataRemoveSegment(p, pOld);
14432 }
14433
14434 /* Remove the redundant segments from the input level */
14435 if( pLvl->nSeg!=nInput ){
14436 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
14437 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
14438 }
14439 pStruct->nSegment -= nInput;
14440 pLvl->nSeg -= nInput;
14441 pLvl->nMerge = 0;
14442 if( pSeg->pgnoLast==0 ){
14443 pLvlOut->nSeg--;
14444 pStruct->nSegment--;
14445 }
14446 }else{
14447 assert( pSeg->pgnoLast>0 )((void) (0));
14448 fts5TrimSegments(p, pIter);
14449 pLvl->nMerge = nInput;
14450 }
14451
14452 fts5MultiIterFree(pIter);
14453 fts5BufferFree(&term)sqlite3Fts5BufferFree(&term);
14454 if( pnRem ) *pnRem -= writer.nLeafWritten;
14455}
14456
14457/*
14458** If this is not a contentless_delete=1 table, or if the 'deletemerge'
14459** configuration option is set to 0, then this function always returns -1.
14460** Otherwise, it searches the structure object passed as the second argument
14461** for a level suitable for merging due to having a large number of
14462** tombstones in the tombstone hash. If one is found, its index is returned.
14463** Otherwise, if there is no suitable level, -1.
14464*/
14465static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){
14466 Fts5Config *pConfig = p->pConfig;
14467 int iRet = -1;
14468 if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){
14469 int ii;
14470 int nBest = 0;
14471
14472 for(ii=0; ii<pStruct->nLevel; ii++){
14473 Fts5StructureLevel *pLvl = &pStruct->aLevel[ii];
14474 i64 nEntry = 0;
14475 i64 nTomb = 0;
14476 int iSeg;
14477 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
14478 nEntry += pLvl->aSeg[iSeg].nEntry;
14479 nTomb += pLvl->aSeg[iSeg].nEntryTombstone;
14480 }
14481 assert_nc( nEntry>0 || pLvl->nSeg==0 )((void) (0));
14482 if( nEntry>0 ){
14483 int nPercent = (nTomb * 100) / nEntry;
14484 if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){
14485 iRet = ii;
14486 nBest = nPercent;
14487 }
14488 }
14489
14490 /* If pLvl is already the input level to an ongoing merge, look no
14491 ** further for a merge candidate. The caller should be allowed to
14492 ** continue merging from pLvl first. */
14493 if( pLvl->nMerge ) break;
14494 }
14495 }
14496 return iRet;
14497}
14498
14499/*
14500** Do up to nPg pages of automerge work on the index.
14501**
14502** Return true if any changes were actually made, or false otherwise.
14503*/
14504static int fts5IndexMerge(
14505 Fts5Index *p, /* FTS5 backend object */
14506 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
14507 int nPg, /* Pages of work to do */
14508 int nMin /* Minimum number of segments to merge */
14509){
14510 int nRem = nPg;
14511 int bRet = 0;
14512 Fts5Structure *pStruct = *ppStruct;
14513 while( nRem>0 && p->rc==SQLITE_OK0 ){
14514 int iLvl; /* To iterate through levels */
14515 int iBestLvl = 0; /* Level offering the most input segments */
14516 int nBest = 0; /* Number of input segments on best level */
14517
14518 /* Set iBestLvl to the level to read input segments from. Or to -1 if
14519 ** there is no level suitable to merge segments from. */
14520 assert( pStruct->nLevel>0 )((void) (0));
14521 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
14522 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
14523 if( pLvl->nMerge ){
14524 if( pLvl->nMerge>nBest ){
14525 iBestLvl = iLvl;
14526 nBest = nMin;
14527 }
14528 break;
14529 }
14530 if( pLvl->nSeg>nBest ){
14531 nBest = pLvl->nSeg;
14532 iBestLvl = iLvl;
14533 }
14534 }
14535 if( nBest<nMin ){
14536 iBestLvl = fts5IndexFindDeleteMerge(p, pStruct);
14537 }
14538
14539 if( iBestLvl<0 ) break;
14540 bRet = 1;
14541 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
14542 if( p->rc==SQLITE_OK0 && pStruct->aLevel[iBestLvl].nMerge==0 ){
14543 fts5StructurePromote(p, iBestLvl+1, pStruct);
14544 }
14545
14546 if( nMin==1 ) nMin = 2;
14547 }
14548 *ppStruct = pStruct;
14549 return bRet;
14550}
14551
14552/*
14553** A total of nLeaf leaf pages of data has just been flushed to a level-0
14554** segment. This function updates the write-counter accordingly and, if
14555** necessary, performs incremental merge work.
14556**
14557** If an error occurs, set the Fts5Index.rc error code. If an error has
14558** already occurred, this function is a no-op.
14559*/
14560static void fts5IndexAutomerge(
14561 Fts5Index *p, /* FTS5 backend object */
14562 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
14563 int nLeaf /* Number of output leaves just written */
14564){
14565 if( p->rc==SQLITE_OK0 && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0)((*ppStruct)!=0) ){
14566 Fts5Structure *pStruct = *ppStruct;
14567 u64 nWrite; /* Initial value of write-counter */
14568 int nWork; /* Number of work-quanta to perform */
14569 int nRem; /* Number of leaf pages left to write */
14570
14571 /* Update the write-counter. While doing so, set nWork. */
14572 nWrite = pStruct->nWriteCounter;
14573 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
14574 pStruct->nWriteCounter += nLeaf;
14575 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
14576
14577 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
14578 }
14579}
14580
14581static void fts5IndexCrisismerge(
14582 Fts5Index *p, /* FTS5 backend object */
14583 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
14584){
14585 const int nCrisis = p->pConfig->nCrisisMerge;
14586 Fts5Structure *pStruct = *ppStruct;
14587 if( pStruct && pStruct->nLevel>0 ){
14588 int iLvl = 0;
14589 while( p->rc==SQLITE_OK0 && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
14590 fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
14591 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) )((void) (0));
14592 fts5StructurePromote(p, iLvl+1, pStruct);
14593 iLvl++;
14594 }
14595 *ppStruct = pStruct;
14596 }
14597}
14598
14599static int fts5IndexReturn(Fts5Index *p){
14600 int rc = p->rc;
14601 p->rc = SQLITE_OK0;
14602 return rc;
14603}
14604
14605/*
14606** Close the read-only blob handle, if it is open.
14607*/
14608static void sqlite3Fts5IndexCloseReader(Fts5Index *p){
14609 fts5IndexCloseReader(p);
14610 fts5IndexReturn(p);
14611}
14612
14613typedef struct Fts5FlushCtx Fts5FlushCtx;
14614struct Fts5FlushCtx {
14615 Fts5Index *pIdx;
14616 Fts5SegWriter writer;
14617};
14618
14619/*
14620** Buffer aBuf[] contains a list of varints, all small enough to fit
14621** in a 32-bit integer. Return the size of the largest prefix of this
14622** list nMax bytes or less in size.
14623*/
14624static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
14625 int ret;
14626 u32 dummy;
14627 ret = fts5GetVarint32(aBuf, dummy)sqlite3Fts5GetVarint32(aBuf,(u32*)&(dummy));
14628 if( ret<nMax ){
14629 while( 1 ){
14630 int i = fts5GetVarint32(&aBuf[ret], dummy)sqlite3Fts5GetVarint32(&aBuf[ret],(u32*)&(dummy));
14631 if( (ret + i) > nMax ) break;
14632 ret += i;
14633 }
14634 }
14635 return ret;
14636}
14637
14638/*
14639** Execute the SQL statement:
14640**
14641** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
14642**
14643** This is used when a secure-delete operation removes the last term
14644** from a segment leaf page. In that case the %_idx entry is removed
14645** too. This is done to ensure that if all instances of a token are
14646** removed from an fts5 database in secure-delete mode, no trace of
14647** the token itself remains in the database.
14648*/
14649static void fts5SecureDeleteIdxEntry(
14650 Fts5Index *p, /* FTS5 backend object */
14651 int iSegid, /* Id of segment to delete entry for */
14652 int iPgno /* Page number within segment */
14653){
14654 if( iPgno!=1 ){
14655 assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE )((void) (0));
14656 if( p->pDeleteFromIdx==0 ){
14657 fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintfsqlite3_api->mprintf(
14658 "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
14659 p->pConfig->zDb, p->pConfig->zName
14660 ));
14661 }
14662 if( p->rc==SQLITE_OK0 ){
14663 sqlite3_bind_intsqlite3_api->bind_int(p->pDeleteFromIdx, 1, iSegid);
14664 sqlite3_bind_intsqlite3_api->bind_int(p->pDeleteFromIdx, 2, iPgno);
14665 sqlite3_stepsqlite3_api->step(p->pDeleteFromIdx);
14666 p->rc = sqlite3_resetsqlite3_api->reset(p->pDeleteFromIdx);
14667 }
14668 }
14669}
14670
14671/*
14672** This is called when a secure-delete operation removes a position-list
14673** that overflows onto segment page iPgno of segment pSeg. This function
14674** rewrites node iPgno, and possibly one or more of its right-hand peers,
14675** to remove this portion of the position list.
14676**
14677** Output variable (*pbLastInDoclist) is set to true if the position-list
14678** removed is followed by a new term or the end-of-segment, or false if
14679** it is followed by another rowid/position list.
14680*/
14681static void fts5SecureDeleteOverflow(
14682 Fts5Index *p,
14683 Fts5StructureSegment *pSeg,
14684 int iPgno,
14685 int *pbLastInDoclist
14686){
14687 const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE1);
14688 int pgno;
14689 Fts5Data *pLeaf = 0;
14690 assert( iPgno!=1 )((void) (0));
14691
/* Assume the removed entry is the last in its doclist. This is cleared
** below as soon as a page with a non-zero 16-bit header value at offset 0
** is visited. */
14692 *pbLastInDoclist = 1;
/* Visit pages iPgno..pSeg->pgnoLast in order. The loop ends early on an
** error, on a failed page read, or once a page has been rewritten. */
14693 for(pgno=iPgno; p->rc==SQLITE_OK0 && pgno<=pSeg->pgnoLast; pgno++){
14694 i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) <<
(31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) )
;
14695 int iNext = 0;
14696 u8 *aPg = 0;
14697
14698 pLeaf = fts5DataRead(p, iRowid);
14699 if( pLeaf==0 ) break;
14700 aPg = pLeaf->p;
14701
/* iNext is first taken from the 16-bit field at the start of the page.
** If that field is zero and the page has bytes beyond szLeaf, iNext is
** instead read from the first varint stored at offset szLeaf. */
14702 iNext = fts5GetU16(&aPg[0]);
14703 if( iNext!=0 ){
14704 *pbLastInDoclist = 0;
14705 }
14706 if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){
14707 fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext)sqlite3Fts5GetVarint32(&aPg[pLeaf->szLeaf],(u32*)&
(iNext))
;
14708 }
14709
14710 if( iNext==0 ){
14711 /* The page contains no terms or rowids. Replace it with an empty
14712 ** page and move on to the right-hand peer. */
14713 const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04};
14714 assert_nc( bDetailNone==0 || pLeaf->nn==4 )((void) (0));
14715 if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty));
14716 fts5DataRelease(pLeaf);
/* Clear the pointer so the fts5DataRelease() call after the loop does not
** release this page a second time. */
14717 pLeaf = 0;
14718 }else if( bDetailNone ){
14719 break;
14720 }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){
/* iNext must lie inside the page body (at least 4, below szLeaf) and
** szLeaf must not exceed the page size. Otherwise report corruption. */
14721 p->rc = FTS5_CORRUPT(11 | (1<<8));
14722 break;
14723 }else{
/* nShift is the number of bytes between offset 4 and iNext. That span is
** removed from the page by the memmove() below. */
14724 int nShift = iNext - 4;
14725 int nPg;
14726
14727 int nIdx = 0;
14728 u8 *aIdx = 0;
14729
14730 /* Unless the current page footer is 0 bytes in size (in which case
14731 ** the new page footer will be as well), allocate and populate a
14732 ** buffer containing the new page footer. Set stack variables aIdx
14733 ** and nIdx accordingly. */
14734 if( pLeaf->nn>pLeaf->szLeaf ){
14735 int iFirst = 0;
14736 int i1 = pLeaf->szLeaf;
14737 int i2 = 0;
14738
14739 i1 += fts5GetVarint32(&aPg[i1], iFirst)sqlite3Fts5GetVarint32(&aPg[i1],(u32*)&(iFirst));
14740 if( iFirst<iNext ){
14741 p->rc = FTS5_CORRUPT(11 | (1<<8));
14742 break;
14743 }
14744 aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2);
14745 if( aIdx==0 ) break;
/* The first footer value is rewritten reduced by nShift. Any remaining
** footer bytes are copied across unchanged. */
14746 i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift);
14747 if( i1<pLeaf->nn ){
14748 memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1);
14749 i2 += (pLeaf->nn-i1);
14750 }
14751 nIdx = i2;
14752 }
14753
14754 /* Modify the contents of buffer aPg[]. Set nPg to the new size
14755 ** in bytes. The new page is always smaller than the old. */
14756 nPg = pLeaf->szLeaf - nShift;
14757 memmove(&aPg[4], &aPg[4+nShift], nPg-4);
14758 fts5PutU16(&aPg[2], nPg);
14759 if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4);
14760 if( nIdx>0 ){
14761 memcpy(&aPg[nPg], aIdx, nIdx);
14762 nPg += nIdx;
14763 }
/* aIdx is still 0 here if the page had no footer. */
14764 sqlite3_freesqlite3_api->free(aIdx);
14765
14766 /* Write the new page to disk and exit the loop */
14767 assert( nPg>4 || fts5GetU16(aPg)==0 )((void) (0));
14768 fts5DataWrite(p, iRowid, aPg, nPg);
14769 break;
14770 }
14771 }
/* Release whichever page is still held (pLeaf may be 0 at this point). */
14772 fts5DataRelease(pLeaf);
14773}
14774
14775/*
14776** Completely remove the entry that pSeg currently points to from
14777** the database.
14778*/
14779static void fts5DoSecureDelete(
14780 Fts5Index *p,
14781 Fts5SegIter *pSeg
14782){
14783 const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE1);
14784 int iSegid = pSeg->pSeg->iSegid;
14785 u8 *aPg = pSeg->pLeaf->p;
14786 int nPg = pSeg->pLeaf->nn;
14787 int iPgIdx = pSeg->pLeaf->szLeaf;
14788
14789 u64 iDelta = 0;
14790 int iNextOff = 0;
14791 int iOff = 0;
14792 int nIdx = 0;
14793 u8 *aIdx = 0;
14794 int bLastInDoclist = 0;
14795 int iIdx = 0;
14796 int iStart = 0;
14797 int iDelKeyOff = 0; /* Offset of deleted key, if any */
14798
/* Take a private copy of the page footer: the nIdx bytes that run from
** offset iPgIdx (pLeaf->szLeaf) to the end of the page (pLeaf->nn). The
** buffer is allocated 16 bytes larger than nIdx. The function returns
** without touching the page if p->rc is non-zero after the allocation. */
14799 nIdx = nPg-iPgIdx;
14800 aIdx = sqlite3Fts5MallocZero(&p->rc, ((i64)nIdx)+16);
14801 if( p->rc ) return;
14802 memcpy(aIdx, &aPg[iPgIdx], nIdx);
14803
14804 /* At this point segment iterator pSeg points to the entry
14805 ** this function should remove from the b-tree segment.
14806 **
14807 ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
14808 ** offset of the first byte in the position-list for the entry to
14809 ** remove. Immediately before this comes two varints that will also
14810 ** need to be removed:
14811 **
14812 ** + the rowid or delta rowid value for the entry, and
14813 ** + the size of the position list in bytes.
14814 **
14815 ** Or, in detail=none mode, there is a single varint prior to
14816 ** pSeg->iLeafOffset - the rowid or delta rowid value.
14817 **
14818 ** This block sets the following variables:
14819 **
14820 ** iStart:
14821 ** The offset of the first byte of the rowid or delta-rowid
14822 ** value for the doclist entry being removed.
14823 **
14824 ** iDelta:
14825 ** The value of the rowid or delta-rowid value for the doclist
14826 ** entry being removed.
14827 **
14828 ** iNextOff:
14829 ** The offset of the next entry following the position list
14830 ** for the one being removed. If the position list for this
14831 ** entry overflows onto the next leaf page, this value will be
14832 ** greater than pLeaf->szLeaf.
14833 */
14834 {
14835 int iSOP; /* Start-Of-Position-list */
/* Scanning starts at the term's offset when the iterator is still on the
** term's own leaf, otherwise at the offset stored in the first two bytes
** of the page. */
14836 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){
14837 iStart = pSeg->iTermLeafOffset;
14838 }else{
14839 iStart = fts5GetU16(&aPg[0]);
14840 }
14841
14842 iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta);
14843 assert_nc( iSOP<=pSeg->iLeafOffset )((void) (0));
14844
14845 if( bDetailNone ){
/* Step over entries until the one at pSeg->iLeafOffset is reached. Each
** entry is a varint optionally followed by up to two 0x00 bytes. */
14846 while( iSOP<pSeg->iLeafOffset ){
14847 if( aPg[iSOP]==0x00 ) iSOP++;
14848 if( aPg[iSOP]==0x00 ) iSOP++;
14849 iStart = iSOP;
14850 iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta);
14851 }
14852
14853 iNextOff = iSOP;
14854 if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
14855 if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
14856
14857 }else{
/* Step over entries (rowid varint, size varint, then nPos/2 bytes of
** position data) until the one at pSeg->iLeafOffset is reached. */
14858 int nPos = 0;
14859 iSOP += fts5GetVarint32(&aPg[iSOP], nPos)sqlite3Fts5GetVarint32(&aPg[iSOP],(u32*)&(nPos));
14860 while( iSOP<pSeg->iLeafOffset ){
14861 iStart = iSOP + (nPos/2);
14862 iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta);
14863 iSOP += fts5GetVarint32(&aPg[iSOP], nPos)sqlite3Fts5GetVarint32(&aPg[iSOP],(u32*)&(nPos));
14864 }
14865 assert_nc( iSOP==pSeg->iLeafOffset )((void) (0));
14866 iNextOff = pSeg->iLeafOffset + pSeg->nPos;
14867 }
14868 }
14869
/* iOff is the write cursor. Replacement bytes, if any, are written from
** the start of the removed entry. */
14870 iOff = iStart;
14871
14872 /* If the position-list for the entry being removed flows over past
14873 ** the end of this page, delete the portion of the position-list on the
14874 ** next page and beyond.
14875 **
14876 ** Set variable bLastInDoclist to true if this entry happens
14877 ** to be the last rowid in the doclist for its term. */
14878 if( iNextOff>=iPgIdx ){
14879 int pgno = pSeg->iLeafPgno+1;
14880 fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist);
14881 iNextOff = iPgIdx;
14882 }
14883
14884 if( pSeg->bDel==0 ){
14885 if( iNextOff!=iPgIdx ){
14886 /* Loop through the page-footer. If iNextOff (offset of the
14887 ** entry following the one we are removing) is equal to the
14888 ** offset of a key on this page, then the entry is the last
14889 ** in its doclist. */
14890 int iKeyOff = 0;
14891 for(iIdx=0; iIdx<nIdx; /* no-op */){
14892 u32 iVal = 0;
14893 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal));
14894 iKeyOff += iVal;
14895 if( iKeyOff==iNextOff ){
14896 bLastInDoclist = 1;
14897 }
14898 }
14899 }
14900
14901 /* If this is (a) the first rowid on a page and (b) is not followed by
14902 ** another position list on the same page, set the "first-rowid" field
14903 ** of the header to 0. */
14904 if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist || iNextOff==iPgIdx) ){
14905 fts5PutU16(&aPg[0], 0);
14906 }
14907 }
14908
/* Decide what, if anything, is written in place of the removed entry:
**   - pSeg->bDel set: the same delta varint followed by a 0x01 byte.
**   - not last in doclist: if another entry follows on this page, its
**     delta is replaced by (iDelta + next delta).
**   - entry is at the term's own offset on the term's leaf: the term is
**     removed as well.
**   - entry starts at offset 4: earlier pages are inspected, see below. */
14909 if( pSeg->bDel ){
14910 iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta);
14911 aPg[iOff++] = 0x01;
14912 }else if( bLastInDoclist==0 ){
14913 if( iNextOff!=iPgIdx ){
14914 u64 iNextDelta = 0;
14915 iNextOff += fts5GetVarintsqlite3Fts5GetVarint(&aPg[iNextOff], &iNextDelta);
14916 iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta);
14917 }
14918 }else if(
14919 pSeg->iLeafPgno==pSeg->iTermLeafPgno
14920 && iStart==pSeg->iTermLeafOffset
14921 ){
14922 /* The entry being removed was the only position list in its
14923 ** doclist. Therefore the term needs to be removed as well. */
14924 int iKey = 0;
14925 int iKeyOff = 0;
14926
14927 /* Set iKeyOff to the offset of the term that will be removed - the
14928 ** last offset in the footer that is not greater than iStart. */
14929 for(iIdx=0; iIdx<nIdx; iKey++){
14930 u32 iVal = 0;
14931 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal));
14932 if( (iKeyOff+iVal)>(u32)iStart ) break;
14933 iKeyOff += iVal;
14934 }
14935 assert_nc( iKey>=1 )((void) (0));
14936
14937 /* Set iDelKeyOff to the value of the footer entry to remove from
14938 ** the page. */
14939 iDelKeyOff = iOff = iKeyOff;
14940
14941 if( iNextOff!=iPgIdx ){
14942 /* This is the only position-list associated with the term, and there
14943 ** is another term following it on this page. So the subsequent term
14944 ** needs to be moved to replace the term associated with the entry
14945 ** being removed. */
14946 int nPrefix = 0;
14947 int nSuffix = 0;
14948 int nPrefix2 = 0;
14949 int nSuffix2 = 0;
14950
14951 iDelKeyOff = iNextOff;
14952 iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2)sqlite3Fts5GetVarint32(&aPg[iNextOff],(u32*)&(nPrefix2
))
;
14953 iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2)sqlite3Fts5GetVarint32(&aPg[iNextOff],(u32*)&(nSuffix2
))
;
14954
/* A prefix-length varint is only read (here) and written (below) when
** the removed term is not the first key found on the page (iKey!=1). */
14955 if( iKey!=1 ){
14956 iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix)sqlite3Fts5GetVarint32(&aPg[iKeyOff],(u32*)&(nPrefix)
)
;
14957 }
14958 iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix)sqlite3Fts5GetVarint32(&aPg[iKeyOff],(u32*)&(nSuffix)
)
;
14959
/* The moved term shares at most MIN(nPrefix, nPrefix2) bytes with the
** preceding key. The rest of its bytes become its new suffix. */
14960 nPrefix = MIN(nPrefix, nPrefix2)(((nPrefix) < (nPrefix2)) ? (nPrefix) : (nPrefix2));
14961 nSuffix = (nPrefix2 + nSuffix2) - nPrefix;
14962
14963 if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){
14964 p->rc = FTS5_CORRUPT(11 | (1<<8));
14965 }else{
14966 if( iKey!=1 ){
14967 iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix);
14968 }
14969 iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix);
14970 if( nPrefix2>pSeg->term.n ){
14971 p->rc = FTS5_CORRUPT(11 | (1<<8));
14972 }else if( nPrefix2>nPrefix ){
/* Bytes nPrefix..nPrefix2 of the moved term are no longer covered by the
** shared prefix, so they are copied in from pSeg->term. */
14973 memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix);
14974 iOff += (nPrefix2-nPrefix);
14975 }
14976 memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2);
14977 iOff += nSuffix2;
14978 iNextOff += nSuffix2;
14979 }
14980 }
14981 }else if( iStart==4 ){
14982 int iPgno;
14983
14984 assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno )((void) (0));
14985 /* The entry being removed may be the only position list in
14986 ** its doclist. */
/* Walk backwards from the page before this one towards the term's leaf,
** stopping at the first page that cannot be read or is not exactly 4
** bytes in size. */
14987 for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){
14988 Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(iPgno)) )
);
14989 int bEmpty = (pPg && pPg->nn==4);
14990 fts5DataRelease(pPg);
14991 if( bEmpty==0 ) break;
14992 }
14993
/* The walk reached the term's leaf, so every page in between was 4 bytes
** in size. If the term's doclist begins exactly at the end of that leaf's
** data (szLeaf==iTermLeafOffset), trim the term off the end of that
** leaf. */
14994 if( iPgno==pSeg->iTermLeafPgno ){
14995 i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iTermLeafPgno
)) )
;
14996 Fts5Data *pTerm = fts5DataRead(p, iId);
14997 if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){
14998 u8 *aTermIdx = &pTerm->p[pTerm->szLeaf];
14999 int nTermIdx = pTerm->nn - pTerm->szLeaf;
15000 int iTermIdx = 0;
15001 int iTermOff = 0;
15002
/* Sum the footer varints. On exit iTermOff is the sum of all of them and
** iTermIdx is the byte offset within the footer of the last varint. */
15003 while( 1 ){
15004 u32 iVal = 0;
15005 int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal)sqlite3Fts5GetVarint32(&aTermIdx[iTermIdx],(u32*)&(iVal
))
;
15006 iTermOff += iVal;
15007 if( (iTermIdx+nByte)>=nTermIdx ) break;
15008 iTermIdx += nByte;
15009 }
15010 nTermIdx = iTermIdx;
15011
/* Move the shortened footer (all but its last varint) down to offset
** iTermOff and record iTermOff as the new data size in header bytes 2-3. */
15012 memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx);
15013 fts5PutU16(&pTerm->p[2], iTermOff);
15014
15015 fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx);
15016 if( nTermIdx==0 ){
15017 fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno);
15018 }
15019 }
15020 fts5DataRelease(pTerm);
15021 }
15022 }
15023
15024 /* Assuming no error has occurred, this block does final edits to the
15025 ** leaf page before writing it back to disk. Input variables are:
15026 **
15027 ** nPg: Total initial size of leaf page.
15028 ** iPgIdx: Initial offset of page footer.
15029 **
15030 ** iOff: Offset to move data to
15031 ** iNextOff: Offset to move data from
15032 */
15033 if( p->rc==SQLITE_OK0 ){
15034 const int nMove = nPg - iNextOff; /* Number of bytes to move */
15035 int nShift = iNextOff - iOff; /* Distance to move them */
15036
15037 int iPrevKeyOut = 0;
15038 int iKeyIn = 0;
15039
15040 memmove(&aPg[iOff], &aPg[iNextOff], nMove);
15041 iPgIdx -= nShift;
15042 nPg = iPgIdx;
15043 fts5PutU16(&aPg[2], iPgIdx);
15044
/* Rebuild the footer from the saved copy in aIdx. The key at iDelKeyOff
** is dropped, keys located after iOff move down by nShift, and each kept
** key is written as a delta from the previously written key. */
15045 for(iIdx=0; iIdx<nIdx; /* no-op */){
15046 u32 iVal = 0;
15047 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal));
15048 iKeyIn += iVal;
15049 if( iKeyIn!=iDelKeyOff ){
15050 int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0));
15051 nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut);
15052 iPrevKeyOut = iKeyOut;
15053 }
15054 }
15055
/* iPgIdx==nPg means no footer entry was written back although the
** original footer was non-empty. In that case remove the %_idx entry for
** this page (never done for page 1). */
15056 if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){
15057 fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno);
15058 }
15059
15060 assert_nc( nPg>4 || fts5GetU16(aPg)==0 )((void) (0));
15061 fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iLeafPgno
)) )
, aPg, nPg);
15062 }
/* The footer copy is freed whether or not an error occurred. */
15063 sqlite3_freesqlite3_api->free(aIdx);
15064}
15065
15066/*
15067** This is called as part of flushing a delete to disk in 'secure-delete'
15068** mode. It edits the segments within the database described by argument
15069** pStruct to remove the entries for term zTerm, rowid iRowid.
15070**
15071** Return SQLITE_OK if successful, or an SQLite error code if an error
15072** has occurred. Any error code is also stored in the Fts5Index handle.
15073*/
15074static int fts5FlushSecureDelete(
15075 Fts5Index *p,
15076 Fts5Structure *pStruct,
15077 const char *zTerm,
15078 int nTerm,
15079 i64 iRowid
15080){
15081 const int f = FTS5INDEX_QUERY_SKIPHASH0x0040;
15082 Fts5Iter *pIter = 0; /* Used to find term instance */
15083
15084 /* If the version number has not been set to SECUREDELETE, do so now. */
15085 if( p->pConfig->iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE5 ){
15086 Fts5Config *pConfig = p->pConfig;
15087 sqlite3_stmt *pStmt = 0;
15088 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintfsqlite3_api->mprintf(
15089 "REPLACE INTO %Q.'%q_config' VALUES ('version', %d)",
15090 pConfig->zDb, pConfig->zName, FTS5_CURRENT_VERSION_SECUREDELETE5
15091 ));
15092 if( p->rc==SQLITE_OK0 ){
15093 int rc;
15094 sqlite3_stepsqlite3_api->step(pStmt);
15095 rc = sqlite3_finalizesqlite3_api->finalize(pStmt);
15096 if( p->rc==SQLITE_OK0 ) p->rc = rc;
15097 pConfig->iCookie++;
15098 pConfig->iVersion = FTS5_CURRENT_VERSION_SECUREDELETE5;
15099 }
15100 }
15101
15102 fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter);
15103 if( fts5MultiIterEof(p, pIter)==0 ){
15104 i64 iThis = fts5MultiIterRowid(pIter);
15105 if( iThis<iRowid ){
15106 fts5MultiIterNextFrom(p, pIter, iRowid);
15107 }
15108
15109 if( p->rc==SQLITE_OK0
15110 && fts5MultiIterEof(p, pIter)==0
15111 && iRowid==fts5MultiIterRowid(pIter)
15112 ){
15113 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
15114 fts5DoSecureDelete(p, pSeg);
15115 }
15116 }
15117
15118 fts5MultiIterFree(pIter);
15119 return p->rc;
15120}
15121
15122
15123/*
15124** Flush the contents of in-memory hash table iHash to a new level-0
15125** segment on disk. Also update the corresponding structure record.
15126**
15127** If an error occurs, set the Fts5Index.rc error code. If an error has
15128** already occurred, this function is a no-op.
15129*/
15130static void fts5FlushOneHash(Fts5Index *p){
15131 Fts5Hash *pHash = p->pHash;
15132 Fts5Structure *pStruct;
15133 int iSegid;
15134 int pgnoLast = 0; /* Last leaf page number in segment */
15135
15136 /* Obtain a reference to the index structure and allocate a new segment-id
15137 ** for the new level-0 segment. */
15138 pStruct = fts5StructureRead(p);
15139 fts5StructureInvalidate(p);
15140
/* A new segment is only built when the hash table holds data. The
** automerge, crisis-merge and structure-write calls at the end of this
** function run in either case. */
15141 if( sqlite3Fts5HashIsEmpty(pHash)==0 ){
15142 iSegid = fts5AllocateSegid(p, pStruct);
15143 if( iSegid ){
15144 const int pgsz = p->pConfig->pgsz;
15145 int eDetail = p->pConfig->eDetail;
15146 int bSecureDelete = p->pConfig->bSecureDelete;
15147 Fts5StructureSegment *pSeg; /* New segment within pStruct */
15148 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
15149 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
15150
15151 Fts5SegWriter writer;
15152 fts5WriteInit(p, &writer, iSegid);
15153
15154 pBuf = &writer.writer.buf;
15155 pPgidx = &writer.writer.pgidx;
15156
15157 /* fts5WriteInit() should have initialized the buffers to (most likely)
15158 ** the maximum space required. */
15159 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) )((void) (0));
15160 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) )((void) (0));
15161
15162 /* Begin scanning through hash table entries. This loop runs once for each
15163 ** term/doclist currently stored within the hash table. */
15164 if( p->rc==SQLITE_OK0 ){
15165 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
15166 }
15167 while( p->rc==SQLITE_OK0 && 0==sqlite3Fts5HashScanEof(pHash) ){
15168 const char *zTerm; /* Buffer containing term */
15169 int nTerm; /* Size of zTerm in bytes */
15170 const u8 *pDoclist; /* Pointer to doclist for this term */
15171 int nDoclist; /* Size of doclist in bytes */
15172
15173 /* Get the term and doclist for this entry. */
15174 sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist);
/* Outside secure-delete mode the term is written immediately. In
** secure-delete mode it is written lazily inside the doclist loop below
** (see bTermWritten). */
15175 if( bSecureDelete==0 ){
15176 fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
15177 if( p->rc!=SQLITE_OK0 ) break;
15178 assert( writer.bFirstRowidInPage==0 )((void) (0));
15179 }
15180
15181 if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
15182 /* The entire doclist will fit on the current leaf. */
15183 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pDoclist
, nDoclist); (pBuf)->n += nDoclist; }
;
15184 }else{
/* bTermWritten starts out true unless in secure-delete mode, where the
** term is appended the first time an entry is actually written out. */
15185 int bTermWritten = !bSecureDelete;
15186 i64 iRowid = 0;
15187 i64 iPrev = 0;
15188 int iOff = 0;
15189
15190 /* The entire doclist will not fit on this leaf. The following
15191 ** loop iterates through the poslists that make up the current
15192 ** doclist. */
15193 while( p->rc==SQLITE_OK0 && iOff<nDoclist ){
15194 u64 iDelta = 0;
15195 iOff += fts5GetVarintsqlite3Fts5GetVarint(&pDoclist[iOff], &iDelta);
15196 iRowid += iDelta;
15197
15198 /* If in secure delete mode, and if this entry in the poslist is
15199 ** in fact a delete, then edit the existing segments directly
15200 ** using fts5FlushSecureDelete(). */
15201 if( bSecureDelete ){
15202 if( eDetail==FTS5_DETAIL_NONE1 ){
15203 if( iOff<nDoclist && pDoclist[iOff]==0x00
15204 && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid)
15205 ){
15206 iOff++;
/* If a second 0x00 byte follows, clear nDoclist so the enclosing loop
** stops after this iteration. There is no "continue" on that path, so the
** entry is still written out below. Otherwise skip to the next entry. */
15207 if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
15208 iOff++;
15209 nDoclist = 0;
15210 }else{
15211 continue;
15212 }
15213 }
15214 }else if( (pDoclist[iOff] & 0x01)
15215 && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid)
15216 ){
15217 if( p->rc!=SQLITE_OK0 || pDoclist[iOff]==0x01 ){
15218 iOff++;
15219 continue;
15220 }
15221 }
15222 }
15223
15224 if( p->rc==SQLITE_OK0 && bTermWritten==0 ){
15225 fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
15226 bTermWritten = 1;
15227 assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 )((void) (0));
15228 }
15229
/* The first rowid on a page is stored in full and its offset is recorded
** in the first two bytes of the page. Later rowids are stored as the
** difference from the previous rowid (iPrev). */
15230 if( writer.bFirstRowidInPage ){
15231 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
15232 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
15233 writer.bFirstRowidInPage = 0;
15234 fts5WriteDlidxAppend(p, &writer, iRowid);
15235 }else{
15236 u64 iRowidDelta = (u64)iRowid - (u64)iPrev;
15237 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta);
15238 }
15239 if( p->rc!=SQLITE_OK0 ) break;
15240 assert( pBuf->n<=pBuf->nSpace )((void) (0));
15241 iPrev = iRowid;
15242
15243 if( eDetail==FTS5_DETAIL_NONE1 ){
/* In detail=none mode up to two 0x00 bytes following the rowid are
** copied through to the output. */
15244 if( iOff<nDoclist && pDoclist[iOff]==0 ){
15245 pBuf->p[pBuf->n++] = 0;
15246 iOff++;
15247 if( iOff<nDoclist && pDoclist[iOff]==0 ){
15248 pBuf->p[pBuf->n++] = 0;
15249 iOff++;
15250 }
15251 }
15252 if( (pBuf->n + pPgidx->n)>=pgsz ){
15253 fts5WriteFlushLeaf(p, &writer);
15254 }
15255 }else{
15256 int bDel = 0;
15257 int nPos = 0;
15258 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel);
/* For a delete entry in secure-delete mode, a size varint of nPos*2 is
** written here, the original size field is skipped, and only the nPos
** position bytes are copied. Otherwise the size field and the position
** bytes are copied together. */
15259 if( bDel && bSecureDelete ){
15260 fts5BufferAppendVarint(&p->rc, pBuf, nPos*2)sqlite3Fts5BufferAppendVarint(&p->rc,pBuf,(i64)nPos*2);
15261 iOff += nCopy;
15262 nCopy = nPos;
15263 }else{
15264 nCopy += nPos;
15265 }
15266 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
15267 /* The entire poslist will fit on the current leaf. So copy
15268 ** it in one go. */
15269 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], &
pDoclist[iOff], nCopy); (pBuf)->n += nCopy; }
;
15270 }else{
15271 /* The entire poslist will not fit on this leaf. So it needs
15272 ** to be broken into sections. The only qualification being
15273 ** that each varint must be stored contiguously. */
15274 const u8 *pPoslist = &pDoclist[iOff];
15275 int iPos = 0;
15276 while( p->rc==SQLITE_OK0 ){
15277 int nSpace = pgsz - pBuf->n - pPgidx->n;
15278 int n = 0;
/* Copy everything that remains if it fits in nSpace. Otherwise copy the
** prefix size reported by fts5PoslistPrefix(). */
15279 if( (nCopy - iPos)<=nSpace ){
15280 n = nCopy - iPos;
15281 }else{
15282 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
15283 }
15284 assert( n>0 )((void) (0));
15285 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], &
pPoslist[iPos], n); (pBuf)->n += n; }
;
15286 iPos += n;
15287 if( (pBuf->n + pPgidx->n)>=pgsz ){
15288 fts5WriteFlushLeaf(p, &writer);
15289 }
15290 if( iPos>=nCopy ) break;
15291 }
15292 }
15293 iOff += nCopy;
15294 }
15295 }
15296 }
15297
15298 /* TODO2: Doclist terminator written here. */
15299 /* pBuf->p[pBuf->n++] = '\0'; */
15300 assert( pBuf->n<=pBuf->nSpace )((void) (0));
15301 if( p->rc==SQLITE_OK0 ) sqlite3Fts5HashScanNext(pHash);
15302 }
15303 fts5WriteFinish(p, &writer, &pgnoLast);
15304
15305 assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 )((void) (0));
15306 if( pgnoLast>0 ){
15307 /* Update the Fts5Structure. It is written back to the database by the
15308 ** fts5StructureRelease() call below. */
15309 if( pStruct->nLevel==0 ){
15310 fts5StructureAddLevel(&p->rc, &pStruct);
15311 }
15312 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
15313 if( p->rc==SQLITE_OK0 ){
/* Append the new segment as the last segment of level 0. The origin and
** entry-count fields are only filled in when pStruct->nOriginCntr is
** greater than zero. */
15314 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
15315 pSeg->iSegid = iSegid;
15316 pSeg->pgnoFirst = 1;
15317 pSeg->pgnoLast = pgnoLast;
15318 if( pStruct->nOriginCntr>0 ){
15319 pSeg->iOrigin1 = pStruct->nOriginCntr;
15320 pSeg->iOrigin2 = pStruct->nOriginCntr;
15321 pSeg->nEntry = p->nPendingRow;
15322 pStruct->nOriginCntr++;
15323 }
15324 pStruct->nSegment++;
15325 }
15326 fts5StructurePromote(p, 0, pStruct);
15327 }
15328 }
15329 }
15330
/* pgnoLast is still 0 here if no new segment was written above. */
15331 fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete);
15332 fts5IndexCrisismerge(p, &pStruct);
15333 fts5StructureWrite(p, pStruct);
15334 fts5StructureRelease(pStruct);
15335}
15336
15337/*
15338** Flush any data stored in the in-memory hash tables to the database.
15339*/
15340static void fts5IndexFlush(Fts5Index *p){
15341 /* Unless it is empty, flush the hash table to disk */
15342 if( p->flushRc ){
15343 p->rc = p->flushRc;
15344 return;
15345 }
15346 if( p->nPendingData || p->nContentlessDelete ){
15347 assert( p->pHash )((void) (0));
15348 fts5FlushOneHash(p);
15349 if( p->rc==SQLITE_OK0 ){
15350 sqlite3Fts5HashClear(p->pHash);
15351 p->nPendingData = 0;
15352 p->nPendingRow = 0;
15353 p->nContentlessDelete = 0;
15354 }else if( p->nPendingData || p->nContentlessDelete ){
15355 p->flushRc = p->rc;
15356 }
15357 }
15358}
15359
15360static Fts5Structure *fts5IndexOptimizeStruct(
15361 Fts5Index *p,
15362 Fts5Structure *pStruct
15363){
15364 Fts5Structure *pNew = 0;
15365 sqlite3_int64 nByte = SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel
))
;
15366 int nSeg = pStruct->nSegment;
15367 int i;
15368
15369 /* Figure out if this structure requires optimization. A structure does
15370 ** not require optimization if either:
15371 **
15372 ** 1. it consists of fewer than two segments, or
15373 ** 2. all segments are on the same level, or
15374 ** 3. all segments except one are currently inputs to a merge operation.
15375 **
15376 ** In the first case, if there are no tombstone hash pages, return NULL. In
15377 ** the second, increment the ref-count on *pStruct and return a copy of the
15378 ** pointer to it.
15379 */
15380 if( nSeg==0 ) return 0;
15381 for(i=0; i<pStruct->nLevel; i++){
15382 int nThis = pStruct->aLevel[i].nSeg;
15383 int nMerge = pStruct->aLevel[i].nMerge;
15384 if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){
15385 if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){
15386 return 0;
15387 }
15388 fts5StructureRef(pStruct);
15389 return pStruct;
15390 }
15391 assert( pStruct->aLevel[i].nMerge<=nThis )((void) (0));
15392 }
15393
15394 nByte += (((i64)pStruct->nLevel)+1) * sizeof(Fts5StructureLevel);
15395 assert( nByte==SZ_FTS5STRUCTURE(pStruct->nLevel+2) )((void) (0));
15396 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
15397
15398 if( pNew ){
15399 Fts5StructureLevel *pLvl;
15400 nByte = nSeg * sizeof(Fts5StructureSegment);
15401 pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL)(((pStruct->nLevel+1) < (64)) ? (pStruct->nLevel+1) :
(64))
;
15402 pNew->nRef = 1;
15403 pNew->nWriteCounter = pStruct->nWriteCounter;
15404 pNew->nOriginCntr = pStruct->nOriginCntr;
15405 pLvl = &pNew->aLevel[pNew->nLevel-1];
15406 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
15407 if( pLvl->aSeg ){
15408 int iLvl, iSeg;
15409 int iSegOut = 0;
15410 /* Iterate through all segments, from oldest to newest. Add them to
15411 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
15412 ** segment in the data structure. */
15413 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
15414 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
15415 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
15416 iSegOut++;
15417 }
15418 }
15419 pNew->nSegment = pLvl->nSeg = nSeg;
15420 }else{
15421 sqlite3_freesqlite3_api->free(pNew);
15422 pNew = 0;
15423 }
15424 }
15425
15426 return pNew;
15427}
15428
15429static int sqlite3Fts5IndexOptimize(Fts5Index *p){
15430 Fts5Structure *pStruct;
15431 Fts5Structure *pNew = 0;
15432
15433 assert( p->rc==SQLITE_OK )((void) (0));
15434 fts5IndexFlush(p);
15435 assert( p->rc!=SQLITE_OK || p->nContentlessDelete==0 )((void) (0));
15436 pStruct = fts5StructureRead(p);
15437 assert( p->rc!=SQLITE_OK || pStruct!=0 )((void) (0));
15438 fts5StructureInvalidate(p);
15439
15440 if( pStruct ){
15441 pNew = fts5IndexOptimizeStruct(p, pStruct);
15442 }
15443 fts5StructureRelease(pStruct);
15444
15445 assert( pNew==0 || pNew->nSegment>0 )((void) (0));
15446 if( pNew ){
15447 int iLvl;
15448 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
15449 while( p->rc==SQLITE_OK0 && pNew->aLevel[iLvl].nSeg>0 ){
15450 int nRem = FTS5_OPT_WORK_UNIT1000;
15451 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
15452 }
15453
15454 fts5StructureWrite(p, pNew);
15455 fts5StructureRelease(pNew);
15456 }
15457
15458 return fts5IndexReturn(p);
15459}
15460
15461/*
15462** This is called to implement the special "VALUES('merge', $nMerge)"
15463** INSERT command.
15464*/
15465static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
15466 Fts5Structure *pStruct = 0;
15467
15468 fts5IndexFlush(p);
15469 pStruct = fts5StructureRead(p);
15470 if( pStruct ){
15471 int nMin = p->pConfig->nUsermerge;
15472 fts5StructureInvalidate(p);
15473 if( nMerge<0 ){
15474 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
15475 fts5StructureRelease(pStruct);
15476 pStruct = pNew;
15477 nMin = 1;
15478 nMerge = nMerge*-1;
15479 }
15480 if( pStruct && pStruct->nLevel ){
15481 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
15482 fts5StructureWrite(p, pStruct);
15483 }
15484 }
15485 fts5StructureRelease(pStruct);
15486 }
15487 return fts5IndexReturn(p);
15488}
15489
15490static void fts5AppendRowid(
15491 Fts5Index *p,
15492 u64 iDelta,
15493 Fts5Iter *pUnused,
15494 Fts5Buffer *pBuf
15495){
15496 UNUSED_PARAM(pUnused)(void)(pUnused);
15497 fts5BufferAppendVarint(&p->rc, pBuf, iDelta)sqlite3Fts5BufferAppendVarint(&p->rc,pBuf,(i64)iDelta);
15498}
15499
15500static void fts5AppendPoslist(
15501 Fts5Index *p,
15502 u64 iDelta,
15503 Fts5Iter *pMulti,
15504 Fts5Buffer *pBuf
15505){
15506 int nData = pMulti->base.nData;
15507 int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING8;
15508 assert( nData>0 )((void) (0));
15509 if( p->rc==SQLITE_OK0 && 0==fts5BufferGrow(&p->rc, pBuf, nByte)( (u32)((pBuf)->n) + (u32)(nByte) <= (u32)((pBuf)->nSpace
) ? 0 : sqlite3Fts5BufferSize((&p->rc),(pBuf),(nByte)+
(pBuf)->n) )
){
15510 fts5BufferSafeAppendVarint(pBuf, iDelta){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf
)->n], (iDelta)); ((void) (0)); }
;
15511 fts5BufferSafeAppendVarint(pBuf, nData*2){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf
)->n], (nData*2)); ((void) (0)); }
;
15512 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pMulti
->base.pData, nData); (pBuf)->n += nData; }
;
15513 memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING8);
15514 }
15515}
15516
15517
15518static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
15519 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
15520
15521 assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) )((void) (0));
15522 if( p>=pIter->aEof ){
15523 pIter->aPoslist = 0;
15524 }else{
15525 i64 iDelta;
15526
15527 p += fts5GetVarintsqlite3Fts5GetVarint(p, (u64*)&iDelta);
15528 pIter->iRowid += iDelta;
15529
15530 /* Read position list size */
15531 if( p[0] & 0x80 ){
15532 int nPos;
15533 pIter->nSize = fts5GetVarint32(p, nPos)sqlite3Fts5GetVarint32(p,(u32*)&(nPos));
15534 pIter->nPoslist = (nPos>>1);
15535 }else{
15536 pIter->nPoslist = ((int)(p[0])) >> 1;
15537 pIter->nSize = 1;
15538 }
15539
15540 pIter->aPoslist = p;
15541 if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
15542 pIter->aPoslist = 0;
15543 }
15544 }
15545}
15546
15547static void fts5DoclistIterInit(
15548 Fts5Buffer *pBuf,
15549 Fts5DoclistIter *pIter
15550){
15551 memset(pIter, 0, sizeof(*pIter));
15552 if( pBuf->n>0 ){
15553 pIter->aPoslist = pBuf->p;
15554 pIter->aEof = &pBuf->p[pBuf->n];
15555 fts5DoclistIterNext(pIter);
15556 }
15557}
15558
/*
** Append rowid iRowid to the doclist in buffer pBuf, encoded as a varint
** holding the difference from iLastRowid, then set iLastRowid to iRowid.
**
** This macro assumes that space within the buffer has already been
** allocated. iLastRowid must be an lvalue. All three arguments are
** evaluated more than once, so they must not have side effects.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) {                 \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                             \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
  (iLastRowid) = (iRowid);                                               \
}
15582
15583/*
15584** Swap the contents of buffer *p1 with that of *p2.
15585*/
15586static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
15587 Fts5Buffer tmp = *p1;
15588 *p1 = *p2;
15589 *p2 = tmp;
15590}
15591
15592static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
15593 int i = *piOff;
15594 if( i>=pBuf->n ){
15595 *piOff = -1;
15596 }else{
15597 u64 iVal;
15598 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
15599 *piRowid += iVal;
15600 }
15601}
15602
15603/*
15604** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
15605** In this case the buffers consist of a delta-encoded list of rowids only.
15606*/
15607static void fts5MergeRowidLists(
15608 Fts5Index *p, /* FTS5 backend object */
15609 Fts5Buffer *p1, /* First list to merge */
15610 int nBuf, /* Number of entries in apBuf[] */
15611 Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
15612){
15613 int i1 = 0;
15614 int i2 = 0;
15615 i64 iRowid1 = 0;
15616 i64 iRowid2 = 0;
15617 i64 iOut = 0;
15618 Fts5Buffer *p2 = &aBuf[0];
15619 Fts5Buffer out;
15620
15621 (void)nBuf;
15622 memset(&out, 0, sizeof(out));
15623 assert( nBuf==1 )((void) (0));
15624 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
15625 if( p->rc ) return;
15626
15627 fts5NextRowid(p1, &i1, &iRowid1);
15628 fts5NextRowid(p2, &i2, &iRowid2);
15629 while( i1>=0 || i2>=0 ){
15630 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
15631 assert( iOut==0 || iRowid1>iOut )((void) (0));
15632 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut){ (&out)->n += sqlite3Fts5PutVarint(&(&out)->
p[(&out)->n], (iRowid1 - iOut)); ((void) (0)); }
;
15633 iOut = iRowid1;
15634 fts5NextRowid(p1, &i1, &iRowid1);
15635 }else{
15636 assert( iOut==0 || iRowid2>iOut )((void) (0));
15637 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut){ (&out)->n += sqlite3Fts5PutVarint(&(&out)->
p[(&out)->n], (iRowid2 - iOut)); ((void) (0)); }
;
15638 iOut = iRowid2;
15639 if( i1>=0 && iRowid1==iRowid2 ){
15640 fts5NextRowid(p1, &i1, &iRowid1);
15641 }
15642 fts5NextRowid(p2, &i2, &iRowid2);
15643 }
15644 }
15645
15646 fts5BufferSwap(&out, p1);
15647 fts5BufferFree(&out)sqlite3Fts5BufferFree(&out);
15648}
15649
15650typedef struct PrefixMerger PrefixMerger;
15651struct PrefixMerger {
15652 Fts5DoclistIter iter; /* Doclist iterator */
15653 i64 iPos; /* For iterating through a position list */
15654 int iOff;
15655 u8 *aPos;
15656 PrefixMerger *pNext; /* Next in docid/poslist order */
15657};
15658
15659static void fts5PrefixMergerInsertByRowid(
15660 PrefixMerger **ppHead,
15661 PrefixMerger *p
15662){
15663 if( p->iter.aPoslist ){
15664 PrefixMerger **pp = ppHead;
15665 while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
15666 pp = &(*pp)->pNext;
15667 }
15668 p->pNext = *pp;
15669 *pp = p;
15670 }
15671}
15672
15673static void fts5PrefixMergerInsertByPosition(
15674 PrefixMerger **ppHead,
15675 PrefixMerger *p
15676){
15677 if( p->iPos>=0 ){
15678 PrefixMerger **pp = ppHead;
15679 while( *pp && p->iPos>(*pp)->iPos ){
15680 pp = &(*pp)->pNext;
15681 }
15682 p->pNext = *pp;
15683 *pp = p;
15684 }
15685}
15686
15687
15688/*
15689** Array aBuf[] contains nBuf doclists. These are all merged in with the
15690** doclist in buffer p1.
15691*/
15692static void fts5MergePrefixLists(
15693 Fts5Index *p, /* FTS5 backend object */
15694 Fts5Buffer *p1, /* First list to merge */
15695 int nBuf, /* Number of buffers in array aBuf[] */
15696 Fts5Buffer *aBuf /* Other lists to merge in */
15697){
15698#define fts5PrefixMergerNextPosition(p)sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&
(p)->iOff,&(p)->iPos)
\
15699 sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
15700#define FTS5_MERGE_NLIST16 16
15701 PrefixMerger aMerger[FTS5_MERGE_NLIST16];
15702 PrefixMerger *pHead = 0;
15703 int i;
15704 int nOut = 0;
15705 Fts5Buffer out = {0, 0, 0};
15706 Fts5Buffer tmp = {0, 0, 0};
15707 i64 iLastRowid = 0;
15708
15709 /* Initialize a doclist-iterator for each input buffer. Arrange them in
15710 ** a linked-list starting at pHead in ascending order of rowid. Avoid
15711 ** linking any iterators already at EOF into the linked list at all. */
15712 assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) )((void) (0));
15713 memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
15714 pHead = &aMerger[nBuf];
15715 fts5DoclistIterInit(p1, &pHead->iter);
15716 for(i=0; i<nBuf; i++){
15717 fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
15718 fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
15719 nOut += aBuf[i].n;
15720 }
15721 if( nOut==0 ) return;
15722 nOut += p1->n + 9 + 10*nBuf;
15723
15724 /* The maximum size of the output is equal to the sum of the
15725 ** input sizes + 1 varint (9 bytes). The extra varint is because if the
15726 ** first rowid in one input is a large negative number, and the first in
15727 ** the other a non-negative number, the delta for the non-negative
15728 ** number will be larger on disk than the literal integer value
15729 ** was.
15730 **
15731 ** Or, if the input position-lists are corrupt, then the output might
15732 ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
15733 ** (the value PoslistNext64() uses for EOF) as a position and appending
15734 ** it to the output. This can happen at most once for each input
15735 ** position-list, hence (nBuf+1) 10 byte paddings. */
15736 if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;
15737
15738 while( pHead ){
15739 fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid){ ((void) (0)); { ((&out))->n += sqlite3Fts5PutVarint(
&((&out))->p[((&out))->n], ((u64)(pHead->
iter.iRowid) - (u64)(iLastRowid))); ((void) (0)); }; (iLastRowid
) = (pHead->iter.iRowid); }
;
15740
15741 if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
15742 /* Merge data from two or more poslists */
15743 i64 iPrev = 0;
15744 int nTmp = FTS5_DATA_ZERO_PADDING8;
15745 int nMerge = 0;
15746 PrefixMerger *pSave = pHead;
15747 PrefixMerger *pThis = 0;
15748 int nTail = 0;
15749
15750 pHead = 0;
15751 while( pSave && pSave->iter.iRowid==iLastRowid ){
15752 PrefixMerger *pNext = pSave->pNext;
15753 pSave->iOff = 0;
15754 pSave->iPos = 0;
15755 pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
15756 fts5PrefixMergerNextPosition(pSave)sqlite3Fts5PoslistNext64((pSave)->aPos,(pSave)->iter.nPoslist
,&(pSave)->iOff,&(pSave)->iPos)
;
15757 nTmp += pSave->iter.nPoslist + 10;
15758 nMerge++;
15759 fts5PrefixMergerInsertByPosition(&pHead, pSave);
15760 pSave = pNext;
15761 }
15762
15763 if( pHead==0 || pHead->pNext==0 ){
15764 p->rc = FTS5_CORRUPT(11 | (1<<8));
15765 break;
15766 }
15767
15768 /* See the earlier comment in this function for an explanation of why
15769 ** corrupt input position lists might cause the output to consume
15770 ** at most nMerge*10 bytes of unexpected space. */
15771 if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
15772 break;
15773 }
15774 fts5BufferZero(&tmp)sqlite3Fts5BufferZero(&tmp);
15775
15776 pThis = pHead;
15777 pHead = pThis->pNext;
15778 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
15779 fts5PrefixMergerNextPosition(pThis)sqlite3Fts5PoslistNext64((pThis)->aPos,(pThis)->iter.nPoslist
,&(pThis)->iOff,&(pThis)->iPos)
;
15780 fts5PrefixMergerInsertByPosition(&pHead, pThis);
15781
15782 while( pHead->pNext ){
15783 pThis = pHead;
15784 if( pThis->iPos!=iPrev ){
15785 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
15786 }
15787 fts5PrefixMergerNextPosition(pThis)sqlite3Fts5PoslistNext64((pThis)->aPos,(pThis)->iter.nPoslist
,&(pThis)->iOff,&(pThis)->iPos)
;
15788 pHead = pThis->pNext;
15789 fts5PrefixMergerInsertByPosition(&pHead, pThis);
15790 }
15791
15792 if( pHead->iPos!=iPrev ){
15793 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
15794 }
15795 nTail = pHead->iter.nPoslist - pHead->iOff;
15796
15797 /* WRITEPOSLISTSIZE */
15798 assert_nc( tmp.n+nTail<=nTmp )((void) (0));
15799 assert( tmp.n+nTail<=nTmp+nMerge*10 )((void) (0));
15800 if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING8 ){
15801 if( p->rc==SQLITE_OK0 ) p->rc = FTS5_CORRUPT(11 | (1<<8));
15802 break;
15803 }
15804 fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2){ (&out)->n += sqlite3Fts5PutVarint(&(&out)->
p[(&out)->n], ((tmp.n+nTail) * 2)); ((void) (0)); }
;
15805 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n){ ((void) (0)); memcpy(&(&out)->p[(&out)->n
], tmp.p, tmp.n); (&out)->n += tmp.n; }
;
15806 if( nTail>0 ){
15807 fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail){ ((void) (0)); memcpy(&(&out)->p[(&out)->n
], &pHead->aPos[pHead->iOff], nTail); (&out)->
n += nTail; }
;
15808 }
15809
15810 pHead = pSave;
15811 for(i=0; i<nBuf+1; i++){
15812 PrefixMerger *pX = &aMerger[i];
15813 if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
15814 fts5DoclistIterNext(&pX->iter);
15815 fts5PrefixMergerInsertByRowid(&pHead, pX);
15816 }
15817 }
15818
15819 }else{
15820 /* Copy poslist from pHead to output */
15821 PrefixMerger *pThis = pHead;
15822 Fts5DoclistIter *pI = &pThis->iter;
15823 fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize){ ((void) (0)); memcpy(&(&out)->p[(&out)->n
], pI->aPoslist, pI->nPoslist+pI->nSize); (&out)
->n += pI->nPoslist+pI->nSize; }
;
15824 fts5DoclistIterNext(pI);
15825 pHead = pThis->pNext;
15826 fts5PrefixMergerInsertByRowid(&pHead, pThis);
15827 }
15828 }
15829
15830 fts5BufferFree(p1)sqlite3Fts5BufferFree(p1);
15831 fts5BufferFree(&tmp)sqlite3Fts5BufferFree(&tmp);
15832 memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING8);
15833 *p1 = out;
15834}
15835
15836
15837/*
15838** Iterate through a range of entries in the FTS index, invoking the xVisit
15839** callback for each of them.
15840**
15841** Parameter pToken points to an nToken buffer containing an FTS index term
15842** (i.e. a document term with the preceding 1 byte index identifier -
15843** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits
15844** all entries for terms that have pToken/nToken as a prefix. If bPrefix
15845** is false, then only entries with pToken/nToken as the entire key are
15846** visited.
15847**
15848** If the current table is a tokendata=1 table, then if bPrefix is true then
15849** each index term is treated separately. However, if bPrefix is false, then
15850** all index terms corresponding to pToken/nToken are collapsed into a single
15851** term before the callback is invoked.
15852**
15853** The callback invoked for each entry visited is specified by paramter xVisit.
15854** Each time it is invoked, it is passed a pointer to the Fts5Index object,
15855** a copy of the 7th paramter to this function (pCtx) and a pointer to the
15856** iterator that indicates the current entry. If the current entry is the
15857** first with a new term (i.e. different from that of the previous entry,
15858** including the very first term), then the final two parameters are passed
15859** a pointer to the term and its size in bytes, respectively. If the current
15860** entry is not the first associated with its term, these two parameters
15861** are passed 0.
15862**
15863** If parameter pColset is not NULL, then it is used to filter entries before
15864** the callback is invoked.
15865*/
15866static int fts5VisitEntries(
15867 Fts5Index *p, /* Fts5 index object */
15868 Fts5Colset *pColset, /* Columns filter to apply, or NULL */
15869 u8 *pToken, /* Buffer containing token */
15870 int nToken, /* Size of buffer pToken in bytes */
15871 int bPrefix, /* True for a prefix scan */
15872 void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int),
15873 void *pCtx /* Passed as second argument to xVisit() */
15874){
15875 const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN0x0008 : 0)
15876 | FTS5INDEX_QUERY_SKIPEMPTY0x0010
15877 | FTS5INDEX_QUERY_NOOUTPUT0x0020;
15878 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
15879 int bNewTerm = 1;
15880 Fts5Structure *pStruct = fts5StructureRead(p);
15881
15882 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
15883 fts5IterSetOutputCb(&p->rc, p1);
15884 for( /* no-op */ ;
15885 fts5MultiIterEof(p, p1)==0;
15886 fts5MultiIterNext2(p, p1, &bNewTerm)
15887 ){
15888 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
15889 int nNew = 0;
15890 const u8 *pNew = 0;
15891
15892 p1->xSetOutputs(p1, pSeg);
15893 if( p->rc ) break;
15894
15895 if( bNewTerm ){
15896 nNew = pSeg->term.n;
15897 pNew = pSeg->term.p;
15898 if( nNew<nToken || memcmp(pToken, pNew, nToken) ) break;
15899 }
15900
15901 xVisit(p, pCtx, p1, pNew, nNew);
15902 }
15903 fts5MultiIterFree(p1);
15904
15905 fts5StructureRelease(pStruct);
15906 return p->rc;
15907}
15908
15909
15910/*
15911** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
15912** array of these for each row it visits (so all iRowid fields are the same).
15913** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an
15914** array of these for the entire query (in which case iRowid fields may take
15915** a variety of values).
15916**
15917** Each instance in the array indicates the iterator (and therefore term)
15918** associated with position iPos of rowid iRowid. This is used by the
15919** xInstToken() API.
15920**
15921** iRowid:
15922** Rowid for the current entry.
15923**
15924** iPos:
15925** Position of current entry within row. In the usual ((iCol<<32)+iOff)
15926** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()).
15927**
15928** iIter:
15929** If the Fts5TokenDataIter iterator that the entry is part of is
15930** actually an iterator (i.e. with nIter>0, not just a container for
15931** Fts5TokenDataMap structures), then this variable is an index into
15932** the apIter[] array. The corresponding term is that which the iterator
15933** at apIter[iIter] currently points to.
15934**
15935** Or, if the Fts5TokenDataIter iterator is just a container object
15936** (nIter==0), then iIter is an index into the term.p[] buffer where
15937** the term is stored.
15938**
15939** nByte:
15940** In the case where iIter is an index into term.p[], this variable
15941** is the size of the term in bytes. If iIter is an index into apIter[],
15942** this variable is unused.
15943*/
15944struct Fts5TokenDataMap {
15945 i64 iRowid; /* Row this token is located in */
15946 i64 iPos; /* Position of token */
15947 int iIter; /* Iterator token was read from */
15948 int nByte; /* Length of token in bytes (or 0) */
15949};
15950
15951/*
15952** An object used to supplement Fts5Iter for tokendata=1 iterators.
15953**
15954** This object serves two purposes. The first is as a container for an array
15955** of Fts5TokenDataMap structures, which are used to find the token required
15956** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and
15957** aMap[] variables.
15958*/
15959struct Fts5TokenDataIter {
15960 int nMapAlloc; /* Allocated size of aMap[] in entries */
15961 int nMap; /* Number of valid entries in aMap[] */
15962 Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */
15963
15964 /* The following are used for prefix-queries only. */
15965 Fts5Buffer terms;
15966
15967 /* The following are used for other full-token tokendata queries only. */
15968 int nIter;
15969 int nIterAlloc;
15970 Fts5PoslistReader *aPoslistReader;
15971 int *aPoslistToIter;
15972 Fts5Iter *apIter[FLEXARRAY];
15973};
15974
/*
** Size in bytes to allocate for an Fts5TokenDataIter object holding up to
** N iterators.
**
** NOTE(review): this reserves sizeof(Fts5Iter) bytes per entry although
** apIter[] is an array of pointers, so it looks like more space than is
** required is allocated - confirm before changing.
*/
#define SZ_FTS5TOKENDATAITER(N) \
    (offsetof(Fts5TokenDataIter,apIter) + (N)*sizeof(Fts5Iter))
15978
15979/*
15980** The two input arrays - a1[] and a2[] - are in sorted order. This function
15981** merges the two arrays together and writes the result to output array
15982** aOut[]. aOut[] is guaranteed to be large enough to hold the result.
15983**
15984** Duplicate entries are copied into the output. So the size of the output
15985** array is always (n1+n2) entries.
15986*/
15987static void fts5TokendataMerge(
15988 Fts5TokenDataMap *a1, int n1, /* Input array 1 */
15989 Fts5TokenDataMap *a2, int n2, /* Input array 2 */
15990 Fts5TokenDataMap *aOut /* Output array */
15991){
15992 int i1 = 0;
15993 int i2 = 0;
15994
15995 assert( n1>=0 && n2>=0 )((void) (0));
15996 while( i1<n1 || i2<n2 ){
15997 Fts5TokenDataMap *pOut = &aOut[i1+i2];
15998 if( i2>=n2 || (i1<n1 && (
15999 a1[i1].iRowid<a2[i2].iRowid
16000 || (a1[i1].iRowid==a2[i2].iRowid && a1[i1].iPos<=a2[i2].iPos)
16001 ))){
16002 memcpy(pOut, &a1[i1], sizeof(Fts5TokenDataMap));
16003 i1++;
16004 }else{
16005 memcpy(pOut, &a2[i2], sizeof(Fts5TokenDataMap));
16006 i2++;
16007 }
16008 }
16009}
16010
16011
16012/*
16013** Append a mapping to the token-map belonging to object pT.
16014*/
16015static void fts5TokendataIterAppendMap(
16016 Fts5Index *p,
16017 Fts5TokenDataIter *pT,
16018 int iIter,
16019 int nByte,
16020 i64 iRowid,
16021 i64 iPos
16022){
16023 if( p->rc==SQLITE_OK0 ){
16024 if( pT->nMap==pT->nMapAlloc ){
16025 int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64;
16026 int nAlloc = nNew * sizeof(Fts5TokenDataMap);
16027 Fts5TokenDataMap *aNew;
16028
16029 aNew = (Fts5TokenDataMap*)sqlite3_reallocsqlite3_api->realloc(pT->aMap, nAlloc);
16030 if( aNew==0 ){
16031 p->rc = SQLITE_NOMEM7;
16032 return;
16033 }
16034
16035 pT->aMap = aNew;
16036 pT->nMapAlloc = nNew;
16037 }
16038
16039 pT->aMap[pT->nMap].iRowid = iRowid;
16040 pT->aMap[pT->nMap].iPos = iPos;
16041 pT->aMap[pT->nMap].iIter = iIter;
16042 pT->aMap[pT->nMap].nByte = nByte;
16043 pT->nMap++;
16044 }
16045}
16046
16047/*
16048** Sort the contents of the pT->aMap[] array.
16049**
16050** The sorting algorithm requires a malloc(). If this fails, an error code
16051** is left in Fts5Index.rc before returning.
16052*/
16053static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){
16054 Fts5TokenDataMap *aTmp = 0;
16055 int nByte = pT->nMap * sizeof(Fts5TokenDataMap);
16056
16057 aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte);
16058 if( aTmp ){
16059 Fts5TokenDataMap *a1 = pT->aMap;
16060 Fts5TokenDataMap *a2 = aTmp;
16061 i64 nHalf;
16062
16063 for(nHalf=1; nHalf<pT->nMap; nHalf=nHalf*2){
16064 int i1;
16065 for(i1=0; i1<pT->nMap; i1+=(nHalf*2)){
16066 int n1 = MIN(nHalf, pT->nMap-i1)(((nHalf) < (pT->nMap-i1)) ? (nHalf) : (pT->nMap-i1)
)
;
16067 int n2 = MIN(nHalf, pT->nMap-i1-n1)(((nHalf) < (pT->nMap-i1-n1)) ? (nHalf) : (pT->nMap-
i1-n1))
;
16068 fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]);
16069 }
16070 SWAPVAL(Fts5TokenDataMap*, a1, a2){ Fts5TokenDataMap* tmp; tmp=a1; a1=a2; a2=tmp; };
16071 }
16072
16073 if( a1!=pT->aMap ){
16074 memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap));
16075 }
16076 sqlite3_freesqlite3_api->free(aTmp);
16077
16078#ifdef SQLITE_DEBUG
16079 {
16080 int ii;
16081 for(ii=1; ii<pT->nMap; ii++){
16082 Fts5TokenDataMap *p1 = &pT->aMap[ii-1];
16083 Fts5TokenDataMap *p2 = &pT->aMap[ii];
16084 assert( p1->iRowid<p2->iRowid((void) (0))
16085 || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos)((void) (0))
16086 )((void) (0));
16087 }
16088 }
16089#endif
16090 }
16091}
16092
16093/*
16094** Delete an Fts5TokenDataIter structure and its contents.
16095*/
16096static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){
16097 if( pSet ){
16098 int ii;
16099 for(ii=0; ii<pSet->nIter; ii++){
16100 fts5MultiIterFree(pSet->apIter[ii]);
16101 }
16102 fts5BufferFree(&pSet->terms)sqlite3Fts5BufferFree(&pSet->terms);
16103 sqlite3_freesqlite3_api->free(pSet->aPoslistReader);
16104 sqlite3_freesqlite3_api->free(pSet->aMap);
16105 sqlite3_freesqlite3_api->free(pSet);
16106 }
16107}
16108
16109
16110/*
16111** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata()
16112** to pass data to prefixIterSetupTokendataCb().
16113*/
16114typedef struct TokendataSetupCtx TokendataSetupCtx;
16115struct TokendataSetupCtx {
16116 Fts5TokenDataIter *pT; /* Object being populated with mappings */
16117 int iTermOff; /* Offset of current term in terms.p[] */
16118 int nTermByte; /* Size of current term in bytes */
16119};
16120
16121/*
16122** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This
16123** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each
16124** position in the current position-list. It doesn't matter that some of
16125** these may be out of order - they will be sorted later.
16126*/
16127static void prefixIterSetupTokendataCb(
16128 Fts5Index *p,
16129 void *pCtx,
16130 Fts5Iter *p1,
16131 const u8 *pNew,
16132 int nNew
16133){
16134 TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx;
16135 int iPosOff = 0;
16136 i64 iPos = 0;
16137
16138 if( pNew ){
16139 pSetup->nTermByte = nNew-1;
16140 pSetup->iTermOff = pSetup->pT->terms.n;
16141 fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1)sqlite3Fts5BufferAppendBlob(&p->rc,&pSetup->pT->
terms,nNew-1,pNew+1)
;
16142 }
16143
16144 while( 0==sqlite3Fts5PoslistNext64(
16145 p1->base.pData, p1->base.nData, &iPosOff, &iPos
16146 ) ){
16147 fts5TokendataIterAppendMap(p,
16148 pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos
16149 );
16150 }
16151}
16152
16153
16154/*
16155** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries().
16156*/
16157typedef struct PrefixSetupCtx PrefixSetupCtx;
16158struct PrefixSetupCtx {
16159 void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
16160 void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
16161 i64 iLastRowid;
16162 int nMerge;
16163 Fts5Buffer *aBuf;
16164 int nBuf;
16165 Fts5Buffer doclist;
16166 TokendataSetupCtx *pTokendata;
16167};
16168
16169/*
16170** fts5VisitEntries() callback used by fts5SetupPrefixIter()
16171*/
16172static void prefixIterSetupCb(
16173 Fts5Index *p,
16174 void *pCtx,
16175 Fts5Iter *p1,
16176 const u8 *pNew,
16177 int nNew
16178){
16179 PrefixSetupCtx *pSetup = (PrefixSetupCtx*)pCtx;
16180 const int nMerge = pSetup->nMerge;
16181
16182 if( p1->base.nData>0 ){
16183 if( p1->base.iRowid<=pSetup->iLastRowid && pSetup->doclist.n>0 ){
16184 int i;
16185 for(i=0; p->rc==SQLITE_OK0 && pSetup->doclist.n; i++){
16186 int i1 = i*nMerge;
16187 int iStore;
16188 assert( i1+nMerge<=pSetup->nBuf )((void) (0));
16189 for(iStore=i1; iStore<i1+nMerge; iStore++){
16190 if( pSetup->aBuf[iStore].n==0 ){
16191 fts5BufferSwap(&pSetup->doclist, &pSetup->aBuf[iStore]);
16192 fts5BufferZero(&pSetup->doclist)sqlite3Fts5BufferZero(&pSetup->doclist);
16193 break;
16194 }
16195 }
16196 if( iStore==i1+nMerge ){
16197 pSetup->xMerge(p, &pSetup->doclist, nMerge, &pSetup->aBuf[i1]);
16198 for(iStore=i1; iStore<i1+nMerge; iStore++){
16199 fts5BufferZero(&pSetup->aBuf[iStore])sqlite3Fts5BufferZero(&pSetup->aBuf[iStore]);
16200 }
16201 }
16202 }
16203 pSetup->iLastRowid = 0;
16204 }
16205
16206 pSetup->xAppend(
16207 p, (u64)p1->base.iRowid-(u64)pSetup->iLastRowid, p1, &pSetup->doclist
16208 );
16209 pSetup->iLastRowid = p1->base.iRowid;
16210 }
16211
16212 if( pSetup->pTokendata ){
16213 prefixIterSetupTokendataCb(p, (void*)pSetup->pTokendata, p1, pNew, nNew);
16214 }
16215}
16216
16217static void fts5SetupPrefixIter(
16218 Fts5Index *p, /* Index to read from */
16219 int bDesc, /* True for "ORDER BY rowid DESC" */
16220 int iIdx, /* Index to scan for data */
16221 u8 *pToken, /* Buffer containing prefix to match */
16222 int nToken, /* Size of buffer pToken in bytes */
16223 Fts5Colset *pColset, /* Restrict matches to these columns */
16224 Fts5Iter **ppIter /* OUT: New iterator */
16225){
16226 Fts5Structure *pStruct;
16227 PrefixSetupCtx s;
16228 TokendataSetupCtx s2;
16229
16230 memset(&s, 0, sizeof(s));
16231 memset(&s2, 0, sizeof(s2));
16232
16233 s.nMerge = 1;
16234 s.iLastRowid = 0;
16235 s.nBuf = 32;
16236 if( iIdx==0
16237 && p->pConfig->eDetail==FTS5_DETAIL_FULL0
16238 && p->pConfig->bPrefixInsttoken
16239 ){
16240 s.pTokendata = &s2;
16241 s2.pT = (Fts5TokenDataIter*)fts5IdxMalloc(p, SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter
))
);
16242 }
16243
16244 if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){
16245 s.xMerge = fts5MergeRowidLists;
16246 s.xAppend = fts5AppendRowid;
16247 }else{
16248 s.nMerge = FTS5_MERGE_NLIST16-1;
16249 s.nBuf = s.nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
16250 s.xMerge = fts5MergePrefixLists;
16251 s.xAppend = fts5AppendPoslist;
16252 }
16253
16254 s.aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*s.nBuf);
16255 pStruct = fts5StructureRead(p);
16256 assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) )((void) (0));
16257
16258 if( p->rc==SQLITE_OK0 ){
16259 void *pCtx = (void*)&s;
16260 int i;
16261 Fts5Data *pData;
16262
16263 /* If iIdx is non-zero, then it is the number of a prefix-index for
16264 ** prefixes 1 character longer than the prefix being queried for. That
16265 ** index contains all the doclists required, except for the one
16266 ** corresponding to the prefix itself. That one is extracted from the
16267 ** main term index here. */
16268 if( iIdx!=0 ){
16269 pToken[0] = FTS5_MAIN_PREFIX'0';
16270 fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx);
16271 }
16272
16273 pToken[0] = FTS5_MAIN_PREFIX'0' + iIdx;
16274 fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx);
16275
16276 assert( (s.nBuf%s.nMerge)==0 )((void) (0));
16277 for(i=0; i<s.nBuf; i+=s.nMerge){
16278 int iFree;
16279 if( p->rc==SQLITE_OK0 ){
16280 s.xMerge(p, &s.doclist, s.nMerge, &s.aBuf[i]);
16281 }
16282 for(iFree=i; iFree<i+s.nMerge; iFree++){
16283 fts5BufferFree(&s.aBuf[iFree])sqlite3Fts5BufferFree(&s.aBuf[iFree]);
16284 }
16285 }
16286
16287 pData = fts5IdxMalloc(p, sizeof(*pData)
16288 + ((i64)s.doclist.n)+FTS5_DATA_ZERO_PADDING8);
16289 assert( pData!=0 || p->rc!=SQLITE_OK )((void) (0));
16290 if( pData ){
16291 pData->p = (u8*)&pData[1];
16292 pData->nn = pData->szLeaf = s.doclist.n;
16293 if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n);
16294 fts5MultiIterNew2(p, pData, bDesc, ppIter);
16295 }
16296
16297 assert( (*ppIter)!=0 || p->rc!=SQLITE_OK )((void) (0));
16298 if( p->rc==SQLITE_OK0 && s.pTokendata ){
16299 fts5TokendataIterSortMap(p, s2.pT);
16300 (*ppIter)->pTokenDataIter = s2.pT;
16301 s2.pT = 0;
16302 }
16303 }
16304
16305 fts5TokendataIterDelete(s2.pT);
16306 fts5BufferFree(&s.doclist)sqlite3Fts5BufferFree(&s.doclist);
16307 fts5StructureRelease(pStruct);
16308 sqlite3_freesqlite3_api->free(s.aBuf);
16309}
16310
16311
16312/*
16313** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
16314** to the document with rowid iRowid.
16315*/
16316static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
16317 assert( p->rc==SQLITE_OK )((void) (0));
16318
16319 /* Allocate the hash table if it has not already been allocated */
16320 if( p->pHash==0 ){
16321 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
16322 }
16323
16324 /* Flush the hash table to disk if required */
16325 if( iRowid<p->iWriteRowid
16326 || (iRowid==p->iWriteRowid && p->bDelete==0)
16327 || (p->nPendingData > p->pConfig->nHashSize)
16328 ){
16329 fts5IndexFlush(p);
16330 }
16331
16332 p->iWriteRowid = iRowid;
16333 p->bDelete = bDelete;
16334 if( bDelete==0 ){
16335 p->nPendingRow++;
16336 }
16337 return fts5IndexReturn(p);
16338}
16339
16340/*
16341** Commit data to disk.
16342*/
16343static int sqlite3Fts5IndexSync(Fts5Index *p){
16344 assert( p->rc==SQLITE_OK )((void) (0));
16345 fts5IndexFlush(p);
16346 fts5IndexCloseReader(p);
16347 return fts5IndexReturn(p);
16348}
16349
16350/*
16351** Discard any data stored in the in-memory hash tables. Do not write it
16352** to the database. Additionally, assume that the contents of the %_data
16353** table may have changed on disk. So any in-memory caches of %_data
16354** records must be invalidated.
16355*/
16356static int sqlite3Fts5IndexRollback(Fts5Index *p){
16357 fts5IndexCloseReader(p);
16358 fts5IndexDiscardData(p);
16359 fts5StructureInvalidate(p);
16360 return fts5IndexReturn(p);
16361}
16362
16363/*
16364** The %_data table is completely empty when this function is called. This
16365** function populates it with the initial structure objects for each index,
16366** and the initial version of the "averages" record (a zero-byte blob).
16367*/
16368static int sqlite3Fts5IndexReinit(Fts5Index *p){
16369 Fts5Structure *pTmp;
16370 u8 tmpSpace[SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel
))
];
16371 fts5StructureInvalidate(p);
16372 fts5IndexDiscardData(p);
16373 pTmp = (Fts5Structure*)tmpSpace;
16374 memset(pTmp, 0, SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel
))
);
16375 if( p->pConfig->bContentlessDelete ){
16376 pTmp->nOriginCntr = 1;
16377 }
16378 fts5DataWrite(p, FTS5_AVERAGES_ROWID1, (const u8*)"", 0);
16379 fts5StructureWrite(p, pTmp);
16380 return fts5IndexReturn(p);
16381}
16382
16383/*
16384** Open a new Fts5Index handle. If the bCreate argument is true, create
16385** and initialize the underlying %_data table.
16386**
16387** If successful, set *pp to point to the new object and return SQLITE_OK.
16388** Otherwise, set *pp to NULL and return an SQLite error code.
16389*/
16390static int sqlite3Fts5IndexOpen(
16391 Fts5Config *pConfig,
16392 int bCreate,
16393 Fts5Index **pp,
16394 char **pzErr
16395){
16396 int rc = SQLITE_OK0;
16397 Fts5Index *p; /* New object */
16398
16399 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
16400 if( rc==SQLITE_OK0 ){
16401 p->pConfig = pConfig;
16402 p->nWorkUnit = FTS5_WORK_UNIT64;
16403 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
16404 if( p->zDataTbl && bCreate ){
16405 rc = sqlite3Fts5CreateTable(
16406 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
16407 );
16408 if( rc==SQLITE_OK0 ){
16409 rc = sqlite3Fts5CreateTable(pConfig, "idx",
16410 "segid, term, pgno, PRIMARY KEY(segid, term)",
16411 1, pzErr
16412 );
16413 }
16414 if( rc==SQLITE_OK0 ){
16415 rc = sqlite3Fts5IndexReinit(p);
16416 }
16417 }
16418 }
16419
16420 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK )((void) (0));
16421 if( rc ){
16422 sqlite3Fts5IndexClose(p);
16423 *pp = 0;
16424 }
16425 return rc;
16426}
16427
16428/*
16429** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
16430*/
16431static int sqlite3Fts5IndexClose(Fts5Index *p){
16432 int rc = SQLITE_OK0;
16433 if( p ){
16434 assert( p->pReader==0 )((void) (0));
16435 fts5StructureInvalidate(p);
16436 sqlite3_finalizesqlite3_api->finalize(p->pWriter);
16437 sqlite3_finalizesqlite3_api->finalize(p->pDeleter);
16438 sqlite3_finalizesqlite3_api->finalize(p->pIdxWriter);
16439 sqlite3_finalizesqlite3_api->finalize(p->pIdxDeleter);
16440 sqlite3_finalizesqlite3_api->finalize(p->pIdxSelect);
16441 sqlite3_finalizesqlite3_api->finalize(p->pIdxNextSelect);
16442 sqlite3_finalizesqlite3_api->finalize(p->pDataVersion);
16443 sqlite3_finalizesqlite3_api->finalize(p->pDeleteFromIdx);
16444 sqlite3Fts5HashFree(p->pHash);
16445 sqlite3_freesqlite3_api->free(p->zDataTbl);
16446 sqlite3_freesqlite3_api->free(p);
16447 }
16448 return rc;
16449}
16450
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
**
** A byte >=0xc0 starts a multi-byte character; the bytes that follow it
** and match 10xxxxxx are treated as part of the same character. A lead
** byte that is the very last byte of the buffer causes 0 to be returned.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int n = 0;
  int i;
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      if( n>=nByte ) return 0;
      while( (p[n] & 0xc0)==0x80 ){
        n++;
        if( n>=nByte ){
          /* End of buffer: acceptable only if this was the final
          ** character that was asked for. */
          if( i+1==nChar ) break;
          return 0;
        }
      }
    }
  }
  return n;
}
16478
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
**
** Each byte >=0xc0, together with any 10xxxxxx bytes that immediately
** follow it, is counted as one character. Every other byte counts as one
** character on its own.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar = 0;
  int i = 0;
  while( i<nIn ){
    if( (unsigned char)pIn[i++]>=0xc0 ){
      while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
    }
    nChar++;
  }
  return nChar;
}
16494
16495/*
16496** Insert or remove data to or from the index. Each time a document is
16497** added to or removed from the index, this function is called one or more
16498** times.
16499**
16500** For an insert, it must be called once for each token in the new document.
16501** If the operation is a delete, it must be called (at least) once for each
16502** unique token in the document with an iCol value less than zero. The iPos
16503** argument is ignored for a delete.
16504*/
16505static int sqlite3Fts5IndexWrite(
16506 Fts5Index *p, /* Index to write to */
16507 int iCol, /* Column token appears in (-ve -> delete) */
16508 int iPos, /* Position of token within column */
16509 const char *pToken, int nToken /* Token to add or remove to or from index */
16510){
16511 int i; /* Used to iterate through indexes */
16512 int rc = SQLITE_OK0; /* Return code */
16513 Fts5Config *pConfig = p->pConfig;
16514
16515 assert( p->rc==SQLITE_OK )((void) (0));
16516 assert( (iCol<0)==p->bDelete )((void) (0));
16517
16518 /* Add the entry to the main terms index. */
16519 rc = sqlite3Fts5HashWrite(
16520 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX'0', pToken, nToken
16521 );
16522
16523 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK0; i++){
16524 const int nChar = pConfig->aPrefix[i];
16525 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
16526 if( nByte ){
16527 rc = sqlite3Fts5HashWrite(p->pHash,
16528 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX'0'+i+1), pToken,
16529 nByte
16530 );
16531 }
16532 }
16533
16534 return rc;
16535}
16536
16537/*
16538** pToken points to a buffer of size nToken bytes containing a search
16539** term, including the index number at the start, used on a tokendata=1
16540** table. This function returns true if the term in buffer pBuf matches
16541** token pToken/nToken.
16542*/
16543static int fts5IsTokendataPrefix(
16544 Fts5Buffer *pBuf,
16545 const u8 *pToken,
16546 int nToken
16547){
16548 return (
16549 pBuf->n>=nToken
16550 && 0==memcmp(pBuf->p, pToken, nToken)
16551 && (pBuf->n==nToken || pBuf->p[nToken]==0x00)
16552 );
16553}
16554
16555/*
16556** Ensure the segment-iterator passed as the only argument points to EOF.
16557*/
16558static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
16559 fts5DataRelease(pSeg->pLeaf);
16560 pSeg->pLeaf = 0;
16561}
16562
16563static void fts5IterClose(Fts5IndexIter *pIndexIter){
16564 if( pIndexIter ){
16565 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
16566 Fts5Index *pIndex = pIter->pIndex;
16567 fts5TokendataIterDelete(pIter->pTokenDataIter);
16568 fts5MultiIterFree(pIter);
16569 fts5IndexCloseReader(pIndex);
16570 }
16571}
16572
16573/*
16574** This function appends iterator pAppend to Fts5TokenDataIter pIn and
16575** returns the result.
16576*/
16577static Fts5TokenDataIter *fts5AppendTokendataIter(
16578 Fts5Index *p, /* Index object (for error code) */
16579 Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */
16580 Fts5Iter *pAppend /* Append this iterator */
16581){
16582 Fts5TokenDataIter *pRet = pIn;
16583
16584 if( p->rc==SQLITE_OK0 ){
16585 if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){
16586 int nAlloc = pIn ? pIn->nIterAlloc*2 : 16;
16587 int nByte = SZ_FTS5TOKENDATAITER(nAlloc+1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (nAlloc+1)*sizeof
(Fts5Iter))
;
16588 Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_reallocsqlite3_api->realloc(pIn, nByte);
16589
16590 if( pNew==0 ){
16591 p->rc = SQLITE_NOMEM7;
16592 }else{
16593 if( pIn==0 ) memset(pNew, 0, nByte);
16594 pRet = pNew;
16595 pNew->nIterAlloc = nAlloc;
16596 }
16597 }
16598 }
16599 if( p->rc ){
16600 fts5IterClose((Fts5IndexIter*)pAppend);
16601 }else{
16602 pRet->apIter[pRet->nIter++] = pAppend;
16603 }
16604 assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc )((void) (0));
16605
16606 return pRet;
16607}
16608
16609/*
16610** The iterator passed as the only argument must be a tokendata=1 iterator
16611** (pIter->pTokenDataIter!=0). This function sets the iterator output
16612** variables (pIter->base.*) according to the contents of the current
16613** row.
16614*/
16615static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){
16616 int ii;
16617 int nHit = 0;
16618 i64 iRowid = SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32)));
16619 int iMin = 0;
16620
16621 Fts5TokenDataIter *pT = pIter->pTokenDataIter;
16622
16623 pIter->base.nData = 0;
16624 pIter->base.pData = 0;
16625
16626 for(ii=0; ii<pT->nIter; ii++){
16627 Fts5Iter *p = pT->apIter[ii];
16628 if( p->base.bEof==0 ){
16629 if( nHit==0 || p->base.iRowid<iRowid ){
16630 iRowid = p->base.iRowid;
16631 nHit = 1;
16632 pIter->base.pData = p->base.pData;
16633 pIter->base.nData = p->base.nData;
16634 iMin = ii;
16635 }else if( p->base.iRowid==iRowid ){
16636 nHit++;
16637 }
16638 }
16639 }
16640
16641 if( nHit==0 ){
16642 pIter->base.bEof = 1;
16643 }else{
16644 int eDetail = pIter->pIndex->pConfig->eDetail;
16645 pIter->base.bEof = 0;
16646 pIter->base.iRowid = iRowid;
16647
16648 if( nHit==1 && eDetail==FTS5_DETAIL_FULL0 ){
16649 fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, 0, iRowid, -1);
16650 }else
16651 if( nHit>1 && eDetail!=FTS5_DETAIL_NONE1 ){
16652 int nReader = 0;
16653 int nByte = 0;
16654 i64 iPrev = 0;
16655
16656 /* Allocate array of iterators if they are not already allocated. */
16657 if( pT->aPoslistReader==0 ){
16658 pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero(
16659 &pIter->pIndex->rc,
16660 pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int))
16661 );
16662 if( pT->aPoslistReader==0 ) return;
16663 pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter];
16664 }
16665
16666 /* Populate an iterator for each poslist that will be merged */
16667 for(ii=0; ii<pT->nIter; ii++){
16668 Fts5Iter *p = pT->apIter[ii];
16669 if( iRowid==p->base.iRowid ){
16670 pT->aPoslistToIter[nReader] = ii;
16671 sqlite3Fts5PoslistReaderInit(
16672 p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++]
16673 );
16674 nByte += p->base.nData;
16675 }
16676 }
16677
16678 /* Ensure the output buffer is large enough */
16679 if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10)( (u32)((&pIter->poslist)->n) + (u32)(nByte+nHit*10
) <= (u32)((&pIter->poslist)->nSpace) ? 0 : sqlite3Fts5BufferSize
((&pIter->pIndex->rc),(&pIter->poslist),(nByte
+nHit*10)+(&pIter->poslist)->n) )
){
16680 return;
16681 }
16682
16683 /* Ensure the token-mapping is large enough */
16684 if( eDetail==FTS5_DETAIL_FULL0 && pT->nMapAlloc<(pT->nMap + nByte) ){
16685 int nNew = (pT->nMapAlloc + nByte) * 2;
16686 Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_reallocsqlite3_api->realloc(
16687 pT->aMap, nNew*sizeof(Fts5TokenDataMap)
16688 );
16689 if( aNew==0 ){
16690 pIter->pIndex->rc = SQLITE_NOMEM7;
16691 return;
16692 }
16693 pT->aMap = aNew;
16694 pT->nMapAlloc = nNew;
16695 }
16696
16697 pIter->poslist.n = 0;
16698
16699 while( 1 ){
16700 i64 iMinPos = LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32));
16701
16702 /* Find smallest position */
16703 iMin = 0;
16704 for(ii=0; ii<nReader; ii++){
16705 Fts5PoslistReader *pReader = &pT->aPoslistReader[ii];
16706 if( pReader->bEof==0 ){
16707 if( pReader->iPos<iMinPos ){
16708 iMinPos = pReader->iPos;
16709 iMin = ii;
16710 }
16711 }
16712 }
16713
16714 /* If all readers were at EOF, break out of the loop. */
16715 if( iMinPos==LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) ) break;
16716
16717 sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos);
16718 sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]);
16719
16720 if( eDetail==FTS5_DETAIL_FULL0 ){
16721 pT->aMap[pT->nMap].iPos = iMinPos;
16722 pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin];
16723 pT->aMap[pT->nMap].iRowid = iRowid;
16724 pT->nMap++;
16725 }
16726 }
16727
16728 pIter->base.pData = pIter->poslist.p;
16729 pIter->base.nData = pIter->poslist.n;
16730 }
16731 }
16732}
16733
16734/*
16735** The iterator passed as the only argument must be a tokendata=1 iterator
16736** (pIter->pTokenDataIter!=0). This function advances the iterator. If
16737** argument bFrom is false, then the iterator is advanced to the next
16738** entry. Or, if bFrom is true, it is advanced to the first entry with
16739** a rowid of iFrom or greater.
16740*/
16741static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){
16742 int ii;
16743 Fts5TokenDataIter *pT = pIter->pTokenDataIter;
16744 Fts5Index *pIndex = pIter->pIndex;
16745
16746 for(ii=0; ii<pT->nIter; ii++){
16747 Fts5Iter *p = pT->apIter[ii];
16748 if( p->base.bEof==0
16749 && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom))
16750 ){
16751 fts5MultiIterNext(pIndex, p, bFrom, iFrom);
16752 while( bFrom && p->base.bEof==0
16753 && p->base.iRowid<iFrom
16754 && pIndex->rc==SQLITE_OK0
16755 ){
16756 fts5MultiIterNext(pIndex, p, 0, 0);
16757 }
16758 }
16759 }
16760
16761 if( pIndex->rc==SQLITE_OK0 ){
16762 fts5IterSetOutputsTokendata(pIter);
16763 }
16764}
16765
16766/*
16767** If the segment-iterator passed as the first argument is at EOF, then
16768** set pIter->term to a copy of buffer pTerm.
16769*/
16770static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){
16771 if( pIter && pIter->aSeg[0].pLeaf==0 ){
16772 fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p)sqlite3Fts5BufferSet(&pIter->pIndex->rc,&pIter->
aSeg[0].term,pTerm->n,pTerm->p)
;
16773 }
16774}
16775
16776/*
16777** This function sets up an iterator to use for a non-prefix query on a
16778** tokendata=1 table.
16779*/
16780static Fts5Iter *fts5SetupTokendataIter(
16781 Fts5Index *p, /* FTS index to query */
16782 const u8 *pToken, /* Buffer containing query term */
16783 int nToken, /* Size of buffer pToken in bytes */
16784 Fts5Colset *pColset /* Colset to filter on */
16785){
16786 Fts5Iter *pRet = 0;
16787 Fts5TokenDataIter *pSet = 0;
16788 Fts5Structure *pStruct = 0;
16789 const int flags = FTS5INDEX_QUERY_SCANONETERM0x0100 | FTS5INDEX_QUERY_SCAN0x0008;
16790
16791 Fts5Buffer bSeek = {0, 0, 0};
16792 Fts5Buffer *pSmall = 0;
16793
16794 fts5IndexFlush(p);
16795 pStruct = fts5StructureRead(p);
16796
16797 while( p->rc==SQLITE_OK0 ){
16798 Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0;
16799 Fts5Iter *pNew = 0;
16800 Fts5SegIter *pNewIter = 0;
16801 Fts5SegIter *pPrevIter = 0;
16802
16803 int iLvl, iSeg, ii;
16804
16805 pNew = fts5MultiIterAlloc(p, pStruct->nSegment);
16806 if( pSmall ){
16807 fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p)sqlite3Fts5BufferSet(&p->rc,&bSeek,pSmall->n,pSmall
->p)
;
16808 fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0")sqlite3Fts5BufferAppendBlob(&p->rc,&bSeek,1,(const
u8*)"\0")
;
16809 }else{
16810 fts5BufferSet(&p->rc, &bSeek, nToken, pToken)sqlite3Fts5BufferSet(&p->rc,&bSeek,nToken,pToken);
16811 }
16812 if( p->rc ){
16813 fts5IterClose((Fts5IndexIter*)pNew);
16814 break;
16815 }
16816
16817 pNewIter = &pNew->aSeg[0];
16818 pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0);
16819 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
16820 for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
16821 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
16822 int bDone = 0;
16823
16824 if( pPrevIter ){
16825 if( fts5BufferCompare(pSmall, &pPrevIter->term) ){
16826 memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter));
16827 memset(pPrevIter, 0, sizeof(Fts5SegIter));
16828 bDone = 1;
16829 }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){
16830 fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter);
16831 bDone = 1;
16832 }
16833 }
16834
16835 if( bDone==0 ){
16836 fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter);
16837 }
16838
16839 if( pPrevIter ){
16840 if( pPrevIter->pTombArray ){
16841 pNewIter->pTombArray = pPrevIter->pTombArray;
16842 pNewIter->pTombArray->nRef++;
16843 }
16844 }else{
16845 fts5SegIterAllocTombstone(p, pNewIter);
16846 }
16847
16848 pNewIter++;
16849 if( pPrevIter ) pPrevIter++;
16850 if( p->rc ) break;
16851 }
16852 }
16853 fts5TokendataSetTermIfEof(pPrev, pSmall);
16854
16855 pNew->bSkipEmpty = 1;
16856 pNew->pColset = pColset;
16857 fts5IterSetOutputCb(&p->rc, pNew);
16858
16859 /* Loop through all segments in the new iterator. Find the smallest
16860 ** term that any segment-iterator points to. Iterator pNew will be
16861 ** used for this term. Also, set any iterator that points to a term that
16862 ** does not match pToken/nToken to point to EOF */
16863 pSmall = 0;
16864 for(ii=0; ii<pNew->nSeg; ii++){
16865 Fts5SegIter *pII = &pNew->aSeg[ii];
16866 if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){
16867 fts5SegIterSetEOF(pII);
16868 }
16869 if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){
16870 pSmall = &pII->term;
16871 }
16872 }
16873
16874 /* If pSmall is still NULL at this point, then the new iterator does
16875 ** not point to any terms that match the query. So delete it and break
16876 ** out of the loop - all required iterators have been collected. */
16877 if( pSmall==0 ){
16878 fts5IterClose((Fts5IndexIter*)pNew);
16879 break;
16880 }
16881
16882 /* Append this iterator to the set and continue. */
16883 pSet = fts5AppendTokendataIter(p, pSet, pNew);
16884 }
16885
16886 if( p->rc==SQLITE_OK0 && pSet ){
16887 int ii;
16888 for(ii=0; ii<pSet->nIter; ii++){
16889 Fts5Iter *pIter = pSet->apIter[ii];
16890 int iSeg;
16891 for(iSeg=0; iSeg<pIter->nSeg; iSeg++){
16892 pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM0x01;
16893 }
16894 fts5MultiIterFinishSetup(p, pIter);
16895 }
16896 }
16897
16898 if( p->rc==SQLITE_OK0 ){
16899 pRet = fts5MultiIterAlloc(p, 0);
16900 }
16901 if( pRet ){
16902 pRet->nSeg = 0;
16903 pRet->pTokenDataIter = pSet;
16904 if( pSet ){
16905 fts5IterSetOutputsTokendata(pRet);
16906 }else{
16907 pRet->base.bEof = 1;
16908 }
16909 }else{
16910 fts5TokendataIterDelete(pSet);
16911 }
16912
16913 fts5StructureRelease(pStruct);
16914 fts5BufferFree(&bSeek)sqlite3Fts5BufferFree(&bSeek);
16915 return pRet;
16916}
16917
16918/*
16919** Open a new iterator to iterate though all rowid that match the
16920** specified token or token prefix.
16921*/
16922static int sqlite3Fts5IndexQuery(
16923 Fts5Index *p, /* FTS index to query */
16924 const char *pToken, int nToken, /* Token (or prefix) to query for */
16925 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
16926 Fts5Colset *pColset, /* Match these columns only */
16927 Fts5IndexIter **ppIter /* OUT: New iterator object */
16928){
16929 Fts5Config *pConfig = p->pConfig;
16930 Fts5Iter *pRet = 0;
16931 Fts5Buffer buf = {0, 0, 0};
16932
16933 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
16934 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN )((void) (0));
16935
16936 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
16937 int iIdx = 0; /* Index to search */
16938 int iPrefixIdx = 0; /* +1 prefix index */
16939 int bTokendata = pConfig->bTokendata;
16940 assert( buf.p!=0 )((void) (0));
16941 if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
16942
16943 /* The NOTOKENDATA flag is set when each token in a tokendata=1 table
16944 ** should be treated individually, instead of merging all those with
16945 ** a common prefix into a single entry. This is used, for example, by
16946 ** queries performed as part of an integrity-check, or by the fts5vocab
16947 ** module. */
16948 if( flags & (FTS5INDEX_QUERY_NOTOKENDATA0x0080|FTS5INDEX_QUERY_SCAN0x0008) ){
16949 bTokendata = 0;
16950 }
16951
16952 /* Figure out which index to search and set iIdx accordingly. If this
16953 ** is a prefix query for which there is no prefix index, set iIdx to
16954 ** greater than pConfig->nPrefix to indicate that the query will be
16955 ** satisfied by scanning multiple terms in the main index.
16956 **
16957 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
16958 ** prefix-query. Instead of using a prefix-index (if one exists),
16959 ** evaluate the prefix query using the main FTS index. This is used
16960 ** for internal sanity checking by the integrity-check in debug
16961 ** mode only. */
16962#ifdef SQLITE_DEBUG
16963 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX0x0004) ){
16964 assert( flags & FTS5INDEX_QUERY_PREFIX )((void) (0));
16965 iIdx = 1+pConfig->nPrefix;
16966 }else
16967#endif
16968 if( flags & FTS5INDEX_QUERY_PREFIX0x0001 ){
16969 int nChar = fts5IndexCharlen(pToken, nToken);
16970 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
16971 int nIdxChar = pConfig->aPrefix[iIdx-1];
16972 if( nIdxChar==nChar ) break;
16973 if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
16974 }
16975 }
16976
16977 if( bTokendata && iIdx==0 ){
16978 buf.p[0] = FTS5_MAIN_PREFIX'0';
16979 pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset);
16980 }else if( iIdx<=pConfig->nPrefix ){
16981 /* Straight index lookup */
16982 Fts5Structure *pStruct = fts5StructureRead(p);
16983 buf.p[0] = (u8)(FTS5_MAIN_PREFIX'0' + iIdx);
16984 if( pStruct ){
16985 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY0x0010,
16986 pColset, buf.p, nToken+1, -1, 0, &pRet
16987 );
16988 fts5StructureRelease(pStruct);
16989 }
16990 }else{
16991 /* Scan multiple terms in the main index for a prefix query. */
16992 int bDesc = (flags & FTS5INDEX_QUERY_DESC0x0002)!=0;
16993 fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
16994 if( pRet==0 ){
16995 assert( p->rc!=SQLITE_OK )((void) (0));
16996 }else{
16997 assert( pRet->pColset==0 )((void) (0));
16998 fts5IterSetOutputCb(&p->rc, pRet);
16999 if( p->rc==SQLITE_OK0 ){
17000 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
17001 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
17002 }
17003 }
17004 }
17005
17006 if( p->rc ){
17007 fts5IterClose((Fts5IndexIter*)pRet);
17008 pRet = 0;
17009 fts5IndexCloseReader(p);
17010 }
17011
17012 *ppIter = (Fts5IndexIter*)pRet;
17013 sqlite3Fts5BufferFree(&buf);
17014 }
17015 return fts5IndexReturn(p);
17016}
17017
17018/*
17019** Return true if the iterator passed as the only argument is at EOF.
17020*/
17021/*
17022** Move to the next matching rowid.
17023*/
17024static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
17025 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
17026 assert( pIter->pIndex->rc==SQLITE_OK )((void) (0));
17027 if( pIter->nSeg==0 ){
17028 assert( pIter->pTokenDataIter )((void) (0));
17029 fts5TokendataIterNext(pIter, 0, 0);
17030 }else{
17031 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
17032 }
17033 return fts5IndexReturn(pIter->pIndex);
17034}
17035
17036/*
17037** Move to the next matching term/rowid. Used by the fts5vocab module.
17038*/
17039static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
17040 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
17041 Fts5Index *p = pIter->pIndex;
17042
17043 assert( pIter->pIndex->rc==SQLITE_OK )((void) (0));
17044
17045 fts5MultiIterNext(p, pIter, 0, 0);
17046 if( p->rc==SQLITE_OK0 ){
17047 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
17048 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX'0' ){
17049 fts5DataRelease(pSeg->pLeaf);
17050 pSeg->pLeaf = 0;
17051 pIter->base.bEof = 1;
17052 }
17053 }
17054
17055 return fts5IndexReturn(pIter->pIndex);
17056}
17057
17058/*
17059** Move to the next matching rowid that occurs at or after iMatch. The
17060** definition of "at or after" depends on whether this iterator iterates
17061** in ascending or descending rowid order.
17062*/
17063static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
17064 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
17065 if( pIter->nSeg==0 ){
17066 assert( pIter->pTokenDataIter )((void) (0));
17067 fts5TokendataIterNext(pIter, 1, iMatch);
17068 }else{
17069 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
17070 }
17071 return fts5IndexReturn(pIter->pIndex);
17072}
17073
17074/*
17075** Return the current term.
17076*/
17077static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
17078 int n;
17079 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
17080 assert_nc( z || n<=1 )((void) (0));
17081 *pn = n-1;
17082 return (z ? &z[1] : 0);
17083}
17084
17085/*
17086** pIter is a prefix query. This function populates pIter->pTokenDataIter
17087** with an Fts5TokenDataIter object containing mappings for all rows
17088** matched by the query.
17089*/
17090static int fts5SetupPrefixIterTokendata(
17091 Fts5Iter *pIter,
17092 const char *pToken, /* Token prefix to search for */
17093 int nToken /* Size of pToken in bytes */
17094){
17095 Fts5Index *p = pIter->pIndex;
17096 Fts5Buffer token = {0, 0, 0};
17097 TokendataSetupCtx ctx;
17098
17099 memset(&ctx, 0, sizeof(ctx));
17100
17101 fts5BufferGrow(&p->rc, &token, nToken+1)( (u32)((&token)->n) + (u32)(nToken+1) <= (u32)((&
token)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->rc
),(&token),(nToken+1)+(&token)->n) )
;
17102 assert( token.p!=0 || p->rc!=SQLITE_OK )((void) (0));
17103 ctx.pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc,
17104 SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter
))
);
17105
17106 if( p->rc==SQLITE_OK0 ){
17107
17108 /* Fill in the token prefix to search for */
17109 token.p[0] = FTS5_MAIN_PREFIX'0';
17110 memcpy(&token.p[1], pToken, nToken);
17111 token.n = nToken+1;
17112
17113 fts5VisitEntries(
17114 p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx
17115 );
17116
17117 fts5TokendataIterSortMap(p, ctx.pT);
17118 }
17119
17120 if( p->rc==SQLITE_OK0 ){
17121 pIter->pTokenDataIter = ctx.pT;
17122 }else{
17123 fts5TokendataIterDelete(ctx.pT);
17124 }
17125 fts5BufferFree(&token)sqlite3Fts5BufferFree(&token);
17126
17127 return fts5IndexReturn(p);
17128}
17129
17130/*
17131** This is used by xInstToken() to access the token at offset iOff, column
17132** iCol of row iRowid. The token is returned via output variables *ppOut
17133** and *pnOut. The iterator passed as the first argument must be a tokendata=1
17134** iterator (pIter->pTokenDataIter!=0).
17135**
17136** pToken/nToken:
17137*/
17138static int sqlite3Fts5IterToken(
17139 Fts5IndexIter *pIndexIter,
17140 const char *pToken, int nToken,
17141 i64 iRowid,
17142 int iCol,
17143 int iOff,
17144 const char **ppOut, int *pnOut
17145){
17146 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
17147 Fts5TokenDataIter *pT = pIter->pTokenDataIter;
17148 i64 iPos = (((i64)iCol)<<32) + iOff;
17149 Fts5TokenDataMap *aMap = 0;
17150 int i1 = 0;
17151 int i2 = 0;
17152 int iTest = 0;
17153
17154 assert( pT || (pToken && pIter->nSeg>0) )((void) (0));
17155 if( pT==0 ){
17156 int rc = fts5SetupPrefixIterTokendata(pIter, pToken, nToken);
17157 if( rc!=SQLITE_OK0 ) return rc;
17158 pT = pIter->pTokenDataIter;
17159 }
17160
17161 i2 = pT->nMap;
17162 aMap = pT->aMap;
17163
17164 while( i2>i1 ){
17165 iTest = (i1 + i2) / 2;
17166
17167 if( aMap[iTest].iRowid<iRowid ){
17168 i1 = iTest+1;
17169 }else if( aMap[iTest].iRowid>iRowid ){
17170 i2 = iTest;
17171 }else{
17172 if( aMap[iTest].iPos<iPos ){
17173 if( aMap[iTest].iPos<0 ){
17174 break;
17175 }
17176 i1 = iTest+1;
17177 }else if( aMap[iTest].iPos>iPos ){
17178 i2 = iTest;
17179 }else{
17180 break;
17181 }
17182 }
17183 }
17184
17185 if( i2>i1 ){
17186 if( pIter->nSeg==0 ){
17187 Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter];
17188 *ppOut = (const char*)pMap->aSeg[0].term.p+1;
17189 *pnOut = pMap->aSeg[0].term.n-1;
17190 }else{
17191 Fts5TokenDataMap *p = &aMap[iTest];
17192 *ppOut = (const char*)&pT->terms.p[p->iIter];
17193 *pnOut = aMap[iTest].nByte;
17194 }
17195 }
17196
17197 return SQLITE_OK0;
17198}
17199
17200/*
17201** Clear any existing entries from the token-map associated with the
17202** iterator passed as the only argument.
17203*/
17204static void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){
17205 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
17206 if( pIter && pIter->pTokenDataIter
17207 && (pIter->nSeg==0 || pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL0)
17208 ){
17209 pIter->pTokenDataIter->nMap = 0;
17210 }
17211}
17212
17213/*
17214** Set a token-mapping for the iterator passed as the first argument. This
17215** is used in detail=column or detail=none mode when a token is requested
17216** using the xInstToken() API. In this case the caller tokenizers the
17217** current row and configures the token-mapping via multiple calls to this
17218** function.
17219*/
17220static int sqlite3Fts5IndexIterWriteTokendata(
17221 Fts5IndexIter *pIndexIter,
17222 const char *pToken, int nToken,
17223 i64 iRowid, int iCol, int iOff
17224){
17225 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
17226 Fts5TokenDataIter *pT = pIter->pTokenDataIter;
17227 Fts5Index *p = pIter->pIndex;
17228 i64 iPos = (((i64)iCol)<<32) + iOff;
17229
17230 assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL )((void) (0));
17231 assert( pIter->pTokenDataIter || pIter->nSeg>0 )((void) (0));
17232 if( pIter->nSeg>0 ){
17233 /* This is a prefix term iterator. */
17234 if( pT==0 ){
17235 pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc,
17236 SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter
))
);
17237 pIter->pTokenDataIter = pT;
17238 }
17239 if( pT ){
17240 fts5TokendataIterAppendMap(p, pT, pT->terms.n, nToken, iRowid, iPos);
17241 fts5BufferAppendBlob(&p->rc, &pT->terms, nToken, (const u8*)pToken)sqlite3Fts5BufferAppendBlob(&p->rc,&pT->terms,nToken
,(const u8*)pToken)
;
17242 }
17243 }else{
17244 int ii;
17245 for(ii=0; ii<pT->nIter; ii++){
17246 Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term;
17247 if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break;
17248 }
17249 if( ii<pT->nIter ){
17250 fts5TokendataIterAppendMap(p, pT, ii, 0, iRowid, iPos);
17251 }
17252 }
17253 return fts5IndexReturn(p);
17254}
17255
17256/*
17257** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
17258*/
17259static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
17260 if( pIndexIter ){
17261 Fts5Index *pIndex = ((Fts5Iter*)pIndexIter)->pIndex;
17262 fts5IterClose(pIndexIter);
17263 fts5IndexReturn(pIndex);
17264 }
17265}
17266
17267/*
17268** Read and decode the "averages" record from the database.
17269**
17270** Parameter anSize must point to an array of size nCol, where nCol is
17271** the number of user defined columns in the FTS table.
17272*/
17273static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
17274 int nCol = p->pConfig->nCol;
17275 Fts5Data *pData;
17276
17277 *pnRow = 0;
17278 memset(anSize, 0, sizeof(i64) * nCol);
17279 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID1);
17280 if( p->rc==SQLITE_OK0 && pData->nn ){
17281 int i = 0;
17282 int iCol;
17283 i += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[i], (u64*)pnRow);
17284 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
17285 i += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
17286 }
17287 }
17288
17289 fts5DataRelease(pData);
17290 return fts5IndexReturn(p);
17291}
17292
17293/*
17294** Replace the current "averages" record with the contents of the buffer
17295** supplied as the second argument.
17296*/
17297static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
17298 assert( p->rc==SQLITE_OK )((void) (0));
17299 fts5DataWrite(p, FTS5_AVERAGES_ROWID1, pData, nData);
17300 return fts5IndexReturn(p);
17301}
17302
17303/*
17304** Return the total number of blocks this module has read from the %_data
17305** table since it was created.
17306*/
17307static int sqlite3Fts5IndexReads(Fts5Index *p){
17308 return p->nRead;
17309}
17310
17311/*
17312** Set the 32-bit cookie value stored at the start of all structure
17313** records to the value passed as the second argument.
17314**
17315** Return SQLITE_OK if successful, or an SQLite error code if an error
17316** occurs.
17317*/
17318static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
17319 int rc; /* Return code */
17320 Fts5Config *pConfig = p->pConfig; /* Configuration object */
17321 u8 aCookie[4]; /* Binary representation of iNew */
17322 sqlite3_blob *pBlob = 0;
17323
17324 assert( p->rc==SQLITE_OK )((void) (0));
17325 sqlite3Fts5Put32(aCookie, iNew);
17326
17327 rc = sqlite3_blob_opensqlite3_api->blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
17328 "block", FTS5_STRUCTURE_ROWID10, 1, &pBlob
17329 );
17330 if( rc==SQLITE_OK0 ){
17331 sqlite3_blob_writesqlite3_api->blob_write(pBlob, aCookie, 4, 0);
17332 rc = sqlite3_blob_closesqlite3_api->blob_close(pBlob);
17333 }
17334
17335 return rc;
17336}
17337
17338static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
17339 Fts5Structure *pStruct;
17340 pStruct = fts5StructureRead(p);
17341 fts5StructureRelease(pStruct);
17342 return fts5IndexReturn(p);
17343}
17344
17345/*
17346** Retrieve the origin value that will be used for the segment currently
17347** being accumulated in the in-memory hash table when it is flushed to
17348** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to
17349** the queried value. Or, if an error occurs, an error code is returned
17350** and the final value of (*piOrigin) is undefined.
17351*/
17352static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){
17353 Fts5Structure *pStruct;
17354 pStruct = fts5StructureRead(p);
17355 if( pStruct ){
17356 *piOrigin = pStruct->nOriginCntr;
17357 fts5StructureRelease(pStruct);
17358 }
17359 return fts5IndexReturn(p);
17360}
17361
17362/*
17363** Buffer pPg contains a page of a tombstone hash table - one of nPg pages
17364** associated with the same segment. This function adds rowid iRowid to
17365** the hash table. The caller is required to guarantee that there is at
17366** least one free slot on the page.
17367**
17368** If parameter bForce is false and the hash table is deemed to be full
17369** (more than half of the slots are occupied), then non-zero is returned
17370** and iRowid not inserted. Or, if bForce is true or if the hash table page
17371** is not full, iRowid is inserted and zero returned.
17372*/
17373static int fts5IndexTombstoneAddToPage(
17374 Fts5Data *pPg,
17375 int bForce,
17376 int nPg,
17377 u64 iRowid
17378){
17379 const int szKey = TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8);
17380 const int nSlot = TOMBSTONE_NSLOT(pPg)((pPg->nn > 16) ? ((pPg->nn-8) / (pPg->p[0]==4 ? 4
: 8)) : 1)
;
17381 const int nElem = fts5GetU32(&pPg->p[4]);
17382 int iSlot = (iRowid / nPg) % nSlot;
17383 int nCollide = nSlot;
17384
17385 if( szKey==4 && iRowid>0xFFFFFFFF ) return 2;
17386 if( iRowid==0 ){
17387 pPg->p[1] = 0x01;
17388 return 0;
17389 }
17390
17391 if( bForce==0 && nElem>=(nSlot/2) ){
17392 return 1;
17393 }
17394
17395 fts5PutU32(&pPg->p[4], nElem+1);
17396 if( szKey==4 ){
17397 u32 *aSlot = (u32*)&pPg->p[8];
17398 while( aSlot[iSlot] ){
17399 iSlot = (iSlot + 1) % nSlot;
17400 if( nCollide--==0 ) return 0;
17401 }
17402 fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid);
17403 }else{
17404 u64 *aSlot = (u64*)&pPg->p[8];
17405 while( aSlot[iSlot] ){
17406 iSlot = (iSlot + 1) % nSlot;
17407 if( nCollide--==0 ) return 0;
17408 }
17409 fts5PutU64((u8*)&aSlot[iSlot], iRowid);
17410 }
17411
17412 return 0;
17413}
17414
17415/*
17416** This function attempts to build a new hash containing all the keys
17417** currently in the tombstone hash table for segment pSeg. The new
17418** hash will be stored in the nOut buffers passed in array apOut[].
17419** All pages of the new hash use key-size szKey (4 or 8).
17420**
17421** Return 0 if the hash is successfully rebuilt into the nOut pages.
17422** Or non-zero if it is not (because one page became overfull). In this
17423** case the caller should retry with a larger nOut parameter.
17424**
17425** Parameter pData1 is page iPg1 of the hash table being rebuilt.
17426*/
17427static int fts5IndexTombstoneRehash(
17428 Fts5Index *p,
17429 Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */
17430 Fts5Data *pData1, /* One page of current hash - or NULL */
17431 int iPg1, /* Which page of the current hash is pData1 */
17432 int szKey, /* 4 or 8, the keysize */
17433 int nOut, /* Number of output pages */
17434 Fts5Data **apOut /* Array of output hash pages */
17435){
17436 int ii;
17437 int res = 0;
17438
17439 /* Initialize the headers of all the output pages */
17440 for(ii=0; ii<nOut; ii++){
17441 apOut[ii]->p[0] = szKey;
17442 fts5PutU32(&apOut[ii]->p[4], 0);
17443 }
17444
17445 /* Loop through the current pages of the hash table. */
17446 for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){
17447 Fts5Data *pData = 0; /* Page ii of the current hash table */
17448 Fts5Data *pFree = 0; /* Free this at the end of the loop */
17449
17450 if( iPg1==ii ){
17451 pData = pData1;
17452 }else{
17453 pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) +
((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((
i64)(ii)) )
);
17454 }
17455
17456 if( pData ){
17457 int szKeyIn = TOMBSTONE_KEYSIZE(pData)(pData->p[0]==4 ? 4 : 8);
17458 int nSlotIn = (pData->nn - 8) / szKeyIn;
17459 int iIn;
17460 for(iIn=0; iIn<nSlotIn; iIn++){
17461 u64 iVal = 0;
17462
17463 /* Read the value from slot iIn of the input page into iVal. */
17464 if( szKeyIn==4 ){
17465 u32 *aSlot = (u32*)&pData->p[8];
17466 if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]);
17467 }else{
17468 u64 *aSlot = (u64*)&pData->p[8];
17469 if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]);
17470 }
17471
17472 /* If iVal is not 0 at this point, insert it into the new hash table */
17473 if( iVal ){
17474 Fts5Data *pPg = apOut[(iVal % nOut)];
17475 res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal);
17476 if( res ) break;
17477 }
17478 }
17479
17480 /* If this is page 0 of the old hash, copy the rowid-0-flag from the
17481 ** old hash to the new. */
17482 if( ii==0 ){
17483 apOut[0]->p[1] = pData->p[1];
17484 }
17485 }
17486 fts5DataRelease(pFree);
17487 }
17488
17489 return res;
17490}
17491
17492/*
17493** This is called to rebuild the hash table belonging to segment pSeg.
17494** If parameter pData1 is not NULL, then one page of the existing hash table
17495** has already been loaded - pData1, which is page iPg1. The key-size for
17496** the new hash table is szKey (4 or 8).
17497**
17498** If successful, the new hash table is not written to disk. Instead,
17499** output parameter (*pnOut) is set to the number of pages in the new
17500** hash table, and (*papOut) to point to an array of buffers containing
17501** the new page data.
17502**
17503** If an error occurs, an error code is left in the Fts5Index object and
17504** both output parameters set to 0 before returning.
17505*/
17506static void fts5IndexTombstoneRebuild(
17507 Fts5Index *p,
17508 Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */
17509 Fts5Data *pData1, /* One page of current hash - or NULL */
17510 int iPg1, /* Which page of the current hash is pData1 */
17511 int szKey, /* 4 or 8, the keysize */
17512 int *pnOut, /* OUT: Number of output pages */
17513 Fts5Data ***papOut /* OUT: Output hash pages */
17514){
17515 const int MINSLOT = 32;
17516 int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey)(((MINSLOT) > ((p->pConfig->pgsz - 8) / szKey)) ? (MINSLOT
) : ((p->pConfig->pgsz - 8) / szKey))
;
17517 int nSlot = 0; /* Number of slots in each output page */
17518 int nOut = 0;
17519
17520 /* Figure out how many output pages (nOut) and how many slots per
17521 ** page (nSlot). There are three possibilities:
17522 **
17523 ** 1. The hash table does not yet exist. In this case the new hash
17524 ** table will consist of a single page with MINSLOT slots.
17525 **
17526 ** 2. The hash table exists but is currently a single page. In this
17527 ** case an attempt is made to grow the page to accommodate the new
17528 ** entry. The page is allowed to grow up to nSlotPerPage (see above)
17529 ** slots.
17530 **
17531 ** 3. The hash table already consists of more than one page, or of
17532 ** a single page already so large that it cannot be grown. In this
17533 ** case the new hash consists of (nPg*2+1) pages of nSlotPerPage
17534 ** slots each, where nPg is the current number of pages in the
17535 ** hash table.
17536 */
17537 if( pSeg->nPgTombstone==0 ){
17538 /* Case 1. */
17539 nOut = 1;
17540 nSlot = MINSLOT;
17541 }else if( pSeg->nPgTombstone==1 ){
17542 /* Case 2. */
17543 int nElem = (int)fts5GetU32(&pData1->p[4]);
17544 assert( pData1 && iPg1==0 )((void) (0));
17545 nOut = 1;
17546 nSlot = MAX(nElem*4, MINSLOT)(((nElem*4) > (MINSLOT)) ? (nElem*4) : (MINSLOT));
17547 if( nSlot>nSlotPerPage ) nOut = 0;
17548 }
17549 if( nOut==0 ){
17550 /* Case 3. */
17551 nOut = (pSeg->nPgTombstone * 2 + 1);
17552 nSlot = nSlotPerPage;
17553 }
17554
17555 /* Allocate the required array and output pages */
17556 while( 1 ){
17557 int res = 0;
17558 int ii = 0;
17559 int szPage = 0;
17560 Fts5Data **apOut = 0;
17561
17562 /* Allocate space for the new hash table */
17563 assert( nSlot>=MINSLOT )((void) (0));
17564 apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut);
17565 szPage = 8 + nSlot*szKey;
17566 for(ii=0; ii<nOut; ii++){
17567 Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc,
17568 sizeof(Fts5Data)+szPage
17569 );
17570 if( pNew ){
17571 pNew->nn = szPage;
17572 pNew->p = (u8*)&pNew[1];
17573 apOut[ii] = pNew;
17574 }
17575 }
17576
17577 /* Rebuild the hash table. */
17578 if( p->rc==SQLITE_OK0 ){
17579 res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut);
17580 }
17581 if( res==0 ){
17582 if( p->rc ){
17583 fts5IndexFreeArray(apOut, nOut);
17584 apOut = 0;
17585 nOut = 0;
17586 }
17587 *pnOut = nOut;
17588 *papOut = apOut;
17589 break;
17590 }
17591
17592 /* If control flows to here, it was not possible to rebuild the hash
17593 ** table. Free all buffers and then try again with more pages. */
17594 assert( p->rc==SQLITE_OK )((void) (0));
17595 fts5IndexFreeArray(apOut, nOut);
17596 nSlot = nSlotPerPage;
17597 nOut = nOut*2 + 1;
17598 }
17599}
17600
17601
17602/*
17603** Add a tombstone for rowid iRowid to segment pSeg.
17604*/
17605static void fts5IndexTombstoneAdd(
17606 Fts5Index *p,
17607 Fts5StructureSegment *pSeg,
17608 u64 iRowid
17609){
17610 Fts5Data *pPg = 0;
17611 int iPg = -1;
17612 int szKey = 0;
17613 int nHash = 0;
17614 Fts5Data **apHash = 0;
17615
17616 p->nContentlessDelete++;
17617
17618 if( pSeg->nPgTombstone>0 ){
17619 iPg = iRowid % pSeg->nPgTombstone;
17620 pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) +
((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((
i64)(iPg)) )
);
17621 if( pPg==0 ){
17622 assert( p->rc!=SQLITE_OK )((void) (0));
17623 return;
17624 }
17625
17626 if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){
17627 fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) +
((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((
i64)(iPg)) )
, pPg->p, pPg->nn);
17628 fts5DataRelease(pPg);
17629 return;
17630 }
17631 }
17632
17633 /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */
17634 szKey = pPg ? TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8) : 4;
17635 if( iRowid>0xFFFFFFFF ) szKey = 8;
17636
17637 /* Rebuild the hash table */
17638 fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash);
17639 assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) )((void) (0));
17640
17641 /* If all has succeeded, write the new rowid into one of the new hash
17642 ** table pages, then write them all out to disk. */
17643 if( nHash ){
17644 int ii = 0;
17645 fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid);
17646 for(ii=0; ii<nHash; ii++){
17647 i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) +
((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((
i64)(ii)) )
;
17648 fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn);
17649 }
17650 pSeg->nPgTombstone = nHash;
17651 fts5StructureWrite(p, p->pStruct);
17652 }
17653
17654 fts5DataRelease(pPg);
17655 fts5IndexFreeArray(apHash, nHash);
17656}
17657
17658/*
17659** Add iRowid to the tombstone list of the segment or segments that contain
17660** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite
17661** error code otherwise.
17662*/
17663static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){
17664 Fts5Structure *pStruct;
17665 pStruct = fts5StructureRead(p);
17666 if( pStruct ){
17667 int bFound = 0; /* True after pSeg->nEntryTombstone incr. */
17668 int iLvl;
17669 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
17670 int iSeg;
17671 for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
17672 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
17673 if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){
17674 if( bFound==0 ){
17675 pSeg->nEntryTombstone++;
17676 bFound = 1;
17677 }
17678 fts5IndexTombstoneAdd(p, pSeg, iRowid);
17679 }
17680 }
17681 }
17682 fts5StructureRelease(pStruct);
17683 }
17684 return fts5IndexReturn(p);
17685}
17686
17687/*************************************************************************
17688**************************************************************************
17689** Below this point is the implementation of the integrity-check
17690** functionality.
17691*/
17692
17693/*
17694** Return a simple checksum value based on the arguments.
17695*/
17696static u64 sqlite3Fts5IndexEntryCksum(
17697 i64 iRowid,
17698 int iCol,
17699 int iPos,
17700 int iIdx,
17701 const char *pTerm,
17702 int nTerm
17703){
17704 int i;
17705 u64 ret = iRowid;
17706 ret += (ret<<3) + iCol;
17707 ret += (ret<<3) + iPos;
17708 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX'0' + iIdx);
17709 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
17710 return ret;
17711}
17712
17713#ifdef SQLITE_DEBUG
17714/*
17715** This function is purely an internal test. It does not contribute to
17716** FTS functionality, or even the integrity-check, in any way.
17717**
17718** Instead, it tests that the same set of pgno/rowid combinations are
17719** visited regardless of whether the doclist-index identified by parameters
17720** iSegid/iLeaf is iterated in forwards or reverse order.
17721*/
17722static void fts5TestDlidxReverse(
17723 Fts5Index *p,
17724 int iSegid, /* Segment id to load from */
17725 int iLeaf /* Load doclist-index for this leaf */
17726){
17727 Fts5DlidxIter *pDlidx = 0;
17728 u64 cksum1 = 13;
17729 u64 cksum2 = 13;
17730
17731 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
17732 fts5DlidxIterEof(p, pDlidx)==0;
17733 fts5DlidxIterNext(p, pDlidx)
17734 ){
17735 i64 iRowid = fts5DlidxIterRowid(pDlidx);
17736 int pgno = fts5DlidxIterPgno(pDlidx);
17737 assert( pgno>iLeaf )((void) (0));
17738 cksum1 += iRowid + ((i64)pgno<<32);
17739 }
17740 fts5DlidxIterFree(pDlidx);
17741 pDlidx = 0;
17742
17743 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
17744 fts5DlidxIterEof(p, pDlidx)==0;
17745 fts5DlidxIterPrev(p, pDlidx)
17746 ){
17747 i64 iRowid = fts5DlidxIterRowid(pDlidx);
17748 int pgno = fts5DlidxIterPgno(pDlidx);
17749 assert( fts5DlidxIterPgno(pDlidx)>iLeaf )((void) (0));
17750 cksum2 += iRowid + ((i64)pgno<<32);
17751 }
17752 fts5DlidxIterFree(pDlidx);
17753 pDlidx = 0;
17754
17755 if( p->rc==SQLITE_OK0 && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT(11 | (1<<8));
17756}
17757
17758static int fts5QueryCksum(
17759 Fts5Index *p, /* Fts5 index object */
17760 int iIdx,
17761 const char *z, /* Index key to query for */
17762 int n, /* Size of index key in bytes */
17763 int flags, /* Flags for Fts5IndexQuery */
17764 u64 *pCksum /* IN/OUT: Checksum value */
17765){
17766 int eDetail = p->pConfig->eDetail;
17767 u64 cksum = *pCksum;
17768 Fts5IndexIter *pIter = 0;
17769 int rc = sqlite3Fts5IndexQuery(
17770 p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA0x0080), 0, &pIter
17771 );
17772
17773 while( rc==SQLITE_OK0 && ALWAYS(pIter!=0)(pIter!=0) && 0==sqlite3Fts5IterEof(pIter)((pIter)->bEof) ){
17774 i64 rowid = pIter->iRowid;
17775
17776 if( eDetail==FTS5_DETAIL_NONE1 ){
17777 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
17778 }else{
17779 Fts5PoslistReader sReader;
17780 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
17781 sReader.bEof==0;
17782 sqlite3Fts5PoslistReaderNext(&sReader)
17783 ){
17784 int iCol = FTS5_POS2COLUMN(sReader.iPos)(int)((sReader.iPos >> 32) & 0x7FFFFFFF);
17785 int iOff = FTS5_POS2OFFSET(sReader.iPos)(int)(sReader.iPos & 0x7FFFFFFF);
17786 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
17787 }
17788 }
17789 if( rc==SQLITE_OK0 ){
17790 rc = sqlite3Fts5IterNext(pIter);
17791 }
17792 }
17793 fts5IterClose(pIter);
17794
17795 *pCksum = cksum;
17796 return rc;
17797}
17798
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the structure of the encoding is checked: each lead byte must
** announce a sequence of 1, 2, 3 or 4 bytes that fits within the buffer,
** and every byte after the lead byte must be a continuation byte
** (10xxxxxx). Overlong encodings and out-of-range codepoints are not
** detected. An empty buffer (n<=0) is considered valid.
**
** For 4 byte sequences, all three continuation bytes are checked and the
** offset is advanced by the full 4 bytes, so that the final byte of the
** sequence is not mistaken for the start of the next one.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  while( i<n ){
    int nSeq;                     /* Size of this sequence in bytes */
    int j;

    /* Determine the sequence size from the lead byte */
    if( (z[i] & 0x80)==0x00 ){
      nSeq = 1;
    }else if( (z[i] & 0xE0)==0xC0 ){
      nSeq = 2;
    }else if( (z[i] & 0xF0)==0xE0 ){
      nSeq = 3;
    }else if( (z[i] & 0xF8)==0xF0 ){
      nSeq = 4;
    }else{
      return 1;                   /* Stray continuation or invalid lead byte */
    }

    /* The whole sequence must fit within the buffer */
    if( i+nSeq>n ) return 1;

    /* All trailing bytes must be continuation bytes */
    for(j=1; j<nSeq; j++){
      if( (z[i+j] & 0xC0)!=0x80 ) return 1;
    }
    i += nSeq;
  }

  return 0;
}
17830
17831/*
17832** This function is also purely an internal test. It does not contribute to
17833** FTS functionality, or even the integrity-check, in any way.
17834*/
17835static void fts5TestTerm(
17836 Fts5Index *p,
17837 Fts5Buffer *pPrev, /* Previous term */
17838 const char *z, int n, /* Possibly new term to test */
17839 u64 expected,
17840 u64 *pCksum
17841){
17842 int rc = p->rc;
17843 if( pPrev->n==0 ){
17844 fts5BufferSet(&rc, pPrev, n, (const u8*)z)sqlite3Fts5BufferSet(&rc,pPrev,n,(const u8*)z);
17845 }else
17846 if( rc==SQLITE_OK0 && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
17847 u64 cksum3 = *pCksum;
17848 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
17849 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
17850 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX'0');
17851 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX0x0001);
17852 u64 ck1 = 0;
17853 u64 ck2 = 0;
17854
17855 /* Check that the results returned for ASC and DESC queries are
17856 ** the same. If not, call this corruption. */
17857 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
17858 if( rc==SQLITE_OK0 ){
17859 int f = flags|FTS5INDEX_QUERY_DESC0x0002;
17860 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
17861 }
17862 if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8));
17863
17864 /* If this is a prefix query, check that the results returned if the
17865 ** the index is disabled are the same. In both ASC and DESC order.
17866 **
17867 ** This check may only be performed if the hash table is empty. This
17868 ** is because the hash table only supports a single scan query at
17869 ** a time, and the multi-iter loop from which this function is called
17870 ** is already performing such a scan.
17871 **
17872 ** Also only do this if buffer zTerm contains nTerm bytes of valid
17873 ** utf-8. Otherwise, the last part of the buffer contents might contain
17874 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
17875 ** character stored in the main fts index, which will cause the
17876 ** test to fail. */
17877 if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
17878 if( iIdx>0 && rc==SQLITE_OK0 ){
17879 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX0x0004;
17880 ck2 = 0;
17881 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
17882 if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8));
17883 }
17884 if( iIdx>0 && rc==SQLITE_OK0 ){
17885 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX0x0004|FTS5INDEX_QUERY_DESC0x0002;
17886 ck2 = 0;
17887 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
17888 if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8));
17889 }
17890 }
17891
17892 cksum3 ^= ck1;
17893 fts5BufferSet(&rc, pPrev, n, (const u8*)z)sqlite3Fts5BufferSet(&rc,pPrev,n,(const u8*)z);
17894
17895 if( rc==SQLITE_OK0 && cksum3!=expected ){
17896 rc = FTS5_CORRUPT(11 | (1<<8));
17897 }
17898 *pCksum = cksum3;
17899 }
17900 p->rc = rc;
17901}
17902
17903#else
17904# define fts5TestDlidxReverse(x,y,z)
17905# define fts5TestTerm(u,v,w,x,y,z)
17906#endif
17907
17908/*
17909** Check that:
17910**
17911** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
17912** contain zero terms.
17913** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
17914** contain zero rowids.
17915*/
17916static void fts5IndexIntegrityCheckEmpty(
17917 Fts5Index *p,
17918 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
17919 int iFirst,
17920 int iNoRowid,
17921 int iLast
17922){
17923 int i;
17924
17925 /* Now check that the iter.nEmpty leaves following the current leaf
17926 ** (a) exist and (b) contain no terms. */
17927 for(i=iFirst; p->rc==SQLITE_OK0 && i<=iLast; i++){
17928 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) <<
(31 + 5)) + ((i64)(0) << (31)) + ((i64)(i)) )
);
17929 if( pLeaf ){
17930 if( !fts5LeafIsTermless(pLeaf)((pLeaf)->szLeaf >= (pLeaf)->nn) ) p->rc = FTS5_CORRUPT(11 | (1<<8));
17931 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)) ) p->rc = FTS5_CORRUPT(11 | (1<<8));
17932 }
17933 fts5DataRelease(pLeaf);
17934 }
17935}
17936
17937static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
17938 i64 iTermOff = 0;
17939 int ii;
17940
17941 Fts5Buffer buf1 = {0,0,0};
17942 Fts5Buffer buf2 = {0,0,0};
17943
17944 ii = pLeaf->szLeaf;
17945 while( ii<pLeaf->nn && p->rc==SQLITE_OK0 ){
17946 int res;
17947 i64 iOff;
17948 int nIncr;
17949
17950 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr)sqlite3Fts5GetVarint32(&pLeaf->p[ii],(u32*)&(nIncr
))
;
17951 iTermOff += nIncr;
17952 iOff = iTermOff;
17953
17954 if( iOff>=pLeaf->szLeaf ){
17955 p->rc = FTS5_CORRUPT(11 | (1<<8));
17956 }else if( iTermOff==nIncr ){
17957 int nByte;
17958 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nByte
))
;
17959 if( (iOff+nByte)>pLeaf->szLeaf ){
17960 p->rc = FTS5_CORRUPT(11 | (1<<8));
17961 }else{
17962 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff])sqlite3Fts5BufferSet(&p->rc,&buf1,nByte,&pLeaf
->p[iOff])
;
17963 }
17964 }else{
17965 int nKeep, nByte;
17966 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nKeep
))
;
17967 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nByte
))
;
17968 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
17969 p->rc = FTS5_CORRUPT(11 | (1<<8));
17970 }else{
17971 buf1.n = nKeep;
17972 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&buf1,nByte,&
pLeaf->p[iOff])
;
17973 }
17974
17975 if( p->rc==SQLITE_OK0 ){
17976 res = fts5BufferCompare(&buf1, &buf2);
17977 if( res<=0 ) p->rc = FTS5_CORRUPT(11 | (1<<8));
17978 }
17979 }
17980 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p)sqlite3Fts5BufferSet(&p->rc,&buf2,buf1.n,buf1.p);
17981 }
17982
17983 fts5BufferFree(&buf1)sqlite3Fts5BufferFree(&buf1);
17984 fts5BufferFree(&buf2)sqlite3Fts5BufferFree(&buf2);
17985}
17986
17987static void fts5IndexIntegrityCheckSegment(
17988 Fts5Index *p, /* FTS5 backend object */
17989 Fts5StructureSegment *pSeg /* Segment to check internal consistency */
17990){
17991 Fts5Config *pConfig = p->pConfig;
17992 int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE5);
17993 sqlite3_stmt *pStmt = 0;
17994 int rc2;
17995 int iIdxPrevLeaf = pSeg->pgnoFirst-1;
17996 int iDlidxPrevLeaf = pSeg->pgnoLast;
17997
17998 if( pSeg->pgnoFirst==0 ) return;
17999
18000 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintfsqlite3_api->mprintf(
18001 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
18002 "ORDER BY 1, 2",
18003 pConfig->zDb, pConfig->zName, pSeg->iSegid
18004 ));
18005
18006 /* Iterate through the b-tree hierarchy. */
18007 while( p->rc==SQLITE_OK0 && SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pStmt) ){
18008 i64 iRow; /* Rowid for this leaf */
18009 Fts5Data *pLeaf; /* Data for this leaf */
18010
18011 const char *zIdxTerm = (const char*)sqlite3_column_blobsqlite3_api->column_blob(pStmt, 1);
18012 int nIdxTerm = sqlite3_column_bytessqlite3_api->column_bytes(pStmt, 1);
18013 int iIdxLeaf = sqlite3_column_intsqlite3_api->column_int(pStmt, 2);
18014 int bIdxDlidx = sqlite3_column_intsqlite3_api->column_int(pStmt, 3);
18015
18016 /* If the leaf in question has already been trimmed from the segment,
18017 ** ignore this b-tree entry. Otherwise, load it into memory. */
18018 if( iIdxLeaf<pSeg->pgnoFirst ) continue;
18019 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) <<
(31 + 5)) + ((i64)(0) << (31)) + ((i64)(iIdxLeaf)) )
;
18020 pLeaf = fts5LeafRead(p, iRow);
18021 if( pLeaf==0 ) break;
18022
18023 /* Check that the leaf contains at least one term, and that it is equal
18024 ** to or larger than the split-key in zIdxTerm. Also check that if there
18025 ** is also a rowid pointer within the leaf page header, it points to a
18026 ** location before the term. */
18027 if( pLeaf->nn<=pLeaf->szLeaf ){
18028
18029 if( nIdxTerm==0
18030 && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE5
18031 && pLeaf->nn==pLeaf->szLeaf
18032 && pLeaf->nn==4
18033 ){
18034 /* special case - the very first page in a segment keeps its %_idx
18035 ** entry even if all the terms are removed from it by secure-delete
18036 ** operations. */
18037 }else{
18038 p->rc = FTS5_CORRUPT(11 | (1<<8));
18039 }
18040
18041 }else{
18042 int iOff; /* Offset of first term on leaf */
18043 int iRowidOff; /* Offset of first rowid on leaf */
18044 int nTerm; /* Size of term on leaf in bytes */
18045 int res; /* Comparison of term and split-key */
18046
18047 iOff = fts5LeafFirstTermOff(pLeaf);
18048 iRowidOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p));
18049 if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
18050 p->rc = FTS5_CORRUPT(11 | (1<<8));
18051 }else{
18052 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nTerm
))
;
18053 res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm))(((((nTerm) < (nIdxTerm)) ? (nTerm) : (nIdxTerm)))<=0 ?
0 : memcmp((&pLeaf->p[iOff]), (zIdxTerm), ((((nTerm) <
(nIdxTerm)) ? (nTerm) : (nIdxTerm)))))
;
18054 if( res==0 ) res = nTerm - nIdxTerm;
18055 if( res<0 ) p->rc = FTS5_CORRUPT(11 | (1<<8));
18056 }
18057
18058 fts5IntegrityCheckPgidx(p, pLeaf);
18059 }
18060 fts5DataRelease(pLeaf);
18061 if( p->rc ) break;
18062
18063 /* Now check that the iter.nEmpty leaves following the current leaf
18064 ** (a) exist and (b) contain no terms. */
18065 fts5IndexIntegrityCheckEmpty(
18066 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
18067 );
18068 if( p->rc ) break;
18069
18070 /* If there is a doclist-index, check that it looks right. */
18071 if( bIdxDlidx ){
18072 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
18073 int iPrevLeaf = iIdxLeaf;
18074 int iSegid = pSeg->iSegid;
18075 int iPg = 0;
18076 i64 iKey;
18077
18078 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
18079 fts5DlidxIterEof(p, pDlidx)==0;
18080 fts5DlidxIterNext(p, pDlidx)
18081 ){
18082
18083 /* Check any rowid-less pages that occur before the current leaf. */
18084 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
18085 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(iPg)) )
;
18086 pLeaf = fts5DataRead(p, iKey);
18087 if( pLeaf ){
18088 if( fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p))!=0 ) p->rc = FTS5_CORRUPT(11 | (1<<8));
18089 fts5DataRelease(pLeaf);
18090 }
18091 }
18092 iPrevLeaf = fts5DlidxIterPgno(pDlidx);
18093
18094 /* Check that the leaf page indicated by the iterator really does
18095 ** contain the rowid suggested by the same. */
18096 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(iPrevLeaf)) )
;
18097 pLeaf = fts5DataRead(p, iKey);
18098 if( pLeaf ){
18099 i64 iRowid;
18100 int iRowidOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p));
18101 ASSERT_SZLEAF_OK(pLeaf)((void) (0));
18102 if( iRowidOff>=pLeaf->szLeaf ){
18103 p->rc = FTS5_CORRUPT(11 | (1<<8));
18104 }else if( bSecureDelete==0 || iRowidOff>0 ){
18105 i64 iDlRowid = fts5DlidxIterRowid(pDlidx);
18106 fts5GetVarintsqlite3Fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
18107 if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){
18108 p->rc = FTS5_CORRUPT(11 | (1<<8));
18109 }
18110 }
18111 fts5DataRelease(pLeaf);
18112 }
18113 }
18114
18115 iDlidxPrevLeaf = iPg;
18116 fts5DlidxIterFree(pDlidx);
18117 fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
18118 }else{
18119 iDlidxPrevLeaf = pSeg->pgnoLast;
18120 /* TODO: Check there is no doclist index */
18121 }
18122
18123 iIdxPrevLeaf = iIdxLeaf;
18124 }
18125
18126 rc2 = sqlite3_finalizesqlite3_api->finalize(pStmt);
18127 if( p->rc==SQLITE_OK0 ) p->rc = rc2;
18128
18129 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
18130#if 0
18131 if( p->rc==SQLITE_OK0 && iter.iLeaf!=pSeg->pgnoLast ){
18132 p->rc = FTS5_CORRUPT(11 | (1<<8));
18133 }
18134#endif
18135}
18136
18137
18138/*
18139** Run internal checks to ensure that the FTS index (a) is internally
18140** consistent and (b) contains entries for which the XOR of the checksums
18141** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
18142**
18143** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
18144** checksum does not match. Return SQLITE_OK if all checks pass without
18145** error, or some other SQLite error code if another error (e.g. OOM)
18146** occurs.
**
** Parameter cksum is the checksum the caller expects. It is compared
** against the value computed here only when bUseCksum is non-zero; when
** bUseCksum is zero the structural checks still run but no checksum
** mismatch is reported.
18147*/
18148static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
18149 int eDetail = p->pConfig->eDetail;
18150 u64 cksum2 = 0; /* Checksum based on contents of indexes */
18151 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
9
'poslist.p' initialized to a null pointer value
18152 Fts5Iter *pIter; /* Used to iterate through entire index */
18153 Fts5Structure *pStruct; /* Index structure */
18154 int iLvl, iSeg;
18155
18156#ifdef SQLITE_DEBUG
18157 /* Used by extra internal tests only run if NDEBUG is not defined */
18158 u64 cksum3 = 0; /* Checksum based on contents of indexes */
18159 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
18160#endif
/* Passed as the flags argument of fts5MultiIterNew() below. */
18161 const int flags = FTS5INDEX_QUERY_NOOUTPUT0x0020;
18162
18163 /* Load the FTS index structure */
18164 pStruct = fts5StructureRead(p);
18165 if( pStruct
9.1
'pStruct' is not equal to null
==0 ){
10
Taking false branch
/* No structure was returned: an error is expected to be recorded in
** p->rc already, so just hand the object to fts5IndexReturn(). */
18166 assert( p->rc!=SQLITE_OK )((void) (0));
18167 return fts5IndexReturn(p);
18168 }
18169
18170 /* Check that the internal nodes of each segment match the leaves */
18171 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
11
Assuming 'iLvl' is >= field 'nLevel'
12
Loop condition is false. Execution continues on line 18191
18172 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
18173 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
18174 fts5IndexIntegrityCheckSegment(p, pSeg);
18175 }
18176 }
18177
18178 /* The cksum argument passed to this function is a checksum calculated
18179 ** based on all expected entries in the FTS index (including prefix index
18180 ** entries). This block checks that a checksum calculated based on the
18181 ** actual contents of FTS index is identical.
18182 **
18183 ** Two versions of the same checksum are calculated. The first (stack
18184 ** variable cksum2) based on entries extracted from the full-text index
18185 ** while doing a linear scan of each individual index in turn.
18186 **
18187 ** As each term visited by the linear scans, a separate query for the
18188 ** same term is performed. cksum3 is calculated based on the entries
18189 ** extracted by these queries.
18190 */
18191 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
14
Loop condition is true. Entering loop body
18192 fts5MultiIterEof(p, pIter)==0;
13
Assuming the condition is true
18193 fts5MultiIterNext(p, pIter, 0, 0)
18194 ){
18195 int n; /* Size of term in bytes */
18196 i64 iPos = 0; /* Position read from poslist */
18197 int iOff = 0; /* Offset within poslist */
18198 i64 iRowid = fts5MultiIterRowid(pIter);
18199 char *z = (char*)fts5MultiIterTerm(pIter, &n);
18200
18201 /* If this is a new term, query for it. Update cksum3 with the results. */
18202 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
18203 if( p->rc
14.1
Field 'rc' is 0
) break;
15
Taking false branch
18204
/* detail=none: no position list is walked. A single entry with column 0
** and token offset 0 is folded into cksum2, and only when the iterator
** does not report the current entry as empty. */
18205 if( eDetail==FTS5_DETAIL_NONE1 ){
16
Assuming 'eDetail' is not equal to FTS5_DETAIL_NONE
17
Taking false branch
18206 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
18207 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
18208 }
18209 }else{
/* Reuse the poslist buffer: reset its length, load the position list of
** the current entry, then append four 0x00 bytes before walking it.
** NOTE(review): the four zero bytes look like padding for the position
** reader below - confirm against sqlite3Fts5PoslistNext64(). */
18210 poslist.n = 0;
18211 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
18
Calling 'fts5SegiterPoslist'
18212 fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0")sqlite3Fts5BufferAppendBlob(&p->rc,&poslist,4,(const
u8*)"\0\0\0\0")
;
/* Each decoded position is split into a column number (bits 32 and up)
** and a token offset (low 31 bits) and contributes one checksum entry. */
18213 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
18214 int iCol = FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF);
18215 int iTokOff = FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF);
18216 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
18217 }
18218 }
18219 }
/* One more call after the scan, this time with a NULL, zero-length term.
** NOTE(review): presumably this completes the check for the last term
** visited - confirm against fts5TestTerm(). */
18220 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
18221
18222 fts5MultiIterFree(pIter);
/* A checksum mismatch is reported only if no earlier error is recorded in
** p->rc and the caller asked for the comparison via bUseCksum. */
18223 if( p->rc==SQLITE_OK0 && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT(11 | (1<<8));
18224
18225 fts5StructureRelease(pStruct);
/* The term buffer is only declared in SQLITE_DEBUG builds (see above). */
18226#ifdef SQLITE_DEBUG
18227 fts5BufferFree(&term)sqlite3Fts5BufferFree(&term);
18228#endif
18229 fts5BufferFree(&poslist)sqlite3Fts5BufferFree(&poslist);
18230 return fts5IndexReturn(p);
18231}
18232
18233/*************************************************************************
18234**************************************************************************
18235** Below this point is the implementation of the fts5_decode() scalar
18236** function only.
18237*/
18238
18239#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18240/*
18241** Decode a segment-data rowid from the %_data table. This function is
18242** the opposite of macro FTS5_SEGMENT_ROWID().
18243*/
18244static void fts5DecodeRowid(
18245 i64 iRowid, /* Rowid from %_data table */
18246 int *pbTombstone, /* OUT: Tombstone hash flag */
18247 int *piSegid, /* OUT: Segment id */
18248 int *pbDlidx, /* OUT: Dlidx flag */
18249 int *piHeight, /* OUT: Height */
18250 int *piPgno /* OUT: Page number */
18251){
18252 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B31) - 1));
18253 iRowid >>= FTS5_DATA_PAGE_B31;
18254
18255 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B5) - 1));
18256 iRowid >>= FTS5_DATA_HEIGHT_B5;
18257
18258 *pbDlidx = (int)(iRowid & 0x0001);
18259 iRowid >>= FTS5_DATA_DLI_B1;
18260
18261 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B16) - 1));
18262 iRowid >>= FTS5_DATA_ID_B16;
18263
18264 *pbTombstone = (int)(iRowid & 0x0001);
18265}
18266#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18267
18268#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18269static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
18270 int iSegid, iHeight, iPgno, bDlidx, bTomb; /* Rowid components */
18271 fts5DecodeRowid(iKey, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);
18272
18273 if( iSegid==0 ){
18274 if( iKey==FTS5_AVERAGES_ROWID1 ){
18275 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
18276 }else{
18277 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
18278 }
18279 }
18280 else{
18281 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}",
18282 bDlidx ? "dlidx " : "",
18283 bTomb ? "tombstone " : "",
18284 iSegid, iHeight, iPgno
18285 );
18286 }
18287}
18288#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18289
18290#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18291static void fts5DebugStructure(
18292 int *pRc, /* IN/OUT: error code */
18293 Fts5Buffer *pBuf,
18294 Fts5Structure *p
18295){
18296 int iLvl, iSeg; /* Iterate through levels, segments */
18297
18298 for(iLvl=0; iLvl<p->nLevel; iLvl++){
18299 Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
18300 sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
18301 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
18302 );
18303 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
18304 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
18305 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d",
18306 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
18307 );
18308 if( pSeg->iOrigin1>0 ){
18309 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld",
18310 pSeg->iOrigin1, pSeg->iOrigin2
18311 );
18312 }
18313 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
18314 }
18315 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
18316 }
18317}
18318#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18319
18320#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18321/*
18322** This is part of the fts5_decode() debugging aid.
18323**
18324** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
18325** function appends a human-readable representation of the same object
18326** to the buffer passed as the second argument.
18327*/
18328static void fts5DecodeStructure(
18329 int *pRc, /* IN/OUT: error code */
18330 Fts5Buffer *pBuf,
18331 const u8 *pBlob, int nBlob
18332){
18333 int rc; /* Return code */
18334 Fts5Structure *p = 0; /* Decoded structure object */
18335
18336 rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
18337 if( rc!=SQLITE_OK0 ){
18338 *pRc = rc;
18339 return;
18340 }
18341
18342 fts5DebugStructure(pRc, pBuf, p);
18343 fts5StructureRelease(p);
18344}
18345#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18346
18347#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18348/*
18349** This is part of the fts5_decode() debugging aid.
18350**
18351** Arguments pBlob/nBlob contain an "averages" record. This function
18352** appends a human-readable representation of record to the buffer passed
18353** as the second argument.
18354*/
18355static void fts5DecodeAverages(
18356 int *pRc, /* IN/OUT: error code */
18357 Fts5Buffer *pBuf,
18358 const u8 *pBlob, int nBlob
18359){
18360 int i = 0;
18361 const char *zSpace = "";
18362
18363 while( i<nBlob ){
18364 u64 iVal;
18365 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
18366 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
18367 zSpace = " ";
18368 }
18369}
18370#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18371
18372#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18373/*
18374** Buffer (a/n) is assumed to contain a list of serialized varints. Read
18375** each varint and append its string representation to buffer pBuf. Return
18376** after either the input buffer is exhausted or a 0 value is read.
18377**
18378** The return value is the number of bytes read from the input buffer.
18379*/
18380static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
18381 int iOff = 0;
18382 while( iOff<n ){
18383 int iVal;
18384 iOff += fts5GetVarint32(&a[iOff], iVal)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(iVal));
18385 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
18386 }
18387 return iOff;
18388}
18389#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18390
18391#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18392/*
18393** The start of buffer (a/n) contains the start of a doclist. The doclist
18394** may or may not finish within the buffer. This function appends a text
18395** representation of the part of the doclist that is present to buffer
18396** pBuf.
18397**
18398** The return value is the number of bytes read from the input buffer.
18399*/
18400static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
18401 i64 iDocid = 0;
18402 int iOff = 0;
18403
18404 if( n>0 ){
18405 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
18406 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
18407 }
18408 while( iOff<n ){
18409 int nPos;
18410 int bDel;
18411 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
18412 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
18413 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)(((n-iOff) < (nPos)) ? (n-iOff) : (nPos)));
18414 if( iOff<n ){
18415 i64 iDelta;
18416 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
18417 iDocid += iDelta;
18418 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
18419 }
18420 }
18421
18422 return iOff;
18423}
18424#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18425
18426#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18427/*
18428** This function is part of the fts5_decode() debugging function. It is
18429** only ever used with detail=none tables.
18430**
18431** Buffer (pData/nData) contains a doclist in the format used by detail=none
18432** tables. This function appends a human-readable version of that list to
18433** buffer pBuf.
18434**
18435** If *pRc is other than SQLITE_OK when this function is called, it is a
18436** no-op. If an OOM or other error occurs within this function, *pRc is
18437** set to an SQLite error code before returning. The final state of buffer
18438** pBuf is undefined in this case.
18439*/
18440static void fts5DecodeRowidList(
18441 int *pRc, /* IN/OUT: Error code */
18442 Fts5Buffer *pBuf, /* Buffer to append text to */
18443 const u8 *pData, int nData /* Data to decode list-of-rowids from */
18444){
18445 int i = 0;
18446 i64 iRowid = 0;
18447
18448 while( i<nData ){
18449 const char *zApp = "";
18450 u64 iVal;
18451 i += sqlite3Fts5GetVarint(&pData[i], &iVal);
18452 iRowid += iVal;
18453
18454 if( i<nData && pData[i]==0x00 ){
18455 i++;
18456 if( i<nData && pData[i]==0x00 ){
18457 i++;
18458 zApp = "+";
18459 }else{
18460 zApp = "*";
18461 }
18462 }
18463
18464 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
18465 }
18466}
18467#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18468
18469#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18470static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){
18471 int ii;
18472 fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1)( (u32)((pBuf)->n) + (u32)(pTerm->n*2 + 1) <= (u32)(
(pBuf)->nSpace) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(
pTerm->n*2 + 1)+(pBuf)->n) )
;
18473 if( *pRc==SQLITE_OK0 ){
18474 for(ii=0; ii<pTerm->n; ii++){
18475 if( pTerm->p[ii]==0x00 ){
18476 pBuf->p[pBuf->n++] = '\\';
18477 pBuf->p[pBuf->n++] = '0';
18478 }else{
18479 pBuf->p[pBuf->n++] = pTerm->p[ii];
18480 }
18481 }
18482 pBuf->p[pBuf->n] = 0x00;
18483 }
18484}
18485#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18486
18487#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18488/*
18489** The implementation of user-defined scalar function fts5_decode().
18490*/
18491static void fts5DecodeFunction(
18492 sqlite3_context *pCtx, /* Function call context */
18493 int nArg, /* Number of args (always 2) */
18494 sqlite3_value **apVal /* Function arguments */
18495){
18496 i64 iRowid; /* Rowid for record being decoded */
18497 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
18498 int bTomb;
18499 const u8 *aBlob; int n; /* Record to decode */
18500 u8 *a = 0;
18501 Fts5Buffer s; /* Build up text to return here */
18502 int rc = SQLITE_OK0; /* Return code */
18503 sqlite3_int64 nSpace = 0;
18504 int eDetailNone = (sqlite3_user_datasqlite3_api->user_data(pCtx)!=0);
18505
18506 assert( nArg==2 )((void) (0));
18507 UNUSED_PARAM(nArg)(void)(nArg);
18508 memset(&s, 0, sizeof(Fts5Buffer));
18509 iRowid = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]);
18510
18511 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
18512 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
18513 ** buffer overreads even if the record is corrupt. */
18514 n = sqlite3_value_bytessqlite3_api->value_bytes(apVal[1]);
18515 aBlob = sqlite3_value_blobsqlite3_api->value_blob(apVal[1]);
18516 nSpace = ((i64)n) + FTS5_DATA_ZERO_PADDING8;
18517 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
18518 if( a==0 ) goto decode_out;
18519 if( n>0 ) memcpy(a, aBlob, n);
18520
18521 fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);
18522
18523 fts5DebugRowid(&rc, &s, iRowid);
18524 if( bDlidx ){
18525 Fts5Data dlidx;
18526 Fts5DlidxLvl lvl;
18527
18528 dlidx.p = a;
18529 dlidx.nn = n;
18530
18531 memset(&lvl, 0, sizeof(Fts5DlidxLvl));
18532 lvl.pData = &dlidx;
18533 lvl.iLeafPgno = iPgno;
18534
18535 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
18536 sqlite3Fts5BufferAppendPrintf(&rc, &s,
18537 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
18538 );
18539 }
18540 }else if( bTomb ){
18541 u32 nElem = fts5GetU32(&a[4]);
18542 int szKey = (aBlob[0]==4 || aBlob[0]==8) ? aBlob[0] : 8;
18543 int nSlot = (n - 8) / szKey;
18544 int ii;
18545 sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem);
18546 if( aBlob[1] ){
18547 sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0");
18548 }
18549 for(ii=0; ii<nSlot; ii++){
18550 u64 iVal = 0;
18551 if( szKey==4 ){
18552 u32 *aSlot = (u32*)&aBlob[8];
18553 if( aSlot[ii] ) iVal = fts5GetU32((u8*)&aSlot[ii]);
18554 }else{
18555 u64 *aSlot = (u64*)&aBlob[8];
18556 if( aSlot[ii] ) iVal = fts5GetU64((u8*)&aSlot[ii]);
18557 }
18558 if( iVal!=0 ){
18559 sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal);
18560 }
18561 }
18562 }else if( iSegid==0 ){
18563 if( iRowid==FTS5_AVERAGES_ROWID1 ){
18564 fts5DecodeAverages(&rc, &s, a, n);
18565 }else{
18566 fts5DecodeStructure(&rc, &s, a, n);
18567 }
18568 }else if( eDetailNone ){
18569 Fts5Buffer term; /* Current term read from page */
18570 int szLeaf;
18571 int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
18572 int iTermOff;
18573 int nKeep = 0;
18574 int iOff;
18575
18576 memset(&term, 0, sizeof(Fts5Buffer));
18577
18578 /* Decode any entries that occur before the first term. */
18579 if( szLeaf<n ){
18580 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(iTermOff
))
;
18581 }else{
18582 iTermOff = szLeaf;
18583 }
18584 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
18585
18586 iOff = iTermOff;
18587 while( iOff<szLeaf && rc==SQLITE_OK0 ){
18588 int nAppend;
18589
18590 /* Read the term data for the next term*/
18591 iOff += fts5GetVarint32(&a[iOff], nAppend)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nAppend));
18592 term.n = nKeep;
18593 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff])sqlite3Fts5BufferAppendBlob(&rc,&term,nAppend,&a[
iOff])
;
18594 sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
18595 fts5BufferAppendTerm(&rc, &s, &term);
18596 iOff += nAppend;
18597
18598 /* Figure out where the doclist for this term ends */
18599 if( iPgidxOff<n ){
18600 int nIncr;
18601 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nIncr));
18602 iTermOff += nIncr;
18603 }else{
18604 iTermOff = szLeaf;
18605 }
18606 if( iTermOff>szLeaf ){
18607 rc = FTS5_CORRUPT(11 | (1<<8));
18608 }else{
18609 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
18610 }
18611 iOff = iTermOff;
18612 if( iOff<szLeaf ){
18613 iOff += fts5GetVarint32(&a[iOff], nKeep)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nKeep));
18614 }
18615 }
18616
18617 fts5BufferFree(&term)sqlite3Fts5BufferFree(&term);
18618 }else{
18619 Fts5Buffer term; /* Current term read from page */
18620 int szLeaf; /* Offset of pgidx in a[] */
18621 int iPgidxOff;
18622 int iPgidxPrev = 0; /* Previous value read from pgidx */
18623 int iTermOff = 0;
18624 int iRowidOff = 0;
18625 int iOff;
18626 int nDoclist;
18627
18628 memset(&term, 0, sizeof(Fts5Buffer));
18629
18630 if( n<4 ){
18631 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
18632 goto decode_out;
18633 }else{
18634 iRowidOff = fts5GetU16(&a[0]);
18635 iPgidxOff = szLeaf = fts5GetU16(&a[2]);
18636 if( iPgidxOff<n ){
18637 fts5GetVarint32(&a[iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(iTermOff
))
;
18638 }else if( iPgidxOff>n ){
18639 rc = FTS5_CORRUPT(11 | (1<<8));
18640 goto decode_out;
18641 }
18642 }
18643
18644 /* Decode the position list tail at the start of the page */
18645 if( iRowidOff!=0 ){
18646 iOff = iRowidOff;
18647 }else if( iTermOff!=0 ){
18648 iOff = iTermOff;
18649 }else{
18650 iOff = szLeaf;
18651 }
18652 if( iOff>n ){
18653 rc = FTS5_CORRUPT(11 | (1<<8));
18654 goto decode_out;
18655 }
18656 fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
18657
18658 /* Decode any more doclist data that appears on the page before the
18659 ** first term. */
18660 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
18661 if( nDoclist+iOff>n ){
18662 rc = FTS5_CORRUPT(11 | (1<<8));
18663 goto decode_out;
18664 }
18665 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
18666
18667 while( iPgidxOff<n && rc==SQLITE_OK0 ){
18668 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
18669 int nByte; /* Bytes of data */
18670 int iEnd;
18671
18672 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nByte));
18673 iPgidxPrev += nByte;
18674 iOff = iPgidxPrev;
18675
18676 if( iPgidxOff<n ){
18677 fts5GetVarint32(&a[iPgidxOff], nByte)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nByte));
18678 iEnd = iPgidxPrev + nByte;
18679 }else{
18680 iEnd = szLeaf;
18681 }
18682 if( iEnd>szLeaf ){
18683 rc = FTS5_CORRUPT(11 | (1<<8));
18684 break;
18685 }
18686
18687 if( bFirst==0 ){
18688 iOff += fts5GetVarint32(&a[iOff], nByte)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nByte));
18689 if( nByte>term.n ){
18690 rc = FTS5_CORRUPT(11 | (1<<8));
18691 break;
18692 }
18693 term.n = nByte;
18694 }
18695 iOff += fts5GetVarint32(&a[iOff], nByte)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nByte));
18696 if( iOff+nByte>n ){
18697 rc = FTS5_CORRUPT(11 | (1<<8));
18698 break;
18699 }
18700 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff])sqlite3Fts5BufferAppendBlob(&rc,&term,nByte,&a[iOff
])
;
18701 iOff += nByte;
18702
18703 sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
18704 fts5BufferAppendTerm(&rc, &s, &term);
18705 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
18706 }
18707
18708 fts5BufferFree(&term)sqlite3Fts5BufferFree(&term);
18709 }
18710
18711 decode_out:
18712 sqlite3_freesqlite3_api->free(a);
18713 if( rc==SQLITE_OK0 ){
18714 sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
18715 }else{
18716 sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc);
18717 }
18718 fts5BufferFree(&s)sqlite3Fts5BufferFree(&s);
18719}
18720#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18721
18722#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18723/*
18724** The implementation of user-defined scalar function fts5_rowid().
18725*/
18726static void fts5RowidFunction(
18727 sqlite3_context *pCtx, /* Function call context */
18728 int nArg, /* Number of args (always 2) */
18729 sqlite3_value **apVal /* Function arguments */
18730){
18731 const char *zArg;
18732 if( nArg==0 ){
18733 sqlite3_result_errorsqlite3_api->result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
18734 }else{
18735 zArg = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[0]);
18736 if( 0==sqlite3_stricmpsqlite3_api->stricmp(zArg, "segment") ){
18737 i64 iRowid;
18738 int segid, pgno;
18739 if( nArg!=3 ){
18740 sqlite3_result_errorsqlite3_api->result_error(pCtx,
18741 "should be: fts5_rowid('segment', segid, pgno))", -1
18742 );
18743 }else{
18744 segid = sqlite3_value_intsqlite3_api->value_int(apVal[1]);
18745 pgno = sqlite3_value_intsqlite3_api->value_int(apVal[2]);
18746 iRowid = FTS5_SEGMENT_ROWID(segid, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31
+ 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) )
;
18747 sqlite3_result_int64sqlite3_api->result_int64(pCtx, iRowid);
18748 }
18749 }else{
18750 sqlite3_result_errorsqlite3_api->result_error(pCtx,
18751 "first arg to fts5_rowid() must be 'segment'" , -1
18752 );
18753 }
18754 }
18755}
18756#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18757
18758#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18759
18760typedef struct Fts5StructVtab Fts5StructVtab;
18761struct Fts5StructVtab {
18762 sqlite3_vtab base;
18763};
18764
18765typedef struct Fts5StructVcsr Fts5StructVcsr;
18766struct Fts5StructVcsr {
18767 sqlite3_vtab_cursor base;
18768 Fts5Structure *pStruct;
18769 int iLevel;
18770 int iSeg;
18771 int iRowid;
18772};
18773
18774/*
18775** Create a new fts5_structure() table-valued function.
18776*/
18777static int fts5structConnectMethod(
18778 sqlite3 *db,
18779 void *pAux,
18780 int argc, const char *const*argv,
18781 sqlite3_vtab **ppVtab,
18782 char **pzErr
18783){
18784 Fts5StructVtab *pNew = 0;
18785 int rc = SQLITE_OK0;
18786
18787 rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db,
18788 "CREATE TABLE xyz("
18789 "level, segment, merge, segid, leaf1, leaf2, loc1, loc2, "
18790 "npgtombstone, nentrytombstone, nentry, struct HIDDEN);"
18791 );
18792 if( rc==SQLITE_OK0 ){
18793 pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
18794 }
18795
18796 *ppVtab = (sqlite3_vtab*)pNew;
18797 return rc;
18798}
18799
18800/*
18801** We must have a single struct=? constraint that will be passed through
18802** into the xFilter method. If there is no valid struct=? constraint,
18803** then return an SQLITE_CONSTRAINT error.
18804*/
18805static int fts5structBestIndexMethod(
18806 sqlite3_vtab *tab,
18807 sqlite3_index_info *pIdxInfo
18808){
18809 int i;
18810 int rc = SQLITE_CONSTRAINT19;
18811 struct sqlite3_index_constraint *p;
18812 pIdxInfo->estimatedCost = (double)100;
18813 pIdxInfo->estimatedRows = 100;
18814 pIdxInfo->idxNum = 0;
18815 for(i=0, p=pIdxInfo->aConstraint; i<pIdxInfo->nConstraint; i++, p++){
18816 if( p->usable==0 ) continue;
18817 if( p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && p->iColumn==11 ){
18818 rc = SQLITE_OK0;
18819 pIdxInfo->aConstraintUsage[i].omit = 1;
18820 pIdxInfo->aConstraintUsage[i].argvIndex = 1;
18821 break;
18822 }
18823 }
18824 return rc;
18825}
18826
18827/*
18828** This method is the destructor for bytecodevtab objects.
18829*/
18830static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){
18831 Fts5StructVtab *p = (Fts5StructVtab*)pVtab;
18832 sqlite3_freesqlite3_api->free(p);
18833 return SQLITE_OK0;
18834}
18835
18836/*
18837** Constructor for a new bytecodevtab_cursor object.
18838*/
18839static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){
18840 int rc = SQLITE_OK0;
18841 Fts5StructVcsr *pNew = 0;
18842
18843 pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
18844 *ppCsr = (sqlite3_vtab_cursor*)pNew;
18845
18846 return SQLITE_OK0;
18847}
18848
18849/*
18850** Destructor for a bytecodevtab_cursor.
18851*/
18852static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){
18853 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
18854 fts5StructureRelease(pCsr->pStruct);
18855 sqlite3_freesqlite3_api->free(pCsr);
18856 return SQLITE_OK0;
18857}
18858
18859
18860/*
18861** Advance a bytecodevtab_cursor to its next row of output.
18862*/
18863static int fts5structNextMethod(sqlite3_vtab_cursor *cur){
18864 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
18865 Fts5Structure *p = pCsr->pStruct;
18866
18867 assert( pCsr->pStruct )((void) (0));
18868 pCsr->iSeg++;
18869 pCsr->iRowid++;
18870 while( pCsr->iLevel<p->nLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){
18871 pCsr->iLevel++;
18872 pCsr->iSeg = 0;
18873 }
18874 if( pCsr->iLevel>=p->nLevel ){
18875 fts5StructureRelease(pCsr->pStruct);
18876 pCsr->pStruct = 0;
18877 }
18878 return SQLITE_OK0;
18879}
18880
18881/*
18882** Return TRUE if the cursor has been moved off of the last
18883** row of output.
18884*/
18885static int fts5structEofMethod(sqlite3_vtab_cursor *cur){
18886 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
18887 return pCsr->pStruct==0;
18888}
18889
18890static int fts5structRowidMethod(
18891 sqlite3_vtab_cursor *cur,
18892 sqlite_int64 *piRowid
18893){
18894 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
18895 *piRowid = pCsr->iRowid;
18896 return SQLITE_OK0;
18897}
18898
18899/*
18900** Return values of columns for the row at which the bytecodevtab_cursor
18901** is currently pointing.
18902*/
18903static int fts5structColumnMethod(
18904 sqlite3_vtab_cursor *cur, /* The cursor */
18905 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
18906 int i /* Which column to return */
18907){
18908 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
18909 Fts5Structure *p = pCsr->pStruct;
18910 Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg];
18911
18912 switch( i ){
18913 case 0: /* level */
18914 sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iLevel);
18915 break;
18916 case 1: /* segment */
18917 sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iSeg);
18918 break;
18919 case 2: /* merge */
18920 sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge);
18921 break;
18922 case 3: /* segid */
18923 sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->iSegid);
18924 break;
18925 case 4: /* leaf1 */
18926 sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->pgnoFirst);
18927 break;
18928 case 5: /* leaf2 */
18929 sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->pgnoLast);
18930 break;
18931 case 6: /* origin1 */
18932 sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->iOrigin1);
18933 break;
18934 case 7: /* origin2 */
18935 sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->iOrigin2);
18936 break;
18937 case 8: /* npgtombstone */
18938 sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->nPgTombstone);
18939 break;
18940 case 9: /* nentrytombstone */
18941 sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->nEntryTombstone);
18942 break;
18943 case 10: /* nentry */
18944 sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->nEntry);
18945 break;
18946 }
18947 return SQLITE_OK0;
18948}
18949
18950/*
18951** Initialize a cursor.
18952**
18953** idxNum==0 means show all subprograms
18954** idxNum==1 means show only the main bytecode and omit subprograms.
18955*/
18956static int fts5structFilterMethod(
18957 sqlite3_vtab_cursor *pVtabCursor,
18958 int idxNum, const char *idxStr,
18959 int argc, sqlite3_value **argv
18960){
18961 Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor;
18962 int rc = SQLITE_OK0;
18963
18964 const u8 *aBlob = 0;
18965 int nBlob = 0;
18966
18967 assert( argc==1 )((void) (0));
18968 fts5StructureRelease(pCsr->pStruct);
18969 pCsr->pStruct = 0;
18970
18971 nBlob = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]);
18972 aBlob = (const u8*)sqlite3_value_blobsqlite3_api->value_blob(argv[0]);
18973 rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct);
18974 if( rc==SQLITE_OK0 ){
18975 pCsr->iLevel = 0;
18976 pCsr->iRowid = 0;
18977 pCsr->iSeg = -1;
18978 rc = fts5structNextMethod(pVtabCursor);
18979 }
18980
18981 return rc;
18982}
18983
18984#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
18985
18986/*
18987** This is called as part of registering the FTS5 module with database
18988** connection db. It registers several user-defined scalar functions useful
18989** with FTS5.
18990**
18991** If successful, SQLITE_OK is returned. If an error occurs, some other
18992** SQLite error code is returned instead.
18993*/
18994static int sqlite3Fts5IndexInit(sqlite3 *db){
18995#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
18996 int rc = sqlite3_create_functionsqlite3_api->create_function(
18997 db, "fts5_decode", 2, SQLITE_UTF81, 0, fts5DecodeFunction, 0, 0
18998 );
18999
19000 if( rc==SQLITE_OK0 ){
19001 rc = sqlite3_create_functionsqlite3_api->create_function(
19002 db, "fts5_decode_none", 2,
19003 SQLITE_UTF81, (void*)db, fts5DecodeFunction, 0, 0
19004 );
19005 }
19006
19007 if( rc==SQLITE_OK0 ){
19008 rc = sqlite3_create_functionsqlite3_api->create_function(
19009 db, "fts5_rowid", -1, SQLITE_UTF81, 0, fts5RowidFunction, 0, 0
19010 );
19011 }
19012
19013 if( rc==SQLITE_OK0 ){
19014 static const sqlite3_module fts5structure_module = {
19015 0, /* iVersion */
19016 0, /* xCreate */
19017 fts5structConnectMethod, /* xConnect */
19018 fts5structBestIndexMethod, /* xBestIndex */
19019 fts5structDisconnectMethod, /* xDisconnect */
19020 0, /* xDestroy */
19021 fts5structOpenMethod, /* xOpen */
19022 fts5structCloseMethod, /* xClose */
19023 fts5structFilterMethod, /* xFilter */
19024 fts5structNextMethod, /* xNext */
19025 fts5structEofMethod, /* xEof */
19026 fts5structColumnMethod, /* xColumn */
19027 fts5structRowidMethod, /* xRowid */
19028 0, /* xUpdate */
19029 0, /* xBegin */
19030 0, /* xSync */
19031 0, /* xCommit */
19032 0, /* xRollback */
19033 0, /* xFindFunction */
19034 0, /* xRename */
19035 0, /* xSavepoint */
19036 0, /* xRelease */
19037 0, /* xRollbackTo */
19038 0, /* xShadowName */
19039 0 /* xIntegrity */
19040 };
19041 rc = sqlite3_create_modulesqlite3_api->create_module(db, "fts5_structure", &fts5structure_module, 0);
19042 }
19043 return rc;
19044#else
19045 return SQLITE_OK0;
19046 UNUSED_PARAM(db)(void)(db);
19047#endif
19048}
19049
19050
19051static int sqlite3Fts5IndexReset(Fts5Index *p){
19052 assert( p->pStruct==0 || p->iStructVersion!=0 )((void) (0));
19053 if( fts5IndexDataVersion(p)!=p->iStructVersion ){
19054 fts5StructureInvalidate(p);
19055 }
19056 return fts5IndexReturn(p);
19057}
19058
19059#line 1 "fts5_main.c"
19060/*
19061** 2014 Jun 09
19062**
19063** The author disclaims copyright to this source code. In place of
19064** a legal notice, here is a blessing:
19065**
19066** May you do good and not evil.
19067** May you find forgiveness for yourself and forgive others.
19068** May you share freely, never taking more than you give.
19069**
19070******************************************************************************
19071**
19072** This is an SQLite module implementing full-text search.
19073*/
19074
19075
19076/* #include "fts5Int.h" */
19077
/*
** This variable is set to false when running tests for which the on disk
** structures should not be corrupt. Otherwise, true. If it is false, extra
** assert() conditions in the fts5 code are activated - conditions that are
** only true if it is guaranteed that the fts5 database is not corrupt.
**
** The variable exists only in SQLITE_DEBUG builds and defaults to true.
*/
#ifdef SQLITE_DEBUG
int sqlite3_fts5_may_be_corrupt = 1;
#endif
19087
19088
/* Forward declarations of the object types defined in this file. */
typedef struct Fts5Auxdata Fts5Auxdata;
typedef struct Fts5Auxiliary Fts5Auxiliary;
typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5FullTable Fts5FullTable;
typedef struct Fts5Sorter Fts5Sorter;
typedef struct Fts5TokenizerModule Fts5TokenizerModule;
19095
/*
** NOTES ON TRANSACTIONS:
**
** SQLite invokes the following virtual table methods as transactions are
** opened and closed by the user:
**
**     xBegin():    Start of a new transaction.
**     xSync():     Initial part of two-phase commit.
**     xCommit():   Final part of two-phase commit.
**     xRollback(): Rollback the transaction.
**
** Anything that is required as part of a commit that may fail is performed
** in the xSync() callback. Current versions of SQLite ignore any errors
** returned by xCommit().
**
** And as sub-transactions are opened/closed:
**
**     xSavepoint(int S):  Open savepoint S.
**     xRelease(int S):    Commit and close savepoint S.
**     xRollbackTo(int S): Rollback to start of savepoint S.
**
** During a write-transaction the fts5_index.c module may cache some data
** in-memory. It is flushed to disk whenever xSync(), xRelease() or
** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo()
** is called.
**
** Additionally, if SQLITE_DEBUG is defined, an instance of the following
** structure is used to record the current transaction state. This information
** is not required, but it is used in the assert() statements executed by
** function fts5CheckTransactionState() (see below).
*/
struct Fts5TransactionState {
  int eState;                     /* 0==closed, 1==open, 2==synced */
  int iSavepoint;                 /* Number of open savepoints (0 -> none) */
};
19131
19132/*
19133** A single object of this type is allocated when the FTS5 module is
19134** registered with a database handle. It is used to store pointers to
19135** all registered FTS5 extensions - tokenizers and auxiliary functions.
19136*/
19137struct Fts5Global {
19138 fts5_api api; /* User visible part of object (see fts5.h) */
19139 sqlite3 *db; /* Associated database connection */
19140 i64 iNextId; /* Used to allocate unique cursor ids */
19141 Fts5Auxiliary *pAux; /* First in list of all aux. functions */
19142 Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */
19143 Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */
19144 Fts5Cursor *pCsr; /* First in list of all open cursors */
19145 u32 aLocaleHdr[4];
19146};
19147
/*
** Size of header on fts5_locale() values. And macro to access a buffer
** containing a copy of the header from an Fts5Config pointer.
*/
#define FTS5_LOCALE_HDR_SIZE ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))
#define FTS5_LOCALE_HDR(pConfig) ((const u8*)((pConfig)->pGlobal->aLocaleHdr))

/* Value subtype constant; its users lie outside this part of the file. */
#define FTS5_INSTTOKEN_SUBTYPE 73
19156
19157/*
19158** Each auxiliary function registered with the FTS5 module is represented
19159** by an object of the following type. All such objects are stored as part
19160** of the Fts5Global.pAux list.
19161*/
19162struct Fts5Auxiliary {
19163 Fts5Global *pGlobal; /* Global context for this function */
19164 char *zFunc; /* Function name (nul-terminated) */
19165 void *pUserData; /* User-data pointer */
19166 fts5_extension_function xFunc; /* Callback function */
19167 void (*xDestroy)(void*); /* Destructor function */
19168 Fts5Auxiliary *pNext; /* Next registered auxiliary function */
19169};
19170
19171/*
19172** Each tokenizer module registered with the FTS5 module is represented
19173** by an object of the following type. All such objects are stored as part
19174** of the Fts5Global.pTok list.
19175**
19176** bV2Native:
19177** True if the tokenizer was registered using xCreateTokenizer_v2(), false
19178** for xCreateTokenizer(). If this variable is true, then x2 is populated
19179** with the routines as supplied by the caller and x1 contains synthesized
19180** wrapper routines. In this case the user-data pointer passed to
19181** x1.xCreate should be a pointer to the Fts5TokenizerModule structure,
19182** not a copy of pUserData.
19183**
19184** Of course, if bV2Native is false, then x1 contains the real routines and
19185** x2 the synthesized ones. In this case a pointer to the Fts5TokenizerModule
19186** object should be passed to x2.xCreate.
19187**
19188** The synthesized wrapper routines are necessary for xFindTokenizer(_v2)
19189** calls.
19190*/
19191struct Fts5TokenizerModule {
19192 char *zName; /* Name of tokenizer */
19193 void *pUserData; /* User pointer passed to xCreate() */
19194 int bV2Native; /* True if v2 native tokenizer */
19195 fts5_tokenizer x1; /* Tokenizer functions */
19196 fts5_tokenizer_v2 x2; /* V2 tokenizer functions */
19197 void (*xDestroy)(void*); /* Destructor function */
19198 Fts5TokenizerModule *pNext; /* Next registered tokenizer module */
19199};
19200
19201struct Fts5FullTable {
19202 Fts5Table p; /* Public class members from fts5Int.h */
19203 Fts5Storage *pStorage; /* Document store */
19204 Fts5Global *pGlobal; /* Global (connection wide) data */
19205 Fts5Cursor *pSortCsr; /* Sort data from this cursor */
19206 int iSavepoint; /* Successful xSavepoint()+1 */
19207
19208#ifdef SQLITE_DEBUG
19209 struct Fts5TransactionState ts;
19210#endif
19211};
19212
19213struct Fts5MatchPhrase {
19214 Fts5Buffer *pPoslist; /* Pointer to current poslist */
19215 int nTerm; /* Size of phrase in terms */
19216};
19217
19218/*
19219** pStmt:
19220** SELECT rowid, <fts> FROM <fts> ORDER BY +rank;
19221**
19222** aIdx[]:
19223** There is one entry in the aIdx[] array for each phrase in the query,
19224** the value of which is the offset within aPoslist[] following the last
19225** byte of the position list for the corresponding phrase.
19226*/
19227struct Fts5Sorter {
19228 sqlite3_stmt *pStmt;
19229 i64 iRowid; /* Current rowid */
19230 const u8 *aPoslist; /* Position lists for current row */
19231 int nIdx; /* Number of entries in aIdx[] */
19232 int aIdx[FLEXARRAY]; /* Offsets into aPoslist for current row */
19233};
19234
/* Size (in bytes) of an Fts5Sorter object with N indexes. The argument is
** parenthesized so that any expression may be passed for N. */
#define SZ_FTS5SORTER(N) (offsetof(Fts5Sorter,nIdx)+(((N)+2)/2)*sizeof(i64))
19237
19238/*
19239** Virtual-table cursor object.
19240**
19241** iSpecial:
19242** If this is a 'special' query (refer to function fts5SpecialMatch()),
19243** then this variable contains the result of the query.
19244**
19245** iFirstRowid, iLastRowid:
19246** These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the
19247** cursor iterates in ascending order of rowids, iFirstRowid is the lower
19248** limit of rowids to return, and iLastRowid the upper. In other words, the
19249** WHERE clause in the user's query might have been:
19250**
19251** <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid
19252**
19253** If the cursor iterates in descending order of rowid, iFirstRowid
19254** is the upper limit (i.e. the "first" rowid visited) and iLastRowid
19255** the lower.
19256*/
19257struct Fts5Cursor {
19258 sqlite3_vtab_cursor base; /* Base class used by SQLite core */
19259 Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */
19260 int *aColumnSize; /* Values for xColumnSize() */
19261 i64 iCsrId; /* Cursor id */
19262
19263 /* Zero from this point onwards on cursor reset */
19264 int ePlan; /* FTS5_PLAN_XXX value */
19265 int bDesc; /* True for "ORDER BY rowid DESC" queries */
19266 i64 iFirstRowid; /* Return no rowids earlier than this */
19267 i64 iLastRowid; /* Return no rowids later than this */
19268 sqlite3_stmt *pStmt; /* Statement used to read %_content */
19269 Fts5Expr *pExpr; /* Expression for MATCH queries */
19270 Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */
19271 int csrflags; /* Mask of cursor flags (see below) */
19272 i64 iSpecial; /* Result of special query */
19273
19274 /* "rank" function. Populated on demand from vtab.xColumn(). */
19275 char *zRank; /* Custom rank function */
19276 char *zRankArgs; /* Custom rank function args */
19277 Fts5Auxiliary *pRank; /* Rank callback (or NULL) */
19278 int nRankArg; /* Number of trailing arguments for rank() */
19279 sqlite3_value **apRankArg; /* Array of trailing arguments */
19280 sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */
19281
19282 /* Auxiliary data storage */
19283 Fts5Auxiliary *pAux; /* Currently executing extension function */
19284 Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */
19285
19286 /* Cache used by auxiliary API functions xInst() and xInstCount() */
19287 Fts5PoslistReader *aInstIter; /* One for each phrase */
19288 int nInstAlloc; /* Size of aInst[] array (entries / 3) */
19289 int nInstCount; /* Number of phrase instances */
19290 int *aInst; /* 3 integers per phrase instance */
19291};
19292
/*
** Bits that make up the "idxNum" parameter passed indirectly by
** xBestIndex() to xFilter().
*/
#define FTS5_BI_MATCH        0x0001         /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002         /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004         /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008         /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010         /* rowid >= ? */

#define FTS5_BI_ORDER_RANK   0x0020
#define FTS5_BI_ORDER_ROWID  0x0040
#define FTS5_BI_ORDER_DESC   0x0080

/*
** Values for Fts5Cursor.csrflags
*/
#define FTS5CSR_EOF               0x01
#define FTS5CSR_REQUIRE_CONTENT   0x02
#define FTS5CSR_REQUIRE_DOCSIZE   0x04
#define FTS5CSR_REQUIRE_INST      0x08
#define FTS5CSR_FREE_ZRANK        0x10
#define FTS5CSR_REQUIRE_RESEEK    0x20
#define FTS5CSR_REQUIRE_POSLIST   0x40

/* True if all (BitFlagAllTest) or any (BitFlagTest) bits of y are set in x */
#define BitFlagAllTest(x,y) (((x) & (y))==(y))
#define BitFlagTest(x,y)    (((x) & (y))!=0)


/*
** Macros to Set(), Clear() and Test() cursor flags.
*/
#define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))
19328
19329struct Fts5Auxdata {
19330 Fts5Auxiliary *pAux; /* Extension to which this belongs */
19331 void *pPtr; /* Pointer value */
19332 void(*xDelete)(void*); /* Destructor */
19333 Fts5Auxdata *pNext; /* Next object in linked list */
19334};
19335
/*
** In SQLITE_DEBUG builds, fts5CheckTransactionState() asserts that the
** transaction operation op (one of the FTS5_XXX values below) is legal
** given the state recorded in p->ts, then updates that state. In other
** builds it is a macro that expands to nothing.
*/
#ifdef SQLITE_DEBUG
#define FTS5_BEGIN      1
#define FTS5_SYNC       2
#define FTS5_COMMIT     3
#define FTS5_ROLLBACK   4
#define FTS5_SAVEPOINT  5
#define FTS5_RELEASE    6
#define FTS5_ROLLBACKTO 7
static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){
  switch( op ){
    case FTS5_BEGIN:
      assert( p->ts.eState==0 );
      p->ts.eState = 1;
      p->ts.iSavepoint = -1;
      break;

    case FTS5_SYNC:
      assert( p->ts.eState==1 || p->ts.eState==2 );
      p->ts.eState = 2;
      break;

    case FTS5_COMMIT:
      assert( p->ts.eState==2 );
      p->ts.eState = 0;
      break;

    case FTS5_ROLLBACK:
      assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 );
      p->ts.eState = 0;
      break;

    case FTS5_SAVEPOINT:
      assert( p->ts.eState>=1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint>=p->ts.iSavepoint );
      p->ts.iSavepoint = iSavepoint;
      break;

    case FTS5_RELEASE:
      assert( p->ts.eState>=1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint<=p->ts.iSavepoint );
      p->ts.iSavepoint = iSavepoint-1;
      break;

    case FTS5_ROLLBACKTO:
      assert( p->ts.eState>=1 );
      assert( iSavepoint>=-1 );
      /* The following assert() can fail if another vtab strikes an error
      ** within an xSavepoint() call then SQLite calls xRollbackTo() - without
      ** having called xSavepoint() on this vtab.  */
      /* assert( iSavepoint<=p->ts.iSavepoint ); */
      p->ts.iSavepoint = iSavepoint;
      break;
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif
19395
19396/*
19397** Return true if pTab is a contentless table. If parameter bIncludeUnindexed
19398** is true, this includes contentless tables that store UNINDEXED columns
19399** only.
19400*/
19401static int fts5IsContentless(Fts5FullTable *pTab, int bIncludeUnindexed){
19402 int eContent = pTab->p.pConfig->eContent;
19403 return (
19404 eContent==FTS5_CONTENT_NONE1
19405 || (bIncludeUnindexed && eContent==FTS5_CONTENT_UNINDEXED3)
19406 );
19407}
19408
19409/*
19410** Delete a virtual table handle allocated by fts5InitVtab().
19411*/
19412static void fts5FreeVtab(Fts5FullTable *pTab){
19413 if( pTab ){
19414 sqlite3Fts5IndexClose(pTab->p.pIndex);
19415 sqlite3Fts5StorageClose(pTab->pStorage);
19416 sqlite3Fts5ConfigFree(pTab->p.pConfig);
19417 sqlite3_freesqlite3_api->free(pTab);
19418 }
19419}
19420
19421/*
19422** The xDisconnect() virtual table method.
19423*/
19424static int fts5DisconnectMethod(sqlite3_vtab *pVtab){
19425 fts5FreeVtab((Fts5FullTable*)pVtab);
19426 return SQLITE_OK0;
19427}
19428
19429/*
19430** The xDestroy() virtual table method.
19431*/
19432static int fts5DestroyMethod(sqlite3_vtab *pVtab){
19433 Fts5Table *pTab = (Fts5Table*)pVtab;
19434 int rc = sqlite3Fts5DropAll(pTab->pConfig);
19435 if( rc==SQLITE_OK0 ){
19436 fts5FreeVtab((Fts5FullTable*)pVtab);
19437 }
19438 return rc;
19439}
19440
19441/*
19442** This function is the implementation of both the xConnect and xCreate
19443** methods of the FTS3 virtual table.
19444**
19445** The argv[] array contains the following:
19446**
19447** argv[0] -> module name ("fts5")
19448** argv[1] -> database name
19449** argv[2] -> table name
19450** argv[...] -> "column name" and other module argument fields.
19451*/
19452static int fts5InitVtab(
19453 int bCreate, /* True for xCreate, false for xConnect */
19454 sqlite3 *db, /* The SQLite database connection */
19455 void *pAux, /* Hash table containing tokenizers */
19456 int argc, /* Number of elements in argv array */
19457 const char * const *argv, /* xCreate/xConnect argument array */
19458 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
19459 char **pzErr /* Write any error message here */
19460){
19461 Fts5Global *pGlobal = (Fts5Global*)pAux;
19462 const char **azConfig = (const char**)argv;
19463 int rc = SQLITE_OK0; /* Return code */
19464 Fts5Config *pConfig = 0; /* Results of parsing argc/argv */
19465 Fts5FullTable *pTab = 0; /* New virtual table object */
19466
19467 /* Allocate the new vtab object and parse the configuration */
19468 pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable));
19469 if( rc==SQLITE_OK0 ){
19470 rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr);
19471 assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 )((void) (0));
19472 }
19473 if( rc==SQLITE_OK0 ){
19474 pConfig->pzErrmsg = pzErr;
19475 pTab->p.pConfig = pConfig;
19476 pTab->pGlobal = pGlobal;
19477 if( bCreate || sqlite3Fts5TokenizerPreload(&pConfig->t) ){
19478 rc = sqlite3Fts5LoadTokenizer(pConfig);
19479 }
19480 }
19481
19482 /* Open the index sub-system */
19483 if( rc==SQLITE_OK0 ){
19484 rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr);
19485 }
19486
19487 /* Open the storage sub-system */
19488 if( rc==SQLITE_OK0 ){
19489 rc = sqlite3Fts5StorageOpen(
19490 pConfig, pTab->p.pIndex, bCreate, &pTab->pStorage, pzErr
19491 );
19492 }
19493
19494 /* Call sqlite3_declare_vtab() */
19495 if( rc==SQLITE_OK0 ){
19496 rc = sqlite3Fts5ConfigDeclareVtab(pConfig);
19497 }
19498
19499 /* Load the initial configuration */
19500 if( rc==SQLITE_OK0 ){
19501 rc = sqlite3Fts5ConfigLoad(pTab->p.pConfig, pTab->p.pConfig->iCookie-1);
19502 }
19503
19504 if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){
19505 rc = sqlite3_vtab_configsqlite3_api->vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT1, (int)1);
19506 }
19507 if( rc==SQLITE_OK0 ){
19508 rc = sqlite3_vtab_configsqlite3_api->vtab_config(db, SQLITE_VTAB_INNOCUOUS2);
19509 }
19510
19511 if( pConfig ) pConfig->pzErrmsg = 0;
19512 if( rc!=SQLITE_OK0 ){
19513 fts5FreeVtab(pTab);
19514 pTab = 0;
19515 }else if( bCreate ){
19516 fts5CheckTransactionState(pTab, FTS5_BEGIN, 0);
19517 }
19518 *ppVTab = (sqlite3_vtab*)pTab;
19519 return rc;
19520}
19521
19522/*
19523** The xConnect() and xCreate() methods for the virtual table. All the
19524** work is done in function fts5InitVtab().
19525*/
19526static int fts5ConnectMethod(
19527 sqlite3 *db, /* Database connection */
19528 void *pAux, /* Pointer to tokenizer hash table */
19529 int argc, /* Number of elements in argv array */
19530 const char * const *argv, /* xCreate/xConnect argument array */
19531 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
19532 char **pzErr /* OUT: sqlite3_malloc'd error message */
19533){
19534 return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
19535}
19536static int fts5CreateMethod(
19537 sqlite3 *db, /* Database connection */
19538 void *pAux, /* Pointer to tokenizer hash table */
19539 int argc, /* Number of elements in argv array */
19540 const char * const *argv, /* xCreate/xConnect argument array */
19541 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
19542 char **pzErr /* OUT: sqlite3_malloc'd error message */
19543){
19544 return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
19545}
19546
/*
** The different query plans.
*/
#define FTS5_PLAN_MATCH          1       /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SOURCE         2       /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        3       /* An internal query */
#define FTS5_PLAN_SORTED_MATCH   4       /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_SCAN           5       /* No usable constraint */
#define FTS5_PLAN_ROWID          6       /* (rowid = ?) */
19556
19557/*
19558** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this
19559** extension is currently being used by a version of SQLite too old to
19560** support index-info flags. In that case this function is a no-op.
19561*/
19562static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){
19563#if SQLITE_VERSION_NUMBER3050001>=3008012
19564#ifndef SQLITE_CORE
19565 if( sqlite3_libversion_numbersqlite3_api->libversion_number()>=3008012 )
19566#endif
19567 {
19568 pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE0x00000001;
19569 }
19570#endif
19571}
19572
19573static int fts5UsePatternMatch(
19574 Fts5Config *pConfig,
19575 struct sqlite3_index_constraint *p
19576){
19577 assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB )((void) (0));
19578 assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE )((void) (0));
19579 if( pConfig->t.ePattern==FTS5_PATTERN_GLOB66 && p->op==FTS5_PATTERN_GLOB66 ){
19580 return 1;
19581 }
19582 if( pConfig->t.ePattern==FTS5_PATTERN_LIKE65
19583 && (p->op==FTS5_PATTERN_LIKE65 || p->op==FTS5_PATTERN_GLOB66)
19584 ){
19585 return 1;
19586 }
19587 return 0;
19588}
19589
19590/*
19591** Implementation of the xBestIndex method for FTS5 tables. Within the
19592** WHERE constraint, it searches for the following:
19593**
19594** 1. A MATCH constraint against the table column.
19595** 2. A MATCH constraint against the "rank" column.
19596** 3. A MATCH constraint against some other column.
19597** 4. An == constraint against the rowid column.
19598** 5. A < or <= constraint against the rowid column.
19599** 6. A > or >= constraint against the rowid column.
19600**
19601** Within the ORDER BY, the following are supported:
19602**
19603** 5. ORDER BY rank [ASC|DESC]
19604** 6. ORDER BY rowid [ASC|DESC]
19605**
19606** Information for the xFilter call is passed via both the idxNum and
19607** idxStr variables. Specifically, idxNum is a bitmask of the following
19608** flags used to encode the ORDER BY clause:
19609**
19610** FTS5_BI_ORDER_RANK
19611** FTS5_BI_ORDER_ROWID
19612** FTS5_BI_ORDER_DESC
19613**
19614** idxStr is used to encode data from the WHERE clause. For each argument
19615** passed to the xFilter method, the following is appended to idxStr:
19616**
19617** Match against table column: "m"
19618** Match against rank column: "r"
19619** Match against other column: "M<column-number>"
19620** LIKE against other column: "L<column-number>"
19621** GLOB against other column: "G<column-number>"
19622** Equality constraint against the rowid: "="
19623** A < or <= against the rowid: "<"
19624** A > or >= against the rowid: ">"
19625**
19626** This function ensures that there is at most one "r" or "=". And that if
19627** there exists an "=" then there is no "<" or ">".
19628**
19629** If an unusable MATCH operator is present in the WHERE clause, then
19630** SQLITE_CONSTRAINT is returned.
19631**
19632** Costs are assigned as follows:
19633**
19634** a) If a MATCH operator is present, the cost depends on the other
19635** constraints also present. As follows:
19636**
19637** * No other constraints: cost=1000.0
19638** * One rowid range constraint: cost=750.0
19639** * Both rowid range constraints: cost=500.0
19640** * An == rowid constraint: cost=100.0
19641**
19642** b) Otherwise, if there is no MATCH:
19643**
19644** * No other constraints: cost=1000000.0
19645** * One rowid range constraint: cost=750000.0
19646** * Both rowid range constraints: cost=250000.0
19647** * An == rowid constraint: cost=10.0
19648**
19649** Costs are not modified by the ORDER BY clause.
19650*/
19651static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
19652 Fts5Table *pTab = (Fts5Table*)pVTab;
19653 Fts5Config *pConfig = pTab->pConfig;
19654 const int nCol = pConfig->nCol;
19655 int idxFlags = 0; /* Parameter passed through to xFilter() */
19656 int i;
19657
19658 char *idxStr;
19659 int iIdxStr = 0;
19660 int iCons = 0;
19661
19662 int bSeenEq = 0;
19663 int bSeenGt = 0;
19664 int bSeenLt = 0;
19665 int nSeenMatch = 0;
19666 int bSeenRank = 0;
19667
19668
19669 assert( SQLITE_INDEX_CONSTRAINT_EQ<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0));
19670 assert( SQLITE_INDEX_CONSTRAINT_GT<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0));
19671 assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0));
19672 assert( SQLITE_INDEX_CONSTRAINT_GE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0));
19673 assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0));
19674
19675 if( pConfig->bLock ){
19676 pTab->base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf(
19677 "recursively defined fts5 content table"
19678 );
19679 return SQLITE_ERROR1;
19680 }
19681
19682 idxStr = (char*)sqlite3_mallocsqlite3_api->malloc(pInfo->nConstraint * 8 + 1);
19683 if( idxStr==0 ) return SQLITE_NOMEM7;
19684 pInfo->idxStr = idxStr;
19685 pInfo->needToFreeIdxStr = 1;
19686
19687 for(i=0; i<pInfo->nConstraint; i++){
19688 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
19689 int iCol = p->iColumn;
19690 if( p->op==SQLITE_INDEX_CONSTRAINT_MATCH64
19691 || (p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && iCol>=nCol)
19692 ){
19693 /* A MATCH operator or equivalent */
19694 if( p->usable==0 || iCol<0 ){
19695 /* As there exists an unusable MATCH constraint this is an
19696 ** unusable plan. Return SQLITE_CONSTRAINT. */
19697 idxStr[iIdxStr] = 0;
19698 return SQLITE_CONSTRAINT19;
19699 }else{
19700 if( iCol==nCol+1 ){
19701 if( bSeenRank ) continue;
19702 idxStr[iIdxStr++] = 'r';
19703 bSeenRank = 1;
19704 }else{
19705 nSeenMatch++;
19706 idxStr[iIdxStr++] = 'M';
19707 sqlite3_snprintfsqlite3_api->xsnprintf(6, &idxStr[iIdxStr], "%d", iCol);
19708 idxStr += strlen(&idxStr[iIdxStr]);
19709 assert( idxStr[iIdxStr]=='\0' )((void) (0));
19710 }
19711 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
19712 pInfo->aConstraintUsage[i].omit = 1;
19713 }
19714 }else if( p->usable ){
19715 if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){
19716 assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB )((void) (0));
19717 idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE65 ? 'L' : 'G';
19718 sqlite3_snprintfsqlite3_api->xsnprintf(6, &idxStr[iIdxStr], "%d", iCol);
19719 idxStr += strlen(&idxStr[iIdxStr]);
19720 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
19721 assert( idxStr[iIdxStr]=='\0' )((void) (0));
19722 nSeenMatch++;
19723 }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && iCol<0 ){
19724 idxStr[iIdxStr++] = '=';
19725 bSeenEq = 1;
19726 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
19727 }
19728 }
19729 }
19730
19731 if( bSeenEq==0 ){
19732 for(i=0; i<pInfo->nConstraint; i++){
19733 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
19734 if( p->iColumn<0 && p->usable ){
19735 int op = p->op;
19736 if( op==SQLITE_INDEX_CONSTRAINT_LT16 || op==SQLITE_INDEX_CONSTRAINT_LE8 ){
19737 if( bSeenLt ) continue;
19738 idxStr[iIdxStr++] = '<';
19739 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
19740 bSeenLt = 1;
19741 }else
19742 if( op==SQLITE_INDEX_CONSTRAINT_GT4 || op==SQLITE_INDEX_CONSTRAINT_GE32 ){
19743 if( bSeenGt ) continue;
19744 idxStr[iIdxStr++] = '>';
19745 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
19746 bSeenGt = 1;
19747 }
19748 }
19749 }
19750 }
19751 idxStr[iIdxStr] = '\0';
19752
19753 /* Set idxFlags flags for the ORDER BY clause
19754 **
19755 ** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC".
19756 */
19757 if( pInfo->nOrderBy==1 ){
19758 int iSort = pInfo->aOrderBy[0].iColumn;
19759 if( iSort==(pConfig->nCol+1) && nSeenMatch>0 ){
19760 idxFlags |= FTS5_BI_ORDER_RANK0x0020;
19761 }else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){
19762 idxFlags |= FTS5_BI_ORDER_ROWID0x0040;
19763 }
19764 if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID)(((idxFlags) & (0x0020|0x0040))!=0) ){
19765 pInfo->orderByConsumed = 1;
19766 if( pInfo->aOrderBy[0].desc ){
19767 idxFlags |= FTS5_BI_ORDER_DESC0x0080;
19768 }
19769 }
19770 }
19771
19772 /* Calculate the estimated cost based on the flags set in idxFlags. */
19773 if( bSeenEq ){
19774 pInfo->estimatedCost = nSeenMatch ? 1000.0 : 10.0;
19775 if( nSeenMatch==0 ) fts5SetUniqueFlag(pInfo);
19776 }else if( bSeenLt && bSeenGt ){
19777 pInfo->estimatedCost = nSeenMatch ? 5000.0 : 250000.0;
19778 }else if( bSeenLt || bSeenGt ){
19779 pInfo->estimatedCost = nSeenMatch ? 7500.0 : 750000.0;
19780 }else{
19781 pInfo->estimatedCost = nSeenMatch ? 10000.0 : 1000000.0;
19782 }
19783 for(i=1; i<nSeenMatch; i++){
19784 pInfo->estimatedCost *= 0.4;
19785 }
19786
19787 pInfo->idxNum = idxFlags;
19788 return SQLITE_OK0;
19789}
19790
19791static int fts5NewTransaction(Fts5FullTable *pTab){
19792 Fts5Cursor *pCsr;
19793 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
19794 if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK0;
19795 }
19796 return sqlite3Fts5StorageReset(pTab->pStorage);
19797}
19798
19799/*
19800** Implementation of xOpen method.
19801*/
19802static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
19803 Fts5FullTable *pTab = (Fts5FullTable*)pVTab;
19804 Fts5Config *pConfig = pTab->p.pConfig;
19805 Fts5Cursor *pCsr = 0; /* New cursor object */
19806 sqlite3_int64 nByte; /* Bytes of space to allocate */
19807 int rc; /* Return code */
19808
19809 rc = fts5NewTransaction(pTab);
19810 if( rc==SQLITE_OK0 ){
19811 nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int);
19812 pCsr = (Fts5Cursor*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
19813 if( pCsr ){
19814 Fts5Global *pGlobal = pTab->pGlobal;
19815 memset(pCsr, 0, (size_t)nByte);
19816 pCsr->aColumnSize = (int*)&pCsr[1];
19817 pCsr->pNext = pGlobal->pCsr;
19818 pGlobal->pCsr = pCsr;
19819 pCsr->iCsrId = ++pGlobal->iNextId;
19820 }else{
19821 rc = SQLITE_NOMEM7;
19822 }
19823 }
19824 *ppCsr = (sqlite3_vtab_cursor*)pCsr;
19825 return rc;
19826}
19827
19828static int fts5StmtType(Fts5Cursor *pCsr){
19829 if( pCsr->ePlan==FTS5_PLAN_SCAN5 ){
19830 return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC1 : FTS5_STMT_SCAN_ASC0;
19831 }
19832 return FTS5_STMT_LOOKUP2;
19833}
19834
19835/*
19836** This function is called after the cursor passed as the only argument
19837** is moved to point at a different row. It clears all cached data
19838** specific to the previous row stored by the cursor object.
19839*/
19840static void fts5CsrNewrow(Fts5Cursor *pCsr){
19841 CsrFlagSet(pCsr,((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40))
19842 FTS5CSR_REQUIRE_CONTENT((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40))
19843 | FTS5CSR_REQUIRE_DOCSIZE((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40))
19844 | FTS5CSR_REQUIRE_INST((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40))
19845 | FTS5CSR_REQUIRE_POSLIST((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40))
19846 )((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40));
19847}
19848
19849static void fts5FreeCursorComponents(Fts5Cursor *pCsr){
19850 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
19851 Fts5Auxdata *pData;
19852 Fts5Auxdata *pNext;
19853
19854 sqlite3_freesqlite3_api->free(pCsr->aInstIter);
19855 sqlite3_freesqlite3_api->free(pCsr->aInst);
19856 if( pCsr->pStmt ){
19857 int eStmt = fts5StmtType(pCsr);
19858 sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
19859 }
19860 if( pCsr->pSorter ){
19861 Fts5Sorter *pSorter = pCsr->pSorter;
19862 sqlite3_finalizesqlite3_api->finalize(pSorter->pStmt);
19863 sqlite3_freesqlite3_api->free(pSorter);
19864 }
19865
19866 if( pCsr->ePlan!=FTS5_PLAN_SOURCE2 ){
19867 sqlite3Fts5ExprFree(pCsr->pExpr);
19868 }
19869
19870 for(pData=pCsr->pAuxdata; pData; pData=pNext){
19871 pNext = pData->pNext;
19872 if( pData->xDelete ) pData->xDelete(pData->pPtr);
19873 sqlite3_freesqlite3_api->free(pData);
19874 }
19875
19876 sqlite3_finalizesqlite3_api->finalize(pCsr->pRankArgStmt);
19877 sqlite3_freesqlite3_api->free(pCsr->apRankArg);
19878
19879 if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK)((pCsr)->csrflags & (0x10)) ){
19880 sqlite3_freesqlite3_api->free(pCsr->zRank);
19881 sqlite3_freesqlite3_api->free(pCsr->zRankArgs);
19882 }
19883
19884 sqlite3Fts5IndexCloseReader(pTab->p.pIndex);
19885 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr));
19886}
19887
19888
19889/*
19890** Close the cursor. For additional information see the documentation
19891** on the xClose method of the virtual table interface.
19892*/
19893static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
19894 if( pCursor ){
19895 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
19896 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
19897 Fts5Cursor **pp;
19898
19899 fts5FreeCursorComponents(pCsr);
19900 /* Remove the cursor from the Fts5Global.pCsr list */
19901 for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
19902 *pp = pCsr->pNext;
19903
19904 sqlite3_freesqlite3_api->free(pCsr);
19905 }
19906 return SQLITE_OK0;
19907}
19908
19909static int fts5SorterNext(Fts5Cursor *pCsr){
19910 Fts5Sorter *pSorter = pCsr->pSorter;
19911 int rc;
19912
19913 rc = sqlite3_stepsqlite3_api->step(pSorter->pStmt);
19914 if( rc==SQLITE_DONE101 ){
19915 rc = SQLITE_OK0;
19916 CsrFlagSet(pCsr, FTS5CSR_EOF|FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags |= (0x01|0x02));
19917 }else if( rc==SQLITE_ROW100 ){
19918 const u8 *a;
19919 const u8 *aBlob;
19920 int nBlob;
19921 int i;
19922 int iOff = 0;
19923 rc = SQLITE_OK0;
19924
19925 pSorter->iRowid = sqlite3_column_int64sqlite3_api->column_int64(pSorter->pStmt, 0);
19926 nBlob = sqlite3_column_bytessqlite3_api->column_bytes(pSorter->pStmt, 1);
19927 aBlob = a = sqlite3_column_blobsqlite3_api->column_blob(pSorter->pStmt, 1);
19928
19929 /* nBlob==0 in detail=none mode. */
19930 if( nBlob>0 ){
19931 for(i=0; i<(pSorter->nIdx-1); i++){
19932 int iVal;
19933 a += fts5GetVarint32(a, iVal)sqlite3Fts5GetVarint32(a,(u32*)&(iVal));
19934 iOff += iVal;
19935 pSorter->aIdx[i] = iOff;
19936 }
19937 pSorter->aIdx[i] = &aBlob[nBlob] - a;
19938 pSorter->aPoslist = a;
19939 }
19940
19941 fts5CsrNewrow(pCsr);
19942 }
19943
19944 return rc;
19945}
19946
19947
19948/*
19949** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors
19950** open on table pTab.
19951*/
19952static void fts5TripCursors(Fts5FullTable *pTab){
19953 Fts5Cursor *pCsr;
19954 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
19955 if( pCsr->ePlan==FTS5_PLAN_MATCH1
19956 && pCsr->base.pVtab==(sqlite3_vtab*)pTab
19957 ){
19958 CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags |= (0x20));
19959 }
19960 }
19961}
19962
19963/*
19964** If the REQUIRE_RESEEK flag is set on the cursor passed as the first
19965** argument, close and reopen all Fts5IndexIter iterators that the cursor
19966** is using. Then attempt to move the cursor to a rowid equal to or laster
19967** (in the cursors sort order - ASC or DESC) than the current rowid.
19968**
19969** If the new rowid is not equal to the old, set output parameter *pbSkip
19970** to 1 before returning. Otherwise, leave it unchanged.
19971**
19972** Return SQLITE_OK if successful or if no reseek was required, or an
19973** error code if an error occurred.
19974*/
19975static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){
19976 int rc = SQLITE_OK0;
19977 assert( *pbSkip==0 )((void) (0));
19978 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags & (0x20)) ){
19979 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
19980 int bDesc = pCsr->bDesc;
19981 i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr);
19982
19983 rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->p.pIndex, iRowid, bDesc);
19984 if( rc==SQLITE_OK0 && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){
19985 *pbSkip = 1;
19986 }
19987
19988 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags &= ~(0x20));
19989 fts5CsrNewrow(pCsr);
19990 if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
19991 CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01));
19992 *pbSkip = 1;
19993 }
19994 }
19995 return rc;
19996}
19997
19998
19999/*
20000** Advance the cursor to the next row in the table that matches the
20001** search criteria.
20002**
20003** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
20004** even if we reach end-of-file. The fts5EofMethod() will be called
20005** subsequently to determine whether or not an EOF was hit.
20006*/
20007static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
20008 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
20009 int rc;
20010
20011 assert( (pCsr->ePlan<3)==((void) (0))
20012 (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE)((void) (0))
20013 )((void) (0));
20014 assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) )((void) (0));
20015
20016 /* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table,
20017 ** clear any token mappings accumulated at the fts5_index.c level. In
20018 ** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH,
20019 ** we need to retain the mappings for the entire query. */
20020 if( pCsr->ePlan==FTS5_PLAN_MATCH1
20021 && ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata
20022 ){
20023 sqlite3Fts5ExprClearTokens(pCsr->pExpr);
20024 }
20025
20026 if( pCsr->ePlan<3 ){
20027 int bSkip = 0;
20028 if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;
20029 rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid);
20030 CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr))((pCsr)->csrflags |= (sqlite3Fts5ExprEof(pCsr->pExpr)));
20031 fts5CsrNewrow(pCsr);
20032 }else{
20033 switch( pCsr->ePlan ){
20034 case FTS5_PLAN_SPECIAL3: {
20035 CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01));
20036 rc = SQLITE_OK0;
20037 break;
20038 }
20039
20040 case FTS5_PLAN_SORTED_MATCH4: {
20041 rc = fts5SorterNext(pCsr);
20042 break;
20043 }
20044
20045 default: {
20046 Fts5Config *pConfig = ((Fts5Table*)pCursor->pVtab)->pConfig;
20047 pConfig->bLock++;
20048 rc = sqlite3_stepsqlite3_api->step(pCsr->pStmt);
20049 pConfig->bLock--;
20050 if( rc!=SQLITE_ROW100 ){
20051 CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01));
20052 rc = sqlite3_resetsqlite3_api->reset(pCsr->pStmt);
20053 if( rc!=SQLITE_OK0 ){
20054 pCursor->pVtab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf(
20055 "%s", sqlite3_errmsgsqlite3_api->errmsg(pConfig->db)
20056 );
20057 }
20058 }else{
20059 rc = SQLITE_OK0;
20060 CsrFlagSet(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags |= (0x04));
20061 }
20062 break;
20063 }
20064 }
20065 }
20066
20067 return rc;
20068}
20069
20070
20071static int fts5PrepareStatement(
20072 sqlite3_stmt **ppStmt,
20073 Fts5Config *pConfig,
20074 const char *zFmt,
20075 ...
20076){
20077 sqlite3_stmt *pRet = 0;
20078 int rc;
20079 char *zSql;
20080 va_list ap;
20081
20082 va_start(ap, zFmt)__builtin_va_start(ap, zFmt);
20083 zSql = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap);
20084 if( zSql==0 ){
20085 rc = SQLITE_NOMEM7;
20086 }else{
20087 rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pConfig->db, zSql, -1,
20088 SQLITE_PREPARE_PERSISTENT0x01, &pRet, 0);
20089 if( rc!=SQLITE_OK0 ){
20090 sqlite3Fts5ConfigErrmsg(pConfig, "%s", sqlite3_errmsgsqlite3_api->errmsg(pConfig->db));
20091 }
20092 sqlite3_freesqlite3_api->free(zSql);
20093 }
20094
20095 va_end(ap)__builtin_va_end(ap);
20096 *ppStmt = pRet;
20097 return rc;
20098}
20099
20100static int fts5CursorFirstSorted(
20101 Fts5FullTable *pTab,
20102 Fts5Cursor *pCsr,
20103 int bDesc
20104){
20105 Fts5Config *pConfig = pTab->p.pConfig;
20106 Fts5Sorter *pSorter;
20107 int nPhrase;
20108 sqlite3_int64 nByte;
20109 int rc;
20110 const char *zRank = pCsr->zRank;
20111 const char *zRankArgs = pCsr->zRankArgs;
20112
20113 nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
20114 nByte = SZ_FTS5SORTER(nPhrase)(__builtin_offsetof(Fts5Sorter, nIdx)+((nPhrase+2)/2)*sizeof(
i64))
;
20115 pSorter = (Fts5Sorter*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
20116 if( pSorter==0 ) return SQLITE_NOMEM7;
20117 memset(pSorter, 0, (size_t)nByte);
20118 pSorter->nIdx = nPhrase;
20119
20120 /* TODO: It would be better to have some system for reusing statement
20121 ** handles here, rather than preparing a new one for each query. But that
20122 ** is not possible as SQLite reference counts the virtual table objects.
20123 ** And since the statement required here reads from this very virtual
20124 ** table, saving it creates a circular reference.
20125 **
20126 ** If SQLite a built-in statement cache, this wouldn't be a problem. */
20127 rc = fts5PrepareStatement(&pSorter->pStmt, pConfig,
20128 "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(\"%w\"%s%s) %s",
20129 pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
20130 (zRankArgs ? ", " : ""),
20131 (zRankArgs ? zRankArgs : ""),
20132 bDesc ? "DESC" : "ASC"
20133 );
20134
20135 pCsr->pSorter = pSorter;
20136 if( rc==SQLITE_OK0 ){
20137 assert( pTab->pSortCsr==0 )((void) (0));
20138 pTab->pSortCsr = pCsr;
20139 rc = fts5SorterNext(pCsr);
20140 pTab->pSortCsr = 0;
20141 }
20142
20143 if( rc!=SQLITE_OK0 ){
20144 sqlite3_finalizesqlite3_api->finalize(pSorter->pStmt);
20145 sqlite3_freesqlite3_api->free(pSorter);
20146 pCsr->pSorter = 0;
20147 }
20148
20149 return rc;
20150}
20151
20152static int fts5CursorFirst(Fts5FullTable *pTab, Fts5Cursor *pCsr, int bDesc){
20153 int rc;
20154 Fts5Expr *pExpr = pCsr->pExpr;
20155 rc = sqlite3Fts5ExprFirst(pExpr, pTab->p.pIndex, pCsr->iFirstRowid, bDesc);
20156 if( sqlite3Fts5ExprEof(pExpr) ){
20157 CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01));
20158 }
20159 fts5CsrNewrow(pCsr);
20160 return rc;
20161}
20162
20163/*
20164** Process a "special" query. A special query is identified as one with a
20165** MATCH expression that begins with a '*' character. The remainder of
20166** the text passed to the MATCH operator are used as the special query
20167** parameters.
20168*/
20169static int fts5SpecialMatch(
20170 Fts5FullTable *pTab,
20171 Fts5Cursor *pCsr,
20172 const char *zQuery
20173){
20174 int rc = SQLITE_OK0; /* Return code */
20175 const char *z = zQuery; /* Special query text */
20176 int n; /* Number of bytes in text at z */
20177
20178 while( z[0]==' ' ) z++;
20179 for(n=0; z[n] && z[n]!=' '; n++);
20180
20181 assert( pTab->p.base.zErrMsg==0 )((void) (0));
20182 pCsr->ePlan = FTS5_PLAN_SPECIAL3;
20183
20184 if( n==5 && 0==sqlite3_strnicmpsqlite3_api->strnicmp("reads", z, n) ){
20185 pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->p.pIndex);
20186 }
20187 else if( n==2 && 0==sqlite3_strnicmpsqlite3_api->strnicmp("id", z, n) ){
20188 pCsr->iSpecial = pCsr->iCsrId;
20189 }
20190 else{
20191 /* An unrecognized directive. Return an error message. */
20192 pTab->p.base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("unknown special query: %.*s", n, z);
20193 rc = SQLITE_ERROR1;
20194 }
20195
20196 return rc;
20197}
20198
20199/*
20200** Search for an auxiliary function named zName that can be used with table
20201** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary
20202** structure. Otherwise, if no such function exists, return NULL.
20203*/
20204static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){
20205 Fts5Auxiliary *pAux;
20206
20207 for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){
20208 if( sqlite3_stricmpsqlite3_api->stricmp(zName, pAux->zFunc)==0 ) return pAux;
20209 }
20210
20211 /* No function of the specified name was found. Return 0. */
20212 return 0;
20213}
20214
20215
20216static int fts5FindRankFunction(Fts5Cursor *pCsr){
20217 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
20218 Fts5Config *pConfig = pTab->p.pConfig;
20219 int rc = SQLITE_OK0;
20220 Fts5Auxiliary *pAux = 0;
20221 const char *zRank = pCsr->zRank;
20222 const char *zRankArgs = pCsr->zRankArgs;
20223
20224 if( zRankArgs ){
20225 char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs);
20226 if( zSql ){
20227 sqlite3_stmt *pStmt = 0;
20228 rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pConfig->db, zSql, -1,
20229 SQLITE_PREPARE_PERSISTENT0x01, &pStmt, 0);
20230 sqlite3_freesqlite3_api->free(zSql);
20231 assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 )((void) (0));
20232 if( rc==SQLITE_OK0 ){
20233 if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pStmt) ){
20234 sqlite3_int64 nByte;
20235 pCsr->nRankArg = sqlite3_column_countsqlite3_api->column_count(pStmt);
20236 nByte = sizeof(sqlite3_value*)*pCsr->nRankArg;
20237 pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte);
20238 if( rc==SQLITE_OK0 ){
20239 int i;
20240 for(i=0; i<pCsr->nRankArg; i++){
20241 pCsr->apRankArg[i] = sqlite3_column_valuesqlite3_api->column_value(pStmt, i);
20242 }
20243 }
20244 pCsr->pRankArgStmt = pStmt;
20245 }else{
20246 rc = sqlite3_finalizesqlite3_api->finalize(pStmt);
20247 assert( rc!=SQLITE_OK )((void) (0));
20248 }
20249 }
20250 }
20251 }
20252
20253 if( rc==SQLITE_OK0 ){
20254 pAux = fts5FindAuxiliary(pTab, zRank);
20255 if( pAux==0 ){
20256 assert( pTab->p.base.zErrMsg==0 )((void) (0));
20257 pTab->p.base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("no such function: %s", zRank);
20258 rc = SQLITE_ERROR1;
20259 }
20260 }
20261
20262 pCsr->pRank = pAux;
20263 return rc;
20264}
20265
20266
20267static int fts5CursorParseRank(
20268 Fts5Config *pConfig,
20269 Fts5Cursor *pCsr,
20270 sqlite3_value *pRank
20271){
20272 int rc = SQLITE_OK0;
20273 if( pRank ){
20274 const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(pRank);
20275 char *zRank = 0;
20276 char *zRankArgs = 0;
20277
20278 if( z==0 ){
20279 if( sqlite3_value_typesqlite3_api->value_type(pRank)==SQLITE_NULL5 ) rc = SQLITE_ERROR1;
20280 }else{
20281 rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs);
20282 }
20283 if( rc==SQLITE_OK0 ){
20284 pCsr->zRank = zRank;
20285 pCsr->zRankArgs = zRankArgs;
20286 CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK)((pCsr)->csrflags |= (0x10));
20287 }else if( rc==SQLITE_ERROR1 ){
20288 pCsr->base.pVtab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf(
20289 "parse error in rank function: %s", z
20290 );
20291 }
20292 }else{
20293 if( pConfig->zRank ){
20294 pCsr->zRank = (char*)pConfig->zRank;
20295 pCsr->zRankArgs = (char*)pConfig->zRankArgs;
20296 }else{
20297 pCsr->zRank = (char*)FTS5_DEFAULT_RANK"bm25";
20298 pCsr->zRankArgs = 0;
20299 }
20300 }
20301 return rc;
20302}
20303
20304static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){
20305 if( pVal ){
20306 int eType = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal);
20307 if( eType==SQLITE_INTEGER1 ){
20308 return sqlite3_value_int64sqlite3_api->value_int64(pVal);
20309 }
20310 }
20311 return iDefault;
20312}
20313
20314/*
20315** Set the error message on the virtual table passed as the first argument.
20316*/
20317static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){
20318 va_list ap; /* ... printf arguments */
20319 va_start(ap, zFormat)__builtin_va_start(ap, zFormat);
20320 sqlite3_freesqlite3_api->free(p->p.base.zErrMsg);
20321 p->p.base.zErrMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, ap);
20322 va_end(ap)__builtin_va_end(ap);
20323}
20324
20325/*
20326** Arrange for subsequent calls to sqlite3Fts5Tokenize() to use the locale
20327** specified by pLocale/nLocale. The buffer indicated by pLocale must remain
20328** valid until after the final call to sqlite3Fts5Tokenize() that will use
20329** the locale.
20330*/
20331static void sqlite3Fts5SetLocale(
20332 Fts5Config *pConfig,
20333 const char *zLocale,
20334 int nLocale
20335){
20336 Fts5TokenizerConfig *pT = &pConfig->t;
20337 pT->pLocale = zLocale;
20338 pT->nLocale = nLocale;
20339}
20340
20341/*
20342** Clear any locale configured by an earlier call to sqlite3Fts5SetLocale().
20343*/
20344static void sqlite3Fts5ClearLocale(Fts5Config *pConfig){
20345 sqlite3Fts5SetLocale(pConfig, 0, 0);
20346}
20347
20348/*
20349** Return true if the value passed as the only argument is an
20350** fts5_locale() value.
20351*/
20352static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal){
20353 int ret = 0;
20354 if( sqlite3_value_typesqlite3_api->value_type(pVal)==SQLITE_BLOB4 ){
20355 /* Call sqlite3_value_bytes() after sqlite3_value_blob() in this case.
20356 ** If the blob was created using zeroblob(), then sqlite3_value_blob()
20357 ** may call malloc(). If this malloc() fails, then the values returned
20358 ** by both value_blob() and value_bytes() will be 0. If value_bytes() were
20359 ** called first, then the NULL pointer returned by value_blob() might
20360 ** be dereferenced. */
20361 const u8 *pBlob = sqlite3_value_blobsqlite3_api->value_blob(pVal);
20362 int nBlob = sqlite3_value_bytessqlite3_api->value_bytes(pVal);
20363 if( nBlob>FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))
20364 && 0==memcmp(pBlob, FTS5_LOCALE_HDR(pConfig)((const u8*)(pConfig->pGlobal->aLocaleHdr)), FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )))
20365 ){
20366 ret = 1;
20367 }
20368 }
20369 return ret;
20370}
20371
20372/*
20373** Value pVal is guaranteed to be an fts5_locale() value, according to
20374** sqlite3Fts5IsLocaleValue(). This function extracts the text and locale
20375** from the value and returns them separately.
20376**
20377** If successful, SQLITE_OK is returned and (*ppText) and (*ppLoc) set
20378** to point to buffers containing the text and locale, as utf-8,
20379** respectively. In this case output parameters (*pnText) and (*pnLoc) are
20380** set to the sizes in bytes of these two buffers.
20381**
20382** Or, if an error occurs, then an SQLite error code is returned. The final
20383** value of the four output parameters is undefined in this case.
20384*/
20385static int sqlite3Fts5DecodeLocaleValue(
20386 sqlite3_value *pVal,
20387 const char **ppText,
20388 int *pnText,
20389 const char **ppLoc,
20390 int *pnLoc
20391){
20392 const char *p = sqlite3_value_blobsqlite3_api->value_blob(pVal);
20393 int n = sqlite3_value_bytessqlite3_api->value_bytes(pVal);
20394 int nLoc = 0;
20395
20396 assert( sqlite3_value_type(pVal)==SQLITE_BLOB )((void) (0));
20397 assert( n>FTS5_LOCALE_HDR_SIZE )((void) (0));
20398
20399 for(nLoc=FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); p[nLoc]; nLoc++){
20400 if( nLoc==(n-1) ){
20401 return SQLITE_MISMATCH20;
20402 }
20403 }
20404 *ppLoc = &p[FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))];
20405 *pnLoc = nLoc - FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ));
20406
20407 *ppText = &p[nLoc+1];
20408 *pnText = n - nLoc - 1;
20409 return SQLITE_OK0;
20410}
20411
20412/*
20413** Argument pVal is the text of a full-text search expression. It may or
20414** may not have been wrapped by fts5_locale(). This function extracts
20415** the text of the expression, and sets output variable (*pzText) to
20416** point to a nul-terminated buffer containing the expression.
20417**
20418** If pVal was an fts5_locale() value, then sqlite3Fts5SetLocale() is called
20419** to set the tokenizer to use the specified locale.
20420**
20421** If output variable (*pbFreeAndReset) is set to true, then the caller
20422** is required to (a) call sqlite3Fts5ClearLocale() to reset the tokenizer
20423** locale, and (b) call sqlite3_free() to free (*pzText).
20424*/
20425static int fts5ExtractExprText(
20426 Fts5Config *pConfig, /* Fts5 configuration */
20427 sqlite3_value *pVal, /* Value to extract expression text from */
20428 char **pzText, /* OUT: nul-terminated buffer of text */
20429 int *pbFreeAndReset /* OUT: Free (*pzText) and clear locale */
20430){
20431 int rc = SQLITE_OK0;
20432
20433 if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
20434 const char *pText = 0;
20435 int nText = 0;
20436 const char *pLoc = 0;
20437 int nLoc = 0;
20438 rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
20439 *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, pText);
20440 if( rc==SQLITE_OK0 ){
20441 sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
20442 }
20443 *pbFreeAndReset = 1;
20444 }else{
20445 *pzText = (char*)sqlite3_value_textsqlite3_api->value_text(pVal);
20446 *pbFreeAndReset = 0;
20447 }
20448
20449 return rc;
20450}
20451
20452
20453/*
20454** This is the xFilter interface for the virtual table. See
20455** the virtual table xFilter method documentation for additional
20456** information.
20457**
20458** There are three possible query strategies:
20459**
20460** 1. Full-text search using a MATCH operator.
20461** 2. A by-rowid lookup.
20462** 3. A full-table scan.
20463*/
20464static int fts5FilterMethod(
20465 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
20466 int idxNum, /* Strategy index */
20467 const char *idxStr, /* Unused */
20468 int nVal, /* Number of elements in apVal */
20469 sqlite3_value **apVal /* Arguments for the indexing scheme */
20470){
20471 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
20472 Fts5Config *pConfig = pTab->p.pConfig;
20473 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
20474 int rc = SQLITE_OK0; /* Error code */
20475 int bDesc; /* True if ORDER BY [rank|rowid] DESC */
20476 int bOrderByRank; /* True if ORDER BY rank */
20477 sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */
20478 sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */
20479 sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */
20480 sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */
20481 int iCol; /* Column on LHS of MATCH operator */
20482 char **pzErrmsg = pConfig->pzErrmsg;
20483 int bPrefixInsttoken = pConfig->bPrefixInsttoken;
20484 int i;
20485 int iIdxStr = 0;
20486 Fts5Expr *pExpr = 0;
20487
20488 assert( pConfig->bLock==0 )((void) (0));
20489 if( pCsr->ePlan ){
20490 fts5FreeCursorComponents(pCsr);
20491 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr));
20492 }
20493
20494 assert( pCsr->pStmt==0 )((void) (0));
20495 assert( pCsr->pExpr==0 )((void) (0));
20496 assert( pCsr->csrflags==0 )((void) (0));
20497 assert( pCsr->pRank==0 )((void) (0));
20498 assert( pCsr->zRank==0 )((void) (0));
20499 assert( pCsr->zRankArgs==0 )((void) (0));
20500 assert( pTab->pSortCsr==0 || nVal==0 )((void) (0));
20501
20502 assert( pzErrmsg==0 || pzErrmsg==&pTab->p.base.zErrMsg )((void) (0));
20503 pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
20504
20505 /* Decode the arguments passed through to this function. */
20506 for(i=0; i<nVal; i++){
20507 switch( idxStr[iIdxStr++] ){
20508 case 'r':
20509 pRank = apVal[i];
20510 break;
20511 case 'M': {
20512 char *zText = 0;
20513 int bFreeAndReset = 0;
20514 int bInternal = 0;
20515
20516 rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset);
20517 if( rc!=SQLITE_OK0 ) goto filter_out;
20518 if( zText==0 ) zText = "";
20519 if( sqlite3_value_subtypesqlite3_api->value_subtype(apVal[i])==FTS5_INSTTOKEN_SUBTYPE73 ){
20520 pConfig->bPrefixInsttoken = 1;
20521 }
20522
20523 iCol = 0;
20524 do{
20525 iCol = iCol*10 + (idxStr[iIdxStr]-'0');
20526 iIdxStr++;
20527 }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );
20528
20529 if( zText[0]=='*' ){
20530 /* The user has issued a query of the form "MATCH '*...'". This
20531 ** indicates that the MATCH expression is not a full text query,
20532 ** but a request for an internal parameter. */
20533 rc = fts5SpecialMatch(pTab, pCsr, &zText[1]);
20534 bInternal = 1;
20535 }else{
20536 char **pzErr = &pTab->p.base.zErrMsg;
20537 rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
20538 if( rc==SQLITE_OK0 ){
20539 rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
20540 pExpr = 0;
20541 }
20542 }
20543
20544 if( bFreeAndReset ){
20545 sqlite3_freesqlite3_api->free(zText);
20546 sqlite3Fts5ClearLocale(pConfig);
20547 }
20548
20549 if( bInternal || rc!=SQLITE_OK0 ) goto filter_out;
20550
20551 break;
20552 }
20553 case 'L':
20554 case 'G': {
20555 int bGlob = (idxStr[iIdxStr-1]=='G');
20556 const char *zText = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[i]);
20557 iCol = 0;
20558 do{
20559 iCol = iCol*10 + (idxStr[iIdxStr]-'0');
20560 iIdxStr++;
20561 }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );
20562 if( zText ){
20563 rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr);
20564 }
20565 if( rc==SQLITE_OK0 ){
20566 rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
20567 pExpr = 0;
20568 }
20569 if( rc!=SQLITE_OK0 ) goto filter_out;
20570 break;
20571 }
20572 case '=':
20573 pRowidEq = apVal[i];
20574 break;
20575 case '<':
20576 pRowidLe = apVal[i];
20577 break;
20578 default: assert( idxStr[iIdxStr-1]=='>' )((void) (0));
20579 pRowidGe = apVal[i];
20580 break;
20581 }
20582 }
20583 bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK0x0020) ? 1 : 0);
20584 pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC0x0080) ? 1 : 0);
20585
20586 /* Set the cursor upper and lower rowid limits. Only some strategies
20587 ** actually use them. This is ok, as the xBestIndex() method leaves the
20588 ** sqlite3_index_constraint.omit flag clear for range constraints
20589 ** on the rowid field. */
20590 if( pRowidEq ){
20591 pRowidLe = pRowidGe = pRowidEq;
20592 }
20593 if( bDesc ){
20594 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)));
20595 pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))));
20596 }else{
20597 pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)));
20598 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))));
20599 }
20600
20601 rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
20602 if( rc!=SQLITE_OK0 ) goto filter_out;
20603
20604 if( pTab->pSortCsr ){
20605 /* If pSortCsr is non-NULL, then this call is being made as part of
20606 ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is
20607 ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will
20608 ** return results to the user for this query. The current cursor
20609 ** (pCursor) is used to execute the query issued by function
20610 ** fts5CursorFirstSorted() above. */
20611 assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 )((void) (0));
20612 assert( nVal==0 && bOrderByRank==0 && bDesc==0 )((void) (0));
20613 assert( pCsr->iLastRowid==LARGEST_INT64 )((void) (0));
20614 assert( pCsr->iFirstRowid==SMALLEST_INT64 )((void) (0));
20615 if( pTab->pSortCsr->bDesc ){
20616 pCsr->iLastRowid = pTab->pSortCsr->iFirstRowid;
20617 pCsr->iFirstRowid = pTab->pSortCsr->iLastRowid;
20618 }else{
20619 pCsr->iLastRowid = pTab->pSortCsr->iLastRowid;
20620 pCsr->iFirstRowid = pTab->pSortCsr->iFirstRowid;
20621 }
20622 pCsr->ePlan = FTS5_PLAN_SOURCE2;
20623 pCsr->pExpr = pTab->pSortCsr->pExpr;
20624 rc = fts5CursorFirst(pTab, pCsr, bDesc);
20625 }else if( pCsr->pExpr ){
20626 assert( rc==SQLITE_OK )((void) (0));
20627 rc = fts5CursorParseRank(pConfig, pCsr, pRank);
20628 if( rc==SQLITE_OK0 ){
20629 if( bOrderByRank ){
20630 pCsr->ePlan = FTS5_PLAN_SORTED_MATCH4;
20631 rc = fts5CursorFirstSorted(pTab, pCsr, bDesc);
20632 }else{
20633 pCsr->ePlan = FTS5_PLAN_MATCH1;
20634 rc = fts5CursorFirst(pTab, pCsr, bDesc);
20635 }
20636 }
20637 }else if( pConfig->zContent==0 ){
20638 fts5SetVtabError(pTab,"%s: table does not support scanning",pConfig->zName);
20639 rc = SQLITE_ERROR1;
20640 }else{
20641 /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup
20642 ** by rowid (ePlan==FTS5_PLAN_ROWID). */
20643 pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID6 : FTS5_PLAN_SCAN5);
20644 rc = sqlite3Fts5StorageStmt(
20645 pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->p.base.zErrMsg
20646 );
20647 if( rc==SQLITE_OK0 ){
20648 if( pRowidEq!=0 ){
20649 assert( pCsr->ePlan==FTS5_PLAN_ROWID )((void) (0));
20650 sqlite3_bind_valuesqlite3_api->bind_value(pCsr->pStmt, 1, pRowidEq);
20651 }else{
20652 sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid);
20653 sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid);
20654 }
20655 rc = fts5NextMethod(pCursor);
20656 }
20657 }
20658
20659 filter_out:
20660 sqlite3Fts5ExprFree(pExpr);
20661 pConfig->pzErrmsg = pzErrmsg;
20662 pConfig->bPrefixInsttoken = bPrefixInsttoken;
20663 return rc;
20664}
20665
20666/*
20667** This is the xEof method of the virtual table. SQLite calls this
20668** routine to find out if it has reached the end of a result set.
20669*/
20670static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){
20671 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
20672 return (CsrFlagTest(pCsr, FTS5CSR_EOF)((pCsr)->csrflags & (0x01)) ? 1 : 0);
20673}
20674
20675/*
20676** Return the rowid that the cursor currently points to.
20677*/
20678static i64 fts5CursorRowid(Fts5Cursor *pCsr){
20679 assert( pCsr->ePlan==FTS5_PLAN_MATCH((void) (0))
20680 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH((void) (0))
20681 || pCsr->ePlan==FTS5_PLAN_SOURCE((void) (0))
20682 || pCsr->ePlan==FTS5_PLAN_SCAN((void) (0))
20683 || pCsr->ePlan==FTS5_PLAN_ROWID((void) (0))
20684 )((void) (0));
20685 if( pCsr->pSorter ){
20686 return pCsr->pSorter->iRowid;
20687 }else if( pCsr->ePlan>=FTS5_PLAN_SCAN5 ){
20688 return sqlite3_column_int64sqlite3_api->column_int64(pCsr->pStmt, 0);
20689 }else{
20690 return sqlite3Fts5ExprRowid(pCsr->pExpr);
20691 }
20692}
20693
20694/*
20695** This is the xRowid method. The SQLite core calls this routine to
20696** retrieve the rowid for the current row of the result set. fts5
20697** exposes %_content.rowid as the rowid for the virtual table. The
20698** rowid should be written to *pRowid.
20699*/
20700static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
20701 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
20702 int ePlan = pCsr->ePlan;
20703
20704 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 )((void) (0));
20705 if( ePlan==FTS5_PLAN_SPECIAL3 ){
20706 *pRowid = 0;
20707 }else{
20708 *pRowid = fts5CursorRowid(pCsr);
20709 }
20710
20711 return SQLITE_OK0;
20712}
20713
20714
20715/*
20716** If the cursor requires seeking (bSeekRequired flag is set), seek it.
20717** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise.
20718**
20719** If argument bErrormsg is true and an error occurs, an error message may
20720** be left in sqlite3_vtab.zErrMsg.
20721*/
20722static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){
20723 int rc = SQLITE_OK0;
20724
20725 /* If the cursor does not yet have a statement handle, obtain one now. */
20726 if( pCsr->pStmt==0 ){
20727 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
20728 int eStmt = fts5StmtType(pCsr);
20729 rc = sqlite3Fts5StorageStmt(
20730 pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->p.base.zErrMsg:0)
20731 );
20732 assert( rc!=SQLITE_OK || pTab->p.base.zErrMsg==0 )((void) (0));
20733 assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) )((void) (0));
20734 }
20735
20736 if( rc==SQLITE_OK0 && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags & (0x02)) ){
20737 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
20738 assert( pCsr->pExpr )((void) (0));
20739 sqlite3_resetsqlite3_api->reset(pCsr->pStmt);
20740 sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr));
20741 pTab->pConfig->bLock++;
20742 rc = sqlite3_stepsqlite3_api->step(pCsr->pStmt);
20743 pTab->pConfig->bLock--;
20744 if( rc==SQLITE_ROW100 ){
20745 rc = SQLITE_OK0;
20746 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags &= ~(0x02));
20747 }else{
20748 rc = sqlite3_resetsqlite3_api->reset(pCsr->pStmt);
20749 if( rc==SQLITE_OK0 ){
20750 rc = FTS5_CORRUPT(11 | (1<<8));
20751 fts5SetVtabError((Fts5FullTable*)pTab,
20752 "fts5: missing row %lld from content table %s",
20753 fts5CursorRowid(pCsr),
20754 pTab->pConfig->zContent
20755 );
20756 }else if( pTab->pConfig->pzErrmsg ){
20757 fts5SetVtabError((Fts5FullTable*)pTab,
20758 "%s", sqlite3_errmsgsqlite3_api->errmsg(pTab->pConfig->db)
20759 );
20760 }
20761 }
20762 }
20763 return rc;
20764}
20765
20766/*
20767** This function is called to handle an FTS INSERT command. In other words,
20768** an INSERT statement of the form:
20769**
20770** INSERT INTO fts(fts) VALUES($pCmd)
20771** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal)
20772**
20773** Argument pVal is the value assigned to column "fts" by the INSERT
20774** statement. This function returns SQLITE_OK if successful, or an SQLite
20775** error code if an error occurs.
20776**
20777** The commands implemented by this function are documented in the "Special
20778** INSERT Directives" section of the documentation. It should be updated if
20779** more commands are added to this function.
20780*/
20781static int fts5SpecialInsert(
20782 Fts5FullTable *pTab, /* Fts5 table object */
20783 const char *zCmd, /* Text inserted into table-name column */
20784 sqlite3_value *pVal /* Value inserted into rank column */
20785){
20786 Fts5Config *pConfig = pTab->p.pConfig;
20787 int rc = SQLITE_OK0;
/* bError is set by sqlite3Fts5ConfigSetValue() in the final (generic
** configuration option) branch below; when set, SQLITE_ERROR is returned. */
20788 int bError = 0;
/* Set by the "delete-all" and "rebuild" branches. When set (and rc is
** SQLITE_OK) the config cookie is decremented and the configuration is
** reloaded at the end of this function. */
20789 int bLoadConfig = 0;
20790
/* Commands are matched case-insensitively using sqlite3_stricmp(). */
20791 if( 0==sqlite3_stricmpsqlite3_api->stricmp("delete-all", zCmd) ){
/* "delete-all" is refused for tables with eContent==FTS5_CONTENT_NORMAL. */
20792 if( pConfig->eContent==FTS5_CONTENT_NORMAL0 ){
20793 fts5SetVtabError(pTab,
20794 "'delete-all' may only be used with a "
20795 "contentless or external content fts5 table"
20796 );
20797 rc = SQLITE_ERROR1;
20798 }else{
20799 rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage);
20800 }
20801 bLoadConfig = 1;
20802 }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("rebuild", zCmd) ){
/* "rebuild" is refused when fts5IsContentless(pTab, 1) is true. */
20803 if( fts5IsContentless(pTab, 1) ){
20804 fts5SetVtabError(pTab,
20805 "'rebuild' may not be used with a contentless fts5 table"
20806 );
20807 rc = SQLITE_ERROR1;
20808 }else{
20809 rc = sqlite3Fts5StorageRebuild(pTab->pStorage);
20810 }
20811 bLoadConfig = 1;
20812 }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("optimize", zCmd) ){
20813 rc = sqlite3Fts5StorageOptimize(pTab->pStorage);
20814 }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("merge", zCmd) ){
/* The rank-column value is read as an integer and passed as the merge
** argument. */
20815 int nMerge = sqlite3_value_intsqlite3_api->value_int(pVal);
20816 rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
20817 }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("integrity-check", zCmd) ){
20818 int iArg = sqlite3_value_intsqlite3_api->value_int(pVal);
20819 rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, iArg);
/* The "prefix-index" command below is only compiled in SQLITE_DEBUG builds. */
20820#ifdef SQLITE_DEBUG
20821 }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("prefix-index", zCmd) ){
20822 pConfig->bPrefixIndex = sqlite3_value_intsqlite3_api->value_int(pVal);
20823#endif
20824 }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("flush", zCmd) ){
20825 rc = sqlite3Fts5FlushToDisk(&pTab->p);
20826 }else{
/* Any other command is treated as a configuration option: flush pending
** data, reload the index configuration, apply the new value to the
** in-memory config and, if it was accepted (bError==0), hand it to
** sqlite3Fts5StorageConfigValue(). Each step runs only if the previous
** one returned SQLITE_OK. */
20827 rc = sqlite3Fts5FlushToDisk(&pTab->p);
20828 if( rc==SQLITE_OK0 ){
20829 rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
20830 }
20831 if( rc==SQLITE_OK0 ){
20832 rc = sqlite3Fts5ConfigSetValue(pTab->p.pConfig, zCmd, pVal, &bError);
20833 }
20834 if( rc==SQLITE_OK0 ){
20835 if( bError ){
20836 rc = SQLITE_ERROR1;
20837 }else{
20838 rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0);
20839 }
20840 }
20841 }
20842
/* After a successful "delete-all" or "rebuild": decrement the cached cookie
** and reload the configuration. NOTE(review): presumably the decrement
** forces the reload to see a cookie mismatch - confirm against
** sqlite3Fts5IndexLoadConfig(). */
20843 if( rc==SQLITE_OK0 && bLoadConfig ){
20844 pTab->p.pConfig->iCookie--;
20845 rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
20846 }
20847
20848 return rc;
20849}
20850
20851static int fts5SpecialDelete(
20852 Fts5FullTable *pTab,
20853 sqlite3_value **apVal
20854){
20855 int rc = SQLITE_OK0;
20856 int eType1 = sqlite3_value_typesqlite3_api->value_type(apVal[1]);
20857 if( eType1==SQLITE_INTEGER1 ){
20858 sqlite3_int64 iDel = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]);
20859 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2], 0);
20860 }
20861 return rc;
20862}
20863
20864static void fts5StorageInsert(
20865 int *pRc,
20866 Fts5FullTable *pTab,
20867 sqlite3_value **apVal,
20868 i64 *piRowid
20869){
20870 int rc = *pRc;
20871 if( rc==SQLITE_OK0 ){
20872 rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, 0, apVal, piRowid);
20873 }
20874 if( rc==SQLITE_OK0 ){
20875 rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid);
20876 }
20877 *pRc = rc;
20878}
20879
20880/*
20881**
20882** This function is called when the user attempts an UPDATE on a contentless
20883** table. Parameter bRowidModified is true if the UPDATE statement modifies
20884** the rowid value. Parameter apVal[] contains the new values for each user
20885** defined column of the fts5 table. pConfig is the configuration object of the
20886** table being updated (guaranteed to be contentless). The contentless_delete=1
20887** and contentless_unindexed=1 options may or may not be set.
20888**
20889** This function returns SQLITE_OK if the UPDATE can go ahead, or an SQLite
20890** error code if it cannot. In this case an error message is also loaded into
20891** pConfig. Output parameter (*pbContent) is set to true if the caller should
20892** update the %_content table only - not the FTS index or any other shadow
20893** table. This occurs when an UPDATE modifies only UNINDEXED columns of the
20894** table.
20895**
20896** An UPDATE may proceed if:
20897**
20898** * The only columns modified are UNINDEXED columns, or
20899**
20900** * The contentless_delete=1 option was specified and all of the indexed
20901** columns (not a subset) have been modified.
20902*/
20903static int fts5ContentlessUpdate(
20904 Fts5Config *pConfig,
20905 sqlite3_value **apVal,
20906 int bRowidModified,
20907 int *pbContent
20908){
20909 int ii;
20910 int bSeenIndex = 0; /* Have seen modified indexed column */
20911 int bSeenIndexNC = 0; /* Have seen unmodified indexed column */
20912 int rc = SQLITE_OK0;
20913
20914 for(ii=0; ii<pConfig->nCol; ii++){
20915 if( pConfig->abUnindexed[ii]==0 ){
20916 if( sqlite3_value_nochangesqlite3_api->value_nochange(apVal[ii]) ){
20917 bSeenIndexNC++;
20918 }else{
20919 bSeenIndex++;
20920 }
20921 }
20922 }
20923
20924 if( bSeenIndex==0 && bRowidModified==0 ){
20925 *pbContent = 1;
20926 }else{
20927 if( bSeenIndexNC || pConfig->bContentlessDelete==0 ){
20928 rc = SQLITE_ERROR1;
20929 sqlite3Fts5ConfigErrmsg(pConfig,
20930 (pConfig->bContentlessDelete ?
20931 "%s a subset of columns on fts5 contentless-delete table: %s" :
20932 "%s contentless fts5 table: %s")
20933 , "cannot UPDATE", pConfig->zName
20934 );
20935 }
20936 }
20937
20938 return rc;
20939}
20940
20941/*
20942** This function is the implementation of the xUpdate callback used by
20943** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
20944** inserted, updated or deleted.
20945**
20946** A delete specifies a single argument - the rowid of the row to remove.
20947**
20948** Update and insert operations pass:
20949**
20950** 1. The "old" rowid, or NULL.
20951** 2. The "new" rowid.
20952** 3. Values for each of the nCol matchable columns.
20953** 4. Values for the two hidden columns (<tablename> and "rank").
20954*/
20955static int fts5UpdateMethod(
20956 sqlite3_vtab *pVtab, /* Virtual table handle */
20957 int nArg, /* Size of argument array */
20958 sqlite3_value **apVal, /* Array of arguments */
20959 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
20960){
20961 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
20962 Fts5Config *pConfig = pTab->p.pConfig;
20963 int eType0; /* value_type() of apVal[0] */
20964 int rc = SQLITE_OK0; /* Return code */
20965
20966 /* A transaction must be open when this is called. */
20967 assert( pTab->ts.eState==1 || pTab->ts.eState==2 )((void) (0));
20968
20969 assert( pVtab->zErrMsg==0 )((void) (0));
20970 assert( nArg==1 || nArg==(2+pConfig->nCol+2) )((void) (0));
20971 assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER((void) (0))
20972 || sqlite3_value_type(apVal[0])==SQLITE_NULL((void) (0))
20973 )((void) (0));
20974 assert( pTab->p.pConfig->pzErrmsg==0 )((void) (0));
/* A page size of zero triggers a configuration load here. This early
** return happens before pzErrmsg is set, so there is nothing to undo.
** NOTE(review): pgsz==0 appears to mean "config not loaded" (the rollback
** method resets it to 0) - confirm against sqlite3Fts5ConfigLoad(). */
20975 if( pConfig->pgsz==0 ){
20976 rc = sqlite3Fts5ConfigLoad(pTab->p.pConfig, pTab->p.pConfig->iCookie);
20977 if( rc!=SQLITE_OK0 ) return rc;
20978 }
20979
/* Route error messages to the vtab's zErrMsg for the duration of this
** call. Every later exit goes through update_out, which clears this. */
20980 pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
20981
20982 /* Put any active cursors into REQUIRE_SEEK state. */
20983 fts5TripCursors(pTab);
20984
/* apVal[2+nCol] is the first hidden column and apVal[2+nCol+1] the second
** ("rank"). A NULL old rowid together with a non-NULL first hidden column
** identifies a special INSERT. */
20985 eType0 = sqlite3_value_typesqlite3_api->value_type(apVal[0]);
20986 if( eType0==SQLITE_NULL5
20987 && sqlite3_value_typesqlite3_api->value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL5
20988 ){
20989 /* A "special" INSERT op. These are handled separately. */
20990 const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[2+pConfig->nCol]);
/* 'delete' is only recognised here when eContent is not
** FTS5_CONTENT_NORMAL; otherwise it falls through to fts5SpecialInsert(). */
20991 if( pConfig->eContent!=FTS5_CONTENT_NORMAL0
20992 && 0==sqlite3_stricmpsqlite3_api->stricmp("delete", z)
20993 ){
20994 if( pConfig->bContentlessDelete ){
20995 fts5SetVtabError(pTab,
20996 "'delete' may not be used with a contentless_delete=1 table"
20997 );
20998 rc = SQLITE_ERROR1;
20999 }else{
21000 rc = fts5SpecialDelete(pTab, apVal);
21001 }
21002 }else{
21003 rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]);
21004 }
21005 }else{
21006 /* A regular INSERT, UPDATE or DELETE statement. The trick here is that
21007 ** any conflict on the rowid value must be detected before any
21008 ** modifications are made to the database file. There are 4 cases:
21009 **
21010 ** 1) DELETE
21011 ** 2) UPDATE (rowid not modified)
21012 ** 3) UPDATE (rowid modified)
21013 ** 4) INSERT
21014 **
21015 ** Cases 3 and 4 may violate the rowid constraint.
21016 */
/* The statement's conflict mode is only consulted for normal-content
** tables and contentless_delete=1 tables; all others use SQLITE_ABORT. */
21017 int eConflict = SQLITE_ABORT4;
21018 if( pConfig->eContent==FTS5_CONTENT_NORMAL0 || pConfig->bContentlessDelete ){
21019 eConflict = sqlite3_vtab_on_conflictsqlite3_api->vtab_on_conflict(pConfig->db);
21020 }
21021
21022 assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL )((void) (0));
21023 assert( nArg!=1 || eType0==SQLITE_INTEGER )((void) (0));
21024
21025 /* DELETE */
21026 if( nArg==1 ){
21027 /* It is only possible to DELETE from a contentless table if the
21028 ** contentless_delete=1 flag is set. */
21029 if( fts5IsContentless(pTab, 1) && pConfig->bContentlessDelete==0 ){
21030 fts5SetVtabError(pTab,
21031 "cannot DELETE from contentless fts5 table: %s", pConfig->zName
21032 );
21033 rc = SQLITE_ERROR1;
21034 }else{
21035 i64 iDel = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); /* Rowid to delete */
21036 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0);
21037 }
21038 }
21039
21040 /* INSERT or UPDATE */
21041 else{
21042 int eType1 = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(apVal[1]);
21043
21044 /* It is an error to write an fts5_locale() value to a table without
21045 ** the locale=1 option. */
21046 if( pConfig->bLocale==0 ){
21047 int ii;
21048 for(ii=0; ii<pConfig->nCol; ii++){
21049 sqlite3_value *pVal = apVal[ii+2];
21050 if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
21051 fts5SetVtabError(pTab, "fts5_locale() requires locale=1");
21052 rc = SQLITE_MISMATCH20;
21053 goto update_out;
21054 }
21055 }
21056 }
21057
21058 if( eType0!=SQLITE_INTEGER1 ){
21059 /* An INSERT statement. If the conflict-mode is REPLACE, first remove
21060 ** the current entry (if any). */
21061 if( eConflict==SQLITE_REPLACE5 && eType1==SQLITE_INTEGER1 ){
21062 i64 iNew = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); /* Rowid to delete */
21063 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0);
21064 }
/* fts5StorageInsert() does nothing if rc is already an error code. */
21065 fts5StorageInsert(&rc, pTab, apVal, pRowid);
21066 }
21067
21068 /* UPDATE */
21069 else{
21070 Fts5Storage *pStorage = pTab->pStorage;
21071 i64 iOld = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); /* Old rowid */
21072 i64 iNew = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); /* New rowid */
21073 int bContent = 0; /* Content only update */
21074
21075 /* If this is a contentless table (including contentless_unindexed=1
21076 ** tables), check if the UPDATE may proceed. */
21077 if( fts5IsContentless(pTab, 1) ){
21078 rc = fts5ContentlessUpdate(pConfig, &apVal[2], iOld!=iNew, &bContent);
/* This jump skips the sqlite3Fts5StorageReleaseDeleteRow() call below;
** no sqlite3Fts5StorageFindDeleteRow() call has been made at this point. */
21079 if( rc!=SQLITE_OK0 ) goto update_out;
21080 }
21081
/* Four outcomes: new rowid is not an integer (SQLITE_MISMATCH), the rowid
** changes, content-only update, or a same-rowid update implemented as
** delete followed by insert. */
21082 if( eType1!=SQLITE_INTEGER1 ){
21083 rc = SQLITE_MISMATCH20;
21084 }else if( iOld!=iNew ){
21085 assert( bContent==0 )((void) (0));
21086 if( eConflict==SQLITE_REPLACE5 ){
/* REPLACE: remove both the old row and any row already at the new rowid,
** then insert. */
21087 rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1);
21088 if( rc==SQLITE_OK0 ){
21089 rc = sqlite3Fts5StorageDelete(pStorage, iNew, 0, 0);
21090 }
21091 fts5StorageInsert(&rc, pTab, apVal, pRowid);
21092 }else{
/* Non-REPLACE: the content insert runs before the old row is deleted,
** so a failure of the insert leaves the old row untouched. */
21093 rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld);
21094 if( rc==SQLITE_OK0 ){
21095 rc = sqlite3Fts5StorageContentInsert(pStorage, 0, apVal, pRowid);
21096 }
21097 if( rc==SQLITE_OK0 ){
21098 rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 0);
21099 }
21100 if( rc==SQLITE_OK0 ){
21101 rc = sqlite3Fts5StorageIndexInsert(pStorage, apVal, *pRowid);
21102 }
21103 }
21104 }else if( bContent ){
21105 /* This occurs when an UPDATE on a contentless table affects *only*
21106 ** UNINDEXED columns. This is a no-op for contentless_unindexed=0
21107 ** tables, or a write to the %_content table only for =1 tables. */
21108 assert( fts5IsContentless(pTab, 1) )((void) (0));
21109 rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld);
21110 if( rc==SQLITE_OK0 ){
21111 rc = sqlite3Fts5StorageContentInsert(pStorage, 1, apVal, pRowid);
21112 }
21113 }else{
21114 rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1);
21115 fts5StorageInsert(&rc, pTab, apVal, pRowid);
21116 }
/* Reached on every UPDATE path that did not jump to update_out above. */
21117 sqlite3Fts5StorageReleaseDeleteRow(pStorage);
21118 }
21119 }
21120 }
21121
/* Common exit: detach the error-message slot installed above. */
21122 update_out:
21123 pTab->p.pConfig->pzErrmsg = 0;
21124 return rc;
21125}
21126
21127/*
21128** Implementation of xSync() method.
21129*/
21130static int fts5SyncMethod(sqlite3_vtab *pVtab){
21131 int rc;
21132 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
21133 fts5CheckTransactionState(pTab, FTS5_SYNC, 0);
21134 pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
21135 rc = sqlite3Fts5FlushToDisk(&pTab->p);
21136 pTab->p.pConfig->pzErrmsg = 0;
21137 return rc;
21138}
21139
21140/*
21141** Implementation of xBegin() method.
21142*/
21143static int fts5BeginMethod(sqlite3_vtab *pVtab){
21144 int rc = fts5NewTransaction((Fts5FullTable*)pVtab);
21145 if( rc==SQLITE_OK0 ){
21146 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_BEGIN, 0);
21147 }
21148 return rc;
21149}
21150
21151/*
21152** Implementation of xCommit() method. This is a no-op. The contents of
21153** the pending-terms hash-table have already been flushed into the database
21154** by fts5SyncMethod().
21155*/
21156static int fts5CommitMethod(sqlite3_vtab *pVtab){
21157 UNUSED_PARAM(pVtab)(void)(pVtab); /* Call below is a no-op for NDEBUG builds */
21158 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_COMMIT, 0);
21159 return SQLITE_OK0;
21160}
21161
21162/*
21163** Implementation of xRollback(). Discard the contents of the pending-terms
21164** hash-table. Any changes made to the database are reverted by SQLite.
21165*/
21166static int fts5RollbackMethod(sqlite3_vtab *pVtab){
21167 int rc;
21168 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
21169 fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0);
21170 rc = sqlite3Fts5StorageRollback(pTab->pStorage);
21171 pTab->p.pConfig->pgsz = 0;
21172 return rc;
21173}
21174
21175static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*);
21176
21177static void *fts5ApiUserData(Fts5Context *pCtx){
21178 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21179 return pCsr->pAux->pUserData;
21180}
21181
21182static int fts5ApiColumnCount(Fts5Context *pCtx){
21183 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21184 return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol;
21185}
21186
21187static int fts5ApiColumnTotalSize(
21188 Fts5Context *pCtx,
21189 int iCol,
21190 sqlite3_int64 *pnToken
21191){
21192 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21193 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
21194 return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken);
21195}
21196
21197static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){
21198 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21199 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
21200 return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
21201}
21202
21203/*
21204** Implementation of xTokenize_v2() API.
21205*/
21206static int fts5ApiTokenize_v2(
21207 Fts5Context *pCtx,
21208 const char *pText, int nText,
21209 const char *pLoc, int nLoc,
21210 void *pUserData,
21211 int (*xToken)(void*, int, const char*, int, int, int)
21212){
21213 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21214 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
21215 int rc = SQLITE_OK0;
21216
21217 sqlite3Fts5SetLocale(pTab->pConfig, pLoc, nLoc);
21218 rc = sqlite3Fts5Tokenize(pTab->pConfig,
21219 FTS5_TOKENIZE_AUX0x0008, pText, nText, pUserData, xToken
21220 );
21221 sqlite3Fts5SetLocale(pTab->pConfig, 0, 0);
21222
21223 return rc;
21224}
21225
21226/*
21227** Implementation of xTokenize() API. This is just xTokenize_v2() with NULL/0
21228** passed as the locale.
21229*/
21230static int fts5ApiTokenize(
21231 Fts5Context *pCtx,
21232 const char *pText, int nText,
21233 void *pUserData,
21234 int (*xToken)(void*, int, const char*, int, int, int)
21235){
21236 return fts5ApiTokenize_v2(pCtx, pText, nText, 0, 0, pUserData, xToken);
21237}
21238
21239static int fts5ApiPhraseCount(Fts5Context *pCtx){
21240 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21241 return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
21242}
21243
21244static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
21245 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21246 return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase);
21247}
21248
21249/*
21250** Argument pStmt is an SQL statement of the type used by Fts5Cursor. This
21251** function extracts the text value of column iCol of the current row.
21252** Additionally, if there is an associated locale, it invokes
21253** sqlite3Fts5SetLocale() to configure the tokenizer. In all cases the caller
21254** should invoke sqlite3Fts5ClearLocale() to clear the locale at some point
21255** after this function returns.
21256**
21257** If successful, (*ppText) is set to point to a buffer containing the text
21258** value as utf-8 and SQLITE_OK returned. (*pnText) is set to the size of that
21259** buffer in bytes. It is not guaranteed to be nul-terminated. If an error
21260** occurs, an SQLite error code is returned. The final values of the two
21261** output parameters are undefined in this case.
21262*/
21263static int fts5TextFromStmt(
21264 Fts5Config *pConfig,
21265 sqlite3_stmt *pStmt,
21266 int iCol,
21267 const char **ppText,
21268 int *pnText
21269){
21270 sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pStmt, iCol+1);
21271 const char *pLoc = 0;
21272 int nLoc = 0;
21273 int rc = SQLITE_OK0;
21274
21275 if( pConfig->bLocale
21276 && pConfig->eContent==FTS5_CONTENT_EXTERNAL2
21277 && sqlite3Fts5IsLocaleValue(pConfig, pVal)
21278 ){
21279 rc = sqlite3Fts5DecodeLocaleValue(pVal, ppText, pnText, &pLoc, &nLoc);
21280 }else{
21281 *ppText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal);
21282 *pnText = sqlite3_value_bytessqlite3_api->value_bytes(pVal);
21283 if( pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){
21284 pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pStmt, iCol+1+pConfig->nCol);
21285 nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pStmt, iCol+1+pConfig->nCol);
21286 }
21287 }
21288 sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
21289 return rc;
21290}
21291
21292static int fts5ApiColumnText(
21293 Fts5Context *pCtx,
21294 int iCol,
21295 const char **pz,
21296 int *pn
21297){
21298 int rc = SQLITE_OK0;
21299 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21300 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
21301
21302 assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0));
21303 if( iCol<0 || iCol>=pTab->pConfig->nCol ){
21304 rc = SQLITE_RANGE25;
21305 }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab), 0) ){
21306 *pz = 0;
21307 *pn = 0;
21308 }else{
21309 rc = fts5SeekCursor(pCsr, 0);
21310 if( rc==SQLITE_OK0 ){
21311 rc = fts5TextFromStmt(pTab->pConfig, pCsr->pStmt, iCol, pz, pn);
21312 sqlite3Fts5ClearLocale(pTab->pConfig);
21313 }
21314 }
21315 return rc;
21316}
21317
21318/*
21319** This is called by various API functions - xInst, xPhraseFirst,
21320** xPhraseFirstColumn etc. - to obtain the position list for phrase iPhrase
21321** of the current row. This function works for both detail=full tables (in
21322** which case the position-list was read from the fts index) or for other
21323** detail= modes if the row content is available.
21324*/
21325static int fts5CsrPoslist(
21326 Fts5Cursor *pCsr, /* Fts5 cursor object */
21327 int iPhrase, /* Phrase to find position list for */
21328 const u8 **pa, /* OUT: Pointer to position list buffer */
21329 int *pn /* OUT: Size of (*pa) in bytes */
21330){
21331 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
21332 int rc = SQLITE_OK0;
/* bLive is true when the cursor has no sorter attached; it is passed to
** sqlite3Fts5ExprClearPoslists() below. */
21333 int bLive = (pCsr->pSorter==0);
21334
/* An out-of-range phrase index yields SQLITE_RANGE; the final else branch
** of this function then sets (*pa) and (*pn) to NULL/0. */
21335 if( iPhrase<0 || iPhrase>=sqlite3Fts5ExprPhraseCount(pCsr->pExpr) ){
21336 rc = SQLITE_RANGE25;
21337 }else if( pConfig->eDetail!=FTS5_DETAIL_FULL0
21338 && fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1)
21339 ){
/* Detail mode is not "full" and the table is contentless: report an
** empty position list. This early return leaves the
** FTS5CSR_REQUIRE_POSLIST flag untouched. */
21340 *pa = 0;
21341 *pn = 0;
21342 return SQLITE_OK0;
21343 }else if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST)((pCsr)->csrflags & (0x40)) ){
21344 if( pConfig->eDetail!=FTS5_DETAIL_FULL0 ){
/* Position lists have to be rebuilt from the row content: clear the
** existing lists, seek the content statement to the current row, then
** feed the text of each column to sqlite3Fts5ExprPopulatePoslists(). */
21345 Fts5PoslistPopulator *aPopulator;
21346 int i;
21347
21348 aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive);
21349 if( aPopulator==0 ) rc = SQLITE_NOMEM7;
21350 if( rc==SQLITE_OK0 ){
21351 rc = fts5SeekCursor(pCsr, 0);
21352 }
/* The loop stops at the first error. The locale configured by
** fts5TextFromStmt() is cleared after every column. */
21353 for(i=0; i<pConfig->nCol && rc==SQLITE_OK0; i++){
21354 const char *z = 0;
21355 int n = 0;
21356 rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n);
21357 if( rc==SQLITE_OK0 ){
21358 rc = sqlite3Fts5ExprPopulatePoslists(
21359 pConfig, pCsr->pExpr, aPopulator, i, z, n
21360 );
21361 }
21362 sqlite3Fts5ClearLocale(pConfig);
21363 }
21364 sqlite3_freesqlite3_api->free(aPopulator);
21365
21366 if( pCsr->pSorter ){
21367 sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid);
21368 }
21369 }
/* The flag is cleared whether or not the rebuild above succeeded. */
21370 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST)((pCsr)->csrflags &= ~(0x40));
21371 }
21372
21373 if( rc==SQLITE_OK0 ){
21374 if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL0 ){
/* Sorter with detail=full: aIdx[k] is used as the end offset of phrase k
** within pSorter->aPoslist, so phrase iPhrase spans
** aIdx[iPhrase-1]..aIdx[iPhrase] (starting at 0 for the first phrase). */
21375 Fts5Sorter *pSorter = pCsr->pSorter;
21376 int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
21377 *pn = pSorter->aIdx[iPhrase] - i1;
21378 *pa = &pSorter->aPoslist[i1];
21379 }else{
21380 *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
21381 }
21382 }else{
21383 *pa = 0;
21384 *pn = 0;
21385 }
21386
21387 return rc;
21388}
21389
21390/*
21391** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
21392** correctly for the current view. Return SQLITE_OK if successful, or an
21393** SQLite error code otherwise.
21394*/
21395static int fts5CacheInstArray(Fts5Cursor *pCsr){
21396 int rc = SQLITE_OK0;
21397 Fts5PoslistReader *aIter; /* One iterator for each phrase */
21398 int nIter; /* Number of iterators/phrases */
21399 int nCol = ((Fts5Table*)pCsr->base.pVtab)->pConfig->nCol;
21400
/* The iterator array is allocated on first use and kept on the cursor
** for later calls. */
21401 nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
21402 if( pCsr->aInstIter==0 ){
21403 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nIter;
21404 pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
21405 }
21406 aIter = pCsr->aInstIter;
21407
/* If no iterator array is available the whole block is skipped: neither
** nInstCount nor the FTS5CSR_REQUIRE_INST flag is updated, and rc is
** returned as left by sqlite3Fts5MallocZero(). */
21408 if( aIter ){
21409 int nInst = 0; /* Number instances seen so far */
21410 int i;
21411
21412 /* Initialize all iterators */
21413 for(i=0; i<nIter && rc==SQLITE_OK0; i++){
21414 const u8 *a;
21415 int n;
21416 rc = fts5CsrPoslist(pCsr, i, &a, &n);
21417 if( rc==SQLITE_OK0 ){
21418 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
21419 }
21420 }
21421
21422 if( rc==SQLITE_OK0 ){
/* Merge step: on each pass pick the non-EOF iterator with the smallest
** iPos (the lowest phrase index wins ties because of the strict '<'),
** record it as an instance and advance that iterator. The loop ends
** when all iterators are at EOF. */
21423 while( 1 ){
21424 int *aInst;
21425 int iBest = -1;
21426 for(i=0; i<nIter; i++){
21427 if( (aIter[i].bEof==0)
21428 && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos)
21429 ){
21430 iBest = i;
21431 }
21432 }
21433 if( iBest<0 ) break;
21434
/* Grow the instance array when needed: 32 entries initially, doubling
** afterwards, three ints per entry. On allocation failure the instance
** just counted is dropped again and the previous array is kept. */
21435 nInst++;
21436 if( nInst>=pCsr->nInstAlloc ){
21437 int nNewSize = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32;
21438 aInst = (int*)sqlite3_realloc64sqlite3_api->realloc64(
21439 pCsr->aInst, nNewSize*sizeof(int)*3
21440 );
21441 if( aInst ){
21442 pCsr->aInst = aInst;
21443 pCsr->nInstAlloc = nNewSize;
21444 }else{
21445 nInst--;
21446 rc = SQLITE_NOMEM7;
21447 break;
21448 }
21449 }
21450
/* Each entry is the triple (phrase index, column, offset). */
21451 aInst = &pCsr->aInst[3 * (nInst-1)];
21452 aInst[0] = iBest;
21453 aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos)(int)((aIter[iBest].iPos >> 32) & 0x7FFFFFFF);
21454 aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos)(int)(aIter[iBest].iPos & 0x7FFFFFFF);
21455 assert( aInst[1]>=0 )((void) (0));
/* A column number beyond the table's column count is reported as
** corruption. The offending entry has already been stored and counted
** in nInst at this point. */
21456 if( aInst[1]>=nCol ){
21457 rc = FTS5_CORRUPT(11 | (1<<8));
21458 break;
21459 }
21460 sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
21461 }
21462 }
21463
/* The count is stored and the flag cleared even if rc holds an error. */
21464 pCsr->nInstCount = nInst;
21465 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags &= ~(0x08));
21466 }
21467 return rc;
21468}
21469
21470static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
21471 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21472 int rc = SQLITE_OK0;
21473 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0
21474 || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) ){
21475 *pnInst = pCsr->nInstCount;
21476 }
21477 return rc;
21478}
21479
21480static int fts5ApiInst(
21481 Fts5Context *pCtx,
21482 int iIdx,
21483 int *piPhrase,
21484 int *piCol,
21485 int *piOff
21486){
21487 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21488 int rc = SQLITE_OK0;
21489 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0
21490 || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr))
21491 ){
21492 if( iIdx<0 || iIdx>=pCsr->nInstCount ){
21493 rc = SQLITE_RANGE25;
21494 }else{
21495 *piPhrase = pCsr->aInst[iIdx*3];
21496 *piCol = pCsr->aInst[iIdx*3 + 1];
21497 *piOff = pCsr->aInst[iIdx*3 + 2];
21498 }
21499 }
21500 return rc;
21501}
21502
21503static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
21504 return fts5CursorRowid((Fts5Cursor*)pCtx);
21505}
21506
21507static int fts5ColumnSizeCb(
21508 void *pContext, /* Pointer to int */
21509 int tflags,
21510 const char *pUnused, /* Buffer containing token */
21511 int nUnused, /* Size of token in bytes */
21512 int iUnused1, /* Start offset of token */
21513 int iUnused2 /* End offset of token */
21514){
21515 int *pCnt = (int*)pContext;
21516 UNUSED_PARAM2(pUnused, nUnused)(void)(pUnused), (void)(nUnused);
21517 UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2);
21518 if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ){
21519 (*pCnt)++;
21520 }
21521 return SQLITE_OK0;
21522}
21523
21524static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
21525 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21526 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
21527 Fts5Config *pConfig = pTab->p.pConfig;
21528 int rc = SQLITE_OK0;
21529
21530 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags & (0x04)) ){
21531 if( pConfig->bColumnsize ){
21532 i64 iRowid = fts5CursorRowid(pCsr);
21533 rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize);
21534 }else if( !pConfig->zContent || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 ){
21535 int i;
21536 for(i=0; i<pConfig->nCol; i++){
21537 if( pConfig->abUnindexed[i]==0 ){
21538 pCsr->aColumnSize[i] = -1;
21539 }
21540 }
21541 }else{
21542 int i;
21543 rc = fts5SeekCursor(pCsr, 0);
21544 for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){
21545 if( pConfig->abUnindexed[i]==0 ){
21546 const char *z = 0;
21547 int n = 0;
21548 pCsr->aColumnSize[i] = 0;
21549 rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n);
21550 if( rc==SQLITE_OK0 ){
21551 rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX0x0008,
21552 z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb
21553 );
21554 }
21555 sqlite3Fts5ClearLocale(pConfig);
21556 }
21557 }
21558 }
21559 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags &= ~(0x04));
21560 }
21561 if( iCol<0 ){
21562 int i;
21563 *pnToken = 0;
21564 for(i=0; i<pConfig->nCol; i++){
21565 *pnToken += pCsr->aColumnSize[i];
21566 }
21567 }else if( iCol<pConfig->nCol ){
21568 *pnToken = pCsr->aColumnSize[iCol];
21569 }else{
21570 *pnToken = 0;
21571 rc = SQLITE_RANGE25;
21572 }
21573 return rc;
21574}
21575
21576/*
21577** Implementation of the xSetAuxdata() method.
21578*/
21579static int fts5ApiSetAuxdata(
21580 Fts5Context *pCtx, /* Fts5 context */
21581 void *pPtr, /* Pointer to save as auxdata */
21582 void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */
21583){
21584 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21585 Fts5Auxdata *pData;
21586
21587 /* Search through the cursors list of Fts5Auxdata objects for one that
21588 ** corresponds to the currently executing auxiliary function. */
21589 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
21590 if( pData->pAux==pCsr->pAux ) break;
21591 }
21592
21593 if( pData ){
21594 if( pData->xDelete ){
21595 pData->xDelete(pData->pPtr);
21596 }
21597 }else{
21598 int rc = SQLITE_OK0;
21599 pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata));
21600 if( pData==0 ){
21601 if( xDelete ) xDelete(pPtr);
21602 return rc;
21603 }
21604 pData->pAux = pCsr->pAux;
21605 pData->pNext = pCsr->pAuxdata;
21606 pCsr->pAuxdata = pData;
21607 }
21608
21609 pData->xDelete = xDelete;
21610 pData->pPtr = pPtr;
21611 return SQLITE_OK0;
21612}
21613
21614static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){
21615 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21616 Fts5Auxdata *pData;
21617 void *pRet = 0;
21618
21619 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
21620 if( pData->pAux==pCsr->pAux ) break;
21621 }
21622
21623 if( pData ){
21624 pRet = pData->pPtr;
21625 if( bClear ){
21626 pData->pPtr = 0;
21627 pData->xDelete = 0;
21628 }
21629 }
21630
21631 return pRet;
21632}
21633
21634static void fts5ApiPhraseNext(
21635 Fts5Context *pCtx,
21636 Fts5PhraseIter *pIter,
21637 int *piCol, int *piOff
21638){
21639 if( pIter->a>=pIter->b ){
21640 *piCol = -1;
21641 *piOff = -1;
21642 }else{
21643 int iVal;
21644 pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal));
21645 if( iVal==1 ){
21646 /* Avoid returning a (*piCol) value that is too large for the table,
21647 ** even if the position-list is corrupt. The caller might not be
21648 ** expecting it. */
21649 int nCol = ((Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab))->pConfig->nCol;
21650 pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal));
21651 *piCol = (iVal>=nCol ? nCol-1 : iVal);
21652 *piOff = 0;
21653 pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal));
21654 }
21655 *piOff += (iVal-2);
21656 }
21657}
21658
21659static int fts5ApiPhraseFirst(
21660 Fts5Context *pCtx,
21661 int iPhrase,
21662 Fts5PhraseIter *pIter,
21663 int *piCol, int *piOff
21664){
21665 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21666 int n;
21667 int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n);
21668 if( rc==SQLITE_OK0 ){
21669 assert( pIter->a || n==0 )((void) (0));
21670 pIter->b = (pIter->a ? &pIter->a[n] : 0);
21671 *piCol = 0;
21672 *piOff = 0;
21673 fts5ApiPhraseNext(pCtx, pIter, piCol, piOff);
21674 }
21675 return rc;
21676}
21677
21678static void fts5ApiPhraseNextColumn(
21679 Fts5Context *pCtx,
21680 Fts5PhraseIter *pIter,
21681 int *piCol
21682){
21683 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21684 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
21685
21686 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){
21687 if( pIter->a>=pIter->b ){
21688 *piCol = -1;
21689 }else{
21690 int iIncr;
21691 pIter->a += fts5GetVarint32(&pIter->a[0], iIncr)sqlite3Fts5GetVarint32(&pIter->a[0],(u32*)&(iIncr)
)
;
21692 *piCol += (iIncr-2);
21693 }
21694 }else{
21695 while( 1 ){
21696 int dummy;
21697 if( pIter->a>=pIter->b ){
21698 *piCol = -1;
21699 return;
21700 }
21701 if( pIter->a[0]==0x01 ) break;
21702 pIter->a += fts5GetVarint32(pIter->a, dummy)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(dummy));
21703 }
21704 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol)sqlite3Fts5GetVarint32(&pIter->a[1],(u32*)&(*piCol
))
;
21705 }
21706}
21707
21708static int fts5ApiPhraseFirstColumn(
21709 Fts5Context *pCtx,
21710 int iPhrase,
21711 Fts5PhraseIter *pIter,
21712 int *piCol
21713){
21714 int rc = SQLITE_OK0;
21715 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21716 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
21717
21718 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){
21719 Fts5Sorter *pSorter = pCsr->pSorter;
21720 int n;
21721 if( pSorter ){
21722 int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
21723 n = pSorter->aIdx[iPhrase] - i1;
21724 pIter->a = &pSorter->aPoslist[i1];
21725 }else{
21726 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n);
21727 }
21728 if( rc==SQLITE_OK0 ){
21729 assert( pIter->a || n==0 )((void) (0));
21730 pIter->b = (pIter->a ? &pIter->a[n] : 0);
21731 *piCol = 0;
21732 fts5ApiPhraseNextColumn(pCtx, pIter, piCol);
21733 }
21734 }else{
21735 int n;
21736 rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n);
21737 if( rc==SQLITE_OK0 ){
21738 assert( pIter->a || n==0 )((void) (0));
21739 pIter->b = (pIter->a ? &pIter->a[n] : 0);
21740 if( n<=0 ){
21741 *piCol = -1;
21742 }else if( pIter->a[0]==0x01 ){
21743 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol)sqlite3Fts5GetVarint32(&pIter->a[1],(u32*)&(*piCol
))
;
21744 }else{
21745 *piCol = 0;
21746 }
21747 }
21748 }
21749
21750 return rc;
21751}
21752
21753/*
21754** xQueryToken() API implemenetation.
21755*/
21756static int fts5ApiQueryToken(
21757 Fts5Context* pCtx,
21758 int iPhrase,
21759 int iToken,
21760 const char **ppOut,
21761 int *pnOut
21762){
21763 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21764 return sqlite3Fts5ExprQueryToken(pCsr->pExpr, iPhrase, iToken, ppOut, pnOut);
21765}
21766
21767/*
21768** xInstToken() API implemenetation.
21769*/
21770static int fts5ApiInstToken(
21771 Fts5Context *pCtx,
21772 int iIdx,
21773 int iToken,
21774 const char **ppOut, int *pnOut
21775){
21776 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21777 int rc = SQLITE_OK0;
21778 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0
21779 || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr))
21780 ){
21781 if( iIdx<0 || iIdx>=pCsr->nInstCount ){
21782 rc = SQLITE_RANGE25;
21783 }else{
21784 int iPhrase = pCsr->aInst[iIdx*3];
21785 int iCol = pCsr->aInst[iIdx*3 + 1];
21786 int iOff = pCsr->aInst[iIdx*3 + 2];
21787 i64 iRowid = fts5CursorRowid(pCsr);
21788 rc = sqlite3Fts5ExprInstToken(
21789 pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut
21790 );
21791 }
21792 }
21793 return rc;
21794}
21795
21796
21797static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
21798 int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
21799);
21800
21801/*
21802** The xColumnLocale() API.
21803*/
21804static int fts5ApiColumnLocale(
21805 Fts5Context *pCtx,
21806 int iCol,
21807 const char **pzLocale,
21808 int *pnLocale
21809){
21810 int rc = SQLITE_OK0;
21811 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21812 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
21813
21814 *pzLocale = 0;
21815 *pnLocale = 0;
21816
21817 assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0));
21818 if( iCol<0 || iCol>=pConfig->nCol ){
21819 rc = SQLITE_RANGE25;
21820 }else if(
21821 pConfig->abUnindexed[iCol]==0
21822 && 0==fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1)
21823 && pConfig->bLocale
21824 ){
21825 rc = fts5SeekCursor(pCsr, 0);
21826 if( rc==SQLITE_OK0 ){
21827 const char *zDummy = 0;
21828 int nDummy = 0;
21829 rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &zDummy, &nDummy);
21830 if( rc==SQLITE_OK0 ){
21831 *pzLocale = pConfig->t.pLocale;
21832 *pnLocale = pConfig->t.nLocale;
21833 }
21834 sqlite3Fts5ClearLocale(pConfig);
21835 }
21836 }
21837
21838 return rc;
21839}
21840
21841static const Fts5ExtensionApi sFts5Api = {
21842 4, /* iVersion */
21843 fts5ApiUserData,
21844 fts5ApiColumnCount,
21845 fts5ApiRowCount,
21846 fts5ApiColumnTotalSize,
21847 fts5ApiTokenize,
21848 fts5ApiPhraseCount,
21849 fts5ApiPhraseSize,
21850 fts5ApiInstCount,
21851 fts5ApiInst,
21852 fts5ApiRowid,
21853 fts5ApiColumnText,
21854 fts5ApiColumnSize,
21855 fts5ApiQueryPhrase,
21856 fts5ApiSetAuxdata,
21857 fts5ApiGetAuxdata,
21858 fts5ApiPhraseFirst,
21859 fts5ApiPhraseNext,
21860 fts5ApiPhraseFirstColumn,
21861 fts5ApiPhraseNextColumn,
21862 fts5ApiQueryToken,
21863 fts5ApiInstToken,
21864 fts5ApiColumnLocale,
21865 fts5ApiTokenize_v2
21866};
21867
21868/*
21869** Implementation of API function xQueryPhrase().
21870*/
21871static int fts5ApiQueryPhrase(
21872 Fts5Context *pCtx,
21873 int iPhrase,
21874 void *pUserData,
21875 int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*)
21876){
21877 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
21878 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
21879 int rc;
21880 Fts5Cursor *pNew = 0;
21881
21882 rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
21883 if( rc==SQLITE_OK0 ){
21884 pNew->ePlan = FTS5_PLAN_MATCH1;
21885 pNew->iFirstRowid = SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32)));
21886 pNew->iLastRowid = LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32));
21887 pNew->base.pVtab = (sqlite3_vtab*)pTab;
21888 rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr);
21889 }
21890
21891 if( rc==SQLITE_OK0 ){
21892 for(rc = fts5CursorFirst(pTab, pNew, 0);
21893 rc==SQLITE_OK0 && CsrFlagTest(pNew, FTS5CSR_EOF)((pNew)->csrflags & (0x01))==0;
21894 rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
21895 ){
21896 rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData);
21897 if( rc!=SQLITE_OK0 ){
21898 if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0;
21899 break;
21900 }
21901 }
21902 }
21903
21904 fts5CloseMethod((sqlite3_vtab_cursor*)pNew);
21905 return rc;
21906}
21907
21908static void fts5ApiInvoke(
21909 Fts5Auxiliary *pAux,
21910 Fts5Cursor *pCsr,
21911 sqlite3_context *context,
21912 int argc,
21913 sqlite3_value **argv
21914){
21915 assert( pCsr->pAux==0 )((void) (0));
21916 assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0));
21917 pCsr->pAux = pAux;
21918 pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
21919 pCsr->pAux = 0;
21920}
21921
21922static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
21923 Fts5Cursor *pCsr;
21924 for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
21925 if( pCsr->iCsrId==iCsrId ) break;
21926 }
21927 return pCsr;
21928}
21929
21930/*
21931** Parameter zFmt is a printf() style formatting string. This function
21932** formats it using the trailing arguments and returns the result as
21933** an error message to the context passed as the first argument.
21934*/
21935static void fts5ResultError(sqlite3_context *pCtx, const char *zFmt, ...){
21936 char *zErr = 0;
21937 va_list ap;
21938 va_start(ap, zFmt)__builtin_va_start(ap, zFmt);
21939 zErr = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap);
21940 sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1);
21941 sqlite3_freesqlite3_api->free(zErr);
21942 va_end(ap)__builtin_va_end(ap);
21943}
21944
21945static void fts5ApiCallback(
21946 sqlite3_context *context,
21947 int argc,
21948 sqlite3_value **argv
21949){
21950
21951 Fts5Auxiliary *pAux;
21952 Fts5Cursor *pCsr;
21953 i64 iCsrId;
21954
21955 assert( argc>=1 )((void) (0));
21956 pAux = (Fts5Auxiliary*)sqlite3_user_datasqlite3_api->user_data(context);
21957 iCsrId = sqlite3_value_int64sqlite3_api->value_int64(argv[0]);
21958
21959 pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId);
21960 if( pCsr==0 || (pCsr->ePlan==0 || pCsr->ePlan==FTS5_PLAN_SPECIAL3) ){
21961 fts5ResultError(context, "no such cursor: %lld", iCsrId);
21962 }else{
21963 sqlite3_vtab *pTab = pCsr->base.pVtab;
21964 fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]);
21965 sqlite3_freesqlite3_api->free(pTab->zErrMsg);
21966 pTab->zErrMsg = 0;
21967 }
21968}
21969
21970
21971/*
21972** Given cursor id iId, return a pointer to the corresponding Fts5Table
21973** object. Or NULL If the cursor id does not exist.
21974*/
21975static Fts5Table *sqlite3Fts5TableFromCsrid(
21976 Fts5Global *pGlobal, /* FTS5 global context for db handle */
21977 i64 iCsrId /* Id of cursor to find */
21978){
21979 Fts5Cursor *pCsr;
21980 pCsr = fts5CursorFromCsrid(pGlobal, iCsrId);
21981 if( pCsr ){
21982 return (Fts5Table*)pCsr->base.pVtab;
21983 }
21984 return 0;
21985}
21986
21987/*
21988** Return a "position-list blob" corresponding to the current position of
21989** cursor pCsr via sqlite3_result_blob(). A position-list blob contains
21990** the current position-list for each phrase in the query associated with
21991** cursor pCsr.
21992**
21993** A position-list blob begins with (nPhrase-1) varints, where nPhrase is
21994** the number of phrases in the query. Following the varints are the
21995** concatenated position lists for each phrase, in order.
21996**
21997** The first varint (if it exists) contains the size of the position list
21998** for phrase 0. The second (same disclaimer) contains the size of position
21999** list 1. And so on. There is no size field for the final position list,
22000** as it can be derived from the total size of the blob.
22001*/
22002static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){
22003 int i;
22004 int rc = SQLITE_OK0;
22005 int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
22006 Fts5Buffer val;
22007
22008 memset(&val, 0, sizeof(Fts5Buffer));
22009 switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){
22010 case FTS5_DETAIL_FULL0:
22011
22012 /* Append the varints */
22013 for(i=0; i<(nPhrase-1); i++){
22014 const u8 *dummy;
22015 int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy);
22016 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
22017 }
22018
22019 /* Append the position lists */
22020 for(i=0; i<nPhrase; i++){
22021 const u8 *pPoslist;
22022 int nPoslist;
22023 nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist);
22024 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
22025 }
22026 break;
22027
22028 case FTS5_DETAIL_COLUMNS2:
22029
22030 /* Append the varints */
22031 for(i=0; rc==SQLITE_OK0 && i<(nPhrase-1); i++){
22032 const u8 *dummy;
22033 int nByte;
22034 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte);
22035 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
22036 }
22037
22038 /* Append the position lists */
22039 for(i=0; rc==SQLITE_OK0 && i<nPhrase; i++){
22040 const u8 *pPoslist;
22041 int nPoslist;
22042 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist);
22043 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
22044 }
22045 break;
22046
22047 default:
22048 break;
22049 }
22050
22051 sqlite3_result_blobsqlite3_api->result_blob(pCtx, val.p, val.n, sqlite3_freesqlite3_api->free);
22052 return rc;
22053}
22054
22055/*
22056** This is the xColumn method, called by SQLite to request a value from
22057** the row that the supplied cursor currently points to.
22058*/
22059static int fts5ColumnMethod(
22060 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
22061 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
22062 int iCol /* Index of column to read value from */
22063){
22064 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
22065 Fts5Config *pConfig = pTab->p.pConfig;
22066 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
22067 int rc = SQLITE_OK0;
22068
22069 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 )((void) (0));
22070
22071 if( pCsr->ePlan==FTS5_PLAN_SPECIAL3 ){
22072 if( iCol==pConfig->nCol ){
22073 sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->iSpecial);
22074 }
22075 }else
22076
22077 if( iCol==pConfig->nCol ){
22078 /* User is requesting the value of the special column with the same name
22079 ** as the table. Return the cursor integer id number. This value is only
22080 ** useful in that it may be passed as the first argument to an FTS5
22081 ** auxiliary function. */
22082 sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->iCsrId);
22083 }else if( iCol==pConfig->nCol+1 ){
22084 /* The value of the "rank" column. */
22085
22086 if( pCsr->ePlan==FTS5_PLAN_SOURCE2 ){
22087 fts5PoslistBlob(pCtx, pCsr);
22088 }else if(
22089 pCsr->ePlan==FTS5_PLAN_MATCH1
22090 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH4
22091 ){
22092 if( pCsr->pRank || SQLITE_OK0==(rc = fts5FindRankFunction(pCsr)) ){
22093 fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg);
22094 }
22095 }
22096 }else{
22097 if( !sqlite3_vtab_nochangesqlite3_api->vtab_nochange(pCtx) && pConfig->eContent!=FTS5_CONTENT_NONE1 ){
22098 pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
22099 rc = fts5SeekCursor(pCsr, 1);
22100 if( rc==SQLITE_OK0 ){
22101 sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pCsr->pStmt, iCol+1);
22102 if( pConfig->bLocale
22103 && pConfig->eContent==FTS5_CONTENT_EXTERNAL2
22104 && sqlite3Fts5IsLocaleValue(pConfig, pVal)
22105 ){
22106 const char *z = 0;
22107 int n = 0;
22108 rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &z, &n);
22109 if( rc==SQLITE_OK0 ){
22110 sqlite3_result_textsqlite3_api->result_text(pCtx, z, n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
22111 }
22112 sqlite3Fts5ClearLocale(pConfig);
22113 }else{
22114 sqlite3_result_valuesqlite3_api->result_value(pCtx, pVal);
22115 }
22116 }
22117
22118 pConfig->pzErrmsg = 0;
22119 }
22120 }
22121
22122 return rc;
22123}
22124
22125
22126/*
22127** This routine implements the xFindFunction method for the FTS3
22128** virtual table.
22129*/
22130static int fts5FindFunctionMethod(
22131 sqlite3_vtab *pVtab, /* Virtual table handle */
22132 int nUnused, /* Number of SQL function arguments */
22133 const char *zName, /* Name of SQL function */
22134 void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
22135 void **ppArg /* OUT: User data for *pxFunc */
22136){
22137 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
22138 Fts5Auxiliary *pAux;
22139
22140 UNUSED_PARAM(nUnused)(void)(nUnused);
22141 pAux = fts5FindAuxiliary(pTab, zName);
22142 if( pAux ){
22143 *pxFunc = fts5ApiCallback;
22144 *ppArg = (void*)pAux;
22145 return 1;
22146 }
22147
22148 /* No function of the specified name was found. Return 0. */
22149 return 0;
22150}
22151
22152/*
22153** Implementation of FTS5 xRename method. Rename an fts5 table.
22154*/
22155static int fts5RenameMethod(
22156 sqlite3_vtab *pVtab, /* Virtual table handle */
22157 const char *zName /* New name of table */
22158){
22159 int rc;
22160 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
22161 rc = sqlite3Fts5StorageRename(pTab->pStorage, zName);
22162 return rc;
22163}
22164
22165static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){
22166 fts5TripCursors((Fts5FullTable*)pTab);
22167 return sqlite3Fts5StorageSync(((Fts5FullTable*)pTab)->pStorage);
22168}
22169
22170/*
22171** The xSavepoint() method.
22172**
22173** Flush the contents of the pending-terms table to disk.
22174*/
22175static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
22176 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
22177 int rc = SQLITE_OK0;
22178
22179 fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint);
22180 rc = sqlite3Fts5FlushToDisk((Fts5Table*)pVtab);
22181 if( rc==SQLITE_OK0 ){
22182 pTab->iSavepoint = iSavepoint+1;
22183 }
22184 return rc;
22185}
22186
22187/*
22188** The xRelease() method.
22189**
22190** This is a no-op.
22191*/
22192static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
22193 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
22194 int rc = SQLITE_OK0;
22195 fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint);
22196 if( (iSavepoint+1)<pTab->iSavepoint ){
22197 rc = sqlite3Fts5FlushToDisk(&pTab->p);
22198 if( rc==SQLITE_OK0 ){
22199 pTab->iSavepoint = iSavepoint;
22200 }
22201 }
22202 return rc;
22203}
22204
22205/*
22206** The xRollbackTo() method.
22207**
22208** Discard the contents of the pending terms table.
22209*/
22210static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
22211 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
22212 int rc = SQLITE_OK0;
22213 fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint);
22214 fts5TripCursors(pTab);
22215 if( (iSavepoint+1)<=pTab->iSavepoint ){
22216 pTab->p.pConfig->pgsz = 0;
22217 rc = sqlite3Fts5StorageRollback(pTab->pStorage);
22218 }
22219 return rc;
22220}
22221
22222/*
22223** Register a new auxiliary function with global context pGlobal.
22224*/
22225static int fts5CreateAux(
22226 fts5_api *pApi, /* Global context (one per db handle) */
22227 const char *zName, /* Name of new function */
22228 void *pUserData, /* User data for aux. function */
22229 fts5_extension_function xFunc, /* Aux. function implementation */
22230 void(*xDestroy)(void*) /* Destructor for pUserData */
22231){
22232 Fts5Global *pGlobal = (Fts5Global*)pApi;
22233 int rc = sqlite3_overload_functionsqlite3_api->overload_function(pGlobal->db, zName, -1);
22234 if( rc==SQLITE_OK0 ){
22235 Fts5Auxiliary *pAux;
22236 sqlite3_int64 nName; /* Size of zName in bytes, including \0 */
22237 sqlite3_int64 nByte; /* Bytes of space to allocate */
22238
22239 nName = strlen(zName) + 1;
22240 nByte = sizeof(Fts5Auxiliary) + nName;
22241 pAux = (Fts5Auxiliary*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
22242 if( pAux ){
22243 memset(pAux, 0, (size_t)nByte);
22244 pAux->zFunc = (char*)&pAux[1];
22245 memcpy(pAux->zFunc, zName, nName);
22246 pAux->pGlobal = pGlobal;
22247 pAux->pUserData = pUserData;
22248 pAux->xFunc = xFunc;
22249 pAux->xDestroy = xDestroy;
22250 pAux->pNext = pGlobal->pAux;
22251 pGlobal->pAux = pAux;
22252 }else{
22253 rc = SQLITE_NOMEM7;
22254 }
22255 }
22256
22257 return rc;
22258}
22259
22260/*
22261** This function is used by xCreateTokenizer_v2() and xCreateTokenizer().
22262** It allocates and partially populates a new Fts5TokenizerModule object.
22263** The new object is already linked into the Fts5Global context before
22264** returning.
22265**
22266** If successful, SQLITE_OK is returned and a pointer to the new
22267** Fts5TokenizerModule object returned via output parameter (*ppNew). All
22268** that is required is for the caller to fill in the methods in
22269** Fts5TokenizerModule.x1 and x2, and to set Fts5TokenizerModule.bV2Native
22270** as appropriate.
22271**
22272** If an error occurs, an SQLite error code is returned and the final value
22273** of (*ppNew) undefined.
22274*/
22275static int fts5NewTokenizerModule(
22276 Fts5Global *pGlobal, /* Global context (one per db handle) */
22277 const char *zName, /* Name of new function */
22278 void *pUserData, /* User data for aux. function */
22279 void(*xDestroy)(void*), /* Destructor for pUserData */
22280 Fts5TokenizerModule **ppNew
22281){
22282 int rc = SQLITE_OK0;
22283 Fts5TokenizerModule *pNew;
22284 sqlite3_int64 nName; /* Size of zName and its \0 terminator */
22285 sqlite3_int64 nByte; /* Bytes of space to allocate */
22286
22287 nName = strlen(zName) + 1;
22288 nByte = sizeof(Fts5TokenizerModule) + nName;
22289 *ppNew = pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte);
22290 if( pNew ){
22291 pNew->zName = (char*)&pNew[1];
22292 memcpy(pNew->zName, zName, nName);
22293 pNew->pUserData = pUserData;
22294 pNew->xDestroy = xDestroy;
22295 pNew->pNext = pGlobal->pTok;
22296 pGlobal->pTok = pNew;
22297 if( pNew->pNext==0 ){
22298 pGlobal->pDfltTok = pNew;
22299 }
22300 }
22301
22302 return rc;
22303}
22304
22305/*
22306** An instance of this type is used as the Fts5Tokenizer object for
22307** wrapper tokenizers - those that provide access to a v1 tokenizer via
22308** the fts5_tokenizer_v2 API, and those that provide access to a v2 tokenizer
22309** via the fts5_tokenizer API.
22310*/
22311typedef struct Fts5VtoVTokenizer Fts5VtoVTokenizer;
22312struct Fts5VtoVTokenizer {
22313 int bV2Native; /* True if v2 native tokenizer */
22314 fts5_tokenizer x1; /* Tokenizer functions */
22315 fts5_tokenizer_v2 x2; /* V2 tokenizer functions */
22316 Fts5Tokenizer *pReal;
22317};
22318
22319/*
22320** Create a wrapper tokenizer. The context argument pCtx points to the
22321** Fts5TokenizerModule object.
22322*/
22323static int fts5VtoVCreate(
22324 void *pCtx,
22325 const char **azArg,
22326 int nArg,
22327 Fts5Tokenizer **ppOut
22328){
22329 Fts5TokenizerModule *pMod = (Fts5TokenizerModule*)pCtx;
22330 Fts5VtoVTokenizer *pNew = 0;
22331 int rc = SQLITE_OK0;
22332
22333 pNew = (Fts5VtoVTokenizer*)sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
22334 if( rc==SQLITE_OK0 ){
22335 pNew->x1 = pMod->x1;
22336 pNew->x2 = pMod->x2;
22337 pNew->bV2Native = pMod->bV2Native;
22338 if( pMod->bV2Native ){
22339 rc = pMod->x2.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal);
22340 }else{
22341 rc = pMod->x1.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal);
22342 }
22343 if( rc!=SQLITE_OK0 ){
22344 sqlite3_freesqlite3_api->free(pNew);
22345 pNew = 0;
22346 }
22347 }
22348
22349 *ppOut = (Fts5Tokenizer*)pNew;
22350 return rc;
22351}
22352
22353/*
22354** Delete an Fts5VtoVTokenizer wrapper tokenizer.
22355*/
22356static void fts5VtoVDelete(Fts5Tokenizer *pTok){
22357 Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
22358 if( p ){
22359 if( p->bV2Native ){
22360 p->x2.xDelete(p->pReal);
22361 }else{
22362 p->x1.xDelete(p->pReal);
22363 }
22364 sqlite3_freesqlite3_api->free(p);
22365 }
22366}
22367
22368
22369/*
22370** xTokenizer method for a wrapper tokenizer that offers the v1 interface
22371** (no support for locales).
22372*/
22373static int fts5V1toV2Tokenize(
22374 Fts5Tokenizer *pTok,
22375 void *pCtx, int flags,
22376 const char *pText, int nText,
22377 int (*xToken)(void*, int, const char*, int, int, int)
22378){
22379 Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
22380 assert( p->bV2Native )((void) (0));
22381 return p->x2.xTokenize(p->pReal, pCtx, flags, pText, nText, 0, 0, xToken);
22382}
22383
22384/*
22385** xTokenizer method for a wrapper tokenizer that offers the v2 interface
22386** (with locale support).
22387*/
22388static int fts5V2toV1Tokenize(
22389 Fts5Tokenizer *pTok,
22390 void *pCtx, int flags,
22391 const char *pText, int nText,
22392 const char *pLocale, int nLocale,
22393 int (*xToken)(void*, int, const char*, int, int, int)
22394){
22395 Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok;
22396 assert( p->bV2Native==0 )((void) (0));
22397 UNUSED_PARAM2(pLocale,nLocale)(void)(pLocale), (void)(nLocale);
22398 return p->x1.xTokenize(p->pReal, pCtx, flags, pText, nText, xToken);
22399}
22400
22401/*
22402** Register a new tokenizer. This is the implementation of the
22403** fts5_api.xCreateTokenizer_v2() method.
22404*/
22405static int fts5CreateTokenizer_v2(
22406 fts5_api *pApi, /* Global context (one per db handle) */
22407 const char *zName, /* Name of new function */
22408 void *pUserData, /* User data for aux. function */
22409 fts5_tokenizer_v2 *pTokenizer, /* Tokenizer implementation */
22410 void(*xDestroy)(void*) /* Destructor for pUserData */
22411){
22412 Fts5Global *pGlobal = (Fts5Global*)pApi;
22413 int rc = SQLITE_OK0;
22414
22415 if( pTokenizer->iVersion>2 ){
22416 rc = SQLITE_ERROR1;
22417 }else{
22418 Fts5TokenizerModule *pNew = 0;
22419 rc = fts5NewTokenizerModule(pGlobal, zName, pUserData, xDestroy, &pNew);
22420 if( pNew ){
22421 pNew->x2 = *pTokenizer;
22422 pNew->bV2Native = 1;
22423 pNew->x1.xCreate = fts5VtoVCreate;
22424 pNew->x1.xTokenize = fts5V1toV2Tokenize;
22425 pNew->x1.xDelete = fts5VtoVDelete;
22426 }
22427 }
22428
22429 return rc;
22430}
22431
22432/*
22433** The fts5_api.xCreateTokenizer() method.
22434*/
22435static int fts5CreateTokenizer(
22436 fts5_api *pApi, /* Global context (one per db handle) */
22437 const char *zName, /* Name of new function */
22438 void *pUserData, /* User data for aux. function */
22439 fts5_tokenizer *pTokenizer, /* Tokenizer implementation */
22440 void(*xDestroy)(void*) /* Destructor for pUserData */
22441){
22442 Fts5TokenizerModule *pNew = 0;
22443 int rc = SQLITE_OK0;
22444
22445 rc = fts5NewTokenizerModule(
22446 (Fts5Global*)pApi, zName, pUserData, xDestroy, &pNew
22447 );
22448 if( pNew ){
22449 pNew->x1 = *pTokenizer;
22450 pNew->x2.xCreate = fts5VtoVCreate;
22451 pNew->x2.xTokenize = fts5V2toV1Tokenize;
22452 pNew->x2.xDelete = fts5VtoVDelete;
22453 }
22454 return rc;
22455}
22456
22457/*
22458** Search the global context passed as the first argument for a tokenizer
22459** module named zName. If found, return a pointer to the Fts5TokenizerModule
22460** object. Otherwise, return NULL.
22461*/
22462static Fts5TokenizerModule *fts5LocateTokenizer(
22463 Fts5Global *pGlobal, /* Global (one per db handle) object */
22464 const char *zName /* Name of tokenizer module to find */
22465){
22466 Fts5TokenizerModule *pMod = 0;
22467
22468 if( zName==0 ){
22469 pMod = pGlobal->pDfltTok;
22470 }else{
22471 for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){
22472 if( sqlite3_stricmpsqlite3_api->stricmp(zName, pMod->zName)==0 ) break;
22473 }
22474 }
22475
22476 return pMod;
22477}
22478
22479/*
22480** Find a tokenizer. This is the implementation of the
22481** fts5_api.xFindTokenizer_v2() method.
22482*/
22483static int fts5FindTokenizer_v2(
22484 fts5_api *pApi, /* Global context (one per db handle) */
22485 const char *zName, /* Name of tokenizer */
22486 void **ppUserData,
22487 fts5_tokenizer_v2 **ppTokenizer /* Populate this object */
22488){
22489 int rc = SQLITE_OK0;
22490 Fts5TokenizerModule *pMod;
22491
22492 pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
22493 if( pMod ){
22494 if( pMod->bV2Native ){
22495 *ppUserData = pMod->pUserData;
22496 }else{
22497 *ppUserData = (void*)pMod;
22498 }
22499 *ppTokenizer = &pMod->x2;
22500 }else{
22501 *ppTokenizer = 0;
22502 *ppUserData = 0;
22503 rc = SQLITE_ERROR1;
22504 }
22505
22506 return rc;
22507}
22508
22509/*
22510** Find a tokenizer. This is the implementation of the
22511** fts5_api.xFindTokenizer() method.
22512*/
22513static int fts5FindTokenizer(
22514 fts5_api *pApi, /* Global context (one per db handle) */
22515 const char *zName, /* Name of new function */
22516 void **ppUserData,
22517 fts5_tokenizer *pTokenizer /* Populate this object */
22518){
22519 int rc = SQLITE_OK0;
22520 Fts5TokenizerModule *pMod;
22521
22522 pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
22523 if( pMod ){
22524 if( pMod->bV2Native==0 ){
22525 *ppUserData = pMod->pUserData;
22526 }else{
22527 *ppUserData = (void*)pMod;
22528 }
22529 *pTokenizer = pMod->x1;
22530 }else{
22531 memset(pTokenizer, 0, sizeof(*pTokenizer));
22532 *ppUserData = 0;
22533 rc = SQLITE_ERROR1;
22534 }
22535
22536 return rc;
22537}
22538
22539/*
22540** Attempt to instantiate the tokenizer.
22541*/
22542static int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){
22543 const char **azArg = pConfig->t.azArg;
22544 const int nArg = pConfig->t.nArg;
22545 Fts5TokenizerModule *pMod = 0;
22546 int rc = SQLITE_OK0;
22547
22548 pMod = fts5LocateTokenizer(pConfig->pGlobal, nArg==0 ? 0 : azArg[0]);
22549 if( pMod==0 ){
22550 assert( nArg>0 )((void) (0));
22551 rc = SQLITE_ERROR1;
22552 sqlite3Fts5ConfigErrmsg(pConfig, "no such tokenizer: %s", azArg[0]);
22553 }else{
22554 int (*xCreate)(void*, const char**, int, Fts5Tokenizer**) = 0;
22555 if( pMod->bV2Native ){
22556 xCreate = pMod->x2.xCreate;
22557 pConfig->t.pApi2 = &pMod->x2;
22558 }else{
22559 pConfig->t.pApi1 = &pMod->x1;
22560 xCreate = pMod->x1.xCreate;
22561 }
22562
22563 rc = xCreate(pMod->pUserData,
22564 (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok
22565 );
22566
22567 if( rc!=SQLITE_OK0 ){
22568 if( rc!=SQLITE_NOMEM7 ){
22569 sqlite3Fts5ConfigErrmsg(pConfig, "error in tokenizer constructor");
22570 }
22571 }else if( pMod->bV2Native==0 ){
22572 pConfig->t.ePattern = sqlite3Fts5TokenizerPattern(
22573 pMod->x1.xCreate, pConfig->t.pTok
22574 );
22575 }
22576 }
22577
22578 if( rc!=SQLITE_OK0 ){
22579 pConfig->t.pApi1 = 0;
22580 pConfig->t.pApi2 = 0;
22581 pConfig->t.pTok = 0;
22582 }
22583
22584 return rc;
22585}
22586
22587
22588/*
22589** xDestroy callback passed to sqlite3_create_module(). This is invoked
22590** when the db handle is being closed. Free memory associated with
22591** tokenizers and aux functions registered with this db handle.
22592*/
22593static void fts5ModuleDestroy(void *pCtx){
22594 Fts5TokenizerModule *pTok, *pNextTok;
22595 Fts5Auxiliary *pAux, *pNextAux;
22596 Fts5Global *pGlobal = (Fts5Global*)pCtx;
22597
22598 for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){
22599 pNextAux = pAux->pNext;
22600 if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData);
22601 sqlite3_freesqlite3_api->free(pAux);
22602 }
22603
22604 for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){
22605 pNextTok = pTok->pNext;
22606 if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData);
22607 sqlite3_freesqlite3_api->free(pTok);
22608 }
22609
22610 sqlite3_freesqlite3_api->free(pGlobal);
22611}
22612
22613/*
22614** Implementation of the fts5() function used by clients to obtain the
22615** API pointer.
22616*/
22617static void fts5Fts5Func(
22618 sqlite3_context *pCtx, /* Function call context */
22619 int nArg, /* Number of args */
22620 sqlite3_value **apArg /* Function arguments */
22621){
22622 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx);
22623 fts5_api **ppApi;
22624 UNUSED_PARAM(nArg)(void)(nArg);
22625 assert( nArg==1 )((void) (0));
22626 ppApi = (fts5_api**)sqlite3_value_pointersqlite3_api->value_pointer(apArg[0], "fts5_api_ptr");
22627 if( ppApi ) *ppApi = &pGlobal->api;
22628}
22629
22630/*
22631** Implementation of fts5_source_id() function.
22632*/
22633static void fts5SourceIdFunc(
22634 sqlite3_context *pCtx, /* Function call context */
22635 int nArg, /* Number of args */
22636 sqlite3_value **apUnused /* Function arguments */
22637){
22638 assert( nArg==0 )((void) (0));
22639 UNUSED_PARAM2(nArg, apUnused)(void)(nArg), (void)(apUnused);
22640 sqlite3_result_textsqlite3_api->result_text(pCtx, "fts5: 2025-06-06 14:52:32 b77dc5e0f596d2140d9ac682b2893ff65d3a4140aa86067a3efebe29dc914c95", -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
22641}
22642
22643/*
22644** Implementation of fts5_locale(LOCALE, TEXT) function.
22645**
22646** If parameter LOCALE is NULL, or a zero-length string, then a copy of
22647** TEXT is returned. Otherwise, both LOCALE and TEXT are interpreted as
22648** text, and the value returned is a blob consisting of:
22649**
22650** * The 4 bytes 0x00, 0xE0, 0xB2, 0xEb (FTS5_LOCALE_HEADER).
22651** * The LOCALE, as utf-8 text, followed by
22652** * 0x00, followed by
22653** * The TEXT, as utf-8 text.
22654**
22655** There is no final nul-terminator following the TEXT value.
22656*/
22657static void fts5LocaleFunc(
22658 sqlite3_context *pCtx, /* Function call context */
22659 int nArg, /* Number of args */
22660 sqlite3_value **apArg /* Function arguments */
22661){
22662 const char *zLocale = 0;
22663 int nLocale = 0;
22664 const char *zText = 0;
22665 int nText = 0;
22666
22667 assert( nArg==2 )((void) (0));
22668 UNUSED_PARAM(nArg)(void)(nArg);
22669
22670 zLocale = (const char*)sqlite3_value_textsqlite3_api->value_text(apArg[0]);
22671 nLocale = sqlite3_value_bytessqlite3_api->value_bytes(apArg[0]);
22672
22673 zText = (const char*)sqlite3_value_textsqlite3_api->value_text(apArg[1]);
22674 nText = sqlite3_value_bytessqlite3_api->value_bytes(apArg[1]);
22675
22676 if( zLocale==0 || zLocale[0]=='\0' ){
22677 sqlite3_result_textsqlite3_api->result_text(pCtx, zText, nText, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
22678 }else{
22679 Fts5Global *p = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx);
22680 u8 *pBlob = 0;
22681 u8 *pCsr = 0;
22682 int nBlob = 0;
22683
22684 nBlob = FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) + nLocale + 1 + nText;
22685 pBlob = (u8*)sqlite3_mallocsqlite3_api->malloc(nBlob);
22686 if( pBlob==0 ){
22687 sqlite3_result_error_nomemsqlite3_api->result_error_nomem(pCtx);
22688 return;
22689 }
22690
22691 pCsr = pBlob;
22692 memcpy(pCsr, (const u8*)p->aLocaleHdr, FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )));
22693 pCsr += FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ));
22694 memcpy(pCsr, zLocale, nLocale);
22695 pCsr += nLocale;
22696 (*pCsr++) = 0x00;
22697 if( zText ) memcpy(pCsr, zText, nText);
22698 assert( &pCsr[nText]==&pBlob[nBlob] )((void) (0));
22699
22700 sqlite3_result_blobsqlite3_api->result_blob(pCtx, pBlob, nBlob, sqlite3_freesqlite3_api->free);
22701 }
22702}
22703
22704/*
22705** Implementation of fts5_insttoken() function.
22706*/
22707static void fts5InsttokenFunc(
22708 sqlite3_context *pCtx, /* Function call context */
22709 int nArg, /* Number of args */
22710 sqlite3_value **apArg /* Function arguments */
22711){
22712 assert( nArg==1 )((void) (0));
22713 (void)nArg;
22714 sqlite3_result_valuesqlite3_api->result_value(pCtx, apArg[0]);
22715 sqlite3_result_subtypesqlite3_api->result_subtype(pCtx, FTS5_INSTTOKEN_SUBTYPE73);
22716}
22717
/*
** Return 1 if zName matches (using sqlite3_stricmp()) the name suffix of
** one of the shadow tables used by this module - "config", "content",
** "data", "docsize" or "idx". Return 0 otherwise.
*/
static int fts5ShadowName(const char *zName){
  static const char *azName[] = {
    "config", "content", "data", "docsize", "idx"
  };
  const unsigned int nName = sizeof(azName)/sizeof(azName[0]);
  unsigned int i = 0;
  int bMatch = 0;

  while( bMatch==0 && i<nName ){
    bMatch = (sqlite3_stricmp(zName, azName[i])==0);
    i++;
  }
  return bMatch;
}
22732
22733/*
22734** Run an integrity check on the FTS5 data structures. Return a string
22735** if anything is found amiss. Return a NULL pointer if everything is
22736** OK.
22737*/
22738static int fts5IntegrityMethod(
22739 sqlite3_vtab *pVtab, /* the FTS5 virtual table to check */
22740 const char *zSchema, /* Name of schema in which this table lives */
22741 const char *zTabname, /* Name of the table itself */
22742 int isQuick, /* True if this is a quick-check */
22743 char **pzErr /* Write error message here */
22744){
22745 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
22746 int rc;
22747
22748 assert( pzErr!=0 && *pzErr==0 )((void) (0));
22749 UNUSED_PARAM(isQuick)(void)(isQuick);
22750 assert( pTab->p.pConfig->pzErrmsg==0 )((void) (0));
22751 pTab->p.pConfig->pzErrmsg = pzErr;
22752 rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, 0);
1
Calling 'sqlite3Fts5StorageIntegrity'
22753 if( *pzErr==0 && rc!=SQLITE_OK0 ){
22754 if( (rc&0xff)==SQLITE_CORRUPT11 ){
22755 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed inverted index for FTS5 table %s.%s",
22756 zSchema, zTabname);
22757 rc = (*pzErr) ? SQLITE_OK0 : SQLITE_NOMEM7;
22758 }else{
22759 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unable to validate the inverted index for"
22760 " FTS5 table %s.%s: %s",
22761 zSchema, zTabname, sqlite3_errstrsqlite3_api->errstr(rc));
22762 }
22763 }
22764
22765 sqlite3Fts5IndexCloseReader(pTab->p.pIndex);
22766 pTab->p.pConfig->pzErrmsg = 0;
22767
22768 return rc;
22769}
22770
22771static int fts5Init(sqlite3 *db){
22772 static const sqlite3_module fts5Mod = {
22773 /* iVersion */ 4,
22774 /* xCreate */ fts5CreateMethod,
22775 /* xConnect */ fts5ConnectMethod,
22776 /* xBestIndex */ fts5BestIndexMethod,
22777 /* xDisconnect */ fts5DisconnectMethod,
22778 /* xDestroy */ fts5DestroyMethod,
22779 /* xOpen */ fts5OpenMethod,
22780 /* xClose */ fts5CloseMethod,
22781 /* xFilter */ fts5FilterMethod,
22782 /* xNext */ fts5NextMethod,
22783 /* xEof */ fts5EofMethod,
22784 /* xColumn */ fts5ColumnMethod,
22785 /* xRowid */ fts5RowidMethod,
22786 /* xUpdate */ fts5UpdateMethod,
22787 /* xBegin */ fts5BeginMethod,
22788 /* xSync */ fts5SyncMethod,
22789 /* xCommit */ fts5CommitMethod,
22790 /* xRollback */ fts5RollbackMethod,
22791 /* xFindFunction */ fts5FindFunctionMethod,
22792 /* xRename */ fts5RenameMethod,
22793 /* xSavepoint */ fts5SavepointMethod,
22794 /* xRelease */ fts5ReleaseMethod,
22795 /* xRollbackTo */ fts5RollbackToMethod,
22796 /* xShadowName */ fts5ShadowName,
22797 /* xIntegrity */ fts5IntegrityMethod
22798 };
22799
22800 int rc;
22801 Fts5Global *pGlobal = 0;
22802
22803 pGlobal = (Fts5Global*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Global));
22804 if( pGlobal==0 ){
22805 rc = SQLITE_NOMEM7;
22806 }else{
22807 void *p = (void*)pGlobal;
22808 memset(pGlobal, 0, sizeof(Fts5Global));
22809 pGlobal->db = db;
22810 pGlobal->api.iVersion = 3;
22811 pGlobal->api.xCreateFunction = fts5CreateAux;
22812 pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
22813 pGlobal->api.xFindTokenizer = fts5FindTokenizer;
22814 pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2;
22815 pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2;
22816
22817 /* Initialize pGlobal->aLocaleHdr[] to a 128-bit pseudo-random vector.
22818 ** The constants below were generated randomly. */
22819 sqlite3_randomnesssqlite3_api->randomness(sizeof(pGlobal->aLocaleHdr), pGlobal->aLocaleHdr);
22820 pGlobal->aLocaleHdr[0] ^= 0xF924976D;
22821 pGlobal->aLocaleHdr[1] ^= 0x16596E13;
22822 pGlobal->aLocaleHdr[2] ^= 0x7C80BEAA;
22823 pGlobal->aLocaleHdr[3] ^= 0x9B03A67F;
22824 assert( sizeof(pGlobal->aLocaleHdr)==16 )((void) (0));
22825
22826 rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
22827 if( rc==SQLITE_OK0 ) rc = sqlite3Fts5IndexInit(db);
22828 if( rc==SQLITE_OK0 ) rc = sqlite3Fts5ExprInit(pGlobal, db);
22829 if( rc==SQLITE_OK0 ) rc = sqlite3Fts5AuxInit(&pGlobal->api);
22830 if( rc==SQLITE_OK0 ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api);
22831 if( rc==SQLITE_OK0 ) rc = sqlite3Fts5VocabInit(pGlobal, db);
22832 if( rc==SQLITE_OK0 ){
22833 rc = sqlite3_create_functionsqlite3_api->create_function(
22834 db, "fts5", 1, SQLITE_UTF81, p, fts5Fts5Func, 0, 0
22835 );
22836 }
22837 if( rc==SQLITE_OK0 ){
22838 rc = sqlite3_create_functionsqlite3_api->create_function(
22839 db, "fts5_source_id", 0,
22840 SQLITE_UTF81|SQLITE_DETERMINISTIC0x000000800|SQLITE_INNOCUOUS0x000200000,
22841 p, fts5SourceIdFunc, 0, 0
22842 );
22843 }
22844 if( rc==SQLITE_OK0 ){
22845 rc = sqlite3_create_functionsqlite3_api->create_function(
22846 db, "fts5_locale", 2,
22847 SQLITE_UTF81|SQLITE_INNOCUOUS0x000200000|SQLITE_RESULT_SUBTYPE0x001000000|SQLITE_SUBTYPE0x000100000,
22848 p, fts5LocaleFunc, 0, 0
22849 );
22850 }
22851 if( rc==SQLITE_OK0 ){
22852 rc = sqlite3_create_functionsqlite3_api->create_function(
22853 db, "fts5_insttoken", 1,
22854 SQLITE_UTF81|SQLITE_INNOCUOUS0x000200000|SQLITE_RESULT_SUBTYPE0x001000000,
22855 p, fts5InsttokenFunc, 0, 0
22856 );
22857 }
22858 }
22859
22860 /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file
22861 ** fts5_test_mi.c is compiled and linked into the executable. And call
22862 ** its entry point to enable the matchinfo() demo. */
22863#ifdef SQLITE_FTS5_ENABLE_TEST_MI
22864 if( rc==SQLITE_OK0 ){
22865 extern int sqlite3Fts5TestRegisterMatchinfoAPI(fts5_api*);
22866 rc = sqlite3Fts5TestRegisterMatchinfoAPI(&pGlobal->api);
22867 }
22868#endif
22869
22870 return rc;
22871}
22872
22873/*
22874** The following functions are used to register the module with SQLite. If
22875** this module is being built as part of the SQLite core (SQLITE_CORE is
22876** defined), then sqlite3_open() will call sqlite3Fts5Init() directly.
22877**
22878** Or, if this module is being built as a loadable extension,
22879** sqlite3Fts5Init() is omitted and the two standard entry points
22880** sqlite3_fts_init() and sqlite3_fts5_init() defined instead.
22881*/
22882#ifndef SQLITE_CORE
22883#ifdef _WIN32
22884__declspec(dllexport)
22885#endif
22886int sqlite3_fts_init(
22887 sqlite3 *db,
22888 char **pzErrMsg,
22889 const sqlite3_api_routines *pApi
22890){
22891 SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;;
22892 (void)pzErrMsg; /* Unused parameter */
22893 return fts5Init(db);
22894}
22895
22896#ifdef _WIN32
22897__declspec(dllexport)
22898#endif
22899int sqlite3_fts5_init(
22900 sqlite3 *db,
22901 char **pzErrMsg,
22902 const sqlite3_api_routines *pApi
22903){
22904 SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;;
22905 (void)pzErrMsg; /* Unused parameter */
22906 return fts5Init(db);
22907}
22908#else
22909int sqlite3Fts5Init(sqlite3 *db){
22910 return fts5Init(db);
22911}
22912#endif
22913
22914#line 1 "fts5_storage.c"
22915/*
22916** 2014 May 31
22917**
22918** The author disclaims copyright to this source code. In place of
22919** a legal notice, here is a blessing:
22920**
22921** May you do good and not evil.
22922** May you find forgiveness for yourself and forgive others.
22923** May you share freely, never taking more than you give.
22924**
22925******************************************************************************
22926**
22927*/
22928
22929
22930
22931/* #include "fts5Int.h" */
22932
22933/*
22934** pSavedRow:
22935** SQL statement FTS5_STMT_LOOKUP2 is a copy of FTS5_STMT_LOOKUP, it
22936** does a by-rowid lookup to retrieve a single row from the %_content
22937** table or equivalent external-content table/view.
22938**
22939** However, FTS5_STMT_LOOKUP2 is only used when retrieving the original
22940** values for a row being UPDATEd. In that case, the SQL statement is
22941** not reset and pSavedRow is set to point at it. This is so that the
22942** insert operation that follows the delete may access the original
22943** row values for any new values for which sqlite3_value_nochange() returns
22944** true. i.e. if the user executes:
22945**
22946** CREATE VIRTUAL TABLE ft USING fts5(a, b, c, locale=1);
22947** ...
22948** UPDATE fts SET a=?, b=? WHERE rowid=?;
22949**
22950** then the value passed to the xUpdate() method of this table as the
22951** new.c value is an sqlite3_value_nochange() value. So in this case it
22952** must be read from the saved row stored in Fts5Storage.pSavedRow.
22953**
22954** This is necessary - using sqlite3_value_nochange() instead of just having
22955** SQLite pass the original value back via xUpdate() - so as not to discard
22956** any locale information associated with such values.
22957**
22958*/
22959struct Fts5Storage {
22960 Fts5Config *pConfig;
22961 Fts5Index *pIndex;
22962 int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */
22963 i64 nTotalRow; /* Total number of rows in FTS table */
22964 i64 *aTotalSize; /* Total sizes of each column */
22965 sqlite3_stmt *pSavedRow;
22966 sqlite3_stmt *aStmt[12];
22967};
22968
22969
22970#if FTS5_STMT_SCAN_ASC0!=0
22971# error "FTS5_STMT_SCAN_ASC mismatch"
22972#endif
22973#if FTS5_STMT_SCAN_DESC1!=1
22974# error "FTS5_STMT_SCAN_DESC mismatch"
22975#endif
22976#if FTS5_STMT_LOOKUP2!=2
22977# error "FTS5_STMT_LOOKUP mismatch"
22978#endif
22979
22980#define FTS5_STMT_LOOKUP23 3
22981#define FTS5_STMT_INSERT_CONTENT4 4
22982#define FTS5_STMT_REPLACE_CONTENT5 5
22983#define FTS5_STMT_DELETE_CONTENT6 6
22984#define FTS5_STMT_REPLACE_DOCSIZE7 7
22985#define FTS5_STMT_DELETE_DOCSIZE8 8
22986#define FTS5_STMT_LOOKUP_DOCSIZE9 9
22987#define FTS5_STMT_REPLACE_CONFIG10 10
22988#define FTS5_STMT_SCAN11 11
22989
22990/*
22991** Prepare the two insert statements - Fts5Storage.pInsertContent and
22992** Fts5Storage.pInsertDocsize - if they have not already been prepared.
22993** Return SQLITE_OK if successful, or an SQLite error code if an error
22994** occurs.
22995*/
22996static int fts5StorageGetStmt(
22997 Fts5Storage *p, /* Storage handle */
22998 int eStmt, /* FTS5_STMT_XXX constant */
22999 sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */
23000 char **pzErrMsg /* OUT: Error message (if any) */
23001){
23002 int rc = SQLITE_OK0;
23003
23004 /* If there is no %_docsize table, there should be no requests for
23005 ** statements to operate on it. */
23006 assert( p->pConfig->bColumnsize || (((void) (0))
23007 eStmt!=FTS5_STMT_REPLACE_DOCSIZE((void) (0))
23008 && eStmt!=FTS5_STMT_DELETE_DOCSIZE((void) (0))
23009 && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE((void) (0))
23010 ))((void) (0));
23011
23012 assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) )((void) (0));
23013 if( p->aStmt[eStmt]==0 ){
23014 const char *azStmt[] = {
23015 "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC",
23016 "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC",
23017 "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */
23018 "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP2 */
23019
23020 "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */
23021 "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */
23022 "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */
23023 "REPLACE INTO %Q.'%q_docsize' VALUES(?,?%s)", /* REPLACE_DOCSIZE */
23024 "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */
23025
23026 "SELECT sz%s FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */
23027
23028 "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */
23029 "SELECT %s FROM %s AS T", /* SCAN */
23030 };
23031 Fts5Config *pC = p->pConfig;
23032 char *zSql = 0;
23033
23034 assert( ArraySize(azStmt)==ArraySize(p->aStmt) )((void) (0));
23035
23036 switch( eStmt ){
23037 case FTS5_STMT_SCAN11:
23038 zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt],
23039 pC->zContentExprlist, pC->zContent
23040 );
23041 break;
23042
23043 case FTS5_STMT_SCAN_ASC0:
23044 case FTS5_STMT_SCAN_DESC1:
23045 zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zContentExprlist,
23046 pC->zContent, pC->zContentRowid, pC->zContentRowid,
23047 pC->zContentRowid
23048 );
23049 break;
23050
23051 case FTS5_STMT_LOOKUP2:
23052 case FTS5_STMT_LOOKUP23:
23053 zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt],
23054 pC->zContentExprlist, pC->zContent, pC->zContentRowid
23055 );
23056 break;
23057
23058 case FTS5_STMT_INSERT_CONTENT4:
23059 case FTS5_STMT_REPLACE_CONTENT5: {
23060 char *zBind = 0;
23061 int i;
23062
23063 assert( pC->eContent==FTS5_CONTENT_NORMAL((void) (0))
23064 || pC->eContent==FTS5_CONTENT_UNINDEXED((void) (0))
23065 )((void) (0));
23066
23067 /* Add bindings for the "c*" columns - those that store the actual
23068 ** table content. If eContent==NORMAL, then there is one binding
23069 ** for each column. Or, if eContent==UNINDEXED, then there are only
23070 ** bindings for the UNINDEXED columns. */
23071 for(i=0; rc==SQLITE_OK0 && i<(pC->nCol+1); i++){
23072 if( !i || pC->eContent==FTS5_CONTENT_NORMAL0 || pC->abUnindexed[i-1] ){
23073 zBind = sqlite3Fts5Mprintf(&rc, "%z%s?%d", zBind, zBind?",":"",i+1);
23074 }
23075 }
23076
23077 /* Add bindings for any "l*" columns. Only non-UNINDEXED columns
23078 ** require these. */
23079 if( pC->bLocale && pC->eContent==FTS5_CONTENT_NORMAL0 ){
23080 for(i=0; rc==SQLITE_OK0 && i<pC->nCol; i++){
23081 if( pC->abUnindexed[i]==0 ){
23082 zBind = sqlite3Fts5Mprintf(&rc, "%z,?%d", zBind, pC->nCol+i+2);
23083 }
23084 }
23085 }
23086
23087 zSql = sqlite3Fts5Mprintf(&rc, azStmt[eStmt], pC->zDb, pC->zName,zBind);
23088 sqlite3_freesqlite3_api->free(zBind);
23089 break;
23090 }
23091
23092 case FTS5_STMT_REPLACE_DOCSIZE7:
23093 zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zDb, pC->zName,
23094 (pC->bContentlessDelete ? ",?" : "")
23095 );
23096 break;
23097
23098 case FTS5_STMT_LOOKUP_DOCSIZE9:
23099 zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt],
23100 (pC->bContentlessDelete ? ",origin" : ""),
23101 pC->zDb, pC->zName
23102 );
23103 break;
23104
23105 default:
23106 zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zDb, pC->zName);
23107 break;
23108 }
23109
23110 if( zSql==0 ){
23111 rc = SQLITE_NOMEM7;
23112 }else{
23113 int f = SQLITE_PREPARE_PERSISTENT0x01;
23114 if( eStmt>FTS5_STMT_LOOKUP23 ) f |= SQLITE_PREPARE_NO_VTAB0x04;
23115 p->pConfig->bLock++;
23116 rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0);
23117 p->pConfig->bLock--;
23118 sqlite3_freesqlite3_api->free(zSql);
23119 if( rc!=SQLITE_OK0 && pzErrMsg ){
23120 *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("%s", sqlite3_errmsgsqlite3_api->errmsg(pC->db));
23121 }
23122 if( rc==SQLITE_ERROR1 && eStmt>FTS5_STMT_LOOKUP23 && eStmt<FTS5_STMT_SCAN11 ){
23123 /* One of the internal tables - not the %_content table - is missing.
23124 ** This counts as a corrupted table. */
23125 rc = SQLITE_CORRUPT11;
23126 }
23127 }
23128 }
23129
23130 *ppStmt = p->aStmt[eStmt];
23131 sqlite3_resetsqlite3_api->reset(*ppStmt);
23132 return rc;
23133}
23134
23135
23136static int fts5ExecPrintf(
23137 sqlite3 *db,
23138 char **pzErr,
23139 const char *zFormat,
23140 ...
23141){
23142 int rc;
23143 va_list ap; /* ... printf arguments */
23144 char *zSql;
23145
23146 va_start(ap, zFormat)__builtin_va_start(ap, zFormat);
23147 zSql = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, ap);
23148
23149 if( zSql==0 ){
23150 rc = SQLITE_NOMEM7;
23151 }else{
23152 rc = sqlite3_execsqlite3_api->exec(db, zSql, 0, 0, pzErr);
23153 sqlite3_freesqlite3_api->free(zSql);
23154 }
23155
23156 va_end(ap)__builtin_va_end(ap);
23157 return rc;
23158}
23159
23160/*
23161** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error
23162** code otherwise.
23163*/
23164static int sqlite3Fts5DropAll(Fts5Config *pConfig){
23165 int rc = fts5ExecPrintf(pConfig->db, 0,
23166 "DROP TABLE IF EXISTS %Q.'%q_data';"
23167 "DROP TABLE IF EXISTS %Q.'%q_idx';"
23168 "DROP TABLE IF EXISTS %Q.'%q_config';",
23169 pConfig->zDb, pConfig->zName,
23170 pConfig->zDb, pConfig->zName,
23171 pConfig->zDb, pConfig->zName
23172 );
23173 if( rc==SQLITE_OK0 && pConfig->bColumnsize ){
23174 rc = fts5ExecPrintf(pConfig->db, 0,
23175 "DROP TABLE IF EXISTS %Q.'%q_docsize';",
23176 pConfig->zDb, pConfig->zName
23177 );
23178 }
23179 if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){
23180 rc = fts5ExecPrintf(pConfig->db, 0,
23181 "DROP TABLE IF EXISTS %Q.'%q_content';",
23182 pConfig->zDb, pConfig->zName
23183 );
23184 }
23185 return rc;
23186}
23187
23188static void fts5StorageRenameOne(
23189 Fts5Config *pConfig, /* Current FTS5 configuration */
23190 int *pRc, /* IN/OUT: Error code */
23191 const char *zTail, /* Tail of table name e.g. "data", "config" */
23192 const char *zName /* New name of FTS5 table */
23193){
23194 if( *pRc==SQLITE_OK0 ){
23195 *pRc = fts5ExecPrintf(pConfig->db, 0,
23196 "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';",
23197 pConfig->zDb, pConfig->zName, zTail, zName, zTail
23198 );
23199 }
23200}
23201
23202static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){
23203 Fts5Config *pConfig = pStorage->pConfig;
23204 int rc = sqlite3Fts5StorageSync(pStorage);
23205
23206 fts5StorageRenameOne(pConfig, &rc, "data", zName);
23207 fts5StorageRenameOne(pConfig, &rc, "idx", zName);
23208 fts5StorageRenameOne(pConfig, &rc, "config", zName);
23209 if( pConfig->bColumnsize ){
23210 fts5StorageRenameOne(pConfig, &rc, "docsize", zName);
23211 }
23212 if( pConfig->eContent==FTS5_CONTENT_NORMAL0 ){
23213 fts5StorageRenameOne(pConfig, &rc, "content", zName);
23214 }
23215 return rc;
23216}
23217
23218/*
23219** Create the shadow table named zPost, with definition zDefn. Return
23220** SQLITE_OK if successful, or an SQLite error code otherwise.
23221*/
23222static int sqlite3Fts5CreateTable(
23223 Fts5Config *pConfig, /* FTS5 configuration */
23224 const char *zPost, /* Shadow table to create (e.g. "content") */
23225 const char *zDefn, /* Columns etc. for shadow table */
23226 int bWithout, /* True for without rowid */
23227 char **pzErr /* OUT: Error message */
23228){
23229 int rc;
23230 char *zErr = 0;
23231
23232 rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s",
23233 pConfig->zDb, pConfig->zName, zPost, zDefn,
23234#ifndef SQLITE_FTS5_NO_WITHOUT_ROWID
23235 bWithout?" WITHOUT ROWID":
23236#endif
23237 ""
23238 );
23239 if( zErr ){
23240 *pzErr = sqlite3_mprintfsqlite3_api->mprintf(
23241 "fts5: error creating shadow table %q_%s: %s",
23242 pConfig->zName, zPost, zErr
23243 );
23244 sqlite3_freesqlite3_api->free(zErr);
23245 }
23246
23247 return rc;
23248}
23249
23250/*
23251** Open a new Fts5Index handle. If the bCreate argument is true, create
23252** and initialize the underlying tables
23253**
23254** If successful, set *pp to point to the new object and return SQLITE_OK.
23255** Otherwise, set *pp to NULL and return an SQLite error code.
23256*/
23257static int sqlite3Fts5StorageOpen(
23258 Fts5Config *pConfig,
23259 Fts5Index *pIndex,
23260 int bCreate,
23261 Fts5Storage **pp,
23262 char **pzErr /* OUT: Error message */
23263){
23264 int rc = SQLITE_OK0;
23265 Fts5Storage *p; /* New object */
23266 sqlite3_int64 nByte; /* Bytes of space to allocate */
23267
23268 nByte = sizeof(Fts5Storage) /* Fts5Storage object */
23269 + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */
23270 *pp = p = (Fts5Storage*)sqlite3_malloc64sqlite3_api->malloc64(nByte);
23271 if( !p ) return SQLITE_NOMEM7;
23272
23273 memset(p, 0, (size_t)nByte);
23274 p->aTotalSize = (i64*)&p[1];
23275 p->pConfig = pConfig;
23276 p->pIndex = pIndex;
23277
23278 if( bCreate ){
23279 if( pConfig->eContent==FTS5_CONTENT_NORMAL0
23280 || pConfig->eContent==FTS5_CONTENT_UNINDEXED3
23281 ){
23282 int nDefn = 32 + pConfig->nCol*10;
23283 char *zDefn = sqlite3_malloc64sqlite3_api->malloc64(32 + (sqlite3_int64)pConfig->nCol * 20);
23284 if( zDefn==0 ){
23285 rc = SQLITE_NOMEM7;
23286 }else{
23287 int i;
23288 int iOff;
23289 sqlite3_snprintfsqlite3_api->xsnprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY");
23290 iOff = (int)strlen(zDefn);
23291 for(i=0; i<pConfig->nCol; i++){
23292 if( pConfig->eContent==FTS5_CONTENT_NORMAL0
23293 || pConfig->abUnindexed[i]
23294 ){
23295 sqlite3_snprintfsqlite3_api->xsnprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i);
23296 iOff += (int)strlen(&zDefn[iOff]);
23297 }
23298 }
23299 if( pConfig->bLocale ){
23300 for(i=0; i<pConfig->nCol; i++){
23301 if( pConfig->abUnindexed[i]==0 ){
23302 sqlite3_snprintfsqlite3_api->xsnprintf(nDefn-iOff, &zDefn[iOff], ", l%d", i);
23303 iOff += (int)strlen(&zDefn[iOff]);
23304 }
23305 }
23306 }
23307 rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr);
23308 }
23309 sqlite3_freesqlite3_api->free(zDefn);
23310 }
23311
23312 if( rc==SQLITE_OK0 && pConfig->bColumnsize ){
23313 const char *zCols = "id INTEGER PRIMARY KEY, sz BLOB";
23314 if( pConfig->bContentlessDelete ){
23315 zCols = "id INTEGER PRIMARY KEY, sz BLOB, origin INTEGER";
23316 }
23317 rc = sqlite3Fts5CreateTable(pConfig, "docsize", zCols, 0, pzErr);
23318 }
23319 if( rc==SQLITE_OK0 ){
23320 rc = sqlite3Fts5CreateTable(
23321 pConfig, "config", "k PRIMARY KEY, v", 1, pzErr
23322 );
23323 }
23324 if( rc==SQLITE_OK0 ){
23325 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION4);
23326 }
23327 }
23328
23329 if( rc ){
23330 sqlite3Fts5StorageClose(p);
23331 *pp = 0;
23332 }
23333 return rc;
23334}
23335
23336/*
23337** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen().
23338*/
23339static int sqlite3Fts5StorageClose(Fts5Storage *p){
23340 int rc = SQLITE_OK0;
23341 if( p ){
23342 int i;
23343
23344 /* Finalize all SQL statements */
23345 for(i=0; i<ArraySize(p->aStmt)((int)(sizeof(p->aStmt) / sizeof(p->aStmt[0]))); i++){
23346 sqlite3_finalizesqlite3_api->finalize(p->aStmt[i]);
23347 }
23348
23349 sqlite3_freesqlite3_api->free(p);
23350 }
23351 return rc;
23352}
23353
23354typedef struct Fts5InsertCtx Fts5InsertCtx;
23355struct Fts5InsertCtx {
23356 Fts5Storage *pStorage;
23357 int iCol;
23358 int szCol; /* Size of column value in tokens */
23359};
23360
23361/*
23362** Tokenization callback used when inserting tokens into the FTS index.
23363*/
23364static int fts5StorageInsertCallback(
23365 void *pContext, /* Pointer to Fts5InsertCtx object */
23366 int tflags,
23367 const char *pToken, /* Buffer containing token */
23368 int nToken, /* Size of token in bytes */
23369 int iUnused1, /* Start offset of token */
23370 int iUnused2 /* End offset of token */
23371){
23372 Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
23373 Fts5Index *pIdx = pCtx->pStorage->pIndex;
23374 UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2);
23375 if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768;
23376 if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 || pCtx->szCol==0 ){
23377 pCtx->szCol++;
23378 }
23379 return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
23380}
23381
23382/*
23383** This function is used as part of an UPDATE statement that modifies the
23384** rowid of a row. In that case, this function is called first to set
23385** Fts5Storage.pSavedRow to point to a statement that may be used to
23386** access the original values of the row being deleted - iDel.
23387**
23388** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
23389** It is not considered an error if row iDel does not exist. In this case
23390** pSavedRow is not set and SQLITE_OK returned.
23391*/
23392static int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel){
23393 int rc = SQLITE_OK0;
23394 sqlite3_stmt *pSeek = 0;
23395
23396 assert( p->pSavedRow==0 )((void) (0));
23397 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2+1, &pSeek, 0);
23398 if( rc==SQLITE_OK0 ){
23399 sqlite3_bind_int64sqlite3_api->bind_int64(pSeek, 1, iDel);
23400 if( sqlite3_stepsqlite3_api->step(pSeek)!=SQLITE_ROW100 ){
23401 rc = sqlite3_resetsqlite3_api->reset(pSeek);
23402 }else{
23403 p->pSavedRow = pSeek;
23404 }
23405 }
23406
23407 return rc;
23408}
23409
23410/*
23411** If a row with rowid iDel is present in the %_content table, add the
23412** delete-markers to the FTS index necessary to delete it. Do not actually
23413** remove the %_content row at this time though.
23414**
23415** If parameter bSaveRow is true, then Fts5Storage.pSavedRow is left
23416** pointing to a statement (FTS5_STMT_LOOKUP2) that may be used to access
23417** the original values of the row being deleted. This is used by UPDATE
23418** statements.
23419*/
23420static int fts5StorageDeleteFromIndex(
23421 Fts5Storage *p,
23422 i64 iDel,
23423 sqlite3_value **apVal,
23424 int bSaveRow /* True to set pSavedRow */
23425){
23426 Fts5Config *pConfig = p->pConfig;
23427 sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */
23428 int rc = SQLITE_OK0; /* Return code */
23429 int rc2; /* sqlite3_reset() return code */
23430 int iCol;
23431 Fts5InsertCtx ctx;
23432
23433 assert( bSaveRow==0 || apVal==0 )((void) (0));
23434 assert( bSaveRow==0 || bSaveRow==1 )((void) (0));
23435 assert( FTS5_STMT_LOOKUP2==FTS5_STMT_LOOKUP+1 )((void) (0));
23436
23437 if( apVal==0 ){
23438 if( p->pSavedRow && bSaveRow ){
23439 pSeek = p->pSavedRow;
23440 p->pSavedRow = 0;
23441 }else{
23442 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2+bSaveRow, &pSeek, 0);
23443 if( rc!=SQLITE_OK0 ) return rc;
23444 sqlite3_bind_int64sqlite3_api->bind_int64(pSeek, 1, iDel);
23445 if( sqlite3_stepsqlite3_api->step(pSeek)!=SQLITE_ROW100 ){
23446 return sqlite3_resetsqlite3_api->reset(pSeek);
23447 }
23448 }
23449 }
23450
23451 ctx.pStorage = p;
23452 ctx.iCol = -1;
23453 for(iCol=1; rc==SQLITE_OK0 && iCol<=pConfig->nCol; iCol++){
23454 if( pConfig->abUnindexed[iCol-1]==0 ){
23455 sqlite3_value *pVal = 0;
23456 const char *pText = 0;
23457 int nText = 0;
23458 const char *pLoc = 0;
23459 int nLoc = 0;
23460
23461 assert( pSeek==0 || apVal==0 )((void) (0));
23462 assert( pSeek!=0 || apVal!=0 )((void) (0));
23463 if( pSeek ){
23464 pVal = sqlite3_column_valuesqlite3_api->column_value(pSeek, iCol);
23465 }else{
23466 pVal = apVal[iCol-1];
23467 }
23468
23469 if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
23470 rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
23471 }else{
23472 pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal);
23473 nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal);
23474 if( pConfig->bLocale && pSeek ){
23475 pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pSeek, iCol + pConfig->nCol);
23476 nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pSeek, iCol + pConfig->nCol);
23477 }
23478 }
23479
23480 if( rc==SQLITE_OK0 ){
23481 sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
23482 ctx.szCol = 0;
23483 rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT0x0004,
23484 pText, nText, (void*)&ctx, fts5StorageInsertCallback
23485 );
23486 p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
23487 if( rc==SQLITE_OK0 && p->aTotalSize[iCol-1]<0 ){
23488 rc = FTS5_CORRUPT(11 | (1<<8));
23489 }
23490 sqlite3Fts5ClearLocale(pConfig);
23491 }
23492 }
23493 }
23494 if( rc==SQLITE_OK0 && p->nTotalRow<1 ){
23495 rc = FTS5_CORRUPT(11 | (1<<8));
23496 }else{
23497 p->nTotalRow--;
23498 }
23499
23500 if( rc==SQLITE_OK0 && bSaveRow ){
23501 assert( p->pSavedRow==0 )((void) (0));
23502 p->pSavedRow = pSeek;
23503 }else{
23504 rc2 = sqlite3_resetsqlite3_api->reset(pSeek);
23505 if( rc==SQLITE_OK0 ) rc = rc2;
23506 }
23507 return rc;
23508}
23509
23510/*
23511** Reset any saved statement pSavedRow. Zero pSavedRow as well. This
23512** should be called by the xUpdate() method of the fts5 table before
23513** returning from any operation that may have set Fts5Storage.pSavedRow.
23514*/
23515static void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage *pStorage){
23516 assert( pStorage->pSavedRow==0((void) (0))
23517 || pStorage->pSavedRow==pStorage->aStmt[FTS5_STMT_LOOKUP2]((void) (0))
23518 )((void) (0));
23519 sqlite3_resetsqlite3_api->reset(pStorage->pSavedRow);
23520 pStorage->pSavedRow = 0;
23521}
23522
23523/*
23524** This function is called to process a DELETE on a contentless_delete=1
23525** table. It adds the tombstone required to delete the entry with rowid
23526** iDel. If successful, SQLITE_OK is returned. Or, if an error occurs,
23527** an SQLite error code.
23528*/
23529static int fts5StorageContentlessDelete(Fts5Storage *p, i64 iDel){
23530 i64 iOrigin = 0;
23531 sqlite3_stmt *pLookup = 0;
23532 int rc = SQLITE_OK0;
23533
23534 assert( p->pConfig->bContentlessDelete )((void) (0));
23535 assert( p->pConfig->eContent==FTS5_CONTENT_NONE((void) (0))
23536 || p->pConfig->eContent==FTS5_CONTENT_UNINDEXED((void) (0))
23537 )((void) (0));
23538
23539 /* Look up the origin of the document in the %_docsize table. Store
23540 ** this in stack variable iOrigin. */
23541 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE9, &pLookup, 0);
23542 if( rc==SQLITE_OK0 ){
23543 sqlite3_bind_int64sqlite3_api->bind_int64(pLookup, 1, iDel);
23544 if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pLookup) ){
23545 iOrigin = sqlite3_column_int64sqlite3_api->column_int64(pLookup, 1);
23546 }
23547 rc = sqlite3_resetsqlite3_api->reset(pLookup);
23548 }
23549
23550 if( rc==SQLITE_OK0 && iOrigin!=0 ){
23551 rc = sqlite3Fts5IndexContentlessDelete(p->pIndex, iOrigin, iDel);
23552 }
23553
23554 return rc;
23555}
23556
23557/*
23558** Insert a record into the %_docsize table. Specifically, do:
23559**
23560** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf);
23561**
23562** If there is no %_docsize table (as happens if the columnsize=0 option
23563** is specified when the FTS5 table is created), this function is a no-op.
23564*/
23565static int fts5StorageInsertDocsize(
23566 Fts5Storage *p, /* Storage module to write to */
23567 i64 iRowid, /* id value */
23568 Fts5Buffer *pBuf /* sz value */
23569){
23570 int rc = SQLITE_OK0;
23571 if( p->pConfig->bColumnsize ){
23572 sqlite3_stmt *pReplace = 0;
23573 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE7, &pReplace, 0);
23574 if( rc==SQLITE_OK0 ){
23575 sqlite3_bind_int64sqlite3_api->bind_int64(pReplace, 1, iRowid);
23576 if( p->pConfig->bContentlessDelete ){
23577 i64 iOrigin = 0;
23578 rc = sqlite3Fts5IndexGetOrigin(p->pIndex, &iOrigin);
23579 sqlite3_bind_int64sqlite3_api->bind_int64(pReplace, 3, iOrigin);
23580 }
23581 }
23582 if( rc==SQLITE_OK0 ){
23583 sqlite3_bind_blobsqlite3_api->bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC((sqlite3_destructor_type)0));
23584 sqlite3_stepsqlite3_api->step(pReplace);
23585 rc = sqlite3_resetsqlite3_api->reset(pReplace);
23586 sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 2);
23587 }
23588 }
23589 return rc;
23590}
23591
23592/*
23593** Load the contents of the "averages" record from disk into the
23594** p->nTotalRow and p->aTotalSize[] variables. If successful, and if
23595** argument bCache is true, set the p->bTotalsValid flag to indicate
23596** that the contents of aTotalSize[] and nTotalRow are valid until
23597** further notice.
23598**
23599** Return SQLITE_OK if successful, or an SQLite error code if an error
23600** occurs.
23601*/
23602static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
23603 int rc = SQLITE_OK0;
23604 if( p->bTotalsValid==0 ){
23605 rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);
23606 p->bTotalsValid = bCache;
23607 }
23608 return rc;
23609}
23610
23611/*
23612** Store the current contents of the p->nTotalRow and p->aTotalSize[]
23613** variables in the "averages" record on disk.
23614**
23615** Return SQLITE_OK if successful, or an SQLite error code if an error
23616** occurs.
23617*/
23618static int fts5StorageSaveTotals(Fts5Storage *p){
23619 int nCol = p->pConfig->nCol;
23620 int i;
23621 Fts5Buffer buf;
23622 int rc = SQLITE_OK0;
23623 memset(&buf, 0, sizeof(buf));
23624
23625 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow);
23626 for(i=0; i<nCol; i++){
23627 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]);
23628 }
23629 if( rc==SQLITE_OK0 ){
23630 rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n);
23631 }
23632 sqlite3_freesqlite3_api->free(buf.p);
23633
23634 return rc;
23635}
23636
23637/*
23638** Remove a row from the FTS table.
23639*/
23640static int sqlite3Fts5StorageDelete(
23641 Fts5Storage *p, /* Storage object */
23642 i64 iDel, /* Rowid to delete from table */
23643 sqlite3_value **apVal, /* Optional - values to remove from index */
23644 int bSaveRow /* If true, set pSavedRow for deleted row */
23645){
23646 Fts5Config *pConfig = p->pConfig;
23647 int rc;
23648 sqlite3_stmt *pDel = 0;
23649
23650 assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 )((void) (0));
23651 rc = fts5StorageLoadTotals(p, 1);
23652
23653 /* Delete the index records */
23654 if( rc==SQLITE_OK0 ){
23655 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel);
23656 }
23657
23658 if( rc==SQLITE_OK0 ){
23659 if( p->pConfig->bContentlessDelete ){
23660 rc = fts5StorageContentlessDelete(p, iDel);
23661 if( rc==SQLITE_OK0
23662 && bSaveRow
23663 && p->pConfig->eContent==FTS5_CONTENT_UNINDEXED3
23664 ){
23665 rc = sqlite3Fts5StorageFindDeleteRow(p, iDel);
23666 }
23667 }else{
23668 rc = fts5StorageDeleteFromIndex(p, iDel, apVal, bSaveRow);
23669 }
23670 }
23671
23672 /* Delete the %_docsize record */
23673 if( rc==SQLITE_OK0 && pConfig->bColumnsize ){
23674 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE8, &pDel, 0);
23675 if( rc==SQLITE_OK0 ){
23676 sqlite3_bind_int64sqlite3_api->bind_int64(pDel, 1, iDel);
23677 sqlite3_stepsqlite3_api->step(pDel);
23678 rc = sqlite3_resetsqlite3_api->reset(pDel);
23679 }
23680 }
23681
23682 /* Delete the %_content record */
23683 if( pConfig->eContent==FTS5_CONTENT_NORMAL0
23684 || pConfig->eContent==FTS5_CONTENT_UNINDEXED3
23685 ){
23686 if( rc==SQLITE_OK0 ){
23687 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT6, &pDel, 0);
23688 }
23689 if( rc==SQLITE_OK0 ){
23690 sqlite3_bind_int64sqlite3_api->bind_int64(pDel, 1, iDel);
23691 sqlite3_stepsqlite3_api->step(pDel);
23692 rc = sqlite3_resetsqlite3_api->reset(pDel);
23693 }
23694 }
23695
23696 return rc;
23697}
23698
23699/*
23700** Delete all entries in the FTS5 index.
23701*/
23702static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){
23703 Fts5Config *pConfig = p->pConfig;
23704 int rc;
23705
23706 p->bTotalsValid = 0;
23707
23708 /* Delete the contents of the %_data and %_docsize tables. */
23709 rc = fts5ExecPrintf(pConfig->db, 0,
23710 "DELETE FROM %Q.'%q_data';"
23711 "DELETE FROM %Q.'%q_idx';",
23712 pConfig->zDb, pConfig->zName,
23713 pConfig->zDb, pConfig->zName
23714 );
23715 if( rc==SQLITE_OK0 && pConfig->bColumnsize ){
23716 rc = fts5ExecPrintf(pConfig->db, 0,
23717 "DELETE FROM %Q.'%q_docsize';", pConfig->zDb, pConfig->zName
23718 );
23719 }
23720
23721 if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_UNINDEXED3 ){
23722 rc = fts5ExecPrintf(pConfig->db, 0,
23723 "DELETE FROM %Q.'%q_content';", pConfig->zDb, pConfig->zName
23724 );
23725 }
23726
23727 /* Reinitialize the %_data table. This call creates the initial structure
23728 ** and averages records. */
23729 if( rc==SQLITE_OK0 ){
23730 rc = sqlite3Fts5IndexReinit(p->pIndex);
23731 }
23732 if( rc==SQLITE_OK0 ){
23733 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION4);
23734 }
23735 return rc;
23736}
23737
23738static int sqlite3Fts5StorageRebuild(Fts5Storage *p){
23739 Fts5Buffer buf = {0,0,0};
23740 Fts5Config *pConfig = p->pConfig;
23741 sqlite3_stmt *pScan = 0;
23742 Fts5InsertCtx ctx;
23743 int rc, rc2;
23744
23745 memset(&ctx, 0, sizeof(Fts5InsertCtx));
23746 ctx.pStorage = p;
23747 rc = sqlite3Fts5StorageDeleteAll(p);
23748 if( rc==SQLITE_OK0 ){
23749 rc = fts5StorageLoadTotals(p, 1);
23750 }
23751
23752 if( rc==SQLITE_OK0 ){
23753 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN11, &pScan, pConfig->pzErrmsg);
23754 }
23755
23756 while( rc==SQLITE_OK0 && SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pScan) ){
23757 i64 iRowid = sqlite3_column_int64sqlite3_api->column_int64(pScan, 0);
23758
23759 sqlite3Fts5BufferZero(&buf);
23760 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
23761 for(ctx.iCol=0; rc==SQLITE_OK0 && ctx.iCol<pConfig->nCol; ctx.iCol++){
23762 ctx.szCol = 0;
23763 if( pConfig->abUnindexed[ctx.iCol]==0 ){
23764 int nText = 0; /* Size of pText in bytes */
23765 const char *pText = 0; /* Pointer to buffer containing text value */
23766 int nLoc = 0; /* Size of pLoc in bytes */
23767 const char *pLoc = 0; /* Pointer to buffer containing text value */
23768
23769 sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pScan, ctx.iCol+1);
23770 if( pConfig->eContent==FTS5_CONTENT_EXTERNAL2
23771 && sqlite3Fts5IsLocaleValue(pConfig, pVal)
23772 ){
23773 rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
23774 }else{
23775 pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal);
23776 nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal);
23777 if( pConfig->bLocale ){
23778 int iCol = ctx.iCol + 1 + pConfig->nCol;
23779 pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pScan, iCol);
23780 nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pScan, iCol);
23781 }
23782 }
23783
23784 if( rc==SQLITE_OK0 ){
23785 sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
23786 rc = sqlite3Fts5Tokenize(pConfig,
23787 FTS5_TOKENIZE_DOCUMENT0x0004,
23788 pText, nText,
23789 (void*)&ctx,
23790 fts5StorageInsertCallback
23791 );
23792 sqlite3Fts5ClearLocale(pConfig);
23793 }
23794 }
23795 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
23796 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
23797 }
23798 p->nTotalRow++;
23799
23800 if( rc==SQLITE_OK0 ){
23801 rc = fts5StorageInsertDocsize(p, iRowid, &buf);
23802 }
23803 }
23804 sqlite3_freesqlite3_api->free(buf.p);
23805 rc2 = sqlite3_resetsqlite3_api->reset(pScan);
23806 if( rc==SQLITE_OK0 ) rc = rc2;
23807
23808 /* Write the averages record */
23809 if( rc==SQLITE_OK0 ){
23810 rc = fts5StorageSaveTotals(p);
23811 }
23812 return rc;
23813}
23814
23815static int sqlite3Fts5StorageOptimize(Fts5Storage *p){
23816 return sqlite3Fts5IndexOptimize(p->pIndex);
23817}
23818
23819static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){
23820 return sqlite3Fts5IndexMerge(p->pIndex, nMerge);
23821}
23822
23823static int sqlite3Fts5StorageReset(Fts5Storage *p){
23824 return sqlite3Fts5IndexReset(p->pIndex);
23825}
23826
23827/*
23828** Allocate a new rowid. This is used for "external content" tables when
23829** a NULL value is inserted into the rowid column. The new rowid is allocated
23830** by inserting a dummy row into the %_docsize table. The dummy will be
23831** overwritten later.
23832**
23833** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In
23834** this case the user is required to provide a rowid explicitly.
23835*/
23836static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){
23837 int rc = SQLITE_MISMATCH20;
23838 if( p->pConfig->bColumnsize ){
23839 sqlite3_stmt *pReplace = 0;
23840 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE7, &pReplace, 0);
23841 if( rc==SQLITE_OK0 ){
23842 sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 1);
23843 sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 2);
23844 sqlite3_stepsqlite3_api->step(pReplace);
23845 rc = sqlite3_resetsqlite3_api->reset(pReplace);
23846 }
23847 if( rc==SQLITE_OK0 ){
23848 *piRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->pConfig->db);
23849 }
23850 }
23851 return rc;
23852}
23853
23854/*
23855** Insert a new row into the FTS content table.
23856*/
23857static int sqlite3Fts5StorageContentInsert(
23858 Fts5Storage *p,
23859 int bReplace, /* True to use REPLACE instead of INSERT */
23860 sqlite3_value **apVal,
23861 i64 *piRowid
23862){
23863 Fts5Config *pConfig = p->pConfig;
23864 int rc = SQLITE_OK0;
23865
23866 /* Insert the new row into the %_content table. */
23867 if( pConfig->eContent!=FTS5_CONTENT_NORMAL0
23868 && pConfig->eContent!=FTS5_CONTENT_UNINDEXED3
23869 ){
23870 if( sqlite3_value_typesqlite3_api->value_type(apVal[1])==SQLITE_INTEGER1 ){
23871 *piRowid = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]);
23872 }else{
23873 rc = fts5StorageNewRowid(p, piRowid);
23874 }
23875 }else{
23876 sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */
23877 int i; /* Counter variable */
23878
23879 assert( FTS5_STMT_INSERT_CONTENT+1==FTS5_STMT_REPLACE_CONTENT )((void) (0));
23880 assert( bReplace==0 || bReplace==1 )((void) (0));
23881 rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT4+bReplace, &pInsert, 0);
23882 if( pInsert ) sqlite3_clear_bindingssqlite3_api->clear_bindings(pInsert);
23883
23884 /* Bind the rowid value */
23885 sqlite3_bind_valuesqlite3_api->bind_value(pInsert, 1, apVal[1]);
23886
23887 /* Loop through values for user-defined columns. i=2 is the leftmost
23888 ** user-defined column. As is column 1 of pSavedRow. */
23889 for(i=2; rc==SQLITE_OK0 && i<=pConfig->nCol+1; i++){
23890 int bUnindexed = pConfig->abUnindexed[i-2];
23891 if( pConfig->eContent==FTS5_CONTENT_NORMAL0 || bUnindexed ){
23892 sqlite3_value *pVal = apVal[i];
23893
23894 if( sqlite3_value_nochangesqlite3_api->value_nochange(pVal) && p->pSavedRow ){
23895 /* This is an UPDATE statement, and user-defined column (i-2) was not
23896 ** modified. Retrieve the value from Fts5Storage.pSavedRow. */
23897 pVal = sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, i-1);
23898 if( pConfig->bLocale && bUnindexed==0 ){
23899 sqlite3_bind_valuesqlite3_api->bind_value(pInsert, pConfig->nCol + i,
23900 sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, pConfig->nCol + i - 1)
23901 );
23902 }
23903 }else if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
23904 const char *pText = 0;
23905 const char *pLoc = 0;
23906 int nText = 0;
23907 int nLoc = 0;
23908 assert( pConfig->bLocale )((void) (0));
23909
23910 rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
23911 if( rc==SQLITE_OK0 ){
23912 sqlite3_bind_textsqlite3_api->bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
23913 if( bUnindexed==0 ){
23914 int iLoc = pConfig->nCol + i;
23915 sqlite3_bind_textsqlite3_api->bind_text(pInsert, iLoc, pLoc, nLoc, SQLITE_TRANSIENT((sqlite3_destructor_type)-1));
23916 }
23917 }
23918
23919 continue;
23920 }
23921
23922 rc = sqlite3_bind_valuesqlite3_api->bind_value(pInsert, i, pVal);
23923 }
23924 }
23925 if( rc==SQLITE_OK0 ){
23926 sqlite3_stepsqlite3_api->step(pInsert);
23927 rc = sqlite3_resetsqlite3_api->reset(pInsert);
23928 }
23929 *piRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(pConfig->db);
23930 }
23931
23932 return rc;
23933}
23934
23935/*
23936** Insert new entries into the FTS index and %_docsize table.
23937*/
23938static int sqlite3Fts5StorageIndexInsert(
23939 Fts5Storage *p,
23940 sqlite3_value **apVal,
23941 i64 iRowid
23942){
23943 Fts5Config *pConfig = p->pConfig;
23944 int rc = SQLITE_OK0; /* Return code */
23945 Fts5InsertCtx ctx; /* Tokenization callback context object */
23946 Fts5Buffer buf; /* Buffer used to build up %_docsize blob */
23947
23948 memset(&buf, 0, sizeof(Fts5Buffer));
23949 ctx.pStorage = p;
23950 rc = fts5StorageLoadTotals(p, 1);
23951
23952 if( rc==SQLITE_OK0 ){
23953 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
23954 }
23955 for(ctx.iCol=0; rc==SQLITE_OK0 && ctx.iCol<pConfig->nCol; ctx.iCol++){
23956 ctx.szCol = 0;
23957 if( pConfig->abUnindexed[ctx.iCol]==0 ){
23958 int nText = 0; /* Size of pText in bytes */
23959 const char *pText = 0; /* Pointer to buffer containing text value */
23960 int nLoc = 0; /* Size of pText in bytes */
23961 const char *pLoc = 0; /* Pointer to buffer containing text value */
23962
23963 sqlite3_value *pVal = apVal[ctx.iCol+2];
23964 if( p->pSavedRow && sqlite3_value_nochangesqlite3_api->value_nochange(pVal) ){
23965 pVal = sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, ctx.iCol+1);
23966 if( pConfig->eContent==FTS5_CONTENT_NORMAL0 && pConfig->bLocale ){
23967 int iCol = ctx.iCol + 1 + pConfig->nCol;
23968 pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(p->pSavedRow, iCol);
23969 nLoc = sqlite3_column_bytessqlite3_api->column_bytes(p->pSavedRow, iCol);
23970 }
23971 }else{
23972 pVal = apVal[ctx.iCol+2];
23973 }
23974
23975 if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
23976 rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
23977 }else{
23978 pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal);
23979 nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal);
23980 }
23981
23982 if( rc==SQLITE_OK0 ){
23983 sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
23984 rc = sqlite3Fts5Tokenize(pConfig,
23985 FTS5_TOKENIZE_DOCUMENT0x0004, pText, nText, (void*)&ctx,
23986 fts5StorageInsertCallback
23987 );
23988 sqlite3Fts5ClearLocale(pConfig);
23989 }
23990 }
23991 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
23992 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
23993 }
23994 p->nTotalRow++;
23995
23996 /* Write the %_docsize record */
23997 if( rc==SQLITE_OK0 ){
23998 rc = fts5StorageInsertDocsize(p, iRowid, &buf);
23999 }
24000 sqlite3_freesqlite3_api->free(buf.p);
24001
24002 return rc;
24003}
24004
24005static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){
24006 Fts5Config *pConfig = p->pConfig;
24007 char *zSql;
24008 int rc;
24009
24010 zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT count(*) FROM %Q.'%q_%s'",
24011 pConfig->zDb, pConfig->zName, zSuffix
24012 );
24013 if( zSql==0 ){
24014 rc = SQLITE_NOMEM7;
24015 }else{
24016 sqlite3_stmt *pCnt = 0;
24017 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pConfig->db, zSql, -1, &pCnt, 0);
24018 if( rc==SQLITE_OK0 ){
24019 if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pCnt) ){
24020 *pnRow = sqlite3_column_int64sqlite3_api->column_int64(pCnt, 0);
24021 }
24022 rc = sqlite3_finalizesqlite3_api->finalize(pCnt);
24023 }
24024 }
24025
24026 sqlite3_freesqlite3_api->free(zSql);
24027 return rc;
24028}
24029
24030/*
24031** Context object used by sqlite3Fts5StorageIntegrity().
24032*/
24033typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
24034struct Fts5IntegrityCtx {
24035 i64 iRowid;
24036 int iCol;
24037 int szCol;
24038 u64 cksum;
24039 Fts5Termset *pTermset;
24040 Fts5Config *pConfig;
24041};
24042
24043
24044/*
24045** Tokenization callback used by integrity check.
24046*/
24047static int fts5StorageIntegrityCallback(
24048 void *pContext, /* Pointer to Fts5IntegrityCtx object */
24049 int tflags,
24050 const char *pToken, /* Buffer containing token */
24051 int nToken, /* Size of token in bytes */
24052 int iUnused1, /* Start offset of token */
24053 int iUnused2 /* End offset of token */
24054){
24055 Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
24056 Fts5Termset *pTermset = pCtx->pTermset;
24057 int bPresent;
24058 int ii;
24059 int rc = SQLITE_OK0;
24060 int iPos;
24061 int iCol;
24062
24063 UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2);
24064 if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768;
24065
24066 if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 || pCtx->szCol==0 ){
24067 pCtx->szCol++;
24068 }
24069
24070 switch( pCtx->pConfig->eDetail ){
24071 case FTS5_DETAIL_FULL0:
24072 iPos = pCtx->szCol-1;
24073 iCol = pCtx->iCol;
24074 break;
24075
24076 case FTS5_DETAIL_COLUMNS2:
24077 iPos = pCtx->iCol;
24078 iCol = 0;
24079 break;
24080
24081 default:
24082 assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0));
24083 iPos = 0;
24084 iCol = 0;
24085 break;
24086 }
24087
24088 rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent);
24089 if( rc==SQLITE_OK0 && bPresent==0 ){
24090 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
24091 pCtx->iRowid, iCol, iPos, 0, pToken, nToken
24092 );
24093 }
24094
24095 for(ii=0; rc==SQLITE_OK0 && ii<pCtx->pConfig->nPrefix; ii++){
24096 const int nChar = pCtx->pConfig->aPrefix[ii];
24097 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
24098 if( nByte ){
24099 rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent);
24100 if( bPresent==0 ){
24101 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
24102 pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte
24103 );
24104 }
24105 }
24106 }
24107
24108 return rc;
24109}
24110
24111/*
24112** Check that the contents of the FTS index match that of the %_content
24113** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
24114** some other SQLite error code if an error occurs while attempting to
24115** determine this.
24116*/
24117static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){
24118 Fts5Config *pConfig = p->pConfig;
24119 int rc = SQLITE_OK0; /* Return code */
24120 int *aColSize; /* Array of size pConfig->nCol */
24121 i64 *aTotalSize; /* Array of size pConfig->nCol */
24122 Fts5IntegrityCtx ctx;
24123 sqlite3_stmt *pScan;
24124 int bUseCksum;
24125
24126 memset(&ctx, 0, sizeof(Fts5IntegrityCtx));
24127 ctx.pConfig = p->pConfig;
24128 aTotalSize = (i64*)sqlite3_malloc64sqlite3_api->malloc64(pConfig->nCol*(sizeof(int)+sizeof(i64)));
24129 if( !aTotalSize ) return SQLITE_NOMEM7;
2
Assuming 'aTotalSize' is non-null
3
Taking false branch
24130 aColSize = (int*)&aTotalSize[pConfig->nCol];
24131 memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol);
24132
24133 bUseCksum = (pConfig->eContent==FTS5_CONTENT_NORMAL0
4
Assuming field 'eContent' is not equal to FTS5_CONTENT_NORMAL
24134 || (pConfig->eContent==FTS5_CONTENT_EXTERNAL2 && iArg)
5
Assuming field 'eContent' is not equal to FTS5_CONTENT_EXTERNAL
24135 );
24136 if( bUseCksum
5.1
'bUseCksum' is 0
){
6
Taking false branch
24137 /* Generate the expected index checksum based on the contents of the
24138 ** %_content table. This block stores the checksum in ctx.cksum. */
24139 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN11, &pScan, 0);
24140 if( rc==SQLITE_OK0 ){
24141 int rc2;
24142 while( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pScan) ){
24143 int i;
24144 ctx.iRowid = sqlite3_column_int64sqlite3_api->column_int64(pScan, 0);
24145 ctx.szCol = 0;
24146 if( pConfig->bColumnsize ){
24147 rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
24148 }
24149 if( rc==SQLITE_OK0 && pConfig->eDetail==FTS5_DETAIL_NONE1 ){
24150 rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
24151 }
24152 for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){
24153 if( pConfig->abUnindexed[i]==0 ){
24154 const char *pText = 0;
24155 int nText = 0;
24156 const char *pLoc = 0;
24157 int nLoc = 0;
24158 sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pScan, i+1);
24159
24160 if( pConfig->eContent==FTS5_CONTENT_EXTERNAL2
24161 && sqlite3Fts5IsLocaleValue(pConfig, pVal)
24162 ){
24163 rc = sqlite3Fts5DecodeLocaleValue(
24164 pVal, &pText, &nText, &pLoc, &nLoc
24165 );
24166 }else{
24167 if( pConfig->eContent==FTS5_CONTENT_NORMAL0 && pConfig->bLocale ){
24168 int iCol = i + 1 + pConfig->nCol;
24169 pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pScan, iCol);
24170 nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pScan, iCol);
24171 }
24172 pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal);
24173 nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal);
24174 }
24175
24176 ctx.iCol = i;
24177 ctx.szCol = 0;
24178
24179 if( rc==SQLITE_OK0 && pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){
24180 rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
24181 }
24182
24183 if( rc==SQLITE_OK0 ){
24184 sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
24185 rc = sqlite3Fts5Tokenize(pConfig,
24186 FTS5_TOKENIZE_DOCUMENT0x0004,
24187 pText, nText,
24188 (void*)&ctx,
24189 fts5StorageIntegrityCallback
24190 );
24191 sqlite3Fts5ClearLocale(pConfig);
24192 }
24193
24194 /* If this is not a columnsize=0 database, check that the number
24195 ** of tokens in the value matches the aColSize[] value read from
24196 ** the %_docsize table. */
24197 if( rc==SQLITE_OK0
24198 && pConfig->bColumnsize
24199 && ctx.szCol!=aColSize[i]
24200 ){
24201 rc = FTS5_CORRUPT(11 | (1<<8));
24202 }
24203 aTotalSize[i] += ctx.szCol;
24204 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){
24205 sqlite3Fts5TermsetFree(ctx.pTermset);
24206 ctx.pTermset = 0;
24207 }
24208 }
24209 }
24210 sqlite3Fts5TermsetFree(ctx.pTermset);
24211 ctx.pTermset = 0;
24212
24213 if( rc!=SQLITE_OK0 ) break;
24214 }
24215 rc2 = sqlite3_resetsqlite3_api->reset(pScan);
24216 if( rc==SQLITE_OK0 ) rc = rc2;
24217 }
24218
24219 /* Test that the "totals" (sometimes called "averages") record looks Ok */
24220 if( rc==SQLITE_OK0 ){
24221 int i;
24222 rc = fts5StorageLoadTotals(p, 0);
24223 for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){
24224 if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT(11 | (1<<8));
24225 }
24226 }
24227
24228 /* Check that the %_docsize and %_content tables contain the expected
24229 ** number of rows. */
24230 if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){
24231 i64 nRow = 0;
24232 rc = fts5StorageCount(p, "content", &nRow);
24233 if( rc==SQLITE_OK0 && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT(11 | (1<<8));
24234 }
24235 if( rc==SQLITE_OK0 && pConfig->bColumnsize ){
24236 i64 nRow = 0;
24237 rc = fts5StorageCount(p, "docsize", &nRow);
24238 if( rc==SQLITE_OK0 && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT(11 | (1<<8));
24239 }
24240 }
24241
24242 /* Pass the expected checksum down to the FTS index module. It will
24243 ** verify, amongst other things, that it matches the checksum generated by
24244 ** inspecting the index itself. */
24245 if( rc
6.1
'rc' is equal to SQLITE_OK
==SQLITE_OK0 ){
7
Taking true branch
24246 rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum, bUseCksum);
8
Calling 'sqlite3Fts5IndexIntegrityCheck'
24247 }
24248
24249 sqlite3_freesqlite3_api->free(aTotalSize);
24250 return rc;
24251}
24252
24253/*
24254** Obtain an SQLite statement handle that may be used to read data from the
24255** %_content table.
24256*/
24257static int sqlite3Fts5StorageStmt(
24258 Fts5Storage *p,
24259 int eStmt,
24260 sqlite3_stmt **pp,
24261 char **pzErrMsg
24262){
24263 int rc;
24264 assert( eStmt==FTS5_STMT_SCAN_ASC((void) (0))
24265 || eStmt==FTS5_STMT_SCAN_DESC((void) (0))
24266 || eStmt==FTS5_STMT_LOOKUP((void) (0))
24267 )((void) (0));
24268 rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg);
24269 if( rc==SQLITE_OK0 ){
24270 assert( p->aStmt[eStmt]==*pp )((void) (0));
24271 p->aStmt[eStmt] = 0;
24272 }
24273 return rc;
24274}
24275
24276/*
24277** Release an SQLite statement handle obtained via an earlier call to
24278** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function
24279** must match that passed to the sqlite3Fts5StorageStmt() call.
24280*/
24281static void sqlite3Fts5StorageStmtRelease(
24282 Fts5Storage *p,
24283 int eStmt,
24284 sqlite3_stmt *pStmt
24285){
24286 assert( eStmt==FTS5_STMT_SCAN_ASC((void) (0))
24287 || eStmt==FTS5_STMT_SCAN_DESC((void) (0))
24288 || eStmt==FTS5_STMT_LOOKUP((void) (0))
24289 )((void) (0));
24290 if( p->aStmt[eStmt]==0 ){
24291 sqlite3_resetsqlite3_api->reset(pStmt);
24292 p->aStmt[eStmt] = pStmt;
24293 }else{
24294 sqlite3_finalizesqlite3_api->finalize(pStmt);
24295 }
24296}
24297
24298static int fts5StorageDecodeSizeArray(
24299 int *aCol, int nCol, /* Array to populate */
24300 const u8 *aBlob, int nBlob /* Record to read varints from */
24301){
24302 int i;
24303 int iOff = 0;
24304 for(i=0; i<nCol; i++){
24305 if( iOff>=nBlob ) return 1;
24306 iOff += fts5GetVarint32(&aBlob[iOff], aCol[i])sqlite3Fts5GetVarint32(&aBlob[iOff],(u32*)&(aCol[i]));
24307 }
24308 return (iOff!=nBlob);
24309}
24310
24311/*
24312** Argument aCol points to an array of integers containing one entry for
24313** each table column. This function reads the %_docsize record for the
24314** specified rowid and populates aCol[] with the results.
24315**
24316** An SQLite error code is returned if an error occurs, or SQLITE_OK
24317** otherwise.
24318*/
24319static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
24320 int nCol = p->pConfig->nCol; /* Number of user columns in table */
24321 sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */
24322 int rc; /* Return Code */
24323
24324 assert( p->pConfig->bColumnsize )((void) (0));
24325 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE9, &pLookup, 0);
24326 if( pLookup ){
24327 int bCorrupt = 1;
24328 assert( rc==SQLITE_OK )((void) (0));
24329 sqlite3_bind_int64sqlite3_api->bind_int64(pLookup, 1, iRowid);
24330 if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pLookup) ){
24331 const u8 *aBlob = sqlite3_column_blobsqlite3_api->column_blob(pLookup, 0);
24332 int nBlob = sqlite3_column_bytessqlite3_api->column_bytes(pLookup, 0);
24333 if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){
24334 bCorrupt = 0;
24335 }
24336 }
24337 rc = sqlite3_resetsqlite3_api->reset(pLookup);
24338 if( bCorrupt && rc==SQLITE_OK0 ){
24339 rc = FTS5_CORRUPT(11 | (1<<8));
24340 }
24341 }else{
24342 assert( rc!=SQLITE_OK )((void) (0));
24343 }
24344
24345 return rc;
24346}
24347
24348static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){
24349 int rc = fts5StorageLoadTotals(p, 0);
24350 if( rc==SQLITE_OK0 ){
24351 *pnToken = 0;
24352 if( iCol<0 ){
24353 int i;
24354 for(i=0; i<p->pConfig->nCol; i++){
24355 *pnToken += p->aTotalSize[i];
24356 }
24357 }else if( iCol<p->pConfig->nCol ){
24358 *pnToken = p->aTotalSize[iCol];
24359 }else{
24360 rc = SQLITE_RANGE25;
24361 }
24362 }
24363 return rc;
24364}
24365
24366static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){
24367 int rc = fts5StorageLoadTotals(p, 0);
24368 if( rc==SQLITE_OK0 ){
24369 /* nTotalRow being zero does not necessarily indicate a corrupt
24370 ** database - it might be that the FTS5 table really does contain zero
24371 ** rows. However this function is only called from the xRowCount() API,
24372 ** and there is no way for that API to be invoked if the table contains
24373 ** no rows. Hence the FTS5_CORRUPT return. */
24374 *pnRow = p->nTotalRow;
24375 if( p->nTotalRow<=0 ) rc = FTS5_CORRUPT(11 | (1<<8));
24376 }
24377 return rc;
24378}
24379
24380/*
24381** Flush any data currently held in-memory to disk.
24382*/
24383static int sqlite3Fts5StorageSync(Fts5Storage *p){
24384 int rc = SQLITE_OK0;
24385 i64 iLastRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->pConfig->db);
24386 if( p->bTotalsValid ){
24387 rc = fts5StorageSaveTotals(p);
24388 if( rc==SQLITE_OK0 ){
24389 p->bTotalsValid = 0;
24390 }
24391 }
24392 if( rc==SQLITE_OK0 ){
24393 rc = sqlite3Fts5IndexSync(p->pIndex);
24394 }
24395 sqlite3_set_last_insert_rowidsqlite3_api->set_last_insert_rowid(p->pConfig->db, iLastRowid);
24396 return rc;
24397}
24398
24399static int sqlite3Fts5StorageRollback(Fts5Storage *p){
24400 p->bTotalsValid = 0;
24401 return sqlite3Fts5IndexRollback(p->pIndex);
24402}
24403
24404static int sqlite3Fts5StorageConfigValue(
24405 Fts5Storage *p,
24406 const char *z,
24407 sqlite3_value *pVal,
24408 int iVal
24409){
24410 sqlite3_stmt *pReplace = 0;
24411 int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG10, &pReplace, 0);
24412 if( rc==SQLITE_OK0 ){
24413 sqlite3_bind_textsqlite3_api->bind_text(pReplace, 1, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0));
24414 if( pVal ){
24415 sqlite3_bind_valuesqlite3_api->bind_value(pReplace, 2, pVal);
24416 }else{
24417 sqlite3_bind_intsqlite3_api->bind_int(pReplace, 2, iVal);
24418 }
24419 sqlite3_stepsqlite3_api->step(pReplace);
24420 rc = sqlite3_resetsqlite3_api->reset(pReplace);
24421 sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 1);
24422 }
24423 if( rc==SQLITE_OK0 && pVal ){
24424 int iNew = p->pConfig->iCookie + 1;
24425 rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew);
24426 if( rc==SQLITE_OK0 ){
24427 p->pConfig->iCookie = iNew;
24428 }
24429 }
24430 return rc;
24431}
24432
24433#line 1 "fts5_tokenize.c"
24434/*
24435** 2014 May 31
24436**
24437** The author disclaims copyright to this source code. In place of
24438** a legal notice, here is a blessing:
24439**
24440** May you do good and not evil.
24441** May you find forgiveness for yourself and forgive others.
24442** May you share freely, never taking more than you give.
24443**
24444******************************************************************************
24445*/
24446
24447
24448/* #include "fts5Int.h" */
24449
24450/**************************************************************************
24451** Start of ascii tokenizer implementation.
24452*/
24453
/*
** For tokenizers with no "unicode" modifier, the set of token characters
** is the same as the set of ASCII range alphanumeric characters. Entry i
** is 1 if ASCII code i is a token character and 0 if it is a separator.
*/
static unsigned char aAsciiTokenChar[128] = {
  /* 0x00..0x0F: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10..0x1F: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20..0x2F: space and punctuation */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30..0x3F: digits 0-9, then punctuation */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 0, 0, 0, 0, 0, 0,
  /* 0x40..0x4F: at-sign, then A-O */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50..0x5F: P-Z, then punctuation */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,
  /* 0x60..0x6F: backtick, then a-o */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70..0x7F: p-z, then punctuation and DEL */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,
};
24468
/*
** State of one "ascii" tokenizer instance.
*/
typedef struct AsciiTokenizer AsciiTokenizer;
struct AsciiTokenizer {
  unsigned char aTokenChar[128];  /* Non-zero for ASCII token characters */
};
24473
24474static void fts5AsciiAddExceptions(
24475 AsciiTokenizer *p,
24476 const char *zArg,
24477 int bTokenChars
24478){
24479 int i;
24480 for(i=0; zArg[i]; i++){
24481 if( (zArg[i] & 0x80)==0 ){
24482 p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars;
24483 }
24484 }
24485}
24486
24487/*
24488** Delete a "ascii" tokenizer.
24489*/
24490static void fts5AsciiDelete(Fts5Tokenizer *p){
24491 sqlite3_freesqlite3_api->free(p);
24492}
24493
24494/*
24495** Create an "ascii" tokenizer.
24496*/
24497static int fts5AsciiCreate(
24498 void *pUnused,
24499 const char **azArg, int nArg,
24500 Fts5Tokenizer **ppOut
24501){
24502 int rc = SQLITE_OK0;
24503 AsciiTokenizer *p = 0;
24504 UNUSED_PARAM(pUnused)(void)(pUnused);
24505 if( nArg%2 ){
24506 rc = SQLITE_ERROR1;
24507 }else{
24508 p = sqlite3_mallocsqlite3_api->malloc(sizeof(AsciiTokenizer));
24509 if( p==0 ){
24510 rc = SQLITE_NOMEM7;
24511 }else{
24512 int i;
24513 memset(p, 0, sizeof(AsciiTokenizer));
24514 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
24515 for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){
24516 const char *zArg = azArg[i+1];
24517 if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "tokenchars") ){
24518 fts5AsciiAddExceptions(p, zArg, 1);
24519 }else
24520 if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "separators") ){
24521 fts5AsciiAddExceptions(p, zArg, 0);
24522 }else{
24523 rc = SQLITE_ERROR1;
24524 }
24525 }
24526 if( rc!=SQLITE_OK0 ){
24527 fts5AsciiDelete((Fts5Tokenizer*)p);
24528 p = 0;
24529 }
24530 }
24531 }
24532
24533 *ppOut = (Fts5Tokenizer*)p;
24534 return rc;
24535}
24536
24537
/*
** Copy nByte bytes from aIn to aOut, converting ASCII upper-case letters
** 'A'..'Z' to lower case. All other bytes are copied unchanged.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int iByte = 0;
  while( iByte<nByte ){
    const char ch = aIn[iByte];
    aOut[iByte] = ('A'<=ch && ch<='Z') ? (char)(ch + ('a' - 'A')) : ch;
    iByte++;
  }
}
24546
24547/*
24548** Tokenize some text using the ascii tokenizer.
24549*/
24550static int fts5AsciiTokenize(
24551 Fts5Tokenizer *pTokenizer,
24552 void *pCtx,
24553 int iUnused,
24554 const char *pText, int nText,
24555 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
24556){
24557 AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
24558 int rc = SQLITE_OK0;
24559 int ie;
24560 int is = 0;
24561
24562 char aFold[64];
24563 int nFold = sizeof(aFold);
24564 char *pFold = aFold;
24565 unsigned char *a = p->aTokenChar;
24566
24567 UNUSED_PARAM(iUnused)(void)(iUnused);
24568
24569 while( is<nText && rc==SQLITE_OK0 ){
24570 int nByte;
24571
24572 /* Skip any leading divider characters. */
24573 while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
24574 is++;
24575 }
24576 if( is==nText ) break;
24577
24578 /* Count the token characters */
24579 ie = is+1;
24580 while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){
24581 ie++;
24582 }
24583
24584 /* Fold to lower case */
24585 nByte = ie-is;
24586 if( nByte>nFold ){
24587 if( pFold!=aFold ) sqlite3_freesqlite3_api->free(pFold);
24588 pFold = sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)nByte*2);
24589 if( pFold==0 ){
24590 rc = SQLITE_NOMEM7;
24591 break;
24592 }
24593 nFold = nByte*2;
24594 }
24595 asciiFold(pFold, &pText[is], nByte);
24596
24597 /* Invoke the token callback */
24598 rc = xToken(pCtx, 0, pFold, nByte, is, ie);
24599 is = ie+1;
24600 }
24601
24602 if( pFold!=aFold ) sqlite3_freesqlite3_api->free(pFold);
24603 if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0;
24604 return rc;
24605}
24606
24607/**************************************************************************
24608** Start of unicode61 tokenizer implementation.
24609*/
24610
24611
24612/*
24613** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
24614** from the sqlite3 source file utf.c. If this file is compiled as part
24615** of the amalgamation, they are not required.
24616*/
24617#ifndef SQLITE_AMALGAMATION
24618
/*
** Lookup table used by READ_UTF8(). It is indexed by (lead byte - 0xc0)
** and supplies the initial value of the codepoint accumulator for that
** lead byte.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
  /* 0xc0..0xcf */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0xd0..0xdf */
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  /* 0xe0..0xef */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0xf0..0xff */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
24629
/*
** READ_UTF8(zIn, zTerm, c): read one character starting at zIn, store
** its codepoint in c and advance zIn past it. Continuation bytes are
** consumed only while zIn<zTerm. If the decoded value is below 0x80, is
** in the range 0xD800..0xDFFF, or is 0xFFFE or 0xFFFF, c is set to
** 0xFFFD instead.
**
** The macro expands to more than one statement and evaluates its
** arguments several times, so zIn and c must be plain lvalues.
*/
#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3Utf8Trans1[c-0xc0];                         \
    while( zIn<zTerm && (*zIn & 0xc0)==0x80 ){             \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }
24641
24642
/*
** WRITE_UTF8(zOut, c): append the utf-8 encoding of codepoint c at zOut
** and advance zOut past the 1 to 4 bytes written. Both arguments are
** evaluated several times, so they must be plain lvalues.
*/
#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (unsigned char)(c&0xFF);                 \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }else{                                               \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
}
24662
24663#endif /* ifndef SQLITE_AMALGAMATION */
24664
/*
** FTS5_SKIP_UTF8(zIn): advance zIn past one character. If the first byte
** is 0xc0 or greater, all immediately following continuation bytes
** (10xxxxxx) are skipped as well. No end-of-buffer check is made, so the
** input must be terminated by a non-continuation byte.
*/
#define FTS5_SKIP_UTF8(zIn) {                               \
  if( ((unsigned char)(*(zIn++)))>=0xc0 ){                  \
    while( (((unsigned char)*zIn) & 0xc0)==0x80 ){ zIn++; } \
  }                                                         \
}
24670
/*
** State of one "unicode61" tokenizer instance.
*/
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int eRemoveDiacritic;           /* An FTS5_REMOVE_DIACRITICS_* value */
  int nException;                 /* Number of entries in aiException[] */
  int *aiException;               /* Exception codepoints, ascending order */

  unsigned char aCategory[32];    /* True for token char categories */
};
24682
/*
** Values for Unicode61Tokenizer.eRemoveDiacritic. These must match the
** internals of fts5_unicode2.c. They are also the values accepted by the
** "remove_diacritics" tokenizer option ('0', '1' and '2').
*/
#define FTS5_REMOVE_DIACRITICS_NONE    0
#define FTS5_REMOVE_DIACRITICS_SIMPLE  1
#define FTS5_REMOVE_DIACRITICS_COMPLEX 2
24687
24688static int fts5UnicodeAddExceptions(
24689 Unicode61Tokenizer *p, /* Tokenizer object */
24690 const char *z, /* Characters to treat as exceptions */
24691 int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */
24692){
24693 int rc = SQLITE_OK0;
24694 int n = (int)strlen(z);
24695 int *aNew;
24696
24697 if( n>0 ){
24698 aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(p->aiException,
24699 (n+p->nException)*sizeof(int));
24700 if( aNew ){
24701 int nNew = p->nException;
24702 const unsigned char *zCsr = (const unsigned char*)z;
24703 const unsigned char *zTerm = (const unsigned char*)&z[n];
24704 while( zCsr<zTerm ){
24705 u32 iCode;
24706 int bToken;
24707 READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1
[iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0
)==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++));
} if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode
&0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }
;
24708 if( iCode<128 ){
24709 p->aTokenChar[iCode] = (unsigned char)bTokenChars;
24710 }else{
24711 bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)];
24712 assert( (bToken==0 || bToken==1) )((void) (0));
24713 assert( (bTokenChars==0 || bTokenChars==1) )((void) (0));
24714 if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
24715 int i;
24716 for(i=0; i<nNew; i++){
24717 if( (u32)aNew[i]>iCode ) break;
24718 }
24719 memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int));
24720 aNew[i] = iCode;
24721 nNew++;
24722 }
24723 }
24724 }
24725 p->aiException = aNew;
24726 p->nException = nNew;
24727 }else{
24728 rc = SQLITE_NOMEM7;
24729 }
24730 }
24731
24732 return rc;
24733}
24734
24735/*
24736** Return true if the p->aiException[] array contains the value iCode.
24737*/
24738static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
24739 if( p->nException>0 ){
24740 int *a = p->aiException;
24741 int iLo = 0;
24742 int iHi = p->nException-1;
24743
24744 while( iHi>=iLo ){
24745 int iTest = (iHi + iLo) / 2;
24746 if( iCode==a[iTest] ){
24747 return 1;
24748 }else if( iCode>a[iTest] ){
24749 iLo = iTest+1;
24750 }else{
24751 iHi = iTest-1;
24752 }
24753 }
24754 }
24755
24756 return 0;
24757}
24758
24759/*
24760** Delete a "unicode61" tokenizer.
24761*/
24762static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
24763 if( pTok ){
24764 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
24765 sqlite3_freesqlite3_api->free(p->aiException);
24766 sqlite3_freesqlite3_api->free(p->aFold);
24767 sqlite3_freesqlite3_api->free(p);
24768 }
24769 return;
24770}
24771
24772static int unicodeSetCategories(Unicode61Tokenizer *p, const char *zCat){
24773 const char *z = zCat;
24774
24775 while( *z ){
24776 while( *z==' ' || *z=='\t' ) z++;
24777 if( *z && sqlite3Fts5UnicodeCatParse(z, p->aCategory) ){
24778 return SQLITE_ERROR1;
24779 }
24780 while( *z!=' ' && *z!='\t' && *z!='\0' ) z++;
24781 }
24782
24783 sqlite3Fts5UnicodeAscii(p->aCategory, p->aTokenChar);
24784 return SQLITE_OK0;
24785}
24786
24787/*
24788** Create a "unicode61" tokenizer.
24789*/
24790static int fts5UnicodeCreate(
24791 void *pUnused,
24792 const char **azArg, int nArg,
24793 Fts5Tokenizer **ppOut
24794){
24795 int rc = SQLITE_OK0; /* Return code */
24796 Unicode61Tokenizer *p = 0; /* New tokenizer object */
24797
24798 UNUSED_PARAM(pUnused)(void)(pUnused);
24799
24800 if( nArg%2 ){
24801 rc = SQLITE_ERROR1;
24802 }else{
24803 p = (Unicode61Tokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(Unicode61Tokenizer));
24804 if( p ){
24805 const char *zCat = "L* N* Co";
24806 int i;
24807 memset(p, 0, sizeof(Unicode61Tokenizer));
24808
24809 p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE1;
24810 p->nFold = 64;
24811 p->aFold = sqlite3_malloc64sqlite3_api->malloc64(p->nFold * sizeof(char));
24812 if( p->aFold==0 ){
24813 rc = SQLITE_NOMEM7;
24814 }
24815
24816 /* Search for a "categories" argument */
24817 for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){
24818 if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "categories") ){
24819 zCat = azArg[i+1];
24820 }
24821 }
24822 if( rc==SQLITE_OK0 ){
24823 rc = unicodeSetCategories(p, zCat);
24824 }
24825
24826 for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){
24827 const char *zArg = azArg[i+1];
24828 if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "remove_diacritics") ){
24829 if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
24830 rc = SQLITE_ERROR1;
24831 }else{
24832 p->eRemoveDiacritic = (zArg[0] - '0');
24833 assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE((void) (0))
24834 || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE((void) (0))
24835 || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX((void) (0))
24836 )((void) (0));
24837 }
24838 }else
24839 if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "tokenchars") ){
24840 rc = fts5UnicodeAddExceptions(p, zArg, 1);
24841 }else
24842 if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "separators") ){
24843 rc = fts5UnicodeAddExceptions(p, zArg, 0);
24844 }else
24845 if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "categories") ){
24846 /* no-op */
24847 }else{
24848 rc = SQLITE_ERROR1;
24849 }
24850 }
24851 }else{
24852 rc = SQLITE_NOMEM7;
24853 }
24854 if( rc!=SQLITE_OK0 ){
24855 fts5UnicodeDelete((Fts5Tokenizer*)p);
24856 p = 0;
24857 }
24858 *ppOut = (Fts5Tokenizer*)p;
24859 }
24860 return rc;
24861}
24862
24863/*
24864** Return true if, for the purposes of tokenizing with the tokenizer
24865** passed as the first argument, codepoint iCode is considered a token
24866** character (not a separator).
24867*/
24868static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
24869 return (
24870 p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)]
24871 ^ fts5UnicodeIsException(p, iCode)
24872 );
24873}
24874
24875static int fts5UnicodeTokenize(
24876 Fts5Tokenizer *pTokenizer,
24877 void *pCtx,
24878 int iUnused,
24879 const char *pText, int nText,
24880 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
24881){
24882 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
24883 int rc = SQLITE_OK0;
24884 unsigned char *a = p->aTokenChar;
24885
24886 unsigned char *zTerm = (unsigned char*)&pText[nText];
24887 unsigned char *zCsr = (unsigned char *)pText;
24888
24889 /* Output buffer */
24890 char *aFold = p->aFold;
24891 int nFold = p->nFold;
24892 const char *pEnd = &aFold[nFold-6];
24893
24894 UNUSED_PARAM(iUnused)(void)(iUnused);
24895
24896 /* Each iteration of this loop gobbles up a contiguous run of separators,
24897 ** then the next token. */
24898 while( rc==SQLITE_OK0 ){
24899 u32 iCode; /* non-ASCII codepoint read from input */
24900 char *zOut = aFold;
24901 int is;
24902 int ie;
24903
24904 /* Skip any separator characters. */
24905 while( 1 ){
24906 if( zCsr>=zTerm ) goto tokenize_done;
24907 if( *zCsr & 0x80 ) {
24908 /* A character outside of the ascii range. Skip past it if it is
24909 ** a separator character. Or break out of the loop if it is not. */
24910 is = zCsr - (unsigned char*)pText;
24911 READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1
[iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0
)==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++));
} if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode
&0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }
;
24912 if( fts5UnicodeIsAlnum(p, iCode) ){
24913 goto non_ascii_tokenchar;
24914 }
24915 }else{
24916 if( a[*zCsr] ){
24917 is = zCsr - (unsigned char*)pText;
24918 goto ascii_tokenchar;
24919 }
24920 zCsr++;
24921 }
24922 }
24923
24924 /* Run through the tokenchars. Fold them into the output buffer along
24925 ** the way. */
24926 while( zCsr<zTerm ){
24927
24928 /* Grow the output buffer so that there is sufficient space to fit the
24929 ** largest possible utf-8 character. */
24930 if( zOut>pEnd ){
24931 aFold = sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)nFold*2);
24932 if( aFold==0 ){
24933 rc = SQLITE_NOMEM7;
24934 goto tokenize_done;
24935 }
24936 zOut = &aFold[zOut - p->aFold];
24937 memcpy(aFold, p->aFold, nFold);
24938 sqlite3_freesqlite3_api->free(p->aFold);
24939 p->aFold = aFold;
24940 p->nFold = nFold = nFold*2;
24941 pEnd = &aFold[nFold-6];
24942 }
24943
24944 if( *zCsr & 0x80 ){
24945 /* An non-ascii-range character. Fold it into the output buffer if
24946 ** it is a token character, or break out of the loop if it is not. */
24947 READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1
[iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0
)==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++));
} if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode
&0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }
;
24948 if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
24949 non_ascii_tokenchar:
24950 iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic);
24951 if( iCode ) WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode&
0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned
char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned
char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut
++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut
++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut
++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++
= 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut
++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut
++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut
++ = 0x80 + (unsigned char)(iCode & 0x3F); } }
;
24952 }else{
24953 break;
24954 }
24955 }else if( a[*zCsr]==0 ){
24956 /* An ascii-range separator character. End of token. */
24957 break;
24958 }else{
24959 ascii_tokenchar:
24960 if( *zCsr>='A' && *zCsr<='Z' ){
24961 *zOut++ = *zCsr + 32;
24962 }else{
24963 *zOut++ = *zCsr;
24964 }
24965 zCsr++;
24966 }
24967 ie = zCsr - (unsigned char*)pText;
24968 }
24969
24970 /* Invoke the token callback */
24971 rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
24972 }
24973
24974 tokenize_done:
24975 if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0;
24976 return rc;
24977}
24978
24979/**************************************************************************
24980** Start of porter stemmer implementation.
24981*/
24982
/*
** Any tokens larger than this (in bytes) are passed through without
** stemming.
*/
#define FTS5_PORTER_MAX_TOKEN 64
24986
24987typedef struct PorterTokenizer PorterTokenizer;
24988struct PorterTokenizer {
24989 fts5_tokenizer_v2 tokenizer_v2; /* Parent tokenizer module */
24990 Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */
24991 char aBuf[FTS5_PORTER_MAX_TOKEN64 + 64];
24992};
24993
24994/*
24995** Delete a "porter" tokenizer.
24996*/
24997static void fts5PorterDelete(Fts5Tokenizer *pTok){
24998 if( pTok ){
24999 PorterTokenizer *p = (PorterTokenizer*)pTok;
25000 if( p->pTokenizer ){
25001 p->tokenizer_v2.xDelete(p->pTokenizer);
25002 }
25003 sqlite3_freesqlite3_api->free(p);
25004 }
25005}
25006
25007/*
25008** Create a "porter" tokenizer.
25009*/
25010static int fts5PorterCreate(
25011 void *pCtx,
25012 const char **azArg, int nArg,
25013 Fts5Tokenizer **ppOut
25014){
25015 fts5_api *pApi = (fts5_api*)pCtx;
25016 int rc = SQLITE_OK0;
25017 PorterTokenizer *pRet;
25018 void *pUserdata = 0;
25019 const char *zBase = "unicode61";
25020 fts5_tokenizer_v2 *pV2 = 0;
25021
25022 if( nArg>0 ){
25023 zBase = azArg[0];
25024 }
25025
25026 pRet = (PorterTokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(PorterTokenizer));
25027 if( pRet ){
25028 memset(pRet, 0, sizeof(PorterTokenizer));
25029 rc = pApi->xFindTokenizer_v2(pApi, zBase, &pUserdata, &pV2);
25030 }else{
25031 rc = SQLITE_NOMEM7;
25032 }
25033 if( rc==SQLITE_OK0 ){
25034 int nArg2 = (nArg>0 ? nArg-1 : 0);
25035 const char **az2 = (nArg2 ? &azArg[1] : 0);
25036 memcpy(&pRet->tokenizer_v2, pV2, sizeof(fts5_tokenizer_v2));
25037 rc = pRet->tokenizer_v2.xCreate(pUserdata, az2, nArg2, &pRet->pTokenizer);
25038 }
25039
25040 if( rc!=SQLITE_OK0 ){
25041 fts5PorterDelete((Fts5Tokenizer*)pRet);
25042 pRet = 0;
25043 }
25044 *ppOut = (Fts5Tokenizer*)pRet;
25045 return rc;
25046}
25047
/*
** Bundle of a token callback, its context pointer and a buffer pointer.
*/
typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;                                             /* Callback context */
  int (*xToken)(void*, int, const char*, int, int, int);  /* Token callback */
  char *aBuf;                                             /* Token buffer */
};
25054
/*
** One suffix-rewriting rule: if a token ends with zSuffix and xCond (when
** not NULL) accepts the remaining stem, the suffix is replaced by
** zOutput.
*/
typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;                    /* Suffix to match */
  int nSuffix;                            /* Length of zSuffix in bytes */
  int (*xCond)(char *zStem, int nStem);   /* Condition on the stem, or NULL */
  const char *zOutput;                    /* Replacement for the suffix */
  int nOutput;                            /* Length of zOutput in bytes */
};
25063
#if 0
/*
** This function is compiled out.
**
** Apply the first rule in aRule[] (terminated by an entry with a NULL
** zSuffix) whose suffix matches the end of the nBuf byte token in aBuf.
** If that rule's condition is absent or accepts the stem, the suffix is
** replaced, *pnBuf is updated and the index of the rule is returned.
** Otherwise -1 is returned and the token is left unchanged.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int nBuf = *pnBuf;
  PorterRule *pRule = aRule;
  int nStem;

  while( pRule->zSuffix ){
    assert( strlen(pRule->zSuffix)==pRule->nSuffix );
    assert( strlen(pRule->zOutput)==pRule->nOutput );
    if( nBuf>=pRule->nSuffix
     && 0==memcmp(&aBuf[nBuf - pRule->nSuffix], pRule->zSuffix, pRule->nSuffix)
    ){
      break;
    }
    pRule++;
  }
  if( pRule->zSuffix==0 ) return -1;

  nStem = nBuf - pRule->nSuffix;
  if( pRule->xCond && !pRule->xCond(aBuf, nStem) ) return -1;

  memcpy(&aBuf[nStem], pRule->zOutput, pRule->nOutput);
  *pnBuf = nStem + pRule->nOutput;
  return pRule - aRule;
}
#endif
25089
/*
** Return 1 if c is one of the lower-case vowels a, e, i, o or u, or if c
** is 'y' and bYIsVowel is non-zero. Return 0 otherwise.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return bYIsVowel ? 1 : 0;
    default:
      return 0;
  }
}
25095
/*
** Scan the nStem byte string zStem for the first vowel and then for the
** first consonant after it. If both are found, return the offset one
** past that consonant. Otherwise return 0.
**
** A 'y' counts as a vowel only when the preceding character was a
** consonant. bPrevCons supplies that state for the character before
** zStem[0].
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int bCons = bPrevCons;
  int i = 0;

  /* Scan for a vowel */
  while( i<nStem ){
    bCons = !fts5PorterIsVowel(zStem[i], bCons);
    if( bCons==0 ) break;
    i++;
  }

  /* Scan for a consonant, starting after the vowel */
  for(i=i+1; i<nStem; i++){
    bCons = !fts5PorterIsVowel(zStem[i], bCons);
    if( bCons ) return i+1;
  }
  return 0;
}
25111
/* porter rule condition: (m > 0)
**
** True if the stem contains at least one vowel followed later by a
** consonant. */
static int fts5Porter_MGt0(char *zStem, int nStem){
  return fts5PorterGobbleVC(zStem, nStem, 0)!=0;
}
25116
/* porter rule condition: (m > 1)
**
** True if two successive vowel-consonant sequences can be gobbled from
** the stem. */
static int fts5Porter_MGt1(char *zStem, int nStem){
  int n = fts5PorterGobbleVC(zStem, nStem, 0);
  if( n==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ? 1 : 0;
}
25126
/* porter rule condition: (m = 1)
**
** True if exactly one vowel-consonant sequence can be gobbled from the
** stem: the first scan succeeds and the second fails. */
static int fts5Porter_MEq1(char *zStem, int nStem){
  int n = fts5PorterGobbleVC(zStem, nStem, 0);
  if( n==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[n], nStem-n, 1)==0;
}
25136
/* porter rule condition: (*o)
**
** True if the stem ends consonant-vowel-consonant and the final
** consonant is not 'w', 'x' or 'y'. Returns 0 for an empty stem.
**
** Only the classification of the last three characters is needed, so
** the running bitmask is truncated to three bits on every step. This
** keeps the left shift well defined however long the stem is; an
** untruncated int would overflow once the stem exceeds 31 characters. */
static int fts5Porter_Ostar(char *zStem, int nStem){
  if( nStem<=0 ) return 0;
  if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){
    return 0;
  }else{
    int i;
    int mask = 0;                 /* Bit set for each consonant, newest low */
    int bCons = 0;
    for(i=0; i<nStem; i++){
      bCons = !fts5PorterIsVowel(zStem[i], bCons);
      assert( bCons==0 || bCons==1 );
      mask = ((mask << 1) + bCons) & 0x0007;
    }
    return (mask==0x0005);
  }
}
25153
/* porter rule condition: (m > 1 and (*S or *T))
**
** True if the stem ends in 's' or 't' and also satisfies (m > 1). The
** stem must not be empty. */
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ) return 0;
  return fts5Porter_MGt1(zStem, nStem);
}
25160
/* porter rule condition: (*v*)
**
** True if the stem contains a vowel. A 'y' counts as a vowel anywhere
** except in the first position. */
static int fts5Porter_Vowel(char *zStem, int nStem){
  int i = 0;
  while( i<nStem ){
    if( fts5PorterIsVowel(zStem[i], i>0) ) return 1;
    i++;
  }
  return 0;
}
25171
25172
25173/**************************************************************************
25174***************************************************************************
25175** GENERATED CODE STARTS HERE (mkportersteps.tcl)
25176*/
25177
25178static int fts5PorterStep4(char *aBuf, int *pnBuf){
25179 int ret = 0;
25180 int nBuf = *pnBuf;
25181 switch( aBuf[nBuf-2] ){
25182
25183 case 'a':
25184 if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
25185 if( fts5Porter_MGt1(aBuf, nBuf-2) ){
25186 *pnBuf = nBuf - 2;
25187 }
25188 }
25189 break;
25190
25191 case 'c':
25192 if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
25193 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
25194 *pnBuf = nBuf - 4;
25195 }
25196 }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
25197 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
25198 *pnBuf = nBuf - 4;
25199 }
25200 }
25201 break;
25202
25203 case 'e':
25204 if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
25205 if( fts5Porter_MGt1(aBuf, nBuf-2) ){
25206 *pnBuf = nBuf - 2;
25207 }
25208 }
25209 break;
25210
25211 case 'i':
25212 if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
25213 if( fts5Porter_MGt1(aBuf, nBuf-2) ){
25214 *pnBuf = nBuf - 2;
25215 }
25216 }
25217 break;
25218
25219 case 'l':
25220 if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
25221 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
25222 *pnBuf = nBuf - 4;
25223 }
25224 }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
25225 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
25226 *pnBuf = nBuf - 4;
25227 }
25228 }
25229 break;
25230
25231 case 'n':
25232 if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
25233 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
25234 *pnBuf = nBuf - 3;
25235 }
25236 }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
25237 if( fts5Porter_MGt1(aBuf, nBuf-5) ){
25238 *pnBuf = nBuf - 5;
25239 }
25240 }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
25241 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
25242 *pnBuf = nBuf - 4;
25243 }
25244 }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
25245 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
25246 *pnBuf = nBuf - 3;
25247 }
25248 }
25249 break;
25250
25251 case 'o':
25252 if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
25253 if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
25254 *pnBuf = nBuf - 3;
25255 }
25256 }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
25257 if( fts5Porter_MGt1(aBuf, nBuf-2) ){
25258 *pnBuf = nBuf - 2;
25259 }
25260 }
25261 break;
25262
25263 case 's':
25264 if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
25265 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
25266 *pnBuf = nBuf - 3;
25267 }
25268 }
25269 break;
25270
25271 case 't':
25272 if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
25273 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
25274 *pnBuf = nBuf - 3;
25275 }
25276 }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
25277 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
25278 *pnBuf = nBuf - 3;
25279 }
25280 }
25281 break;
25282
25283 case 'u':
25284 if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
25285 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
25286 *pnBuf = nBuf - 3;
25287 }
25288 }
25289 break;
25290
25291 case 'v':
25292 if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
25293 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
25294 *pnBuf = nBuf - 3;
25295 }
25296 }
25297 break;
25298
25299 case 'z':
25300 if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
25301 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
25302 *pnBuf = nBuf - 3;
25303 }
25304 }
25305 break;
25306
25307 }
25308 return ret;
25309}
25310
25311
/*
** Porter stemmer step 1b, second part: if the nBuf byte token in aBuf
** (nBuf is *pnBuf on entry) is longer than two bytes and ends in "at",
** "bl" or "iz", append an 'e' by rewriting the suffix as "ate", "ble" or
** "ize", increase *pnBuf by one and return 1. Otherwise leave the token
** unchanged and return 0.
**
** The caller must provide room for one byte beyond the current token.
**
** The length is checked before any byte of aBuf is examined, so tokens
** shorter than two bytes are handled without reading outside the buffer.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  static const struct {
    const char *zSuffix;          /* Two byte suffix to match */
    const char *zOutput;          /* Three byte replacement */
  } aRule[] = {
    { "at", "ate" },
    { "bl", "ble" },
    { "iz", "ize" },
  };
  int nBuf = *pnBuf;
  int i;

  if( nBuf<=2 ) return 0;
  for(i=0; i<(int)(sizeof(aRule)/sizeof(aRule[0])); i++){
    if( 0==memcmp(aRule[i].zSuffix, &aBuf[nBuf-2], 2) ){
      memcpy(&aBuf[nBuf-2], aRule[i].zOutput, 3);
      *pnBuf = nBuf - 2 + 3;
      return 1;
    }
  }
  return 0;
}
25344
25345
/*
** Porter stemmer step 2: rewrite a suffix of the nBuf byte token in
** aBuf, where nBuf is *pnBuf on entry.
**
** The rules are tried in order. The first rule whose suffix matches the
** end of the token (which must be strictly longer than the suffix) is
** selected. If the remaining stem satisfies (m > 0), the suffix is
** overwritten with the rule's replacement and *pnBuf is updated. No
** further rules are considered once a suffix has matched, whether or not
** the condition held. No replacement is longer than the suffix it
** replaces, so the token never grows.
**
** Always returns 0.
**
** The token length is checked before any byte of aBuf is examined, so
** tokens shorter than two bytes are handled without reading outside the
** buffer.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  static const struct {
    const char *zSuffix;          /* Suffix to match */
    int nSuffix;                  /* Length of zSuffix */
    const char *zOutput;          /* Replacement text */
    int nOutput;                  /* Length of zOutput */
  } aRule[] = {
    { "ational", 7, "ate",  3 },  /* must precede "tional" */
    { "tional",  6, "tion", 4 },
    { "enci",    4, "ence", 4 },
    { "anci",    4, "ance", 4 },
    { "izer",    4, "ize",  3 },
    { "logi",    4, "log",  3 },
    { "bli",     3, "ble",  3 },
    { "alli",    4, "al",   2 },
    { "entli",   5, "ent",  3 },
    { "eli",     3, "e",    1 },
    { "ousli",   5, "ous",  3 },
    { "ization", 7, "ize",  3 },  /* must precede "ation" */
    { "ation",   5, "ate",  3 },
    { "ator",    4, "ate",  3 },
    { "alism",   5, "al",   2 },
    { "iveness", 7, "ive",  3 },
    { "fulness", 7, "ful",  3 },
    { "ousness", 7, "ous",  3 },
    { "aliti",   5, "al",   2 },
    { "iviti",   5, "ive",  3 },
    { "biliti",  6, "ble",  3 },
  };
  int nBuf = *pnBuf;
  int i;

  for(i=0; i<(int)(sizeof(aRule)/sizeof(aRule[0])); i++){
    int n = aRule[i].nSuffix;
    if( nBuf>n && 0==memcmp(aRule[i].zSuffix, &aBuf[nBuf-n], n) ){
      if( fts5Porter_MGt0(aBuf, nBuf-n) ){
        memcpy(&aBuf[nBuf-n], aRule[i].zOutput, aRule[i].nOutput);
        *pnBuf = nBuf - n + aRule[i].nOutput;
      }
      break;
    }
  }
  return 0;
}
25491
25492
/*
** Step 3 of the Porter stemmer, applied in place to the word held in
** aBuf[]. On entry *pnBuf is the length of the word in bytes; if a rule
** fires, *pnBuf is updated to the new (shorter) length.
**
** The rule is selected by the second-to-last byte of the word:
**
**     "ical"  -> "ic"         "ness"  -> ""
**     "icate" -> "ic"         "iciti" -> "ic"
**     "ful"   -> ""           "ative" -> ""
**     "alize" -> "al"
**
** A rule is only applied when the suffix is preceded by at least one
** other byte and fts5Porter_MGt0() accepts the stem in front of the
** suffix (presumably the Porter "m>0" measure test - that helper is
** defined elsewhere in the file).
**
** The return value is always 0: no rule in this step modifies "ret".
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* The caller can arrive here with a one-byte word (e.g. "aed" after
  ** step 1B removed "ed"). Indexing aBuf[nBuf-2] would then read the byte
  ** in front of the buffer. Every rule below requires nBuf>3, so bailing
  ** out early cannot change the result. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ic", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }
      break;

    case 's':
      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }
      break;

    case 'z':
      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

  }
  return ret;
}
25557
25558
/*
** Step 1B of the Porter stemmer, applied in place to the *pnBuf byte word
** in aBuf[].
**
**     "eed" -> "ee"   if fts5Porter_MGt0() accepts the stem before "eed"
**     "ed"  -> ""     if fts5Porter_Vowel() accepts the stem before "ed"
**     "ing" -> ""     if fts5Porter_Vowel() accepts the stem before "ing"
**
** A word ending in "eed" is never considered for the "ed" rule, even if
** the "eed" rule itself does not fire.
**
** Returns 1 if "ed" or "ing" was removed (the caller then runs the step 1B
** follow-up rules), or 0 otherwise. *pnBuf is updated whenever the word
** is shortened. The word must be at least two bytes long.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  const int nLen = *pnBuf;
  const char cPenult = aBuf[nLen-2];

  if( cPenult=='e' ){
    if( nLen>3 && memcmp(&aBuf[nLen-3], "eed", 3)==0 ){
      if( fts5Porter_MGt0(aBuf, nLen-3) ){
        memcpy(&aBuf[nLen-3], "ee", 2);
        *pnBuf = nLen - 3 + 2;
      }
      return 0;
    }
    if( nLen>2
     && memcmp(&aBuf[nLen-2], "ed", 2)==0
     && fts5Porter_Vowel(aBuf, nLen-2)
    ){
      *pnBuf = nLen - 2;
      return 1;
    }
    return 0;
  }

  if( cPenult=='n'
   && nLen>3
   && memcmp(&aBuf[nLen-3], "ing", 3)==0
   && fts5Porter_Vowel(aBuf, nLen-3)
  ){
    *pnBuf = nLen - 3;
    return 1;
  }

  return 0;
}
25590
25591/*
25592** GENERATED CODE ENDS HERE (mkportersteps.tcl)
25593***************************************************************************
25594**************************************************************************/
25595
/*
** Step 1A of the Porter stemmer: plural handling. The word is the first
** *pnBuf bytes of aBuf[]; only its length is changed, never its content.
**
**     "sses" -> "ss"     (two bytes dropped)
**     "ies"  -> "i"      (two bytes dropped)
**     "ss"   -> "ss"     (unchanged)
**     "s"    -> ""       (one byte dropped; this includes any other "es")
**
** Words that do not end in 's' are left alone. The word must be at least
** two bytes long, because the second-to-last byte is always inspected
** when the last byte is 's'.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  const int nLen = *pnBuf;

  if( aBuf[nLen-1]!='s' ) return;

  switch( aBuf[nLen-2] ){
    case 'e': {
      /* The length tests guarantee at least one byte remains in front of
      ** the suffix being removed. */
      const int bSses = (nLen>4 && aBuf[nLen-4]=='s' && aBuf[nLen-3]=='s');
      const int bIes = (!bSses && nLen>3 && aBuf[nLen-3]=='i');
      *pnBuf = nLen - ((bSses || bIes) ? 2 : 1);
      break;
    }

    case 's':
      /* Word ends in "ss": nothing to remove. */
      break;

    default:
      *pnBuf = nLen - 1;
      break;
  }
}
25613
25614static int fts5PorterCb(
25615 void *pCtx,
25616 int tflags,
25617 const char *pToken,
25618 int nToken,
25619 int iStart,
25620 int iEnd
25621){
25622 PorterContext *p = (PorterContext*)pCtx;
25623
25624 char *aBuf;
25625 int nBuf;
25626
25627 if( nToken>FTS5_PORTER_MAX_TOKEN64 || nToken<3 ) goto pass_through;
25628 aBuf = p->aBuf;
25629 nBuf = nToken;
25630 memcpy(aBuf, pToken, nBuf);
25631
25632 /* Step 1. */
25633 fts5PorterStep1A(aBuf, &nBuf);
25634 if( fts5PorterStep1B(aBuf, &nBuf) ){
25635 if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){
25636 char c = aBuf[nBuf-1];
25637 if( fts5PorterIsVowel(c, 0)==0
25638 && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2]
25639 ){
25640 nBuf--;
25641 }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){
25642 aBuf[nBuf++] = 'e';
25643 }
25644 }
25645 }
25646
25647 /* Step 1C. */
25648 if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){
25649 aBuf[nBuf-1] = 'i';
25650 }
25651
25652 /* Steps 2 through 4. */
25653 fts5PorterStep2(aBuf, &nBuf);
25654 fts5PorterStep3(aBuf, &nBuf);
25655 fts5PorterStep4(aBuf, &nBuf);
25656
25657 /* Step 5a. */
25658 assert( nBuf>0 )((void) (0));
25659 if( aBuf[nBuf-1]=='e' ){
25660 if( fts5Porter_MGt1(aBuf, nBuf-1)
25661 || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
25662 ){
25663 nBuf--;
25664 }
25665 }
25666
25667 /* Step 5b. */
25668 if( nBuf>1 && aBuf[nBuf-1]=='l'
25669 && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1)
25670 ){
25671 nBuf--;
25672 }
25673
25674 return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
25675
25676 pass_through:
25677 return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
25678}
25679
25680/*
25681** Tokenize using the porter tokenizer.
25682*/
25683static int fts5PorterTokenize(
25684 Fts5Tokenizer *pTokenizer,
25685 void *pCtx,
25686 int flags,
25687 const char *pText, int nText,
25688 const char *pLoc, int nLoc,
25689 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
25690){
25691 PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
25692 PorterContext sCtx;
25693 sCtx.xToken = xToken;
25694 sCtx.pCtx = pCtx;
25695 sCtx.aBuf = p->aBuf;
25696 return p->tokenizer_v2.xTokenize(
25697 p->pTokenizer, (void*)&sCtx, flags, pText, nText, pLoc, nLoc, fts5PorterCb
25698 );
25699}
25700
25701/**************************************************************************
25702** Start of trigram implementation.
25703*/
/*
** State of one trigram tokenizer instance. Instances are allocated and
** configured by fts5TriCreate() and released by fts5TriDelete().
*/
25704typedef struct TrigramTokenizer TrigramTokenizer;
25705struct TrigramTokenizer {
25706 int bFold; /* True to fold to lower-case. Defaults to 1; the
               ** "case_sensitive" option sets it to 1 for "0" and
               ** to 0 for "1". */
25707 int iFoldParam; /* Parameter to pass to Fts5UnicodeFold(). Defaults
               ** to 0; the "remove_diacritics" option leaves it 0
               ** for "0" and sets it to 2 for "1" or "2". */
25708};
25709
25710/*
25711** Free a trigram tokenizer.
**
** p is a TrigramTokenizer cast to Fts5Tokenizer*, as produced by
** fts5TriCreate(). fts5TriCreate() makes a single allocation for the
** structure, so releasing that one block is all that is required.
25712*/
25713static void fts5TriDelete(Fts5Tokenizer *p){
25714 sqlite3_freesqlite3_api->free(p);
25715}
25716
25717/*
25718** Allocate a trigram tokenizer.
25719*/
25720static int fts5TriCreate(
25721 void *pUnused,
25722 const char **azArg,
25723 int nArg,
25724 Fts5Tokenizer **ppOut
25725){
25726 int rc = SQLITE_OK0;
25727 TrigramTokenizer *pNew = 0;
25728 UNUSED_PARAM(pUnused)(void)(pUnused);
25729 if( nArg%2 ){
25730 rc = SQLITE_ERROR1;
25731 }else{
25732 int i;
25733 pNew = (TrigramTokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew));
25734 if( pNew==0 ){
25735 rc = SQLITE_NOMEM7;
25736 }else{
25737 pNew->bFold = 1;
25738 pNew->iFoldParam = 0;
25739
25740 for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){
25741 const char *zArg = azArg[i+1];
25742 if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "case_sensitive") ){
25743 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
25744 rc = SQLITE_ERROR1;
25745 }else{
25746 pNew->bFold = (zArg[0]=='0');
25747 }
25748 }else if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "remove_diacritics") ){
25749 if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
25750 rc = SQLITE_ERROR1;
25751 }else{
25752 pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0;
25753 }
25754 }else{
25755 rc = SQLITE_ERROR1;
25756 }
25757 }
25758
25759 if( pNew->iFoldParam!=0 && pNew->bFold==0 ){
25760 rc = SQLITE_ERROR1;
25761 }
25762
25763 if( rc!=SQLITE_OK0 ){
25764 fts5TriDelete((Fts5Tokenizer*)pNew);
25765 pNew = 0;
25766 }
25767 }
25768 }
25769 *ppOut = (Fts5Tokenizer*)pNew;
25770 return rc;
25771}
25772
25773/*
25774** Trigram tokenizer tokenize routine.
25775*/
25776static int fts5TriTokenize(
25777 Fts5Tokenizer *pTok,
25778 void *pCtx,
25779 int unusedFlags,
25780 const char *pText, int nText,
25781 int (*xToken)(void*, int, const char*, int, int, int)
25782){
25783 TrigramTokenizer *p = (TrigramTokenizer*)pTok;
25784 int rc = SQLITE_OK0;
25785 char aBuf[32];
25786 char *zOut = aBuf;
25787 int ii;
25788 const unsigned char *zIn = (const unsigned char*)pText;
25789 const unsigned char *zEof = (zIn ? &zIn[nText] : 0);
25790 u32 iCode = 0;
25791 int aStart[3]; /* Input offset of each character in aBuf[] */
25792
25793 UNUSED_PARAM(unusedFlags)(void)(unusedFlags);
25794
25795 /* Populate aBuf[] with the characters for the first trigram. */
25796 for(ii=0; ii<3; ii++){
25797 do {
25798 aStart[ii] = zIn - (const unsigned char*)pText;
25799 if( zIn>=zEof ) return SQLITE_OK0;
25800 READ_UTF8(zIn, zEof, iCode)iCode = *(zIn++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1
[iCode-0xc0]; while( zIn<zEof && (*zIn & 0xc0)
==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zIn++)); }
if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode
&0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }
;
25801 if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam);
25802 }while( iCode==0 );
25803 WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode&
0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned
char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned
char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut
++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut
++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut
++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++
= 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut
++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut
++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut
++ = 0x80 + (unsigned char)(iCode & 0x3F); } }
;
25804 }
25805
25806 /* At the start of each iteration of this loop:
25807 **
25808 ** aBuf: Contains 3 characters. The 3 characters of the next trigram.
25809 ** zOut: Points to the byte following the last character in aBuf.
25810 ** aStart[3]: Contains the byte offset in the input text corresponding
25811 ** to the start of each of the three characters in the buffer.
25812 */
25813 assert( zIn<=zEof )((void) (0));
25814 while( 1 ){
25815 int iNext; /* Start of character following current tri */
25816 const char *z1;
25817
25818 /* Read characters from the input up until the first non-diacritic */
25819 do {
25820 iNext = zIn - (const unsigned char*)pText;
25821 if( zIn>=zEof ){
25822 iCode = 0;
25823 break;
25824 }
25825 READ_UTF8(zIn, zEof, iCode)iCode = *(zIn++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1
[iCode-0xc0]; while( zIn<zEof && (*zIn & 0xc0)
==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zIn++)); }
if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode
&0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }
;
25826 if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam);
25827 }while( iCode==0 );
25828
25829 /* Pass the current trigram back to fts5 */
25830 rc = xToken(pCtx, 0, aBuf, zOut-aBuf, aStart[0], iNext);
25831 if( iCode==0 || rc!=SQLITE_OK0 ) break;
25832
25833 /* Remove the first character from buffer aBuf[]. Append the character
25834 ** with codepoint iCode. */
25835 z1 = aBuf;
25836 FTS5_SKIP_UTF8(z1){ if( ((unsigned char)(*(z1++)))>=0xc0 ){ while( (((unsigned
char)*z1) & 0xc0)==0x80 ){ z1++; } } }
;
25837 memmove(aBuf, z1, zOut - z1);
25838 zOut -= (z1 - aBuf);
25839 WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode&
0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned
char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned
char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut
++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut
++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut
++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++
= 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut
++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut
++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut
++ = 0x80 + (unsigned char)(iCode & 0x3F); } }
;
25840
25841 /* Update the aStart[] array */
25842 aStart[0] = aStart[1];
25843 aStart[1] = aStart[2];
25844 aStart[2] = iNext;
25845 }
25846
25847 return rc;
25848}
25849
25850/*
25851** Argument xCreate is a pointer to a constructor function for a tokenizer.
25852** pTok is a tokenizer previously created using the same method. This function
25853** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB
25854** indicating the style of pattern matching that the tokenizer can support.
25855** In practice, this is:
25856**
25857** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB
25858** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE
25859** all other tokenizers - FTS5_PATTERN_NONE
25860*/
25861static int sqlite3Fts5TokenizerPattern(
25862 int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
25863 Fts5Tokenizer *pTok
25864){
25865 if( xCreate==fts5TriCreate ){
25866 TrigramTokenizer *p = (TrigramTokenizer*)pTok;
25867 if( p->iFoldParam==0 ){
25868 return p->bFold ? FTS5_PATTERN_LIKE65 : FTS5_PATTERN_GLOB66;
25869 }
25870 }
25871 return FTS5_PATTERN_NONE0;
25872}
25873
25874/*
25875** Return true if the tokenizer described by p->azArg[] is the trigram
25876** tokenizer. This tokenizer needs to be loaded before xBestIndex is
25877** called for the first time in order to correctly handle LIKE/GLOB.
25878*/
25879static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig *p){
25880 return (p->nArg>=1 && 0==sqlite3_stricmpsqlite3_api->stricmp(p->azArg[0], "trigram"));
25881}
25882
25883
25884/*
25885** Register all built-in tokenizers with FTS5.
25886*/
25887static int sqlite3Fts5TokenizerInit(fts5_api *pApi){
25888 struct BuiltinTokenizer {
25889 const char *zName;
25890 fts5_tokenizer x;
25891 } aBuiltin[] = {
25892 { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
25893 { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
25894 { "trigram", {fts5TriCreate, fts5TriDelete, fts5TriTokenize}},
25895 };
25896
25897 int rc = SQLITE_OK0; /* Return code */
25898 int i; /* To iterate through builtin functions */
25899
25900 for(i=0; rc==SQLITE_OK0 && i<ArraySize(aBuiltin)((int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]))); i++){
25901 rc = pApi->xCreateTokenizer(pApi,
25902 aBuiltin[i].zName,
25903 (void*)pApi,
25904 &aBuiltin[i].x,
25905 0
25906 );
25907 }
25908 if( rc==SQLITE_OK0 ){
25909 fts5_tokenizer_v2 sPorter = {
25910 2,
25911 fts5PorterCreate,
25912 fts5PorterDelete,
25913 fts5PorterTokenize
25914 };
25915 rc = pApi->xCreateTokenizer_v2(pApi,
25916 "porter",
25917 (void*)pApi,
25918 &sPorter,
25919 0
25920 );
25921 }
25922 return rc;
25923}
25924
25925#line 1 "fts5_unicode2.c"
25926/*
25927** 2012-05-25
25928**
25929** The author disclaims copyright to this source code. In place of
25930** a legal notice, here is a blessing:
25931**
25932** May you do good and not evil.
25933** May you find forgiveness for yourself and forgive others.
25934** May you share freely, never taking more than you give.
25935**
25936******************************************************************************
25937*/
25938
25939/*
25940** DO NOT EDIT THIS MACHINE GENERATED FILE.
25941*/
25942
25943
25944#include <assert.h>
25945
25946
25947
25948/*
25949** If the argument is a codepoint corresponding to a lowercase letter
25950** in the ASCII range with a diacritic added, return the codepoint
25951** of the ASCII letter only. For example, if passed 235 - "LATIN
25952** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
25953** E"). The results of passing a codepoint that corresponds to an
25954** uppercase letter are undefined.
**
** Table entries whose replacement character is tagged with HIBIT are
** only applied when bComplex is non-zero; with bComplex==0 such
** codepoints are returned unchanged. Any codepoint not covered by the
** tables is returned unchanged.
25955*/
25956static int fts5_remove_diacritic(int c, int bComplex){
  /* Each aDia[] entry packs a range of codepoints: the upper 13 bits
  ** (entry>>3) are the first codepoint of the range and the low 3 bits
  ** (entry&0x07) are the distance from there to the last codepoint of
  ** the range. Entries are sorted so that they can be binary searched. */
25957 unsigned short aDia[] = {
25958 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
25959 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
25960 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
25961 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
25962 3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896,
25963 3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106,
25964 4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344,
25965 4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198,
25966 6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468,
25967 61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
25968 61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
25969 61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
25970 62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
25971 62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
25972 62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
25973 63182, 63242, 63274, 63310, 63368, 63390,
25974 };
  /* aChar[i] is the replacement for the range described by aDia[i]. The
  ** low 7 bits are the ASCII character; HIBIT marks a replacement that
  ** is only used when bComplex is set. */
25975#define HIBIT((unsigned char)0x80) ((unsigned char)0x80)
25976 unsigned char aChar[] = {
25977 '\0', 'a', 'c', 'e', 'i', 'n',
25978 'o', 'u', 'y', 'y', 'a', 'c',
25979 'd', 'e', 'e', 'g', 'h', 'i',
25980 'j', 'k', 'l', 'n', 'o', 'r',
25981 's', 't', 'u', 'u', 'w', 'y',
25982 'z', 'o', 'u', 'a', 'i', 'o',
25983 'u', 'u'|HIBIT((unsigned char)0x80), 'a'|HIBIT((unsigned char)0x80), 'g', 'k', 'o',
25984 'o'|HIBIT((unsigned char)0x80), 'j', 'g', 'n', 'a'|HIBIT((unsigned char)0x80), 'a',
25985 'e', 'i', 'o', 'r', 'u', 's',
25986 't', 'h', 'a', 'e', 'o'|HIBIT((unsigned char)0x80), 'o',
25987 'o'|HIBIT((unsigned char)0x80), 'y', '\0', '\0', '\0', '\0',
25988 '\0', '\0', '\0', '\0', 'a', 'b',
25989 'c'|HIBIT((unsigned char)0x80), 'd', 'd', 'e'|HIBIT((unsigned char)0x80), 'e', 'e'|HIBIT((unsigned char)0x80),
25990 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT((unsigned char)0x80),
25991 'k', 'l', 'l'|HIBIT((unsigned char)0x80), 'l', 'm', 'n',
25992 'o'|HIBIT((unsigned char)0x80), 'p', 'r', 'r'|HIBIT((unsigned char)0x80), 'r', 's',
25993 's'|HIBIT((unsigned char)0x80), 't', 'u', 'u'|HIBIT((unsigned char)0x80), 'v', 'w',
25994 'w', 'x', 'y', 'z', 'h', 't',
25995 'w', 'y', 'a', 'a'|HIBIT((unsigned char)0x80), 'a'|HIBIT((unsigned char)0x80), 'a'|HIBIT((unsigned char)0x80),
25996 'e', 'e'|HIBIT((unsigned char)0x80), 'e'|HIBIT((unsigned char)0x80), 'i', 'o', 'o'|HIBIT((unsigned char)0x80),
25997 'o'|HIBIT((unsigned char)0x80), 'o'|HIBIT((unsigned char)0x80), 'u', 'u'|HIBIT((unsigned char)0x80), 'u'|HIBIT((unsigned char)0x80), 'y',
25998 };
25999
  /* The search key is c in the same packed form as aDia[], with the low
  ** three bits all set so that it compares >= every entry whose first
  ** codepoint is <= c. */
26000 unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
26001 int iRes = 0;
26002 int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
26003 int iLo = 0;
  /* Binary search for the last entry that is <= key. aDia[0]==0, so
  ** iRes always ends up identifying a valid entry. */
26004 while( iHi>=iLo ){
26005 int iTest = (iHi + iLo) / 2;
26006 if( key >= aDia[iTest] ){
26007 iRes = iTest;
26008 iLo = iTest+1;
26009 }else{
26010 iHi = iTest-1;
26011 }
26012 }
26013 assert( key>=aDia[iRes] )((void) (0));
  /* HIBIT replacements are reserved for bComplex mode. */
26014 if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
  /* If c lies beyond the end of the range it is not covered by any
  ** entry; otherwise strip the HIBIT flag and return the ASCII letter. */
26015 return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
26016}
26017
26018
/*
** Return non-zero if the argument, interpreted as a unicode codepoint, is
** one of the diacritical modifier characters recognized by this module,
** or zero otherwise.
**
** Only codepoints 768 through 817 are considered. They are looked up in a
** 50-bit set stored as two 32-bit words: word 0 covers 768..799 and word 1
** covers 800..817. The value returned for a member of the set is the
** matching mask bit itself, so callers must test for non-zero rather than
** compare against 1.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  static const unsigned int aMask[2] = { 0x08029FDF, 0x000361F8 };
  unsigned int iBit;              /* Index of c within the bit set */

  if( c<768 || c>817 ) return 0;

  iBit = (unsigned int)(c - 768);
  return (int)(aMask[iBit>>5] & ((unsigned int)1 << (iBit & 31)));
}
26031
26032
26033/*
26034** Interpret the argument as a unicode codepoint. If the codepoint
26035** is an upper case character that has a lower case equivalent,
26036** return the codepoint corresponding to the lower case version.
26037** Otherwise, return a copy of the argument.
26038**
** If eRemoveDiacritic is non-zero and c is in the range 128..65535, the
** folded codepoint is additionally passed through
** fts5_remove_diacritic(), with its bComplex argument set when
** eRemoveDiacritic==2. Codepoints below 128 and above 65535 are never
** passed to fts5_remove_diacritic().
**
26039** The results are undefined if the value passed to this function
26040** is less than zero.
26041*/
26042static int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
26043 /* Each entry in the following array defines a rule for folding a range
26044 ** of codepoints to lower case. The rule applies to a range of nRange
26045 ** codepoints starting at codepoint iCode.
26046 **
26047 ** If the least significant bit in flags is clear, then the rule applies
26048 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
26049 ** need to be folded). Or, if it is set, then the rule only applies to
26050 ** every second codepoint in the range, starting with codepoint iCode.
26051 **
26052 ** The 7 most significant bits in flags are an index into the aiOff[]
26053 ** array. If a specific codepoint C does require folding, then its lower
26054 ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
26055 **
26056 ** The contents of this array are generated by parsing the CaseFolding.txt
26057 ** file distributed as part of the "Unicode Character Database". See
26058 ** http://www.unicode.org for details.
26059 */
26060 static const struct TableEntry {
26061 unsigned short iCode;
26062 unsigned char flags;
26063 unsigned char nRange;
26064 } aEntry[] = {
26065 {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
26066 {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
26067 {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
26068 {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
26069 {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
26070 {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
26071 {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
26072 {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
26073 {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
26074 {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
26075 {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
26076 {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
26077 {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
26078 {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
26079 {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
26080 {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
26081 {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
26082 {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
26083 {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
26084 {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
26085 {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
26086 {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
26087 {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
26088 {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
26089 {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
26090 {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
26091 {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
26092 {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
26093 {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
26094 {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
26095 {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
26096 {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
26097 {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
26098 {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
26099 {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
26100 {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
26101 {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
26102 {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
26103 {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
26104 {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
26105 {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
26106 {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
26107 {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
26108 {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
26109 {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
26110 {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
26111 {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
26112 {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
26113 {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
26114 {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
26115 {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
26116 {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
26117 {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
26118 {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
26119 {65313, 14, 26},
26120 };
  /* Offsets selected by (flags>>1). The sum C+offset is reduced modulo
  ** 65536 when it is applied below, so the large values in this table act
  ** as negative offsets. */
26121 static const unsigned short aiOff[] = {
26122 1, 2, 8, 15, 16, 26, 28, 32,
26123 37, 38, 40, 48, 63, 64, 69, 71,
26124 79, 80, 116, 202, 203, 205, 206, 207,
26125 209, 210, 211, 213, 214, 217, 218, 219,
26126 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
26127 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
26128 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
26129 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
26130 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
26131 65514, 65521, 65527, 65528, 65529,
26132 };
26133
26134 int ret = c;
26135
26136 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 )((void) (0));
26137
  /* ASCII fast path: only 'A'..'Z' are changed and the tables are not
  ** consulted. */
26138 if( c<128 ){
26139 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
26140 }else if( c<65536 ){
26141 const struct TableEntry *p;
26142 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
26143 int iLo = 0;
26144 int iRes = -1;
26145
  /* Binary search for the last entry with iCode<=c. Since c>=128 here
  ** and aEntry[0].iCode is 65, such an entry always exists. */
26146 assert( c>aEntry[0].iCode )((void) (0));
26147 while( iHi>=iLo ){
26148 int iTest = (iHi + iLo) / 2;
26149 int cmp = (c - aEntry[iTest].iCode);
26150 if( cmp>=0 ){
26151 iRes = iTest;
26152 iLo = iTest+1;
26153 }else{
26154 iHi = iTest-1;
26155 }
26156 }
26157
26158 assert( iRes>=0 && c>=aEntry[iRes].iCode )((void) (0));
26159 p = &aEntry[iRes];
  /* The rule applies if c lies inside the range and, for "every second
  ** codepoint" rules (flags bit 0 set), c has the same parity as iCode. */
26160 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
26161 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
26162 assert( ret>0 )((void) (0));
26163 }
26164
  /* Diacritics are removed from the folded value, not from the input. */
26165 if( eRemoveDiacritic ){
26166 ret = fts5_remove_diacritic(ret, eRemoveDiacritic==2);
26167 }
26168 }
26169
  /* The only rule above 65535: codepoints 66560..66599 fold to c+40. */
26170 else if( c>=66560 && c<66600 ){
26171 ret = c + 40;
26172 }
26173
26174 return ret;
26175}
26177
26178static int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){
26179 aArray[0] = 1;
26180 switch( zCat[0] ){
26181 case 'C':
26182 switch( zCat[1] ){
26183 case 'c': aArray[1] = 1; break;
26184 case 'f': aArray[2] = 1; break;
26185 case 'n': aArray[3] = 1; break;
26186 case 's': aArray[4] = 1; break;
26187 case 'o': aArray[31] = 1; break;
26188 case '*':
26189 aArray[1] = 1;
26190 aArray[2] = 1;
26191 aArray[3] = 1;
26192 aArray[4] = 1;
26193 aArray[31] = 1;
26194 break;
26195 default: return 1; }
26196 break;
26197
26198 case 'L':
26199 switch( zCat[1] ){
26200 case 'l': aArray[5] = 1; break;
26201 case 'm': aArray[6] = 1; break;
26202 case 'o': aArray[7] = 1; break;
26203 case 't': aArray[8] = 1; break;
26204 case 'u': aArray[9] = 1; break;
26205 case 'C': aArray[30] = 1; break;
26206 case '*':
26207 aArray[5] = 1;
26208 aArray[6] = 1;
26209 aArray[7] = 1;
26210 aArray[8] = 1;
26211 aArray[9] = 1;
26212 aArray[30] = 1;
26213 break;
26214 default: return 1; }
26215 break;
26216
26217 case 'M':
26218 switch( zCat[1] ){
26219 case 'c': aArray[10] = 1; break;
26220 case 'e': aArray[11] = 1; break;
26221 case 'n': aArray[12] = 1; break;
26222 case '*':
26223 aArray[10] = 1;
26224 aArray[11] = 1;
26225 aArray[12] = 1;
26226 break;
26227 default: return 1; }
26228 break;
26229
26230 case 'N':
26231 switch( zCat[1] ){
26232 case 'd': aArray[13] = 1; break;
26233 case 'l': aArray[14] = 1; break;
26234 case 'o': aArray[15] = 1; break;
26235 case '*':
26236 aArray[13] = 1;
26237 aArray[14] = 1;
26238 aArray[15] = 1;
26239 break;
26240 default: return 1; }
26241 break;
26242
26243 case 'P':
26244 switch( zCat[1] ){
26245 case 'c': aArray[16] = 1; break;
26246 case 'd': aArray[17] = 1; break;
26247 case 'e': aArray[18] = 1; break;
26248 case 'f': aArray[19] = 1; break;
26249 case 'i': aArray[20] = 1; break;
26250 case 'o': aArray[21] = 1; break;
26251 case 's': aArray[22] = 1; break;
26252 case '*':
26253 aArray[16] = 1;
26254 aArray[17] = 1;
26255 aArray[18] = 1;
26256 aArray[19] = 1;
26257 aArray[20] = 1;
26258 aArray[21] = 1;
26259 aArray[22] = 1;
26260 break;
26261 default: return 1; }
26262 break;
26263
26264 case 'S':
26265 switch( zCat[1] ){
26266 case 'c': aArray[23] = 1; break;
26267 case 'k': aArray[24] = 1; break;
26268 case 'm': aArray[25] = 1; break;
26269 case 'o': aArray[26] = 1; break;
26270 case '*':
26271 aArray[23] = 1;
26272 aArray[24] = 1;
26273 aArray[25] = 1;
26274 aArray[26] = 1;
26275 break;
26276 default: return 1; }
26277 break;
26278
26279 case 'Z':
26280 switch( zCat[1] ){
26281 case 'l': aArray[27] = 1; break;
26282 case 'p': aArray[28] = 1; break;
26283 case 's': aArray[29] = 1; break;
26284 case '*':
26285 aArray[27] = 1;
26286 aArray[28] = 1;
26287 aArray[29] = 1;
26288 break;
26289 default: return 1; }
26290 break;
26291
26292
26293 default:
26294 return 1;
26295 }
26296 return 0;
26297}
26298
/*
** Table of 17 non-decreasing values. The code that consults it is not
** part of this section of the file.
** NOTE(review): presumably each value is the index in aFts5UnicodeMap[]
** at which the data for one block of codepoints begins - confirm against
** the lookup routine before relying on this.
*/
26299static u16 aFts5UnicodeBlock[] = {
26300 0, 1471, 1753, 1760, 1760, 1760, 1760, 1760, 1760, 1760,
26301 1760, 1760, 1760, 1760, 1760, 1763, 1765,
26302 };
26303static u16 aFts5UnicodeMap[] = {
26304 0, 32, 33, 36, 37, 40, 41, 42, 43, 44,
26305 45, 46, 48, 58, 60, 63, 65, 91, 92, 93,
26306 94, 95, 96, 97, 123, 124, 125, 126, 127, 160,
26307 161, 162, 166, 167, 168, 169, 170, 171, 172, 173,
26308 174, 175, 176, 177, 178, 180, 181, 182, 184, 185,
26309 186, 187, 188, 191, 192, 215, 216, 223, 247, 248,
26310 256, 312, 313, 329, 330, 377, 383, 385, 387, 388,
26311 391, 394, 396, 398, 402, 403, 405, 406, 409, 412,
26312 414, 415, 417, 418, 423, 427, 428, 431, 434, 436,
26313 437, 440, 442, 443, 444, 446, 448, 452, 453, 454,
26314 455, 456, 457, 458, 459, 460, 461, 477, 478, 496,
26315 497, 498, 499, 500, 503, 505, 506, 564, 570, 572,
26316 573, 575, 577, 580, 583, 584, 592, 660, 661, 688,
26317 706, 710, 722, 736, 741, 748, 749, 750, 751, 768,
26318 880, 884, 885, 886, 890, 891, 894, 900, 902, 903,
26319 904, 908, 910, 912, 913, 931, 940, 975, 977, 978,
26320 981, 984, 1008, 1012, 1014, 1015, 1018, 1020, 1021, 1072,
26321 1120, 1154, 1155, 1160, 1162, 1217, 1231, 1232, 1329, 1369,
26322 1370, 1377, 1417, 1418, 1423, 1425, 1470, 1471, 1472, 1473,
26323 1475, 1476, 1478, 1479, 1488, 1520, 1523, 1536, 1542, 1545,
26324 1547, 1548, 1550, 1552, 1563, 1566, 1568, 1600, 1601, 1611,
26325 1632, 1642, 1646, 1648, 1649, 1748, 1749, 1750, 1757, 1758,
26326 1759, 1765, 1767, 1769, 1770, 1774, 1776, 1786, 1789, 1791,
26327 1792, 1807, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1984,
26328 1994, 2027, 2036, 2038, 2039, 2042, 2048, 2070, 2074, 2075,
26329 2084, 2085, 2088, 2089, 2096, 2112, 2137, 2142, 2208, 2210,
26330 2276, 2304, 2307, 2308, 2362, 2363, 2364, 2365, 2366, 2369,
26331 2377, 2381, 2382, 2384, 2385, 2392, 2402, 2404, 2406, 2416,
26332 2417, 2418, 2425, 2433, 2434, 2437, 2447, 2451, 2474, 2482,
26333 2486, 2492, 2493, 2494, 2497, 2503, 2507, 2509, 2510, 2519,
26334 2524, 2527, 2530, 2534, 2544, 2546, 2548, 2554, 2555, 2561,
26335 2563, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622,
26336 2625, 2631, 2635, 2641, 2649, 2654, 2662, 2672, 2674, 2677,
26337 2689, 2691, 2693, 2703, 2707, 2730, 2738, 2741, 2748, 2749,
26338 2750, 2753, 2759, 2761, 2763, 2765, 2768, 2784, 2786, 2790,
26339 2800, 2801, 2817, 2818, 2821, 2831, 2835, 2858, 2866, 2869,
26340 2876, 2877, 2878, 2879, 2880, 2881, 2887, 2891, 2893, 2902,
26341 2903, 2908, 2911, 2914, 2918, 2928, 2929, 2930, 2946, 2947,
26342 2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006,
26343 3008, 3009, 3014, 3018, 3021, 3024, 3031, 3046, 3056, 3059,
26344 3065, 3066, 3073, 3077, 3086, 3090, 3114, 3125, 3133, 3134,
26345 3137, 3142, 3146, 3157, 3160, 3168, 3170, 3174, 3192, 3199,
26346 3202, 3205, 3214, 3218, 3242, 3253, 3260, 3261, 3262, 3263,
26347 3264, 3270, 3271, 3274, 3276, 3285, 3294, 3296, 3298, 3302,
26348 3313, 3330, 3333, 3342, 3346, 3389, 3390, 3393, 3398, 3402,
26349 3405, 3406, 3415, 3424, 3426, 3430, 3440, 3449, 3450, 3458,
26350 3461, 3482, 3507, 3517, 3520, 3530, 3535, 3538, 3542, 3544,
26351 3570, 3572, 3585, 3633, 3634, 3636, 3647, 3648, 3654, 3655,
26352 3663, 3664, 3674, 3713, 3716, 3719, 3722, 3725, 3732, 3737,
26353 3745, 3749, 3751, 3754, 3757, 3761, 3762, 3764, 3771, 3773,
26354 3776, 3782, 3784, 3792, 3804, 3840, 3841, 3844, 3859, 3860,
26355 3861, 3864, 3866, 3872, 3882, 3892, 3893, 3894, 3895, 3896,
26356 3897, 3898, 3899, 3900, 3901, 3902, 3904, 3913, 3953, 3967,
26357 3968, 3973, 3974, 3976, 3981, 3993, 4030, 4038, 4039, 4046,
26358 4048, 4053, 4057, 4096, 4139, 4141, 4145, 4146, 4152, 4153,
26359 4155, 4157, 4159, 4160, 4170, 4176, 4182, 4184, 4186, 4190,
26360 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4227, 4229,
26361 4231, 4237, 4238, 4239, 4240, 4250, 4253, 4254, 4256, 4295,
26362 4301, 4304, 4347, 4348, 4349, 4682, 4688, 4696, 4698, 4704,
26363 4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888,
26364 4957, 4960, 4969, 4992, 5008, 5024, 5120, 5121, 5741, 5743,
26365 5760, 5761, 5787, 5788, 5792, 5867, 5870, 5888, 5902, 5906,
26366 5920, 5938, 5941, 5952, 5970, 5984, 5998, 6002, 6016, 6068,
26367 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6103, 6104, 6107,
26368 6108, 6109, 6112, 6128, 6144, 6150, 6151, 6155, 6158, 6160,
26369 6176, 6211, 6212, 6272, 6313, 6314, 6320, 6400, 6432, 6435,
26370 6439, 6441, 6448, 6450, 6451, 6457, 6464, 6468, 6470, 6480,
26371 6512, 6528, 6576, 6593, 6600, 6608, 6618, 6622, 6656, 6679,
26372 6681, 6686, 6688, 6741, 6742, 6743, 6744, 6752, 6753, 6754,
26373 6755, 6757, 6765, 6771, 6783, 6784, 6800, 6816, 6823, 6824,
26374 6912, 6916, 6917, 6964, 6965, 6966, 6971, 6972, 6973, 6978,
26375 6979, 6981, 6992, 7002, 7009, 7019, 7028, 7040, 7042, 7043,
26376 7073, 7074, 7078, 7080, 7082, 7083, 7084, 7086, 7088, 7098,
26377 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7164, 7168,
26378 7204, 7212, 7220, 7222, 7227, 7232, 7245, 7248, 7258, 7288,
26379 7294, 7360, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406,
26380 7410, 7412, 7413, 7424, 7468, 7531, 7544, 7545, 7579, 7616,
26381 7676, 7680, 7830, 7838, 7936, 7944, 7952, 7960, 7968, 7976,
26382 7984, 7992, 8000, 8008, 8016, 8025, 8027, 8029, 8031, 8033,
26383 8040, 8048, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8118,
26384 8120, 8124, 8125, 8126, 8127, 8130, 8134, 8136, 8140, 8141,
26385 8144, 8150, 8152, 8157, 8160, 8168, 8173, 8178, 8182, 8184,
26386 8188, 8189, 8192, 8203, 8208, 8214, 8216, 8217, 8218, 8219,
26387 8221, 8222, 8223, 8224, 8232, 8233, 8234, 8239, 8240, 8249,
26388 8250, 8251, 8255, 8257, 8260, 8261, 8262, 8263, 8274, 8275,
26389 8276, 8277, 8287, 8288, 8298, 8304, 8305, 8308, 8314, 8317,
26390 8318, 8319, 8320, 8330, 8333, 8334, 8336, 8352, 8400, 8413,
26391 8417, 8418, 8421, 8448, 8450, 8451, 8455, 8456, 8458, 8459,
26392 8462, 8464, 8467, 8468, 8469, 8470, 8472, 8473, 8478, 8484,
26393 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8496, 8500,
26394 8501, 8505, 8506, 8508, 8510, 8512, 8517, 8519, 8522, 8523,
26395 8524, 8526, 8527, 8528, 8544, 8579, 8581, 8585, 8592, 8597,
26396 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623,
26397 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972,
26398 8992, 8994, 9001, 9002, 9003, 9084, 9085, 9115, 9140, 9180,
26399 9186, 9216, 9280, 9312, 9372, 9450, 9472, 9655, 9656, 9665,
26400 9666, 9720, 9728, 9839, 9840, 9985, 10088, 10089, 10090, 10091,
26401 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101,
26402 10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217,
26403 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10240, 10496, 10627,
26404 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637,
26405 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647,
26406 10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750,
26407 11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365,
26408 11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393,
26409 11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520,
26410 11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696,
26411 11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780,
26412 11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800,
26413 11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812,
26414 11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904,
26415 11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296,
26416 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306,
26417 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317,
26418 12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347,
26419 12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449,
26420 12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736,
26421 12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938,
26422 12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981,
26423 40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528,
26424 42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624,
26425 42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800,
26426 42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912,
26427 43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043,
26428 43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136,
26429 43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 43259, 43264,
26430 43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395,
26431 43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472,
26432 43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588,
26433 43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643,
26434 43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713,
26435 43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762,
26436 43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003,
26437 44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203,
26438 55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112,
26439 64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320,
26440 64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020,
26441 65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075,
26442 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086,
26443 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097,
26444 65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118,
26445 65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279,
26446 65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294,
26447 65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343,
26448 65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378,
26449 65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490,
26450 65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529,
26451 65532, 0, 13, 40, 60, 63, 80, 128, 256, 263,
26452 311, 320, 373, 377, 394, 400, 464, 509, 640, 672,
26453 768, 800, 816, 833, 834, 842, 896, 927, 928, 968,
26454 976, 977, 1024, 1064, 1104, 1184, 2048, 2056, 2058, 2103,
26455 2108, 2111, 2135, 2136, 2304, 2326, 2335, 2336, 2367, 2432,
26456 2494, 2560, 2561, 2565, 2572, 2576, 2581, 2585, 2616, 2623,
26457 2624, 2640, 2656, 2685, 2687, 2816, 2873, 2880, 2904, 2912,
26458 2936, 3072, 3680, 4096, 4097, 4098, 4099, 4152, 4167, 4178,
26459 4198, 4224, 4226, 4227, 4272, 4275, 4279, 4281, 4283, 4285,
26460 4286, 4304, 4336, 4352, 4355, 4391, 4396, 4397, 4406, 4416,
26461 4480, 4482, 4483, 4531, 4534, 4543, 4545, 4549, 4560, 5760,
26462 5803, 5804, 5805, 5806, 5808, 5814, 5815, 5824, 8192, 9216,
26463 9328, 12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248,
26464 53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637,
26465 53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298,
26466 54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441,
26467 54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541,
26468 54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662,
26469 54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922,
26470 54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062,
26471 55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178,
26472 55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961,
26473 60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003,
26474 61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028,
26475 61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099,
26476 61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744,
26477 61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368,
26478 62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971,
26479 63045, 63104, 63232, 0, 42710, 42752, 46900, 46912, 47133, 63488,
26480 1, 32, 256, 0, 65533,
26481 };
/*
** Run table consulted by sqlite3Fts5UnicodeCategory() and
** sqlite3Fts5UnicodeAscii().
**
** Each 16-bit entry packs two fields:
**
**   entry & 0x1F   category code for every code point in the run
**   entry >> 5     number of code points in the run
**
** sqlite3Fts5UnicodeCategory() pairs entry i with aFts5UnicodeMap[i], which
** supplies the 16-bit key at which the run starts. A category code of 30 is
** not returned as-is: that function returns 5 or 9 instead, chosen by the
** parity of the offset of the code point within the run.
**
** sqlite3Fts5UnicodeAscii() reads the entries from index 0 onwards as
** back-to-back runs beginning at code point 0.
*/
26482static u16 aFts5UnicodeData[] = {
26483 1025, 61, 117, 55, 117, 54, 50, 53, 57, 53,
26484 49, 85, 333, 85, 121, 85, 841, 54, 53, 50,
26485 56, 48, 56, 837, 54, 57, 50, 57, 1057, 61,
26486 53, 151, 58, 53, 56, 58, 39, 52, 57, 34,
26487 58, 56, 58, 57, 79, 56, 37, 85, 56, 47,
26488 39, 51, 111, 53, 745, 57, 233, 773, 57, 261,
26489 1822, 37, 542, 37, 1534, 222, 69, 73, 37, 126,
26490 126, 73, 69, 137, 37, 73, 37, 105, 101, 73,
26491 37, 73, 37, 190, 158, 37, 126, 126, 73, 37,
26492 126, 94, 37, 39, 94, 69, 135, 41, 40, 37,
26493 41, 40, 37, 41, 40, 37, 542, 37, 606, 37,
26494 41, 40, 37, 126, 73, 37, 1886, 197, 73, 37,
26495 73, 69, 126, 105, 37, 286, 2181, 39, 869, 582,
26496 152, 390, 472, 166, 248, 38, 56, 38, 568, 3596,
26497 158, 38, 56, 94, 38, 101, 53, 88, 41, 53,
26498 105, 41, 73, 37, 553, 297, 1125, 94, 37, 105,
26499 101, 798, 133, 94, 57, 126, 94, 37, 1641, 1541,
26500 1118, 58, 172, 75, 1790, 478, 37, 2846, 1225, 38,
26501 213, 1253, 53, 49, 55, 1452, 49, 44, 53, 76,
26502 53, 76, 53, 44, 871, 103, 85, 162, 121, 85,
26503 55, 85, 90, 364, 53, 85, 1031, 38, 327, 684,
26504 333, 149, 71, 44, 3175, 53, 39, 236, 34, 58,
26505 204, 70, 76, 58, 140, 71, 333, 103, 90, 39,
26506 469, 34, 39, 44, 967, 876, 2855, 364, 39, 333,
26507 1063, 300, 70, 58, 117, 38, 711, 140, 38, 300,
26508 38, 108, 38, 172, 501, 807, 108, 53, 39, 359,
26509 876, 108, 42, 1735, 44, 42, 44, 39, 106, 268,
26510 138, 44, 74, 39, 236, 327, 76, 85, 333, 53,
26511 38, 199, 231, 44, 74, 263, 71, 711, 231, 39,
26512 135, 44, 39, 106, 140, 74, 74, 44, 39, 42,
26513 71, 103, 76, 333, 71, 87, 207, 58, 55, 76,
26514 42, 199, 71, 711, 231, 71, 71, 71, 44, 106,
26515 76, 76, 108, 44, 135, 39, 333, 76, 103, 44,
26516 76, 42, 295, 103, 711, 231, 71, 167, 44, 39,
26517 106, 172, 76, 42, 74, 44, 39, 71, 76, 333,
26518 53, 55, 44, 74, 263, 71, 711, 231, 71, 167,
26519 44, 39, 42, 44, 42, 140, 74, 74, 44, 44,
26520 42, 71, 103, 76, 333, 58, 39, 207, 44, 39,
26521 199, 103, 135, 71, 39, 71, 71, 103, 391, 74,
26522 44, 74, 106, 106, 44, 39, 42, 333, 111, 218,
26523 55, 58, 106, 263, 103, 743, 327, 167, 39, 108,
26524 138, 108, 140, 76, 71, 71, 76, 333, 239, 58,
26525 74, 263, 103, 743, 327, 167, 44, 39, 42, 44,
26526 170, 44, 74, 74, 76, 74, 39, 71, 76, 333,
26527 71, 74, 263, 103, 1319, 39, 106, 140, 106, 106,
26528 44, 39, 42, 71, 76, 333, 207, 58, 199, 74,
26529 583, 775, 295, 39, 231, 44, 106, 108, 44, 266,
26530 74, 53, 1543, 44, 71, 236, 55, 199, 38, 268,
26531 53, 333, 85, 71, 39, 71, 39, 39, 135, 231,
26532 103, 39, 39, 71, 135, 44, 71, 204, 76, 39,
26533 167, 38, 204, 333, 135, 39, 122, 501, 58, 53,
26534 122, 76, 218, 333, 335, 58, 44, 58, 44, 58,
26535 44, 54, 50, 54, 50, 74, 263, 1159, 460, 42,
26536 172, 53, 76, 167, 364, 1164, 282, 44, 218, 90,
26537 181, 154, 85, 1383, 74, 140, 42, 204, 42, 76,
26538 74, 76, 39, 333, 213, 199, 74, 76, 135, 108,
26539 39, 106, 71, 234, 103, 140, 423, 44, 74, 76,
26540 202, 44, 39, 42, 333, 106, 44, 90, 1225, 41,
26541 41, 1383, 53, 38, 10631, 135, 231, 39, 135, 1319,
26542 135, 1063, 135, 231, 39, 135, 487, 1831, 135, 2151,
26543 108, 309, 655, 519, 346, 2727, 49, 19847, 85, 551,
26544 61, 839, 54, 50, 2407, 117, 110, 423, 135, 108,
26545 583, 108, 85, 583, 76, 423, 103, 76, 1671, 76,
26546 42, 236, 266, 44, 74, 364, 117, 38, 117, 55,
26547 39, 44, 333, 335, 213, 49, 149, 108, 61, 333,
26548 1127, 38, 1671, 1319, 44, 39, 2247, 935, 108, 138,
26549 76, 106, 74, 44, 202, 108, 58, 85, 333, 967,
26550 167, 1415, 554, 231, 74, 333, 47, 1114, 743, 76,
26551 106, 85, 1703, 42, 44, 42, 236, 44, 42, 44,
26552 74, 268, 202, 332, 44, 333, 333, 245, 38, 213,
26553 140, 42, 1511, 44, 42, 172, 42, 44, 170, 44,
26554 74, 231, 333, 245, 346, 300, 314, 76, 42, 967,
26555 42, 140, 74, 76, 42, 44, 74, 71, 333, 1415,
26556 44, 42, 76, 106, 44, 42, 108, 74, 149, 1159,
26557 266, 268, 74, 76, 181, 333, 103, 333, 967, 198,
26558 85, 277, 108, 53, 428, 42, 236, 135, 44, 135,
26559 74, 44, 71, 1413, 2022, 421, 38, 1093, 1190, 1260,
26560 140, 4830, 261, 3166, 261, 265, 197, 201, 261, 265,
26561 261, 265, 197, 201, 261, 41, 41, 41, 94, 229,
26562 265, 453, 261, 264, 261, 264, 261, 264, 165, 69,
26563 137, 40, 56, 37, 120, 101, 69, 137, 40, 120,
26564 133, 69, 137, 120, 261, 169, 120, 101, 69, 137,
26565 40, 88, 381, 162, 209, 85, 52, 51, 54, 84,
26566 51, 54, 52, 277, 59, 60, 162, 61, 309, 52,
26567 51, 149, 80, 117, 57, 54, 50, 373, 57, 53,
26568 48, 341, 61, 162, 194, 47, 38, 207, 121, 54,
26569 50, 38, 335, 121, 54, 50, 422, 855, 428, 139,
26570 44, 107, 396, 90, 41, 154, 41, 90, 37, 105,
26571 69, 105, 37, 58, 41, 90, 57, 169, 218, 41,
26572 58, 41, 58, 41, 58, 137, 58, 37, 137, 37,
26573 135, 37, 90, 69, 73, 185, 94, 101, 58, 57,
26574 90, 37, 58, 527, 1134, 94, 142, 47, 185, 186,
26575 89, 154, 57, 90, 57, 90, 57, 250, 57, 1018,
26576 89, 90, 57, 58, 57, 1018, 8601, 282, 153, 666,
26577 89, 250, 54, 50, 2618, 57, 986, 825, 1306, 217,
26578 602, 1274, 378, 1935, 2522, 719, 5882, 57, 314, 57,
26579 1754, 281, 3578, 57, 4634, 3322, 54, 50, 54, 50,
26580 54, 50, 54, 50, 54, 50, 54, 50, 54, 50,
26581 975, 1434, 185, 54, 50, 1017, 54, 50, 54, 50,
26582 54, 50, 54, 50, 54, 50, 537, 8218, 4217, 54,
26583 50, 54, 50, 54, 50, 54, 50, 54, 50, 54,
26584 50, 54, 50, 54, 50, 54, 50, 54, 50, 54,
26585 50, 2041, 54, 50, 54, 50, 1049, 54, 50, 8281,
26586 1562, 697, 90, 217, 346, 1513, 1509, 126, 73, 69,
26587 254, 105, 37, 94, 37, 94, 165, 70, 105, 37,
26588 3166, 37, 218, 158, 108, 94, 149, 47, 85, 1221,
26589 37, 37, 1799, 38, 53, 44, 743, 231, 231, 231,
26590 231, 231, 231, 231, 231, 1036, 85, 52, 51, 52,
26591 51, 117, 52, 51, 53, 52, 51, 309, 49, 85,
26592 49, 53, 52, 51, 85, 52, 51, 54, 50, 54,
26593 50, 54, 50, 54, 50, 181, 38, 341, 81, 858,
26594 2874, 6874, 410, 61, 117, 58, 38, 39, 46, 54,
26595 50, 54, 50, 54, 50, 54, 50, 54, 50, 90,
26596 54, 50, 54, 50, 54, 50, 54, 50, 49, 54,
26597 82, 58, 302, 140, 74, 49, 166, 90, 110, 38,
26598 39, 53, 90, 2759, 76, 88, 70, 39, 49, 2887,
26599 53, 102, 39, 1319, 3015, 90, 143, 346, 871, 1178,
26600 519, 1018, 335, 986, 271, 58, 495, 1050, 335, 1274,
26601 495, 2042, 8218, 39, 39, 2074, 39, 39, 679, 38,
26602 36583, 1786, 1287, 198, 85, 8583, 38, 117, 519, 333,
26603 71, 1502, 39, 44, 107, 53, 332, 53, 38, 798,
26604 44, 2247, 334, 76, 213, 760, 294, 88, 478, 69,
26605 2014, 38, 261, 190, 350, 38, 88, 158, 158, 382,
26606 70, 37, 231, 44, 103, 44, 135, 44, 743, 74,
26607 76, 42, 154, 207, 90, 55, 58, 1671, 149, 74,
26608 1607, 522, 44, 85, 333, 588, 199, 117, 39, 333,
26609 903, 268, 85, 743, 364, 74, 53, 935, 108, 42,
26610 1511, 44, 74, 140, 74, 44, 138, 437, 38, 333,
26611 85, 1319, 204, 74, 76, 74, 76, 103, 44, 263,
26612 44, 42, 333, 149, 519, 38, 199, 122, 39, 42,
26613 1543, 44, 39, 108, 71, 76, 167, 76, 39, 44,
26614 39, 71, 38, 85, 359, 42, 76, 74, 85, 39,
26615 70, 42, 44, 199, 199, 199, 231, 231, 1127, 74,
26616 44, 74, 44, 74, 53, 42, 44, 333, 39, 39,
26617 743, 1575, 36, 68, 68, 36, 63, 63, 11719, 3399,
26618 229, 165, 39, 44, 327, 57, 423, 167, 39, 71,
26619 71, 3463, 536, 11623, 54, 50, 2055, 1735, 391, 55,
26620 58, 524, 245, 54, 50, 53, 236, 53, 81, 80,
26621 54, 50, 54, 50, 54, 50, 54, 50, 54, 50,
26622 54, 50, 54, 50, 54, 50, 85, 54, 50, 149,
26623 112, 117, 149, 49, 54, 50, 54, 50, 54, 50,
26624 117, 57, 49, 121, 53, 55, 85, 167, 4327, 34,
26625 117, 55, 117, 54, 50, 53, 57, 53, 49, 85,
26626 333, 85, 121, 85, 841, 54, 53, 50, 56, 48,
26627 56, 837, 54, 57, 50, 57, 54, 50, 53, 54,
26628 50, 85, 327, 38, 1447, 70, 999, 199, 199, 199,
26629 103, 87, 57, 56, 58, 87, 58, 153, 90, 98,
26630 90, 391, 839, 615, 71, 487, 455, 3943, 117, 1455,
26631 314, 1710, 143, 570, 47, 410, 1466, 44, 935, 1575,
26632 999, 143, 551, 46, 263, 46, 967, 53, 1159, 263,
26633 53, 174, 1289, 1285, 2503, 333, 199, 39, 1415, 71,
26634 39, 743, 53, 271, 711, 207, 53, 839, 53, 1799,
26635 71, 39, 108, 76, 140, 135, 103, 871, 108, 44,
26636 271, 309, 935, 79, 53, 1735, 245, 711, 271, 615,
26637 271, 2343, 1007, 42, 44, 42, 1703, 492, 245, 655,
26638 333, 76, 42, 1447, 106, 140, 74, 76, 85, 34,
26639 149, 807, 333, 108, 1159, 172, 42, 268, 333, 149,
26640 76, 42, 1543, 106, 300, 74, 135, 149, 333, 1383,
26641 44, 42, 44, 74, 204, 42, 44, 333, 28135, 3182,
26642 149, 34279, 18215, 2215, 39, 1482, 140, 422, 71, 7898,
26643 1274, 1946, 74, 108, 122, 202, 258, 268, 90, 236,
26644 986, 140, 1562, 2138, 108, 58, 2810, 591, 841, 837,
26645 841, 229, 581, 841, 837, 41, 73, 41, 73, 137,
26646 265, 133, 37, 229, 357, 841, 837, 73, 137, 265,
26647 233, 837, 73, 137, 169, 41, 233, 837, 841, 837,
26648 841, 837, 841, 837, 841, 837, 841, 837, 841, 901,
26649 809, 57, 805, 57, 197, 809, 57, 805, 57, 197,
26650 809, 57, 805, 57, 197, 809, 57, 805, 57, 197,
26651 809, 57, 805, 57, 197, 94, 1613, 135, 871, 71,
26652 39, 39, 327, 135, 39, 39, 39, 39, 39, 39,
26653 103, 71, 39, 39, 39, 39, 39, 39, 71, 39,
26654 135, 231, 135, 135, 39, 327, 551, 103, 167, 551,
26655 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946,
26656 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210,
26657 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266,
26658 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351,
26659 34, 3074, 7692, 63, 63,
26660 };
26661
26662static int sqlite3Fts5UnicodeCategory(u32 iCode) {
26663 int iRes = -1;
26664 int iHi;
26665 int iLo;
26666 int ret;
26667 u16 iKey;
26668
26669 if( iCode>=(1<<20) ){
26670 return 0;
26671 }
26672 iLo = aFts5UnicodeBlock[(iCode>>16)];
26673 iHi = aFts5UnicodeBlock[1+(iCode>>16)];
26674 iKey = (iCode & 0xFFFF);
26675 while( iHi>iLo ){
26676 int iTest = (iHi + iLo) / 2;
26677 assert( iTest>=iLo && iTest<iHi )((void) (0));
26678 if( iKey>=aFts5UnicodeMap[iTest] ){
26679 iRes = iTest;
26680 iLo = iTest+1;
26681 }else{
26682 iHi = iTest;
26683 }
26684 }
26685
26686 if( iRes<0 ) return 0;
26687 if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0;
26688 ret = aFts5UnicodeData[iRes] & 0x1F;
26689 if( ret!=30 ) return ret;
26690 return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 5 : 9;
26691}
26692
26693static void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){
26694 int i = 0;
26695 int iTbl = 0;
26696 while( i<128 ){
26697 int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ];
26698 int n = (aFts5UnicodeData[iTbl] >> 5) + i;
26699 for(; i<128 && i<n; i++){
26700 aAscii[i] = (u8)bToken;
26701 }
26702 iTbl++;
26703 }
26704 aAscii[0] = 0; /* 0x00 is never a token character */
26705}
26706
26707#line 1 "fts5_varint.c"
26708/*
26709** 2015 May 30
26710**
26711** The author disclaims copyright to this source code. In place of
26712** a legal notice, here is a blessing:
26713**
26714** May you do good and not evil.
26715** May you find forgiveness for yourself and forgive others.
26716** May you share freely, never taking more than you give.
26717**
26718******************************************************************************
26719**
26720** Routines for varint serialization and deserialization.
26721*/
26722
26723
26724/* #include "fts5Int.h" */
26725
26726/*
26727** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
26728** Except, this version does handle the single byte case that the core
26729** version depends on being handled before its function is called.
26730*/
26731static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
26732 u32 a,b;
26733
26734 /* The 1-byte case. Overwhelmingly the most common. */
26735 a = *p;
26736 /* a: p0 (unmasked) */
26737 if (!(a&0x80))
26738 {
26739 /* Values between 0 and 127 */
26740 *v = a;
26741 return 1;
26742 }
26743
26744 /* The 2-byte case */
26745 p++;
26746 b = *p;
26747 /* b: p1 (unmasked) */
26748 if (!(b&0x80))
26749 {
26750 /* Values between 128 and 16383 */
26751 a &= 0x7f;
26752 a = a<<7;
26753 *v = a | b;
26754 return 2;
26755 }
26756
26757 /* The 3-byte case */
26758 p++;
26759 a = a<<14;
26760 a |= *p;
26761 /* a: p0<<14 | p2 (unmasked) */
26762 if (!(a&0x80))
26763 {
26764 /* Values between 16384 and 2097151 */
26765 a &= (0x7f<<14)|(0x7f);
26766 b &= 0x7f;
26767 b = b<<7;
26768 *v = a | b;
26769 return 3;
26770 }
26771
26772 /* A 32-bit varint is used to store size information in btrees.
26773 ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
26774 ** A 3-byte varint is sufficient, for example, to record the size
26775 ** of a 1048569-byte BLOB or string.
26776 **
26777 ** We only unroll the first 1-, 2-, and 3- byte cases. The very
26778 ** rare larger cases can be handled by the slower 64-bit varint
26779 ** routine.
26780 */
26781 {
26782 u64 v64;
26783 u8 n;
26784 p -= 2;
26785 n = sqlite3Fts5GetVarint(p, &v64);
26786 *v = ((u32)v64) & 0x7FFFFFFF;
26787 assert( n>3 && n<=9 )((void) (0));
26788 return n;
26789 }
26790}
26791
26792
26793/*
26794** Bitmasks used by sqlite3GetVarint(). These precomputed constants
26795** are defined here rather than simply putting the constant expressions
26796** inline in order to work around bugs in the RVT compiler.
26797**
26798** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
26799**
26800** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0
26801*/
/* 0x001fc07f has bits 0-6 and 14-20 set: it keeps two 7-bit groups held
** 14 bits apart in one 32-bit word. 0xf01fc07f additionally keeps bits
** 28-31. Both masks are applied by sqlite3Fts5GetVarint(). */
26802#define SLOT_2_00x001fc07f 0x001fc07f
26803#define SLOT_4_2_00xf01fc07f 0xf01fc07f
26804
26805/*
26806** Read a 64-bit variable-length integer from memory starting at p[0].
26807** Return the number of bytes read. The value is stored in *v.
26808*/
26809static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
26810 u32 a,b,s;
  /* Overview: the routine is fully unrolled, one stage per input byte.
  ** a and b take turns absorbing bytes (a: p0, p2, p4, ...  b: p1, p3, ...);
  ** each new byte is OR-ed in after the accumulator has been shifted left
  ** by 14 (15 for the ninth byte). A clear 0x80 bit in the byte just read
  ** ends the varint. From the fifth byte onwards s holds the bits that no
  ** longer fit in 32 and is used as the upper 32 bits of *v. */
26811
26812 a = *p;
26813 /* a: p0 (unmasked) */
26814 if (!(a&0x80))
26815 {
26816 *v = a;
26817 return 1;
26818 }
26819
26820 p++;
26821 b = *p;
26822 /* b: p1 (unmasked) */
26823 if (!(b&0x80))
26824 {
26825 a &= 0x7f;
26826 a = a<<7;
26827 a |= b;
26828 *v = a;
26829 return 2;
26830 }
26831
26832 /* Verify that constants are precomputed correctly */
26833 assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) )((void) (0));
26834 assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) )((void) (0));
26835
26836 p++;
26837 a = a<<14;
26838 a |= *p;
26839 /* a: p0<<14 | p2 (unmasked) */
26840 if (!(a&0x80))
26841 {
26842 a &= SLOT_2_00x001fc07f;
26843 b &= 0x7f;
26844 b = b<<7;
26845 a |= b;
26846 *v = a;
26847 return 3;
26848 }
26849
26850 /* CSE1 from below */
26851 a &= SLOT_2_00x001fc07f;
26852 p++;
26853 b = b<<14;
26854 b |= *p;
26855 /* b: p1<<14 | p3 (unmasked) */
26856 if (!(b&0x80))
26857 {
26858 b &= SLOT_2_00x001fc07f;
26859 /* moved CSE1 up */
26860 /* a &= (0x7f<<14)|(0x7f); */
26861 a = a<<7;
26862 a |= b;
26863 *v = a;
26864 return 4;
26865 }
26866
26867 /* a: p0<<14 | p2 (masked) */
26868 /* b: p1<<14 | p3 (unmasked) */
26869 /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
26870 /* moved CSE1 up */
26871 /* a &= (0x7f<<14)|(0x7f); */
26872 b &= SLOT_2_00x001fc07f;
26873 s = a;
26874 /* s: p0<<14 | p2 (masked) */
26875
26876 p++;
26877 a = a<<14;
26878 a |= *p;
26879 /* a: p0<<28 | p2<<14 | p4 (unmasked) */
26880 if (!(a&0x80))
26881 {
26882 /* we can skip these cause they were (effectively) done above in calc'ing s */
26883 /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
26884 /* b &= (0x7f<<14)|(0x7f); */
26885 b = b<<7;
26886 a |= b;
26887 s = s>>18;
26888 *v = ((u64)s)<<32 | a;
26889 return 5;
26890 }
26891
26892 /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
26893 s = s<<7;
26894 s |= b;
26895 /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
26896
26897 p++;
26898 b = b<<14;
26899 b |= *p;
26900 /* b: p1<<28 | p3<<14 | p5 (unmasked) */
26901 if (!(b&0x80))
26902 {
26903 /* we can skip this cause it was (effectively) done above in calc'ing s */
26904 /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
26905 a &= SLOT_2_00x001fc07f;
26906 a = a<<7;
26907 a |= b;
26908 s = s>>18;
26909 *v = ((u64)s)<<32 | a;
26910 return 6;
26911 }
26912
26913 p++;
26914 a = a<<14;
26915 a |= *p;
26916 /* a: p2<<28 | p4<<14 | p6 (unmasked) */
26917 if (!(a&0x80))
26918 {
26919 a &= SLOT_4_2_00xf01fc07f;
26920 b &= SLOT_2_00x001fc07f;
26921 b = b<<7;
26922 a |= b;
26923 s = s>>11;
26924 *v = ((u64)s)<<32 | a;
26925 return 7;
26926 }
26927
26928 /* CSE2 from below */
26929 a &= SLOT_2_00x001fc07f;
26930 p++;
26931 b = b<<14;
26932 b |= *p;
26933 /* b: p3<<28 | p5<<14 | p7 (unmasked) */
26934 if (!(b&0x80))
26935 {
26936 b &= SLOT_4_2_00xf01fc07f;
26937 /* moved CSE2 up */
26938 /* a &= (0x7f<<14)|(0x7f); */
26939 a = a<<7;
26940 a |= b;
26941 s = s>>4;
26942 *v = ((u64)s)<<32 | a;
26943 return 8;
26944 }
26945
  /* Ninth byte: there is no continuation test at this stage, and the byte
  ** is OR-ed in without masking, so all 8 of its bits are data. */
26946 p++;
26947 a = a<<15;
26948 a |= *p;
26949 /* a: p4<<29 | p6<<15 | p8 (unmasked) */
26950
26951 /* moved CSE2 up */
26952 /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
26953 b &= SLOT_2_00x001fc07f;
26954 b = b<<8;
26955 a |= b;
26956
  /* p has been advanced eight times and now points at p8, so p[-4] below
  ** re-reads p4; bits 3..6 of that byte become the low four bits of s. */
26957 s = s<<4;
26958 b = p[-4];
26959 b &= 0x7f;
26960 b = b>>3;
26961 s |= b;
26962
26963 *v = ((u64)s)<<32 | a;
26964
26965 return 9;
26966}
26967
26968/*
26969** The variable-length integer encoding is as follows:
26970**
26971** KEY:
26972** A = 0xxxxxxx 7 bits of data and one flag bit
26973** B = 1xxxxxxx 7 bits of data and one flag bit
26974** C = xxxxxxxx 8 bits of data
26975**
26976** 7 bits - A
26977** 14 bits - BA
26978** 21 bits - BBA
26979** 28 bits - BBBA
26980** 35 bits - BBBBA
26981** 42 bits - BBBBBA
26982** 49 bits - BBBBBBA
26983** 56 bits - BBBBBBBA
26984** 64 bits - BBBBBBBBC
26985*/
26986
/* FTS5_NOINLINE expands to SQLITE_NOINLINE when that macro is defined and
** to nothing otherwise. It decorates fts5PutVarint64(). */
26987#ifdef SQLITE_NOINLINE
26988# define FTS5_NOINLINE SQLITE_NOINLINE
26989#else
26990# define FTS5_NOINLINE
26991#endif
26992
26993/*
26994** Write a 64-bit variable-length integer to memory starting at p[0].
26995** The length of data write will be between 1 and 9 bytes. The number
26996** of bytes written is returned.
26997**
26998** A variable-length integer consists of the lower 7 bits of each byte
26999** for all bytes that have the 8th bit set and one byte with the 8th
27000** bit clear. Except, if we get to the 9th byte, it stores the full
27001** 8 bits and is the last byte.
27002*/
27003static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
27004 int i, j, n;
27005 u8 buf[10];
27006 if( v & (((u64)0xff000000)<<32) ){
27007 p[8] = (u8)v;
27008 v >>= 8;
27009 for(i=7; i>=0; i--){
27010 p[i] = (u8)((v & 0x7f) | 0x80);
27011 v >>= 7;
27012 }
27013 return 9;
27014 }
27015 n = 0;
27016 do{
27017 buf[n++] = (u8)((v & 0x7f) | 0x80);
27018 v >>= 7;
27019 }while( v!=0 );
27020 buf[0] &= 0x7f;
27021 assert( n<=9 )((void) (0));
27022 for(i=0, j=n-1; j>=0; j--, i++){
27023 p[i] = buf[j];
27024 }
27025 return n;
27026}
27027
27028static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
27029 if( v<=0x7f ){
27030 p[0] = v&0x7f;
27031 return 1;
27032 }
27033 if( v<=0x3fff ){
27034 p[0] = ((v>>7)&0x7f)|0x80;
27035 p[1] = v&0x7f;
27036 return 2;
27037 }
27038 return fts5PutVarint64(p,v);
27039}
27040
27041
27042static int sqlite3Fts5GetVarintLen(u32 iVal){
27043#if 0
27044 if( iVal<(1 << 7 ) ) return 1;
27045#endif
27046 assert( iVal>=(1 << 7) )((void) (0));
27047 if( iVal<(1 << 14) ) return 2;
27048 if( iVal<(1 << 21) ) return 3;
27049 if( iVal<(1 << 28) ) return 4;
27050 return 5;
27051}
27052
27053#line 1 "fts5_vocab.c"
27054/*
27055** 2015 May 08
27056**
27057** The author disclaims copyright to this source code. In place of
27058** a legal notice, here is a blessing:
27059**
27060** May you do good and not evil.
27061** May you find forgiveness for yourself and forgive others.
27062** May you share freely, never taking more than you give.
27063**
27064******************************************************************************
27065**
27066** This is an SQLite virtual table module implementing direct access to an
27067** existing FTS5 index. The module may create several different types of
27068** tables:
27069**
27070** col:
27071** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
27072**
27073** One row for each term/column combination. The value of $doc is set to
27074** the number of fts5 rows that contain at least one instance of term
27075** $term within column $col. Field $cnt is set to the total number of
27076** instances of term $term in column $col (in any row of the fts5 table).
27077**
27078** row:
27079** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
27080**
27081** One row for each term in the database. The value of $doc is set to
27082** the number of fts5 rows that contain at least one instance of term
27083** $term. Field $cnt is set to the total number of instances of term
27084** $term in the database.
27085**
27086** instance:
27087** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>));
27088**
27089** One row for each term instance in the database.
27090*/
27091
27092
27093/* #include "fts5Int.h" */
27094
27095
27096typedef struct Fts5VocabTable Fts5VocabTable;
27097typedef struct Fts5VocabCursor Fts5VocabCursor;
27098
27099struct Fts5VocabTable {
27100 sqlite3_vtab base;
27101 char *zFts5Tbl; /* Name of fts5 table */
27102 char *zFts5Db; /* Db containing fts5 table */
27103 sqlite3 *db; /* Database handle */
27104 Fts5Global *pGlobal; /* FTS5 global object for this database */
27105 int eType; /* FTS5_VOCAB_COL, ROW or INSTANCE */
27106 unsigned bBusy; /* True if busy */
27107};
27108
27109struct Fts5VocabCursor {
27110 sqlite3_vtab_cursor base;
27111 sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */
27112 Fts5Table *pFts5; /* Associated FTS5 table */
27113
27114 int bEof; /* True if this cursor is at EOF */
27115 Fts5IndexIter *pIter; /* Term/rowid iterator object */
27116 void *pStruct; /* From sqlite3Fts5StructureRef() */
27117
27118 int nLeTerm; /* Size of zLeTerm in bytes */
27119 char *zLeTerm; /* (term <= $zLeTerm) paramater, or NULL */
27120 int colUsed; /* Copy of sqlite3_index_info.colUsed */
27121
27122 /* These are used by 'col' tables only */
27123 int iCol;
27124 i64 *aCnt;
27125 i64 *aDoc;
27126
27127 /* Output values used by all tables. */
27128 i64 rowid; /* This table's current rowid value */
27129 Fts5Buffer term; /* Current value of 'term' column */
27130
27131 /* Output values Used by 'instance' tables only */
27132 i64 iInstPos;
27133 int iInstOff;
27134};
27135
/* Table-type codes. fts5VocabTableType() stores one of these in *peType
** for the type names "col", "row" and "instance" respectively. */
27136#define FTS5_VOCAB_COL0 0
27137#define FTS5_VOCAB_ROW1 1
27138#define FTS5_VOCAB_INSTANCE2 2
27139
/* Column lists for the three table types. */
27140#define FTS5_VOCAB_COL_SCHEMA"term, col, doc, cnt" "term, col, doc, cnt"
27141#define FTS5_VOCAB_ROW_SCHEMA"term, doc, cnt" "term, doc, cnt"
27142#define FTS5_VOCAB_INST_SCHEMA"term, doc, col, offset" "term, doc, col, offset"
27143
27144/*
27145** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
27146*/
27147#define FTS5_VOCAB_TERM_EQ0x0100 0x0100
27148#define FTS5_VOCAB_TERM_GE0x0200 0x0200
27149#define FTS5_VOCAB_TERM_LE0x0400 0x0400
27150
/* NOTE(review): presumably selects the low byte of idxNum, below the
** TERM_* bits, for column-usage flags - confirm against xBestIndex and
** xFilter, which are not visible here. */
27151#define FTS5_VOCAB_COLUSED_MASK0xFF 0xFF
27152
27153
27154/*
27155** Translate a string containing an fts5vocab table type to an
27156** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
27157** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
27158** and return SQLITE_ERROR.
27159*/
27160static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
27161 int rc = SQLITE_OK0;
27162 char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1);
27163 if( rc==SQLITE_OK0 ){
27164 sqlite3Fts5Dequote(zCopy);
27165 if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "col")==0 ){
27166 *peType = FTS5_VOCAB_COL0;
27167 }else
27168
27169 if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "row")==0 ){
27170 *peType = FTS5_VOCAB_ROW1;
27171 }else
27172 if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "instance")==0 ){
27173 *peType = FTS5_VOCAB_INSTANCE2;
27174 }else
27175 {
27176 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("fts5vocab: unknown table type: %Q", zCopy);
27177 rc = SQLITE_ERROR1;
27178 }
27179 sqlite3_freesqlite3_api->free(zCopy);
27180 }
27181
27182 return rc;
27183}
27184
27185
27186/*
27187** The xDisconnect() virtual table method.
27188*/
27189static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
27190 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
27191 sqlite3_freesqlite3_api->free(pTab);
27192 return SQLITE_OK0;
27193}
27194
27195/*
27196** The xDestroy() virtual table method.
27197*/
27198static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
27199 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
27200 sqlite3_freesqlite3_api->free(pTab);
27201 return SQLITE_OK0;
27202}
27203
27204/*
27205** This function is the implementation of both the xConnect and xCreate
27206** methods of the FTS3 virtual table.
27207**
27208** The argv[] array contains the following:
27209**
27210** argv[0] -> module name ("fts5vocab")
27211** argv[1] -> database name
27212** argv[2] -> table name
27213**
27214** then:
27215**
27216** argv[3] -> name of fts5 table
27217** argv[4] -> type of fts5vocab table
27218**
27219** or, for tables in the TEMP schema only.
27220**
27221** argv[3] -> name of fts5 tables database
27222** argv[4] -> name of fts5 table
27223** argv[5] -> type of fts5vocab table
27224*/
27225static int fts5VocabInitVtab(
27226 sqlite3 *db, /* The SQLite database connection */
27227 void *pAux, /* Pointer to Fts5Global object */
27228 int argc, /* Number of elements in argv array */
27229 const char * const *argv, /* xCreate/xConnect argument array */
27230 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
27231 char **pzErr /* Write any error message here */
27232){
27233 const char *azSchema[] = {
27234 "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA"term, col, doc, cnt" ")",
27235 "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA"term, doc, cnt" ")",
27236 "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA"term, doc, col, offset" ")"
27237 };
27238
27239 Fts5VocabTable *pRet = 0;
27240 int rc = SQLITE_OK0; /* Return code */
27241 int bDb;
27242
27243 bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);
27244
27245 if( argc!=5 && bDb==0 ){
27246 *pzErr = sqlite3_mprintfsqlite3_api->mprintf("wrong number of vtable arguments");
27247 rc = SQLITE_ERROR1;
27248 }else{
27249 i64 nByte; /* Bytes of space to allocate */
27250 const char *zDb = bDb ? argv[3] : argv[1];
27251 const char *zTab = bDb ? argv[4] : argv[3];
27252 const char *zType = bDb ? argv[5] : argv[4];
27253 i64 nDb = strlen(zDb)+1;
27254 i64 nTab = strlen(zTab)+1;
27255 int eType = 0;
27256
27257 rc = fts5VocabTableType(zType, pzErr, &eType);
27258 if( rc==SQLITE_OK0 ){
27259 assert( eType>=0 && eType<ArraySize(azSchema) )((void) (0));
27260 rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, azSchema[eType]);
27261 }
27262
27263 nByte = sizeof(Fts5VocabTable) + nDb + nTab;
27264 pRet = sqlite3Fts5MallocZero(&rc, nByte);
27265 if( pRet ){
27266 pRet->pGlobal = (Fts5Global*)pAux;
27267 pRet->eType = eType;
27268 pRet->db = db;
27269 pRet->zFts5Tbl = (char*)&pRet[1];
27270 pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
27271 memcpy(pRet->zFts5Tbl, zTab, nTab);
27272 memcpy(pRet->zFts5Db, zDb, nDb);
27273 sqlite3Fts5Dequote(pRet->zFts5Tbl);
27274 sqlite3Fts5Dequote(pRet->zFts5Db);
27275 }
27276 }
27277
27278 *ppVTab = (sqlite3_vtab*)pRet;
27279 return rc;
27280}
27281
27282
27283/*
27284** The xConnect() and xCreate() methods for the virtual table. All the
27285** work is done in function fts5VocabInitVtab().
27286*/
27287static int fts5VocabConnectMethod(
27288 sqlite3 *db, /* Database connection */
27289 void *pAux, /* Pointer to tokenizer hash table */
27290 int argc, /* Number of elements in argv array */
27291 const char * const *argv, /* xCreate/xConnect argument array */
27292 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
27293 char **pzErr /* OUT: sqlite3_malloc'd error message */
27294){
27295 return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
27296}
27297static int fts5VocabCreateMethod(
27298 sqlite3 *db, /* Database connection */
27299 void *pAux, /* Pointer to tokenizer hash table */
27300 int argc, /* Number of elements in argv array */
27301 const char * const *argv, /* xCreate/xConnect argument array */
27302 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
27303 char **pzErr /* OUT: sqlite3_malloc'd error message */
27304){
27305 return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
27306}
27307
27308/*
27309** Implementation of the xBestIndex method.
27310**
27311** Only constraints of the form:
27312**
27313** term <= ?
27314** term == ?
27315** term >= ?
27316**
27317** are interpreted. Less-than and less-than-or-equal are treated
27318** identically, as are greater-than and greater-than-or-equal.
27319*/
27320static int fts5VocabBestIndexMethod(
27321 sqlite3_vtab *pUnused,
27322 sqlite3_index_info *pInfo
27323){
27324 int i;
27325 int iTermEq = -1;
27326 int iTermGe = -1;
27327 int iTermLe = -1;
27328 int idxNum = (int)pInfo->colUsed;
27329 int nArg = 0;
27330
27331 UNUSED_PARAM(pUnused)(void)(pUnused);
27332
27333 assert( (pInfo->colUsed & FTS5_VOCAB_COLUSED_MASK)==pInfo->colUsed )((void) (0));
27334
27335 for(i=0; i<pInfo->nConstraint; i++){
27336 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
27337 if( p->usable==0 ) continue;
27338 if( p->iColumn==0 ){ /* term column */
27339 if( p->op==SQLITE_INDEX_CONSTRAINT_EQ2 ) iTermEq = i;
27340 if( p->op==SQLITE_INDEX_CONSTRAINT_LE8 ) iTermLe = i;
27341 if( p->op==SQLITE_INDEX_CONSTRAINT_LT16 ) iTermLe = i;
27342 if( p->op==SQLITE_INDEX_CONSTRAINT_GE32 ) iTermGe = i;
27343 if( p->op==SQLITE_INDEX_CONSTRAINT_GT4 ) iTermGe = i;
27344 }
27345 }
27346
27347 if( iTermEq>=0 ){
27348 idxNum |= FTS5_VOCAB_TERM_EQ0x0100;
27349 pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg;
27350 pInfo->estimatedCost = 100;
27351 }else{
27352 pInfo->estimatedCost = 1000000;
27353 if( iTermGe>=0 ){
27354 idxNum |= FTS5_VOCAB_TERM_GE0x0200;
27355 pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg;
27356 pInfo->estimatedCost = pInfo->estimatedCost / 2;
27357 }
27358 if( iTermLe>=0 ){
27359 idxNum |= FTS5_VOCAB_TERM_LE0x0400;
27360 pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg;
27361 pInfo->estimatedCost = pInfo->estimatedCost / 2;
27362 }
27363 }
27364
27365 /* This virtual table always delivers results in ascending order of
27366 ** the "term" column (column 0). So if the user has requested this
27367 ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the
27368 ** sqlite3_index_info.orderByConsumed flag to tell the core the results
27369 ** are already in sorted order. */
27370 if( pInfo->nOrderBy==1
27371 && pInfo->aOrderBy[0].iColumn==0
27372 && pInfo->aOrderBy[0].desc==0
27373 ){
27374 pInfo->orderByConsumed = 1;
27375 }
27376
27377 pInfo->idxNum = idxNum;
27378 return SQLITE_OK0;
27379}
27380
27381/*
27382** Implementation of xOpen method.
27383*/
27384static int fts5VocabOpenMethod(
27385 sqlite3_vtab *pVTab,
27386 sqlite3_vtab_cursor **ppCsr
27387){
27388 Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
27389 Fts5Table *pFts5 = 0;
27390 Fts5VocabCursor *pCsr = 0;
27391 int rc = SQLITE_OK0;
27392 sqlite3_stmt *pStmt = 0;
27393 char *zSql = 0;
27394
27395 if( pTab->bBusy ){
27396 pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf(
27397 "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
27398 );
27399 return SQLITE_ERROR1;
27400 }
27401 zSql = sqlite3Fts5Mprintf(&rc,
27402 "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
27403 pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
27404 );
27405 if( zSql ){
27406 rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
27407 }
27408 sqlite3_freesqlite3_api->free(zSql);
27409 assert( rc==SQLITE_OK || pStmt==0 )((void) (0));
27410 if( rc==SQLITE_ERROR1 ) rc = SQLITE_OK0;
27411
27412 pTab->bBusy = 1;
27413 if( pStmt && sqlite3_stepsqlite3_api->step(pStmt)==SQLITE_ROW100 ){
27414 i64 iId = sqlite3_column_int64sqlite3_api->column_int64(pStmt, 0);
27415 pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId);
27416 }
27417 pTab->bBusy = 0;
27418
27419 if( rc==SQLITE_OK0 ){
27420 if( pFts5==0 ){
27421 rc = sqlite3_finalizesqlite3_api->finalize(pStmt);
27422 pStmt = 0;
27423 if( rc==SQLITE_OK0 ){
27424 pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf(
27425 "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
27426 );
27427 rc = SQLITE_ERROR1;
27428 }
27429 }else{
27430 rc = sqlite3Fts5FlushToDisk(pFts5);
27431 }
27432 }
27433
27434 if( rc==SQLITE_OK0 ){
27435 i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor);
27436 pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
27437 }
27438
27439 if( pCsr ){
27440 pCsr->pFts5 = pFts5;
27441 pCsr->pStmt = pStmt;
27442 pCsr->aCnt = (i64*)&pCsr[1];
27443 pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol];
27444 }else{
27445 sqlite3_finalizesqlite3_api->finalize(pStmt);
27446 }
27447
27448 *ppCsr = (sqlite3_vtab_cursor*)pCsr;
27449 return rc;
27450}
27451
27452static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
27453 pCsr->rowid = 0;
27454 sqlite3Fts5IterClose(pCsr->pIter);
27455 sqlite3Fts5StructureRelease(pCsr->pStruct);
27456 pCsr->pStruct = 0;
27457 pCsr->pIter = 0;
27458 sqlite3_freesqlite3_api->free(pCsr->zLeTerm);
27459 pCsr->nLeTerm = -1;
27460 pCsr->zLeTerm = 0;
27461 pCsr->bEof = 0;
27462}
27463
27464/*
27465** Close the cursor. For additional information see the documentation
27466** on the xClose method of the virtual table interface.
27467*/
27468static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
27469 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
27470 fts5VocabResetCursor(pCsr);
27471 sqlite3Fts5BufferFree(&pCsr->term);
27472 sqlite3_finalizesqlite3_api->finalize(pCsr->pStmt);
27473 sqlite3_freesqlite3_api->free(pCsr);
27474 return SQLITE_OK0;
27475}
27476
27477static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){
27478 int rc = SQLITE_OK0;
27479
27480 if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ){
27481 pCsr->bEof = 1;
27482 }else{
27483 const char *zTerm;
27484 int nTerm;
27485 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
27486 if( pCsr->nLeTerm>=0 ){
27487 int nCmp = MIN(nTerm, pCsr->nLeTerm)(((nTerm) < (pCsr->nLeTerm)) ? (nTerm) : (pCsr->nLeTerm
))
;
27488 int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
27489 if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
27490 pCsr->bEof = 1;
27491 }
27492 }
27493
27494 sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
27495 }
27496 return rc;
27497}
27498
27499static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){
27500 int eDetail = pCsr->pFts5->pConfig->eDetail;
27501 int rc = SQLITE_OK0;
27502 Fts5IndexIter *pIter = pCsr->pIter;
27503 i64 *pp = &pCsr->iInstPos;
27504 int *po = &pCsr->iInstOff;
27505
27506 assert( sqlite3Fts5IterEof(pIter)==0 )((void) (0));
27507 assert( pCsr->bEof==0 )((void) (0));
27508 while( eDetail==FTS5_DETAIL_NONE1
27509 || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp)
27510 ){
27511 pCsr->iInstPos = 0;
27512 pCsr->iInstOff = 0;
27513
27514 rc = sqlite3Fts5IterNextScan(pCsr->pIter);
27515 if( rc==SQLITE_OK0 ){
27516 rc = fts5VocabInstanceNewTerm(pCsr);
27517 if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE1 ) break;
27518 }
27519 if( rc ){
27520 pCsr->bEof = 1;
27521 break;
27522 }
27523 }
27524
27525 return rc;
27526}
27527
27528/*
27529** Advance the cursor to the next row in the table.
27530*/
27531static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
27532 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
27533 Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
27534 int nCol = pCsr->pFts5->pConfig->nCol;
27535 int rc;
27536
27537 rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct);
27538 if( rc!=SQLITE_OK0 ) return rc;
27539 pCsr->rowid++;
27540
27541 if( pTab->eType==FTS5_VOCAB_INSTANCE2 ){
27542 return fts5VocabInstanceNext(pCsr);
27543 }
27544
27545 if( pTab->eType==FTS5_VOCAB_COL0 ){
27546 for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
27547 if( pCsr->aDoc[pCsr->iCol] ) break;
27548 }
27549 }
27550
27551 if( pTab->eType!=FTS5_VOCAB_COL0 || pCsr->iCol>=nCol ){
27552 if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ){
27553 pCsr->bEof = 1;
27554 }else{
27555 const char *zTerm;
27556 int nTerm;
27557
27558 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
27559 assert( nTerm>=0 )((void) (0));
27560 if( pCsr->nLeTerm>=0 ){
27561 int nCmp = MIN(nTerm, pCsr->nLeTerm)(((nTerm) < (pCsr->nLeTerm)) ? (nTerm) : (pCsr->nLeTerm
))
;
27562 int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
27563 if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
27564 pCsr->bEof = 1;
27565 return SQLITE_OK0;
27566 }
27567 }
27568
27569 sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
27570 memset(pCsr->aCnt, 0, nCol * sizeof(i64));
27571 memset(pCsr->aDoc, 0, nCol * sizeof(i64));
27572 pCsr->iCol = 0;
27573
27574 assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW )((void) (0));
27575 while( rc==SQLITE_OK0 ){
27576 int eDetail = pCsr->pFts5->pConfig->eDetail;
27577 const u8 *pPos; int nPos; /* Position list */
27578 i64 iPos = 0; /* 64-bit position read from poslist */
27579 int iOff = 0; /* Current offset within position list */
27580
27581 pPos = pCsr->pIter->pData;
27582 nPos = pCsr->pIter->nData;
27583
27584 switch( pTab->eType ){
27585 case FTS5_VOCAB_ROW1:
27586 /* Do not bother counting the number of instances if the "cnt"
27587 ** column is not being read (according to colUsed). */
27588 if( eDetail==FTS5_DETAIL_FULL0 && (pCsr->colUsed & 0x04) ){
27589 while( iPos<nPos ){
27590 u32 ii;
27591 fts5FastGetVarint32(pPos, iPos, ii){ ii = (pPos)[iPos++]; if( ii & 0x80 ){ iPos--; iPos += sqlite3Fts5GetVarint32
(&(pPos)[iPos],(u32*)&(ii)); } }
;
27592 if( ii==1 ){
27593 /* New column in the position list */
27594 fts5FastGetVarint32(pPos, iPos, ii){ ii = (pPos)[iPos++]; if( ii & 0x80 ){ iPos--; iPos += sqlite3Fts5GetVarint32
(&(pPos)[iPos],(u32*)&(ii)); } }
;
27595 }else{
27596 /* An instance - increment pCsr->aCnt[] */
27597 pCsr->aCnt[0]++;
27598 }
27599 }
27600 }
27601 pCsr->aDoc[0]++;
27602 break;
27603
27604 case FTS5_VOCAB_COL0:
27605 if( eDetail==FTS5_DETAIL_FULL0 ){
27606 int iCol = -1;
27607 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
27608 int ii = FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF);
27609 if( iCol!=ii ){
27610 if( ii>=nCol ){
27611 rc = FTS5_CORRUPT(11 | (1<<8));
27612 break;
27613 }
27614 pCsr->aDoc[ii]++;
27615 iCol = ii;
27616 }
27617 pCsr->aCnt[ii]++;
27618 }
27619 }else if( eDetail==FTS5_DETAIL_COLUMNS2 ){
27620 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
27621 assert_nc( iPos>=0 && iPos<nCol )((void) (0));
27622 if( iPos>=nCol ){
27623 rc = FTS5_CORRUPT(11 | (1<<8));
27624 break;
27625 }
27626 pCsr->aDoc[iPos]++;
27627 }
27628 }else{
27629 assert( eDetail==FTS5_DETAIL_NONE )((void) (0));
27630 pCsr->aDoc[0]++;
27631 }
27632 break;
27633
27634 default:
27635 assert( pTab->eType==FTS5_VOCAB_INSTANCE )((void) (0));
27636 break;
27637 }
27638
27639 if( rc==SQLITE_OK0 ){
27640 rc = sqlite3Fts5IterNextScan(pCsr->pIter);
27641 }
27642 if( pTab->eType==FTS5_VOCAB_INSTANCE2 ) break;
27643
27644 if( rc==SQLITE_OK0 ){
27645 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
27646 if( nTerm!=pCsr->term.n
27647 || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm))
27648 ){
27649 break;
27650 }
27651 if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ) break;
27652 }
27653 }
27654 }
27655 }
27656
27657 if( rc==SQLITE_OK0 && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL0 ){
27658 for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++);
27659 if( pCsr->iCol==nCol ){
27660 rc = FTS5_CORRUPT(11 | (1<<8));
27661 }
27662 }
27663 return rc;
27664}
27665
27666/*
27667** This is the xFilter implementation for the virtual table.
27668*/
27669static int fts5VocabFilterMethod(
27670 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
27671 int idxNum, /* Strategy index */
27672 const char *zUnused, /* Unused */
27673 int nUnused, /* Number of elements in apVal */
27674 sqlite3_value **apVal /* Arguments for the indexing scheme */
27675){
27676 Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
27677 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
27678 int eType = pTab->eType;
27679 int rc = SQLITE_OK0;
27680
27681 int iVal = 0;
27682 int f = FTS5INDEX_QUERY_SCAN0x0008;
27683 const char *zTerm = 0;
27684 int nTerm = 0;
27685
27686 sqlite3_value *pEq = 0;
27687 sqlite3_value *pGe = 0;
27688 sqlite3_value *pLe = 0;
27689
27690 UNUSED_PARAM2(zUnused, nUnused)(void)(zUnused), (void)(nUnused);
27691
27692 fts5VocabResetCursor(pCsr);
27693 if( idxNum & FTS5_VOCAB_TERM_EQ0x0100 ) pEq = apVal[iVal++];
27694 if( idxNum & FTS5_VOCAB_TERM_GE0x0200 ) pGe = apVal[iVal++];
27695 if( idxNum & FTS5_VOCAB_TERM_LE0x0400 ) pLe = apVal[iVal++];
27696 pCsr->colUsed = (idxNum & FTS5_VOCAB_COLUSED_MASK0xFF);
27697
27698 if( pEq ){
27699 zTerm = (const char *)sqlite3_value_textsqlite3_api->value_text(pEq);
27700 nTerm = sqlite3_value_bytessqlite3_api->value_bytes(pEq);
27701 f = FTS5INDEX_QUERY_NOTOKENDATA0x0080;
27702 }else{
27703 if( pGe ){
27704 zTerm = (const char *)sqlite3_value_textsqlite3_api->value_text(pGe);
27705 nTerm = sqlite3_value_bytessqlite3_api->value_bytes(pGe);
27706 }
27707 if( pLe ){
27708 const char *zCopy = (const char *)sqlite3_value_textsqlite3_api->value_text(pLe);
27709 if( zCopy==0 ) zCopy = "";
27710 pCsr->nLeTerm = sqlite3_value_bytessqlite3_api->value_bytes(pLe);
27711 pCsr->zLeTerm = sqlite3_mallocsqlite3_api->malloc(pCsr->nLeTerm+1);
27712 if( pCsr->zLeTerm==0 ){
27713 rc = SQLITE_NOMEM7;
27714 }else{
27715 memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1);
27716 }
27717 }
27718 }
27719
27720 if( rc==SQLITE_OK0 ){
27721 Fts5Index *pIndex = pCsr->pFts5->pIndex;
27722 rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
27723 if( rc==SQLITE_OK0 ){
27724 pCsr->pStruct = sqlite3Fts5StructureRef(pIndex);
27725 }
27726 }
27727 if( rc==SQLITE_OK0 && eType==FTS5_VOCAB_INSTANCE2 ){
27728 rc = fts5VocabInstanceNewTerm(pCsr);
27729 }
27730 if( rc==SQLITE_OK0 && !pCsr->bEof
27731 && (eType!=FTS5_VOCAB_INSTANCE2
27732 || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE1)
27733 ){
27734 rc = fts5VocabNextMethod(pCursor);
27735 }
27736
27737 return rc;
27738}
27739
27740/*
27741** This is the xEof method of the virtual table. SQLite calls this
27742** routine to find out if it has reached the end of a result set.
27743*/
27744static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
27745 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
27746 return pCsr->bEof;
27747}
27748
27749static int fts5VocabColumnMethod(
27750 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
27751 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
27752 int iCol /* Index of column to read value from */
27753){
27754 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
27755 int eDetail = pCsr->pFts5->pConfig->eDetail;
27756 int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType;
27757 i64 iVal = 0;
27758
27759 if( iCol==0 ){
27760 sqlite3_result_textsqlite3_api->result_text(
27761 pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)
27762 );
27763 }else if( eType==FTS5_VOCAB_COL0 ){
27764 assert( iCol==1 || iCol==2 || iCol==3 )((void) (0));
27765 if( iCol==1 ){
27766 if( eDetail!=FTS5_DETAIL_NONE1 ){
27767 const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol];
27768 sqlite3_result_textsqlite3_api->result_text(pCtx, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0));
27769 }
27770 }else if( iCol==2 ){
27771 iVal = pCsr->aDoc[pCsr->iCol];
27772 }else{
27773 iVal = pCsr->aCnt[pCsr->iCol];
27774 }
27775 }else if( eType==FTS5_VOCAB_ROW1 ){
27776 assert( iCol==1 || iCol==2 )((void) (0));
27777 if( iCol==1 ){
27778 iVal = pCsr->aDoc[0];
27779 }else{
27780 iVal = pCsr->aCnt[0];
27781 }
27782 }else{
27783 assert( eType==FTS5_VOCAB_INSTANCE )((void) (0));
27784 switch( iCol ){
27785 case 1:
27786 sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->pIter->iRowid);
27787 break;
27788 case 2: {
27789 int ii = -1;
27790 if( eDetail==FTS5_DETAIL_FULL0 ){
27791 ii = FTS5_POS2COLUMN(pCsr->iInstPos)(int)((pCsr->iInstPos >> 32) & 0x7FFFFFFF);
27792 }else if( eDetail==FTS5_DETAIL_COLUMNS2 ){
27793 ii = (int)pCsr->iInstPos;
27794 }
27795 if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){
27796 const char *z = pCsr->pFts5->pConfig->azCol[ii];
27797 sqlite3_result_textsqlite3_api->result_text(pCtx, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0));
27798 }
27799 break;
27800 }
27801 default: {
27802 assert( iCol==3 )((void) (0));
27803 if( eDetail==FTS5_DETAIL_FULL0 ){
27804 int ii = FTS5_POS2OFFSET(pCsr->iInstPos)(int)(pCsr->iInstPos & 0x7FFFFFFF);
27805 sqlite3_result_intsqlite3_api->result_int(pCtx, ii);
27806 }
27807 break;
27808 }
27809 }
27810 }
27811
27812 if( iVal>0 ) sqlite3_result_int64sqlite3_api->result_int64(pCtx, iVal);
27813 return SQLITE_OK0;
27814}
27815
27816/*
27817** This is the xRowid method. The SQLite core calls this routine to
27818** retrieve the rowid for the current row of the result set. The
27819** rowid should be written to *pRowid.
27820*/
27821static int fts5VocabRowidMethod(
27822 sqlite3_vtab_cursor *pCursor,
27823 sqlite_int64 *pRowid
27824){
27825 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
27826 *pRowid = pCsr->rowid;
27827 return SQLITE_OK0;
27828}
27829
27830static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
27831 static const sqlite3_module fts5Vocab = {
27832 /* iVersion */ 2,
27833 /* xCreate */ fts5VocabCreateMethod,
27834 /* xConnect */ fts5VocabConnectMethod,
27835 /* xBestIndex */ fts5VocabBestIndexMethod,
27836 /* xDisconnect */ fts5VocabDisconnectMethod,
27837 /* xDestroy */ fts5VocabDestroyMethod,
27838 /* xOpen */ fts5VocabOpenMethod,
27839 /* xClose */ fts5VocabCloseMethod,
27840 /* xFilter */ fts5VocabFilterMethod,
27841 /* xNext */ fts5VocabNextMethod,
27842 /* xEof */ fts5VocabEofMethod,
27843 /* xColumn */ fts5VocabColumnMethod,
27844 /* xRowid */ fts5VocabRowidMethod,
27845 /* xUpdate */ 0,
27846 /* xBegin */ 0,
27847 /* xSync */ 0,
27848 /* xCommit */ 0,
27849 /* xRollback */ 0,
27850 /* xFindFunction */ 0,
27851 /* xRename */ 0,
27852 /* xSavepoint */ 0,
27853 /* xRelease */ 0,
27854 /* xRollbackTo */ 0,
27855 /* xShadowName */ 0,
27856 /* xIntegrity */ 0
27857 };
27858 void *p = (void*)pGlobal;
27859
27860 return sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0);
27861}
27862
27863
27864/* Here ends the fts5.c composite file. */
27865#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */