| File: | root/firefox-clang/third_party/sqlite3/ext/fts5.c |
| Warning: | line 21742, column 17 Array access (via field 'a') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | ||||
| 2 | /* | |||
| 3 | ** This, the "fts5.c" source file, is a composite file that is itself | |||
| 4 | ** assembled from the following files: | |||
| 5 | ** | |||
| 6 | ** fts5.h | |||
| 7 | ** fts5Int.h | |||
| 8 | ** fts5parse.h <--- Generated from fts5parse.y by Lemon | |||
| 9 | ** fts5parse.c <--- Generated from fts5parse.y by Lemon | |||
| 10 | ** fts5_aux.c | |||
| 11 | ** fts5_buffer.c | |||
| 12 | ** fts5_config.c | |||
| 13 | ** fts5_expr.c | |||
| 14 | ** fts5_hash.c | |||
| 15 | ** fts5_index.c | |||
| 16 | ** fts5_main.c | |||
| 17 | ** fts5_storage.c | |||
| 18 | ** fts5_tokenize.c | |||
| 19 | ** fts5_unicode2.c | |||
| 20 | ** fts5_varint.c | |||
| 21 | ** fts5_vocab.c | |||
| 22 | */ | |||
| 23 | #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) | |||
| 24 | ||||
| 25 | #if !defined(NDEBUG1) && !defined(SQLITE_DEBUG) | |||
| 26 | # define NDEBUG1 1 | |||
| 27 | #endif | |||
| 28 | #if defined(NDEBUG1) && defined(SQLITE_DEBUG) | |||
| 29 | # undef NDEBUG1 | |||
| 30 | #endif | |||
| 31 | ||||
| 32 | #ifdef HAVE_STDINT_H1 | |||
| 33 | #include <stdint.h> | |||
| 34 | #endif | |||
| 35 | #ifdef HAVE_INTTYPES_H1 | |||
| 36 | #include <inttypes.h> | |||
| 37 | #endif | |||
| 38 | #line 1 "fts5.h" | |||
| 39 | /* | |||
| 40 | ** 2014 May 31 | |||
| 41 | ** | |||
| 42 | ** The author disclaims copyright to this source code. In place of | |||
| 43 | ** a legal notice, here is a blessing: | |||
| 44 | ** | |||
| 45 | ** May you do good and not evil. | |||
| 46 | ** May you find forgiveness for yourself and forgive others. | |||
| 47 | ** May you share freely, never taking more than you give. | |||
| 48 | ** | |||
| 49 | ****************************************************************************** | |||
| 50 | ** | |||
| 51 | ** Interfaces to extend FTS5. Using the interfaces defined in this file, | |||
| 52 | ** FTS5 may be extended with: | |||
| 53 | ** | |||
| 54 | ** * custom tokenizers, and | |||
| 55 | ** * custom auxiliary functions. | |||
| 56 | */ | |||
| 57 | ||||
| 58 | ||||
| 59 | #ifndef _FTS5_H | |||
| 60 | #define _FTS5_H | |||
| 61 | ||||
| 62 | #include "sqlite3.h" | |||
| 63 | ||||
| 64 | #ifdef __cplusplus | |||
| 65 | extern "C" { | |||
| 66 | #endif | |||
| 67 | ||||
| 68 | /************************************************************************* | |||
| 69 | ** CUSTOM AUXILIARY FUNCTIONS | |||
| 70 | ** | |||
| 71 | ** Virtual table implementations may overload SQL functions by implementing | |||
| 72 | ** the sqlite3_module.xFindFunction() method. | |||
| 73 | */ | |||
| 74 | ||||
| 75 | typedef struct Fts5ExtensionApi Fts5ExtensionApi; /* Method table; struct defined later in this header */ | |||
| 76 | typedef struct Fts5Context Fts5Context; /* Handle passed as first argument to every Fts5ExtensionApi method */ | |||
| 77 | typedef struct Fts5PhraseIter Fts5PhraseIter; /* Iterator state for xPhraseFirst()/xPhraseNext(); struct defined below */ | |||
| 78 | ||||
| 79 | typedef void (*fts5_extension_function)( /* Signature of an FTS5 auxiliary (extension) function implementation */ | |||
| 80 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
| 81 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
| 82 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
| 83 | int nVal, /* Number of values in apVal[] array */ | |||
| 84 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
| 85 | ); | |||
| 86 | ||||
| 87 | struct Fts5PhraseIter { /* Iterator used with xPhraseFirst()/xPhraseNext() and xPhraseFirstColumn()/xPhraseNextColumn() */ | |||
| 88 | const unsigned char *a; /* Iterator state; applications must not modify directly */ | |||
| 89 | const unsigned char *b; /* Iterator state; applications must not modify directly */ | |||
| 90 | }; | |||
| 91 | ||||
| 92 | /* | |||
| 93 | ** EXTENSION API FUNCTIONS | |||
| 94 | ** | |||
| 95 | ** xUserData(pFts): | |||
| 96 | ** Return a copy of the pUserData pointer passed to the xCreateFunction() | |||
| 97 | ** API when the extension function was registered. | |||
| 98 | ** | |||
| 99 | ** xColumnTotalSize(pFts, iCol, pnToken): | |||
| 100 | ** If parameter iCol is less than zero, set output variable *pnToken | |||
| 101 | ** to the total number of tokens in the FTS5 table. Or, if iCol is | |||
| 102 | ** non-negative but less than the number of columns in the table, set | |||
| 103 | ** *pnToken to the total number of tokens in column iCol, considering all rows in | |||
| 104 | ** the FTS5 table. | |||
| 105 | ** | |||
| 106 | ** If parameter iCol is greater than or equal to the number of columns | |||
| 107 | ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. | |||
| 108 | ** an OOM condition or IO error), an appropriate SQLite error code is | |||
| 109 | ** returned. | |||
| 110 | ** | |||
| 111 | ** xColumnCount(pFts): | |||
| 112 | ** Return the number of columns in the table. | |||
| 113 | ** | |||
| 114 | ** xColumnSize(pFts, iCol, pnToken): | |||
| 115 | ** If parameter iCol is less than zero, set output variable *pnToken | |||
| 116 | ** to the total number of tokens in the current row. Or, if iCol is | |||
| 117 | ** non-negative but less than the number of columns in the table, set | |||
| 118 | ** *pnToken to the number of tokens in column iCol of the current row. | |||
| 119 | ** | |||
| 120 | ** If parameter iCol is greater than or equal to the number of columns | |||
| 121 | ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. | |||
| 122 | ** an OOM condition or IO error), an appropriate SQLite error code is | |||
| 123 | ** returned. | |||
| 124 | ** | |||
| 125 | ** This function may be quite inefficient if used with an FTS5 table | |||
| 126 | ** created with the "columnsize=0" option. | |||
| 127 | ** | |||
| 128 | ** xColumnText: | |||
| 129 | ** If parameter iCol is less than zero, or greater than or equal to the | |||
| 130 | ** number of columns in the table, SQLITE_RANGE is returned. | |||
| 131 | ** | |||
| 132 | ** Otherwise, this function attempts to retrieve the text of column iCol of | |||
| 133 | ** the current document. If successful, (*pz) is set to point to a buffer | |||
| 134 | ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes | |||
| 135 | ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, | |||
| 136 | ** if an error occurs, an SQLite error code is returned and the final values | |||
| 137 | ** of (*pz) and (*pn) are undefined. | |||
| 138 | ** | |||
| 139 | ** xPhraseCount: | |||
| 140 | ** Returns the number of phrases in the current query expression. | |||
| 141 | ** | |||
| 142 | ** xPhraseSize: | |||
| 143 | ** If parameter iPhrase is less than zero, or greater than or equal to the | |||
| 144 | ** number of phrases in the current query, as returned by xPhraseCount, | |||
| 145 | ** 0 is returned. Otherwise, this function returns the number of tokens in | |||
| 146 | ** phrase iPhrase of the query. Phrases are numbered starting from zero. | |||
| 147 | ** | |||
| 148 | ** xInstCount: | |||
| 149 | ** Set *pnInst to the total number of occurrences of all phrases within | |||
| 150 | ** the query within the current row. Return SQLITE_OK if successful, or | |||
| 151 | ** an error code (i.e. SQLITE_NOMEM) if an error occurs. | |||
| 152 | ** | |||
| 153 | ** This API can be quite slow if used with an FTS5 table created with the | |||
| 154 | ** "detail=none" or "detail=column" option. If the FTS5 table is created | |||
| 155 | ** with either "detail=none" or "detail=column" and "content=" option | |||
| 156 | ** (i.e. if it is a contentless table), then this API always returns 0. | |||
| 157 | ** | |||
| 158 | ** xInst: | |||
| 159 | ** Query for the details of phrase match iIdx within the current row. | |||
| 160 | ** Phrase matches are numbered starting from zero, so the iIdx argument | |||
| 161 | ** should be greater than or equal to zero and smaller than the value | |||
| 162 | ** output by xInstCount(). If iIdx is less than zero or greater than | |||
| 163 | ** or equal to the value returned by xInstCount(), SQLITE_RANGE is returned. | |||
| 164 | ** | |||
| 165 | ** Otherwise, output parameter *piPhrase is set to the phrase number, *piCol | |||
| 166 | ** to the column in which it occurs and *piOff the token offset of the | |||
| 167 | ** first token of the phrase. SQLITE_OK is returned if successful, or an | |||
| 168 | ** error code (i.e. SQLITE_NOMEM) if an error occurs. | |||
| 169 | ** | |||
| 170 | ** This API can be quite slow if used with an FTS5 table created with the | |||
| 171 | ** "detail=none" or "detail=column" option. | |||
| 172 | ** | |||
| 173 | ** xRowid: | |||
| 174 | ** Returns the rowid of the current row. | |||
| 175 | ** | |||
| 176 | ** xTokenize: | |||
| 177 | ** Tokenize text using the tokenizer belonging to the FTS5 table. | |||
| 178 | ** | |||
| 179 | ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): | |||
| 180 | ** This API function is used to query the FTS table for phrase iPhrase | |||
| 181 | ** of the current query. Specifically, a query equivalent to: | |||
| 182 | ** | |||
| 183 | ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid | |||
| 184 | ** | |||
| 185 | ** with $p set to a phrase equivalent to the phrase iPhrase of the | |||
| 186 | ** current query is executed. Any column filter that applies to | |||
| 187 | ** phrase iPhrase of the current query is included in $p. For each | |||
| 188 | ** row visited, the callback function passed as the fourth argument | |||
| 189 | ** is invoked. The context and API objects passed to the callback | |||
| 190 | ** function may be used to access the properties of each matched row. | |||
| 191 | ** Invoking Api.xUserData() returns a copy of the pointer passed as | |||
| 192 | ** the third argument (pUserData). | |||
| 193 | ** | |||
| 194 | ** If parameter iPhrase is less than zero, or greater than or equal to | |||
| 195 | ** the number of phrases in the query, as returned by xPhraseCount(), | |||
| 196 | ** this function returns SQLITE_RANGE. | |||
| 197 | ** | |||
| 198 | ** If the callback function returns any value other than SQLITE_OK, the | |||
| 199 | ** query is abandoned and the xQueryPhrase function returns immediately. | |||
| 200 | ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. | |||
| 201 | ** Otherwise, the error code is propagated upwards. | |||
| 202 | ** | |||
| 203 | ** If the query runs to completion without incident, SQLITE_OK is returned. | |||
| 204 | ** Or, if some error occurs before the query completes or is aborted by | |||
| 205 | ** the callback, an SQLite error code is returned. | |||
| 206 | ** | |||
| 207 | ** | |||
| 208 | ** xSetAuxdata(pFts5, pAux, xDelete) | |||
| 209 | ** | |||
| 210 | ** Save the pointer passed as the second argument as the extension function's | |||
| 211 | ** "auxiliary data". The pointer may then be retrieved by the current or any | |||
| 212 | ** future invocation of the same fts5 extension function made as part of | |||
| 213 | ** the same MATCH query using the xGetAuxdata() API. | |||
| 214 | ** | |||
| 215 | ** Each extension function is allocated a single auxiliary data slot for | |||
| 216 | ** each FTS query (MATCH expression). If the extension function is invoked | |||
| 217 | ** more than once for a single FTS query, then all invocations share a | |||
| 218 | ** single auxiliary data context. | |||
| 219 | ** | |||
| 220 | ** If there is already an auxiliary data pointer when this function is | |||
| 221 | ** invoked, then it is replaced by the new pointer. If an xDelete callback | |||
| 222 | ** was specified along with the original pointer, it is invoked at this | |||
| 223 | ** point. | |||
| 224 | ** | |||
| 225 | ** The xDelete callback, if one is specified, is also invoked on the | |||
| 226 | ** auxiliary data pointer after the FTS5 query has finished. | |||
| 227 | ** | |||
| 228 | ** If an error (e.g. an OOM condition) occurs within this function, | |||
| 229 | ** the auxiliary data is set to NULL and an error code returned. If the | |||
| 230 | ** xDelete parameter was not NULL, it is invoked on the auxiliary data | |||
| 231 | ** pointer before returning. | |||
| 232 | ** | |||
| 233 | ** | |||
| 234 | ** xGetAuxdata(pFts5, bClear) | |||
| 235 | ** | |||
| 236 | ** Returns the current auxiliary data pointer for the fts5 extension | |||
| 237 | ** function. See the xSetAuxdata() method for details. | |||
| 238 | ** | |||
| 239 | ** If the bClear argument is non-zero, then the auxiliary data is cleared | |||
| 240 | ** (set to NULL) before this function returns. In this case the xDelete, | |||
| 241 | ** if any, is not invoked. | |||
| 242 | ** | |||
| 243 | ** | |||
| 244 | ** xRowCount(pFts5, pnRow) | |||
| 245 | ** | |||
| 246 | ** This function is used to retrieve the total number of rows in the table. | |||
| 247 | ** In other words, the same value that would be returned by: | |||
| 248 | ** | |||
| 249 | ** SELECT count(*) FROM ftstable; | |||
| 250 | ** | |||
| 251 | ** xPhraseFirst() | |||
| 252 | ** This function is used, along with type Fts5PhraseIter and the xPhraseNext | |||
| 253 | ** method, to iterate through all instances of a single query phrase within | |||
| 254 | ** the current row. This is the same information as is accessible via the | |||
| 255 | ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient | |||
| 256 | ** to use, this API may be faster under some circumstances. To iterate | |||
| 257 | ** through instances of phrase iPhrase, use the following code: | |||
| 258 | ** | |||
| 259 | ** Fts5PhraseIter iter; | |||
| 260 | ** int iCol, iOff; | |||
| 261 | ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); | |||
| 262 | ** iCol>=0; | |||
| 263 | ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) | |||
| 264 | ** ){ | |||
| 265 | ** // An instance of phrase iPhrase at offset iOff of column iCol | |||
| 266 | ** } | |||
| 267 | ** | |||
| 268 | ** The Fts5PhraseIter structure is defined above. Applications should not | |||
| 269 | ** modify this structure directly - it should only be used as shown above | |||
| 270 | ** with the xPhraseFirst() and xPhraseNext() API methods (and by | |||
| 271 | ** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below). | |||
| 272 | ** | |||
| 273 | ** This API can be quite slow if used with an FTS5 table created with the | |||
| 274 | ** "detail=none" or "detail=column" option. If the FTS5 table is created | |||
| 275 | ** with either "detail=none" or "detail=column" and "content=" option | |||
| 276 | ** (i.e. if it is a contentless table), then this API always iterates | |||
| 277 | ** through an empty set (all calls to xPhraseFirst() set iCol to -1). | |||
| 278 | ** | |||
| 279 | ** In all cases, matches are visited in (column ASC, offset ASC) order. | |||
| 280 | ** i.e. all those in column 0, sorted by offset, followed by those in | |||
| 281 | ** column 1, etc. | |||
| 282 | ** | |||
| 283 | ** xPhraseNext() | |||
| 284 | ** See xPhraseFirst above. | |||
| 285 | ** | |||
| 286 | ** xPhraseFirstColumn() | |||
| 287 | ** This function and xPhraseNextColumn() are similar to the xPhraseFirst() | |||
| 288 | ** and xPhraseNext() APIs described above. The difference is that instead | |||
| 289 | ** of iterating through all instances of a phrase in the current row, these | |||
| 290 | ** APIs are used to iterate through the set of columns in the current row | |||
| 291 | ** that contain one or more instances of a specified phrase. For example: | |||
| 292 | ** | |||
| 293 | ** Fts5PhraseIter iter; | |||
| 294 | ** int iCol; | |||
| 295 | ** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); | |||
| 296 | ** iCol>=0; | |||
| 297 | ** pApi->xPhraseNextColumn(pFts, &iter, &iCol) | |||
| 298 | ** ){ | |||
| 299 | ** // Column iCol contains at least one instance of phrase iPhrase | |||
| 300 | ** } | |||
| 301 | ** | |||
| 302 | ** This API can be quite slow if used with an FTS5 table created with the | |||
| 303 | ** "detail=none" option. If the FTS5 table is created with both the | |||
| 304 | ** "detail=none" and "content=" options (i.e. if it is a contentless table), | |||
| 305 | ** then this API always iterates through an empty set (all calls to | |||
| 306 | ** xPhraseFirstColumn() set iCol to -1). | |||
| 307 | ** | |||
| 308 | ** The information accessed using this API and its companion | |||
| 309 | ** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext | |||
| 310 | ** (or xInst/xInstCount). The chief advantage of this API is that it is | |||
| 311 | ** significantly more efficient than those alternatives when used with | |||
| 312 | ** "detail=column" tables. | |||
| 313 | ** | |||
| 314 | ** xPhraseNextColumn() | |||
| 315 | ** See xPhraseFirstColumn above. | |||
| 316 | ** | |||
| 317 | ** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken) | |||
| 318 | ** This is used to access token iToken of phrase iPhrase of the current | |||
| 319 | ** query. Before returning, output parameter *ppToken is set to point | |||
| 320 | ** to a buffer containing the requested token, and *pnToken to the | |||
| 321 | ** size of this buffer in bytes. | |||
| 322 | ** | |||
| 323 | ** If iPhrase or iToken are less than zero, or if iPhrase is greater than | |||
| 324 | ** or equal to the number of phrases in the query as reported by | |||
| 325 | ** xPhraseCount(), or if iToken is equal to or greater than the number of | |||
| 326 | ** tokens in the phrase, SQLITE_RANGE is returned and *ppToken and *pnToken | |||
| 327 | ** are both zeroed. | |||
| 328 | ** | |||
| 329 | ** The output text is not a copy of the query text that specified the | |||
| 330 | ** token. It is the output of the tokenizer module. For tokendata=1 | |||
| 331 | ** tables, this includes any embedded 0x00 and trailing data. | |||
| 332 | ** | |||
| 333 | ** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken) | |||
| 334 | ** This is used to access token iToken of phrase hit iIdx within the | |||
| 335 | ** current row. If iIdx is less than zero or greater than or equal to the | |||
| 336 | ** value returned by xInstCount(), SQLITE_RANGE is returned. Otherwise, | |||
| 337 | ** output variable (*ppToken) is set to point to a buffer containing the | |||
| 338 | ** matching document token, and (*pnToken) to the size of that buffer in | |||
| 339 | ** bytes. | |||
| 340 | ** | |||
| 341 | ** The output text is not a copy of the document text that was tokenized. | |||
| 342 | ** It is the output of the tokenizer module. For tokendata=1 tables, this | |||
| 343 | ** includes any embedded 0x00 and trailing data. | |||
| 344 | ** | |||
| 345 | ** This API may be slow in some cases if the token identified by parameters | |||
| 346 | ** iIdx and iToken matched a prefix token in the query. In most cases, the | |||
| 347 | ** first call to this API for each prefix token in the query is forced | |||
| 348 | ** to scan the portion of the full-text index that matches the prefix | |||
| 349 | ** token to collect the extra data required by this API. If the prefix | |||
| 350 | ** token matches a large number of token instances in the document set, | |||
| 351 | ** this may be a performance problem. | |||
| 352 | ** | |||
| 353 | ** If the user knows in advance that a query may use this API for a | |||
| 354 | ** prefix token, FTS5 may be configured to collect all required data as part | |||
| 355 | ** of the initial querying of the full-text index, avoiding the second scan | |||
| 356 | ** entirely. This also causes prefix queries that do not use this API to | |||
| 357 | ** run more slowly and use more memory. FTS5 may be configured in this way | |||
| 358 | ** either on a per-table basis using the [FTS5 insttoken | 'insttoken'] | |||
| 359 | ** option, or on a per-query basis using the | |||
| 360 | ** [fts5_insttoken | fts5_insttoken()] user function. | |||
| 361 | ** | |||
| 362 | ** This API can be quite slow if used with an FTS5 table created with the | |||
| 363 | ** "detail=none" or "detail=column" option. | |||
| 364 | ** | |||
| 365 | ** xColumnLocale(pFts5, iCol, pzLocale, pnLocale) | |||
| 366 | ** If parameter iCol is less than zero, or greater than or equal to the | |||
| 367 | ** number of columns in the table, SQLITE_RANGE is returned. | |||
| 368 | ** | |||
| 369 | ** Otherwise, this function attempts to retrieve the locale associated | |||
| 370 | ** with column iCol of the current row. Usually, there is no associated | |||
| 371 | ** locale, and output parameters (*pzLocale) and (*pnLocale) are set | |||
| 372 | ** to NULL and 0, respectively. However, if the fts5_locale() function | |||
| 373 | ** was used to associate a locale with the value when it was inserted | |||
| 374 | ** into the fts5 table, then (*pzLocale) is set to point to a nul-terminated | |||
| 375 | ** buffer containing the name of the locale in utf-8 encoding. (*pnLocale) | |||
| 376 | ** is set to the size in bytes of the buffer, not including the | |||
| 377 | ** nul-terminator. | |||
| 378 | ** | |||
| 379 | ** If successful, SQLITE_OK is returned. Or, if an error occurs, an | |||
| 380 | ** SQLite error code is returned. The final value of the output parameters | |||
| 381 | ** is undefined in this case. | |||
| 382 | ** | |||
| 383 | ** xTokenize_v2: | |||
| 384 | ** Tokenize text using the tokenizer belonging to the FTS5 table. This | |||
| 385 | ** API is the same as the xTokenize() API, except that it allows a tokenizer | |||
| 386 | ** locale to be specified. | |||
| 387 | */ | |||
| 388 | struct Fts5ExtensionApi { /* Methods available to auxiliary functions; each is described in the comment above */ | |||
| 389 | int iVersion; /* Currently always set to 4 */ | |||
| 390 | ||||
| 391 | void *(*xUserData)(Fts5Context*); /* Returns the pUserData pointer supplied at registration */ | |||
| 392 | ||||
| 393 | int (*xColumnCount)(Fts5Context*); | |||
| 394 | int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); | |||
| 395 | int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); | |||
| 396 | ||||
| 397 | int (*xTokenize)(Fts5Context*, | |||
| 398 | const char *pText, int nText, /* Text to tokenize */ | |||
| 399 | void *pCtx, /* Context passed to xToken() */ | |||
| 400 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | |||
| 401 | ); | |||
| 402 | ||||
| 403 | int (*xPhraseCount)(Fts5Context*); | |||
| 404 | int (*xPhraseSize)(Fts5Context*, int iPhrase); | |||
| 405 | ||||
| 406 | int (*xInstCount)(Fts5Context*, int *pnInst); | |||
| 407 | int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); | |||
| 408 | ||||
| 409 | sqlite3_int64 (*xRowid)(Fts5Context*); /* Rowid of the current row */ | |||
| 410 | int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); | |||
| 411 | int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); | |||
| 412 | ||||
| 413 | int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, | |||
| 414 | int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) /* xCallback, invoked for each row visited */ | |||
| 415 | ); | |||
| 416 | int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); | |||
| 417 | void *(*xGetAuxdata)(Fts5Context*, int bClear); | |||
| 418 | ||||
| 419 | int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); /* Trailing outputs: column, token offset */ | |||
| 420 | void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); | |||
| 421 | ||||
| 422 | int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); /* Trailing output: column */ | |||
| 423 | void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); | |||
| 424 | ||||
| 425 | /* Below this point are iVersion>=3 only */ | |||
| 426 | int (*xQueryToken)(Fts5Context*, | |||
| 427 | int iPhrase, int iToken, | |||
| 428 | const char **ppToken, int *pnToken | |||
| 429 | ); | |||
| 430 | int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*); /* Trailing outputs: ppToken, pnToken */ | |||
| 431 | ||||
| 432 | /* Below this point are iVersion>=4 only */ | |||
| 433 | int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn); | |||
| 434 | int (*xTokenize_v2)(Fts5Context*, | |||
| 435 | const char *pText, int nText, /* Text to tokenize */ | |||
| 436 | const char *pLocale, int nLocale, /* Locale to pass to tokenizer */ | |||
| 437 | void *pCtx, /* Context passed to xToken() */ | |||
| 438 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | |||
| 439 | ); | |||
| 440 | }; | |||
| 441 | ||||
| 442 | /* | |||
| 443 | ** CUSTOM AUXILIARY FUNCTIONS | |||
| 444 | *************************************************************************/ | |||
| 445 | ||||
| 446 | /************************************************************************* | |||
| 447 | ** CUSTOM TOKENIZERS | |||
| 448 | ** | |||
| 449 | ** Applications may also register custom tokenizer types. A tokenizer | |||
| 450 | ** is registered by providing fts5 with a populated instance of the | |||
| 451 | ** following structure. All structure methods must be defined, setting | |||
| 452 | ** any member of the fts5_tokenizer struct to NULL leads to undefined | |||
| 453 | ** behaviour. The structure methods are expected to function as follows: | |||
| 454 | ** | |||
| 455 | ** xCreate: | |||
| 456 | ** This function is used to allocate and initialize a tokenizer instance. | |||
| 457 | ** A tokenizer instance is required to actually tokenize text. | |||
| 458 | ** | |||
| 459 | ** The first argument passed to this function is a copy of the (void*) | |||
| 460 | ** pointer provided by the application when the fts5_tokenizer_v2 object | |||
| 461 | ** was registered with FTS5 (the third argument to xCreateTokenizer()). | |||
| 462 | ** The second and third arguments are an array of nul-terminated strings | |||
| 463 | ** containing the tokenizer arguments, if any, specified following the | |||
| 464 | ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used | |||
| 465 | ** to create the FTS5 table. | |||
| 466 | ** | |||
| 467 | ** The final argument is an output variable. If successful, (*ppOut) | |||
| 468 | ** should be set to point to the new tokenizer handle and SQLITE_OK | |||
| 469 | ** returned. If an error occurs, some value other than SQLITE_OK should | |||
| 470 | ** be returned. In this case, fts5 assumes that the final value of *ppOut | |||
| 471 | ** is undefined. | |||
| 472 | ** | |||
| 473 | ** xDelete: | |||
| 474 | ** This function is invoked to delete a tokenizer handle previously | |||
| 475 | ** allocated using xCreate(). Fts5 guarantees that this function will | |||
| 476 | ** be invoked exactly once for each successful call to xCreate(). | |||
| 477 | ** | |||
| 478 | ** xTokenize: | |||
| 479 | ** This function is expected to tokenize the nText byte string indicated | |||
| 480 | ** by argument pText. pText may or may not be nul-terminated. The first | |||
| 481 | ** argument passed to this function is a pointer to an Fts5Tokenizer object | |||
| 482 | ** returned by an earlier call to xCreate(). | |||
| 483 | ** | |||
| 484 | ** The third argument indicates the reason that FTS5 is requesting | |||
| 485 | ** tokenization of the supplied text. This is always one of the following | |||
| 486 | ** four values: | |||
| 487 | ** | |||
| 488 | ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into | |||
| 489 | ** or removed from the FTS table. The tokenizer is being invoked to | |||
| 490 | ** determine the set of tokens to add to (or delete from) the | |||
| 491 | ** FTS index. | |||
| 492 | ** | |||
| 493 | ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed | |||
| 494 | ** against the FTS index. The tokenizer is being called to tokenize | |||
| 495 | ** a bareword or quoted string specified as part of the query. | |||
| 496 | ** | |||
| 497 | ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as | |||
| 498 | ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is | |||
| 499 | ** followed by a "*" character, indicating that the last token | |||
| 500 | ** returned by the tokenizer will be treated as a token prefix. | |||
| 501 | ** | |||
| 502 | ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to | |||
| 503 | ** satisfy an Fts5ExtensionApi.xTokenize() request made by an auxiliary | |||
| 504 | ** function. Or an Fts5ExtensionApi.xColumnSize() request made by the same | |||
| 505 | ** on a columnsize=0 database. | |||
| 506 | ** </ul> | |||
| 507 | ** | |||
| 508 | ** The sixth and seventh arguments passed to xTokenize() - pLocale and | |||
| 509 | ** nLocale - are a pointer to a buffer containing the locale to use for | |||
| 510 | ** tokenization (e.g. "en_US") and its size in bytes, respectively. The | |||
| 511 | ** pLocale buffer is not nul-terminated. pLocale may be passed NULL (in | |||
| 512 | ** which case nLocale is always 0) to indicate that the tokenizer should | |||
| 513 | ** use its default locale. | |||
| 514 | ** | |||
| 515 | ** For each token in the input string, the supplied callback xToken() must | |||
| 516 | ** be invoked. The first argument to it should be a copy of the pointer | |||
| 517 | ** passed as the second argument to xTokenize(). The third and fourth | |||
| 518 | ** arguments are a pointer to a buffer containing the token text, and the | |||
| 519 | ** size of the token in bytes. The fifth and sixth arguments are the byte offsets | |||
| 520 | ** of the first byte of and first byte immediately following the text from | |||
| 521 | ** which the token is derived within the input. | |||
| 522 | ** | |||
| 523 | ** The second argument passed to the xToken() callback ("tflags") should | |||
| 524 | ** normally be set to 0. The exception is if the tokenizer supports | |||
| 525 | ** synonyms. In this case see the discussion below for details. | |||
| 526 | ** | |||
| 527 | ** FTS5 assumes the xToken() callback is invoked for each token in the | |||
| 528 | ** order that they occur within the input text. | |||
| 529 | ** | |||
| 530 | ** If an xToken() callback returns any value other than SQLITE_OK, then | |||
| 531 | ** the tokenization should be abandoned and the xTokenize() method should | |||
| 532 | ** immediately return a copy of the xToken() return value. Or, if the | |||
| 533 | ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, | |||
| 534 | ** if an error occurs with the xTokenize() implementation itself, it | |||
| 535 | ** may abandon the tokenization and return any error code other than | |||
| 536 | ** SQLITE_OK or SQLITE_DONE. | |||
| 537 | ** | |||
| 538 | ** If the tokenizer is registered using an fts5_tokenizer_v2 object, | |||
| 539 | ** then the xTokenize() method has two additional arguments - pLocale | |||
| 540 | ** and nLocale. These specify the locale that the tokenizer should use | |||
| 541 | ** for the current request. If pLocale and nLocale are both 0, then the | |||
| 542 | ** tokenizer should use its default locale. Otherwise, pLocale points to | |||
| 543 | ** an nLocale byte buffer containing the name of the locale to use as utf-8 | |||
| 544 | ** text. pLocale is not nul-terminated. | |||
| 545 | ** | |||
| 546 | ** FTS5_TOKENIZER | |||
| 547 | ** | |||
| 548 | ** There is also an fts5_tokenizer object. This is an older, deprecated, | |||
| 549 | ** version of fts5_tokenizer_v2. It is similar except that: | |||
| 550 | ** | |||
| 551 | ** <ul> | |||
| 552 | ** <li> There is no "iVersion" field, and | |||
| 553 | ** <li> The xTokenize() method does not take a locale argument. | |||
| 554 | ** </ul> | |||
| 555 | ** | |||
| 556 | ** Legacy fts5_tokenizer tokenizers must be registered using the | |||
| 557 | ** legacy xCreateTokenizer() function, instead of xCreateTokenizer_v2(). | |||
| 558 | ** | |||
| 559 | ** Tokenizer implementations registered using either API may be retrieved | |||
| 560 | ** using both xFindTokenizer() and xFindTokenizer_v2(). | |||
| 561 | ** | |||
| 562 | ** SYNONYM SUPPORT | |||
| 563 | ** | |||
| 564 | ** Custom tokenizers may also support synonyms. Consider a case in which a | |||
| 565 | ** user wishes to query for a phrase such as "first place". Using the | |||
| 566 | ** built-in tokenizers, the FTS5 query 'first + place' will match instances | |||
| 567 | ** of "first place" within the document set, but not alternative forms | |||
| 568 | ** such as "1st place". In some applications, it would be better to match | |||
| 569 | ** all instances of "first place" or "1st place" regardless of which form | |||
| 570 | ** the user specified in the MATCH query text. | |||
| 571 | ** | |||
| 572 | ** There are several ways to approach this in FTS5: | |||
| 573 | ** | |||
| 574 | ** <ol><li> By mapping all synonyms to a single token. In this case, using | |||
| 575 | ** the above example, this means that the tokenizer returns the | |||
| 576 | ** same token for inputs "first" and "1st". Say that token is in | |||
| 577 | ** fact "first", so that when the user inserts the document "I won | |||
| 578 | ** 1st place" entries are added to the index for tokens "i", "won", | |||
| 579 | ** "first" and "place". If the user then queries for '1st + place', | |||
| 580 | ** the tokenizer substitutes "first" for "1st" and the query works | |||
| 581 | ** as expected. | |||
| 582 | ** | |||
| 583 | ** <li> By querying the index for all synonyms of each query term | |||
| 584 | ** separately. In this case, when tokenizing query text, the | |||
| 585 | ** tokenizer may provide multiple synonyms for a single term | |||
| 586 | ** within the document. FTS5 then queries the index for each | |||
| 587 | ** synonym individually. For example, faced with the query: | |||
| 588 | ** | |||
| 589 | ** <codeblock> | |||
| 590 | ** ... MATCH 'first place'</codeblock> | |||
| 591 | ** | |||
| 592 | ** the tokenizer offers both "1st" and "first" as synonyms for the | |||
| 593 | ** first token in the MATCH query and FTS5 effectively runs a query | |||
| 594 | ** similar to: | |||
| 595 | ** | |||
| 596 | ** <codeblock> | |||
| 597 | ** ... MATCH '(first OR 1st) place'</codeblock> | |||
| 598 | ** | |||
| 599 | ** except that, for the purposes of auxiliary functions, the query | |||
| 600 | ** still appears to contain just two phrases - "(first OR 1st)" | |||
| 601 | ** being treated as a single phrase. | |||
| 602 | ** | |||
| 603 | ** <li> By adding multiple synonyms for a single term to the FTS index. | |||
| 604 | ** Using this method, when tokenizing document text, the tokenizer | |||
| 605 | ** provides multiple synonyms for each token. So that when a | |||
| 606 | ** document such as "I won first place" is tokenized, entries are | |||
| 607 | ** added to the FTS index for "i", "won", "first", "1st" and | |||
| 608 | ** "place". | |||
| 609 | ** | |||
| 610 | ** This way, even if the tokenizer does not provide synonyms | |||
| 611 | ** when tokenizing query text (it should not - to do so would be | |||
| 612 | ** inefficient), it doesn't matter if the user queries for | |||
| 613 | ** 'first + place' or '1st + place', as there are entries in the | |||
| 614 | ** FTS index corresponding to both forms of the first token. | |||
| 615 | ** </ol> | |||
| 616 | ** | |||
| 617 | ** Whether it is parsing document or query text, any call to xToken that | |||
| 618 | ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit | |||
| 619 | ** is considered to supply a synonym for the previous token. For example, | |||
| 620 | ** when parsing the document "I won first place", a tokenizer that supports | |||
| 621 | ** synonyms would call xToken() 5 times, as follows: | |||
| 622 | ** | |||
| 623 | ** <codeblock> | |||
| 624 | ** xToken(pCtx, 0, "i", 1, 0, 1); | |||
| 625 | ** xToken(pCtx, 0, "won", 3, 2, 5); | |||
| 626 | ** xToken(pCtx, 0, "first", 5, 6, 11); | |||
| 627 | ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); | |||
| 628 | ** xToken(pCtx, 0, "place", 5, 12, 17); | |||
| 629 | **</codeblock> | |||
| 630 | ** | |||
| 631 | ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time | |||
| 632 | ** xToken() is called. Multiple synonyms may be specified for a single token | |||
| 633 | ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. | |||
| 634 | ** There is no limit to the number of synonyms that may be provided for a | |||
| 635 | ** single token. | |||
| 636 | ** | |||
| 637 | ** In many cases, method (1) above is the best approach. It does not add | |||
| 638 | ** extra data to the FTS index or require FTS5 to query for multiple terms, | |||
| 639 | ** so it is efficient in terms of disk space and query speed. However, it | |||
| 640 | ** does not support prefix queries very well. If, as suggested above, the | |||
| 641 | ** token "first" is substituted for "1st" by the tokenizer, then the query: | |||
| 642 | ** | |||
| 643 | ** <codeblock> | |||
| 644 | ** ... MATCH '1s*'</codeblock> | |||
| 645 | ** | |||
| 646 | ** will not match documents that contain the token "1st" (as the tokenizer | |||
| 647 | ** will probably not map "1s" to any prefix of "first"). | |||
| 648 | ** | |||
| 649 | ** For full prefix support, method (3) may be preferred. In this case, | |||
| 650 | ** because the index contains entries for both "first" and "1st", prefix | |||
| 651 | ** queries such as 'fi*' or '1s*' will match correctly. However, because | |||
| 652 | ** extra entries are added to the FTS index, this method uses more space | |||
| 653 | ** within the database. | |||
| 654 | ** | |||
| 655 | ** Method (2) offers a midpoint between (1) and (3). Using this method, | |||
| 656 | ** a query such as '1s*' will match documents that contain the literal | |||
| 657 | ** token "1st", but not "first" (assuming the tokenizer is not able to | |||
| 658 | ** provide synonyms for prefixes). However, a non-prefix query like '1st' | |||
| 659 | ** will match against "1st" and "first". This method does not require | |||
| 660 | ** extra disk space, as no extra entries are added to the FTS index. | |||
| 661 | ** On the other hand, it may require more CPU cycles to run MATCH queries, | |||
| 662 | ** as separate queries of the FTS index are required for each synonym. | |||
| 663 | ** | |||
| 664 | ** When using methods (2) or (3), it is important that the tokenizer only | |||
| 665 | ** provide synonyms when tokenizing document text (method (3)) or query | |||
| 666 | ** text (method (2)), not both. Providing synonyms in both cases will not | |||
| 667 | ** cause any errors, but is inefficient. | |||
| 668 | */ | |||
| 669 | typedef struct Fts5Tokenizer Fts5Tokenizer; | |||
| 670 | typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2; | |||
| 671 | struct fts5_tokenizer_v2 { /* Tokenizer method table, locale-aware (v2) form */ | |||
| 672 | int iVersion; /* Currently always 2 */ | |||
| 673 | ||||
| 674 | int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); | |||
| 675 | void (*xDelete)(Fts5Tokenizer*); | |||
| 676 | int (*xTokenize)(Fts5Tokenizer*, | |||
| 677 | void *pCtx, /* Handed back to xToken() as its 1st argument */ | |||
| 678 | int flags, /* Mask of FTS5_TOKENIZE_* flags */ | |||
| 679 | const char *pText, int nText, /* Text to tokenize and its size */ | |||
| 680 | const char *pLocale, int nLocale, /* Locale name as utf-8 (nLocale bytes, not nul-terminated); both 0 means use the default locale */ | |||
| 681 | int (*xToken)( | |||
| 682 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | |||
| 683 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
| 684 | const char *pToken, /* Pointer to buffer containing token */ | |||
| 685 | int nToken, /* Size of token in bytes */ | |||
| 686 | int iStart, /* Byte offset of token within input text */ | |||
| 687 | int iEnd /* Byte offset of end of token within input text */ | |||
| 688 | ) | |||
| 689 | ); | |||
| 690 | }; | |||
| 691 | ||||
| 692 | /* | |||
| 693 | ** New code should use the fts5_tokenizer_v2 type to define tokenizer | |||
| 694 | ** implementations. The following type is included for legacy applications | |||
| 695 | ** that still use it. | |||
| 696 | */ | |||
| 697 | typedef struct fts5_tokenizer fts5_tokenizer; | |||
| 698 | struct fts5_tokenizer { /* Legacy method table: no iVersion field */ | |||
| 699 | int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); | |||
| 700 | void (*xDelete)(Fts5Tokenizer*); | |||
| 701 | int (*xTokenize)(Fts5Tokenizer*, | |||
| 702 | void *pCtx, /* Handed back to xToken() as its 1st argument */ | |||
| 703 | int flags, /* Mask of FTS5_TOKENIZE_* flags */ | |||
| 704 | const char *pText, int nText, /* Text to tokenize and its size; no locale arguments in this version */ | |||
| 705 | int (*xToken)( | |||
| 706 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | |||
| 707 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
| 708 | const char *pToken, /* Pointer to buffer containing token */ | |||
| 709 | int nToken, /* Size of token in bytes */ | |||
| 710 | int iStart, /* Byte offset of token within input text */ | |||
| 711 | int iEnd /* Byte offset of end of token within input text */ | |||
| 712 | ) | |||
| 713 | ); | |||
| 714 | }; | |||
| 715 | ||||
| 716 | ||||
| 717 | /* Flags that may be passed as the third argument to xTokenize() */ | |||
| 718 | #define FTS5_TOKENIZE_QUERY0x0001 0x0001 | |||
| 719 | #define FTS5_TOKENIZE_PREFIX0x0002 0x0002 | |||
| 720 | #define FTS5_TOKENIZE_DOCUMENT0x0004 0x0004 | |||
| 721 | #define FTS5_TOKENIZE_AUX0x0008 0x0008 | |||
| 722 | ||||
| 723 | /* Flags that may be passed by the tokenizer implementation back to FTS5 | |||
| 724 | ** as the second argument (tflags) to the supplied xToken callback. */ | |||
| 725 | #define FTS5_TOKEN_COLOCATED0x0001 0x0001 /* Same position as prev. token */ | |||
| 726 | ||||
| 727 | /* | |||
| 728 | ** END OF CUSTOM TOKENIZERS | |||
| 729 | *************************************************************************/ | |||
| 730 | ||||
| 731 | /************************************************************************* | |||
| 732 | ** FTS5 EXTENSION REGISTRATION API | |||
| 733 | */ | |||
| 734 | typedef struct fts5_api fts5_api; | |||
| 735 | struct fts5_api { | |||
| 736 | int iVersion; /* Currently always set to 3 */ | |||
| 737 | ||||
| 738 | /* Create a new tokenizer (legacy fts5_tokenizer interface) */ | |||
| 739 | int (*xCreateTokenizer)( | |||
| 740 | fts5_api *pApi, | |||
| 741 | const char *zName, | |||
| 742 | void *pUserData, | |||
| 743 | fts5_tokenizer *pTokenizer, | |||
| 744 | void (*xDestroy)(void*) | |||
| 745 | ); | |||
| 746 | ||||
| 747 | /* Find an existing tokenizer (returned through a legacy fts5_tokenizer) */ | |||
| 748 | int (*xFindTokenizer)( | |||
| 749 | fts5_api *pApi, | |||
| 750 | const char *zName, | |||
| 751 | void **ppUserData, | |||
| 752 | fts5_tokenizer *pTokenizer | |||
| 753 | ); | |||
| 754 | ||||
| 755 | /* Create a new auxiliary function */ | |||
| 756 | int (*xCreateFunction)( | |||
| 757 | fts5_api *pApi, | |||
| 758 | const char *zName, | |||
| 759 | void *pUserData, | |||
| 760 | fts5_extension_function xFunction, | |||
| 761 | void (*xDestroy)(void*) | |||
| 762 | ); | |||
| 763 | ||||
| 764 | /* APIs below this point are only available if iVersion>=3 */ | |||
| 765 | ||||
| 766 | /* Create a new tokenizer (fts5_tokenizer_v2 interface) */ | |||
| 767 | int (*xCreateTokenizer_v2)( | |||
| 768 | fts5_api *pApi, | |||
| 769 | const char *zName, | |||
| 770 | void *pUserData, | |||
| 771 | fts5_tokenizer_v2 *pTokenizer, | |||
| 772 | void (*xDestroy)(void*) | |||
| 773 | ); | |||
| 774 | ||||
| 775 | /* Find an existing tokenizer (returned through an fts5_tokenizer_v2 pointer) */ | |||
| 776 | int (*xFindTokenizer_v2)( | |||
| 777 | fts5_api *pApi, | |||
| 778 | const char *zName, | |||
| 779 | void **ppUserData, | |||
| 780 | fts5_tokenizer_v2 **ppTokenizer | |||
| 781 | ); | |||
| 782 | }; | |||
| 783 | ||||
| 784 | /* | |||
| 785 | ** END OF REGISTRATION API | |||
| 786 | *************************************************************************/ | |||
| 787 | ||||
| 788 | #ifdef __cplusplus | |||
| 789 | } /* end of the 'extern "C"' block */ | |||
| 790 | #endif | |||
| 791 | ||||
| 792 | #endif /* _FTS5_H */ | |||
| 793 | ||||
| 794 | #line 1 "fts5Int.h" | |||
| 795 | /* | |||
| 796 | ** 2014 May 31 | |||
| 797 | ** | |||
| 798 | ** The author disclaims copyright to this source code. In place of | |||
| 799 | ** a legal notice, here is a blessing: | |||
| 800 | ** | |||
| 801 | ** May you do good and not evil. | |||
| 802 | ** May you find forgiveness for yourself and forgive others. | |||
| 803 | ** May you share freely, never taking more than you give. | |||
| 804 | ** | |||
| 805 | ****************************************************************************** | |||
| 806 | ** | |||
| 807 | */ | |||
| 808 | #ifndef _FTS5INT_H | |||
| 809 | #define _FTS5INT_H | |||
| 810 | ||||
| 811 | /* #include "fts5.h" */ | |||
| 812 | #include "sqlite3ext.h" | |||
| 813 | SQLITE_EXTENSION_INIT1const sqlite3_api_routines *sqlite3_api=0; | |||
| 814 | ||||
| 815 | #include <string.h> | |||
| 816 | #include <assert.h> | |||
| 817 | #include <stddef.h> | |||
| 818 | ||||
| 819 | #ifndef SQLITE_AMALGAMATION | |||
| 820 | ||||
| 821 | typedef unsigned char u8; | |||
| 822 | typedef unsigned int u32; | |||
| 823 | typedef unsigned short u16; | |||
| 824 | typedef short i16; | |||
| 825 | typedef sqlite3_int64 i64; | |||
| 826 | typedef sqlite3_uint64 u64; | |||
| 827 | ||||
| 828 | #ifndef ArraySize | |||
| 829 | # define ArraySize(x)((int)(sizeof(x) / sizeof(x[0]))) ((int)(sizeof(x) / sizeof(x[0]))) | |||
| 830 | #endif | |||
| 831 | ||||
| 832 | #define testcase(x) | |||
| 833 | ||||
| 834 | #if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST) | |||
| 835 | # define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1 | |||
| 836 | #endif | |||
| 837 | #if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS) | |||
| 838 | # define ALWAYS(X)(X) (1) | |||
| 839 | # define NEVER(X)(X) (0) | |||
| 840 | #elif !defined(NDEBUG1) | |||
| 841 | # define ALWAYS(X)(X) ((X)?1:(assert(0)((void) (0)),0)) | |||
| 842 | # define NEVER(X)(X) ((X)?(assert(0)((void) (0)),1):0) | |||
| 843 | #else | |||
| 844 | # define ALWAYS(X)(X) (X) | |||
| 845 | # define NEVER(X)(X) (X) | |||
| 846 | #endif | |||
| 847 | ||||
| 848 | #define MIN(x,y)(((x) < (y)) ? (x) : (y)) (((x) < (y)) ? (x) : (y)) | |||
| 849 | #define MAX(x,y)(((x) > (y)) ? (x) : (y)) (((x) > (y)) ? (x) : (y)) | |||
| 850 | ||||
| 851 | /* | |||
| 852 | ** Constants for the largest and smallest possible 64-bit signed integers. | |||
| 853 | */ | |||
| 854 | # define LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) (0xffffffff|(((i64)0x7fffffff)<<32)) | |||
| 855 | # define SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))) (((i64)-1) - LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))) | |||
| 856 | ||||
| 857 | /* The uptr type is an unsigned integer large enough to hold a pointer | |||
| 858 | */ | |||
| 859 | #if defined(HAVE_STDINT_H1) | |||
| 860 | typedef uintptr_t uptr; | |||
| 861 | #elif SQLITE_PTRSIZE==4 | |||
| 862 | typedef u32 uptr; | |||
| 863 | #else | |||
| 864 | typedef u64 uptr; | |||
| 865 | #endif | |||
| 866 | ||||
| 867 | #ifdef SQLITE_4_BYTE_ALIGNED_MALLOC | |||
| 868 | # define EIGHT_BYTE_ALIGNMENT(X)((((uptr)(X) - (uptr)0)&7)==0) ((((uptr)(X) - (uptr)0)&3)==0) | |||
| 869 | #else | |||
| 870 | # define EIGHT_BYTE_ALIGNMENT(X)((((uptr)(X) - (uptr)0)&7)==0) ((((uptr)(X) - (uptr)0)&7)==0) | |||
| 871 | #endif | |||
| 872 | ||||
| 873 | /* | |||
| 874 | ** Macros needed to provide flexible arrays in a portable way | |||
| 875 | */ | |||
| 876 | #ifndef offsetof | |||
| 877 | # define offsetof(STRUCTURE,FIELD)__builtin_offsetof(STRUCTURE, FIELD) ((size_t)((char*)&((STRUCTURE*)0)->FIELD)) | |||
| 878 | #endif | |||
| 879 | #if defined(__STDC_VERSION__201710L) && (__STDC_VERSION__201710L >= 199901L) | |||
| 880 | # define FLEXARRAY | |||
| 881 | #else | |||
| 882 | # define FLEXARRAY 1 | |||
| 883 | #endif | |||
| 884 | ||||
| 885 | #endif | |||
| 886 | ||||
| 887 | /* Truncate very long tokens to this many bytes. Hard limit is | |||
| 888 | ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset | |||
| 889 | ** field that occurs at the start of each leaf page (see fts5_index.c). */ | |||
| 890 | #define FTS5_MAX_TOKEN_SIZE32768 32768 | |||
| 891 | ||||
| 892 | /* | |||
| 893 | ** Maximum number of prefix indexes on single FTS5 table. This must be | |||
| 894 | ** less than 32. If it is set to anything larger than that, an #error | |||
| 895 | ** directive in fts5_index.c will cause the build to fail. | |||
| 896 | */ | |||
| 897 | #define FTS5_MAX_PREFIX_INDEXES31 31 | |||
| 898 | ||||
| 899 | /* | |||
| 900 | ** Maximum segments permitted in a single index | |||
| 901 | */ | |||
| 902 | #define FTS5_MAX_SEGMENT2000 2000 | |||
| 903 | ||||
| 904 | #define FTS5_DEFAULT_NEARDIST10 10 | |||
| 905 | #define FTS5_DEFAULT_RANK"bm25" "bm25" | |||
| 906 | ||||
| 907 | /* Name of rank and rowid columns */ | |||
| 908 | #define FTS5_RANK_NAME"rank" "rank" | |||
| 909 | #define FTS5_ROWID_NAME"rowid" "rowid" | |||
| 910 | ||||
| 911 | #ifdef SQLITE_DEBUG | |||
| 912 | # define FTS5_CORRUPT(11 | (1<<8)) sqlite3Fts5Corrupt() | |||
| 913 | static int sqlite3Fts5Corrupt(void); | |||
| 914 | #else | |||
| 915 | # define FTS5_CORRUPT(11 | (1<<8)) SQLITE_CORRUPT_VTAB(11 | (1<<8)) | |||
| 916 | #endif | |||
| 917 | ||||
| 918 | /* | |||
| 919 | ** The assert_nc() macro is similar to the assert() macro, except that it | |||
| 920 | ** is used for assert() conditions that are true only if it can be | |||
| 921 | ** guaranteed that the database is not corrupt. | |||
| 922 | */ | |||
| 923 | #ifdef SQLITE_DEBUG | |||
| 924 | extern int sqlite3_fts5_may_be_corrupt; | |||
| 925 | # define assert_nc(x)((void) (0)) assert(sqlite3_fts5_may_be_corrupt || (x))((void) (0)) | |||
| 926 | #else | |||
| 927 | # define assert_nc(x)((void) (0)) assert(x)((void) (0)) | |||
| 928 | #endif | |||
| 929 | ||||
| 930 | /* | |||
| 931 | ** A version of memcmp() that does not cause asan errors if one of the pointer | |||
| 932 | ** parameters is NULL and the number of bytes to compare is zero. | |||
| 933 | */ | |||
| 934 | #define fts5Memcmp(s1, s2, n)((n)<=0 ? 0 : memcmp((s1), (s2), (n))) ((n)<=0 ? 0 : memcmp((s1), (s2), (n))) | |||
| 935 | ||||
| 936 | /* Mark a function parameter as unused, to suppress nuisance compiler | |||
| 937 | ** warnings. */ | |||
| 938 | #ifndef UNUSED_PARAM | |||
| 939 | # define UNUSED_PARAM(X)(void)(X) (void)(X) | |||
| 940 | #endif | |||
| 941 | ||||
| 942 | #ifndef UNUSED_PARAM2 | |||
| 943 | # define UNUSED_PARAM2(X, Y)(void)(X), (void)(Y) (void)(X), (void)(Y) | |||
| 944 | #endif | |||
| 945 | ||||
| 946 | typedef struct Fts5Global Fts5Global; | |||
| 947 | typedef struct Fts5Colset Fts5Colset; | |||
| 948 | ||||
| 949 | /* If a NEAR() clump or phrase may only match a specific set of columns, | |||
| 950 | ** then an object of the following type is used to record the set of columns. | |||
| 951 | ** Each entry in the aiCol[] array is a column that may be matched. | |||
| 952 | ** | |||
| 953 | ** This object is used by fts5_expr.c and fts5_index.c. | |||
| 954 | */ | |||
| 955 | struct Fts5Colset { | |||
| 956 | int nCol; /* Presumably the number of entries in aiCol[] - confirm against users */ | |||
| 957 | int aiCol[FLEXARRAY]; /* Columns that may be matched, one per entry */ | |||
| 958 | }; | |||
| 959 | ||||
| 960 | /* Size (in bytes) of a complete Fts5Colset object with N columns. */ | |||
| 961 | #define SZ_FTS5COLSET(N)(sizeof(i64)*((N+2)/2)) (sizeof(i64)*((N+2)/2)) | |||
| 962 | ||||
| 963 | /************************************************************************** | |||
| 964 | ** Interface to code in fts5_config.c. fts5_config.c contains code | |||
| 965 | ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. | |||
| 966 | */ | |||
| 967 | ||||
| 968 | typedef struct Fts5Config Fts5Config; | |||
| 969 | typedef struct Fts5TokenizerConfig Fts5TokenizerConfig; | |||
| 970 | ||||
| 971 | struct Fts5TokenizerConfig { | |||
| 972 | Fts5Tokenizer *pTok; /* Tokenizer instance handle */ | |||
| 973 | fts5_tokenizer_v2 *pApi2; /* v2 (locale-aware) tokenizer method table */ | |||
| 974 | fts5_tokenizer *pApi1; /* Legacy tokenizer method table */ | |||
| 975 | const char **azArg; /* Tokenizer arguments - presumably those passed to xCreate(); confirm */ | |||
| 976 | int nArg; /* Presumably the number of entries in azArg[] - confirm */ | |||
| 977 | int ePattern; /* FTS5_PATTERN_XXX constant */ | |||
| 978 | const char *pLocale; /* Current locale to use */ | |||
| 979 | int nLocale; /* Size of pLocale in bytes */ | |||
| 980 | }; | |||
| 981 | ||||
| 982 | /* | |||
| 983 | ** An instance of the following structure encodes all information that can | |||
| 984 | ** be gleaned from the CREATE VIRTUAL TABLE statement. | |||
| 985 | ** | |||
| 986 | ** And all information loaded from the %_config table. | |||
| 987 | ** | |||
| 988 | ** nAutomerge: | |||
| 989 | ** The minimum number of segments that an auto-merge operation should | |||
| 990 | ** attempt to merge together. A value of 1 sets the object to use the | |||
| 991 | ** compile time default. Zero disables auto-merge altogether. | |||
| 992 | ** | |||
| 993 | ** bContentlessDelete: | |||
| 994 | ** True if the contentless_delete option was present in the CREATE | |||
| 995 | ** VIRTUAL TABLE statement. | |||
| 996 | ** | |||
| 997 | ** zContent: | |||
| 998 | ** | |||
| 999 | ** zContentRowid: | |||
| 1000 | ** The value of the content_rowid= option, if one was specified. Or | |||
| 1001 | ** the string "rowid" otherwise. This text is not quoted - if it is | |||
| 1002 | ** used as part of an SQL statement it needs to be quoted appropriately. | |||
| 1003 | ** | |||
| 1004 | ** zContentExprlist: | |||
| 1005 | ** | |||
| 1006 | ** pzErrmsg: | |||
| 1007 | ** This exists in order to allow the fts5_index.c module to return a | |||
| 1008 | ** decent error message if it encounters a file-format version it does | |||
| 1009 | ** not understand. | |||
| 1010 | ** | |||
| 1011 | ** bColumnsize: | |||
| 1012 | ** True if the %_docsize table is created. | |||
| 1013 | ** | |||
| 1014 | ** bPrefixIndex: | |||
| 1015 | ** This is only used for debugging. If set to false, any prefix indexes | |||
| 1016 | ** are ignored. This value is configured using: | |||
| 1017 | ** | |||
| 1018 | ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); | |||
| 1019 | ** | |||
| 1020 | ** bLocale: | |||
| 1021 | ** Set to true if locale=1 was specified when the table was created. | |||
| 1022 | */ | |||
| 1023 | struct Fts5Config { | |||
| 1024 | sqlite3 *db; /* Database handle */ | |||
| 1025 | Fts5Global *pGlobal; /* Global fts5 object for handle db */ | |||
| 1026 | char *zDb; /* Database holding FTS index (e.g. "main") */ | |||
| 1027 | char *zName; /* Name of FTS index */ | |||
| 1028 | int nCol; /* Number of columns */ | |||
| 1029 | char **azCol; /* Column names */ | |||
| 1030 | u8 *abUnindexed; /* True for unindexed columns */ | |||
| 1031 | int nPrefix; /* Number of prefix indexes */ | |||
| 1032 | int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ | |||
| 1033 | int eContent; /* An FTS5_CONTENT value */ | |||
| 1034 | int bContentlessDelete; /* "contentless_delete=" option (dflt==0) */ | |||
| 1035 | int bContentlessUnindexed; /* "contentless_unindexed=" option (dflt=0) */ | |||
| 1036 | char *zContent; /* content table */ | |||
| 1037 | char *zContentRowid; /* "content_rowid=" option value */ | |||
| 1038 | int bColumnsize; /* "columnsize=" option value (dflt==1) */ | |||
| 1039 | int bTokendata; /* "tokendata=" option value (dflt==0) */ | |||
| 1040 | int bLocale; /* "locale=" option value (dflt==0) */ | |||
| 1041 | int eDetail; /* FTS5_DETAIL_XXX value */ | |||
| 1042 | char *zContentExprlist; | |||
| 1043 | Fts5TokenizerConfig t; | |||
| 1044 | int bLock; /* True when table is preparing statement */ | |||
| 1045 | ||||
| 1046 | ||||
| 1047 | /* Values loaded from the %_config table */ | |||
| 1048 | int iVersion; /* fts5 file format 'version' */ | |||
| 1049 | int iCookie; /* Incremented when %_config is modified */ | |||
| 1050 | int pgsz; /* Approximate page size used in %_data */ | |||
| 1051 | int nAutomerge; /* 'automerge' setting */ | |||
| 1052 | int nCrisisMerge; /* Maximum allowed segments per level */ | |||
| 1053 | int nUsermerge; /* 'usermerge' setting */ | |||
| 1054 | int nHashSize; /* Bytes of memory for in-memory hash */ | |||
| 1055 | char *zRank; /* Name of rank function */ | |||
| 1056 | char *zRankArgs; /* Arguments to rank function */ | |||
| 1057 | int bSecureDelete; /* 'secure-delete' */ | |||
| 1058 | int nDeleteMerge; /* 'deletemerge' */ | |||
| 1059 | int bPrefixInsttoken; /* 'prefix-insttoken' */ | |||
| 1060 | ||||
| 1061 | /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ | |||
| 1062 | char **pzErrmsg; | |||
| 1063 | ||||
| 1064 | #ifdef SQLITE_DEBUG | |||
| 1065 | int bPrefixIndex; /* True to use prefix-indexes */ | |||
| 1066 | #endif | |||
| 1067 | }; | |||
| 1068 | ||||
| 1069 | /* Current expected value of %_config table 'version' field. And | |||
| 1070 | ** the expected version if the 'secure-delete' option has ever been | |||
| 1071 | ** set on the table. */ | |||
| 1072 | #define FTS5_CURRENT_VERSION4 4 | |||
| 1073 | #define FTS5_CURRENT_VERSION_SECUREDELETE5 5 | |||
| 1074 | ||||
| 1075 | #define FTS5_CONTENT_NORMAL0 0 | |||
| 1076 | #define FTS5_CONTENT_NONE1 1 | |||
| 1077 | #define FTS5_CONTENT_EXTERNAL2 2 | |||
| 1078 | #define FTS5_CONTENT_UNINDEXED3 3 | |||
| 1079 | ||||
| 1080 | #define FTS5_DETAIL_FULL0 0 | |||
| 1081 | #define FTS5_DETAIL_NONE1 1 | |||
| 1082 | #define FTS5_DETAIL_COLUMNS2 2 | |||
| 1083 | ||||
| 1084 | #define FTS5_PATTERN_NONE0 0 | |||
| 1085 | #define FTS5_PATTERN_LIKE65 65 /* matches SQLITE_INDEX_CONSTRAINT_LIKE */ | |||
| 1086 | #define FTS5_PATTERN_GLOB66 66 /* matches SQLITE_INDEX_CONSTRAINT_GLOB */ | |||
| 1087 | ||||
| 1088 | static int sqlite3Fts5ConfigParse( | |||
| 1089 | Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** | |||
| 1090 | ); | |||
| 1091 | static void sqlite3Fts5ConfigFree(Fts5Config*); | |||
| 1092 | ||||
| 1093 | static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); | |||
| 1094 | ||||
| 1095 | static int sqlite3Fts5Tokenize( | |||
| 1096 | Fts5Config *pConfig, /* FTS5 Configuration object */ | |||
| 1097 | int flags, /* FTS5_TOKENIZE_* flags */ | |||
| 1098 | const char *pText, int nText, /* Text to tokenize */ | |||
| 1099 | void *pCtx, /* Context passed to xToken() */ | |||
| 1100 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | |||
| 1101 | ); | |||
| 1102 | ||||
| 1103 | static void sqlite3Fts5Dequote(char *z); | |||
| 1104 | ||||
| 1105 | /* Load the contents of the %_config table */ | |||
| 1106 | static int sqlite3Fts5ConfigLoad(Fts5Config*, int); | |||
| 1107 | ||||
| 1108 | /* Set the value of a single config attribute */ | |||
| 1109 | static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); | |||
| 1110 | ||||
| 1111 | static int sqlite3Fts5ConfigParseRank(const char*, char**, char**); | |||
| 1112 | ||||
| 1113 | static void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...); | |||
| 1114 | ||||
| 1115 | /* | |||
| 1116 | ** End of interface to code in fts5_config.c. | |||
| 1117 | **************************************************************************/ | |||
| 1118 | ||||
| 1119 | /************************************************************************** | |||
| 1120 | ** Interface to code in fts5_buffer.c. | |||
| 1121 | */ | |||
| 1122 | ||||
| 1123 | /* | |||
| 1124 | ** Buffer object for the incremental building of string data. | |||
| 1125 | */ | |||
| 1126 | typedef struct Fts5Buffer Fts5Buffer; | |||
| 1127 | struct Fts5Buffer { | |||
| 1128 | u8 *p; /* Buffer contents (presumably a heap allocation - confirm in fts5_buffer.c) */ | |||
| 1129 | int n; /* Bytes currently in use; fts5BufferGrow() tests n+nn against nSpace */ | |||
| 1130 | int nSpace; /* Space available at p; fts5BufferGrow() resizes when n+nn exceeds it */ | |||
| 1131 | }; | |||
| 1132 | ||||
| 1133 | static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32); | |||
| 1134 | static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64); | |||
| 1135 | static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*); | |||
| 1136 | static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*); | |||
| 1137 | static void sqlite3Fts5BufferFree(Fts5Buffer*); | |||
| 1138 | static void sqlite3Fts5BufferZero(Fts5Buffer*); | |||
| 1139 | static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); | |||
| 1140 | static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); | |||
| 1141 | ||||
| 1142 | static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); | |||
| 1143 | ||||
| 1144 | #define fts5BufferZero(x)sqlite3Fts5BufferZero(x) sqlite3Fts5BufferZero(x) | |||
| 1145 | #define fts5BufferAppendVarint(a,b,c)sqlite3Fts5BufferAppendVarint(a,b,(i64)c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c) | |||
| 1146 | #define fts5BufferFree(a)sqlite3Fts5BufferFree(a) sqlite3Fts5BufferFree(a) | |||
| 1147 | #define fts5BufferAppendBlob(a,b,c,d)sqlite3Fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) | |||
| 1148 | #define fts5BufferSet(a,b,c,d)sqlite3Fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) | |||
| 1149 | ||||
| 1150 | #define fts5BufferGrow(pRc,pBuf,nn)( (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) ) ( \ | |||
| 1151 | (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \ | |||
| 1152 | sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \ | |||
| 1153 | ) | |||
| 1154 | ||||
| 1155 | /* Write and decode big-endian 32-bit integer values */ | |||
| 1156 | static void sqlite3Fts5Put32(u8*, int); | |||
| 1157 | static int sqlite3Fts5Get32(const u8*); | |||
| 1158 | ||||
| 1159 | #define FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF) (int)((iPos >> 32) & 0x7FFFFFFF) | |||
| 1160 | #define FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF) (int)(iPos & 0x7FFFFFFF) | |||
| 1161 | ||||
| 1162 | typedef struct Fts5PoslistReader Fts5PoslistReader; | |||
| 1163 | struct Fts5PoslistReader { | |||
| 1164 | /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ | |||
| 1165 | const u8 *a; /* Position list to iterate through */ | |||
| 1166 | int n; /* Size of buffer at a[] in bytes */ | |||
| 1167 | int i; /* Current offset in a[] */ | |||
| 1168 | ||||
| 1169 | u8 bFlag; /* For client use (any custom purpose) */ | |||
| 1170 | ||||
| 1171 | /* Output variables */ | |||
| 1172 | u8 bEof; /* Set to true at EOF */ | |||
| 1173 | i64 iPos; /* (iCol<<32) + iPos */ | |||
| 1174 | }; | |||
| 1175 | static int sqlite3Fts5PoslistReaderInit( | |||
| 1176 | const u8 *a, int n, /* Poslist buffer to iterate through */ | |||
| 1177 | Fts5PoslistReader *pIter /* Iterator object to initialize */ | |||
| 1178 | ); | |||
| 1179 | static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*); | |||
| 1180 | ||||
| 1181 | typedef struct Fts5PoslistWriter Fts5PoslistWriter; | |||
| 1182 | struct Fts5PoslistWriter { /* State passed to sqlite3Fts5PoslistWriterAppend() */ | |||
| 1183 | i64 iPrev; /* Presumably the position most recently appended - TODO confirm */ | |||
| 1184 | }; | |||
| 1185 | static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); | |||
| 1186 | static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64); | |||
| 1187 | ||||
| 1188 | static int sqlite3Fts5PoslistNext64( | |||
| 1189 | const u8 *a, int n, /* Buffer containing poslist */ | |||
| 1190 | int *pi, /* IN/OUT: Offset within a[] */ | |||
| 1191 | i64 *piOff /* IN/OUT: Current offset */ | |||
| 1192 | ); | |||
| 1193 | ||||
| 1194 | /* Malloc utility */ | |||
| 1195 | static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte); | |||
| 1196 | static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); | |||
| 1197 | ||||
| 1198 | /* Character set tests (like isspace(), isalpha() etc.) */ | |||
| 1199 | static int sqlite3Fts5IsBareword(char t); | |||
| 1200 | ||||
| 1201 | ||||
| 1202 | /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ | |||
| 1203 | typedef struct Fts5Termset Fts5Termset; | |||
| 1204 | static int sqlite3Fts5TermsetNew(Fts5Termset**); | |||
| 1205 | static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent); | |||
| 1206 | static void sqlite3Fts5TermsetFree(Fts5Termset*); | |||
| 1207 | ||||
| 1208 | /* | |||
| 1209 | ** End of interface to code in fts5_buffer.c. | |||
| 1210 | **************************************************************************/ | |||
| 1211 | ||||
| 1212 | /************************************************************************** | |||
| 1213 | ** Interface to code in fts5_index.c. fts5_index.c contains code | |||
| 1214 | ** to access the data stored in the %_data table. | |||
| 1215 | */ | |||
| 1216 | ||||
| 1217 | typedef struct Fts5Index Fts5Index; | |||
| 1218 | typedef struct Fts5IndexIter Fts5IndexIter; | |||
| 1219 | ||||
| 1220 | struct Fts5IndexIter { | |||
| 1221 | i64 iRowid; /* Presumably the rowid of the current entry - confirm */ | |||
| 1222 | const u8 *pData; /* Presumably the data for the current entry - confirm */ | |||
| 1223 | int nData; /* Presumably the size of pData in bytes - confirm */ | |||
| 1224 | u8 bEof; /* Value returned by the sqlite3Fts5IterEof() macro */ | |||
| 1225 | }; | |||
| 1226 | ||||
| 1227 | #define sqlite3Fts5IterEof(x)((x)->bEof) ((x)->bEof) | |||
| 1228 | ||||
| 1229 | /* | |||
| 1230 | ** Values used as part of the flags argument passed to IndexQuery(). | |||
| 1231 | */ | |||
| 1232 | #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ | |||
| 1233 | #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ | |||
| 1234 | #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ | |||
| 1235 | #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ | |||
| 1236 | ||||
| 1237 | /* The following are used internally by the fts5_index.c module. They are | |||
| 1238 | ** defined here only to make it easier to avoid clashes with the flags | |||
| 1239 | ** above. */ | |||
| 1240 | #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 | |||
| 1241 | #define FTS5INDEX_QUERY_NOOUTPUT 0x0020 | |||
| 1242 | #define FTS5INDEX_QUERY_SKIPHASH 0x0040 | |||
| 1243 | #define FTS5INDEX_QUERY_NOTOKENDATA 0x0080 | |||
| 1244 | #define FTS5INDEX_QUERY_SCANONETERM 0x0100 | |||
| 1245 | ||||
| 1246 | /* | |||
| 1247 | ** Create/destroy an Fts5Index object. | |||
| 1248 | */ | |||
| 1249 | static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); | |||
| 1250 | static int sqlite3Fts5IndexClose(Fts5Index *p); | |||
| 1251 | ||||
| 1252 | /* | |||
| 1253 | ** Return a simple checksum value based on the arguments. | |||
| 1254 | */ | |||
| 1255 | static u64 sqlite3Fts5IndexEntryCksum( | |||
| 1256 | i64 iRowid, | |||
| 1257 | int iCol, | |||
| 1258 | int iPos, | |||
| 1259 | int iIdx, | |||
| 1260 | const char *pTerm, | |||
| 1261 | int nTerm | |||
| 1262 | ); | |||
| 1263 | ||||
| 1264 | /* | |||
| 1265 | ** Argument p points to a buffer containing utf-8 text that is n bytes in | |||
| 1266 | ** size. Return the number of bytes in the nChar character prefix of the | |||
| 1267 | ** buffer, or 0 if there are fewer than nChar characters in total. | |||
| 1268 | */ | |||
| 1269 | static int sqlite3Fts5IndexCharlenToBytelen( | |||
| 1270 | const char *p, | |||
| 1271 | int nByte, | |||
| 1272 | int nChar | |||
| 1273 | ); | |||
| 1274 | ||||
| 1275 | /* | |||
| 1276 | ** Open a new iterator to iterate through all rowids that match the | |||
| 1277 | ** specified token or token prefix. | |||
| 1278 | */ | |||
| 1279 | static int sqlite3Fts5IndexQuery( | |||
| 1280 | Fts5Index *p, /* FTS index to query */ | |||
| 1281 | const char *pToken, int nToken, /* Token (or prefix) to query for */ | |||
| 1282 | int flags, /* Mask of FTS5INDEX_QUERY_X flags */ | |||
| 1283 | Fts5Colset *pColset, /* Match these columns only */ | |||
| 1284 | Fts5IndexIter **ppIter /* OUT: New iterator object */ | |||
| 1285 | ); | |||
| 1286 | ||||
| 1287 | /* | |||
| 1288 | ** The various operations on open token or token prefix iterators opened | |||
| 1289 | ** using sqlite3Fts5IndexQuery(). | |||
| 1290 | */ | |||
| 1291 | static int sqlite3Fts5IterNext(Fts5IndexIter*); | |||
| 1292 | static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); | |||
| 1293 | ||||
| 1294 | /* | |||
| 1295 | ** Close an iterator opened by sqlite3Fts5IndexQuery(). | |||
| 1296 | */ | |||
| 1297 | static void sqlite3Fts5IterClose(Fts5IndexIter*); | |||
| 1298 | ||||
| 1299 | /* | |||
| 1300 | ** Close the reader blob handle, if it is open. | |||
| 1301 | */ | |||
| 1302 | static void sqlite3Fts5IndexCloseReader(Fts5Index*); | |||
| 1303 | ||||
| 1304 | /* | |||
| 1305 | ** This interface is used by the fts5vocab module. | |||
| 1306 | */ | |||
| 1307 | static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*); | |||
| 1308 | static int sqlite3Fts5IterNextScan(Fts5IndexIter*); | |||
| 1309 | static void *sqlite3Fts5StructureRef(Fts5Index*); | |||
| 1310 | static void sqlite3Fts5StructureRelease(void*); | |||
| 1311 | static int sqlite3Fts5StructureTest(Fts5Index*, void*); | |||
| 1312 | ||||
| 1313 | /* | |||
| 1314 | ** Used by xInstToken(): | |||
| 1315 | */ | |||
| 1316 | static int sqlite3Fts5IterToken( | |||
| 1317 | Fts5IndexIter *pIndexIter, | |||
| 1318 | const char *pToken, int nToken, | |||
| 1319 | i64 iRowid, | |||
| 1320 | int iCol, | |||
| 1321 | int iOff, | |||
| 1322 | const char **ppOut, int *pnOut | |||
| 1323 | ); | |||
| 1324 | ||||
| 1325 | /* | |||
| 1326 | ** Insert or remove data to or from the index. Each time a document is | |||
| 1327 | ** added to or removed from the index, this function is called one or more | |||
| 1328 | ** times. | |||
| 1329 | ** | |||
| 1330 | ** For an insert, it must be called once for each token in the new document. | |||
| 1331 | ** If the operation is a delete, it must be called (at least) once for each | |||
| 1332 | ** unique token in the document with an iCol value less than zero. The iPos | |||
| 1333 | ** argument is ignored for a delete. | |||
| 1334 | */ | |||
| 1335 | static int sqlite3Fts5IndexWrite( | |||
| 1336 | Fts5Index *p, /* Index to write to */ | |||
| 1337 | int iCol, /* Column token appears in (-ve -> delete) */ | |||
| 1338 | int iPos, /* Position of token within column */ | |||
| 1339 | const char *pToken, int nToken /* Token to add or remove to or from index */ | |||
| 1340 | ); | |||
| 1341 | ||||
| 1342 | /* | |||
| 1343 | ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to | |||
| 1344 | ** document iDocid. | |||
| 1345 | */ | |||
| 1346 | static int sqlite3Fts5IndexBeginWrite( | |||
| 1347 | Fts5Index *p, /* Index to write to */ | |||
| 1348 | int bDelete, /* True if current operation is a delete */ | |||
| 1349 | i64 iDocid /* Docid to add or remove data from */ | |||
| 1350 | ); | |||
| 1351 | ||||
| 1352 | /* | |||
| 1353 | ** Flush any data stored in the in-memory hash tables to the database. | |||
| 1354 | ** Also close any open blob handles. | |||
| 1355 | */ | |||
| 1356 | static int sqlite3Fts5IndexSync(Fts5Index *p); | |||
| 1357 | ||||
| 1358 | /* | |||
| 1359 | ** Discard any data stored in the in-memory hash tables. Do not write it | |||
| 1360 | ** to the database. Additionally, assume that the contents of the %_data | |||
| 1361 | ** table may have changed on disk. So any in-memory caches of %_data | |||
| 1362 | ** records must be invalidated. | |||
| 1363 | */ | |||
| 1364 | static int sqlite3Fts5IndexRollback(Fts5Index *p); | |||
| 1365 | ||||
| 1366 | /* | |||
| 1367 | ** Get or set the "averages" values. | |||
| 1368 | */ | |||
| 1369 | static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize); | |||
| 1370 | static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); | |||
| 1371 | ||||
| 1372 | /* | |||
| 1373 | ** Functions called by the storage module as part of integrity-check. | |||
| 1374 | */ | |||
| 1375 | static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum); | |||
| 1376 | ||||
| 1377 | /* | |||
| 1378 | ** Called during virtual module initialization to register UDF | |||
| 1379 | ** fts5_decode() with SQLite | |||
| 1380 | */ | |||
| 1381 | static int sqlite3Fts5IndexInit(sqlite3*); | |||
| 1382 | ||||
| 1383 | static int sqlite3Fts5IndexSetCookie(Fts5Index*, int); | |||
| 1384 | ||||
| 1385 | /* | |||
| 1386 | ** Return the total number of entries read from the %_data table by | |||
| 1387 | ** this connection since it was created. | |||
| 1388 | */ | |||
| 1389 | static int sqlite3Fts5IndexReads(Fts5Index *p); | |||
| 1390 | ||||
| 1391 | static int sqlite3Fts5IndexReinit(Fts5Index *p); | |||
| 1392 | static int sqlite3Fts5IndexOptimize(Fts5Index *p); | |||
| 1393 | static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); | |||
| 1394 | static int sqlite3Fts5IndexReset(Fts5Index *p); | |||
| 1395 | ||||
| 1396 | static int sqlite3Fts5IndexLoadConfig(Fts5Index *p); | |||
| 1397 | ||||
| 1398 | static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin); | |||
| 1399 | static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); | |||
| 1400 | ||||
| 1401 | static void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*); | |||
| 1402 | ||||
| 1403 | /* Used to populate hash tables for xInstToken in detail=none/column mode. */ | |||
| 1404 | static int sqlite3Fts5IndexIterWriteTokendata( | |||
| 1405 | Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff | |||
| 1406 | ); | |||
| 1407 | ||||
| 1408 | /* | |||
| 1409 | ** End of interface to code in fts5_index.c. | |||
| 1410 | **************************************************************************/ | |||
| 1411 | ||||
| 1412 | /************************************************************************** | |||
| 1413 | ** Interface to code in fts5_varint.c. | |||
| 1414 | */ | |||
| 1415 | static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); | |||
| 1416 | static int sqlite3Fts5GetVarintLen(u32 iVal); | |||
| 1417 | static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*); | |||
| 1418 | static int sqlite3Fts5PutVarint(unsigned char *p, u64 v); | |||
| 1419 | ||||
| 1420 | #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&(b)) | |||
| 1421 | #define fts5GetVarint sqlite3Fts5GetVarint | |||
| 1422 | ||||
| 1423 | #define fts5FastGetVarint32(a, iOff, nVal) { \ | |||
| 1424 | nVal = (a)[iOff++]; \ | |||
| 1425 | if( nVal & 0x80 ){ \ | |||
| 1426 | iOff--; \ | |||
| 1427 | iOff += fts5GetVarint32(&(a)[iOff], nVal); \ | |||
| 1428 | } \ | |||
| 1429 | } | |||
| 1430 | ||||
| 1431 | ||||
| 1432 | /* | |||
| 1433 | ** End of interface to code in fts5_varint.c. | |||
| 1434 | **************************************************************************/ | |||
| 1435 | ||||
| 1436 | ||||
| 1437 | /************************************************************************** | |||
| 1438 | ** Interface to code in fts5_main.c. | |||
| 1439 | */ | |||
| 1440 | ||||
| 1441 | /* | |||
| 1442 | ** Virtual-table object. | |||
| 1443 | */ | |||
| 1444 | typedef struct Fts5Table Fts5Table; | |||
| 1445 | struct Fts5Table { /* Returned by sqlite3Fts5TableFromCsrid(); passed to sqlite3Fts5FlushToDisk() */ | |||
| 1446 | sqlite3_vtab base; /* Base class used by SQLite core; first member (presumably so the object can be addressed as a sqlite3_vtab* -- confirm) */ | |||
| 1447 | Fts5Config *pConfig; /* Virtual table configuration */ | |||
| 1448 | Fts5Index *pIndex; /* Full-text index */ | |||
| 1449 | }; | |||
| 1450 | ||||
| 1451 | static int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig); | |||
| 1452 | ||||
| 1453 | static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64); | |||
| 1454 | ||||
| 1455 | static int sqlite3Fts5FlushToDisk(Fts5Table*); | |||
| 1456 | ||||
| 1457 | static void sqlite3Fts5ClearLocale(Fts5Config *pConfig); | |||
| 1458 | static void sqlite3Fts5SetLocale(Fts5Config *pConfig, const char *pLoc, int nLoc); | |||
| 1459 | ||||
| 1460 | static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal); | |||
| 1461 | static int sqlite3Fts5DecodeLocaleValue(sqlite3_value *pVal, | |||
| 1462 | const char **ppText, int *pnText, const char **ppLoc, int *pnLoc | |||
| 1463 | ); | |||
| 1464 | ||||
| 1465 | /* | |||
| 1466 | ** End of interface to code in fts5_main.c. | |||
| 1467 | **************************************************************************/ | |||
| 1468 | ||||
| 1469 | /************************************************************************** | |||
| 1470 | ** Interface to code in fts5_hash.c. | |||
| 1471 | */ | |||
| 1472 | typedef struct Fts5Hash Fts5Hash; | |||
| 1473 | ||||
| 1474 | /* | |||
| 1475 | ** Create a hash table, free a hash table. | |||
| 1476 | */ | |||
| 1477 | static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize); | |||
| 1478 | static void sqlite3Fts5HashFree(Fts5Hash*); | |||
| 1479 | ||||
| 1480 | static int sqlite3Fts5HashWrite( | |||
| 1481 | Fts5Hash*, | |||
| 1482 | i64 iRowid, /* Rowid for this entry */ | |||
| 1483 | int iCol, /* Column token appears in (-ve -> delete) */ | |||
| 1484 | int iPos, /* Position of token within column */ | |||
| 1485 | char bByte, | |||
| 1486 | const char *pToken, int nToken /* Token to add or remove to or from index */ | |||
| 1487 | ); | |||
| 1488 | ||||
| 1489 | /* | |||
| 1490 | ** Empty (but do not delete) a hash table. | |||
| 1491 | */ | |||
| 1492 | static void sqlite3Fts5HashClear(Fts5Hash*); | |||
| 1493 | ||||
| 1494 | /* | |||
| 1495 | ** Return true if the hash is empty, false otherwise. | |||
| 1496 | */ | |||
| 1497 | static int sqlite3Fts5HashIsEmpty(Fts5Hash*); | |||
| 1498 | ||||
| 1499 | static int sqlite3Fts5HashQuery( | |||
| 1500 | Fts5Hash*, /* Hash table to query */ | |||
| 1501 | int nPre, | |||
| 1502 | const char *pTerm, int nTerm, /* Query term */ | |||
| 1503 | void **ppObj, /* OUT: Pointer to doclist for pTerm */ | |||
| 1504 | int *pnDoclist /* OUT: Size of doclist in bytes */ | |||
| 1505 | ); | |||
| 1506 | ||||
| 1507 | static int sqlite3Fts5HashScanInit( | |||
| 1508 | Fts5Hash*, /* Hash table to query */ | |||
| 1509 | const char *pTerm, int nTerm /* Query prefix */ | |||
| 1510 | ); | |||
| 1511 | static void sqlite3Fts5HashScanNext(Fts5Hash*); | |||
| 1512 | static int sqlite3Fts5HashScanEof(Fts5Hash*); | |||
| 1513 | static void sqlite3Fts5HashScanEntry(Fts5Hash *, | |||
| 1514 | const char **pzTerm, /* OUT: term (nul-terminated) */ | |||
| 1515 | int *pnTerm, /* OUT: Size of term in bytes */ | |||
| 1516 | const u8 **ppDoclist, /* OUT: pointer to doclist */ | |||
| 1517 | int *pnDoclist /* OUT: size of doclist in bytes */ | |||
| 1518 | ); | |||
| 1519 | ||||
| 1520 | ||||
| 1521 | ||||
| 1522 | /* | |||
| 1523 | ** End of interface to code in fts5_hash.c. | |||
| 1524 | **************************************************************************/ | |||
| 1525 | ||||
| 1526 | /************************************************************************** | |||
| 1527 | ** Interface to code in fts5_storage.c. fts5_storage.c contains | |||
| 1528 | ** code to access the data stored in the %_content and %_docsize tables. | |||
| 1529 | */ | |||
| 1530 | ||||
| 1531 | #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ | |||
| 1532 | #define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ | |||
| 1533 | #define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */ | |||
| 1534 | ||||
| 1535 | typedef struct Fts5Storage Fts5Storage; | |||
| 1536 | ||||
| 1537 | static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); | |||
| 1538 | static int sqlite3Fts5StorageClose(Fts5Storage *p); | |||
| 1539 | static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName); | |||
| 1540 | ||||
| 1541 | static int sqlite3Fts5DropAll(Fts5Config*); | |||
| 1542 | static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); | |||
| 1543 | ||||
| 1544 | static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int); | |||
| 1545 | static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, int, sqlite3_value**, i64*); | |||
| 1546 | static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64); | |||
| 1547 | ||||
| 1548 | static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg); | |||
| 1549 | ||||
| 1550 | static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**); | |||
| 1551 | static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); | |||
| 1552 | ||||
| 1553 | static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); | |||
| 1554 | static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); | |||
| 1555 | static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); | |||
| 1556 | ||||
| 1557 | static int sqlite3Fts5StorageSync(Fts5Storage *p); | |||
| 1558 | static int sqlite3Fts5StorageRollback(Fts5Storage *p); | |||
| 1559 | ||||
| 1560 | static int sqlite3Fts5StorageConfigValue( | |||
| 1561 | Fts5Storage *p, const char*, sqlite3_value*, int | |||
| 1562 | ); | |||
| 1563 | ||||
| 1564 | static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); | |||
| 1565 | static int sqlite3Fts5StorageRebuild(Fts5Storage *p); | |||
| 1566 | static int sqlite3Fts5StorageOptimize(Fts5Storage *p); | |||
| 1567 | static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge); | |||
| 1568 | static int sqlite3Fts5StorageReset(Fts5Storage *p); | |||
| 1569 | ||||
| 1570 | static void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage*); | |||
| 1571 | static int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel); | |||
| 1572 | ||||
| 1573 | /* | |||
| 1574 | ** End of interface to code in fts5_storage.c. | |||
| 1575 | **************************************************************************/ | |||
| 1576 | ||||
| 1577 | ||||
| 1578 | /************************************************************************** | |||
| 1579 | ** Interface to code in fts5_expr.c. | |||
| 1580 | */ | |||
| 1581 | typedef struct Fts5Expr Fts5Expr; | |||
| 1582 | typedef struct Fts5ExprNode Fts5ExprNode; | |||
| 1583 | typedef struct Fts5Parse Fts5Parse; | |||
| 1584 | typedef struct Fts5Token Fts5Token; | |||
| 1585 | typedef struct Fts5ExprPhrase Fts5ExprPhrase; | |||
| 1586 | typedef struct Fts5ExprNearset Fts5ExprNearset; | |||
| 1587 | ||||
| 1588 | struct Fts5Token { /* Token value passed as Fts5Token* to sqlite3Fts5ParseTerm(), sqlite3Fts5ParseColset() and related parser callbacks */ | |||
| 1589 | const char *p; /* Token text (not NULL terminated) */ | |||
| 1590 | int n; /* Size of buffer p in bytes */ | |||
| 1591 | }; | |||
| 1592 | ||||
| 1593 | /* Parse a MATCH expression. */ | |||
| 1594 | static int sqlite3Fts5ExprNew( | |||
| 1595 | Fts5Config *pConfig, | |||
| 1596 | int bPhraseToAnd, | |||
| 1597 | int iCol, /* Column on LHS of MATCH operator */ | |||
| 1598 | const char *zExpr, | |||
| 1599 | Fts5Expr **ppNew, | |||
| 1600 | char **pzErr | |||
| 1601 | ); | |||
| 1602 | static int sqlite3Fts5ExprPattern( | |||
| 1603 | Fts5Config *pConfig, | |||
| 1604 | int bGlob, | |||
| 1605 | int iCol, | |||
| 1606 | const char *zText, | |||
| 1607 | Fts5Expr **pp | |||
| 1608 | ); | |||
| 1609 | ||||
| 1610 | /* | |||
| 1611 | ** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc); | |||
| 1612 | ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); | |||
| 1613 | ** rc = sqlite3Fts5ExprNext(pExpr, iMax) | |||
| 1614 | ** ){ | |||
| 1615 | ** // The document with rowid iRowid matches the expression! | |||
| 1616 | ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); | |||
| 1617 | ** } | |||
| 1618 | */ | |||
| 1619 | static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc); | |||
| 1620 | static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax); | |||
| 1621 | static int sqlite3Fts5ExprEof(Fts5Expr*); | |||
| 1622 | static i64 sqlite3Fts5ExprRowid(Fts5Expr*); | |||
| 1623 | ||||
| 1624 | static void sqlite3Fts5ExprFree(Fts5Expr*); | |||
| 1625 | static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2); | |||
| 1626 | ||||
| 1627 | /* Called during startup to register a UDF with SQLite */ | |||
| 1628 | static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); | |||
| 1629 | ||||
| 1630 | static int sqlite3Fts5ExprPhraseCount(Fts5Expr*); | |||
| 1631 | static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); | |||
| 1632 | static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); | |||
| 1633 | ||||
| 1634 | typedef struct Fts5PoslistPopulator Fts5PoslistPopulator; | |||
| 1635 | static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int); | |||
| 1636 | static int sqlite3Fts5ExprPopulatePoslists( | |||
| 1637 | Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int | |||
| 1638 | ); | |||
| 1639 | static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64); | |||
| 1640 | ||||
| 1641 | static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); | |||
| 1642 | ||||
| 1643 | static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); | |||
| 1644 | ||||
| 1645 | static int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*); | |||
| 1646 | static int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*); | |||
| 1647 | static void sqlite3Fts5ExprClearTokens(Fts5Expr*); | |||
| 1648 | ||||
| 1649 | /******************************************* | |||
| 1650 | ** The fts5_expr.c API above this point is used by the other hand-written | |||
| 1651 | ** C code in this module. The interfaces below this point are called by | |||
| 1652 | ** the parser code in fts5parse.y. */ | |||
| 1653 | ||||
| 1654 | static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); | |||
| 1655 | ||||
| 1656 | static Fts5ExprNode *sqlite3Fts5ParseNode( | |||
| 1657 | Fts5Parse *pParse, | |||
| 1658 | int eType, | |||
| 1659 | Fts5ExprNode *pLeft, | |||
| 1660 | Fts5ExprNode *pRight, | |||
| 1661 | Fts5ExprNearset *pNear | |||
| 1662 | ); | |||
| 1663 | ||||
| 1664 | static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( | |||
| 1665 | Fts5Parse *pParse, | |||
| 1666 | Fts5ExprNode *pLeft, | |||
| 1667 | Fts5ExprNode *pRight | |||
| 1668 | ); | |||
| 1669 | ||||
| 1670 | static Fts5ExprPhrase *sqlite3Fts5ParseTerm( | |||
| 1671 | Fts5Parse *pParse, | |||
| 1672 | Fts5ExprPhrase *pPhrase, | |||
| 1673 | Fts5Token *pToken, | |||
| 1674 | int bPrefix | |||
| 1675 | ); | |||
| 1676 | ||||
| 1677 | static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*); | |||
| 1678 | ||||
| 1679 | static Fts5ExprNearset *sqlite3Fts5ParseNearset( | |||
| 1680 | Fts5Parse*, | |||
| 1681 | Fts5ExprNearset*, | |||
| 1682 | Fts5ExprPhrase* | |||
| 1683 | ); | |||
| 1684 | ||||
| 1685 | static Fts5Colset *sqlite3Fts5ParseColset( | |||
| 1686 | Fts5Parse*, | |||
| 1687 | Fts5Colset*, | |||
| 1688 | Fts5Token * | |||
| 1689 | ); | |||
| 1690 | ||||
| 1691 | static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); | |||
| 1692 | static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); | |||
| 1693 | static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); | |||
| 1694 | ||||
| 1695 | static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); | |||
| 1696 | static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*); | |||
| 1697 | static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*); | |||
| 1698 | static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); | |||
| 1699 | static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); | |||
| 1700 | ||||
| 1701 | /* | |||
| 1702 | ** End of interface to code in fts5_expr.c. | |||
| 1703 | **************************************************************************/ | |||
| 1704 | ||||
| 1705 | ||||
| 1706 | ||||
| 1707 | /************************************************************************** | |||
| 1708 | ** Interface to code in fts5_aux.c. | |||
| 1709 | */ | |||
| 1710 | ||||
| 1711 | static int sqlite3Fts5AuxInit(fts5_api*); | |||
| 1712 | /* | |||
| 1713 | ** End of interface to code in fts5_aux.c. | |||
| 1714 | **************************************************************************/ | |||
| 1715 | ||||
| 1716 | /************************************************************************** | |||
| 1717 | ** Interface to code in fts5_tokenize.c. | |||
| 1718 | */ | |||
| 1719 | ||||
| 1720 | static int sqlite3Fts5TokenizerInit(fts5_api*); | |||
| 1721 | static int sqlite3Fts5TokenizerPattern( | |||
| 1722 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), | |||
| 1723 | Fts5Tokenizer *pTok | |||
| 1724 | ); | |||
| 1725 | static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig*); | |||
| 1726 | /* | |||
| 1727 | ** End of interface to code in fts5_tokenize.c. | |||
| 1728 | **************************************************************************/ | |||
| 1729 | ||||
| 1730 | /************************************************************************** | |||
| 1731 | ** Interface to code in fts5_vocab.c. | |||
| 1732 | */ | |||
| 1733 | ||||
| 1734 | static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*); | |||
| 1735 | ||||
| 1736 | /* | |||
| 1737 | ** End of interface to code in fts5_vocab.c. | |||
| 1738 | **************************************************************************/ | |||
| 1739 | ||||
| 1740 | ||||
| 1741 | /************************************************************************** | |||
| 1742 | ** Interface to automatically generated code in fts5_unicode2.c. | |||
| 1743 | */ | |||
| 1744 | static int sqlite3Fts5UnicodeIsdiacritic(int c); | |||
| 1745 | static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); | |||
| 1746 | ||||
| 1747 | static int sqlite3Fts5UnicodeCatParse(const char*, u8*); | |||
| 1748 | static int sqlite3Fts5UnicodeCategory(u32 iCode); | |||
| 1749 | static void sqlite3Fts5UnicodeAscii(u8*, u8*); | |||
| 1750 | /* | |||
| 1751 | ** End of interface to code in fts5_unicode2.c. | |||
| 1752 | **************************************************************************/ | |||
| 1753 | ||||
| 1754 | #endif | |||
| 1755 | ||||
| 1756 | #line 1 "fts5parse.h" | |||
| 1757 | #define FTS5_OR 1 | |||
| 1758 | #define FTS5_AND 2 | |||
| 1759 | #define FTS5_NOT 3 | |||
| 1760 | #define FTS5_TERM 4 | |||
| 1761 | #define FTS5_COLON 5 | |||
| 1762 | #define FTS5_MINUS 6 | |||
| 1763 | #define FTS5_LCP 7 | |||
| 1764 | #define FTS5_RCP 8 | |||
| 1765 | #define FTS5_STRING 9 | |||
| 1766 | #define FTS5_LP 10 | |||
| 1767 | #define FTS5_RP 11 | |||
| 1768 | #define FTS5_CARET 12 | |||
| 1769 | #define FTS5_COMMA 13 | |||
| 1770 | #define FTS5_PLUS 14 | |||
| 1771 | #define FTS5_STAR 15 | |||
| 1772 | ||||
| 1773 | #line 1 "fts5parse.c" | |||
| 1774 | /* This file is automatically generated by Lemon from input grammar | |||
| 1775 | ** source file "fts5parse.y". | |||
| 1776 | */ | |||
| 1777 | /* | |||
| 1778 | ** 2000-05-29 | |||
| 1779 | ** | |||
| 1780 | ** The author disclaims copyright to this source code. In place of | |||
| 1781 | ** a legal notice, here is a blessing: | |||
| 1782 | ** | |||
| 1783 | ** May you do good and not evil. | |||
| 1784 | ** May you find forgiveness for yourself and forgive others. | |||
| 1785 | ** May you share freely, never taking more than you give. | |||
| 1786 | ** | |||
| 1787 | ************************************************************************* | |||
| 1788 | ** Driver template for the LEMON parser generator. | |||
| 1789 | ** | |||
| 1790 | ** The "lemon" program processes an LALR(1) input grammar file, then uses | |||
| 1791 | ** this template to construct a parser. The "lemon" program inserts text | |||
| 1792 | ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the | |||
| 1793 | ** interstitial "-" characters) contained in this template is changed into | |||
| 1794 | ** the value of the %name directive from the grammar. Otherwise, the content | |||
| 1795 | ** of this template is copied straight through into the generated parser | |||
| 1796 | ** source file. | |||
| 1797 | ** | |||
| 1798 | ** The following is the concatenation of all %include directives from the | |||
| 1799 | ** input grammar file: | |||
| 1800 | */ | |||
| 1801 | /************ Begin %include sections from the grammar ************************/ | |||
| 1802 | #line 47 "fts5parse.y" | |||
| 1803 | ||||
| 1804 | /* #include "fts5Int.h" */ | |||
| 1805 | /* #include "fts5parse.h" */ | |||
| 1806 | ||||
| 1807 | /* | |||
| 1808 | ** Disable all error recovery processing in the parser push-down | |||
| 1809 | ** automaton. | |||
| 1810 | */ | |||
| 1811 | #define fts5YYNOERRORRECOVERY 1 | |||
| 1812 | ||||
| 1813 | /* | |||
| 1814 | ** Make fts5yytestcase() the same as testcase() | |||
| 1815 | */ | |||
| 1816 | #define fts5yytestcase(X) testcase(X) | |||
| 1817 | ||||
| 1818 | /* | |||
| 1819 | ** Indicate that sqlite3ParserFree() will never be called with a null | |||
| 1820 | ** pointer. | |||
| 1821 | */ | |||
| 1822 | #define fts5YYPARSEFREENOTNULL 1 | |||
| 1823 | ||||
| 1824 | /* | |||
| 1825 | ** Alternative datatype for the argument to the malloc() routine passed | |||
| 1826 | ** into sqlite3ParserAlloc(). The default is size_t. | |||
| 1827 | */ | |||
| 1828 | #define fts5YYMALLOCARGTYPE u64 | |||
| 1829 | ||||
| 1830 | #line 58 "fts5parse.sql" | |||
| 1831 | /**************** End of %include directives **********************************/ | |||
| 1832 | /* These constants specify the various numeric values for terminal symbols. | |||
| 1833 | ***************** Begin token definitions *************************************/ | |||
| 1834 | #ifndef FTS5_OR | |||
| 1835 | #define FTS5_OR 1 | |||
| 1836 | #define FTS5_AND 2 | |||
| 1837 | #define FTS5_NOT 3 | |||
| 1838 | #define FTS5_TERM 4 | |||
| 1839 | #define FTS5_COLON 5 | |||
| 1840 | #define FTS5_MINUS 6 | |||
| 1841 | #define FTS5_LCP 7 | |||
| 1842 | #define FTS5_RCP 8 | |||
| 1843 | #define FTS5_STRING 9 | |||
| 1844 | #define FTS5_LP 10 | |||
| 1845 | #define FTS5_RP 11 | |||
| 1846 | #define FTS5_CARET 12 | |||
| 1847 | #define FTS5_COMMA 13 | |||
| 1848 | #define FTS5_PLUS 14 | |||
| 1849 | #define FTS5_STAR 15 | |||
| 1850 | #endif | |||
| 1851 | /**************** End token definitions ***************************************/ | |||
| 1852 | ||||
| 1853 | /* The next section is a series of control #defines that configure | |||
| 1854 | ** various aspects of the generated parser. | |||
| 1855 | ** fts5YYCODETYPE is the data type used to store the integer codes | |||
| 1856 | ** that represent terminal and non-terminal symbols. | |||
| 1857 | ** "unsigned char" is used if there are fewer than | |||
| 1858 | ** 256 symbols. Larger types otherwise. | |||
| 1859 | ** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for | |||
| 1860 | ** any terminal or nonterminal symbol. | |||
| 1861 | ** fts5YYFALLBACK If defined, this indicates that one or more tokens | |||
| 1862 | ** (also known as: "terminal symbols") have fall-back | |||
| 1863 | ** values which should be used if the original symbol | |||
| 1864 | ** would not parse. This permits keywords to sometimes | |||
| 1865 | ** be used as identifiers, for example. | |||
| 1866 | ** fts5YYACTIONTYPE is the data type used for "action codes" - numbers | |||
| 1867 | ** that indicate what to do in response to the next | |||
| 1868 | ** token. | |||
| 1869 | ** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal | |||
| 1870 | ** symbols. Background: A "minor type" is a semantic | |||
| 1871 | ** value associated with a terminal or non-terminal | |||
| 1872 | ** symbols. For example, for an "ID" terminal symbol, | |||
| 1873 | ** the minor type might be the name of the identifier. | |||
| 1874 | ** Each non-terminal can have a different minor type. | |||
| 1875 | ** Terminal symbols all have the same minor type, though. | |||
| 1876 | ** This macros defines the minor type for terminal | |||
| 1877 | ** symbols. | |||
| 1878 | ** fts5YYMINORTYPE is the data type used for all minor types. | |||
| 1879 | ** This is typically a union of many types, one of | |||
| 1880 | ** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union | |||
| 1881 | ** for terminal symbols is called "fts5yy0". | |||
| 1882 | ** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If | |||
| 1883 | ** zero the stack is dynamically sized using realloc() | |||
| 1884 | ** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument | |||
| 1885 | ** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument | |||
| 1886 | ** sqlite3Fts5ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter | |||
| 1887 | ** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser | |||
| 1888 | ** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser | |||
| 1889 | ** sqlite3Fts5ParserCTX_* As sqlite3Fts5ParserARG_ except for %extra_context | |||
| 1890 | ** fts5YYREALLOC Name of the realloc() function to use | |||
| 1891 | ** fts5YYFREE Name of the free() function to use | |||
| 1892 | ** fts5YYDYNSTACK True if stack space should be extended on heap | |||
| 1893 | ** fts5YYERRORSYMBOL is the code number of the error symbol. If not | |||
| 1894 | ** defined, then do no error processing. | |||
| 1895 | ** fts5YYNSTATE the combined number of states. | |||
| 1896 | ** fts5YYNRULE the number of rules in the grammar | |||
| 1897 | ** fts5YYNFTS5TOKEN Number of terminal symbols | |||
| 1898 | ** fts5YY_MAX_SHIFT Maximum value for shift actions | |||
| 1899 | ** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions | |||
| 1900 | ** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions | |||
| 1901 | ** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error | |||
| 1902 | ** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept | |||
| 1903 | ** fts5YY_NO_ACTION The fts5yy_action[] code for no-op | |||
| 1904 | ** fts5YY_MIN_REDUCE Minimum value for reduce actions | |||
| 1905 | ** fts5YY_MAX_REDUCE Maximum value for reduce actions | |||
| 1906 | ** fts5YY_MIN_DSTRCTR Minimum symbol value that has a destructor | |||
| 1907 | ** fts5YY_MAX_DSTRCTR Maximum symbol value that has a destructor | |||
| 1908 | */ | |||
| 1909 | #ifndef INTERFACE1 | |||
| 1910 | # define INTERFACE1 1 | |||
| 1911 | #endif | |||
| 1912 | /************* Begin control #defines *****************************************/ | |||
| 1913 | #define fts5YYCODETYPEunsigned char unsigned char | |||
| 1914 | #define fts5YYNOCODE27 27 | |||
| 1915 | #define fts5YYACTIONTYPEunsigned char unsigned char | |||
| 1916 | #define sqlite3Fts5ParserFTS5TOKENTYPEFts5Token Fts5Token | |||
| 1917 | typedef union { | |||
| 1918 | int fts5yyinit; | |||
| 1919 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yy0; | |||
| 1920 | int fts5yy4; | |||
| 1921 | Fts5Colset* fts5yy11; | |||
| 1922 | Fts5ExprNode* fts5yy24; | |||
| 1923 | Fts5ExprNearset* fts5yy46; | |||
| 1924 | Fts5ExprPhrase* fts5yy53; | |||
| 1925 | } fts5YYMINORTYPE; | |||
| 1926 | #ifndef fts5YYSTACKDEPTH100 | |||
| 1927 | #define fts5YYSTACKDEPTH100 100 | |||
| 1928 | #endif | |||
| 1929 | #define sqlite3Fts5ParserARG_SDECLFts5Parse *pParse; Fts5Parse *pParse; | |||
| 1930 | #define sqlite3Fts5ParserARG_PDECL,Fts5Parse *pParse ,Fts5Parse *pParse | |||
| 1931 | #define sqlite3Fts5ParserARG_PARAM,pParse ,pParse | |||
| 1932 | #define sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; Fts5Parse *pParse=fts5yypParser->pParse; | |||
| 1933 | #define sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; fts5yypParser->pParse=pParse; | |||
| 1934 | #define fts5YYREALLOCrealloc realloc | |||
| 1935 | #define fts5YYFREEfree free | |||
| 1936 | #define fts5YYDYNSTACK0 0 | |||
| 1937 | #define sqlite3Fts5ParserCTX_SDECL | |||
| 1938 | #define sqlite3Fts5ParserCTX_PDECL | |||
| 1939 | #define sqlite3Fts5ParserCTX_PARAM | |||
| 1940 | #define sqlite3Fts5ParserCTX_FETCH | |||
| 1941 | #define sqlite3Fts5ParserCTX_STORE | |||
| 1942 | #define fts5YYNSTATE35 35 | |||
| 1943 | #define fts5YYNRULE28 28 | |||
| 1944 | #define fts5YYNRULE_WITH_ACTION28 28 | |||
| 1945 | #define fts5YYNFTS5TOKEN16 16 | |||
| 1946 | #define fts5YY_MAX_SHIFT34 34 | |||
| 1947 | #define fts5YY_MIN_SHIFTREDUCE52 52 | |||
| 1948 | #define fts5YY_MAX_SHIFTREDUCE79 79 | |||
| 1949 | #define fts5YY_ERROR_ACTION80 80 | |||
| 1950 | #define fts5YY_ACCEPT_ACTION81 81 | |||
| 1951 | #define fts5YY_NO_ACTION82 82 | |||
| 1952 | #define fts5YY_MIN_REDUCE83 83 | |||
| 1953 | #define fts5YY_MAX_REDUCE110 110 | |||
| 1954 | #define fts5YY_MIN_DSTRCTR16 16 | |||
| 1955 | #define fts5YY_MAX_DSTRCTR24 24 | |||
| 1956 | /************* End control #defines *******************************************/ | |||
| 1957 | #define fts5YY_NLOOKAHEAD((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0]))) ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0]))) | |||
| 1958 | ||||
| 1959 | /* Define the fts5yytestcase() macro to be a no-op if it is not already defined | |||
| 1960 | ** otherwise. | |||
| 1961 | ** | |||
| 1962 | ** Applications can choose to define fts5yytestcase() in the %include section | |||
| 1963 | ** to a macro that can assist in verifying code coverage. For production | |||
| 1964 | ** code the fts5yytestcase() macro should be turned off. But it is useful | |||
| 1965 | ** for testing. | |||
| 1966 | */ | |||
| 1967 | #ifndef fts5yytestcase | |||
| 1968 | # define fts5yytestcase(X) | |||
| 1969 | #endif | |||
| 1970 | ||||
| 1971 | /* Macro to determine if stack space has the ability to grow using | |||
| 1972 | ** heap memory. | |||
| 1973 | */ | |||
| 1974 | #if fts5YYSTACKDEPTH100<=0 || fts5YYDYNSTACK0 | |||
| 1975 | # define fts5YYGROWABLESTACK0 1 | |||
| 1976 | #else | |||
| 1977 | # define fts5YYGROWABLESTACK0 0 | |||
| 1978 | #endif | |||
| 1979 | ||||
| 1980 | /* Guarantee a minimum number of initial stack slots. | |||
| 1981 | */ | |||
| 1982 | #if fts5YYSTACKDEPTH100<=0 | |||
| 1983 | # undef fts5YYSTACKDEPTH100 | |||
| 1984 | # define fts5YYSTACKDEPTH100 2 /* Need a minimum stack size */ | |||
| 1985 | #endif | |||
| 1986 | ||||
| 1987 | ||||
| 1988 | /* Next are the tables used to determine what action to take based on the | |||
| 1989 | ** current state and lookahead token. These tables are used to implement | |||
| 1990 | ** functions that take a state number and lookahead value and return an | |||
| 1991 | ** action integer. | |||
| 1992 | ** | |||
| 1993 | ** Suppose the action integer is N. Then the action is determined as | |||
| 1994 | ** follows | |||
| 1995 | ** | |||
| 1996 | ** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead | |||
| 1997 | ** token onto the stack and goto state N. | |||
| 1998 | ** | |||
| 1999 | ** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then | |||
| 2000 | ** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE. | |||
| 2001 | ** | |||
| 2002 | ** N == fts5YY_ERROR_ACTION A syntax error has occurred. | |||
| 2003 | ** | |||
| 2004 | ** N == fts5YY_ACCEPT_ACTION The parser accepts its input. | |||
| 2005 | ** | |||
| 2006 | ** N == fts5YY_NO_ACTION No such action. Denotes unused | |||
| 2007 | ** slots in the fts5yy_action[] table. | |||
| 2008 | ** | |||
| 2009 | ** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE | |||
| 2010 | ** and fts5YY_MAX_REDUCE | |||
| 2011 | ** | |||
| 2012 | ** The action table is constructed as a single large table named fts5yy_action[]. | |||
| 2013 | ** Given state S and lookahead X, the action is computed as either: | |||
| 2014 | ** | |||
| 2015 | ** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ] | |||
| 2016 | ** (B) N = fts5yy_default[S] | |||
| 2017 | ** | |||
| 2018 | ** The (A) formula is preferred. The B formula is used instead if | |||
| 2019 | ** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X. | |||
| 2020 | ** | |||
| 2021 | ** The formulas above are for computing the action when the lookahead is | |||
| 2022 | ** a terminal symbol. If the lookahead is a non-terminal (as occurs after | |||
| 2023 | ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of | |||
| 2024 | ** the fts5yy_shift_ofst[] array. | |||
| 2025 | ** | |||
| 2026 | ** The following are the tables generated in this section: | |||
| 2027 | ** | |||
| 2028 | ** fts5yy_action[] A single table containing all actions. | |||
| 2029 | ** fts5yy_lookahead[] A table containing the lookahead for each entry in | |||
| 2030 | ** fts5yy_action. Used to detect hash collisions. | |||
| 2031 | ** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for | |||
| 2032 | ** shifting terminals. | |||
| 2033 | ** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for | |||
| 2034 | ** shifting non-terminals after a reduce. | |||
| 2035 | ** fts5yy_default[] Default action for each state. | |||
| 2036 | ** | |||
| 2037 | *********** Begin parsing tables **********************************************/ | |||
| 2038 | #define fts5YY_ACTTAB_COUNT(105) (105) | |||
| 2039 | static const fts5YYACTIONTYPEunsigned char fts5yy_action[] = { | |||
| 2040 | /* 0 */ 81, 20, 96, 6, 28, 99, 98, 26, 26, 18, | |||
| 2041 | /* 10 */ 96, 6, 28, 17, 98, 56, 26, 19, 96, 6, | |||
| 2042 | /* 20 */ 28, 14, 98, 14, 26, 31, 92, 96, 6, 28, | |||
| 2043 | /* 30 */ 108, 98, 25, 26, 21, 96, 6, 28, 78, 98, | |||
| 2044 | /* 40 */ 58, 26, 29, 96, 6, 28, 107, 98, 22, 26, | |||
| 2045 | /* 50 */ 24, 16, 12, 11, 1, 13, 13, 24, 16, 23, | |||
| 2046 | /* 60 */ 11, 33, 34, 13, 97, 8, 27, 32, 98, 7, | |||
| 2047 | /* 70 */ 26, 3, 4, 5, 3, 4, 5, 3, 83, 4, | |||
| 2048 | /* 80 */ 5, 3, 63, 5, 3, 62, 12, 2, 86, 13, | |||
| 2049 | /* 90 */ 9, 30, 10, 10, 54, 57, 75, 78, 78, 53, | |||
| 2050 | /* 100 */ 57, 15, 82, 82, 71, | |||
| 2051 | }; | |||
| 2052 | static const fts5YYCODETYPEunsigned char fts5yy_lookahead[] = { | |||
| 2053 | /* 0 */ 16, 17, 18, 19, 20, 22, 22, 24, 24, 17, | |||
| 2054 | /* 10 */ 18, 19, 20, 7, 22, 9, 24, 17, 18, 19, | |||
| 2055 | /* 20 */ 20, 9, 22, 9, 24, 13, 17, 18, 19, 20, | |||
| 2056 | /* 30 */ 26, 22, 24, 24, 17, 18, 19, 20, 15, 22, | |||
| 2057 | /* 40 */ 9, 24, 17, 18, 19, 20, 26, 22, 21, 24, | |||
| 2058 | /* 50 */ 6, 7, 9, 9, 10, 12, 12, 6, 7, 21, | |||
| 2059 | /* 60 */ 9, 24, 25, 12, 18, 5, 20, 14, 22, 5, | |||
| 2060 | /* 70 */ 24, 3, 1, 2, 3, 1, 2, 3, 0, 1, | |||
| 2061 | /* 80 */ 2, 3, 11, 2, 3, 11, 9, 10, 5, 12, | |||
| 2062 | /* 90 */ 23, 24, 10, 10, 8, 9, 9, 15, 15, 8, | |||
| 2063 | /* 100 */ 9, 9, 27, 27, 11, 27, 27, 27, 27, 27, | |||
| 2064 | /* 110 */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, | |||
| 2065 | /* 120 */ 27, | |||
| 2066 | }; | |||
| 2067 | #define fts5YY_SHIFT_COUNT(34) (34) | |||
| 2068 | #define fts5YY_SHIFT_MIN(0) (0) | |||
| 2069 | #define fts5YY_SHIFT_MAX(93) (93) | |||
| 2070 | static const unsigned char fts5yy_shift_ofst[] = { | |||
| 2071 | /* 0 */ 44, 44, 44, 44, 44, 44, 51, 77, 43, 12, | |||
| 2072 | /* 10 */ 14, 83, 82, 14, 23, 23, 31, 31, 71, 74, | |||
| 2073 | /* 20 */ 78, 81, 86, 91, 6, 53, 53, 60, 64, 68, | |||
| 2074 | /* 30 */ 53, 87, 92, 53, 93, | |||
| 2075 | }; | |||
| 2076 | #define fts5YY_REDUCE_COUNT(17) (17) | |||
| 2077 | #define fts5YY_REDUCE_MIN(-17) (-17) | |||
| 2078 | #define fts5YY_REDUCE_MAX(67) (67) | |||
| 2079 | static const signed char fts5yy_reduce_ofst[] = { | |||
| 2080 | /* 0 */ -16, -8, 0, 9, 17, 25, 46, -17, -17, 37, | |||
| 2081 | /* 10 */ 67, 4, 4, 8, 4, 20, 27, 38, | |||
| 2082 | }; | |||
| 2083 | static const fts5YYACTIONTYPEunsigned char fts5yy_default[] = { | |||
| 2084 | /* 0 */ 80, 80, 80, 80, 80, 80, 95, 80, 80, 105, | |||
| 2085 | /* 10 */ 80, 110, 110, 80, 110, 110, 80, 80, 80, 80, | |||
| 2086 | /* 20 */ 80, 91, 80, 80, 80, 101, 100, 80, 80, 90, | |||
| 2087 | /* 30 */ 103, 80, 80, 104, 80, | |||
| 2088 | }; | |||
| 2089 | /********** End of lemon-generated parsing tables *****************************/ | |||
| 2090 | ||||
| 2091 | /* The next table maps tokens (terminal symbols) into fallback tokens. | |||
| 2092 | ** If a construct like the following: | |||
| 2093 | ** | |||
| 2094 | ** %fallback ID X Y Z. | |||
| 2095 | ** | |||
| 2096 | ** appears in the grammar, then ID becomes a fallback token for X, Y, | |||
| 2097 | ** and Z. Whenever one of the tokens X, Y, or Z is input to the parser | |||
| 2098 | ** but it does not parse, the type of the token is changed to ID and | |||
| 2099 | ** the parse is retried before an error is thrown. | |||
| 2100 | ** | |||
| 2101 | ** This feature can be used, for example, to cause some keywords in a language | |||
| 2102 | ** to revert to identifiers if the keyword does not apply in the context where | |||
| 2103 | ** it appears. | |||
| 2104 | */ | |||
| 2105 | #ifdef fts5YYFALLBACK | |||
| 2106 | static const fts5YYCODETYPEunsigned char fts5yyFallback[] = { | |||
| 2107 | }; | |||
| 2108 | #endif /* fts5YYFALLBACK */ | |||
| 2109 | ||||
| 2110 | /* The following structure represents a single element of the | |||
| 2111 | ** parser's stack. Information stored includes: | |||
| 2112 | ** | |||
| 2113 | ** + The state number for the parser at this level of the stack. | |||
| 2114 | ** | |||
| 2115 | ** + The value of the token stored at this level of the stack. | |||
| 2116 | ** (In other words, the "major" token.) | |||
| 2117 | ** | |||
| 2118 | ** + The semantic value stored at this level of the stack. This is | |||
| 2119 | ** the information used by the action routines in the grammar. | |||
| 2120 | ** It is sometimes called the "minor" token. | |||
| 2121 | ** | |||
| 2122 | ** After the "shift" half of a SHIFTREDUCE action, the stateno field | |||
| 2123 | ** actually contains the reduce action for the second half of the | |||
| 2124 | ** SHIFTREDUCE. | |||
| 2125 | */ | |||
| 2126 | struct fts5yyStackEntry { | |||
| 2127 | fts5YYACTIONTYPEunsigned char stateno; /* The state-number; after the shift half of a SHIFTREDUCE, the pending reduce action */ | |||
| 2128 | fts5YYCODETYPEunsigned char major; /* The major token value. This is the code | |||
| 2129 | ** number for the symbol (terminal or non-terminal) at this stack level */ | |||
| 2130 | fts5YYMINORTYPE minor; /* The semantic ("minor") value of the symbol. Which | |||
| 2131 | ** union member is meaningful depends on the major code */ | |||
| 2132 | }; | |||
| 2133 | typedef struct fts5yyStackEntry fts5yyStackEntry; /* Short name used by the parser engine */ | |||
| 2134 | ||||
| 2135 | /* The state of the parser is completely contained in an instance of | |||
| 2136 | ** the following structure */ | |||
| 2137 | struct fts5yyParser { | |||
| 2138 | fts5yyStackEntry *fts5yytos; /* Pointer to top element of the stack */ | |||
| 2139 | #ifdef fts5YYTRACKMAXSTACKDEPTH | |||
| 2140 | int fts5yyhwm; /* High-water mark of the stack (reset to 0 by sqlite3Fts5ParserInit) */ | |||
| 2141 | #endif | |||
| 2142 | #ifndef fts5YYNOERRORRECOVERY1 | |||
| 2143 | int fts5yyerrcnt; /* Shifts left before leaving the error state (set to -1 by sqlite3Fts5ParserInit) */ | |||
| 2144 | #endif | |||
| 2145 | sqlite3Fts5ParserARG_SDECLFts5Parse *pParse; /* A place to hold %extra_argument */ | |||
| 2146 | sqlite3Fts5ParserCTX_SDECL /* A place to hold %extra_context (the macro is defined empty in this file) */ | |||
| 2147 | fts5yyStackEntry *fts5yystackEnd; /* Last usable entry in the stack (not one past the end) */ | |||
| 2148 | fts5yyStackEntry *fts5yystack; /* Base of the parser stack; starts out pointing at fts5yystk0 */ | |||
| 2149 | fts5yyStackEntry fts5yystk0[fts5YYSTACKDEPTH100]; /* Initial stack space, embedded in the parser object */ | |||
| 2150 | }; | |||
| 2151 | typedef struct fts5yyParser fts5yyParser; | |||
| 2152 | ||||
| 2153 | #include <assert.h> | |||
| 2154 | #ifndef NDEBUG1 | |||
| 2155 | #include <stdio.h> | |||
| 2156 | static FILE *fts5yyTraceFILE = 0; | |||
| 2157 | static char *fts5yyTracePrompt = 0; | |||
| 2158 | #endif /* NDEBUG */ | |||
| 2159 | ||||
| 2160 | #ifndef NDEBUG1 | |||
| 2161 | /* | |||
| 2162 | ** Turn parser tracing on by giving a stream to which to write the trace | |||
| 2163 | ** and a prompt to preface each trace message. Tracing is turned off | |||
| 2164 | ** by making either argument NULL | |||
| 2165 | ** | |||
| 2166 | ** Inputs: | |||
| 2167 | ** <ul> | |||
| 2168 | ** <li> A FILE* to which trace output should be written. | |||
| 2169 | ** If NULL, then tracing is turned off. | |||
| 2170 | ** <li> A prefix string written at the beginning of every | |||
| 2171 | ** line of trace output. If NULL, then tracing is | |||
| 2172 | ** turned off. | |||
| 2173 | ** </ul> | |||
| 2174 | ** | |||
| 2175 | ** Outputs: | |||
| 2176 | ** None. | |||
| 2177 | */ | |||
| 2178 | static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){ | |||
| 2179 | fts5yyTraceFILE = TraceFILE; /* Record the requested trace stream */ | |||
| 2180 | fts5yyTracePrompt = zTracePrompt; /* Record the per-line prefix */ | |||
| 2181 | if( fts5yyTraceFILE==0 ) fts5yyTracePrompt = 0; /* No stream: clear the prompt too, so both are NULL together */ | |||
| 2182 | else if( fts5yyTracePrompt==0 ) fts5yyTraceFILE = 0; /* No prompt: clear the stream too, which turns tracing off */ | |||
| 2183 | } | |||
| 2184 | #endif /* NDEBUG */ | |||
| 2185 | ||||
| 2186 | #if defined(fts5YYCOVERAGE) || !defined(NDEBUG1) | |||
| 2187 | /* For tracing shifts, the names of all terminals and nonterminals | |||
| 2188 | ** are required. The following table supplies these names */ | |||
| 2189 | static const char *const fts5yyTokenName[] = { | |||
| 2190 | /* 0 */ "$", | |||
| 2191 | /* 1 */ "OR", | |||
| 2192 | /* 2 */ "AND", | |||
| 2193 | /* 3 */ "NOT", | |||
| 2194 | /* 4 */ "TERM", | |||
| 2195 | /* 5 */ "COLON", | |||
| 2196 | /* 6 */ "MINUS", | |||
| 2197 | /* 7 */ "LCP", | |||
| 2198 | /* 8 */ "RCP", | |||
| 2199 | /* 9 */ "STRING", | |||
| 2200 | /* 10 */ "LP", | |||
| 2201 | /* 11 */ "RP", | |||
| 2202 | /* 12 */ "CARET", | |||
| 2203 | /* 13 */ "COMMA", | |||
| 2204 | /* 14 */ "PLUS", | |||
| 2205 | /* 15 */ "STAR", | |||
| 2206 | /* 16 */ "input", | |||
| 2207 | /* 17 */ "expr", | |||
| 2208 | /* 18 */ "cnearset", | |||
| 2209 | /* 19 */ "exprlist", | |||
| 2210 | /* 20 */ "colset", | |||
| 2211 | /* 21 */ "colsetlist", | |||
| 2212 | /* 22 */ "nearset", | |||
| 2213 | /* 23 */ "nearphrases", | |||
| 2214 | /* 24 */ "phrase", | |||
| 2215 | /* 25 */ "neardist_opt", | |||
| 2216 | /* 26 */ "star_opt", | |||
| 2217 | }; | |||
| 2218 | #endif /* defined(fts5YYCOVERAGE) || !defined(NDEBUG) */ | |||
| 2219 | ||||
| 2220 | #ifndef NDEBUG1 | |||
| 2221 | /* For tracing reduce actions, the names of all rules are required. | |||
| 2222 | */ | |||
| 2223 | static const char *const fts5yyRuleName[] = { | |||
| 2224 | /* 0 */ "input ::= expr", | |||
| 2225 | /* 1 */ "colset ::= MINUS LCP colsetlist RCP", | |||
| 2226 | /* 2 */ "colset ::= LCP colsetlist RCP", | |||
| 2227 | /* 3 */ "colset ::= STRING", | |||
| 2228 | /* 4 */ "colset ::= MINUS STRING", | |||
| 2229 | /* 5 */ "colsetlist ::= colsetlist STRING", | |||
| 2230 | /* 6 */ "colsetlist ::= STRING", | |||
| 2231 | /* 7 */ "expr ::= expr AND expr", | |||
| 2232 | /* 8 */ "expr ::= expr OR expr", | |||
| 2233 | /* 9 */ "expr ::= expr NOT expr", | |||
| 2234 | /* 10 */ "expr ::= colset COLON LP expr RP", | |||
| 2235 | /* 11 */ "expr ::= LP expr RP", | |||
| 2236 | /* 12 */ "expr ::= exprlist", | |||
| 2237 | /* 13 */ "exprlist ::= cnearset", | |||
| 2238 | /* 14 */ "exprlist ::= exprlist cnearset", | |||
| 2239 | /* 15 */ "cnearset ::= nearset", | |||
| 2240 | /* 16 */ "cnearset ::= colset COLON nearset", | |||
| 2241 | /* 17 */ "nearset ::= phrase", | |||
| 2242 | /* 18 */ "nearset ::= CARET phrase", | |||
| 2243 | /* 19 */ "nearset ::= STRING LP nearphrases neardist_opt RP", | |||
| 2244 | /* 20 */ "nearphrases ::= phrase", | |||
| 2245 | /* 21 */ "nearphrases ::= nearphrases phrase", | |||
| 2246 | /* 22 */ "neardist_opt ::=", | |||
| 2247 | /* 23 */ "neardist_opt ::= COMMA STRING", | |||
| 2248 | /* 24 */ "phrase ::= phrase PLUS STRING star_opt", | |||
| 2249 | /* 25 */ "phrase ::= STRING star_opt", | |||
| 2250 | /* 26 */ "star_opt ::= STAR", | |||
| 2251 | /* 27 */ "star_opt ::=", | |||
| 2252 | }; | |||
| 2253 | #endif /* NDEBUG */ | |||
| 2254 | ||||
| 2255 | ||||
| 2256 | #if fts5YYGROWABLESTACK0 | |||
| 2257 | /* | |||
| 2258 | ** Try to increase the size of the parser stack. Return the number | |||
| 2259 | ** of errors. Return 0 on success. | |||
| 2260 | */ | |||
| 2261 | static int fts5yyGrowStack(fts5yyParser *p)1{ | |||
| 2262 | int oldSize = 1 + (int)(p->fts5yystackEnd - p->fts5yystack); /* Current capacity in entries (fts5yystackEnd is the last entry) */ | |||
| 2263 | int newSize; /* Capacity after growing */ | |||
| 2264 | int idx; /* Index of the top-of-stack entry, preserved across the move */ | |||
| 2265 | fts5yyStackEntry *pNew; /* The enlarged stack */ | |||
| 2266 | ||||
| 2267 | newSize = oldSize*2 + 100; /* Double the capacity and add 100 more entries */ | |||
| 2268 | idx = (int)(p->fts5yytos - p->fts5yystack); | |||
| 2269 | if( p->fts5yystack==p->fts5yystk0 ){ /* Stack still lives in the embedded fts5yystk0[] array */ | |||
| 2270 | pNew = fts5YYREALLOCrealloc(0, newSize*sizeof(pNew[0])); /* Fresh allocation (null old pointer) */ | |||
| 2271 | if( pNew==0 ) return 1; /* Allocation failed: report one error */ | |||
| 2272 | memcpy(pNew, p->fts5yystack, oldSize*sizeof(pNew[0])); /* Carry the existing entries over */ | |||
| 2273 | }else{ /* Stack is already a separate allocation: resize it */ | |||
| 2274 | pNew = fts5YYREALLOCrealloc(p->fts5yystack, newSize*sizeof(pNew[0])); | |||
| 2275 | if( pNew==0 ) return 1; /* Allocation failed: p->fts5yystack has not been reassigned */ | |||
| 2276 | } | |||
| 2277 | p->fts5yystack = pNew; | |||
| 2278 | p->fts5yytos = &p->fts5yystack[idx]; /* Re-point the top-of-stack into the new array */ | |||
| 2279 | #ifndef NDEBUG1 | |||
| 2280 | if( fts5yyTraceFILE ){ | |||
| 2281 | fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n", | |||
| 2282 | fts5yyTracePrompt, oldSize, newSize); | |||
| 2283 | } | |||
| 2284 | #endif | |||
| 2285 | p->fts5yystackEnd = &p->fts5yystack[newSize-1]; /* Last usable entry of the enlarged stack */ | |||
| 2286 | return 0; /* Success */ | |||
| 2287 | } | |||
| 2288 | #endif /* fts5YYGROWABLESTACK */ | |||
| 2289 | ||||
| 2290 | #if !fts5YYGROWABLESTACK0 | |||
| 2291 | /* For builds that do not have a growable stack, fts5yyGrowStack always | |||
| 2292 | ** returns an error. | |||
| 2293 | */ | |||
| 2294 | # define fts5yyGrowStack(X)1 1 | |||
| 2295 | #endif | |||
| 2296 | ||||
| 2297 | /* Datatype of the argument to the memory allocator passed as the | |||
| 2298 | ** first argument to sqlite3Fts5ParserAlloc() below. This can be changed by | |||
| 2299 | ** putting an appropriate #define in the %include section of the input | |||
| 2300 | ** grammar. | |||
| 2301 | */ | |||
| 2302 | #ifndef fts5YYMALLOCARGTYPEu64 | |||
| 2303 | # define fts5YYMALLOCARGTYPEu64 size_t | |||
| 2304 | #endif | |||
| 2305 | ||||
| 2306 | /* Initialize a new parser that has already been allocated. | |||
| 2307 | */ | |||
| 2308 | static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){ | |||
| 2309 | fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser; /* Caller passes the parser as an untyped pointer */ | |||
| 2310 | sqlite3Fts5ParserCTX_STORE /* Defined empty in this file: there is no %extra_context to store */ | |||
| 2311 | #ifdef fts5YYTRACKMAXSTACKDEPTH | |||
| 2312 | fts5yypParser->fts5yyhwm = 0; | |||
| 2313 | #endif | |||
| 2314 | fts5yypParser->fts5yystack = fts5yypParser->fts5yystk0; /* Start on the stack space embedded in the parser */ | |||
| 2315 | fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH100-1]; /* Last usable entry */ | |||
| 2316 | #ifndef fts5YYNOERRORRECOVERY1 | |||
| 2317 | fts5yypParser->fts5yyerrcnt = -1; | |||
| 2318 | #endif | |||
| 2319 | fts5yypParser->fts5yytos = fts5yypParser->fts5yystack; /* Top of stack is the base entry */ | |||
| 2320 | fts5yypParser->fts5yystack[0].stateno = 0; /* Base entry: state 0 */ | |||
| 2321 | fts5yypParser->fts5yystack[0].major = 0; /* Base entry: symbol code 0 */ | |||
| 2322 | } | |||
| 2323 | ||||
| 2324 | #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK | |||
| 2325 | /* | |||
| 2326 | ** This function allocates a new parser. | |||
| 2327 | ** The only argument is a pointer to a function which works like | |||
| 2328 | ** malloc. | |||
| 2329 | ** | |||
| 2330 | ** Inputs: | |||
| 2331 | ** A pointer to the function used to allocate memory. | |||
| 2332 | ** | |||
| 2333 | ** Outputs: | |||
| 2334 | ** A pointer to a parser. This pointer is used in subsequent calls | |||
| 2335 | ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree. | |||
| 2336 | */ | |||
| 2337 | static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPEu64) sqlite3Fts5ParserCTX_PDECL){ | |||
| 2338 | fts5yyParser *fts5yypParser; | |||
| 2339 | fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPEu64)sizeof(fts5yyParser) ); /* One parser object from the caller's allocator */ | |||
| 2340 | if( fts5yypParser ){ /* Initialize only if the allocation succeeded */ | |||
| 2341 | sqlite3Fts5ParserCTX_STORE /* Defined empty in this file */ | |||
| 2342 | sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM); | |||
| 2343 | } | |||
| 2344 | return (void*)fts5yypParser; /* Null if mallocProc returned null */ | |||
| 2345 | } | |||
| 2346 | #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */ | |||
| 2347 | ||||
| 2348 | ||||
| 2349 | /* The following function deletes the "minor type" or semantic value | |||
| 2350 | ** associated with a symbol. The symbol can be either a terminal | |||
| 2351 | ** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is | |||
| 2352 | ** a pointer to the value to be deleted. The code used to do the | |||
| 2353 | ** deletions is derived from the %destructor and/or %token_destructor | |||
| 2354 | ** directives of the input grammar. | |||
| 2355 | */ | |||
| 2356 | static void fts5yy_destructor( | |||
| 2357 | fts5yyParser *fts5yypParser, /* The parser */ | |||
| 2358 | fts5YYCODETYPEunsigned char fts5yymajor, /* Type code for object to destroy */ | |||
| 2359 | fts5YYMINORTYPE *fts5yypminor /* The object to be destroyed */ | |||
| 2360 | ){ | |||
| 2361 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; /* Fetch the %extra_argument into a local */ | |||
| 2362 | sqlite3Fts5ParserCTX_FETCH /* Defined empty in this file */ | |||
| 2363 | switch( fts5yymajor ){ | |||
| 2364 | /* The actions which take place when a terminal or | |||
| 2365 | ** non-terminal is destroyed are inserted here. This can happen | |||
| 2366 | ** when the symbol is popped from the stack during a | |||
| 2367 | ** reduce or during error processing or when a parser is | |||
| 2368 | ** being destroyed before it is finished parsing. | |||
| 2369 | ** | |||
| 2370 | ** Note: during a reduce, the only symbols destroyed are those | |||
| 2371 | ** which appear on the RHS of the rule, but which are *not* used | |||
| 2372 | ** inside the C code. | |||
| 2373 | */ | |||
| 2374 | /********* Begin destructor definitions ***************************************/ | |||
| 2375 | case 16: /* input */ | |||
| 2376 | { | |||
| 2377 | #line 83 "fts5parse.y" | |||
| 2378 | (void)pParse; /* Nothing is freed for "input"; pParse is only referenced (presumably to avoid an unused-variable warning) */ | |||
| 2379 | #line 606 "fts5parse.sql" | |||
| 2380 | } | |||
| 2381 | break; | |||
| 2382 | case 17: /* expr */ | |||
| 2383 | case 18: /* cnearset */ | |||
| 2384 | case 19: /* exprlist */ | |||
| 2385 | { | |||
| 2386 | #line 89 "fts5parse.y" | |||
| 2387 | sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24)); /* fts5yy24 is the Fts5ExprNode* member of the union */ | |||
| 2388 | #line 615 "fts5parse.sql" | |||
| 2389 | } | |||
| 2390 | break; | |||
| 2391 | case 20: /* colset */ | |||
| 2392 | case 21: /* colsetlist */ | |||
| 2393 | { | |||
| 2394 | #line 93 "fts5parse.y" | |||
| 2395 | sqlite3_freesqlite3_api->free((fts5yypminor->fts5yy11)); /* fts5yy11 is the Fts5Colset* member of the union */ | |||
| 2396 | #line 623 "fts5parse.sql" | |||
| 2397 | } | |||
| 2398 | break; | |||
| 2399 | case 22: /* nearset */ | |||
| 2400 | case 23: /* nearphrases */ | |||
| 2401 | { | |||
| 2402 | #line 148 "fts5parse.y" | |||
| 2403 | sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46)); /* fts5yy46 is the Fts5ExprNearset* member of the union */ | |||
| 2404 | #line 631 "fts5parse.sql" | |||
| 2405 | } | |||
| 2406 | break; | |||
| 2407 | case 24: /* phrase */ | |||
| 2408 | { | |||
| 2409 | #line 183 "fts5parse.y" | |||
| 2410 | sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53)); /* fts5yy53 is the Fts5ExprPhrase* member of the union */ | |||
| 2411 | #line 638 "fts5parse.sql" | |||
| 2412 | } | |||
| 2413 | break; | |||
| 2414 | /********* End destructor definitions *****************************************/ | |||
| 2415 | default: break; /* If no destructor action specified: do nothing */ | |||
| 2416 | } | |||
| 2417 | } | |||
| 2418 | ||||
| 2419 | /* | |||
| 2420 | ** Pop the parser's stack once. | |||
| 2421 | ** | |||
| 2422 | ** If there is a destructor routine associated with the token which | |||
| 2423 | ** is popped from the stack, then call it. | |||
| 2424 | */ | |||
| 2425 | static void fts5yy_pop_parser_stack(fts5yyParser *pParser){ | |||
| 2426 | fts5yyStackEntry *fts5yytos; /* The entry being popped */ | |||
| 2427 | assert( pParser->fts5yytos!=0 )((void) (0)); | |||
| 2428 | assert( pParser->fts5yytos > pParser->fts5yystack )((void) (0)); /* The base entry is never popped */ | |||
| 2429 | fts5yytos = pParser->fts5yytos--; /* Remember the old top, then move the top-of-stack pointer down one */ | |||
| 2430 | #ifndef NDEBUG1 | |||
| 2431 | if( fts5yyTraceFILE ){ | |||
| 2432 | fprintf(fts5yyTraceFILE,"%sPopping %s\n", | |||
| 2433 | fts5yyTracePrompt, | |||
| 2434 | fts5yyTokenName[fts5yytos->major]); | |||
| 2435 | } | |||
| 2436 | #endif | |||
| 2437 | fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); /* Release the popped entry's semantic value, if its symbol has a destructor */ | |||
| 2438 | } | |||
| 2439 | ||||
| 2440 | /* | |||
| 2441 | ** Clear all secondary memory allocations from the parser | |||
| 2442 | */ | |||
| 2443 | static void sqlite3Fts5ParserFinalize(void *p){ | |||
| 2444 | fts5yyParser *pParser = (fts5yyParser*)p; | |||
| 2445 | ||||
| 2446 | /* In-lined version of calling fts5yy_pop_parser_stack() for each | |||
| 2447 | ** element left in the stack. Only the local cursor below is moved; | |||
| 2448 | ** pParser->fts5yytos itself is not updated by this loop. */ | |||
| 2449 | fts5yyStackEntry *fts5yytos = pParser->fts5yytos; | |||
| 2450 | while( fts5yytos>pParser->fts5yystack ){ /* Stop before the base entry */ | |||
| 2451 | #ifndef NDEBUG1 | |||
| 2452 | if( fts5yyTraceFILE ){ | |||
| 2453 | fprintf(fts5yyTraceFILE,"%sPopping %s\n", | |||
| 2454 | fts5yyTracePrompt, | |||
| 2455 | fts5yyTokenName[fts5yytos->major]); | |||
| 2456 | } | |||
| 2457 | #endif | |||
| 2458 | if( fts5yytos->major>=fts5YY_MIN_DSTRCTR16 ){ /* Symbol codes below this minimum have no destructor */ | |||
| 2459 | fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); | |||
| 2460 | } | |||
| 2461 | fts5yytos--; | |||
| 2462 | } | |||
| 2463 | ||||
| 2464 | #if fts5YYGROWABLESTACK0 | |||
| 2465 | if( pParser->fts5yystack!=pParser->fts5yystk0 ) fts5YYFREEfree(pParser->fts5yystack); /* Free the stack only if it is no longer the embedded array */ | |||
| 2466 | #endif | |||
| 2467 | } | |||
| 2467 | ||||
| 2468 | #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK | |||
| 2469 | /* | |||
| 2470 | ** Deallocate and destroy a parser. Destructors are called for | |||
| 2471 | ** all stack elements before shutting the parser down. | |||
| 2472 | ** | |||
| 2473 | ** If the fts5YYPARSEFREENEVERNULL macro exists (for example because it | |||
| 2474 | ** is defined in a %include section of the input grammar) then it is | |||
| 2475 | ** assumed that the input pointer is never NULL. | |||
| 2476 | */ | |||
| 2477 | static void sqlite3Fts5ParserFree( | |||
| 2478 | void *p, /* The parser to be deleted */ | |||
| 2479 | void (*freeProc)(void*) /* Function used to reclaim memory */ | |||
| 2480 | ){ | |||
| 2481 | #ifndef fts5YYPARSEFREENEVERNULL | |||
| 2482 | if( p==0 ) return; /* A null parser is a no-op */ | |||
| 2483 | #endif | |||
| 2484 | sqlite3Fts5ParserFinalize(p); /* Run destructors for whatever is still on the stack */ | |||
| 2485 | (*freeProc)(p); /* Then release the parser object itself */ | |||
| 2486 | } | |||
| 2487 | #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */ | |||
| 2488 | ||||
| 2489 | /* | |||
| 2490 | ** Return the peak depth of the stack for a parser. | |||
| 2491 | */ | |||
| 2492 | #ifdef fts5YYTRACKMAXSTACKDEPTH | |||
| 2493 | static int sqlite3Fts5ParserStackPeak(void *p){ | |||
| 2494 | fts5yyParser *pParser = (fts5yyParser*)p; /* p is assumed to be a non-null fts5yyParser; it is not checked */ | |||
| 2495 | return pParser->fts5yyhwm; /* The high-water mark field, which exists only when fts5YYTRACKMAXSTACKDEPTH is defined */ | |||
| 2496 | } | |||
| 2497 | #endif | |||
| 2498 | ||||
| 2499 | /* This array of booleans keeps track of the parser statement | |||
| 2500 | ** coverage. The element fts5yycoverage[X][Y] is set when the parser | |||
| 2501 | ** is in state X and has a lookahead token Y. In a well-tested | |||
| 2502 | ** system, every element of this matrix should end up being set. | |||
| 2502 | ** systems, every element of this matrix should end up being set. | |||
| 2503 | */ | |||
| 2504 | #if defined(fts5YYCOVERAGE) | |||
| 2505 | static unsigned char fts5yycoverage[fts5YYNSTATE35][fts5YYNFTS5TOKEN16]; | |||
| 2506 | #endif | |||
| 2507 | ||||
| 2508 | /* | |||
| 2509 | ** Write into out a description of every state/lookahead combination that | |||
| 2510 | ** | |||
| 2511 | ** (1) has not been used by the parser, and | |||
| 2512 | ** (2) is not a syntax error. | |||
| 2513 | ** | |||
| 2514 | ** Return the number of missed state/lookahead combinations. | |||
| 2515 | */ | |||
| 2516 | #if defined(fts5YYCOVERAGE) | |||
| 2517 | static int sqlite3Fts5ParserCoverage(FILE *out){ | |||
| 2518 | int stateno, iLookAhead, i; | |||
| 2519 | int nMissed = 0; /* Count of state/lookahead pairs never exercised */ | |||
| 2520 | for(stateno=0; stateno<fts5YYNSTATE35; stateno++){ | |||
| 2521 | i = fts5yy_shift_ofst[stateno]; /* Base offset of this state's entries */ | |||
| 2522 | for(iLookAhead=0; iLookAhead<fts5YYNFTS5TOKEN16; iLookAhead++){ | |||
| 2523 | if( fts5yy_lookahead[i+iLookAhead]!=iLookAhead ) continue; /* No explicit table entry for this pair: skip it */ | |||
| 2524 | if( fts5yycoverage[stateno][iLookAhead]==0 ) nMissed++; | |||
| 2525 | if( out ){ /* Reporting is optional; a null stream only counts */ | |||
| 2526 | fprintf(out,"State %d lookahead %s %s\n", stateno, | |||
| 2527 | fts5yyTokenName[iLookAhead], | |||
| 2528 | fts5yycoverage[stateno][iLookAhead] ? "ok" : "missed"); | |||
| 2529 | } | |||
| 2530 | } | |||
| 2531 | } | |||
| 2532 | return nMissed; | |||
| 2533 | } | |||
| 2534 | #endif | |||
| 2535 | ||||
| 2536 | /* | |||
| 2537 | ** Find the appropriate action for a parser given the terminal | |||
| 2538 | ** look-ahead token iLookAhead. | |||
| | ** | |||
| | ** A stateno greater than fts5YY_MAX_SHIFT is returned unchanged. | |||
| | ** Otherwise the lookahead table is probed at index | |||
| | ** fts5yy_shift_ofst[stateno]+iLookAhead and, on a match, the | |||
| | ** fts5yy_action[] entry at that index is returned. On a mismatch the | |||
| | ** routine tries, in this order: | |||
| | ** | |||
| | ** (1) the fallback token for iLookAhead, if fts5YYFALLBACK is defined | |||
| | ** and the fallback is non-zero (the lookup is repeated with it), | |||
| | ** (2) the wildcard slot, if fts5YYWILDCARD is defined and iLookAhead>0, | |||
| | ** (3) the default action fts5yy_default[stateno]. | |||
| | ** | |||
| | ** When fts5YYCOVERAGE is defined, the state/lookahead pair is also | |||
| | ** recorded in fts5yycoverage[][]. | |||
| 2539 | */ | |||
| 2540 | static fts5YYACTIONTYPEunsigned char fts5yy_find_shift_action( | |||
| 2541 | fts5YYCODETYPEunsigned char iLookAhead, /* The look-ahead token */ | |||
| 2542 | fts5YYACTIONTYPEunsigned char stateno /* Current state number */ | |||
| 2543 | ){ | |||
| 2544 | int i; | |||
| 2545 | ||||
| 2546 | if( stateno>fts5YY_MAX_SHIFT34 ) return stateno; /* values above fts5YY_MAX_SHIFT are handed back unchanged */ | |||
| 2547 | assert( stateno <= fts5YY_SHIFT_COUNT )((void) (0)); | |||
| 2548 | #if defined(fts5YYCOVERAGE) | |||
| 2549 | fts5yycoverage[stateno][iLookAhead] = 1; | |||
| 2550 | #endif | |||
| 2551 | do{ | |||
| 2552 | i = fts5yy_shift_ofst[stateno]; | |||
| 2553 | assert( i>=0 )((void) (0)); | |||
| 2554 | assert( i<=fts5YY_ACTTAB_COUNT )((void) (0)); | |||
| 2555 | assert( i+fts5YYNFTS5TOKEN<=(int)fts5YY_NLOOKAHEAD )((void) (0)); | |||
| 2556 | assert( iLookAhead!=fts5YYNOCODE )((void) (0)); | |||
| 2557 | assert( iLookAhead < fts5YYNFTS5TOKEN )((void) (0)); | |||
| 2558 | i += iLookAhead; | |||
| 2559 | assert( i<(int)fts5YY_NLOOKAHEAD )((void) (0)); | |||
| 2560 | if( fts5yy_lookahead[i]!=iLookAhead ){ | |||
| 2561 | #ifdef fts5YYFALLBACK | |||
| 2562 | fts5YYCODETYPEunsigned char iFallback; /* Fallback token */ | |||
| 2563 | assert( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) )((void) (0)); | |||
| 2564 | iFallback = fts5yyFallback[iLookAhead]; | |||
| 2565 | if( iFallback!=0 ){ | |||
| 2566 | #ifndef NDEBUG1 | |||
| 2567 | if( fts5yyTraceFILE ){ | |||
| 2568 | fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n", | |||
| 2569 | fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]); | |||
| 2570 | } | |||
| 2571 | #endif | |||
| 2572 | assert( fts5yyFallback[iFallback]==0 )((void) (0)); /* Fallback loop must terminate */ | |||
| 2573 | iLookAhead = iFallback; /* repeat the table probe with the fallback token */ | |||
| 2574 | continue; | |||
| 2575 | } | |||
| 2576 | #endif | |||
| 2577 | #ifdef fts5YYWILDCARD | |||
| 2578 | { | |||
| 2579 | int j = i - iLookAhead + fts5YYWILDCARD; /* index of this state's wildcard slot */ | |||
| 2580 | assert( j<(int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])) )((void) (0)); | |||
| 2581 | if( fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 ){ | |||
| 2582 | #ifndef NDEBUG1 | |||
| 2583 | if( fts5yyTraceFILE ){ | |||
| 2584 | fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n", | |||
| 2585 | fts5yyTracePrompt, fts5yyTokenName[iLookAhead], | |||
| 2586 | fts5yyTokenName[fts5YYWILDCARD]); | |||
| 2587 | } | |||
| 2588 | #endif /* NDEBUG */ | |||
| 2589 | return fts5yy_action[j]; | |||
| 2590 | } | |||
| 2591 | } | |||
| 2592 | #endif /* fts5YYWILDCARD */ | |||
| 2593 | return fts5yy_default[stateno]; /* no match, fallback or wildcard: use the state's default action */ | |||
| 2594 | }else{ | |||
| 2595 | assert( i>=0 && i<(int)(sizeof(fts5yy_action)/sizeof(fts5yy_action[0])) )((void) (0)); | |||
| 2596 | return fts5yy_action[i]; | |||
| 2597 | } | |||
| 2598 | }while(1); | |||
| 2599 | } | |||
| 2600 | ||||
| 2601 | /* | |||
| 2602 | ** Find the appropriate action for a parser given the non-terminal | |||
| 2603 | ** look-ahead token iLookAhead. | |||
| | ** | |||
| | ** The result is fts5yy_action[fts5yy_reduce_ofst[stateno]+iLookAhead]. | |||
| | ** When fts5YYERRORSYMBOL is defined, a stateno above fts5YY_REDUCE_COUNT, | |||
| | ** an index outside 0..fts5YY_ACTTAB_COUNT-1, or a lookahead-table | |||
| | ** mismatch makes the routine return fts5yy_default[stateno] instead. | |||
| | ** Without fts5YYERRORSYMBOL those same conditions are only checked | |||
| | ** with assert(). | |||
| 2604 | */ | |||
| 2605 | static fts5YYACTIONTYPEunsigned char fts5yy_find_reduce_action( | |||
| 2606 | fts5YYACTIONTYPEunsigned char stateno, /* Current state number */ | |||
| 2607 | fts5YYCODETYPEunsigned char iLookAhead /* The look-ahead token */ | |||
| 2608 | ){ | |||
| 2609 | int i; | |||
| 2610 | #ifdef fts5YYERRORSYMBOL | |||
| 2611 | if( stateno>fts5YY_REDUCE_COUNT(17) ){ | |||
| 2612 | return fts5yy_default[stateno]; | |||
| 2613 | } | |||
| 2614 | #else | |||
| 2615 | assert( stateno<=fts5YY_REDUCE_COUNT )((void) (0)); | |||
| 2616 | #endif | |||
| 2617 | i = fts5yy_reduce_ofst[stateno]; | |||
| 2618 | assert( iLookAhead!=fts5YYNOCODE )((void) (0)); | |||
| 2619 | i += iLookAhead; | |||
| 2620 | #ifdef fts5YYERRORSYMBOL | |||
| 2621 | if( i<0 || i>=fts5YY_ACTTAB_COUNT(105) || fts5yy_lookahead[i]!=iLookAhead ){ | |||
| 2622 | return fts5yy_default[stateno]; | |||
| 2623 | } | |||
| 2624 | #else | |||
| 2625 | assert( i>=0 && i<fts5YY_ACTTAB_COUNT )((void) (0)); | |||
| 2626 | assert( fts5yy_lookahead[i]==iLookAhead )((void) (0)); | |||
| 2627 | #endif | |||
| 2628 | return fts5yy_action[i]; | |||
| 2629 | } | |||
| 2630 | ||||
| 2631 | /* | |||
| 2632 | ** The following routine is called if the stack overflows. | |||
| | ** | |||
| | ** It pops every entry above the base of the parser stack with | |||
| | ** fts5yy_pop_parser_stack() and then reports the overflow to the | |||
| | ** Fts5Parse context through sqlite3Fts5ParseError(). | |||
| 2633 | */ | |||
| 2634 | static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){ | |||
| 2635 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
| 2636 | sqlite3Fts5ParserCTX_FETCH | |||
| 2637 | #ifndef NDEBUG1 | |||
| 2638 | if( fts5yyTraceFILE ){ | |||
| 2639 | fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt); | |||
| 2640 | } | |||
| 2641 | #endif | |||
| 2642 | while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser); | |||
| 2643 | /* Here code is inserted which will execute if the parser | |||
| 2644 | ** stack ever overflows */ | |||
| 2645 | /******** Begin %stack_overflow code ******************************************/ | |||
| 2646 | #line 36 "fts5parse.y" | |||
| 2647 | ||||
| 2648 | sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow"); | |||
| 2649 | #line 876 "fts5parse.sql" | |||
| 2650 | /******** End %stack_overflow code ********************************************/ | |||
| 2651 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument var */ | |||
| 2652 | sqlite3Fts5ParserCTX_STORE | |||
| 2653 | } | |||
| 2654 | ||||
| 2655 | /* | |||
| 2656 | ** Print tracing information for a SHIFT action | |||
| | ** | |||
| | ** Nothing is printed unless fts5yyTraceFILE is set. A new state below | |||
| | ** fts5YYNSTATE is reported as the state being entered; any other value | |||
| | ** is reported as a pending reduce, numbered relative to | |||
| | ** fts5YY_MIN_REDUCE. zTag is the label printed in front of the token | |||
| | ** name. When the preprocessor guard below is false the routine is | |||
| | ** replaced by an empty macro. | |||
| 2657 | */ | |||
| 2658 | #ifndef NDEBUG1 | |||
| 2659 | static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, const char *zTag){ | |||
| 2660 | if( fts5yyTraceFILE ){ | |||
| 2661 | if( fts5yyNewState<fts5YYNSTATE35 ){ | |||
| 2662 | fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n", | |||
| 2663 | fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], | |||
| 2664 | fts5yyNewState); | |||
| 2665 | }else{ | |||
| 2666 | fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n", | |||
| 2667 | fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], | |||
| 2668 | fts5yyNewState - fts5YY_MIN_REDUCE83); | |||
| 2669 | } | |||
| 2670 | } | |||
| 2671 | } | |||
| 2672 | #else | |||
| 2673 | # define fts5yyTraceShift(X,Y,Z) | |||
| 2674 | #endif | |||
| 2675 | ||||
| 2676 | /* | |||
| 2677 | ** Perform a shift action. | |||
| | ** | |||
| | ** The top-of-stack pointer is advanced by one entry. If it has moved | |||
| | ** past fts5yystackEnd the stack is grown; when growing fails the | |||
| | ** pointer is restored, fts5yyStackOverflow() is invoked and nothing is | |||
| | ** shifted. A new state greater than fts5YY_MAX_SHIFT is offset by | |||
| | ** (fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE) before it is stored. | |||
| | ** The state, major token and minor token are then written to the new | |||
| | ** top entry. | |||
| 2678 | */ | |||
| 2679 | static void fts5yy_shift( | |||
| 2680 | fts5yyParser *fts5yypParser, /* The parser to be shifted */ | |||
| 2681 | fts5YYACTIONTYPEunsigned char fts5yyNewState, /* The new state to shift in */ | |||
| 2682 | fts5YYCODETYPEunsigned char fts5yyMajor, /* The major token to shift in */ | |||
| 2683 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyMinor /* The minor token to shift in */ | |||
| 2684 | ){ | |||
| 2685 | fts5yyStackEntry *fts5yytos; | |||
| 2686 | fts5yypParser->fts5yytos++; | |||
| 2687 | #ifdef fts5YYTRACKMAXSTACKDEPTH | |||
| 2688 | if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){ | |||
| 2689 | fts5yypParser->fts5yyhwm++; | |||
| 2690 | assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) )((void) (0)); | |||
| 2691 | } | |||
| 2692 | #endif | |||
| 2693 | fts5yytos = fts5yypParser->fts5yytos; | |||
| 2694 | if( fts5yytos>fts5yypParser->fts5yystackEnd ){ | |||
| 2695 | if( fts5yyGrowStack(fts5yypParser)1 ){ | |||
| 2696 | fts5yypParser->fts5yytos--; /* undo the increment before reporting the overflow */ | |||
| 2697 | fts5yyStackOverflow(fts5yypParser); | |||
| 2698 | return; | |||
| 2699 | } | |||
| 2700 | fts5yytos = fts5yypParser->fts5yytos; /* re-read the pointer after the grow attempt */ | |||
| 2701 | assert( fts5yytos <= fts5yypParser->fts5yystackEnd )((void) (0)); | |||
| 2702 | } | |||
| 2703 | if( fts5yyNewState > fts5YY_MAX_SHIFT34 ){ | |||
| 2704 | fts5yyNewState += fts5YY_MIN_REDUCE83 - fts5YY_MIN_SHIFTREDUCE52; | |||
| 2705 | } | |||
| 2706 | fts5yytos->stateno = fts5yyNewState; | |||
| 2707 | fts5yytos->major = fts5yyMajor; | |||
| 2708 | fts5yytos->minor.fts5yy0 = fts5yyMinor; | |||
| 2709 | fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift"); | |||
| 2710 | } | |||
| 2711 | ||||
| 2712 | /* For rule J, fts5yyRuleInfoLhs[J] contains the symbol on the left-hand side | |||
| 2713 | ** of that rule. The table is indexed by rule number; fts5yy_reduce() reads | |||
| | ** it to obtain the symbol that is pushed once the rule has been reduced. */ | |||
| 2714 | static const fts5YYCODETYPEunsigned char fts5yyRuleInfoLhs[] = { | |||
| 2715 | 16, /* (0) input ::= expr */ | |||
| 2716 | 20, /* (1) colset ::= MINUS LCP colsetlist RCP */ | |||
| 2717 | 20, /* (2) colset ::= LCP colsetlist RCP */ | |||
| 2718 | 20, /* (3) colset ::= STRING */ | |||
| 2719 | 20, /* (4) colset ::= MINUS STRING */ | |||
| 2720 | 21, /* (5) colsetlist ::= colsetlist STRING */ | |||
| 2721 | 21, /* (6) colsetlist ::= STRING */ | |||
| 2722 | 17, /* (7) expr ::= expr AND expr */ | |||
| 2723 | 17, /* (8) expr ::= expr OR expr */ | |||
| 2724 | 17, /* (9) expr ::= expr NOT expr */ | |||
| 2725 | 17, /* (10) expr ::= colset COLON LP expr RP */ | |||
| 2726 | 17, /* (11) expr ::= LP expr RP */ | |||
| 2727 | 17, /* (12) expr ::= exprlist */ | |||
| 2728 | 19, /* (13) exprlist ::= cnearset */ | |||
| 2729 | 19, /* (14) exprlist ::= exprlist cnearset */ | |||
| 2730 | 18, /* (15) cnearset ::= nearset */ | |||
| 2731 | 18, /* (16) cnearset ::= colset COLON nearset */ | |||
| 2732 | 22, /* (17) nearset ::= phrase */ | |||
| 2733 | 22, /* (18) nearset ::= CARET phrase */ | |||
| 2734 | 22, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */ | |||
| 2735 | 23, /* (20) nearphrases ::= phrase */ | |||
| 2736 | 23, /* (21) nearphrases ::= nearphrases phrase */ | |||
| 2737 | 25, /* (22) neardist_opt ::= */ | |||
| 2738 | 25, /* (23) neardist_opt ::= COMMA STRING */ | |||
| 2739 | 24, /* (24) phrase ::= phrase PLUS STRING star_opt */ | |||
| 2740 | 24, /* (25) phrase ::= STRING star_opt */ | |||
| 2741 | 26, /* (26) star_opt ::= STAR */ | |||
| 2742 | 26, /* (27) star_opt ::= */ | |||
| 2743 | }; | |||
| 2744 | ||||
| 2745 | /* For rule J, fts5yyRuleInfoNRhs[J] contains the negative of the number | |||
| 2746 | ** of symbols on the right-hand side of that rule. The count is stored | |||
| | ** negated so that fts5yy_reduce() can add it directly to its stack | |||
| | ** pointer. The main parser loop also tests an entry against zero to | |||
| | ** detect rules with an empty right-hand side. */ | |||
| 2747 | static const signed char fts5yyRuleInfoNRhs[] = { | |||
| 2748 | -1, /* (0) input ::= expr */ | |||
| 2749 | -4, /* (1) colset ::= MINUS LCP colsetlist RCP */ | |||
| 2750 | -3, /* (2) colset ::= LCP colsetlist RCP */ | |||
| 2751 | -1, /* (3) colset ::= STRING */ | |||
| 2752 | -2, /* (4) colset ::= MINUS STRING */ | |||
| 2753 | -2, /* (5) colsetlist ::= colsetlist STRING */ | |||
| 2754 | -1, /* (6) colsetlist ::= STRING */ | |||
| 2755 | -3, /* (7) expr ::= expr AND expr */ | |||
| 2756 | -3, /* (8) expr ::= expr OR expr */ | |||
| 2757 | -3, /* (9) expr ::= expr NOT expr */ | |||
| 2758 | -5, /* (10) expr ::= colset COLON LP expr RP */ | |||
| 2759 | -3, /* (11) expr ::= LP expr RP */ | |||
| 2760 | -1, /* (12) expr ::= exprlist */ | |||
| 2761 | -1, /* (13) exprlist ::= cnearset */ | |||
| 2762 | -2, /* (14) exprlist ::= exprlist cnearset */ | |||
| 2763 | -1, /* (15) cnearset ::= nearset */ | |||
| 2764 | -3, /* (16) cnearset ::= colset COLON nearset */ | |||
| 2765 | -1, /* (17) nearset ::= phrase */ | |||
| 2766 | -2, /* (18) nearset ::= CARET phrase */ | |||
| 2767 | -5, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */ | |||
| 2768 | -1, /* (20) nearphrases ::= phrase */ | |||
| 2769 | -2, /* (21) nearphrases ::= nearphrases phrase */ | |||
| 2770 | 0, /* (22) neardist_opt ::= */ | |||
| 2771 | -2, /* (23) neardist_opt ::= COMMA STRING */ | |||
| 2772 | -4, /* (24) phrase ::= phrase PLUS STRING star_opt */ | |||
| 2773 | -2, /* (25) phrase ::= STRING star_opt */ | |||
| 2774 | -1, /* (26) star_opt ::= STAR */ | |||
| 2775 | 0, /* (27) star_opt ::= */ | |||
| 2776 | }; | |||
| 2777 | ||||
| 2778 | static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */ | |||
| 2779 | ||||
| 2780 | /* | |||
| 2781 | ** Perform a reduce action and the shift that must immediately | |||
| 2782 | ** follow the reduce. | |||
| 2783 | ** | |||
| 2784 | ** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions | |||
| 2785 | ** access to the lookahead token (if any). The fts5yyLookahead will be fts5YYNOCODE | |||
| 2786 | ** if the lookahead token has already been consumed. As this procedure is | |||
| 2787 | ** only called from one place, optimizing compilers will in-line it, which | |||
| 2788 | ** means that the extra parameters have no performance impact. | |||
| | ** | |||
| | ** Each case of the switch runs the grammar action for one rule, reading | |||
| | ** right-hand-side values from stack entries at or below the current top | |||
| | ** (fts5yymsp[0], fts5yymsp[-1], ...) and writing the result into the | |||
| | ** entry that will become the new top. Afterwards the rule's | |||
| | ** left-hand-side symbol and right-hand-side length are looked up, | |||
| | ** fts5yy_find_reduce_action() supplies the next action, and the stack | |||
| | ** top is repositioned and stamped with that action and symbol. The | |||
| | ** action is also the return value. | |||
| 2789 | */ | |||
| 2790 | static fts5YYACTIONTYPEunsigned char fts5yy_reduce( | |||
| 2791 | fts5yyParser *fts5yypParser, /* The parser */ | |||
| 2792 | unsigned int fts5yyruleno, /* Number of the rule by which to reduce */ | |||
| 2793 | int fts5yyLookahead, /* Lookahead token, or fts5YYNOCODE if none */ | |||
| 2794 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyLookaheadToken /* Value of the lookahead token */ | |||
| 2795 | sqlite3Fts5ParserCTX_PDECL /* %extra_context */ | |||
| 2796 | ){ | |||
| 2797 | int fts5yygoto; /* The next state */ | |||
| 2798 | fts5YYACTIONTYPEunsigned char fts5yyact; /* The next action */ | |||
| 2799 | fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */ | |||
| 2800 | int fts5yysize; /* Amount to pop the stack */ | |||
| 2801 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
| 2802 | (void)fts5yyLookahead; | |||
| 2803 | (void)fts5yyLookaheadToken; | |||
| 2804 | fts5yymsp = fts5yypParser->fts5yytos; | |||
| 2805 | ||||
| 2806 | switch( fts5yyruleno ){ | |||
| 2807 | /* Beginning here are the reduction cases. A typical example | |||
| 2808 | ** follows: | |||
| 2809 | ** case 0: | |||
| 2810 | ** #line <lineno> <grammarfile> | |||
| 2811 | ** { ... } // User supplied code | |||
| 2812 | ** #line <lineno> <thisfile> | |||
| 2813 | ** break; | |||
| 2814 | */ | |||
| 2815 | /********** Begin reduce actions **********************************************/ | |||
| 2816 | fts5YYMINORTYPE fts5yylhsminor; | |||
| 2817 | case 0: /* input ::= expr */ | |||
| 2818 | #line 82 "fts5parse.y" | |||
| 2819 | { sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); } | |||
| 2820 | #line 1047 "fts5parse.sql" | |||
| 2821 | break; | |||
| 2822 | case 1: /* colset ::= MINUS LCP colsetlist RCP */ | |||
| 2823 | #line 97 "fts5parse.y" | |||
| 2824 | { | |||
| 2825 | fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11); | |||
| 2826 | } | |||
| 2827 | #line 1054 "fts5parse.sql" | |||
| 2828 | break; | |||
| 2829 | case 2: /* colset ::= LCP colsetlist RCP */ | |||
| 2830 | #line 100 "fts5parse.y" | |||
| 2831 | { fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; } | |||
| 2832 | #line 1059 "fts5parse.sql" | |||
| 2833 | break; | |||
| 2834 | case 3: /* colset ::= STRING */ | |||
| 2835 | #line 101 "fts5parse.y" | |||
| 2836 | { | |||
| 2837 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | |||
| 2838 | } | |||
| 2839 | #line 1066 "fts5parse.sql" | |||
| 2840 | fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | |||
| 2841 | break; | |||
| 2842 | case 4: /* colset ::= MINUS STRING */ | |||
| 2843 | #line 104 "fts5parse.y" | |||
| 2844 | { | |||
| 2845 | fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | |||
| 2846 | fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11); | |||
| 2847 | } | |||
| 2848 | #line 1075 "fts5parse.sql" | |||
| 2849 | break; | |||
| 2850 | case 5: /* colsetlist ::= colsetlist STRING */ | |||
| 2851 | #line 109 "fts5parse.y" | |||
| 2852 | { | |||
| 2853 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy11, &fts5yymsp[0].minor.fts5yy0); } | |||
| 2854 | #line 1081 "fts5parse.sql" | |||
| 2855 | fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | |||
| 2856 | break; | |||
| 2857 | case 6: /* colsetlist ::= STRING */ | |||
| 2858 | #line 111 "fts5parse.y" | |||
| 2859 | { | |||
| 2860 | fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0); | |||
| 2861 | } | |||
| 2862 | #line 1089 "fts5parse.sql" | |||
| 2863 | fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11; | |||
| 2864 | break; | |||
| 2865 | case 7: /* expr ::= expr AND expr */ | |||
| 2866 | #line 115 "fts5parse.y" | |||
| 2867 | { | |||
| 2868 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND2, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | |||
| 2869 | } | |||
| 2870 | #line 1097 "fts5parse.sql" | |||
| 2871 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
| 2872 | break; | |||
| 2873 | case 8: /* expr ::= expr OR expr */ | |||
| 2874 | #line 118 "fts5parse.y" | |||
| 2875 | { | |||
| 2876 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR1, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | |||
| 2877 | } | |||
| 2878 | #line 1105 "fts5parse.sql" | |||
| 2879 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
| 2880 | break; | |||
| 2881 | case 9: /* expr ::= expr NOT expr */ | |||
| 2882 | #line 121 "fts5parse.y" | |||
| 2883 | { | |||
| 2884 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT3, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); | |||
| 2885 | } | |||
| 2886 | #line 1113 "fts5parse.sql" | |||
| 2887 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
| 2888 | break; | |||
| 2889 | case 10: /* expr ::= colset COLON LP expr RP */ | |||
| 2890 | #line 125 "fts5parse.y" | |||
| 2891 | { | |||
| 2892 | sqlite3Fts5ParseSetColset(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[-4].minor.fts5yy11); | |||
| 2893 | fts5yylhsminor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24; | |||
| 2894 | } | |||
| 2895 | #line 1122 "fts5parse.sql" | |||
| 2896 | fts5yymsp[-4].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
| 2897 | break; | |||
| 2898 | case 11: /* expr ::= LP expr RP */ | |||
| 2899 | #line 129 "fts5parse.y" | |||
| 2900 | {fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;} | |||
| 2901 | #line 1128 "fts5parse.sql" | |||
| 2902 | break; | |||
| 2903 | case 12: /* expr ::= exprlist */ | |||
| 2904 | case 13: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==13); | |||
| 2905 | #line 130 "fts5parse.y" | |||
| 2906 | {fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;} | |||
| 2907 | #line 1134 "fts5parse.sql" | |||
| 2908 | fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
| 2909 | break; | |||
| 2910 | case 14: /* exprlist ::= exprlist cnearset */ | |||
| 2911 | #line 133 "fts5parse.y" | |||
| 2912 | { | |||
| 2913 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24); | |||
| 2914 | } | |||
| 2915 | #line 1142 "fts5parse.sql" | |||
| 2916 | fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
| 2917 | break; | |||
| 2918 | case 15: /* cnearset ::= nearset */ | |||
| 2919 | #line 137 "fts5parse.y" | |||
| 2920 | { | |||
| 2921 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, 0, 0, fts5yymsp[0].minor.fts5yy46); | |||
| 2922 | } | |||
| 2923 | #line 1150 "fts5parse.sql" | |||
| 2924 | fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
| 2925 | break; | |||
| 2926 | case 16: /* cnearset ::= colset COLON nearset */ | |||
| 2927 | #line 140 "fts5parse.y" | |||
| 2928 | { | |||
| 2929 | fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, 0, 0, fts5yymsp[0].minor.fts5yy46); | |||
| 2930 | sqlite3Fts5ParseSetColset(pParse, fts5yylhsminor.fts5yy24, fts5yymsp[-2].minor.fts5yy11); | |||
| 2931 | } | |||
| 2932 | #line 1159 "fts5parse.sql" | |||
| 2933 | fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; | |||
| 2934 | break; | |||
| 2935 | case 17: /* nearset ::= phrase */ | |||
| 2936 | #line 151 "fts5parse.y" | |||
| 2937 | { fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); } | |||
| 2938 | #line 1165 "fts5parse.sql" | |||
| 2939 | fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | |||
| 2940 | break; | |||
| 2941 | case 18: /* nearset ::= CARET phrase */ | |||
| 2942 | #line 152 "fts5parse.y" | |||
| 2943 | { | |||
| 2944 | sqlite3Fts5ParseSetCaret(fts5yymsp[0].minor.fts5yy53); | |||
| 2945 | fts5yymsp[-1].minor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); | |||
| 2946 | } | |||
| 2947 | #line 1174 "fts5parse.sql" | |||
| 2948 | break; | |||
| 2949 | case 19: /* nearset ::= STRING LP nearphrases neardist_opt RP */ | |||
| 2950 | #line 156 "fts5parse.y" | |||
| 2951 | { | |||
| 2952 | sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0); | |||
| 2953 | sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-1].minor.fts5yy0); | |||
| 2954 | fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46; | |||
| 2955 | } | |||
| 2956 | #line 1183 "fts5parse.sql" | |||
| 2957 | fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | |||
| 2958 | break; | |||
| 2959 | case 20: /* nearphrases ::= phrase */ | |||
| 2960 | #line 162 "fts5parse.y" | |||
| 2961 | { | |||
| 2962 | fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); | |||
| 2963 | } | |||
| 2964 | #line 1191 "fts5parse.sql" | |||
| 2965 | fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | |||
| 2966 | break; | |||
| 2967 | case 21: /* nearphrases ::= nearphrases phrase */ | |||
| 2968 | #line 165 "fts5parse.y" | |||
| 2969 | { | |||
| 2970 | fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy46, fts5yymsp[0].minor.fts5yy53); | |||
| 2971 | } | |||
| 2972 | #line 1199 "fts5parse.sql" | |||
| 2973 | fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46; | |||
| 2974 | break; | |||
| 2975 | case 22: /* neardist_opt ::= */ | |||
| 2976 | #line 172 "fts5parse.y" | |||
| 2977 | { fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; } /* empty right-hand side: the result goes in the slot above the current top */ | |||
| 2978 | #line 1205 "fts5parse.sql" | |||
| 2979 | break; | |||
| 2980 | case 23: /* neardist_opt ::= COMMA STRING */ | |||
| 2981 | #line 173 "fts5parse.y" | |||
| 2982 | { fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; } | |||
| 2983 | #line 1210 "fts5parse.sql" | |||
| 2984 | break; | |||
| 2985 | case 24: /* phrase ::= phrase PLUS STRING star_opt */ | |||
| 2986 | #line 185 "fts5parse.y" | |||
| 2987 | { | |||
| 2988 | fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4); | |||
| 2989 | } | |||
| 2990 | #line 1217 "fts5parse.sql" | |||
| 2991 | fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53; | |||
| 2992 | break; | |||
| 2993 | case 25: /* phrase ::= STRING star_opt */ | |||
| 2994 | #line 188 "fts5parse.y" | |||
| 2995 | { | |||
| 2996 | fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4); | |||
| 2997 | } | |||
| 2998 | #line 1225 "fts5parse.sql" | |||
| 2999 | fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53; | |||
| 3000 | break; | |||
| 3001 | case 26: /* star_opt ::= STAR */ | |||
| 3002 | #line 196 "fts5parse.y" | |||
| 3003 | { fts5yymsp[0].minor.fts5yy4 = 1; } | |||
| 3004 | #line 1231 "fts5parse.sql" | |||
| 3005 | break; | |||
| 3006 | case 27: /* star_opt ::= */ | |||
| 3007 | #line 197 "fts5parse.y" | |||
| 3008 | { fts5yymsp[1].minor.fts5yy4 = 0; } /* empty right-hand side: the result goes in the slot above the current top */ | |||
| 3009 | #line 1236 "fts5parse.sql" | |||
| 3010 | break; | |||
| 3011 | default: | |||
| 3012 | break; | |||
| 3013 | /********** End reduce actions ************************************************/ | |||
| 3014 | }; | |||
| 3015 | assert( fts5yyruleno<sizeof(fts5yyRuleInfoLhs)/sizeof(fts5yyRuleInfoLhs[0]) )((void) (0)); | |||
| 3016 | fts5yygoto = fts5yyRuleInfoLhs[fts5yyruleno]; /* left-hand-side symbol of the rule */ | |||
| 3017 | fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno]; /* zero or negative: minus the right-hand-side length */ | |||
| 3018 | fts5yyact = fts5yy_find_reduce_action(fts5yymsp[fts5yysize].stateno,(fts5YYCODETYPEunsigned char)fts5yygoto); | |||
| 3019 | ||||
| 3020 | /* There are no SHIFTREDUCE actions on nonterminals because the table | |||
| 3021 | ** generator has simplified them to pure REDUCE actions. */ | |||
| 3022 | assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) )((void) (0)); | |||
| 3023 | ||||
| 3024 | /* It is not possible for a REDUCE to be followed by an error */ | |||
| 3025 | assert( fts5yyact!=fts5YY_ERROR_ACTION )((void) (0)); | |||
| 3026 | ||||
| 3027 | fts5yymsp += fts5yysize+1; /* drop the right-hand-side entries, keeping one slot for the left-hand side */ | |||
| 3028 | fts5yypParser->fts5yytos = fts5yymsp; | |||
| 3029 | fts5yymsp->stateno = (fts5YYACTIONTYPEunsigned char)fts5yyact; | |||
| 3030 | fts5yymsp->major = (fts5YYCODETYPEunsigned char)fts5yygoto; | |||
| 3031 | fts5yyTraceShift(fts5yypParser, fts5yyact, "... then shift"); | |||
| 3032 | return fts5yyact; | |||
| 3033 | } | |||
| 3034 | ||||
| 3035 | /* | |||
| 3036 | ** The following code executes when the parse fails | |||
| | ** | |||
| | ** The routine pops every entry above the base of the parser stack with | |||
| | ** fts5yy_pop_parser_stack(). No %parse_failure code is present between | |||
| | ** the Begin/End markers below, so nothing else is reported here. The | |||
| | ** routine is compiled only when the preprocessor guard below holds. | |||
| 3037 | */ | |||
| 3038 | #ifndef fts5YYNOERRORRECOVERY1 | |||
| 3039 | static void fts5yy_parse_failed( | |||
| 3040 | fts5yyParser *fts5yypParser /* The parser */ | |||
| 3041 | ){ | |||
| 3042 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
| 3043 | sqlite3Fts5ParserCTX_FETCH | |||
| 3044 | #ifndef NDEBUG1 | |||
| 3045 | if( fts5yyTraceFILE ){ | |||
| 3046 | fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt); | |||
| 3047 | } | |||
| 3048 | #endif | |||
| 3049 | while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser); | |||
| 3050 | /* Here code is inserted which will be executed whenever the | |||
| 3051 | ** parser fails */ | |||
| 3052 | /************ Begin %parse_failure code ***************************************/ | |||
| 3053 | /************ End %parse_failure code *****************************************/ | |||
| 3054 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | |||
| 3055 | sqlite3Fts5ParserCTX_STORE | |||
| 3056 | } | |||
| 3057 | #endif /* fts5YYNOERRORRECOVERY */ | |||
| 3058 | ||||
| 3059 | /* | |||
| 3060 | ** The following code executes when a syntax error first occurs. | |||
| | ** | |||
| | ** A syntax-error message is recorded on the Fts5Parse context through | |||
| | ** sqlite3Fts5ParseError(). The message quotes the offending token using | |||
| | ** its n and p fields as the length and text arguments of a %.*s | |||
| | ** conversion. The major token code is not used. | |||
| 3061 | */ | |||
| 3062 | static void fts5yy_syntax_error( | |||
| 3063 | fts5yyParser *fts5yypParser, /* The parser */ | |||
| 3064 | int fts5yymajor, /* The major type of the error token */ | |||
| 3065 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyminor /* The minor type of the error token */ | |||
| 3066 | ){ | |||
| 3067 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
| 3068 | sqlite3Fts5ParserCTX_FETCH | |||
| 3069 | #define FTS5TOKENfts5yyminor fts5yyminor | |||
| 3070 | /************ Begin %syntax_error code ****************************************/ | |||
| 3071 | #line 30 "fts5parse.y" | |||
| 3072 | ||||
| 3073 | UNUSED_PARAM(fts5yymajor)(void)(fts5yymajor); /* Silence a compiler warning */ | |||
| 3074 | sqlite3Fts5ParseError( | |||
| 3075 | pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKENfts5yyminor.n,FTS5TOKENfts5yyminor.p | |||
| 3076 | ); | |||
| 3077 | #line 1304 "fts5parse.sql" | |||
| 3078 | /************ End %syntax_error code ******************************************/ | |||
| 3079 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | |||
| 3080 | sqlite3Fts5ParserCTX_STORE | |||
| 3081 | } | |||
| 3082 | ||||
| 3083 | /* | |||
| 3084 | ** The following is executed when the parser accepts | |||
| | ** | |||
| | ** When error recovery is compiled in (see the preprocessor guard | |||
| | ** below), the error counter fts5yyerrcnt is set to -1. The parser stack | |||
| | ** is expected to be back at its base entry by the time this runs; that | |||
| | ** is checked with assert() only. No %parse_accept code is present | |||
| | ** between the Begin/End markers below. | |||
| 3085 | */ | |||
| 3086 | static void fts5yy_accept( | |||
| 3087 | fts5yyParser *fts5yypParser /* The parser */ | |||
| 3088 | ){ | |||
| 3089 | sqlite3Fts5ParserARG_FETCHFts5Parse *pParse=fts5yypParser->pParse; | |||
| 3090 | sqlite3Fts5ParserCTX_FETCH | |||
| 3091 | #ifndef NDEBUG1 | |||
| 3092 | if( fts5yyTraceFILE ){ | |||
| 3093 | fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt); | |||
| 3094 | } | |||
| 3095 | #endif | |||
| 3096 | #ifndef fts5YYNOERRORRECOVERY1 | |||
| 3097 | fts5yypParser->fts5yyerrcnt = -1; | |||
| 3098 | #endif | |||
| 3099 | assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack )((void) (0)); | |||
| 3100 | /* Here code is inserted which will be executed whenever the | |||
| 3101 | ** parser accepts */ | |||
| 3102 | /*********** Begin %parse_accept code *****************************************/ | |||
| 3103 | /*********** End %parse_accept code *******************************************/ | |||
| 3104 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; /* Suppress warning about unused %extra_argument variable */ | |||
| 3105 | sqlite3Fts5ParserCTX_STORE | |||
| 3106 | } | |||
| 3107 | ||||
| 3108 | /* The main parser program. | |||
| 3109 | ** The first argument is a pointer to a structure obtained from | |||
| 3110 | ** "sqlite3Fts5ParserAlloc" which describes the current state of the parser. | |||
| 3111 | ** The second argument is the major token number. The third is | |||
| 3112 | ** the minor token. The fourth optional argument is whatever the | |||
| 3113 | ** user wants (and specified in the grammar) and is available for | |||
| 3114 | ** use by the action routines. | |||
| 3115 | ** | |||
| 3116 | ** Inputs: | |||
| 3117 | ** <ul> | |||
| 3118 | ** <li> A pointer to the parser (an opaque structure.) | |||
| 3119 | ** <li> The major token number. | |||
| 3120 | ** <li> The minor token number. | |||
| 3121 | ** <li> An optional argument of a grammar-specified type. | |||
| 3122 | ** </ul> | |||
| 3123 | ** | |||
| 3124 | ** Outputs: | |||
| 3125 | ** None. | |||
| 3126 | */ | |||
| 3127 | static void sqlite3Fts5Parser( | |||
| 3128 | void *fts5yyp, /* The parser */ | |||
| 3129 | int fts5yymajor, /* The major token code number */ | |||
| 3130 | sqlite3Fts5ParserFTS5TOKENTYPEFts5Token fts5yyminor /* The value for the token */ | |||
| 3131 | sqlite3Fts5ParserARG_PDECL,Fts5Parse *pParse /* Optional %extra_argument parameter */ | |||
| 3132 | ){ | |||
| 3133 | fts5YYMINORTYPE fts5yyminorunion; | |||
| 3134 | fts5YYACTIONTYPEunsigned char fts5yyact; /* The parser action. */ | |||
| 3135 | #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY1) | |||
| 3136 | int fts5yyendofinput; /* True if we are at the end of input */ | |||
| 3137 | #endif | |||
| 3138 | #ifdef fts5YYERRORSYMBOL | |||
| 3139 | int fts5yyerrorhit = 0; /* True if fts5yymajor has invoked an error */ | |||
| 3140 | #endif | |||
| 3141 | fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp; /* The parser */ | |||
| 3142 | sqlite3Fts5ParserCTX_FETCH | |||
| 3143 | sqlite3Fts5ParserARG_STOREfts5yypParser->pParse=pParse; | |||
| 3144 | ||||
| 3145 | assert( fts5yypParser->fts5yytos!=0 )((void) (0)); | |||
| 3146 | #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY1) | |||
| 3147 | fts5yyendofinput = (fts5yymajor==0); | |||
| 3148 | #endif | |||
| 3149 | ||||
| 3150 | fts5yyact = fts5yypParser->fts5yytos->stateno; | |||
| 3151 | #ifndef NDEBUG1 | |||
| 3152 | if( fts5yyTraceFILE ){ | |||
| 3153 | if( fts5yyact < fts5YY_MIN_REDUCE83 ){ | |||
| 3154 | fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n", | |||
| 3155 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact); | |||
| 3156 | }else{ | |||
| 3157 | fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n", | |||
| 3158 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE83); | |||
| 3159 | } | |||
| 3160 | } | |||
| 3161 | #endif | |||
| 3162 | ||||
| 3163 | while(1){ /* Exit by "break" */ | |||
| 3164 | assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack )((void) (0)); | |||
| 3165 | assert( fts5yyact==fts5yypParser->fts5yytos->stateno )((void) (0)); | |||
| 3166 | fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPEunsigned char)fts5yymajor,fts5yyact); | |||
| 3167 | if( fts5yyact >= fts5YY_MIN_REDUCE83 ){ | |||
| 3168 | unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE83; /* Reduce by this rule */ | |||
| 3169 | #ifndef NDEBUG1 | |||
| 3170 | assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) )((void) (0)); | |||
| 3171 | if( fts5yyTraceFILE ){ | |||
| 3172 | int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno]; | |||
| 3173 | if( fts5yysize ){ | |||
| 3174 | fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n", | |||
| 3175 | fts5yyTracePrompt, | |||
| 3176 | fts5yyruleno, fts5yyRuleName[fts5yyruleno], | |||
| 3177 | fts5yyruleno<fts5YYNRULE_WITH_ACTION28 ? "" : " without external action", | |||
| 3178 | fts5yypParser->fts5yytos[fts5yysize].stateno); | |||
| 3179 | }else{ | |||
| 3180 | fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n", | |||
| 3181 | fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno], | |||
| 3182 | fts5yyruleno<fts5YYNRULE_WITH_ACTION28 ? "" : " without external action"); | |||
| 3183 | } | |||
| 3184 | } | |||
| 3185 | #endif /* NDEBUG */ | |||
| 3186 | ||||
| 3187 | /* Check that the stack is large enough to grow by a single entry | |||
| 3188 | ** if the RHS of the rule is empty. This ensures that there is room | |||
| 3189 | ** enough on the stack to push the LHS value */ | |||
| 3190 | if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){ | |||
| 3191 | #ifdef fts5YYTRACKMAXSTACKDEPTH | |||
| 3192 | if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){ | |||
| 3193 | fts5yypParser->fts5yyhwm++; | |||
| 3194 | assert( fts5yypParser->fts5yyhwm ==((void) (0)) | |||
| 3195 | (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack))((void) (0)); | |||
| 3196 | } | |||
| 3197 | #endif | |||
| 3198 | if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){ | |||
| 3199 | if( fts5yyGrowStack(fts5yypParser)1 ){ | |||
| 3200 | fts5yyStackOverflow(fts5yypParser); | |||
| 3201 | break; | |||
| 3202 | } | |||
| 3203 | } | |||
| 3204 | } | |||
| 3205 | fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM); | |||
| 3206 | }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE79 ){ | |||
| 3207 | fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPEunsigned char)fts5yymajor,fts5yyminor); | |||
| 3208 | #ifndef fts5YYNOERRORRECOVERY1 | |||
| 3209 | fts5yypParser->fts5yyerrcnt--; | |||
| 3210 | #endif | |||
| 3211 | break; | |||
| 3212 | }else if( fts5yyact==fts5YY_ACCEPT_ACTION81 ){ | |||
| 3213 | fts5yypParser->fts5yytos--; | |||
| 3214 | fts5yy_accept(fts5yypParser); | |||
| 3215 | return; | |||
| 3216 | }else{ | |||
| 3217 | assert( fts5yyact == fts5YY_ERROR_ACTION )((void) (0)); | |||
| 3218 | fts5yyminorunion.fts5yy0 = fts5yyminor; | |||
| 3219 | #ifdef fts5YYERRORSYMBOL | |||
| 3220 | int fts5yymx; | |||
| 3221 | #endif | |||
| 3222 | #ifndef NDEBUG1 | |||
| 3223 | if( fts5yyTraceFILE ){ | |||
| 3224 | fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt); | |||
| 3225 | } | |||
| 3226 | #endif | |||
| 3227 | #ifdef fts5YYERRORSYMBOL | |||
| 3228 | /* A syntax error has occurred. | |||
| 3229 | ** The response to an error depends upon whether or not the | |||
| 3230 | ** grammar defines an error token "ERROR". | |||
| 3231 | ** | |||
| 3232 | ** This is what we do if the grammar does define ERROR: | |||
| 3233 | ** | |||
| 3234 | ** * Call the %syntax_error function. | |||
| 3235 | ** | |||
| 3236 | ** * Begin popping the stack until we enter a state where | |||
| 3237 | ** it is legal to shift the error symbol, then shift | |||
| 3238 | ** the error symbol. | |||
| 3239 | ** | |||
| 3240 | ** * Set the error count to three. | |||
| 3241 | ** | |||
| 3242 | ** * Begin accepting and shifting new tokens. No new error | |||
| 3243 | ** processing will occur until three tokens have been | |||
| 3244 | ** shifted successfully. | |||
| 3245 | ** | |||
| 3246 | */ | |||
| 3247 | if( fts5yypParser->fts5yyerrcnt<0 ){ | |||
| 3248 | fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor); | |||
| 3249 | } | |||
| 3250 | fts5yymx = fts5yypParser->fts5yytos->major; | |||
| 3251 | if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){ | |||
| 3252 | #ifndef NDEBUG1 | |||
| 3253 | if( fts5yyTraceFILE ){ | |||
| 3254 | fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n", | |||
| 3255 | fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]); | |||
| 3256 | } | |||
| 3257 | #endif | |||
| 3258 | fts5yy_destructor(fts5yypParser, (fts5YYCODETYPEunsigned char)fts5yymajor, &fts5yyminorunion); | |||
| 3259 | fts5yymajor = fts5YYNOCODE27; | |||
| 3260 | }else{ | |||
| 3261 | while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){ | |||
| 3262 | fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno, | |||
| 3263 | fts5YYERRORSYMBOL); | |||
| 3264 | if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE79 ) break; | |||
| 3265 | fts5yy_pop_parser_stack(fts5yypParser); | |||
| 3266 | } | |||
| 3267 | if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){ | |||
| 3268 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | |||
| 3269 | fts5yy_parse_failed(fts5yypParser); | |||
| 3270 | #ifndef fts5YYNOERRORRECOVERY1 | |||
| 3271 | fts5yypParser->fts5yyerrcnt = -1; | |||
| 3272 | #endif | |||
| 3273 | fts5yymajor = fts5YYNOCODE27; | |||
| 3274 | }else if( fts5yymx!=fts5YYERRORSYMBOL ){ | |||
| 3275 | fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor); | |||
| 3276 | } | |||
| 3277 | } | |||
| 3278 | fts5yypParser->fts5yyerrcnt = 3; | |||
| 3279 | fts5yyerrorhit = 1; | |||
| 3280 | if( fts5yymajor==fts5YYNOCODE27 ) break; | |||
| 3281 | fts5yyact = fts5yypParser->fts5yytos->stateno; | |||
| 3282 | #elif defined(fts5YYNOERRORRECOVERY1) | |||
| 3283 | /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to | |||
| 3284 | ** do any kind of error recovery. Instead, simply invoke the syntax | |||
| 3285 | ** error routine and continue going as if nothing had happened. | |||
| 3286 | ** | |||
| 3287 | ** Applications can set this macro (for example inside %include) if | |||
| 3288 | ** they intend to abandon the parse upon the first syntax error seen. | |||
| 3289 | */ | |||
| 3290 | fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor); | |||
| 3291 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | |||
| 3292 | break; | |||
| 3293 | #else /* fts5YYERRORSYMBOL is not defined */ | |||
| 3294 | /* This is what we do if the grammar does not define ERROR: | |||
| 3295 | ** | |||
| 3296 | ** * Report an error message, and throw away the input token. | |||
| 3297 | ** | |||
| 3298 | ** * If the input token is $, then fail the parse. | |||
| 3299 | ** | |||
| 3300 | ** As before, subsequent error messages are suppressed until | |||
| 3301 | ** three input tokens have been successfully shifted. | |||
| 3302 | */ | |||
| 3303 | if( fts5yypParser->fts5yyerrcnt<=0 ){ | |||
| 3304 | fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor); | |||
| 3305 | } | |||
| 3306 | fts5yypParser->fts5yyerrcnt = 3; | |||
| 3307 | fts5yy_destructor(fts5yypParser,(fts5YYCODETYPEunsigned char)fts5yymajor,&fts5yyminorunion); | |||
| 3308 | if( fts5yyendofinput ){ | |||
| 3309 | fts5yy_parse_failed(fts5yypParser); | |||
| 3310 | #ifndef fts5YYNOERRORRECOVERY1 | |||
| 3311 | fts5yypParser->fts5yyerrcnt = -1; | |||
| 3312 | #endif | |||
| 3313 | } | |||
| 3314 | break; | |||
| 3315 | #endif | |||
| 3316 | } | |||
| 3317 | } | |||
| 3318 | #ifndef NDEBUG1 | |||
| 3319 | if( fts5yyTraceFILE ){ | |||
| 3320 | fts5yyStackEntry *i; | |||
| 3321 | char cDiv = '['; | |||
| 3322 | fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt); | |||
| 3323 | for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){ | |||
| 3324 | fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]); | |||
| 3325 | cDiv = ' '; | |||
| 3326 | } | |||
| 3327 | fprintf(fts5yyTraceFILE,"]\n"); | |||
| 3328 | } | |||
| 3329 | #endif | |||
| 3330 | return; | |||
| 3331 | } | |||
| 3332 | ||||
/*
** Look up the fallback token for canonical token iToken.
**
** Returns 0 if iToken has no fallback. In builds where the grammar
** provides no fallback table (fts5YYFALLBACK is not defined) the
** result is always 0.
*/
static int sqlite3Fts5ParserFallback(int iToken){
#ifndef fts5YYFALLBACK
  /* No fallback table exists; the argument is intentionally unused. */
  (void)iToken;
  return 0;
#else
  assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) );
  return fts5yyFallback[iToken];
#endif
}
| 3346 | ||||
| 3347 | #line 1 "fts5_aux.c" | |||
| 3348 | /* | |||
| 3349 | ** 2014 May 31 | |||
| 3350 | ** | |||
| 3351 | ** The author disclaims copyright to this source code. In place of | |||
| 3352 | ** a legal notice, here is a blessing: | |||
| 3353 | ** | |||
| 3354 | ** May you do good and not evil. | |||
| 3355 | ** May you find forgiveness for yourself and forgive others. | |||
| 3356 | ** May you share freely, never taking more than you give. | |||
| 3357 | ** | |||
| 3358 | ****************************************************************************** | |||
| 3359 | */ | |||
| 3360 | ||||
| 3361 | ||||
| 3362 | /* #include "fts5Int.h" */ | |||
| 3363 | #include <math.h> /* amalgamator: keep */ | |||
| 3364 | ||||
| 3365 | /* | |||
| 3366 | ** Object used to iterate through all "coalesced phrase instances" in | |||
| 3367 | ** a single column of the current row. If the phrase instances in the | |||
| 3368 | ** column being considered do not overlap, this object simply iterates | |||
| 3369 | ** through them. Or, if they do overlap (share one or more tokens in | |||
| 3370 | ** common), each set of overlapping instances is treated as a single | |||
| 3371 | ** match. See documentation for the highlight() auxiliary function for | |||
| 3372 | ** details. | |||
| 3373 | ** | |||
| 3374 | ** Usage is: | |||
| 3375 | ** | |||
| 3376 | ** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter); | |||
| 3377 | ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); | |||
| 3378 | ** rc = fts5CInstIterNext(&iter) | |||
| 3379 | ** ){ | |||
| 3380 | ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); | |||
| 3381 | ** } | |||
| 3382 | ** | |||
| 3383 | */ | |||
| 3384 | typedef struct CInstIter CInstIter; | |||
| 3385 | struct CInstIter { | |||
| 3386 | const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ | |||
| 3387 | Fts5Context *pFts; /* First arg to pass to pApi functions */ | |||
| 3388 | int iCol; /* Column to search */ | |||
| 3389 | int iInst; /* Next phrase instance index */ | |||
| 3390 | int nInst; /* Total number of phrase instances */ | |||
| 3391 | ||||
| 3392 | /* Output variables */ | |||
| 3393 | int iStart; /* First token in coalesced phrase instance */ | |||
| 3394 | int iEnd; /* Last token in coalesced phrase instance */ | |||
| 3395 | }; | |||
| 3396 | ||||
| 3397 | /* | |||
| 3398 | ** Advance the iterator to the next coalesced phrase instance. Return | |||
| 3399 | ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. | |||
| 3400 | */ | |||
| 3401 | static int fts5CInstIterNext(CInstIter *pIter){ | |||
| 3402 | int rc = SQLITE_OK0; | |||
| 3403 | pIter->iStart = -1; | |||
| 3404 | pIter->iEnd = -1; | |||
| 3405 | ||||
| 3406 | while( rc==SQLITE_OK0 && pIter->iInst<pIter->nInst ){ | |||
| 3407 | int ip; int ic; int io; | |||
| 3408 | rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); | |||
| 3409 | if( rc==SQLITE_OK0 ){ | |||
| 3410 | if( ic==pIter->iCol ){ | |||
| 3411 | int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); | |||
| 3412 | if( pIter->iStart<0 ){ | |||
| 3413 | pIter->iStart = io; | |||
| 3414 | pIter->iEnd = iEnd; | |||
| 3415 | }else if( io<=pIter->iEnd ){ | |||
| 3416 | if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; | |||
| 3417 | }else{ | |||
| 3418 | break; | |||
| 3419 | } | |||
| 3420 | } | |||
| 3421 | pIter->iInst++; | |||
| 3422 | } | |||
| 3423 | } | |||
| 3424 | ||||
| 3425 | return rc; | |||
| 3426 | } | |||
| 3427 | ||||
| 3428 | /* | |||
| 3429 | ** Initialize the iterator object indicated by the final parameter to | |||
| 3430 | ** iterate through coalesced phrase instances in column iCol. | |||
| 3431 | */ | |||
| 3432 | static int fts5CInstIterInit( | |||
| 3433 | const Fts5ExtensionApi *pApi, | |||
| 3434 | Fts5Context *pFts, | |||
| 3435 | int iCol, | |||
| 3436 | CInstIter *pIter | |||
| 3437 | ){ | |||
| 3438 | int rc; | |||
| 3439 | ||||
| 3440 | memset(pIter, 0, sizeof(CInstIter)); | |||
| 3441 | pIter->pApi = pApi; | |||
| 3442 | pIter->pFts = pFts; | |||
| 3443 | pIter->iCol = iCol; | |||
| 3444 | rc = pApi->xInstCount(pFts, &pIter->nInst); | |||
| 3445 | ||||
| 3446 | if( rc==SQLITE_OK0 ){ | |||
| 3447 | rc = fts5CInstIterNext(pIter); | |||
| 3448 | } | |||
| 3449 | ||||
| 3450 | return rc; | |||
| 3451 | } | |||
| 3452 | ||||
| 3453 | ||||
| 3454 | ||||
| 3455 | /************************************************************************* | |||
| 3456 | ** Start of highlight() implementation. | |||
| 3457 | */ | |||
| 3458 | typedef struct HighlightContext HighlightContext; | |||
| 3459 | struct HighlightContext { | |||
| 3460 | /* Constant parameters to fts5HighlightCb() */ | |||
| 3461 | int iRangeStart; /* First token to include */ | |||
| 3462 | int iRangeEnd; /* If non-zero, last token to include */ | |||
| 3463 | const char *zOpen; /* Opening highlight */ | |||
| 3464 | const char *zClose; /* Closing highlight */ | |||
| 3465 | const char *zIn; /* Input text */ | |||
| 3466 | int nIn; /* Size of input text in bytes */ | |||
| 3467 | ||||
| 3468 | /* Variables modified by fts5HighlightCb() */ | |||
| 3469 | CInstIter iter; /* Coalesced Instance Iterator */ | |||
| 3470 | int iPos; /* Current token offset in zIn[] */ | |||
| 3471 | int iOff; /* Have copied up to this offset in zIn[] */ | |||
| 3472 | int bOpen; /* True if highlight is open */ | |||
| 3473 | char *zOut; /* Output value */ | |||
| 3474 | }; | |||
| 3475 | ||||
| 3476 | /* | |||
| 3477 | ** Append text to the HighlightContext output string - p->zOut. Argument | |||
| 3478 | ** z points to a buffer containing n bytes of text to append. If n is | |||
| 3479 | ** negative, everything up until the first '\0' is appended to the output. | |||
| 3480 | ** | |||
| 3481 | ** If *pRc is set to any value other than SQLITE_OK when this function is | |||
| 3482 | ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, | |||
| 3483 | ** *pRc is set to an error code before returning. | |||
| 3484 | */ | |||
| 3485 | static void fts5HighlightAppend( | |||
| 3486 | int *pRc, | |||
| 3487 | HighlightContext *p, | |||
| 3488 | const char *z, int n | |||
| 3489 | ){ | |||
| 3490 | if( *pRc==SQLITE_OK0 && z ){ | |||
| 3491 | if( n<0 ) n = (int)strlen(z); | |||
| 3492 | p->zOut = sqlite3_mprintfsqlite3_api->mprintf("%z%.*s", p->zOut, n, z); | |||
| 3493 | if( p->zOut==0 ) *pRc = SQLITE_NOMEM7; | |||
| 3494 | } | |||
| 3495 | } | |||
| 3496 | ||||
| 3497 | /* | |||
| 3498 | ** Tokenizer callback used by implementation of highlight() function. | |||
| 3499 | */ | |||
| 3500 | static int fts5HighlightCb( | |||
| 3501 | void *pContext, /* Pointer to HighlightContext object */ | |||
| 3502 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
| 3503 | const char *pToken, /* Buffer containing token */ | |||
| 3504 | int nToken, /* Size of token in bytes */ | |||
| 3505 | int iStartOff, /* Start byte offset of token */ | |||
| 3506 | int iEndOff /* End byte offset of token */ | |||
| 3507 | ){ | |||
| 3508 | HighlightContext *p = (HighlightContext*)pContext; | |||
| 3509 | int rc = SQLITE_OK0; | |||
| 3510 | int iPos; | |||
| 3511 | ||||
| 3512 | UNUSED_PARAM2(pToken, nToken)(void)(pToken), (void)(nToken); | |||
| 3513 | ||||
| 3514 | if( tflags & FTS5_TOKEN_COLOCATED0x0001 ) return SQLITE_OK0; | |||
| 3515 | iPos = p->iPos++; | |||
| 3516 | ||||
| 3517 | if( p->iRangeEnd>=0 ){ | |||
| 3518 | if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK0; | |||
| 3519 | if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; | |||
| 3520 | } | |||
| 3521 | ||||
| 3522 | /* If the parenthesis is open, and this token is not part of the current | |||
| 3523 | ** phrase, and the starting byte offset of this token is past the point | |||
| 3524 | ** that has currently been copied into the output buffer, close the | |||
| 3525 | ** parenthesis. */ | |||
| 3526 | if( p->bOpen | |||
| 3527 | && (iPos<=p->iter.iStart || p->iter.iStart<0) | |||
| 3528 | && iStartOff>p->iOff | |||
| 3529 | ){ | |||
| 3530 | fts5HighlightAppend(&rc, p, p->zClose, -1); | |||
| 3531 | p->bOpen = 0; | |||
| 3532 | } | |||
| 3533 | ||||
| 3534 | /* If this is the start of a new phrase, and the highlight is not open: | |||
| 3535 | ** | |||
| 3536 | ** * copy text from the input up to the start of the phrase, and | |||
| 3537 | ** * open the highlight. | |||
| 3538 | */ | |||
| 3539 | if( iPos==p->iter.iStart && p->bOpen==0 ){ | |||
| 3540 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); | |||
| 3541 | fts5HighlightAppend(&rc, p, p->zOpen, -1); | |||
| 3542 | p->iOff = iStartOff; | |||
| 3543 | p->bOpen = 1; | |||
| 3544 | } | |||
| 3545 | ||||
| 3546 | if( iPos==p->iter.iEnd ){ | |||
| 3547 | if( p->bOpen==0 ){ | |||
| 3548 | assert( p->iRangeEnd>=0 )((void) (0)); | |||
| 3549 | fts5HighlightAppend(&rc, p, p->zOpen, -1); | |||
| 3550 | p->bOpen = 1; | |||
| 3551 | } | |||
| 3552 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | |||
| 3553 | p->iOff = iEndOff; | |||
| 3554 | ||||
| 3555 | if( rc==SQLITE_OK0 ){ | |||
| 3556 | rc = fts5CInstIterNext(&p->iter); | |||
| 3557 | } | |||
| 3558 | } | |||
| 3559 | ||||
| 3560 | if( iPos==p->iRangeEnd ){ | |||
| 3561 | if( p->bOpen ){ | |||
| 3562 | if( p->iter.iStart>=0 && iPos>=p->iter.iStart ){ | |||
| 3563 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | |||
| 3564 | p->iOff = iEndOff; | |||
| 3565 | } | |||
| 3566 | fts5HighlightAppend(&rc, p, p->zClose, -1); | |||
| 3567 | p->bOpen = 0; | |||
| 3568 | } | |||
| 3569 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | |||
| 3570 | p->iOff = iEndOff; | |||
| 3571 | } | |||
| 3572 | ||||
| 3573 | return rc; | |||
| 3574 | } | |||
| 3575 | ||||
| 3576 | ||||
| 3577 | /* | |||
| 3578 | ** Implementation of highlight() function. | |||
| 3579 | */ | |||
| 3580 | static void fts5HighlightFunction( | |||
| 3581 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
| 3582 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
| 3583 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
| 3584 | int nVal, /* Number of values in apVal[] array */ | |||
| 3585 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
| 3586 | ){ | |||
| 3587 | HighlightContext ctx; | |||
| 3588 | int rc; | |||
| 3589 | int iCol; | |||
| 3590 | ||||
| 3591 | if( nVal!=3 ){ | |||
| 3592 | const char *zErr = "wrong number of arguments to function highlight()"; | |||
| 3593 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
| 3594 | return; | |||
| 3595 | } | |||
| 3596 | ||||
| 3597 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
| 3598 | memset(&ctx, 0, sizeof(HighlightContext)); | |||
| 3599 | ctx.zOpen = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[1]); | |||
| 3600 | ctx.zClose = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[2]); | |||
| 3601 | ctx.iRangeEnd = -1; | |||
| 3602 | rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); | |||
| 3603 | if( rc==SQLITE_RANGE25 ){ | |||
| 3604 | sqlite3_result_textsqlite3_api->result_text(pCtx, "", -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 3605 | rc = SQLITE_OK0; | |||
| 3606 | }else if( ctx.zIn ){ | |||
| 3607 | const char *pLoc = 0; /* Locale of column iCol */ | |||
| 3608 | int nLoc = 0; /* Size of pLoc in bytes */ | |||
| 3609 | if( rc==SQLITE_OK0 ){ | |||
| 3610 | rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); | |||
| 3611 | } | |||
| 3612 | ||||
| 3613 | if( rc==SQLITE_OK0 ){ | |||
| 3614 | rc = pApi->xColumnLocale(pFts, iCol, &pLoc, &nLoc); | |||
| 3615 | } | |||
| 3616 | if( rc==SQLITE_OK0 ){ | |||
| 3617 | rc = pApi->xTokenize_v2( | |||
| 3618 | pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx, fts5HighlightCb | |||
| 3619 | ); | |||
| 3620 | } | |||
| 3621 | if( ctx.bOpen ){ | |||
| 3622 | fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); | |||
| 3623 | } | |||
| 3624 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); | |||
| 3625 | ||||
| 3626 | if( rc==SQLITE_OK0 ){ | |||
| 3627 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 3628 | } | |||
| 3629 | sqlite3_freesqlite3_api->free(ctx.zOut); | |||
| 3630 | } | |||
| 3631 | if( rc!=SQLITE_OK0 ){ | |||
| 3632 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
| 3633 | } | |||
| 3634 | } | |||
| 3635 | /* | |||
| 3636 | ** End of highlight() implementation. | |||
| 3637 | **************************************************************************/ | |||
| 3638 | ||||
/*
** Context object handed to the fts5SentenceFinderCb() tokenizer callback.
** It accumulates, in aFirst[], the token positions that begin a sentence.
*/
typedef struct Fts5SFinder Fts5SFinder;
struct Fts5SFinder {
  int iPos;                       /* Position of the token being visited */
  int nFirstAlloc;                /* Number of slots allocated in aFirst[] */
  int nFirst;                     /* Number of slots of aFirst[] in use */
  int *aFirst;                    /* Position of first token of each sentence */
  const char *zDoc;               /* Text of the document being tokenized */
};
| 3650 | ||||
| 3651 | /* | |||
| 3652 | ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if | |||
| 3653 | ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an | |||
| 3654 | ** error occurs. | |||
| 3655 | */ | |||
| 3656 | static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ | |||
| 3657 | if( p->nFirstAlloc==p->nFirst ){ | |||
| 3658 | int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; | |||
| 3659 | int *aNew; | |||
| 3660 | ||||
| 3661 | aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(p->aFirst, nNew*sizeof(int)); | |||
| 3662 | if( aNew==0 ) return SQLITE_NOMEM7; | |||
| 3663 | p->aFirst = aNew; | |||
| 3664 | p->nFirstAlloc = nNew; | |||
| 3665 | } | |||
| 3666 | p->aFirst[p->nFirst++] = iAdd; | |||
| 3667 | return SQLITE_OK0; | |||
| 3668 | } | |||
| 3669 | ||||
| 3670 | /* | |||
| 3671 | ** This function is an xTokenize() callback used by the auxiliary snippet() | |||
| 3672 | ** function. Its job is to identify tokens that are the first in a sentence. | |||
| 3673 | ** For each such token, an entry is added to the SFinder.aFirst[] array. | |||
| 3674 | */ | |||
| 3675 | static int fts5SentenceFinderCb( | |||
| 3676 | void *pContext, /* Pointer to HighlightContext object */ | |||
| 3677 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
| 3678 | const char *pToken, /* Buffer containing token */ | |||
| 3679 | int nToken, /* Size of token in bytes */ | |||
| 3680 | int iStartOff, /* Start offset of token */ | |||
| 3681 | int iEndOff /* End offset of token */ | |||
| 3682 | ){ | |||
| 3683 | int rc = SQLITE_OK0; | |||
| 3684 | ||||
| 3685 | UNUSED_PARAM2(pToken, nToken)(void)(pToken), (void)(nToken); | |||
| 3686 | UNUSED_PARAM(iEndOff)(void)(iEndOff); | |||
| 3687 | ||||
| 3688 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ){ | |||
| 3689 | Fts5SFinder *p = (Fts5SFinder*)pContext; | |||
| 3690 | if( p->iPos>0 ){ | |||
| 3691 | int i; | |||
| 3692 | char c = 0; | |||
| 3693 | for(i=iStartOff-1; i>=0; i--){ | |||
| 3694 | c = p->zDoc[i]; | |||
| 3695 | if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break; | |||
| 3696 | } | |||
| 3697 | if( i!=iStartOff-1 && (c=='.' || c==':') ){ | |||
| 3698 | rc = fts5SentenceFinderAdd(p, p->iPos); | |||
| 3699 | } | |||
| 3700 | }else{ | |||
| 3701 | rc = fts5SentenceFinderAdd(p, 0); | |||
| 3702 | } | |||
| 3703 | p->iPos++; | |||
| 3704 | } | |||
| 3705 | return rc; | |||
| 3706 | } | |||
| 3707 | ||||
| 3708 | static int fts5SnippetScore( | |||
| 3709 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
| 3710 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
| 3711 | int nDocsize, /* Size of column in tokens */ | |||
| 3712 | unsigned char *aSeen, /* Array with one element per query phrase */ | |||
| 3713 | int iCol, /* Column to score */ | |||
| 3714 | int iPos, /* Starting offset to score */ | |||
| 3715 | int nToken, /* Max tokens per snippet */ | |||
| 3716 | int *pnScore, /* OUT: Score */ | |||
| 3717 | int *piPos /* OUT: Adjusted offset */ | |||
| 3718 | ){ | |||
| 3719 | int rc; | |||
| 3720 | int i; | |||
| 3721 | int ip = 0; | |||
| 3722 | int ic = 0; | |||
| 3723 | int iOff = 0; | |||
| 3724 | int iFirst = -1; | |||
| 3725 | int nInst; | |||
| 3726 | int nScore = 0; | |||
| 3727 | int iLast = 0; | |||
| 3728 | sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken; | |||
| 3729 | ||||
| 3730 | rc = pApi->xInstCount(pFts, &nInst); | |||
| 3731 | for(i=0; i<nInst && rc==SQLITE_OK0; i++){ | |||
| 3732 | rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); | |||
| 3733 | if( rc==SQLITE_OK0 && ic==iCol && iOff>=iPos && iOff<iEnd ){ | |||
| 3734 | nScore += (aSeen[ip] ? 1 : 1000); | |||
| 3735 | aSeen[ip] = 1; | |||
| 3736 | if( iFirst<0 ) iFirst = iOff; | |||
| 3737 | iLast = iOff + pApi->xPhraseSize(pFts, ip); | |||
| 3738 | } | |||
| 3739 | } | |||
| 3740 | ||||
| 3741 | *pnScore = nScore; | |||
| 3742 | if( piPos ){ | |||
| 3743 | sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2; | |||
| 3744 | if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken; | |||
| 3745 | if( iAdj<0 ) iAdj = 0; | |||
| 3746 | *piPos = (int)iAdj; | |||
| 3747 | } | |||
| 3748 | ||||
| 3749 | return rc; | |||
| 3750 | } | |||
| 3751 | ||||
| 3752 | /* | |||
| 3753 | ** Return the value in pVal interpreted as utf-8 text. Except, if pVal | |||
| 3754 | ** contains a NULL value, return a pointer to a static string zero | |||
| 3755 | ** bytes in length instead of a NULL pointer. | |||
| 3756 | */ | |||
| 3757 | static const char *fts5ValueToText(sqlite3_value *pVal){ | |||
| 3758 | const char *zRet = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
| 3759 | return zRet ? zRet : ""; | |||
| 3760 | } | |||
| 3761 | ||||
| 3762 | /* | |||
| 3763 | ** Implementation of snippet() function. | |||
| 3764 | */ | |||
| 3765 | static void fts5SnippetFunction( | |||
| 3766 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
| 3767 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
| 3768 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
| 3769 | int nVal, /* Number of values in apVal[] array */ | |||
| 3770 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
| 3771 | ){ | |||
| 3772 | HighlightContext ctx; | |||
| 3773 | int rc = SQLITE_OK0; /* Return code */ | |||
| 3774 | int iCol; /* 1st argument to snippet() */ | |||
| 3775 | const char *zEllips; /* 4th argument to snippet() */ | |||
| 3776 | int nToken; /* 5th argument to snippet() */ | |||
| 3777 | int nInst = 0; /* Number of instance matches this row */ | |||
| 3778 | int i; /* Used to iterate through instances */ | |||
| 3779 | int nPhrase; /* Number of phrases in query */ | |||
| 3780 | unsigned char *aSeen; /* Array of "seen instance" flags */ | |||
| 3781 | int iBestCol; /* Column containing best snippet */ | |||
| 3782 | int iBestStart = 0; /* First token of best snippet */ | |||
| 3783 | int nBestScore = 0; /* Score of best snippet */ | |||
| 3784 | int nColSize = 0; /* Total size of iBestCol in tokens */ | |||
| 3785 | Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ | |||
| 3786 | int nCol; | |||
| 3787 | ||||
| 3788 | if( nVal!=5 ){ | |||
| 3789 | const char *zErr = "wrong number of arguments to function snippet()"; | |||
| 3790 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
| 3791 | return; | |||
| 3792 | } | |||
| 3793 | ||||
| 3794 | nCol = pApi->xColumnCount(pFts); | |||
| 3795 | memset(&ctx, 0, sizeof(HighlightContext)); | |||
| 3796 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
| 3797 | ctx.zOpen = fts5ValueToText(apVal[1]); | |||
| 3798 | ctx.zClose = fts5ValueToText(apVal[2]); | |||
| 3799 | ctx.iRangeEnd = -1; | |||
| 3800 | zEllips = fts5ValueToText(apVal[3]); | |||
| 3801 | nToken = sqlite3_value_intsqlite3_api->value_int(apVal[4]); | |||
| 3802 | ||||
| 3803 | iBestCol = (iCol>=0 ? iCol : 0); | |||
| 3804 | nPhrase = pApi->xPhraseCount(pFts); | |||
| 3805 | aSeen = sqlite3_mallocsqlite3_api->malloc(nPhrase); | |||
| 3806 | if( aSeen==0 ){ | |||
| 3807 | rc = SQLITE_NOMEM7; | |||
| 3808 | } | |||
| 3809 | if( rc==SQLITE_OK0 ){ | |||
| 3810 | rc = pApi->xInstCount(pFts, &nInst); | |||
| 3811 | } | |||
| 3812 | ||||
| 3813 | memset(&sFinder, 0, sizeof(Fts5SFinder)); | |||
| 3814 | for(i=0; i<nCol; i++){ | |||
| 3815 | if( iCol<0 || iCol==i ){ | |||
| 3816 | const char *pLoc = 0; /* Locale of column iCol */ | |||
| 3817 | int nLoc = 0; /* Size of pLoc in bytes */ | |||
| 3818 | int nDoc; | |||
| 3819 | int nDocsize; | |||
| 3820 | int ii; | |||
| 3821 | sFinder.iPos = 0; | |||
| 3822 | sFinder.nFirst = 0; | |||
| 3823 | rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); | |||
| 3824 | if( rc!=SQLITE_OK0 ) break; | |||
| 3825 | rc = pApi->xColumnLocale(pFts, i, &pLoc, &nLoc); | |||
| 3826 | if( rc!=SQLITE_OK0 ) break; | |||
| 3827 | rc = pApi->xTokenize_v2(pFts, | |||
| 3828 | sFinder.zDoc, nDoc, pLoc, nLoc, (void*)&sFinder, fts5SentenceFinderCb | |||
| 3829 | ); | |||
| 3830 | if( rc!=SQLITE_OK0 ) break; | |||
| 3831 | rc = pApi->xColumnSize(pFts, i, &nDocsize); | |||
| 3832 | if( rc!=SQLITE_OK0 ) break; | |||
| 3833 | ||||
| 3834 | for(ii=0; rc==SQLITE_OK0 && ii<nInst; ii++){ | |||
| 3835 | int ip, ic, io; | |||
| 3836 | int iAdj; | |||
| 3837 | int nScore; | |||
| 3838 | int jj; | |||
| 3839 | ||||
| 3840 | rc = pApi->xInst(pFts, ii, &ip, &ic, &io); | |||
| 3841 | if( ic!=i ) continue; | |||
| 3842 | if( io>nDocsize ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 3843 | if( rc!=SQLITE_OK0 ) continue; | |||
| 3844 | memset(aSeen, 0, nPhrase); | |||
| 3845 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, | |||
| 3846 | io, nToken, &nScore, &iAdj | |||
| 3847 | ); | |||
| 3848 | if( rc==SQLITE_OK0 && nScore>nBestScore ){ | |||
| 3849 | nBestScore = nScore; | |||
| 3850 | iBestCol = i; | |||
| 3851 | iBestStart = iAdj; | |||
| 3852 | nColSize = nDocsize; | |||
| 3853 | } | |||
| 3854 | ||||
| 3855 | if( rc==SQLITE_OK0 && sFinder.nFirst && nDocsize>nToken ){ | |||
| 3856 | for(jj=0; jj<(sFinder.nFirst-1); jj++){ | |||
| 3857 | if( sFinder.aFirst[jj+1]>io ) break; | |||
| 3858 | } | |||
| 3859 | ||||
| 3860 | if( sFinder.aFirst[jj]<io ){ | |||
| 3861 | memset(aSeen, 0, nPhrase); | |||
| 3862 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, | |||
| 3863 | sFinder.aFirst[jj], nToken, &nScore, 0 | |||
| 3864 | ); | |||
| 3865 | ||||
| 3866 | nScore += (sFinder.aFirst[jj]==0 ? 120 : 100); | |||
| 3867 | if( rc==SQLITE_OK0 && nScore>nBestScore ){ | |||
| 3868 | nBestScore = nScore; | |||
| 3869 | iBestCol = i; | |||
| 3870 | iBestStart = sFinder.aFirst[jj]; | |||
| 3871 | nColSize = nDocsize; | |||
| 3872 | } | |||
| 3873 | } | |||
| 3874 | } | |||
| 3875 | } | |||
| 3876 | } | |||
| 3877 | } | |||
| 3878 | ||||
| 3879 | if( rc==SQLITE_OK0 ){ | |||
| 3880 | rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); | |||
| 3881 | } | |||
| 3882 | if( rc==SQLITE_OK0 && nColSize==0 ){ | |||
| 3883 | rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); | |||
| 3884 | } | |||
| 3885 | if( ctx.zIn ){ | |||
| 3886 | const char *pLoc = 0; /* Locale of column iBestCol */ | |||
| 3887 | int nLoc = 0; /* Bytes in pLoc */ | |||
| 3888 | ||||
| 3889 | if( rc==SQLITE_OK0 ){ | |||
| 3890 | rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); | |||
| 3891 | } | |||
| 3892 | ||||
| 3893 | ctx.iRangeStart = iBestStart; | |||
| 3894 | ctx.iRangeEnd = iBestStart + nToken - 1; | |||
| 3895 | ||||
| 3896 | if( iBestStart>0 ){ | |||
| 3897 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); | |||
| 3898 | } | |||
| 3899 | ||||
| 3900 | /* Advance iterator ctx.iter so that it points to the first coalesced | |||
| 3901 | ** phrase instance at or following position iBestStart. */ | |||
| 3902 | while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK0 ){ | |||
| 3903 | rc = fts5CInstIterNext(&ctx.iter); | |||
| 3904 | } | |||
| 3905 | ||||
| 3906 | if( rc==SQLITE_OK0 ){ | |||
| 3907 | rc = pApi->xColumnLocale(pFts, iBestCol, &pLoc, &nLoc); | |||
| 3908 | } | |||
| 3909 | if( rc==SQLITE_OK0 ){ | |||
| 3910 | rc = pApi->xTokenize_v2( | |||
| 3911 | pFts, ctx.zIn, ctx.nIn, pLoc, nLoc, (void*)&ctx,fts5HighlightCb | |||
| 3912 | ); | |||
| 3913 | } | |||
| 3914 | if( ctx.bOpen ){ | |||
| 3915 | fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); | |||
| 3916 | } | |||
| 3917 | if( ctx.iRangeEnd>=(nColSize-1) ){ | |||
| 3918 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); | |||
| 3919 | }else{ | |||
| 3920 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); | |||
| 3921 | } | |||
| 3922 | } | |||
| 3923 | if( rc==SQLITE_OK0 ){ | |||
| 3924 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 3925 | }else{ | |||
| 3926 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
| 3927 | } | |||
| 3928 | sqlite3_freesqlite3_api->free(ctx.zOut); | |||
| 3929 | sqlite3_freesqlite3_api->free(aSeen); | |||
| 3930 | sqlite3_freesqlite3_api->free(sFinder.aFirst); | |||
| 3931 | } | |||
| 3932 | ||||
| 3933 | /************************************************************************/ | |||
| 3934 | ||||
/*
** The first time the bm25() function is called for a query, an instance
** of the following structure is allocated and populated. The aIDF[] and
** aFreq[] arrays each hold nPhrase entries.
*/
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Array used to calculate phrase freq. */
};
| 3946 | ||||
| 3947 | /* | |||
| 3948 | ** Callback used by fts5Bm25GetData() to count the number of rows in the | |||
| 3949 | ** table matched by each individual phrase within the query. | |||
| 3950 | */ | |||
| 3951 | static int fts5CountCb( | |||
| 3952 | const Fts5ExtensionApi *pApi, | |||
| 3953 | Fts5Context *pFts, | |||
| 3954 | void *pUserData /* Pointer to sqlite3_int64 variable */ | |||
| 3955 | ){ | |||
| 3956 | sqlite3_int64 *pn = (sqlite3_int64*)pUserData; | |||
| 3957 | UNUSED_PARAM2(pApi, pFts)(void)(pApi), (void)(pFts); | |||
| 3958 | (*pn)++; | |||
| 3959 | return SQLITE_OK0; | |||
| 3960 | } | |||
| 3961 | ||||
| 3962 | /* | |||
| 3963 | ** Set *ppData to point to the Fts5Bm25Data object for the current query. | |||
| 3964 | ** If the object has not already been allocated, allocate and populate it | |||
| 3965 | ** now. | |||
| 3966 | */ | |||
| 3967 | static int fts5Bm25GetData( | |||
| 3968 | const Fts5ExtensionApi *pApi, | |||
| 3969 | Fts5Context *pFts, | |||
| 3970 | Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ | |||
| 3971 | ){ | |||
| 3972 | int rc = SQLITE_OK0; /* Return code */ | |||
| 3973 | Fts5Bm25Data *p; /* Object to return */ | |||
| 3974 | ||||
| 3975 | p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0); | |||
| 3976 | if( p==0 ){ | |||
| 3977 | int nPhrase; /* Number of phrases in query */ | |||
| 3978 | sqlite3_int64 nRow = 0; /* Number of rows in table */ | |||
| 3979 | sqlite3_int64 nToken = 0; /* Number of tokens in table */ | |||
| 3980 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
| 3981 | int i; | |||
| 3982 | ||||
| 3983 | /* Allocate the Fts5Bm25Data object */ | |||
| 3984 | nPhrase = pApi->xPhraseCount(pFts); | |||
| 3985 | nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); | |||
| 3986 | p = (Fts5Bm25Data*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 3987 | if( p==0 ){ | |||
| 3988 | rc = SQLITE_NOMEM7; | |||
| 3989 | }else{ | |||
| 3990 | memset(p, 0, (size_t)nByte); | |||
| 3991 | p->nPhrase = nPhrase; | |||
| 3992 | p->aIDF = (double*)&p[1]; | |||
| 3993 | p->aFreq = &p->aIDF[nPhrase]; | |||
| 3994 | } | |||
| 3995 | ||||
| 3996 | /* Calculate the average document length for this FTS5 table */ | |||
| 3997 | if( rc==SQLITE_OK0 ) rc = pApi->xRowCount(pFts, &nRow); | |||
| 3998 | assert( rc!=SQLITE_OK || nRow>0 )((void) (0)); | |||
| 3999 | if( rc==SQLITE_OK0 ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); | |||
| 4000 | if( rc==SQLITE_OK0 ) p->avgdl = (double)nToken / (double)nRow; | |||
| 4001 | ||||
| 4002 | /* Calculate an IDF for each phrase in the query */ | |||
| 4003 | for(i=0; rc==SQLITE_OK0 && i<nPhrase; i++){ | |||
| 4004 | sqlite3_int64 nHit = 0; | |||
| 4005 | rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); | |||
| 4006 | if( rc==SQLITE_OK0 ){ | |||
| 4007 | /* Calculate the IDF (Inverse Document Frequency) for phrase i. | |||
| 4008 | ** This is done using the standard BM25 formula as found on wikipedia: | |||
| 4009 | ** | |||
| 4010 | ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) | |||
| 4011 | ** | |||
| 4012 | ** where "N" is the total number of documents in the set and nHit | |||
| 4013 | ** is the number that contain at least one instance of the phrase | |||
| 4014 | ** under consideration. | |||
| 4015 | ** | |||
| 4016 | ** The problem with this is that if (N < 2*nHit), the IDF is | |||
| 4017 | ** negative. Which is undesirable. So the minimum allowable IDF is | |||
| 4018 | ** (1e-6) - roughly the same as a term that appears in just over | |||
| 4019 | ** half of set of 5,000,000 documents. */ | |||
| 4020 | double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); | |||
| 4021 | if( idf<=0.0 ) idf = 1e-6; | |||
| 4022 | p->aIDF[i] = idf; | |||
| 4023 | } | |||
| 4024 | } | |||
| 4025 | ||||
| 4026 | if( rc!=SQLITE_OK0 ){ | |||
| 4027 | sqlite3_freesqlite3_api->free(p); | |||
| 4028 | }else{ | |||
| 4029 | rc = pApi->xSetAuxdata(pFts, p, sqlite3_freesqlite3_api->free); | |||
| 4030 | } | |||
| 4031 | if( rc!=SQLITE_OK0 ) p = 0; | |||
| 4032 | } | |||
| 4033 | *ppData = p; | |||
| 4034 | return rc; | |||
| 4035 | } | |||
| 4036 | ||||
| 4037 | /* | |||
| 4038 | ** Implementation of bm25() function. | |||
| 4039 | */ | |||
| 4040 | static void fts5Bm25Function( | |||
| 4041 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
| 4042 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
| 4043 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
| 4044 | int nVal, /* Number of values in apVal[] array */ | |||
| 4045 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
| 4046 | ){ | |||
| 4047 | const double k1 = 1.2; /* Constant "k1" from BM25 formula */ | |||
| 4048 | const double b = 0.75; /* Constant "b" from BM25 formula */ | |||
| 4049 | int rc; /* Error code */ | |||
| 4050 | double score = 0.0; /* SQL function return value */ | |||
| 4051 | Fts5Bm25Data *pData; /* Values allocated/calculated once only */ | |||
| 4052 | int i; /* Iterator variable */ | |||
| 4053 | int nInst = 0; /* Value returned by xInstCount() */ | |||
| 4054 | double D = 0.0; /* Total number of tokens in row */ | |||
| 4055 | double *aFreq = 0; /* Array of phrase freq. for current row */ | |||
| 4056 | ||||
| 4057 | /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) | |||
| 4058 | ** for each phrase in the query for the current row. */ | |||
| 4059 | rc = fts5Bm25GetData(pApi, pFts, &pData); | |||
| 4060 | if( rc==SQLITE_OK0 ){ | |||
| 4061 | aFreq = pData->aFreq; | |||
| 4062 | memset(aFreq, 0, sizeof(double) * pData->nPhrase); | |||
| 4063 | rc = pApi->xInstCount(pFts, &nInst); | |||
| 4064 | } | |||
| 4065 | for(i=0; rc==SQLITE_OK0 && i<nInst; i++){ | |||
| 4066 | int ip; int ic; int io; | |||
| 4067 | rc = pApi->xInst(pFts, i, &ip, &ic, &io); | |||
| 4068 | if( rc==SQLITE_OK0 ){ | |||
| 4069 | double w = (nVal > ic) ? sqlite3_value_doublesqlite3_api->value_double(apVal[ic]) : 1.0; | |||
| 4070 | aFreq[ip] += w; | |||
| 4071 | } | |||
| 4072 | } | |||
| 4073 | ||||
| 4074 | /* Figure out the total size of the current row in tokens. */ | |||
| 4075 | if( rc==SQLITE_OK0 ){ | |||
| 4076 | int nTok; | |||
| 4077 | rc = pApi->xColumnSize(pFts, -1, &nTok); | |||
| 4078 | D = (double)nTok; | |||
| 4079 | } | |||
| 4080 | ||||
| 4081 | /* Determine and return the BM25 score for the current row. Or, if an | |||
| 4082 | ** error has occurred, throw an exception. */ | |||
| 4083 | if( rc==SQLITE_OK0 ){ | |||
| 4084 | for(i=0; i<pData->nPhrase; i++){ | |||
| 4085 | score += pData->aIDF[i] * ( | |||
| 4086 | ( aFreq[i] * (k1 + 1.0) ) / | |||
| 4087 | ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) | |||
| 4088 | ); | |||
| 4089 | } | |||
| 4090 | sqlite3_result_doublesqlite3_api->result_double(pCtx, -1.0 * score); | |||
| 4091 | }else{ | |||
| 4092 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
| 4093 | } | |||
| 4094 | } | |||
| 4095 | ||||
| 4096 | /* | |||
| 4097 | ** Implementation of fts5_get_locale() function. | |||
| 4098 | */ | |||
| 4099 | static void fts5GetLocaleFunction( | |||
| 4100 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |||
| 4101 | Fts5Context *pFts, /* First arg to pass to pApi functions */ | |||
| 4102 | sqlite3_context *pCtx, /* Context for returning result/error */ | |||
| 4103 | int nVal, /* Number of values in apVal[] array */ | |||
| 4104 | sqlite3_value **apVal /* Array of trailing arguments */ | |||
| 4105 | ){ | |||
| 4106 | int iCol = 0; | |||
| 4107 | int eType = 0; | |||
| 4108 | int rc = SQLITE_OK0; | |||
| 4109 | const char *zLocale = 0; | |||
| 4110 | int nLocale = 0; | |||
| 4111 | ||||
| 4112 | /* xColumnLocale() must be available */ | |||
| 4113 | assert( pApi->iVersion>=4 )((void) (0)); | |||
| 4114 | ||||
| 4115 | if( nVal!=1 ){ | |||
| 4116 | const char *z = "wrong number of arguments to function fts5_get_locale()"; | |||
| 4117 | sqlite3_result_errorsqlite3_api->result_error(pCtx, z, -1); | |||
| 4118 | return; | |||
| 4119 | } | |||
| 4120 | ||||
| 4121 | eType = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(apVal[0]); | |||
| 4122 | if( eType!=SQLITE_INTEGER1 ){ | |||
| 4123 | const char *z = "non-integer argument passed to function fts5_get_locale()"; | |||
| 4124 | sqlite3_result_errorsqlite3_api->result_error(pCtx, z, -1); | |||
| 4125 | return; | |||
| 4126 | } | |||
| 4127 | ||||
| 4128 | iCol = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
| 4129 | if( iCol<0 || iCol>=pApi->xColumnCount(pFts) ){ | |||
| 4130 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, SQLITE_RANGE25); | |||
| 4131 | return; | |||
| 4132 | } | |||
| 4133 | ||||
| 4134 | rc = pApi->xColumnLocale(pFts, iCol, &zLocale, &nLocale); | |||
| 4135 | if( rc!=SQLITE_OK0 ){ | |||
| 4136 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
| 4137 | return; | |||
| 4138 | } | |||
| 4139 | ||||
| 4140 | sqlite3_result_textsqlite3_api->result_text(pCtx, zLocale, nLocale, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 4141 | } | |||
| 4142 | ||||
| 4143 | static int sqlite3Fts5AuxInit(fts5_api *pApi){ | |||
| 4144 | struct Builtin { | |||
| 4145 | const char *zFunc; /* Function name (nul-terminated) */ | |||
| 4146 | void *pUserData; /* User-data pointer */ | |||
| 4147 | fts5_extension_function xFunc;/* Callback function */ | |||
| 4148 | void (*xDestroy)(void*); /* Destructor function */ | |||
| 4149 | } aBuiltin [] = { | |||
| 4150 | { "snippet", 0, fts5SnippetFunction, 0 }, | |||
| 4151 | { "highlight", 0, fts5HighlightFunction, 0 }, | |||
| 4152 | { "bm25", 0, fts5Bm25Function, 0 }, | |||
| 4153 | { "fts5_get_locale", 0, fts5GetLocaleFunction, 0 }, | |||
| 4154 | }; | |||
| 4155 | int rc = SQLITE_OK0; /* Return code */ | |||
| 4156 | int i; /* To iterate through builtin functions */ | |||
| 4157 | ||||
| 4158 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aBuiltin)((int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]))); i++){ | |||
| 4159 | rc = pApi->xCreateFunction(pApi, | |||
| 4160 | aBuiltin[i].zFunc, | |||
| 4161 | aBuiltin[i].pUserData, | |||
| 4162 | aBuiltin[i].xFunc, | |||
| 4163 | aBuiltin[i].xDestroy | |||
| 4164 | ); | |||
| 4165 | } | |||
| 4166 | ||||
| 4167 | return rc; | |||
| 4168 | } | |||
| 4169 | ||||
| 4170 | #line 1 "fts5_buffer.c" | |||
| 4171 | /* | |||
| 4172 | ** 2014 May 31 | |||
| 4173 | ** | |||
| 4174 | ** The author disclaims copyright to this source code. In place of | |||
| 4175 | ** a legal notice, here is a blessing: | |||
| 4176 | ** | |||
| 4177 | ** May you do good and not evil. | |||
| 4178 | ** May you find forgiveness for yourself and forgive others. | |||
| 4179 | ** May you share freely, never taking more than you give. | |||
| 4180 | ** | |||
| 4181 | ****************************************************************************** | |||
| 4182 | */ | |||
| 4183 | ||||
| 4184 | ||||
| 4185 | ||||
| 4186 | /* #include "fts5Int.h" */ | |||
| 4187 | ||||
| 4188 | static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){ | |||
| 4189 | if( (u32)pBuf->nSpace<nByte ){ | |||
| 4190 | u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64; | |||
| 4191 | u8 *pNew; | |||
| 4192 | while( nNew<nByte ){ | |||
| 4193 | nNew = nNew * 2; | |||
| 4194 | } | |||
| 4195 | pNew = sqlite3_realloc64sqlite3_api->realloc64(pBuf->p, nNew); | |||
| 4196 | if( pNew==0 ){ | |||
| 4197 | *pRc = SQLITE_NOMEM7; | |||
| 4198 | return 1; | |||
| 4199 | }else{ | |||
| 4200 | pBuf->nSpace = (int)nNew; | |||
| 4201 | pBuf->p = pNew; | |||
| 4202 | } | |||
| 4203 | } | |||
| 4204 | return 0; | |||
| 4205 | } | |||
| 4206 | ||||
| 4207 | ||||
| 4208 | /* | |||
| 4209 | ** Encode value iVal as an SQLite varint and append it to the buffer object | |||
| 4210 | ** pBuf. If an OOM error occurs, set the error code in p. | |||
| 4211 | */ | |||
| 4212 | static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ | |||
| 4213 | if( fts5BufferGrow(pRc, pBuf, 9)( (u32)((pBuf)->n) + (u32)(9) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(9)+(pBuf)->n) ) ) return; | |||
| 4214 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal); | |||
| 4215 | } | |||
| 4216 | ||||
| 4217 | static void sqlite3Fts5Put32(u8 *aBuf, int iVal){ | |||
| 4218 | aBuf[0] = (iVal>>24) & 0x00FF; | |||
| 4219 | aBuf[1] = (iVal>>16) & 0x00FF; | |||
| 4220 | aBuf[2] = (iVal>> 8) & 0x00FF; | |||
| 4221 | aBuf[3] = (iVal>> 0) & 0x00FF; | |||
| 4222 | } | |||
| 4223 | ||||
| 4224 | static int sqlite3Fts5Get32(const u8 *aBuf){ | |||
| 4225 | return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]); | |||
| 4226 | } | |||
| 4227 | ||||
| 4228 | /* | |||
| 4229 | ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set | |||
| 4230 | ** the error code in p. If an error has already occurred when this function | |||
| 4231 | ** is called, it is a no-op. | |||
| 4232 | */ | |||
| 4233 | static void sqlite3Fts5BufferAppendBlob( | |||
| 4234 | int *pRc, | |||
| 4235 | Fts5Buffer *pBuf, | |||
| 4236 | u32 nData, | |||
| 4237 | const u8 *pData | |||
| 4238 | ){ | |||
| 4239 | if( nData ){ | |||
| 4240 | if( fts5BufferGrow(pRc, pBuf, nData)( (u32)((pBuf)->n) + (u32)(nData) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),(nData)+(pBuf)-> n) ) ) return; | |||
| 4241 | assert( pBuf->p!=0 )((void) (0)); | |||
| 4242 | memcpy(&pBuf->p[pBuf->n], pData, nData); | |||
| 4243 | pBuf->n += nData; | |||
| 4244 | } | |||
| 4245 | } | |||
| 4246 | ||||
| 4247 | /* | |||
| 4248 | ** Append the nul-terminated string zStr to the buffer pBuf. This function | |||
| 4249 | ** ensures that the byte following the buffer data is set to 0x00, even | |||
| 4250 | ** though this byte is not included in the pBuf->n count. | |||
| 4251 | */ | |||
| 4252 | static void sqlite3Fts5BufferAppendString( | |||
| 4253 | int *pRc, | |||
| 4254 | Fts5Buffer *pBuf, | |||
| 4255 | const char *zStr | |||
| 4256 | ){ | |||
| 4257 | int nStr = (int)strlen(zStr); | |||
| 4258 | sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr); | |||
| 4259 | pBuf->n--; | |||
| 4260 | } | |||
| 4261 | ||||
| 4262 | /* | |||
| 4263 | ** Argument zFmt is a printf() style format string. This function performs | |||
| 4264 | ** the printf() style processing, then appends the results to buffer pBuf. | |||
| 4265 | ** | |||
| 4266 | ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte | |||
| 4267 | ** following the buffer data is set to 0x00, even though this byte is not | |||
| 4268 | ** included in the pBuf->n count. | |||
| 4269 | */ | |||
| 4270 | static void sqlite3Fts5BufferAppendPrintf( | |||
| 4271 | int *pRc, | |||
| 4272 | Fts5Buffer *pBuf, | |||
| 4273 | char *zFmt, ... | |||
| 4274 | ){ | |||
| 4275 | if( *pRc==SQLITE_OK0 ){ | |||
| 4276 | char *zTmp; | |||
| 4277 | va_list ap; | |||
| 4278 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
| 4279 | zTmp = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
| 4280 | va_end(ap)__builtin_va_end(ap); | |||
| 4281 | ||||
| 4282 | if( zTmp==0 ){ | |||
| 4283 | *pRc = SQLITE_NOMEM7; | |||
| 4284 | }else{ | |||
| 4285 | sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); | |||
| 4286 | sqlite3_freesqlite3_api->free(zTmp); | |||
| 4287 | } | |||
| 4288 | } | |||
| 4289 | } | |||
| 4290 | ||||
| 4291 | static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){ | |||
| 4292 | char *zRet = 0; | |||
| 4293 | if( *pRc==SQLITE_OK0 ){ | |||
| 4294 | va_list ap; | |||
| 4295 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
| 4296 | zRet = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
| 4297 | va_end(ap)__builtin_va_end(ap); | |||
| 4298 | if( zRet==0 ){ | |||
| 4299 | *pRc = SQLITE_NOMEM7; | |||
| 4300 | } | |||
| 4301 | } | |||
| 4302 | return zRet; | |||
| 4303 | } | |||
| 4304 | ||||
| 4305 | ||||
| 4306 | /* | |||
| 4307 | ** Free any buffer allocated by pBuf. Zero the structure before returning. | |||
| 4308 | */ | |||
| 4309 | static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ | |||
| 4310 | sqlite3_freesqlite3_api->free(pBuf->p); | |||
| 4311 | memset(pBuf, 0, sizeof(Fts5Buffer)); | |||
| 4312 | } | |||
| 4313 | ||||
| 4314 | /* | |||
| 4315 | ** Zero the contents of the buffer object. But do not free the associated | |||
| 4316 | ** memory allocation. | |||
| 4317 | */ | |||
| 4318 | static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){ | |||
| 4319 | pBuf->n = 0; | |||
| 4320 | } | |||
| 4321 | ||||
| 4322 | /* | |||
| 4323 | ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an | |||
| 4324 | ** the error code in p. If an error has already occurred when this function | |||
| 4325 | ** is called, it is a no-op. | |||
| 4326 | */ | |||
| 4327 | static void sqlite3Fts5BufferSet( | |||
| 4328 | int *pRc, | |||
| 4329 | Fts5Buffer *pBuf, | |||
| 4330 | int nData, | |||
| 4331 | const u8 *pData | |||
| 4332 | ){ | |||
| 4333 | pBuf->n = 0; | |||
| 4334 | sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); | |||
| 4335 | } | |||
| 4336 | ||||
| 4337 | static int sqlite3Fts5PoslistNext64( | |||
| 4338 | const u8 *a, int n, /* Buffer containing poslist */ | |||
| 4339 | int *pi, /* IN/OUT: Offset within a[] */ | |||
| 4340 | i64 *piOff /* IN/OUT: Current offset */ | |||
| 4341 | ){ | |||
| 4342 | int i = *pi; | |||
| 4343 | assert( a!=0 || i==0 )((void) (0)); | |||
| 4344 | if( i>=n ){ | |||
| 4345 | /* EOF */ | |||
| 4346 | *piOff = -1; | |||
| 4347 | return 1; | |||
| 4348 | }else{ | |||
| 4349 | i64 iOff = *piOff; | |||
| 4350 | u32 iVal; | |||
| 4351 | assert( a!=0 )((void) (0)); | |||
| 4352 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | |||
| 4353 | if( iVal<=1 ){ | |||
| 4354 | if( iVal==0 ){ | |||
| 4355 | *pi = i; | |||
| 4356 | return 0; | |||
| 4357 | } | |||
| 4358 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | |||
| 4359 | iOff = ((i64)iVal) << 32; | |||
| 4360 | assert( iOff>=0 )((void) (0)); | |||
| 4361 | fts5FastGetVarint32(a, i, iVal){ iVal = (a)[i++]; if( iVal & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(a)[i],(u32*)&(iVal)); } }; | |||
| 4362 | if( iVal<2 ){ | |||
| 4363 | /* This is a corrupt record. So stop parsing it here. */ | |||
| 4364 | *piOff = -1; | |||
| 4365 | return 1; | |||
| 4366 | } | |||
| 4367 | *piOff = iOff + ((iVal-2) & 0x7FFFFFFF); | |||
| 4368 | }else{ | |||
| 4369 | *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF); | |||
| 4370 | } | |||
| 4371 | *pi = i; | |||
| 4372 | assert_nc( *piOff>=iOff )((void) (0)); | |||
| 4373 | return 0; | |||
| 4374 | } | |||
| 4375 | } | |||
| 4376 | ||||
| 4377 | ||||
| 4378 | /* | |||
| 4379 | ** Advance the iterator object passed as the only argument. Return true | |||
| 4380 | ** if the iterator reaches EOF, or false otherwise. | |||
| 4381 | */ | |||
| 4382 | static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ | |||
| 4383 | if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){ | |||
| 4384 | pIter->bEof = 1; | |||
| 4385 | } | |||
| 4386 | return pIter->bEof; | |||
| 4387 | } | |||
| 4388 | ||||
| 4389 | static int sqlite3Fts5PoslistReaderInit( | |||
| 4390 | const u8 *a, int n, /* Poslist buffer to iterate through */ | |||
| 4391 | Fts5PoslistReader *pIter /* Iterator object to initialize */ | |||
| 4392 | ){ | |||
| 4393 | memset(pIter, 0, sizeof(*pIter)); | |||
| 4394 | pIter->a = a; | |||
| 4395 | pIter->n = n; | |||
| 4396 | sqlite3Fts5PoslistReaderNext(pIter); | |||
| 4397 | return pIter->bEof; | |||
| 4398 | } | |||
| 4399 | ||||
| 4400 | /* | |||
| 4401 | ** Append position iPos to the position list being accumulated in buffer | |||
| 4402 | ** pBuf, which must be already be large enough to hold the new data. | |||
| 4403 | ** The previous position written to this list is *piPrev. *piPrev is set | |||
| 4404 | ** to iPos before returning. | |||
| 4405 | */ | |||
| 4406 | static void sqlite3Fts5PoslistSafeAppend( | |||
| 4407 | Fts5Buffer *pBuf, | |||
| 4408 | i64 *piPrev, | |||
| 4409 | i64 iPos | |||
| 4410 | ){ | |||
| 4411 | if( iPos>=*piPrev ){ | |||
| 4412 | static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32; | |||
| 4413 | if( (iPos & colmask) != (*piPrev & colmask) ){ | |||
| 4414 | pBuf->p[pBuf->n++] = 1; | |||
| 4415 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32)); | |||
| 4416 | *piPrev = (iPos & colmask); | |||
| 4417 | } | |||
| 4418 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2); | |||
| 4419 | *piPrev = iPos; | |||
| 4420 | } | |||
| 4421 | } | |||
| 4422 | ||||
| 4423 | static int sqlite3Fts5PoslistWriterAppend( | |||
| 4424 | Fts5Buffer *pBuf, | |||
| 4425 | Fts5PoslistWriter *pWriter, | |||
| 4426 | i64 iPos | |||
| 4427 | ){ | |||
| 4428 | int rc = 0; /* Initialized only to suppress erroneous warning from Clang */ | |||
| 4429 | if( fts5BufferGrow(&rc, pBuf, 5+5+5)( (u32)((pBuf)->n) + (u32)(5+5+5) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((&rc),(pBuf),(5+5+5)+(pBuf) ->n) ) ) return rc; | |||
| 4430 | sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos); | |||
| 4431 | return SQLITE_OK0; | |||
| 4432 | } | |||
| 4433 | ||||
| 4434 | static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){ | |||
| 4435 | void *pRet = 0; | |||
| 4436 | if( *pRc==SQLITE_OK0 ){ | |||
| 4437 | pRet = sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 4438 | if( pRet==0 ){ | |||
| 4439 | if( nByte>0 ) *pRc = SQLITE_NOMEM7; | |||
| 4440 | }else{ | |||
| 4441 | memset(pRet, 0, (size_t)nByte); | |||
| 4442 | } | |||
| 4443 | } | |||
| 4444 | return pRet; | |||
| 4445 | } | |||
| 4446 | ||||
| 4447 | /* | |||
| 4448 | ** Return a nul-terminated copy of the string indicated by pIn. If nIn | |||
| 4449 | ** is non-negative, then it is the length of the string in bytes. Otherwise, | |||
| 4450 | ** the length of the string is determined using strlen(). | |||
| 4451 | ** | |||
| 4452 | ** It is the responsibility of the caller to eventually free the returned | |||
| 4453 | ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. | |||
| 4454 | */ | |||
| 4455 | static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ | |||
| 4456 | char *zRet = 0; | |||
| 4457 | if( *pRc==SQLITE_OK0 ){ | |||
| 4458 | if( nIn<0 ){ | |||
| 4459 | nIn = (int)strlen(pIn); | |||
| 4460 | } | |||
| 4461 | zRet = (char*)sqlite3_mallocsqlite3_api->malloc(nIn+1); | |||
| 4462 | if( zRet ){ | |||
| 4463 | memcpy(zRet, pIn, nIn); | |||
| 4464 | zRet[nIn] = '\0'; | |||
| 4465 | }else{ | |||
| 4466 | *pRc = SQLITE_NOMEM7; | |||
| 4467 | } | |||
| 4468 | } | |||
| 4469 | return zRet; | |||
| 4470 | } | |||
| 4471 | ||||
| 4472 | ||||
/*
** Return true if character 't' may be part of an FTS5 bareword, or false
** otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters,
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).
**   * The unicode "substitute" character (0x1A).
**
** The lookup table holds one 0/1 flag per ASCII code point. It is static
** and const so that it is not rebuilt on the stack on every call. The
** table is only consulted when bit 0x80 of t is clear, so the index is
** always in the range 0..127.
*/
static int sqlite3Fts5IsBareword(char t){
  static const unsigned char aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };

  return (t & 0x80) || aBareword[(int)t];
}
| 4497 | ||||
| 4498 | ||||
| 4499 | /************************************************************************* | |||
| 4500 | */ | |||
/*
** A termset is a hash table of (index, term) pairs. Each entry is linked
** into one of the 512 buckets of Fts5Termset.apHash[] via its pNext
** pointer.
*/
typedef struct Fts5TermsetEntry Fts5TermsetEntry;
struct Fts5TermsetEntry {
  char *pTerm;                    /* Term data (nTerm bytes) */
  int nTerm;                      /* Size of pTerm in bytes */
  int iIdx;                       /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext;        /* Next entry in the same hash bucket */
};

struct Fts5Termset {
  Fts5TermsetEntry *apHash[512];  /* Hash buckets */
};
| 4512 | ||||
| 4513 | static int sqlite3Fts5TermsetNew(Fts5Termset **pp){ | |||
| 4514 | int rc = SQLITE_OK0; | |||
| 4515 | *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); | |||
| 4516 | return rc; | |||
| 4517 | } | |||
| 4518 | ||||
| 4519 | static int sqlite3Fts5TermsetAdd( | |||
| 4520 | Fts5Termset *p, | |||
| 4521 | int iIdx, | |||
| 4522 | const char *pTerm, int nTerm, | |||
| 4523 | int *pbPresent | |||
| 4524 | ){ | |||
| 4525 | int rc = SQLITE_OK0; | |||
| 4526 | *pbPresent = 0; | |||
| 4527 | if( p ){ | |||
| 4528 | int i; | |||
| 4529 | u32 hash = 13; | |||
| 4530 | Fts5TermsetEntry *pEntry; | |||
| 4531 | ||||
| 4532 | /* Calculate a hash value for this term. This is the same hash checksum | |||
| 4533 | ** used by the fts5_hash.c module. This is not important for correct | |||
| 4534 | ** operation of the module, but is necessary to ensure that some tests | |||
| 4535 | ** designed to produce hash table collisions really do work. */ | |||
| 4536 | for(i=nTerm-1; i>=0; i--){ | |||
| 4537 | hash = (hash << 3) ^ hash ^ pTerm[i]; | |||
| 4538 | } | |||
| 4539 | hash = (hash << 3) ^ hash ^ iIdx; | |||
| 4540 | hash = hash % ArraySize(p->apHash)((int)(sizeof(p->apHash) / sizeof(p->apHash[0]))); | |||
| 4541 | ||||
| 4542 | for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ | |||
| 4543 | if( pEntry->iIdx==iIdx | |||
| 4544 | && pEntry->nTerm==nTerm | |||
| 4545 | && memcmp(pEntry->pTerm, pTerm, nTerm)==0 | |||
| 4546 | ){ | |||
| 4547 | *pbPresent = 1; | |||
| 4548 | break; | |||
| 4549 | } | |||
| 4550 | } | |||
| 4551 | ||||
| 4552 | if( pEntry==0 ){ | |||
| 4553 | pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); | |||
| 4554 | if( pEntry ){ | |||
| 4555 | pEntry->pTerm = (char*)&pEntry[1]; | |||
| 4556 | pEntry->nTerm = nTerm; | |||
| 4557 | pEntry->iIdx = iIdx; | |||
| 4558 | memcpy(pEntry->pTerm, pTerm, nTerm); | |||
| 4559 | pEntry->pNext = p->apHash[hash]; | |||
| 4560 | p->apHash[hash] = pEntry; | |||
| 4561 | } | |||
| 4562 | } | |||
| 4563 | } | |||
| 4564 | ||||
| 4565 | return rc; | |||
| 4566 | } | |||
| 4567 | ||||
| 4568 | static void sqlite3Fts5TermsetFree(Fts5Termset *p){ | |||
| 4569 | if( p ){ | |||
| 4570 | u32 i; | |||
| 4571 | for(i=0; i<ArraySize(p->apHash)((int)(sizeof(p->apHash) / sizeof(p->apHash[0]))); i++){ | |||
| 4572 | Fts5TermsetEntry *pEntry = p->apHash[i]; | |||
| 4573 | while( pEntry ){ | |||
| 4574 | Fts5TermsetEntry *pDel = pEntry; | |||
| 4575 | pEntry = pEntry->pNext; | |||
| 4576 | sqlite3_freesqlite3_api->free(pDel); | |||
| 4577 | } | |||
| 4578 | } | |||
| 4579 | sqlite3_freesqlite3_api->free(p); | |||
| 4580 | } | |||
| 4581 | } | |||
| 4582 | ||||
| 4583 | #line 1 "fts5_config.c" | |||
| 4584 | /* | |||
| 4585 | ** 2014 Jun 09 | |||
| 4586 | ** | |||
| 4587 | ** The author disclaims copyright to this source code. In place of | |||
| 4588 | ** a legal notice, here is a blessing: | |||
| 4589 | ** | |||
| 4590 | ** May you do good and not evil. | |||
| 4591 | ** May you find forgiveness for yourself and forgive others. | |||
| 4592 | ** May you share freely, never taking more than you give. | |||
| 4593 | ** | |||
| 4594 | ****************************************************************************** | |||
| 4595 | ** | |||
| 4596 | ** This is an SQLite module implementing full-text search. | |||
| 4597 | */ | |||
| 4598 | ||||
| 4599 | ||||
| 4600 | /* #include "fts5Int.h" */ | |||
| 4601 | ||||
#define FTS5_DEFAULT_PAGE_SIZE   4050
#define FTS5_DEFAULT_AUTOMERGE      4
#define FTS5_DEFAULT_USERMERGE      4
#define FTS5_DEFAULT_CRISISMERGE   16
#define FTS5_DEFAULT_HASHSIZE    (1024*1024)

#define FTS5_DEFAULT_DELETE_AUTOMERGE 10      /* default 10% */

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (64*1024)
| 4612 | ||||
/*
** Return true if character x is a space (0x20), or false otherwise. No
** other character is treated as white-space by this function.
*/
static int fts5_iswhitespace(char x){
  return (x==' ');
}
| 4616 | ||||
/*
** Return true if character x is one of the four open-quote characters:
** double-quote, single-quote, '[' or back-tick. Return false otherwise.
*/
static int fts5_isopenquote(char x){
  return (x=='"' || x=='\'' || x=='[' || x=='`');
}
| 4620 | ||||
/*
** Argument pIn points to a character that is part of a nul-terminated
** string. Return a pointer to the first character at or following *pIn
** in the string that is not a white-space character, as determined by
** fts5_iswhitespace(). If pIn is NULL, NULL is returned.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *p = pIn;
  if( p ){
    while( fts5_iswhitespace(*p) ){ p++; }
  }
  return p;
}
| 4633 | ||||
/*
** Argument pIn points to a character that is part of a nul-terminated
** string. Return a pointer to the first character at or following *pIn
** in the string that is not a "bareword" character, as determined by
** sqlite3Fts5IsBareword(). If *pIn itself is not a bareword character,
** so that no characters are skipped, NULL is returned.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *p = pIn;
  while ( sqlite3Fts5IsBareword(*p) ) p++;
  if( p==pIn ) p = 0;
  return p;
}
| 4645 | ||||
/*
** Return true if character a is an ASCII decimal digit ('0'..'9'), or
** false otherwise.
*/
static int fts5_isdigit(char a){
  return (a>='0' && a<='9');
}
| 4649 | ||||
| 4650 | ||||
| 4651 | ||||
/*
** Argument pIn points to the first character of what is expected to be an
** SQL literal embedded in a nul-terminated string. The forms recognized
** are:
**
**   * NULL (matched case-insensitively),
**   * a blob literal: x'...' or X'...' containing an even number of
**     hexadecimal digits,
**   * a string literal enclosed in single quotes, within which two
**     consecutive single quotes represent one quote character,
**   * a number: an optional '+' or '-', zero or more digits, optionally
**     followed by a '.' and one or more digits. At least one character
**     must be consumed. Exponents are not recognized.
**
** Return a pointer to the character immediately following the literal,
** or NULL if no literal could be parsed at pIn.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) counts the leading "x'" plus the hex digits, so it is
        ** even exactly when the number of hex digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      /* Scan for the closing quote. The nul-terminator is tested for
      ** before each byte is consumed, so that an input consisting of a
      ** lone open-quote cannot cause a read past the end of the string. */
      for(p++; ; p++){
        if( *p=='\0' ){
          p = 0;                  /* Unterminated string literal */
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;   /* That was the closing quote */
          /* Otherwise it was an escaped quote ('') - keep scanning */
        }
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by further digits. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
| 4714 | ||||
/*
** The first character of the nul-terminated string z is guaranteed to be
** an open-quote character (see function fts5_isopenquote()). The matching
** close-quote is the same character, except that '[' is closed by ']'.
**
** This function dequotes the string in place: the open-quote is dropped,
** each pair of consecutive close-quote characters is replaced by a single
** one, and a new nul-terminator is written after the dequoted text.
**
** If the close-quote is found, the value returned is the byte offset
** (within the original input) of the character immediately following it.
** If the input ends before a close-quote is seen, everything following
** the open-quote is treated as content and the offset of the original
** nul-terminator is returned.
*/
static int fts5Dequote(char *z){
  int iRead = 1;                  /* Offset of next input byte in z[] */
  int iWrite = 0;                 /* Offset of next output byte in z[] */
  char cClose = z[0];             /* The close-quote character */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  for(;;){
    char c = z[iRead];
    if( c=='\0' ) break;
    if( c!=cClose ){
      /* Ordinary character. Copy it to the output. */
      z[iWrite++] = c;
      iRead++;
    }else if( z[iRead+1]==cClose ){
      /* Two close-quote characters in a row form an escaped quote. Emit
      ** a single quote character and skip past both input bytes. */
      z[iWrite++] = cClose;
      iRead += 2;
    }else{
      /* This is the close-quote. Step over it and stop. */
      iRead++;
      break;
    }
  }

  z[iWrite] = '\0';
  return iRead;
}
| 4759 | ||||
/*
** If the nul-terminated string z begins with a quote character, convert
** it from an SQL-style quoted string into a normal string by removing
** the quotes. The conversion is done in place using fts5Dequote(). If z
** does not begin with a quote character, this routine is a no-op.
**
** The first character of z must not be white-space.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
static void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      /* Not quoted - leave the string as it is */
      break;
  }
}
| 4782 | ||||
| 4783 | ||||
/*
** One entry in a table that maps keyword strings to integer values.
** fts5ConfigSetEnum() expects such a table to be terminated by an entry
** with zName set to NULL.
*/
typedef struct Fts5Enum Fts5Enum;
struct Fts5Enum {
  const char *zName;              /* Keyword text */
  int eVal;                       /* Value the keyword maps to */
};
| 4789 | ||||
| 4790 | static int fts5ConfigSetEnum( | |||
| 4791 | const Fts5Enum *aEnum, | |||
| 4792 | const char *zEnum, | |||
| 4793 | int *peVal | |||
| 4794 | ){ | |||
| 4795 | int nEnum = (int)strlen(zEnum); | |||
| 4796 | int i; | |||
| 4797 | int iVal = -1; | |||
| 4798 | ||||
| 4799 | for(i=0; aEnum[i].zName; i++){ | |||
| 4800 | if( sqlite3_strnicmpsqlite3_api->strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){ | |||
| 4801 | if( iVal>=0 ) return SQLITE_ERROR1; | |||
| 4802 | iVal = aEnum[i].eVal; | |||
| 4803 | } | |||
| 4804 | } | |||
| 4805 | ||||
| 4806 | *peVal = iVal; | |||
| 4807 | return iVal<0 ? SQLITE_ERROR1 : SQLITE_OK0; | |||
| 4808 | } | |||
| 4809 | ||||
| 4810 | /* | |||
| 4811 | ** Parse a "special" CREATE VIRTUAL TABLE directive and update | |||
| 4812 | ** configuration object pConfig as appropriate. | |||
| 4813 | ** | |||
| 4814 | ** If successful, object pConfig is updated and SQLITE_OK returned. If | |||
| 4815 | ** an error occurs, an SQLite error code is returned and an error message | |||
| 4816 | ** may be left in *pzErr. It is the responsibility of the caller to | |||
| 4817 | ** eventually free any such error message using sqlite3_free(). | |||
| 4818 | */ | |||
| 4819 | static int fts5ConfigParseSpecial( | |||
| 4820 | Fts5Config *pConfig, /* Configuration object to update */ | |||
| 4821 | const char *zCmd, /* Special command to parse */ | |||
| 4822 | const char *zArg, /* Argument to parse */ | |||
| 4823 | char **pzErr /* OUT: Error message */ | |||
| 4824 | ){ | |||
| 4825 | int rc = SQLITE_OK0; | |||
| 4826 | int nCmd = (int)strlen(zCmd); | |||
| 4827 | ||||
| 4828 | if( sqlite3_strnicmpsqlite3_api->strnicmp("prefix", zCmd, nCmd)==0 ){ | |||
| 4829 | const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES31; | |||
| 4830 | const char *p; | |||
| 4831 | int bFirst = 1; | |||
| 4832 | if( pConfig->aPrefix==0 ){ | |||
| 4833 | pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte); | |||
| 4834 | if( rc ) return rc; | |||
| 4835 | } | |||
| 4836 | ||||
| 4837 | p = zArg; | |||
| 4838 | while( 1 ){ | |||
| 4839 | int nPre = 0; | |||
| 4840 | ||||
| 4841 | while( p[0]==' ' ) p++; | |||
| 4842 | if( bFirst==0 && p[0]==',' ){ | |||
| 4843 | p++; | |||
| 4844 | while( p[0]==' ' ) p++; | |||
| 4845 | }else if( p[0]=='\0' ){ | |||
| 4846 | break; | |||
| 4847 | } | |||
| 4848 | if( p[0]<'0' || p[0]>'9' ){ | |||
| 4849 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed prefix=... directive"); | |||
| 4850 | rc = SQLITE_ERROR1; | |||
| 4851 | break; | |||
| 4852 | } | |||
| 4853 | ||||
| 4854 | if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES31 ){ | |||
| 4855 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 4856 | "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES31 | |||
| 4857 | ); | |||
| 4858 | rc = SQLITE_ERROR1; | |||
| 4859 | break; | |||
| 4860 | } | |||
| 4861 | ||||
| 4862 | while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ | |||
| 4863 | nPre = nPre*10 + (p[0] - '0'); | |||
| 4864 | p++; | |||
| 4865 | } | |||
| 4866 | ||||
| 4867 | if( nPre<=0 || nPre>=1000 ){ | |||
| 4868 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("prefix length out of range (max 999)"); | |||
| 4869 | rc = SQLITE_ERROR1; | |||
| 4870 | break; | |||
| 4871 | } | |||
| 4872 | ||||
| 4873 | pConfig->aPrefix[pConfig->nPrefix] = nPre; | |||
| 4874 | pConfig->nPrefix++; | |||
| 4875 | bFirst = 0; | |||
| 4876 | } | |||
| 4877 | assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES )((void) (0)); | |||
| 4878 | return rc; | |||
| 4879 | } | |||
| 4880 | ||||
| 4881 | if( sqlite3_strnicmpsqlite3_api->strnicmp("tokenize", zCmd, nCmd)==0 ){ | |||
| 4882 | const char *p = (const char*)zArg; | |||
| 4883 | sqlite3_int64 nArg = strlen(zArg) + 1; | |||
| 4884 | char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg); | |||
| 4885 | ||||
| 4886 | if( azArg ){ | |||
| 4887 | char *pSpace = (char*)&azArg[nArg]; | |||
| 4888 | if( pConfig->t.azArg ){ | |||
| 4889 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple tokenize=... directives"); | |||
| 4890 | rc = SQLITE_ERROR1; | |||
| 4891 | }else{ | |||
| 4892 | for(nArg=0; p && *p; nArg++){ | |||
| 4893 | const char *p2 = fts5ConfigSkipWhitespace(p); | |||
| 4894 | if( *p2=='\'' ){ | |||
| 4895 | p = fts5ConfigSkipLiteral(p2); | |||
| 4896 | }else{ | |||
| 4897 | p = fts5ConfigSkipBareword(p2); | |||
| 4898 | } | |||
| 4899 | if( p ){ | |||
| 4900 | memcpy(pSpace, p2, p-p2); | |||
| 4901 | azArg[nArg] = pSpace; | |||
| 4902 | sqlite3Fts5Dequote(pSpace); | |||
| 4903 | pSpace += (p - p2) + 1; | |||
| 4904 | p = fts5ConfigSkipWhitespace(p); | |||
| 4905 | } | |||
| 4906 | } | |||
| 4907 | if( p==0 ){ | |||
| 4908 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("parse error in tokenize directive"); | |||
| 4909 | rc = SQLITE_ERROR1; | |||
| 4910 | }else{ | |||
| 4911 | pConfig->t.azArg = (const char**)azArg; | |||
| 4912 | pConfig->t.nArg = nArg; | |||
| 4913 | azArg = 0; | |||
| 4914 | } | |||
| 4915 | } | |||
| 4916 | } | |||
| 4917 | sqlite3_freesqlite3_api->free(azArg); | |||
| 4918 | ||||
| 4919 | return rc; | |||
| 4920 | } | |||
| 4921 | ||||
| 4922 | if( sqlite3_strnicmpsqlite3_api->strnicmp("content", zCmd, nCmd)==0 ){ | |||
| 4923 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 ){ | |||
| 4924 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple content=... directives"); | |||
| 4925 | rc = SQLITE_ERROR1; | |||
| 4926 | }else{ | |||
| 4927 | if( zArg[0] ){ | |||
| 4928 | pConfig->eContent = FTS5_CONTENT_EXTERNAL2; | |||
| 4929 | pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg); | |||
| 4930 | }else{ | |||
| 4931 | pConfig->eContent = FTS5_CONTENT_NONE1; | |||
| 4932 | } | |||
| 4933 | } | |||
| 4934 | return rc; | |||
| 4935 | } | |||
| 4936 | ||||
| 4937 | if( sqlite3_strnicmpsqlite3_api->strnicmp("contentless_delete", zCmd, nCmd)==0 ){ | |||
| 4938 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
| 4939 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed contentless_delete=... directive"); | |||
| 4940 | rc = SQLITE_ERROR1; | |||
| 4941 | }else{ | |||
| 4942 | pConfig->bContentlessDelete = (zArg[0]=='1'); | |||
| 4943 | } | |||
| 4944 | return rc; | |||
| 4945 | } | |||
| 4946 | ||||
| 4947 | if( sqlite3_strnicmpsqlite3_api->strnicmp("contentless_unindexed", zCmd, nCmd)==0 ){ | |||
| 4948 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
| 4949 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed contentless_delete=... directive"); | |||
| 4950 | rc = SQLITE_ERROR1; | |||
| 4951 | }else{ | |||
| 4952 | pConfig->bContentlessUnindexed = (zArg[0]=='1'); | |||
| 4953 | } | |||
| 4954 | return rc; | |||
| 4955 | } | |||
| 4956 | ||||
| 4957 | if( sqlite3_strnicmpsqlite3_api->strnicmp("content_rowid", zCmd, nCmd)==0 ){ | |||
| 4958 | if( pConfig->zContentRowid ){ | |||
| 4959 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("multiple content_rowid=... directives"); | |||
| 4960 | rc = SQLITE_ERROR1; | |||
| 4961 | }else{ | |||
| 4962 | pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1); | |||
| 4963 | } | |||
| 4964 | return rc; | |||
| 4965 | } | |||
| 4966 | ||||
| 4967 | if( sqlite3_strnicmpsqlite3_api->strnicmp("columnsize", zCmd, nCmd)==0 ){ | |||
| 4968 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
| 4969 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed columnsize=... directive"); | |||
| 4970 | rc = SQLITE_ERROR1; | |||
| 4971 | }else{ | |||
| 4972 | pConfig->bColumnsize = (zArg[0]=='1'); | |||
| 4973 | } | |||
| 4974 | return rc; | |||
| 4975 | } | |||
| 4976 | ||||
| 4977 | if( sqlite3_strnicmpsqlite3_api->strnicmp("locale", zCmd, nCmd)==0 ){ | |||
| 4978 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
| 4979 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed locale=... directive"); | |||
| 4980 | rc = SQLITE_ERROR1; | |||
| 4981 | }else{ | |||
| 4982 | pConfig->bLocale = (zArg[0]=='1'); | |||
| 4983 | } | |||
| 4984 | return rc; | |||
| 4985 | } | |||
| 4986 | ||||
| 4987 | if( sqlite3_strnicmpsqlite3_api->strnicmp("detail", zCmd, nCmd)==0 ){ | |||
| 4988 | const Fts5Enum aDetail[] = { | |||
| 4989 | { "none", FTS5_DETAIL_NONE1 }, | |||
| 4990 | { "full", FTS5_DETAIL_FULL0 }, | |||
| 4991 | { "columns", FTS5_DETAIL_COLUMNS2 }, | |||
| 4992 | { 0, 0 } | |||
| 4993 | }; | |||
| 4994 | ||||
| 4995 | if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){ | |||
| 4996 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed detail=... directive"); | |||
| 4997 | } | |||
| 4998 | return rc; | |||
| 4999 | } | |||
| 5000 | ||||
| 5001 | if( sqlite3_strnicmpsqlite3_api->strnicmp("tokendata", zCmd, nCmd)==0 ){ | |||
| 5002 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ | |||
| 5003 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed tokendata=... directive"); | |||
| 5004 | rc = SQLITE_ERROR1; | |||
| 5005 | }else{ | |||
| 5006 | pConfig->bTokendata = (zArg[0]=='1'); | |||
| 5007 | } | |||
| 5008 | return rc; | |||
| 5009 | } | |||
| 5010 | ||||
| 5011 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); | |||
| 5012 | return SQLITE_ERROR1; | |||
| 5013 | } | |||
| 5014 | ||||
| 5015 | /* | |||
| 5016 | ** Gobble up the first bareword or quoted word from the input buffer zIn. | |||
| 5017 | ** Return a pointer to the character immediately following the last in | |||
| 5018 | ** the gobbled word if successful, or a NULL pointer otherwise (failed | |||
| 5019 | ** to find close-quote character). | |||
| 5020 | ** | |||
| 5021 | ** Before returning, set pzOut to point to a new buffer containing a | |||
| 5022 | ** nul-terminated, dequoted copy of the gobbled word. If the word was | |||
| 5023 | ** quoted, *pbQuoted is also set to 1 before returning. | |||
| 5024 | ** | |||
| 5025 | ** If *pRc is other than SQLITE_OK when this function is called, it is | |||
| 5026 | ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this | |||
| 5027 | ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* | |||
| 5028 | ** set if a parse error (failed to find close quote) occurs. | |||
| 5029 | */ | |||
| 5030 | static const char *fts5ConfigGobbleWord( | |||
| 5031 | int *pRc, /* IN/OUT: Error code */ | |||
| 5032 | const char *zIn, /* Buffer to gobble string/bareword from */ | |||
| 5033 | char **pzOut, /* OUT: malloc'd buffer containing str/bw */ | |||
| 5034 | int *pbQuoted /* OUT: Set to true if dequoting required */ | |||
| 5035 | ){ | |||
| 5036 | const char *zRet = 0; | |||
| 5037 | ||||
| 5038 | sqlite3_int64 nIn = strlen(zIn); | |||
| 5039 | char *zOut = sqlite3_malloc64sqlite3_api->malloc64(nIn+1); | |||
| 5040 | ||||
| 5041 | assert( *pRc==SQLITE_OK )((void) (0)); | |||
| 5042 | *pbQuoted = 0; | |||
| 5043 | *pzOut = 0; | |||
| 5044 | ||||
| 5045 | if( zOut==0 ){ | |||
| 5046 | *pRc = SQLITE_NOMEM7; | |||
| 5047 | }else{ | |||
| 5048 | memcpy(zOut, zIn, (size_t)(nIn+1)); | |||
| 5049 | if( fts5_isopenquote(zOut[0]) ){ | |||
| 5050 | int ii = fts5Dequote(zOut); | |||
| 5051 | zRet = &zIn[ii]; | |||
| 5052 | *pbQuoted = 1; | |||
| 5053 | }else{ | |||
| 5054 | zRet = fts5ConfigSkipBareword(zIn); | |||
| 5055 | if( zRet ){ | |||
| 5056 | zOut[zRet-zIn] = '\0'; | |||
| 5057 | } | |||
| 5058 | } | |||
| 5059 | } | |||
| 5060 | ||||
| 5061 | if( zRet==0 ){ | |||
| 5062 | sqlite3_freesqlite3_api->free(zOut); | |||
| 5063 | }else{ | |||
| 5064 | *pzOut = zOut; | |||
| 5065 | } | |||
| 5066 | ||||
| 5067 | return zRet; | |||
| 5068 | } | |||
| 5069 | ||||
| 5070 | static int fts5ConfigParseColumn( | |||
| 5071 | Fts5Config *p, | |||
| 5072 | char *zCol, | |||
| 5073 | char *zArg, | |||
| 5074 | char **pzErr, | |||
| 5075 | int *pbUnindexed | |||
| 5076 | ){ | |||
| 5077 | int rc = SQLITE_OK0; | |||
| 5078 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zCol, FTS5_RANK_NAME"rank") | |||
| 5079 | || 0==sqlite3_stricmpsqlite3_api->stricmp(zCol, FTS5_ROWID_NAME"rowid") | |||
| 5080 | ){ | |||
| 5081 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("reserved fts5 column name: %s", zCol); | |||
| 5082 | rc = SQLITE_ERROR1; | |||
| 5083 | }else if( zArg ){ | |||
| 5084 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zArg, "unindexed") ){ | |||
| 5085 | p->abUnindexed[p->nCol] = 1; | |||
| 5086 | *pbUnindexed = 1; | |||
| 5087 | }else{ | |||
| 5088 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unrecognized column option: %s", zArg); | |||
| 5089 | rc = SQLITE_ERROR1; | |||
| 5090 | } | |||
| 5091 | } | |||
| 5092 | ||||
| 5093 | p->azCol[p->nCol++] = zCol; | |||
| 5094 | return rc; | |||
| 5095 | } | |||
| 5096 | ||||
| 5097 | /* | |||
| 5098 | ** Populate the Fts5Config.zContentExprlist string. | |||
| 5099 | */ | |||
| 5100 | static int fts5ConfigMakeExprlist(Fts5Config *p){ | |||
| 5101 | int i; | |||
| 5102 | int rc = SQLITE_OK0; | |||
| 5103 | Fts5Buffer buf = {0, 0, 0}; | |||
| 5104 | ||||
| 5105 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); | |||
| 5106 | if( p->eContent!=FTS5_CONTENT_NONE1 ){ | |||
| 5107 | assert( p->eContent==FTS5_CONTENT_EXTERNAL((void) (0)) | |||
| 5108 | || p->eContent==FTS5_CONTENT_NORMAL((void) (0)) | |||
| 5109 | || p->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | |||
| 5110 | )((void) (0)); | |||
| 5111 | for(i=0; i<p->nCol; i++){ | |||
| 5112 | if( p->eContent==FTS5_CONTENT_EXTERNAL2 ){ | |||
| 5113 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); | |||
| 5114 | }else if( p->eContent==FTS5_CONTENT_NORMAL0 || p->abUnindexed[i] ){ | |||
| 5115 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); | |||
| 5116 | }else{ | |||
| 5117 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); | |||
| 5118 | } | |||
| 5119 | } | |||
| 5120 | } | |||
| 5121 | if( p->eContent==FTS5_CONTENT_NORMAL0 && p->bLocale ){ | |||
| 5122 | for(i=0; i<p->nCol; i++){ | |||
| 5123 | if( p->abUnindexed[i]==0 ){ | |||
| 5124 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.l%d", i); | |||
| 5125 | }else{ | |||
| 5126 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); | |||
| 5127 | } | |||
| 5128 | } | |||
| 5129 | } | |||
| 5130 | ||||
| 5131 | assert( p->zContentExprlist==0 )((void) (0)); | |||
| 5132 | p->zContentExprlist = (char*)buf.p; | |||
| 5133 | return rc; | |||
| 5134 | } | |||
| 5135 | ||||
| 5136 | /* | |||
| 5137 | ** Arguments nArg/azArg contain the string arguments passed to the xCreate | |||
| 5138 | ** or xConnect method of the virtual table. This function attempts to | |||
| 5139 | ** allocate an instance of Fts5Config containing the results of parsing | |||
| 5140 | ** those arguments. | |||
| 5141 | ** | |||
| 5142 | ** If successful, SQLITE_OK is returned and *ppOut is set to point to the | |||
| 5143 | ** new Fts5Config object. If an error occurs, an SQLite error code is | |||
| 5144 | ** returned, *ppOut is set to NULL and an error message may be left in | |||
| 5145 | ** *pzErr. It is the responsibility of the caller to eventually free any | |||
| 5146 | ** such error message using sqlite3_free(). | |||
| 5147 | */ | |||
| 5148 | static int sqlite3Fts5ConfigParse( | |||
| 5149 | Fts5Global *pGlobal, | |||
| 5150 | sqlite3 *db, | |||
| 5151 | int nArg, /* Number of arguments */ | |||
| 5152 | const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ | |||
| 5153 | Fts5Config **ppOut, /* OUT: Results of parse */ | |||
| 5154 | char **pzErr /* OUT: Error message */ | |||
| 5155 | ){ | |||
| 5156 | int rc = SQLITE_OK0; /* Return code */ | |||
| 5157 | Fts5Config *pRet; /* New object to return */ | |||
| 5158 | int i; | |||
| 5159 | sqlite3_int64 nByte; | |||
| 5160 | int bUnindexed = 0; /* True if there are one or more UNINDEXED */ | |||
| 5161 | ||||
| 5162 | *ppOut = pRet = (Fts5Config*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Config)); | |||
| 5163 | if( pRet==0 ) return SQLITE_NOMEM7; | |||
| 5164 | memset(pRet, 0, sizeof(Fts5Config)); | |||
| 5165 | pRet->pGlobal = pGlobal; | |||
| 5166 | pRet->db = db; | |||
| 5167 | pRet->iCookie = -1; | |||
| 5168 | ||||
| 5169 | nByte = nArg * (sizeof(char*) + sizeof(u8)); | |||
| 5170 | pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); | |||
| 5171 | pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0; | |||
| 5172 | pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); | |||
| 5173 | pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); | |||
| 5174 | pRet->bColumnsize = 1; | |||
| 5175 | pRet->eDetail = FTS5_DETAIL_FULL0; | |||
| 5176 | #ifdef SQLITE_DEBUG | |||
| 5177 | pRet->bPrefixIndex = 1; | |||
| 5178 | #endif | |||
| 5179 | if( rc==SQLITE_OK0 && sqlite3_stricmpsqlite3_api->stricmp(pRet->zName, FTS5_RANK_NAME"rank")==0 ){ | |||
| 5180 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("reserved fts5 table name: %s", pRet->zName); | |||
| 5181 | rc = SQLITE_ERROR1; | |||
| 5182 | } | |||
| 5183 | ||||
| 5184 | assert( (pRet->abUnindexed && pRet->azCol) || rc!=SQLITE_OK )((void) (0)); | |||
| 5185 | for(i=3; rc==SQLITE_OK0 && i<nArg; i++){ | |||
| 5186 | const char *zOrig = azArg[i]; | |||
| 5187 | const char *z; | |||
| 5188 | char *zOne = 0; | |||
| 5189 | char *zTwo = 0; | |||
| 5190 | int bOption = 0; | |||
| 5191 | int bMustBeCol = 0; | |||
| 5192 | ||||
| 5193 | z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); | |||
| 5194 | z = fts5ConfigSkipWhitespace(z); | |||
| 5195 | if( z && *z=='=' ){ | |||
| 5196 | bOption = 1; | |||
| 5197 | assert( zOne!=0 )((void) (0)); | |||
| 5198 | z++; | |||
| 5199 | if( bMustBeCol ) z = 0; | |||
| 5200 | } | |||
| 5201 | z = fts5ConfigSkipWhitespace(z); | |||
| 5202 | if( z && z[0] ){ | |||
| 5203 | int bDummy; | |||
| 5204 | z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); | |||
| 5205 | if( z && z[0] ) z = 0; | |||
| 5206 | } | |||
| 5207 | ||||
| 5208 | if( rc==SQLITE_OK0 ){ | |||
| 5209 | if( z==0 ){ | |||
| 5210 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("parse error in \"%s\"", zOrig); | |||
| 5211 | rc = SQLITE_ERROR1; | |||
| 5212 | }else{ | |||
| 5213 | if( bOption ){ | |||
| 5214 | rc = fts5ConfigParseSpecial(pRet, | |||
| 5215 | ALWAYS(zOne)(zOne)?zOne:"", | |||
| 5216 | zTwo?zTwo:"", | |||
| 5217 | pzErr | |||
| 5218 | ); | |||
| 5219 | }else{ | |||
| 5220 | rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr, &bUnindexed); | |||
| 5221 | zOne = 0; | |||
| 5222 | } | |||
| 5223 | } | |||
| 5224 | } | |||
| 5225 | ||||
| 5226 | sqlite3_freesqlite3_api->free(zOne); | |||
| 5227 | sqlite3_freesqlite3_api->free(zTwo); | |||
| 5228 | } | |||
| 5229 | ||||
| 5230 | /* We only allow contentless_delete=1 if the table is indeed contentless. */ | |||
| 5231 | if( rc==SQLITE_OK0 | |||
| 5232 | && pRet->bContentlessDelete | |||
| 5233 | && pRet->eContent!=FTS5_CONTENT_NONE1 | |||
| 5234 | ){ | |||
| 5235 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 5236 | "contentless_delete=1 requires a contentless table" | |||
| 5237 | ); | |||
| 5238 | rc = SQLITE_ERROR1; | |||
| 5239 | } | |||
| 5240 | ||||
| 5241 | /* We only allow contentless_delete=1 if columnsize=0 is not present. | |||
| 5242 | ** | |||
| 5243 | ** This restriction may be removed at some point. | |||
| 5244 | */ | |||
| 5245 | if( rc==SQLITE_OK0 && pRet->bContentlessDelete && pRet->bColumnsize==0 ){ | |||
| 5246 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 5247 | "contentless_delete=1 is incompatible with columnsize=0" | |||
| 5248 | ); | |||
| 5249 | rc = SQLITE_ERROR1; | |||
| 5250 | } | |||
| 5251 | ||||
| 5252 | /* We only allow contentless_unindexed=1 if the table is actually a | |||
| 5253 | ** contentless one. | |||
| 5254 | */ | |||
| 5255 | if( rc==SQLITE_OK0 | |||
| 5256 | && pRet->bContentlessUnindexed | |||
| 5257 | && pRet->eContent!=FTS5_CONTENT_NONE1 | |||
| 5258 | ){ | |||
| 5259 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 5260 | "contentless_unindexed=1 requires a contentless table" | |||
| 5261 | ); | |||
| 5262 | rc = SQLITE_ERROR1; | |||
| 5263 | } | |||
| 5264 | ||||
| 5265 | /* If no zContent option was specified, fill in the default values. */ | |||
| 5266 | if( rc==SQLITE_OK0 && pRet->zContent==0 ){ | |||
| 5267 | const char *zTail = 0; | |||
| 5268 | assert( pRet->eContent==FTS5_CONTENT_NORMAL((void) (0)) | |||
| 5269 | || pRet->eContent==FTS5_CONTENT_NONE((void) (0)) | |||
| 5270 | )((void) (0)); | |||
| 5271 | if( pRet->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
| 5272 | zTail = "content"; | |||
| 5273 | }else if( bUnindexed && pRet->bContentlessUnindexed ){ | |||
| 5274 | pRet->eContent = FTS5_CONTENT_UNINDEXED3; | |||
| 5275 | zTail = "content"; | |||
| 5276 | }else if( pRet->bColumnsize ){ | |||
| 5277 | zTail = "docsize"; | |||
| 5278 | } | |||
| 5279 | ||||
| 5280 | if( zTail ){ | |||
| 5281 | pRet->zContent = sqlite3Fts5Mprintf( | |||
| 5282 | &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail | |||
| 5283 | ); | |||
| 5284 | } | |||
| 5285 | } | |||
| 5286 | ||||
| 5287 | if( rc==SQLITE_OK0 && pRet->zContentRowid==0 ){ | |||
| 5288 | pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); | |||
| 5289 | } | |||
| 5290 | ||||
| 5291 | /* Formulate the zContentExprlist text */ | |||
| 5292 | if( rc==SQLITE_OK0 ){ | |||
| 5293 | rc = fts5ConfigMakeExprlist(pRet); | |||
| 5294 | } | |||
| 5295 | ||||
| 5296 | if( rc!=SQLITE_OK0 ){ | |||
| 5297 | sqlite3Fts5ConfigFree(pRet); | |||
| 5298 | *ppOut = 0; | |||
| 5299 | } | |||
| 5300 | return rc; | |||
| 5301 | } | |||
| 5302 | ||||
| 5303 | /* | |||
| 5304 | ** Free the configuration object passed as the only argument. | |||
| 5305 | */ | |||
| 5306 | static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ | |||
| 5307 | if( pConfig ){ | |||
| 5308 | int i; | |||
| 5309 | if( pConfig->t.pTok ){ | |||
| 5310 | if( pConfig->t.pApi1 ){ | |||
| 5311 | pConfig->t.pApi1->xDelete(pConfig->t.pTok); | |||
| 5312 | }else{ | |||
| 5313 | pConfig->t.pApi2->xDelete(pConfig->t.pTok); | |||
| 5314 | } | |||
| 5315 | } | |||
| 5316 | sqlite3_freesqlite3_api->free((char*)pConfig->t.azArg); | |||
| 5317 | sqlite3_freesqlite3_api->free(pConfig->zDb); | |||
| 5318 | sqlite3_freesqlite3_api->free(pConfig->zName); | |||
| 5319 | for(i=0; i<pConfig->nCol; i++){ | |||
| 5320 | sqlite3_freesqlite3_api->free(pConfig->azCol[i]); | |||
| 5321 | } | |||
| 5322 | sqlite3_freesqlite3_api->free(pConfig->azCol); | |||
| 5323 | sqlite3_freesqlite3_api->free(pConfig->aPrefix); | |||
| 5324 | sqlite3_freesqlite3_api->free(pConfig->zRank); | |||
| 5325 | sqlite3_freesqlite3_api->free(pConfig->zRankArgs); | |||
| 5326 | sqlite3_freesqlite3_api->free(pConfig->zContent); | |||
| 5327 | sqlite3_freesqlite3_api->free(pConfig->zContentRowid); | |||
| 5328 | sqlite3_freesqlite3_api->free(pConfig->zContentExprlist); | |||
| 5329 | sqlite3_freesqlite3_api->free(pConfig); | |||
| 5330 | } | |||
| 5331 | } | |||
| 5332 | ||||
| 5333 | /* | |||
| 5334 | ** Call sqlite3_declare_vtab() based on the contents of the configuration | |||
| 5335 | ** object passed as the only argument. Return SQLITE_OK if successful, or | |||
| 5336 | ** an SQLite error code if an error occurs. | |||
| 5337 | */ | |||
| 5338 | static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ | |||
| 5339 | int i; | |||
| 5340 | int rc = SQLITE_OK0; | |||
| 5341 | char *zSql; | |||
| 5342 | ||||
| 5343 | zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); | |||
| 5344 | for(i=0; zSql && i<pConfig->nCol; i++){ | |||
| 5345 | const char *zSep = (i==0?"":", "); | |||
| 5346 | zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]); | |||
| 5347 | } | |||
| 5348 | zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", | |||
| 5349 | zSql, pConfig->zName, FTS5_RANK_NAME"rank" | |||
| 5350 | ); | |||
| 5351 | ||||
| 5352 | assert( zSql || rc==SQLITE_NOMEM )((void) (0)); | |||
| 5353 | if( zSql ){ | |||
| 5354 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(pConfig->db, zSql); | |||
| 5355 | sqlite3_freesqlite3_api->free(zSql); | |||
| 5356 | } | |||
| 5357 | ||||
| 5358 | return rc; | |||
| 5359 | } | |||
| 5360 | ||||
| 5361 | /* | |||
| 5362 | ** Tokenize the text passed via the second and third arguments. | |||
| 5363 | ** | |||
| 5364 | ** The callback is invoked once for each token in the input text. The | |||
| 5365 | ** arguments passed to it are, in order: | |||
| 5366 | ** | |||
| 5367 | ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() | |||
| 5368 | ** const char *pToken // Pointer to buffer containing token | |||
| 5369 | ** int nToken // Size of token in bytes | |||
| 5370 | ** int iStart // Byte offset of start of token within input text | |||
| 5371 | ** int iEnd // Byte offset of end of token within input text | |||
| 5372 | ** int iPos // Position of token in input (first token is 0) | |||
| 5373 | ** | |||
| 5374 | ** If the callback returns a non-zero value the tokenization is abandoned | |||
| 5375 | ** and no further callbacks are issued. | |||
| 5376 | ** | |||
| 5377 | ** This function returns SQLITE_OK if successful or an SQLite error code | |||
| 5378 | ** if an error occurs. If the tokenization was abandoned early because | |||
| 5379 | ** the callback returned SQLITE_DONE, this is not an error and this function | |||
| 5380 | ** still returns SQLITE_OK. Or, if the tokenization was abandoned early | |||
| 5381 | ** because the callback returned another non-zero value, it is assumed | |||
| 5382 | ** to be an SQLite error code and returned to the caller. | |||
| 5383 | */ | |||
| 5384 | static int sqlite3Fts5Tokenize( | |||
| 5385 | Fts5Config *pConfig, /* FTS5 Configuration object */ | |||
| 5386 | int flags, /* FTS5_TOKENIZE_* flags */ | |||
| 5387 | const char *pText, int nText, /* Text to tokenize */ | |||
| 5388 | void *pCtx, /* Context passed to xToken() */ | |||
| 5389 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ | |||
| 5390 | ){ | |||
| 5391 | int rc = SQLITE_OK0; | |||
| 5392 | if( pText ){ | |||
| 5393 | if( pConfig->t.pTok==0 ){ | |||
| 5394 | rc = sqlite3Fts5LoadTokenizer(pConfig); | |||
| 5395 | } | |||
| 5396 | if( rc==SQLITE_OK0 ){ | |||
| 5397 | if( pConfig->t.pApi1 ){ | |||
| 5398 | rc = pConfig->t.pApi1->xTokenize( | |||
| 5399 | pConfig->t.pTok, pCtx, flags, pText, nText, xToken | |||
| 5400 | ); | |||
| 5401 | }else{ | |||
| 5402 | rc = pConfig->t.pApi2->xTokenize(pConfig->t.pTok, pCtx, flags, | |||
| 5403 | pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken | |||
| 5404 | ); | |||
| 5405 | } | |||
| 5406 | } | |||
| 5407 | } | |||
| 5408 | return rc; | |||
| 5409 | } | |||
| 5410 | ||||
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of one or more SQL literals followed by a ')'
** character. Spaces are permitted around each literal.
**
** If the input has this form, return a pointer to the ')'. Otherwise,
** return NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;

  for(;;){
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);

    /* Finished: either a parse error (NULL) or the closing ')' */
    if( z==0 || *z==')' ) return z;

    /* Anything other than a ',' between literals is a parse error */
    if( *z!=',' ) return 0;
    z++;
  }
}
| 5434 | ||||
| 5435 | /* | |||
| 5436 | ** Parameter zIn contains a rank() function specification. The format of | |||
| 5437 | ** this is: | |||
| 5438 | ** | |||
| 5439 | ** + Bareword (function name) | |||
| 5440 | ** + Open parenthesis - "(" | |||
| 5441 | ** + Zero or more SQL literals in a comma separated list | |||
| 5442 | ** + Close parenthesis - ")" | |||
| 5443 | */ | |||
| 5444 | static int sqlite3Fts5ConfigParseRank( | |||
| 5445 | const char *zIn, /* Input string */ | |||
| 5446 | char **pzRank, /* OUT: Rank function name */ | |||
| 5447 | char **pzRankArgs /* OUT: Rank function arguments */ | |||
| 5448 | ){ | |||
| 5449 | const char *p = zIn; | |||
| 5450 | const char *pRank; | |||
| 5451 | char *zRank = 0; | |||
| 5452 | char *zRankArgs = 0; | |||
| 5453 | int rc = SQLITE_OK0; | |||
| 5454 | ||||
| 5455 | *pzRank = 0; | |||
| 5456 | *pzRankArgs = 0; | |||
| 5457 | ||||
| 5458 | if( p==0 ){ | |||
| 5459 | rc = SQLITE_ERROR1; | |||
| 5460 | }else{ | |||
| 5461 | p = fts5ConfigSkipWhitespace(p); | |||
| 5462 | pRank = p; | |||
| 5463 | p = fts5ConfigSkipBareword(p); | |||
| 5464 | ||||
| 5465 | if( p ){ | |||
| 5466 | zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); | |||
| 5467 | if( zRank ) memcpy(zRank, pRank, p-pRank); | |||
| 5468 | }else{ | |||
| 5469 | rc = SQLITE_ERROR1; | |||
| 5470 | } | |||
| 5471 | ||||
| 5472 | if( rc==SQLITE_OK0 ){ | |||
| 5473 | p = fts5ConfigSkipWhitespace(p); | |||
| 5474 | if( *p!='(' ) rc = SQLITE_ERROR1; | |||
| 5475 | p++; | |||
| 5476 | } | |||
| 5477 | if( rc==SQLITE_OK0 ){ | |||
| 5478 | const char *pArgs; | |||
| 5479 | p = fts5ConfigSkipWhitespace(p); | |||
| 5480 | pArgs = p; | |||
| 5481 | if( *p!=')' ){ | |||
| 5482 | p = fts5ConfigSkipArgs(p); | |||
| 5483 | if( p==0 ){ | |||
| 5484 | rc = SQLITE_ERROR1; | |||
| 5485 | }else{ | |||
| 5486 | zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); | |||
| 5487 | if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); | |||
| 5488 | } | |||
| 5489 | } | |||
| 5490 | } | |||
| 5491 | } | |||
| 5492 | ||||
| 5493 | if( rc!=SQLITE_OK0 ){ | |||
| 5494 | sqlite3_freesqlite3_api->free(zRank); | |||
| 5495 | assert( zRankArgs==0 )((void) (0)); | |||
| 5496 | }else{ | |||
| 5497 | *pzRank = zRank; | |||
| 5498 | *pzRankArgs = zRankArgs; | |||
| 5499 | } | |||
| 5500 | return rc; | |||
| 5501 | } | |||
| 5502 | ||||
| 5503 | static int sqlite3Fts5ConfigSetValue( | |||
| 5504 | Fts5Config *pConfig, | |||
| 5505 | const char *zKey, | |||
| 5506 | sqlite3_value *pVal, | |||
| 5507 | int *pbBadkey | |||
| 5508 | ){ | |||
| 5509 | int rc = SQLITE_OK0; | |||
| 5510 | ||||
| 5511 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "pgsz") ){ | |||
| 5512 | int pgsz = 0; | |||
| 5513 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
| 5514 | pgsz = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5515 | } | |||
| 5516 | if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE(64*1024) ){ | |||
| 5517 | *pbBadkey = 1; | |||
| 5518 | }else{ | |||
| 5519 | pConfig->pgsz = pgsz; | |||
| 5520 | } | |||
| 5521 | } | |||
| 5522 | ||||
| 5523 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "hashsize") ){ | |||
| 5524 | int nHashSize = -1; | |||
| 5525 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
| 5526 | nHashSize = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5527 | } | |||
| 5528 | if( nHashSize<=0 ){ | |||
| 5529 | *pbBadkey = 1; | |||
| 5530 | }else{ | |||
| 5531 | pConfig->nHashSize = nHashSize; | |||
| 5532 | } | |||
| 5533 | } | |||
| 5534 | ||||
| 5535 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "automerge") ){ | |||
| 5536 | int nAutomerge = -1; | |||
| 5537 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
| 5538 | nAutomerge = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5539 | } | |||
| 5540 | if( nAutomerge<0 || nAutomerge>64 ){ | |||
| 5541 | *pbBadkey = 1; | |||
| 5542 | }else{ | |||
| 5543 | if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE4; | |||
| 5544 | pConfig->nAutomerge = nAutomerge; | |||
| 5545 | } | |||
| 5546 | } | |||
| 5547 | ||||
| 5548 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "usermerge") ){ | |||
| 5549 | int nUsermerge = -1; | |||
| 5550 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
| 5551 | nUsermerge = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5552 | } | |||
| 5553 | if( nUsermerge<2 || nUsermerge>16 ){ | |||
| 5554 | *pbBadkey = 1; | |||
| 5555 | }else{ | |||
| 5556 | pConfig->nUsermerge = nUsermerge; | |||
| 5557 | } | |||
| 5558 | } | |||
| 5559 | ||||
| 5560 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "crisismerge") ){ | |||
| 5561 | int nCrisisMerge = -1; | |||
| 5562 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
| 5563 | nCrisisMerge = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5564 | } | |||
| 5565 | if( nCrisisMerge<0 ){ | |||
| 5566 | *pbBadkey = 1; | |||
| 5567 | }else{ | |||
| 5568 | if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE16; | |||
| 5569 | if( nCrisisMerge>=FTS5_MAX_SEGMENT2000 ) nCrisisMerge = FTS5_MAX_SEGMENT2000-1; | |||
| 5570 | pConfig->nCrisisMerge = nCrisisMerge; | |||
| 5571 | } | |||
| 5572 | } | |||
| 5573 | ||||
| 5574 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "deletemerge") ){ | |||
| 5575 | int nVal = -1; | |||
| 5576 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
| 5577 | nVal = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5578 | }else{ | |||
| 5579 | *pbBadkey = 1; | |||
| 5580 | } | |||
| 5581 | if( nVal<0 ) nVal = FTS5_DEFAULT_DELETE_AUTOMERGE10; | |||
| 5582 | if( nVal>100 ) nVal = 0; | |||
| 5583 | pConfig->nDeleteMerge = nVal; | |||
| 5584 | } | |||
| 5585 | ||||
| 5586 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "rank") ){ | |||
| 5587 | const char *zIn = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
| 5588 | char *zRank; | |||
| 5589 | char *zRankArgs; | |||
| 5590 | rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); | |||
| 5591 | if( rc==SQLITE_OK0 ){ | |||
| 5592 | sqlite3_freesqlite3_api->free(pConfig->zRank); | |||
| 5593 | sqlite3_freesqlite3_api->free(pConfig->zRankArgs); | |||
| 5594 | pConfig->zRank = zRank; | |||
| 5595 | pConfig->zRankArgs = zRankArgs; | |||
| 5596 | }else if( rc==SQLITE_ERROR1 ){ | |||
| 5597 | rc = SQLITE_OK0; | |||
| 5598 | *pbBadkey = 1; | |||
| 5599 | } | |||
| 5600 | } | |||
| 5601 | ||||
| 5602 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "secure-delete") ){ | |||
| 5603 | int bVal = -1; | |||
| 5604 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
| 5605 | bVal = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5606 | } | |||
| 5607 | if( bVal<0 ){ | |||
| 5608 | *pbBadkey = 1; | |||
| 5609 | }else{ | |||
| 5610 | pConfig->bSecureDelete = (bVal ? 1 : 0); | |||
| 5611 | } | |||
| 5612 | } | |||
| 5613 | ||||
| 5614 | else if( 0==sqlite3_stricmpsqlite3_api->stricmp(zKey, "insttoken") ){ | |||
| 5615 | int bVal = -1; | |||
| 5616 | if( SQLITE_INTEGER1==sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal) ){ | |||
| 5617 | bVal = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5618 | } | |||
| 5619 | if( bVal<0 ){ | |||
| 5620 | *pbBadkey = 1; | |||
| 5621 | }else{ | |||
| 5622 | pConfig->bPrefixInsttoken = (bVal ? 1 : 0); | |||
| 5623 | } | |||
| 5624 | ||||
| 5625 | }else{ | |||
| 5626 | *pbBadkey = 1; | |||
| 5627 | } | |||
| 5628 | return rc; | |||
| 5629 | } | |||
| 5630 | ||||
| 5631 | /* | |||
| 5632 | ** Load the contents of the %_config table into memory. | |||
| 5633 | */ | |||
| 5634 | static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ | |||
| 5635 | const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; | |||
| 5636 | char *zSql; | |||
| 5637 | sqlite3_stmt *p = 0; | |||
| 5638 | int rc = SQLITE_OK0; | |||
| 5639 | int iVersion = 0; | |||
| 5640 | ||||
| 5641 | /* Set default values */ | |||
| 5642 | pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE4050; | |||
| 5643 | pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE4; | |||
| 5644 | pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE4; | |||
| 5645 | pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE16; | |||
| 5646 | pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE(1024*1024); | |||
| 5647 | pConfig->nDeleteMerge = FTS5_DEFAULT_DELETE_AUTOMERGE10; | |||
| 5648 | ||||
| 5649 | zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); | |||
| 5650 | if( zSql ){ | |||
| 5651 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pConfig->db, zSql, -1, &p, 0); | |||
| 5652 | sqlite3_freesqlite3_api->free(zSql); | |||
| 5653 | } | |||
| 5654 | ||||
| 5655 | assert( rc==SQLITE_OK || p==0 )((void) (0)); | |||
| 5656 | if( rc==SQLITE_OK0 ){ | |||
| 5657 | while( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(p) ){ | |||
| 5658 | const char *zK = (const char*)sqlite3_column_textsqlite3_api->column_text(p, 0); | |||
| 5659 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(p, 1); | |||
| 5660 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zK, "version") ){ | |||
| 5661 | iVersion = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 5662 | }else{ | |||
| 5663 | int bDummy = 0; | |||
| 5664 | sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); | |||
| 5665 | } | |||
| 5666 | } | |||
| 5667 | rc = sqlite3_finalizesqlite3_api->finalize(p); | |||
| 5668 | } | |||
| 5669 | ||||
| 5670 | if( rc==SQLITE_OK0 | |||
| 5671 | && iVersion!=FTS5_CURRENT_VERSION4 | |||
| 5672 | && iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE5 | |||
| 5673 | ){ | |||
| 5674 | rc = SQLITE_ERROR1; | |||
| 5675 | sqlite3Fts5ConfigErrmsg(pConfig, "invalid fts5 file format " | |||
| 5676 | "(found %d, expected %d or %d) - run 'rebuild'", | |||
| 5677 | iVersion, FTS5_CURRENT_VERSION4, FTS5_CURRENT_VERSION_SECUREDELETE5 | |||
| 5678 | ); | |||
| 5679 | }else{ | |||
| 5680 | pConfig->iVersion = iVersion; | |||
| 5681 | } | |||
| 5682 | ||||
| 5683 | if( rc==SQLITE_OK0 ){ | |||
| 5684 | pConfig->iCookie = iCookie; | |||
| 5685 | } | |||
| 5686 | return rc; | |||
| 5687 | } | |||
| 5688 | ||||
| 5689 | /* | |||
| 5690 | ** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer | |||
| 5691 | ** containing the error message created using printf() style formatting | |||
| 5692 | ** string zFmt and its trailing arguments. | |||
| 5693 | */ | |||
| 5694 | static void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){ | |||
| 5695 | va_list ap; /* ... printf arguments */ | |||
| 5696 | char *zMsg = 0; | |||
| 5697 | ||||
| 5698 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
| 5699 | zMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
| 5700 | if( pConfig->pzErrmsg ){ | |||
| 5701 | assert( *pConfig->pzErrmsg==0 )((void) (0)); | |||
| 5702 | *pConfig->pzErrmsg = zMsg; | |||
| 5703 | }else{ | |||
| 5704 | sqlite3_freesqlite3_api->free(zMsg); | |||
| 5705 | } | |||
| 5706 | ||||
| 5707 | va_end(ap)__builtin_va_end(ap); | |||
| 5708 | } | |||
| 5709 | ||||
| 5710 | ||||
| 5711 | ||||
| 5712 | #line 1 "fts5_expr.c" | |||
| 5713 | /* | |||
| 5714 | ** 2014 May 31 | |||
| 5715 | ** | |||
| 5716 | ** The author disclaims copyright to this source code. In place of | |||
| 5717 | ** a legal notice, here is a blessing: | |||
| 5718 | ** | |||
| 5719 | ** May you do good and not evil. | |||
| 5720 | ** May you find forgiveness for yourself and forgive others. | |||
| 5721 | ** May you share freely, never taking more than you give. | |||
| 5722 | ** | |||
| 5723 | ****************************************************************************** | |||
| 5724 | ** | |||
| 5725 | */ | |||
| 5726 | ||||
| 5727 | ||||
| 5728 | ||||
| 5729 | /* #include "fts5Int.h" */ | |||
| 5730 | /* #include "fts5parse.h" */ | |||
| 5731 | ||||
/* Default for SQLITE_FTS5_MAX_EXPR_DEPTH when the build does not set it. */
#ifndef SQLITE_FTS5_MAX_EXPR_DEPTH
# define SQLITE_FTS5_MAX_EXPR_DEPTH 256
#endif

/*
** Every token type in the generated fts5parse.h file is greater than 0,
** so 0 is available as the end-of-input token.
*/
#define FTS5_EOF 0

/* 0x7fffffffffffffff, built from two 32-bit halves (assumes 64-bit i64). */
#define FTS5_LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
| 5742 | ||||
| 5743 | typedef struct Fts5ExprTerm Fts5ExprTerm; | |||
| 5744 | ||||
| 5745 | /* | |||
| 5746 | ** Functions generated by lemon from fts5parse.y. | |||
| 5747 | */ | |||
| 5748 | static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); | |||
| 5749 | static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); | |||
| 5750 | static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); | |||
| 5751 | #ifndef NDEBUG1 | |||
| 5752 | #include <stdio.h> | |||
| 5753 | static void sqlite3Fts5ParserTrace(FILE*, char*); | |||
| 5754 | #endif | |||
| 5755 | static int sqlite3Fts5ParserFallback(int); | |||
| 5756 | ||||
| 5757 | ||||
| 5758 | struct Fts5Expr { | |||
| 5759 | Fts5Index *pIndex; | |||
| 5760 | Fts5Config *pConfig; | |||
| 5761 | Fts5ExprNode *pRoot; | |||
| 5762 | int bDesc; /* Iterate in descending rowid order */ | |||
| 5763 | int nPhrase; /* Number of phrases in expression */ | |||
| 5764 | Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ | |||
| 5765 | }; | |||
| 5766 | ||||
| 5767 | /* | |||
| 5768 | ** eType: | |||
| 5769 | ** Expression node type. Usually one of: | |||
| 5770 | ** | |||
| 5771 | ** FTS5_AND (nChild, apChild valid) | |||
| 5772 | ** FTS5_OR (nChild, apChild valid) | |||
| 5773 | ** FTS5_NOT (nChild, apChild valid) | |||
| 5774 | ** FTS5_STRING (pNear valid) | |||
| 5775 | ** FTS5_TERM (pNear valid) | |||
| 5776 | ** | |||
| 5777 | ** An expression node with eType==0 may also exist. It always matches zero | |||
| 5778 | ** rows. This is created when a phrase containing no tokens is parsed. | |||
| 5779 | ** e.g. "". | |||
| 5780 | ** | |||
| 5781 | ** iHeight: | |||
| 5782 | ** Distance from this node to furthest leaf. This is always 0 for nodes | |||
| 5783 | ** of type FTS5_STRING and FTS5_TERM. For all other nodes it is one | |||
| 5784 | ** greater than the largest child value. | |||
| 5785 | */ | |||
| 5786 | struct Fts5ExprNode { | |||
| 5787 | int eType; /* Node type */ | |||
| 5788 | int bEof; /* True at EOF */ | |||
| 5789 | int bNomatch; /* True if entry is not a match */ | |||
| 5790 | int iHeight; /* Distance to tree leaf nodes */ | |||
| 5791 | ||||
| 5792 | /* Next method for this node. */ | |||
| 5793 | int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64); | |||
| 5794 | ||||
| 5795 | i64 iRowid; /* Current rowid */ | |||
| 5796 | Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ | |||
| 5797 | ||||
| 5798 | /* Child nodes. For a NOT node, this array always contains 2 entries. For | |||
| 5799 | ** AND or OR nodes, it contains 2 or more entries. */ | |||
| 5800 | int nChild; /* Number of child nodes */ | |||
| 5801 | Fts5ExprNode *apChild[FLEXARRAY]; /* Array of child nodes */ | |||
| 5802 | }; | |||
| 5803 | ||||
| 5804 | /* Size (in bytes) of an Fts5ExprNode object that holds up to N children */ | |||
| 5805 | #define SZ_FTS5EXPRNODE(N)(__builtin_offsetof(Fts5ExprNode, apChild) + (N)*sizeof(Fts5ExprNode *)) \ | |||
| 5806 | (offsetof(Fts5ExprNode,apChild)__builtin_offsetof(Fts5ExprNode, apChild) + (N)*sizeof(Fts5ExprNode*)) | |||
| 5807 | ||||
/* True if node p has type FTS5_TERM or FTS5_STRING. */
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)

/*
** Call the xNext method of Fts5ExprNode (b). Use this macro as though it
** had the same signature as the xNext() methods. Argument (b) is
** evaluated twice.
*/
#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
| 5815 | ||||
| 5816 | /* | |||
| 5817 | ** An instance of the following structure represents a single search term | |||
| 5818 | ** or term prefix. | |||
| 5819 | */ | |||
| 5820 | struct Fts5ExprTerm { | |||
| 5821 | u8 bPrefix; /* True for a prefix term */ | |||
| 5822 | u8 bFirst; /* True if token must be first in column */ | |||
| 5823 | char *pTerm; /* Term data */ | |||
| 5824 | int nQueryTerm; /* Effective size of term in bytes */ | |||
| 5825 | int nFullTerm; /* Size of term in bytes incl. tokendata */ | |||
| 5826 | Fts5IndexIter *pIter; /* Iterator for this term */ | |||
| 5827 | Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ | |||
| 5828 | }; | |||
| 5829 | ||||
| 5830 | /* | |||
| 5831 | ** A phrase. One or more terms that must appear in a contiguous sequence | |||
| 5832 | ** within a document for it to match. | |||
| 5833 | */ | |||
| 5834 | struct Fts5ExprPhrase { | |||
| 5835 | Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */ | |||
| 5836 | Fts5Buffer poslist; /* Current position list */ | |||
| 5837 | int nTerm; /* Number of entries in aTerm[] */ | |||
| 5838 | Fts5ExprTerm aTerm[FLEXARRAY]; /* Terms that make up this phrase */ | |||
| 5839 | }; | |||
| 5840 | ||||
| 5841 | /* Size (in bytes) of an Fts5ExprPhrase object that holds up to N terms */ | |||
| 5842 | #define SZ_FTS5EXPRPHRASE(N)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (N)*sizeof(Fts5ExprTerm )) \ | |||
| 5843 | (offsetof(Fts5ExprPhrase,aTerm)__builtin_offsetof(Fts5ExprPhrase, aTerm) + (N)*sizeof(Fts5ExprTerm)) | |||
| 5844 | ||||
| 5845 | /* | |||
| 5846 | ** One or more phrases that must appear within a certain token distance of | |||
| 5847 | ** each other within each matching document. | |||
| 5848 | */ | |||
| 5849 | struct Fts5ExprNearset { | |||
| 5850 | int nNear; /* NEAR parameter */ | |||
| 5851 | Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */ | |||
| 5852 | int nPhrase; /* Number of entries in aPhrase[] array */ | |||
| 5853 | Fts5ExprPhrase *apPhrase[FLEXARRAY]; /* Array of phrase pointers */ | |||
| 5854 | }; | |||
| 5855 | ||||
| 5856 | /* Size (in bytes) of an Fts5ExprNearset object covering up to N phrases */ | |||
| 5857 | #define SZ_FTS5EXPRNEARSET(N)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(N)*sizeof(Fts5ExprPhrase *)) \ | |||
| 5858 | (offsetof(Fts5ExprNearset,apPhrase)__builtin_offsetof(Fts5ExprNearset, apPhrase)+(N)*sizeof(Fts5ExprPhrase*)) | |||
| 5859 | ||||
| 5860 | /* | |||
| 5861 | ** Parse context. | |||
| 5862 | */ | |||
| 5863 | struct Fts5Parse { | |||
| 5864 | Fts5Config *pConfig; | |||
| 5865 | char *zErr; | |||
| 5866 | int rc; | |||
| 5867 | int nPhrase; /* Size of apPhrase array */ | |||
| 5868 | Fts5ExprPhrase **apPhrase; /* Array of all phrases */ | |||
| 5869 | Fts5ExprNode *pExpr; /* Result of a successful parse */ | |||
| 5870 | int bPhraseToAnd; /* Convert "a+b" to "a AND b" */ | |||
| 5871 | }; | |||
| 5872 | ||||
| 5873 | /* | |||
| 5874 | ** Check that the Fts5ExprNode.iHeight variables are set correctly in | |||
| 5875 | ** the expression tree passed as the only argument. | |||
| 5876 | */ | |||
| 5877 | #ifndef NDEBUG1 | |||
| 5878 | static void assert_expr_depth_ok(int rc, Fts5ExprNode *p){ | |||
| 5879 | if( rc==SQLITE_OK0 ){ | |||
| 5880 | if( p->eType==FTS5_TERM4 || p->eType==FTS5_STRING9 || p->eType==0 ){ | |||
| 5881 | assert( p->iHeight==0 )((void) (0)); | |||
| 5882 | }else{ | |||
| 5883 | int ii; | |||
| 5884 | int iMaxChild = 0; | |||
| 5885 | for(ii=0; ii<p->nChild; ii++){ | |||
| 5886 | Fts5ExprNode *pChild = p->apChild[ii]; | |||
| 5887 | iMaxChild = MAX(iMaxChild, pChild->iHeight)(((iMaxChild) > (pChild->iHeight)) ? (iMaxChild) : (pChild ->iHeight)); | |||
| 5888 | assert_expr_depth_ok(SQLITE_OK, pChild); | |||
| 5889 | } | |||
| 5890 | assert( p->iHeight==iMaxChild+1 )((void) (0)); | |||
| 5891 | } | |||
| 5892 | } | |||
| 5893 | } | |||
| 5894 | #else | |||
| 5895 | # define assert_expr_depth_ok(rc, p) | |||
| 5896 | #endif | |||
| 5897 | ||||
| 5898 | static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ | |||
| 5899 | va_list ap; | |||
| 5900 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
| 5901 | if( pParse->rc==SQLITE_OK0 ){ | |||
| 5902 | assert( pParse->zErr==0 )((void) (0)); | |||
| 5903 | pParse->zErr = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
| 5904 | pParse->rc = SQLITE_ERROR1; | |||
| 5905 | } | |||
| 5906 | va_end(ap)__builtin_va_end(ap); | |||
| 5907 | } | |||
| 5908 | ||||
/*
** Return 1 if t is a space, tab, newline or carriage return, else 0.
*/
static int fts5ExprIsspace(char t){
  switch( t ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
| 5912 | ||||
| 5913 | /* | |||
| 5914 | ** Read the first token from the nul-terminated string at *pz. | |||
| 5915 | */ | |||
| 5916 | static int fts5ExprGetToken( | |||
| 5917 | Fts5Parse *pParse, | |||
| 5918 | const char **pz, /* IN/OUT: Pointer into buffer */ | |||
| 5919 | Fts5Token *pToken | |||
| 5920 | ){ | |||
| 5921 | const char *z = *pz; | |||
| 5922 | int tok; | |||
| 5923 | ||||
| 5924 | /* Skip past any whitespace */ | |||
| 5925 | while( fts5ExprIsspace(*z) ) z++; | |||
| 5926 | ||||
| 5927 | pToken->p = z; | |||
| 5928 | pToken->n = 1; | |||
| 5929 | switch( *z ){ | |||
| 5930 | case '(': tok = FTS5_LP10; break; | |||
| 5931 | case ')': tok = FTS5_RP11; break; | |||
| 5932 | case '{': tok = FTS5_LCP7; break; | |||
| 5933 | case '}': tok = FTS5_RCP8; break; | |||
| 5934 | case ':': tok = FTS5_COLON5; break; | |||
| 5935 | case ',': tok = FTS5_COMMA13; break; | |||
| 5936 | case '+': tok = FTS5_PLUS14; break; | |||
| 5937 | case '*': tok = FTS5_STAR15; break; | |||
| 5938 | case '-': tok = FTS5_MINUS6; break; | |||
| 5939 | case '^': tok = FTS5_CARET12; break; | |||
| 5940 | case '\0': tok = FTS5_EOF0; break; | |||
| 5941 | ||||
| 5942 | case '"': { | |||
| 5943 | const char *z2; | |||
| 5944 | tok = FTS5_STRING9; | |||
| 5945 | ||||
| 5946 | for(z2=&z[1]; 1; z2++){ | |||
| 5947 | if( z2[0]=='"' ){ | |||
| 5948 | z2++; | |||
| 5949 | if( z2[0]!='"' ) break; | |||
| 5950 | } | |||
| 5951 | if( z2[0]=='\0' ){ | |||
| 5952 | sqlite3Fts5ParseError(pParse, "unterminated string"); | |||
| 5953 | return FTS5_EOF0; | |||
| 5954 | } | |||
| 5955 | } | |||
| 5956 | pToken->n = (z2 - z); | |||
| 5957 | break; | |||
| 5958 | } | |||
| 5959 | ||||
| 5960 | default: { | |||
| 5961 | const char *z2; | |||
| 5962 | if( sqlite3Fts5IsBareword(z[0])==0 ){ | |||
| 5963 | sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); | |||
| 5964 | return FTS5_EOF0; | |||
| 5965 | } | |||
| 5966 | tok = FTS5_STRING9; | |||
| 5967 | for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); | |||
| 5968 | pToken->n = (z2 - z); | |||
| 5969 | if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR1; | |||
| 5970 | if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT3; | |||
| 5971 | if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND2; | |||
| 5972 | break; | |||
| 5973 | } | |||
| 5974 | } | |||
| 5975 | ||||
| 5976 | *pz = &pToken->p[pToken->n]; | |||
| 5977 | return tok; | |||
| 5978 | } | |||
| 5979 | ||||
| 5980 | static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)t);} | |||
/* Deallocator handed to sqlite3Fts5ParserFree(): wraps sqlite3_free(). */
static void fts5ParseFree(void *p){
  sqlite3_free(p);
}
| 5982 | ||||
| 5983 | static int sqlite3Fts5ExprNew( | |||
| 5984 | Fts5Config *pConfig, /* FTS5 Configuration */ | |||
| 5985 | int bPhraseToAnd, | |||
| 5986 | int iCol, | |||
| 5987 | const char *zExpr, /* Expression text */ | |||
| 5988 | Fts5Expr **ppNew, | |||
| 5989 | char **pzErr | |||
| 5990 | ){ | |||
| 5991 | Fts5Parse sParse; | |||
| 5992 | Fts5Token token; | |||
| 5993 | const char *z = zExpr; | |||
| 5994 | int t; /* Next token type */ | |||
| 5995 | void *pEngine; | |||
| 5996 | Fts5Expr *pNew; | |||
| 5997 | ||||
| 5998 | *ppNew = 0; | |||
| 5999 | *pzErr = 0; | |||
| 6000 | memset(&sParse, 0, sizeof(sParse)); | |||
| 6001 | sParse.bPhraseToAnd = bPhraseToAnd; | |||
| 6002 | pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); | |||
| 6003 | if( pEngine==0 ){ return SQLITE_NOMEM7; } | |||
| 6004 | sParse.pConfig = pConfig; | |||
| 6005 | ||||
| 6006 | do { | |||
| 6007 | t = fts5ExprGetToken(&sParse, &z, &token); | |||
| 6008 | sqlite3Fts5Parser(pEngine, t, token, &sParse); | |||
| 6009 | }while( sParse.rc==SQLITE_OK0 && t!=FTS5_EOF0 ); | |||
| 6010 | sqlite3Fts5ParserFree(pEngine, fts5ParseFree); | |||
| 6011 | ||||
| 6012 | assert( sParse.pExpr || sParse.rc!=SQLITE_OK )((void) (0)); | |||
| 6013 | assert_expr_depth_ok(sParse.rc, sParse.pExpr); | |||
| 6014 | ||||
| 6015 | /* If the LHS of the MATCH expression was a user column, apply the | |||
| 6016 | ** implicit column-filter. */ | |||
| 6017 | if( sParse.rc==SQLITE_OK0 && iCol<pConfig->nCol ){ | |||
| 6018 | int n = SZ_FTS5COLSET(1)(sizeof(i64)*((1 +2)/2)); | |||
| 6019 | Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n); | |||
| 6020 | if( pColset ){ | |||
| 6021 | pColset->nCol = 1; | |||
| 6022 | pColset->aiCol[0] = iCol; | |||
| 6023 | sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset); | |||
| 6024 | } | |||
| 6025 | } | |||
| 6026 | ||||
| 6027 | assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 )((void) (0)); | |||
| 6028 | if( sParse.rc==SQLITE_OK0 ){ | |||
| 6029 | *ppNew = pNew = sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Expr)); | |||
| 6030 | if( pNew==0 ){ | |||
| 6031 | sParse.rc = SQLITE_NOMEM7; | |||
| 6032 | sqlite3Fts5ParseNodeFree(sParse.pExpr); | |||
| 6033 | }else{ | |||
| 6034 | pNew->pRoot = sParse.pExpr; | |||
| 6035 | pNew->pIndex = 0; | |||
| 6036 | pNew->pConfig = pConfig; | |||
| 6037 | pNew->apExprPhrase = sParse.apPhrase; | |||
| 6038 | pNew->nPhrase = sParse.nPhrase; | |||
| 6039 | pNew->bDesc = 0; | |||
| 6040 | sParse.apPhrase = 0; | |||
| 6041 | } | |||
| 6042 | }else{ | |||
| 6043 | sqlite3Fts5ParseNodeFree(sParse.pExpr); | |||
| 6044 | } | |||
| 6045 | ||||
| 6046 | sqlite3_freesqlite3_api->free(sParse.apPhrase); | |||
| 6047 | if( 0==*pzErr ){ | |||
| 6048 | *pzErr = sParse.zErr; | |||
| 6049 | }else{ | |||
| 6050 | sqlite3_freesqlite3_api->free(sParse.zErr); | |||
| 6051 | } | |||
| 6052 | return sParse.rc; | |||
| 6053 | } | |||
| 6054 | ||||
/*
** Buffer z holds at least nByte bytes of valid utf-8. Return the number
** of characters in those nByte bytes, found by counting every byte that
** is not a utf-8 continuation byte (10xxxxxx). Returns 0 if nByte<=0.
*/
static int fts5ExprCountChar(const char *z, int nByte){
  int nChar = 0;
  int nLeft = nByte;
  while( nLeft>0 ){
    nChar += ((*z & 0xC0)!=0x80);
    z++;
    nLeft--;
  }
  return nChar;
}
| 6067 | ||||
| 6068 | /* | |||
| 6069 | ** This function is only called when using the special 'trigram' tokenizer. | |||
| 6070 | ** Argument zText contains the text of a LIKE or GLOB pattern matched | |||
| 6071 | ** against column iCol. This function creates and compiles an FTS5 MATCH | |||
| 6072 | ** expression that will match a superset of the rows matched by the LIKE or | |||
| 6073 | ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error | |||
| 6074 | ** code. | |||
| 6075 | */ | |||
| 6076 | static int sqlite3Fts5ExprPattern( | |||
| 6077 | Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp | |||
| 6078 | ){ | |||
| 6079 | i64 nText = strlen(zText); | |||
| 6080 | char *zExpr = (char*)sqlite3_malloc64sqlite3_api->malloc64(nText*4 + 1); | |||
| 6081 | int rc = SQLITE_OK0; | |||
| 6082 | ||||
| 6083 | if( zExpr==0 ){ | |||
| 6084 | rc = SQLITE_NOMEM7; | |||
| 6085 | }else{ | |||
| 6086 | char aSpec[3]; | |||
| 6087 | int iOut = 0; | |||
| 6088 | int i = 0; | |||
| 6089 | int iFirst = 0; | |||
| 6090 | ||||
| 6091 | if( bGlob==0 ){ | |||
| 6092 | aSpec[0] = '_'; | |||
| 6093 | aSpec[1] = '%'; | |||
| 6094 | aSpec[2] = 0; | |||
| 6095 | }else{ | |||
| 6096 | aSpec[0] = '*'; | |||
| 6097 | aSpec[1] = '?'; | |||
| 6098 | aSpec[2] = '['; | |||
| 6099 | } | |||
| 6100 | ||||
| 6101 | while( i<=nText ){ | |||
| 6102 | if( i==nText | |||
| 6103 | || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] | |||
| 6104 | ){ | |||
| 6105 | ||||
| 6106 | if( fts5ExprCountChar(&zText[iFirst], i-iFirst)>=3 ){ | |||
| 6107 | int jj; | |||
| 6108 | zExpr[iOut++] = '"'; | |||
| 6109 | for(jj=iFirst; jj<i; jj++){ | |||
| 6110 | zExpr[iOut++] = zText[jj]; | |||
| 6111 | if( zText[jj]=='"' ) zExpr[iOut++] = '"'; | |||
| 6112 | } | |||
| 6113 | zExpr[iOut++] = '"'; | |||
| 6114 | zExpr[iOut++] = ' '; | |||
| 6115 | } | |||
| 6116 | if( zText[i]==aSpec[2] ){ | |||
| 6117 | i += 2; | |||
| 6118 | if( zText[i-1]=='^' ) i++; | |||
| 6119 | while( i<nText && zText[i]!=']' ) i++; | |||
| 6120 | } | |||
| 6121 | iFirst = i+1; | |||
| 6122 | } | |||
| 6123 | i++; | |||
| 6124 | } | |||
| 6125 | if( iOut>0 ){ | |||
| 6126 | int bAnd = 0; | |||
| 6127 | if( pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | |||
| 6128 | bAnd = 1; | |||
| 6129 | if( pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 6130 | iCol = pConfig->nCol; | |||
| 6131 | } | |||
| 6132 | } | |||
| 6133 | zExpr[iOut] = '\0'; | |||
| 6134 | rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg); | |||
| 6135 | }else{ | |||
| 6136 | *pp = 0; | |||
| 6137 | } | |||
| 6138 | sqlite3_freesqlite3_api->free(zExpr); | |||
| 6139 | } | |||
| 6140 | ||||
| 6141 | return rc; | |||
| 6142 | } | |||
| 6143 | ||||
| 6144 | /* | |||
| 6145 | ** Free the expression node object passed as the only argument. | |||
| 6146 | */ | |||
| 6147 | static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ | |||
| 6148 | if( p ){ | |||
| 6149 | int i; | |||
| 6150 | for(i=0; i<p->nChild; i++){ | |||
| 6151 | sqlite3Fts5ParseNodeFree(p->apChild[i]); | |||
| 6152 | } | |||
| 6153 | sqlite3Fts5ParseNearsetFree(p->pNear); | |||
| 6154 | sqlite3_freesqlite3_api->free(p); | |||
| 6155 | } | |||
| 6156 | } | |||
| 6157 | ||||
| 6158 | /* | |||
| 6159 | ** Free the expression object passed as the only argument. | |||
| 6160 | */ | |||
| 6161 | static void sqlite3Fts5ExprFree(Fts5Expr *p){ | |||
| 6162 | if( p ){ | |||
| 6163 | sqlite3Fts5ParseNodeFree(p->pRoot); | |||
| 6164 | sqlite3_freesqlite3_api->free(p->apExprPhrase); | |||
| 6165 | sqlite3_freesqlite3_api->free(p); | |||
| 6166 | } | |||
| 6167 | } | |||
| 6168 | ||||
| 6169 | static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){ | |||
| 6170 | Fts5Parse sParse; | |||
| 6171 | memset(&sParse, 0, sizeof(sParse)); | |||
| 6172 | ||||
| 6173 | if( *pp1 && p2 ){ | |||
| 6174 | Fts5Expr *p1 = *pp1; | |||
| 6175 | int nPhrase = p1->nPhrase + p2->nPhrase; | |||
| 6176 | ||||
| 6177 | p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND2, p1->pRoot, p2->pRoot,0); | |||
| 6178 | p2->pRoot = 0; | |||
| 6179 | ||||
| 6180 | if( sParse.rc==SQLITE_OK0 ){ | |||
| 6181 | Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_reallocsqlite3_api->realloc( | |||
| 6182 | p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*) | |||
| 6183 | ); | |||
| 6184 | if( ap==0 ){ | |||
| 6185 | sParse.rc = SQLITE_NOMEM7; | |||
| 6186 | }else{ | |||
| 6187 | int i; | |||
| 6188 | memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*)); | |||
| 6189 | for(i=0; i<p2->nPhrase; i++){ | |||
| 6190 | ap[i] = p2->apExprPhrase[i]; | |||
| 6191 | } | |||
| 6192 | p1->nPhrase = nPhrase; | |||
| 6193 | p1->apExprPhrase = ap; | |||
| 6194 | } | |||
| 6195 | } | |||
| 6196 | sqlite3_freesqlite3_api->free(p2->apExprPhrase); | |||
| 6197 | sqlite3_freesqlite3_api->free(p2); | |||
| 6198 | }else if( p2 ){ | |||
| 6199 | *pp1 = p2; | |||
| 6200 | } | |||
| 6201 | ||||
| 6202 | return sParse.rc; | |||
| 6203 | } | |||
| 6204 | ||||
| 6205 | /* | |||
| 6206 | ** Argument pTerm must be a synonym iterator. Return the current rowid | |||
| 6207 | ** that it points to. | |||
| 6208 | */ | |||
| 6209 | static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ | |||
| 6210 | i64 iRet = 0; | |||
| 6211 | int bRetValid = 0; | |||
| 6212 | Fts5ExprTerm *p; | |||
| 6213 | ||||
| 6214 | assert( pTerm )((void) (0)); | |||
| 6215 | assert( pTerm->pSynonym )((void) (0)); | |||
| 6216 | assert( bDesc==0 || bDesc==1 )((void) (0)); | |||
| 6217 | for(p=pTerm; p; p=p->pSynonym){ | |||
| 6218 | if( 0==sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof) ){ | |||
| 6219 | i64 iRowid = p->pIter->iRowid; | |||
| 6220 | if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ | |||
| 6221 | iRet = iRowid; | |||
| 6222 | bRetValid = 1; | |||
| 6223 | } | |||
| 6224 | } | |||
| 6225 | } | |||
| 6226 | ||||
| 6227 | if( pbEof && bRetValid==0 ) *pbEof = 1; | |||
| 6228 | return iRet; | |||
| 6229 | } | |||
| 6230 | ||||
| 6231 | /* | |||
| 6232 | ** Argument pTerm must be a synonym iterator. | |||
| 6233 | */ | |||
| 6234 | static int fts5ExprSynonymList( | |||
| 6235 | Fts5ExprTerm *pTerm, | |||
| 6236 | i64 iRowid, | |||
| 6237 | Fts5Buffer *pBuf, /* Use this buffer for space if required */ | |||
| 6238 | u8 **pa, int *pn | |||
| 6239 | ){ | |||
| 6240 | Fts5PoslistReader aStatic[4]; | |||
| 6241 | Fts5PoslistReader *aIter = aStatic; | |||
| 6242 | int nIter = 0; | |||
| 6243 | int nAlloc = 4; | |||
| 6244 | int rc = SQLITE_OK0; | |||
| 6245 | Fts5ExprTerm *p; | |||
| 6246 | ||||
| 6247 | assert( pTerm->pSynonym )((void) (0)); | |||
| 6248 | for(p=pTerm; p; p=p->pSynonym){ | |||
| 6249 | Fts5IndexIter *pIter = p->pIter; | |||
| 6250 | if( sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 && pIter->iRowid==iRowid ){ | |||
| 6251 | if( pIter->nData==0 ) continue; | |||
| 6252 | if( nIter==nAlloc ){ | |||
| 6253 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; | |||
| 6254 | Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 6255 | if( aNew==0 ){ | |||
| 6256 | rc = SQLITE_NOMEM7; | |||
| 6257 | goto synonym_poslist_out; | |||
| 6258 | } | |||
| 6259 | memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); | |||
| 6260 | nAlloc = nAlloc*2; | |||
| 6261 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | |||
| 6262 | aIter = aNew; | |||
| 6263 | } | |||
| 6264 | sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); | |||
| 6265 | assert( aIter[nIter].bEof==0 )((void) (0)); | |||
| 6266 | nIter++; | |||
| 6267 | } | |||
| 6268 | } | |||
| 6269 | ||||
| 6270 | if( nIter==1 ){ | |||
| 6271 | *pa = (u8*)aIter[0].a; | |||
| 6272 | *pn = aIter[0].n; | |||
| 6273 | }else{ | |||
| 6274 | Fts5PoslistWriter writer = {0}; | |||
| 6275 | i64 iPrev = -1; | |||
| 6276 | fts5BufferZero(pBuf)sqlite3Fts5BufferZero(pBuf); | |||
| 6277 | while( 1 ){ | |||
| 6278 | int i; | |||
| 6279 | i64 iMin = FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | |||
| 6280 | for(i=0; i<nIter; i++){ | |||
| 6281 | if( aIter[i].bEof==0 ){ | |||
| 6282 | if( aIter[i].iPos==iPrev ){ | |||
| 6283 | if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; | |||
| 6284 | } | |||
| 6285 | if( aIter[i].iPos<iMin ){ | |||
| 6286 | iMin = aIter[i].iPos; | |||
| 6287 | } | |||
| 6288 | } | |||
| 6289 | } | |||
| 6290 | if( iMin==FTS5_LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) || rc!=SQLITE_OK0 ) break; | |||
| 6291 | rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin); | |||
| 6292 | iPrev = iMin; | |||
| 6293 | } | |||
| 6294 | if( rc==SQLITE_OK0 ){ | |||
| 6295 | *pa = pBuf->p; | |||
| 6296 | *pn = pBuf->n; | |||
| 6297 | } | |||
| 6298 | } | |||
| 6299 | ||||
| 6300 | synonym_poslist_out: | |||
| 6301 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | |||
| 6302 | return rc; | |||
| 6303 | } | |||
| 6304 | ||||
| 6305 | ||||
| 6306 | /* | |||
| 6307 | ** All individual term iterators in pPhrase are guaranteed to be valid and | |||
| 6308 | ** pointing to the same rowid when this function is called. This function | |||
| 6309 | ** checks if the current rowid really is a match, and if so populates | |||
| 6310 | ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch | |||
| 6311 | ** is set to true if this is really a match, or false otherwise. | |||
| 6312 | ** | |||
| 6313 | ** SQLITE_OK is returned if an error occurs, or an SQLite error code | |||
| 6314 | ** otherwise. It is not considered an error code if the current rowid is | |||
| 6315 | ** not a match. | |||
| 6316 | */ | |||
| 6317 | static int fts5ExprPhraseIsMatch( | |||
| 6318 | Fts5ExprNode *pNode, /* Node pPhrase belongs to */ | |||
| 6319 | Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ | |||
| 6320 | int *pbMatch /* OUT: Set to true if really a match */ | |||
| 6321 | ){ | |||
| 6322 | Fts5PoslistWriter writer = {0}; | |||
| 6323 | Fts5PoslistReader aStatic[4]; | |||
| 6324 | Fts5PoslistReader *aIter = aStatic; | |||
| 6325 | int i; | |||
| 6326 | int rc = SQLITE_OK0; | |||
| 6327 | int bFirst = pPhrase->aTerm[0].bFirst; | |||
| 6328 | ||||
| 6329 | fts5BufferZero(&pPhrase->poslist)sqlite3Fts5BufferZero(&pPhrase->poslist); | |||
| 6330 | ||||
| 6331 | /* If the aStatic[] array is not large enough, allocate a large array | |||
| 6332 | ** using sqlite3_malloc(). This approach could be improved upon. */ | |||
| 6333 | if( pPhrase->nTerm>ArraySize(aStatic)((int)(sizeof(aStatic) / sizeof(aStatic[0]))) ){ | |||
| 6334 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; | |||
| 6335 | aIter = (Fts5PoslistReader*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 6336 | if( !aIter ) return SQLITE_NOMEM7; | |||
| 6337 | } | |||
| 6338 | memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); | |||
| 6339 | ||||
| 6340 | /* Initialize a term iterator for each term in the phrase */ | |||
| 6341 | for(i=0; i<pPhrase->nTerm; i++){ | |||
| 6342 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; | |||
| 6343 | int n = 0; | |||
| 6344 | int bFlag = 0; | |||
| 6345 | u8 *a = 0; | |||
| 6346 | if( pTerm->pSynonym ){ | |||
| 6347 | Fts5Buffer buf = {0, 0, 0}; | |||
| 6348 | rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n); | |||
| 6349 | if( rc ){ | |||
| 6350 | sqlite3_freesqlite3_api->free(a); | |||
| 6351 | goto ismatch_out; | |||
| 6352 | } | |||
| 6353 | if( a==buf.p ) bFlag = 1; | |||
| 6354 | }else{ | |||
| 6355 | a = (u8*)pTerm->pIter->pData; | |||
| 6356 | n = pTerm->pIter->nData; | |||
| 6357 | } | |||
| 6358 | sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); | |||
| 6359 | aIter[i].bFlag = (u8)bFlag; | |||
| 6360 | if( aIter[i].bEof ) goto ismatch_out; | |||
| 6361 | } | |||
| 6362 | ||||
| 6363 | while( 1 ){ | |||
| 6364 | int bMatch; | |||
| 6365 | i64 iPos = aIter[0].iPos; | |||
| 6366 | do { | |||
| 6367 | bMatch = 1; | |||
| 6368 | for(i=0; i<pPhrase->nTerm; i++){ | |||
| 6369 | Fts5PoslistReader *pPos = &aIter[i]; | |||
| 6370 | i64 iAdj = iPos + i; | |||
| 6371 | if( pPos->iPos!=iAdj ){ | |||
| 6372 | bMatch = 0; | |||
| 6373 | while( pPos->iPos<iAdj ){ | |||
| 6374 | if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out; | |||
| 6375 | } | |||
| 6376 | if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; | |||
| 6377 | } | |||
| 6378 | } | |||
| 6379 | }while( bMatch==0 ); | |||
| 6380 | ||||
| 6381 | /* Append position iPos to the output */ | |||
| 6382 | if( bFirst==0 || FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF)==0 ){ | |||
| 6383 | rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); | |||
| 6384 | if( rc!=SQLITE_OK0 ) goto ismatch_out; | |||
| 6385 | } | |||
| 6386 | ||||
| 6387 | for(i=0; i<pPhrase->nTerm; i++){ | |||
| 6388 | if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; | |||
| 6389 | } | |||
| 6390 | } | |||
| 6391 | ||||
| 6392 | ismatch_out: | |||
| 6393 | *pbMatch = (pPhrase->poslist.n>0); | |||
| 6394 | for(i=0; i<pPhrase->nTerm; i++){ | |||
| 6395 | if( aIter[i].bFlag ) sqlite3_freesqlite3_api->free((u8*)aIter[i].a); | |||
| 6396 | } | |||
| 6397 | if( aIter!=aStatic ) sqlite3_freesqlite3_api->free(aIter); | |||
| 6398 | return rc; | |||
| 6399 | } | |||
| 6400 | ||||
| 6401 | typedef struct Fts5LookaheadReader Fts5LookaheadReader; | |||
| 6402 | struct Fts5LookaheadReader { | |||
| 6403 | const u8 *a; /* Buffer containing position list */ | |||
| 6404 | int n; /* Size of buffer a[] in bytes */ | |||
| 6405 | int i; /* Current offset in position list */ | |||
| 6406 | i64 iPos; /* Current position */ | |||
| 6407 | i64 iLookahead; /* Next position */ | |||
| 6408 | }; | |||
| 6409 | ||||
| 6410 | #define FTS5_LOOKAHEAD_EOF(((i64)1) << 62) (((i64)1) << 62) | |||
| 6411 | ||||
| 6412 | static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ | |||
| 6413 | p->iPos = p->iLookahead; | |||
| 6414 | if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ | |||
| 6415 | p->iLookahead = FTS5_LOOKAHEAD_EOF(((i64)1) << 62); | |||
| 6416 | } | |||
| 6417 | return (p->iPos==FTS5_LOOKAHEAD_EOF(((i64)1) << 62)); | |||
| 6418 | } | |||
| 6419 | ||||
| 6420 | static int fts5LookaheadReaderInit( | |||
| 6421 | const u8 *a, int n, /* Buffer to read position list from */ | |||
| 6422 | Fts5LookaheadReader *p /* Iterator object to initialize */ | |||
| 6423 | ){ | |||
| 6424 | memset(p, 0, sizeof(Fts5LookaheadReader)); | |||
| 6425 | p->a = a; | |||
| 6426 | p->n = n; | |||
| 6427 | fts5LookaheadReaderNext(p); | |||
| 6428 | return fts5LookaheadReaderNext(p); | |||
| 6429 | } | |||
| 6430 | ||||
| 6431 | typedef struct Fts5NearTrimmer Fts5NearTrimmer; | |||
| 6432 | struct Fts5NearTrimmer { | |||
| 6433 | Fts5LookaheadReader reader; /* Input iterator */ | |||
| 6434 | Fts5PoslistWriter writer; /* Writer context */ | |||
| 6435 | Fts5Buffer *pOut; /* Output poslist */ | |||
| 6436 | }; | |||
| 6437 | ||||
| 6438 | /* | |||
| 6439 | ** The near-set object passed as the first argument contains more than | |||
| 6440 | ** one phrase. All phrases currently point to the same row. The | |||
| 6441 | ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function | |||
| 6442 | ** tests if the current row contains instances of each phrase sufficiently | |||
| 6443 | ** close together to meet the NEAR constraint. Non-zero is returned if it | |||
| 6444 | ** does, or zero otherwise. | |||
| 6445 | ** | |||
| 6446 | ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this | |||
| 6447 | ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) | |||
| 6448 | ** occurs within this function (*pRc) is set accordingly before returning. | |||
| 6449 | ** The return value is undefined in both these cases. | |||
| 6450 | ** | |||
| 6451 | ** If no error occurs and non-zero (a match) is returned, the position-list | |||
| 6452 | ** of each phrase object is edited to contain only those entries that | |||
| 6453 | ** meet the constraint before returning. | |||
| 6454 | */ | |||
| 6455 | static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ | |||
| 6456 | Fts5NearTrimmer aStatic[4]; | |||
| 6457 | Fts5NearTrimmer *a = aStatic; | |||
| 6458 | Fts5ExprPhrase **apPhrase = pNear->apPhrase; | |||
| 6459 | ||||
| 6460 | int i; | |||
| 6461 | int rc = *pRc; | |||
| 6462 | int bMatch; | |||
| 6463 | ||||
| 6464 | assert( pNear->nPhrase>1 )((void) (0)); | |||
| 6465 | ||||
| 6466 | /* If the aStatic[] array is not large enough, allocate a large array | |||
| 6467 | ** using sqlite3_malloc(). This approach could be improved upon. */ | |||
| 6468 | if( pNear->nPhrase>ArraySize(aStatic)((int)(sizeof(aStatic) / sizeof(aStatic[0]))) ){ | |||
| 6469 | sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; | |||
| 6470 | a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); | |||
| 6471 | }else{ | |||
| 6472 | memset(aStatic, 0, sizeof(aStatic)); | |||
| 6473 | } | |||
| 6474 | if( rc!=SQLITE_OK0 ){ | |||
| 6475 | *pRc = rc; | |||
| 6476 | return 0; | |||
| 6477 | } | |||
| 6478 | ||||
| 6479 | /* Initialize a lookahead iterator for each phrase. After passing the | |||
| 6480 | ** buffer and buffer size to the lookaside-reader init function, zero | |||
| 6481 | ** the phrase poslist buffer. The new poslist for the phrase (containing | |||
| 6482 | ** the same entries as the original with some entries removed on account | |||
| 6483 | ** of the NEAR constraint) is written over the original even as it is | |||
| 6484 | ** being read. This is safe as the entries for the new poslist are a | |||
| 6485 | ** subset of the old, so it is not possible for data yet to be read to | |||
| 6486 | ** be overwritten. */ | |||
| 6487 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 6488 | Fts5Buffer *pPoslist = &apPhrase[i]->poslist; | |||
| 6489 | fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader); | |||
| 6490 | pPoslist->n = 0; | |||
| 6491 | a[i].pOut = pPoslist; | |||
| 6492 | } | |||
| 6493 | ||||
| 6494 | while( 1 ){ | |||
| 6495 | int iAdv; | |||
| 6496 | i64 iMin; | |||
| 6497 | i64 iMax; | |||
| 6498 | ||||
| 6499 | /* This block advances the phrase iterators until they point to a set of | |||
| 6500 | ** entries that together comprise a match. */ | |||
| 6501 | iMax = a[0].reader.iPos; | |||
| 6502 | do { | |||
| 6503 | bMatch = 1; | |||
| 6504 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 6505 | Fts5LookaheadReader *pPos = &a[i].reader; | |||
| 6506 | iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; | |||
| 6507 | if( pPos->iPos<iMin || pPos->iPos>iMax ){ | |||
| 6508 | bMatch = 0; | |||
| 6509 | while( pPos->iPos<iMin ){ | |||
| 6510 | if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out; | |||
| 6511 | } | |||
| 6512 | if( pPos->iPos>iMax ) iMax = pPos->iPos; | |||
| 6513 | } | |||
| 6514 | } | |||
| 6515 | }while( bMatch==0 ); | |||
| 6516 | ||||
| 6517 | /* Add an entry to each output position list */ | |||
| 6518 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 6519 | i64 iPos = a[i].reader.iPos; | |||
| 6520 | Fts5PoslistWriter *pWriter = &a[i].writer; | |||
| 6521 | if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ | |||
| 6522 | sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos); | |||
| 6523 | } | |||
| 6524 | } | |||
| 6525 | ||||
| 6526 | iAdv = 0; | |||
| 6527 | iMin = a[0].reader.iLookahead; | |||
| 6528 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 6529 | if( a[i].reader.iLookahead < iMin ){ | |||
| 6530 | iMin = a[i].reader.iLookahead; | |||
| 6531 | iAdv = i; | |||
| 6532 | } | |||
| 6533 | } | |||
| 6534 | if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; | |||
| 6535 | } | |||
| 6536 | ||||
| 6537 | ismatch_out: { | |||
| 6538 | int bRet = a[0].pOut->n>0; | |||
| 6539 | *pRc = rc; | |||
| 6540 | if( a!=aStatic ) sqlite3_freesqlite3_api->free(a); | |||
| 6541 | return bRet; | |||
| 6542 | } | |||
| 6543 | } | |||
| 6544 | ||||
| 6545 | /* | |||
| 6546 | ** Advance iterator pIter until it points to a value equal to or laster | |||
| 6547 | ** than the initial value of *piLast. If this means the iterator points | |||
| 6548 | ** to a value laster than *piLast, update *piLast to the new lastest value. | |||
| 6549 | ** | |||
| 6550 | ** If the iterator reaches EOF, set *pbEof to true before returning. If | |||
| 6551 | ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc | |||
| 6552 | ** are set, return a non-zero value. Otherwise, return zero. | |||
| 6553 | */ | |||
| 6554 | static int fts5ExprAdvanceto( | |||
| 6555 | Fts5IndexIter *pIter, /* Iterator to advance */ | |||
| 6556 | int bDesc, /* True if iterator is "rowid DESC" */ | |||
| 6557 | i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ | |||
| 6558 | int *pRc, /* OUT: Error code */ | |||
| 6559 | int *pbEof /* OUT: Set to true if EOF */ | |||
| 6560 | ){ | |||
| 6561 | i64 iLast = *piLast; | |||
| 6562 | i64 iRowid; | |||
| 6563 | ||||
| 6564 | iRowid = pIter->iRowid; | |||
| 6565 | if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ | |||
| 6566 | int rc = sqlite3Fts5IterNextFrom(pIter, iLast); | |||
| 6567 | if( rc || sqlite3Fts5IterEof(pIter)((pIter)->bEof) ){ | |||
| 6568 | *pRc = rc; | |||
| 6569 | *pbEof = 1; | |||
| 6570 | return 1; | |||
| 6571 | } | |||
| 6572 | iRowid = pIter->iRowid; | |||
| 6573 | assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) )((void) (0)); | |||
| 6574 | } | |||
| 6575 | *piLast = iRowid; | |||
| 6576 | ||||
| 6577 | return 0; | |||
| 6578 | } | |||
| 6579 | ||||
| 6580 | static int fts5ExprSynonymAdvanceto( | |||
| 6581 | Fts5ExprTerm *pTerm, /* Term iterator to advance */ | |||
| 6582 | int bDesc, /* True if iterator is "rowid DESC" */ | |||
| 6583 | i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ | |||
| 6584 | int *pRc /* OUT: Error code */ | |||
| 6585 | ){ | |||
| 6586 | int rc = SQLITE_OK0; | |||
| 6587 | i64 iLast = *piLast; | |||
| 6588 | Fts5ExprTerm *p; | |||
| 6589 | int bEof = 0; | |||
| 6590 | ||||
| 6591 | for(p=pTerm; rc==SQLITE_OK0 && p; p=p->pSynonym){ | |||
| 6592 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | |||
| 6593 | i64 iRowid = p->pIter->iRowid; | |||
| 6594 | if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ | |||
| 6595 | rc = sqlite3Fts5IterNextFrom(p->pIter, iLast); | |||
| 6596 | } | |||
| 6597 | } | |||
| 6598 | } | |||
| 6599 | ||||
| 6600 | if( rc!=SQLITE_OK0 ){ | |||
| 6601 | *pRc = rc; | |||
| 6602 | bEof = 1; | |||
| 6603 | }else{ | |||
| 6604 | *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); | |||
| 6605 | } | |||
| 6606 | return bEof; | |||
| 6607 | } | |||
| 6608 | ||||
| 6609 | ||||
| 6610 | static int fts5ExprNearTest( | |||
| 6611 | int *pRc, | |||
| 6612 | Fts5Expr *pExpr, /* Expression that pNear is a part of */ | |||
| 6613 | Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ | |||
| 6614 | ){ | |||
| 6615 | Fts5ExprNearset *pNear = pNode->pNear; | |||
| 6616 | int rc = *pRc; | |||
| 6617 | ||||
| 6618 | if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | |||
| 6619 | Fts5ExprTerm *pTerm; | |||
| 6620 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; | |||
| 6621 | pPhrase->poslist.n = 0; | |||
| 6622 | for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ | |||
| 6623 | Fts5IndexIter *pIter = pTerm->pIter; | |||
| 6624 | if( sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 ){ | |||
| 6625 | if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ | |||
| 6626 | pPhrase->poslist.n = 1; | |||
| 6627 | } | |||
| 6628 | } | |||
| 6629 | } | |||
| 6630 | return pPhrase->poslist.n; | |||
| 6631 | }else{ | |||
| 6632 | int i; | |||
| 6633 | ||||
| 6634 | /* Check that each phrase in the nearset matches the current row. | |||
| 6635 | ** Populate the pPhrase->poslist buffers at the same time. If any | |||
| 6636 | ** phrase is not a match, break out of the loop early. */ | |||
| 6637 | for(i=0; rc==SQLITE_OK0 && i<pNear->nPhrase; i++){ | |||
| 6638 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
| 6639 | if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym | |||
| 6640 | || pNear->pColset || pPhrase->aTerm[0].bFirst | |||
| 6641 | ){ | |||
| 6642 | int bMatch = 0; | |||
| 6643 | rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); | |||
| 6644 | if( bMatch==0 ) break; | |||
| 6645 | }else{ | |||
| 6646 | Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; | |||
| 6647 | fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData)sqlite3Fts5BufferSet(&rc,&pPhrase->poslist,pIter-> nData,pIter->pData); | |||
| 6648 | } | |||
| 6649 | } | |||
| 6650 | ||||
| 6651 | *pRc = rc; | |||
| 6652 | if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ | |||
| 6653 | return 1; | |||
| 6654 | } | |||
| 6655 | return 0; | |||
| 6656 | } | |||
| 6657 | } | |||
| 6658 | ||||
| 6659 | ||||
| 6660 | /* | |||
| 6661 | ** Initialize all term iterators in the pNear object. If any term is found | |||
| 6662 | ** to match no documents at all, return immediately without initializing any | |||
| 6663 | ** further iterators. | |||
| 6664 | ** | |||
| 6665 | ** If an error occurs, return an SQLite error code. Otherwise, return | |||
| 6666 | ** SQLITE_OK. It is not considered an error if some term matches zero | |||
| 6667 | ** documents. | |||
| 6668 | */ | |||
| 6669 | static int fts5ExprNearInitAll( | |||
| 6670 | Fts5Expr *pExpr, | |||
| 6671 | Fts5ExprNode *pNode | |||
| 6672 | ){ | |||
| 6673 | Fts5ExprNearset *pNear = pNode->pNear; | |||
| 6674 | int i; | |||
| 6675 | ||||
| 6676 | assert( pNode->bNomatch==0 )((void) (0)); | |||
| 6677 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 6678 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
| 6679 | if( pPhrase->nTerm==0 ){ | |||
| 6680 | pNode->bEof = 1; | |||
| 6681 | return SQLITE_OK0; | |||
| 6682 | }else{ | |||
| 6683 | int j; | |||
| 6684 | for(j=0; j<pPhrase->nTerm; j++){ | |||
| 6685 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; | |||
| 6686 | Fts5ExprTerm *p; | |||
| 6687 | int bHit = 0; | |||
| 6688 | ||||
| 6689 | for(p=pTerm; p; p=p->pSynonym){ | |||
| 6690 | int rc; | |||
| 6691 | if( p->pIter ){ | |||
| 6692 | sqlite3Fts5IterClose(p->pIter); | |||
| 6693 | p->pIter = 0; | |||
| 6694 | } | |||
| 6695 | rc = sqlite3Fts5IndexQuery( | |||
| 6696 | pExpr->pIndex, p->pTerm, p->nQueryTerm, | |||
| 6697 | (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX0x0001 : 0) | | |||
| 6698 | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC0x0002 : 0), | |||
| 6699 | pNear->pColset, | |||
| 6700 | &p->pIter | |||
| 6701 | ); | |||
| 6702 | assert( (rc==SQLITE_OK)==(p->pIter!=0) )((void) (0)); | |||
| 6703 | if( rc!=SQLITE_OK0 ) return rc; | |||
| 6704 | if( 0==sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof) ){ | |||
| 6705 | bHit = 1; | |||
| 6706 | } | |||
| 6707 | } | |||
| 6708 | ||||
| 6709 | if( bHit==0 ){ | |||
| 6710 | pNode->bEof = 1; | |||
| 6711 | return SQLITE_OK0; | |||
| 6712 | } | |||
| 6713 | } | |||
| 6714 | } | |||
| 6715 | } | |||
| 6716 | ||||
| 6717 | pNode->bEof = 0; | |||
| 6718 | return SQLITE_OK0; | |||
| 6719 | } | |||
| 6720 | ||||
| 6721 | /* | |||
| 6722 | ** If pExpr is an ASC iterator, this function returns a value with the | |||
| 6723 | ** same sign as: | |||
| 6724 | ** | |||
| 6725 | ** (iLhs - iRhs) | |||
| 6726 | ** | |||
| 6727 | ** Otherwise, if this is a DESC iterator, the opposite is returned: | |||
| 6728 | ** | |||
| 6729 | ** (iRhs - iLhs) | |||
| 6730 | */ | |||
| 6731 | static int fts5RowidCmp( | |||
| 6732 | Fts5Expr *pExpr, | |||
| 6733 | i64 iLhs, | |||
| 6734 | i64 iRhs | |||
| 6735 | ){ | |||
| 6736 | assert( pExpr->bDesc==0 || pExpr->bDesc==1 )((void) (0)); | |||
| 6737 | if( pExpr->bDesc==0 ){ | |||
| 6738 | if( iLhs<iRhs ) return -1; | |||
| 6739 | return (iLhs > iRhs); | |||
| 6740 | }else{ | |||
| 6741 | if( iLhs>iRhs ) return -1; | |||
| 6742 | return (iLhs < iRhs); | |||
| 6743 | } | |||
| 6744 | } | |||
| 6745 | ||||
| 6746 | static void fts5ExprSetEof(Fts5ExprNode *pNode){ | |||
| 6747 | int i; | |||
| 6748 | pNode->bEof = 1; | |||
| 6749 | pNode->bNomatch = 0; | |||
| 6750 | for(i=0; i<pNode->nChild; i++){ | |||
| 6751 | fts5ExprSetEof(pNode->apChild[i]); | |||
| 6752 | } | |||
| 6753 | } | |||
| 6754 | ||||
| 6755 | static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ | |||
| 6756 | if( pNode->eType==FTS5_STRING9 || pNode->eType==FTS5_TERM4 ){ | |||
| 6757 | Fts5ExprNearset *pNear = pNode->pNear; | |||
| 6758 | int i; | |||
| 6759 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 6760 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
| 6761 | pPhrase->poslist.n = 0; | |||
| 6762 | } | |||
| 6763 | }else{ | |||
| 6764 | int i; | |||
| 6765 | for(i=0; i<pNode->nChild; i++){ | |||
| 6766 | fts5ExprNodeZeroPoslist(pNode->apChild[i]); | |||
| 6767 | } | |||
| 6768 | } | |||
| 6769 | } | |||
| 6770 | ||||
| 6771 | ||||
| 6772 | ||||
| 6773 | /* | |||
| 6774 | ** Compare the values currently indicated by the two nodes as follows: | |||
| 6775 | ** | |||
| 6776 | ** res = (*p1) - (*p2) | |||
| 6777 | ** | |||
| 6778 | ** Nodes that point to values that come later in the iteration order are | |||
| 6779 | ** considered to be larger. Nodes at EOF are the largest of all. | |||
| 6780 | ** | |||
| 6781 | ** This means that if the iteration order is ASC, then numerically larger | |||
| 6782 | ** rowids are considered larger. Or if it is the default DESC, numerically | |||
| 6783 | ** smaller rowids are larger. | |||
| 6784 | */ | |||
| 6785 | static int fts5NodeCompare( | |||
| 6786 | Fts5Expr *pExpr, | |||
| 6787 | Fts5ExprNode *p1, | |||
| 6788 | Fts5ExprNode *p2 | |||
| 6789 | ){ | |||
| 6790 | if( p2->bEof ) return -1; | |||
| 6791 | if( p1->bEof ) return +1; | |||
| 6792 | return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); | |||
| 6793 | } | |||
| 6794 | ||||
| 6795 | /* | |||
| 6796 | ** All individual term iterators in pNear are guaranteed to be valid when | |||
| 6797 | ** this function is called. This function checks if all term iterators | |||
| 6798 | ** point to the same rowid, and if not, advances them until they do. | |||
| 6799 | ** If an EOF is reached before this happens, *pbEof is set to true before | |||
| 6800 | ** returning. | |||
| 6801 | ** | |||
| 6802 | ** SQLITE_OK is returned if an error occurs, or an SQLite error code | |||
| 6803 | ** otherwise. It is not considered an error code if an iterator reaches | |||
| 6804 | ** EOF. | |||
| 6805 | */ | |||
| 6806 | static int fts5ExprNodeTest_STRING( | |||
| 6807 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | |||
| 6808 | Fts5ExprNode *pNode | |||
| 6809 | ){ | |||
| 6810 | Fts5ExprNearset *pNear = pNode->pNear; | |||
| 6811 | Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; | |||
| 6812 | int rc = SQLITE_OK0; | |||
| 6813 | i64 iLast; /* Lastest rowid any iterator points to */ | |||
| 6814 | int i, j; /* Phrase and token index, respectively */ | |||
| 6815 | int bMatch; /* True if all terms are at the same rowid */ | |||
| 6816 | const int bDesc = pExpr->bDesc; | |||
| 6817 | ||||
| 6818 | /* Check that this node should not be FTS5_TERM */ | |||
| 6819 | assert( pNear->nPhrase>1((void) (0)) | |||
| 6820 | || pNear->apPhrase[0]->nTerm>1((void) (0)) | |||
| 6821 | || pNear->apPhrase[0]->aTerm[0].pSynonym((void) (0)) | |||
| 6822 | || pNear->apPhrase[0]->aTerm[0].bFirst((void) (0)) | |||
| 6823 | )((void) (0)); | |||
| 6824 | ||||
| 6825 | /* Initialize iLast, the "lastest" rowid any iterator points to. If the | |||
| 6826 | ** iterator skips through rowids in the default ascending order, this means | |||
| 6827 | ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it | |||
| 6828 | ** means the minimum rowid. */ | |||
| 6829 | if( pLeft->aTerm[0].pSynonym ){ | |||
| 6830 | iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); | |||
| 6831 | }else{ | |||
| 6832 | iLast = pLeft->aTerm[0].pIter->iRowid; | |||
| 6833 | } | |||
| 6834 | ||||
| 6835 | do { | |||
| 6836 | bMatch = 1; | |||
| 6837 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 6838 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
| 6839 | for(j=0; j<pPhrase->nTerm; j++){ | |||
| 6840 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; | |||
| 6841 | if( pTerm->pSynonym ){ | |||
| 6842 | i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); | |||
| 6843 | if( iRowid==iLast ) continue; | |||
| 6844 | bMatch = 0; | |||
| 6845 | if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ | |||
| 6846 | pNode->bNomatch = 0; | |||
| 6847 | pNode->bEof = 1; | |||
| 6848 | return rc; | |||
| 6849 | } | |||
| 6850 | }else{ | |||
| 6851 | Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; | |||
| 6852 | if( pIter->iRowid==iLast ) continue; | |||
| 6853 | bMatch = 0; | |||
| 6854 | if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ | |||
| 6855 | return rc; | |||
| 6856 | } | |||
| 6857 | } | |||
| 6858 | } | |||
| 6859 | } | |||
| 6860 | }while( bMatch==0 ); | |||
| 6861 | ||||
| 6862 | pNode->iRowid = iLast; | |||
| 6863 | pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK0); | |||
| 6864 | assert( pNode->bEof==0 || pNode->bNomatch==0 )((void) (0)); | |||
| 6865 | ||||
| 6866 | return rc; | |||
| 6867 | } | |||
| 6868 | ||||
| 6869 | /* | |||
| 6870 | ** Advance the first term iterator in the first phrase of pNear. Set output | |||
| 6871 | ** variable *pbEof to true if it reaches EOF or if an error occurs. | |||
| 6872 | ** | |||
| 6873 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
| 6874 | ** occurs. | |||
| 6875 | */ | |||
| 6876 | static int fts5ExprNodeNext_STRING( | |||
| 6877 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | |||
| 6878 | Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ | |||
| 6879 | int bFromValid, | |||
| 6880 | i64 iFrom | |||
| 6881 | ){ | |||
| 6882 | Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; | |||
| 6883 | int rc = SQLITE_OK0; | |||
| 6884 | ||||
| 6885 | pNode->bNomatch = 0; | |||
| 6886 | if( pTerm->pSynonym ){ | |||
| 6887 | int bEof = 1; | |||
| 6888 | Fts5ExprTerm *p; | |||
| 6889 | ||||
| 6890 | /* Find the firstest rowid any synonym points to. */ | |||
| 6891 | i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); | |||
| 6892 | ||||
| 6893 | /* Advance each iterator that currently points to iRowid. Or, if iFrom | |||
| 6894 | ** is valid - each iterator that points to a rowid before iFrom. */ | |||
| 6895 | for(p=pTerm; p; p=p->pSynonym){ | |||
| 6896 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | |||
| 6897 | i64 ii = p->pIter->iRowid; | |||
| 6898 | if( ii==iRowid | |||
| 6899 | || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) | |||
| 6900 | ){ | |||
| 6901 | if( bFromValid ){ | |||
| 6902 | rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); | |||
| 6903 | }else{ | |||
| 6904 | rc = sqlite3Fts5IterNext(p->pIter); | |||
| 6905 | } | |||
| 6906 | if( rc!=SQLITE_OK0 ) break; | |||
| 6907 | if( sqlite3Fts5IterEof(p->pIter)((p->pIter)->bEof)==0 ){ | |||
| 6908 | bEof = 0; | |||
| 6909 | } | |||
| 6910 | }else{ | |||
| 6911 | bEof = 0; | |||
| 6912 | } | |||
| 6913 | } | |||
| 6914 | } | |||
| 6915 | ||||
| 6916 | /* Set the EOF flag if either all synonym iterators are at EOF or an | |||
| 6917 | ** error has occurred. */ | |||
| 6918 | pNode->bEof = (rc || bEof); | |||
| 6919 | }else{ | |||
| 6920 | Fts5IndexIter *pIter = pTerm->pIter; | |||
| 6921 | ||||
| 6922 | assert( Fts5NodeIsString(pNode) )((void) (0)); | |||
| 6923 | if( bFromValid ){ | |||
| 6924 | rc = sqlite3Fts5IterNextFrom(pIter, iFrom); | |||
| 6925 | }else{ | |||
| 6926 | rc = sqlite3Fts5IterNext(pIter); | |||
| 6927 | } | |||
| 6928 | ||||
| 6929 | pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)((pIter)->bEof)); | |||
| 6930 | } | |||
| 6931 | ||||
| 6932 | if( pNode->bEof==0 ){ | |||
| 6933 | assert( rc==SQLITE_OK )((void) (0)); | |||
| 6934 | rc = fts5ExprNodeTest_STRING(pExpr, pNode); | |||
| 6935 | } | |||
| 6936 | ||||
| 6937 | return rc; | |||
| 6938 | } | |||
| 6939 | ||||
| 6940 | ||||
| 6941 | static int fts5ExprNodeTest_TERM( | |||
| 6942 | Fts5Expr *pExpr, /* Expression that pNear is a part of */ | |||
| 6943 | Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ | |||
| 6944 | ){ | |||
| 6945 | /* As this "NEAR" object is actually a single phrase that consists | |||
| 6946 | ** of a single term only, grab pointers into the poslist managed by the | |||
| 6947 | ** fts5_index.c iterator object. This is much faster than synthesizing | |||
| 6948 | ** a new poslist the way we have to for more complicated phrase or NEAR | |||
| 6949 | ** expressions. */ | |||
| 6950 | Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; | |||
| 6951 | Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; | |||
| 6952 | ||||
| 6953 | assert( pNode->eType==FTS5_TERM )((void) (0)); | |||
| 6954 | assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 )((void) (0)); | |||
| 6955 | assert( pPhrase->aTerm[0].pSynonym==0 )((void) (0)); | |||
| 6956 | ||||
| 6957 | pPhrase->poslist.n = pIter->nData; | |||
| 6958 | if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 6959 | pPhrase->poslist.p = (u8*)pIter->pData; | |||
| 6960 | } | |||
| 6961 | pNode->iRowid = pIter->iRowid; | |||
| 6962 | pNode->bNomatch = (pPhrase->poslist.n==0); | |||
| 6963 | return SQLITE_OK0; | |||
| 6964 | } | |||
| 6965 | ||||
| 6966 | /* | |||
| 6967 | ** xNext() method for a node of type FTS5_TERM. | |||
| 6968 | */ | |||
| 6969 | static int fts5ExprNodeNext_TERM( | |||
| 6970 | Fts5Expr *pExpr, | |||
| 6971 | Fts5ExprNode *pNode, | |||
| 6972 | int bFromValid, | |||
| 6973 | i64 iFrom | |||
| 6974 | ){ | |||
| 6975 | int rc; | |||
| 6976 | Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; | |||
| 6977 | ||||
| 6978 | assert( pNode->bEof==0 )((void) (0)); | |||
| 6979 | if( bFromValid ){ | |||
| 6980 | rc = sqlite3Fts5IterNextFrom(pIter, iFrom); | |||
| 6981 | }else{ | |||
| 6982 | rc = sqlite3Fts5IterNext(pIter); | |||
| 6983 | } | |||
| 6984 | if( rc==SQLITE_OK0 && sqlite3Fts5IterEof(pIter)((pIter)->bEof)==0 ){ | |||
| 6985 | rc = fts5ExprNodeTest_TERM(pExpr, pNode); | |||
| 6986 | }else{ | |||
| 6987 | pNode->bEof = 1; | |||
| 6988 | pNode->bNomatch = 0; | |||
| 6989 | } | |||
| 6990 | return rc; | |||
| 6991 | } | |||
| 6992 | ||||
| 6993 | static void fts5ExprNodeTest_OR( | |||
| 6994 | Fts5Expr *pExpr, /* Expression of which pNode is a part */ | |||
| 6995 | Fts5ExprNode *pNode /* Expression node to test */ | |||
| 6996 | ){ | |||
| 6997 | Fts5ExprNode *pNext = pNode->apChild[0]; | |||
| 6998 | int i; | |||
| 6999 | ||||
| 7000 | for(i=1; i<pNode->nChild; i++){ | |||
| 7001 | Fts5ExprNode *pChild = pNode->apChild[i]; | |||
| 7002 | int cmp = fts5NodeCompare(pExpr, pNext, pChild); | |||
| 7003 | if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ | |||
| 7004 | pNext = pChild; | |||
| 7005 | } | |||
| 7006 | } | |||
| 7007 | pNode->iRowid = pNext->iRowid; | |||
| 7008 | pNode->bEof = pNext->bEof; | |||
| 7009 | pNode->bNomatch = pNext->bNomatch; | |||
| 7010 | } | |||
| 7011 | ||||
| 7012 | static int fts5ExprNodeNext_OR( | |||
| 7013 | Fts5Expr *pExpr, | |||
| 7014 | Fts5ExprNode *pNode, | |||
| 7015 | int bFromValid, | |||
| 7016 | i64 iFrom | |||
| 7017 | ){ | |||
| 7018 | int i; | |||
| 7019 | i64 iLast = pNode->iRowid; | |||
| 7020 | ||||
| 7021 | for(i=0; i<pNode->nChild; i++){ | |||
| 7022 | Fts5ExprNode *p1 = pNode->apChild[i]; | |||
| 7023 | assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 )((void) (0)); | |||
| 7024 | if( p1->bEof==0 ){ | |||
| 7025 | if( (p1->iRowid==iLast) | |||
| 7026 | || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) | |||
| 7027 | ){ | |||
| 7028 | int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom)(p1)->xNext((pExpr), (p1), (bFromValid), (iFrom)); | |||
| 7029 | if( rc!=SQLITE_OK0 ){ | |||
| 7030 | pNode->bNomatch = 0; | |||
| 7031 | return rc; | |||
| 7032 | } | |||
| 7033 | } | |||
| 7034 | } | |||
| 7035 | } | |||
| 7036 | ||||
| 7037 | fts5ExprNodeTest_OR(pExpr, pNode); | |||
| 7038 | return SQLITE_OK0; | |||
| 7039 | } | |||
| 7040 | ||||
| 7041 | /* | |||
| 7042 | ** Argument pNode is an FTS5_AND node. | |||
| 7043 | */ | |||
| 7044 | static int fts5ExprNodeTest_AND( | |||
| 7045 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | |||
| 7046 | Fts5ExprNode *pAnd /* FTS5_AND node to advance */ | |||
| 7047 | ){ | |||
| 7048 | int iChild; | |||
| 7049 | i64 iLast = pAnd->iRowid; | |||
| 7050 | int rc = SQLITE_OK0; | |||
| 7051 | int bMatch; | |||
| 7052 | ||||
| 7053 | assert( pAnd->bEof==0 )((void) (0)); | |||
| 7054 | do { | |||
| 7055 | pAnd->bNomatch = 0; | |||
| 7056 | bMatch = 1; | |||
| 7057 | for(iChild=0; iChild<pAnd->nChild; iChild++){ | |||
| 7058 | Fts5ExprNode *pChild = pAnd->apChild[iChild]; | |||
| 7059 | int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); | |||
| 7060 | if( cmp>0 ){ | |||
| 7061 | /* Advance pChild until it points to iLast or laster */ | |||
| 7062 | rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast)(pChild)->xNext((pExpr), (pChild), (1), (iLast)); | |||
| 7063 | if( rc!=SQLITE_OK0 ){ | |||
| 7064 | pAnd->bNomatch = 0; | |||
| 7065 | return rc; | |||
| 7066 | } | |||
| 7067 | } | |||
| 7068 | ||||
| 7069 | /* If the child node is now at EOF, so is the parent AND node. Otherwise, | |||
| 7070 | ** the child node is guaranteed to have advanced at least as far as | |||
| 7071 | ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the | |||
| 7072 | ** new lastest rowid seen so far. */ | |||
| 7073 | assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 )((void) (0)); | |||
| 7074 | if( pChild->bEof ){ | |||
| 7075 | fts5ExprSetEof(pAnd); | |||
| 7076 | bMatch = 1; | |||
| 7077 | break; | |||
| 7078 | }else if( iLast!=pChild->iRowid ){ | |||
| 7079 | bMatch = 0; | |||
| 7080 | iLast = pChild->iRowid; | |||
| 7081 | } | |||
| 7082 | ||||
| 7083 | if( pChild->bNomatch ){ | |||
| 7084 | pAnd->bNomatch = 1; | |||
| 7085 | } | |||
| 7086 | } | |||
| 7087 | }while( bMatch==0 ); | |||
| 7088 | ||||
| 7089 | if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ | |||
| 7090 | fts5ExprNodeZeroPoslist(pAnd); | |||
| 7091 | } | |||
| 7092 | pAnd->iRowid = iLast; | |||
| 7093 | return SQLITE_OK0; | |||
| 7094 | } | |||
| 7095 | ||||
| 7096 | static int fts5ExprNodeNext_AND( | |||
| 7097 | Fts5Expr *pExpr, | |||
| 7098 | Fts5ExprNode *pNode, | |||
| 7099 | int bFromValid, | |||
| 7100 | i64 iFrom | |||
| 7101 | ){ | |||
| 7102 | int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom)(pNode->apChild[0])->xNext((pExpr), (pNode->apChild[ 0]), (bFromValid), (iFrom)); | |||
| 7103 | if( rc==SQLITE_OK0 ){ | |||
| 7104 | rc = fts5ExprNodeTest_AND(pExpr, pNode); | |||
| 7105 | }else{ | |||
| 7106 | pNode->bNomatch = 0; | |||
| 7107 | } | |||
| 7108 | return rc; | |||
| 7109 | } | |||
| 7110 | ||||
| 7111 | static int fts5ExprNodeTest_NOT( | |||
| 7112 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ | |||
| 7113 | Fts5ExprNode *pNode /* FTS5_NOT node to advance */ | |||
| 7114 | ){ | |||
| 7115 | int rc = SQLITE_OK0; | |||
| 7116 | Fts5ExprNode *p1 = pNode->apChild[0]; | |||
| 7117 | Fts5ExprNode *p2 = pNode->apChild[1]; | |||
| 7118 | assert( pNode->nChild==2 )((void) (0)); | |||
| 7119 | ||||
| 7120 | while( rc==SQLITE_OK0 && p1->bEof==0 ){ | |||
| 7121 | int cmp = fts5NodeCompare(pExpr, p1, p2); | |||
| 7122 | if( cmp>0 ){ | |||
| 7123 | rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid)(p2)->xNext((pExpr), (p2), (1), (p1->iRowid)); | |||
| 7124 | cmp = fts5NodeCompare(pExpr, p1, p2); | |||
| 7125 | } | |||
| 7126 | assert( rc!=SQLITE_OK || cmp<=0 )((void) (0)); | |||
| 7127 | if( cmp || p2->bNomatch ) break; | |||
| 7128 | rc = fts5ExprNodeNext(pExpr, p1, 0, 0)(p1)->xNext((pExpr), (p1), (0), (0)); | |||
| 7129 | } | |||
| 7130 | pNode->bEof = p1->bEof; | |||
| 7131 | pNode->bNomatch = p1->bNomatch; | |||
| 7132 | pNode->iRowid = p1->iRowid; | |||
| 7133 | if( p1->bEof ){ | |||
| 7134 | fts5ExprNodeZeroPoslist(p2); | |||
| 7135 | } | |||
| 7136 | return rc; | |||
| 7137 | } | |||
| 7138 | ||||
| 7139 | static int fts5ExprNodeNext_NOT( | |||
| 7140 | Fts5Expr *pExpr, | |||
| 7141 | Fts5ExprNode *pNode, | |||
| 7142 | int bFromValid, | |||
| 7143 | i64 iFrom | |||
| 7144 | ){ | |||
| 7145 | int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom)(pNode->apChild[0])->xNext((pExpr), (pNode->apChild[ 0]), (bFromValid), (iFrom)); | |||
| 7146 | if( rc==SQLITE_OK0 ){ | |||
| 7147 | rc = fts5ExprNodeTest_NOT(pExpr, pNode); | |||
| 7148 | } | |||
| 7149 | if( rc!=SQLITE_OK0 ){ | |||
| 7150 | pNode->bNomatch = 0; | |||
| 7151 | } | |||
| 7152 | return rc; | |||
| 7153 | } | |||
| 7154 | ||||
| 7155 | /* | |||
| 7156 | ** If pNode currently points to a match, this function returns SQLITE_OK | |||
| 7157 | ** without modifying it. Otherwise, pNode is advanced until it does point | |||
| 7158 | ** to a match or EOF is reached. | |||
| 7159 | */ | |||
| 7160 | static int fts5ExprNodeTest( | |||
| 7161 | Fts5Expr *pExpr, /* Expression of which pNode is a part */ | |||
| 7162 | Fts5ExprNode *pNode /* Expression node to test */ | |||
| 7163 | ){ | |||
| 7164 | int rc = SQLITE_OK0; | |||
| 7165 | if( pNode->bEof==0 ){ | |||
| 7166 | switch( pNode->eType ){ | |||
| 7167 | ||||
| 7168 | case FTS5_STRING9: { | |||
| 7169 | rc = fts5ExprNodeTest_STRING(pExpr, pNode); | |||
| 7170 | break; | |||
| 7171 | } | |||
| 7172 | ||||
| 7173 | case FTS5_TERM4: { | |||
| 7174 | rc = fts5ExprNodeTest_TERM(pExpr, pNode); | |||
| 7175 | break; | |||
| 7176 | } | |||
| 7177 | ||||
| 7178 | case FTS5_AND2: { | |||
| 7179 | rc = fts5ExprNodeTest_AND(pExpr, pNode); | |||
| 7180 | break; | |||
| 7181 | } | |||
| 7182 | ||||
| 7183 | case FTS5_OR1: { | |||
| 7184 | fts5ExprNodeTest_OR(pExpr, pNode); | |||
| 7185 | break; | |||
| 7186 | } | |||
| 7187 | ||||
| 7188 | default: assert( pNode->eType==FTS5_NOT )((void) (0)); { | |||
| 7189 | rc = fts5ExprNodeTest_NOT(pExpr, pNode); | |||
| 7190 | break; | |||
| 7191 | } | |||
| 7192 | } | |||
| 7193 | } | |||
| 7194 | return rc; | |||
| 7195 | } | |||
| 7196 | ||||
| 7197 | ||||
| 7198 | /* | |||
| 7199 | ** Set node pNode, which is part of expression pExpr, to point to the first | |||
| 7200 | ** match. If there are no matches, set the Node.bEof flag to indicate EOF. | |||
| 7201 | ** | |||
| 7202 | ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. | |||
| 7203 | ** It is not an error if there are no matches. | |||
| 7204 | */ | |||
| 7205 | static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ | |||
| 7206 | int rc = SQLITE_OK0; | |||
| 7207 | pNode->bEof = 0; | |||
| 7208 | pNode->bNomatch = 0; | |||
| 7209 | ||||
| 7210 | if( Fts5NodeIsString(pNode)((pNode)->eType==4 || (pNode)->eType==9) ){ | |||
| 7211 | /* Initialize all term iterators in the NEAR object. */ | |||
| 7212 | rc = fts5ExprNearInitAll(pExpr, pNode); | |||
| 7213 | }else if( pNode->xNext==0 ){ | |||
| 7214 | pNode->bEof = 1; | |||
| 7215 | }else{ | |||
| 7216 | int i; | |||
| 7217 | int nEof = 0; | |||
| 7218 | for(i=0; i<pNode->nChild && rc==SQLITE_OK0; i++){ | |||
| 7219 | Fts5ExprNode *pChild = pNode->apChild[i]; | |||
| 7220 | rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); | |||
| 7221 | assert( pChild->bEof==0 || pChild->bEof==1 )((void) (0)); | |||
| 7222 | nEof += pChild->bEof; | |||
| 7223 | } | |||
| 7224 | pNode->iRowid = pNode->apChild[0]->iRowid; | |||
| 7225 | ||||
| 7226 | switch( pNode->eType ){ | |||
| 7227 | case FTS5_AND2: | |||
| 7228 | if( nEof>0 ) fts5ExprSetEof(pNode); | |||
| 7229 | break; | |||
| 7230 | ||||
| 7231 | case FTS5_OR1: | |||
| 7232 | if( pNode->nChild==nEof ) fts5ExprSetEof(pNode); | |||
| 7233 | break; | |||
| 7234 | ||||
| 7235 | default: | |||
| 7236 | assert( pNode->eType==FTS5_NOT )((void) (0)); | |||
| 7237 | pNode->bEof = pNode->apChild[0]->bEof; | |||
| 7238 | break; | |||
| 7239 | } | |||
| 7240 | } | |||
| 7241 | ||||
| 7242 | if( rc==SQLITE_OK0 ){ | |||
| 7243 | rc = fts5ExprNodeTest(pExpr, pNode); | |||
| 7244 | } | |||
| 7245 | return rc; | |||
| 7246 | } | |||
| 7247 | ||||
| 7248 | ||||
| 7249 | /* | |||
| 7250 | ** Begin iterating through the set of documents in index pIdx matched by | |||
| 7251 | ** the MATCH expression passed as the first argument. If the "bDesc" | |||
| 7252 | ** parameter is passed a non-zero value, iteration is in descending rowid | |||
| 7253 | ** order. Or, if it is zero, in ascending order. | |||
| 7254 | ** | |||
| 7255 | ** If iterating in ascending rowid order (bDesc==0), the first document | |||
| 7256 | ** visited is that with the smallest rowid that is larger than or equal | |||
| 7257 | ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1), | |||
| 7258 | ** then the first document visited must have a rowid smaller than or | |||
| 7259 | ** equal to iFirst. | |||
| 7260 | ** | |||
| 7261 | ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It | |||
| 7262 | ** is not considered an error if the query does not match any documents. | |||
| 7263 | */ | |||
| 7264 | static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ | |||
| 7265 | Fts5ExprNode *pRoot = p->pRoot; | |||
| 7266 | int rc; /* Return code */ | |||
| 7267 | ||||
| 7268 | p->pIndex = pIdx; | |||
| 7269 | p->bDesc = bDesc; | |||
| 7270 | rc = fts5ExprNodeFirst(p, pRoot); | |||
| 7271 | ||||
| 7272 | /* If not at EOF but the current rowid occurs earlier than iFirst in | |||
| 7273 | ** the iteration order, move to document iFirst or later. */ | |||
| 7274 | if( rc==SQLITE_OK0 | |||
| 7275 | && 0==pRoot->bEof | |||
| 7276 | && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 | |||
| 7277 | ){ | |||
| 7278 | rc = fts5ExprNodeNext(p, pRoot, 1, iFirst)(pRoot)->xNext((p), (pRoot), (1), (iFirst)); | |||
| 7279 | } | |||
| 7280 | ||||
| 7281 | /* If the iterator is not at a real match, skip forward until it is. */ | |||
| 7282 | while( pRoot->bNomatch && rc==SQLITE_OK0 ){ | |||
| 7283 | assert( pRoot->bEof==0 )((void) (0)); | |||
| 7284 | rc = fts5ExprNodeNext(p, pRoot, 0, 0)(pRoot)->xNext((p), (pRoot), (0), (0)); | |||
| 7285 | } | |||
| 7286 | return rc; | |||
| 7287 | } | |||
| 7288 | ||||
| 7289 | /* | |||
| 7290 | ** Move to the next document | |||
| 7291 | ** | |||
| 7292 | ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It | |||
| 7293 | ** is not considered an error if the query does not match any documents. | |||
| 7294 | */ | |||
| 7295 | static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ | |||
| 7296 | int rc; | |||
| 7297 | Fts5ExprNode *pRoot = p->pRoot; | |||
| 7298 | assert( pRoot->bEof==0 && pRoot->bNomatch==0 )((void) (0)); | |||
| 7299 | do { | |||
| 7300 | rc = fts5ExprNodeNext(p, pRoot, 0, 0)(pRoot)->xNext((p), (pRoot), (0), (0)); | |||
| 7301 | assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) )((void) (0)); | |||
| 7302 | }while( pRoot->bNomatch ); | |||
| 7303 | if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ | |||
| 7304 | pRoot->bEof = 1; | |||
| 7305 | } | |||
| 7306 | return rc; | |||
| 7307 | } | |||
| 7308 | ||||
| 7309 | static int sqlite3Fts5ExprEof(Fts5Expr *p){ | |||
| 7310 | return p->pRoot->bEof; | |||
| 7311 | } | |||
| 7312 | ||||
| 7313 | static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ | |||
| 7314 | return p->pRoot->iRowid; | |||
| 7315 | } | |||
| 7316 | ||||
| 7317 | static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ | |||
| 7318 | int rc = SQLITE_OK0; | |||
| 7319 | *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); | |||
| 7320 | return rc; | |||
| 7321 | } | |||
| 7322 | ||||
| 7323 | /* | |||
| 7324 | ** Free the phrase object passed as the only argument. | |||
| 7325 | */ | |||
| 7326 | static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ | |||
| 7327 | if( pPhrase ){ | |||
| 7328 | int i; | |||
| 7329 | for(i=0; i<pPhrase->nTerm; i++){ | |||
| 7330 | Fts5ExprTerm *pSyn; | |||
| 7331 | Fts5ExprTerm *pNext; | |||
| 7332 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; | |||
| 7333 | sqlite3_freesqlite3_api->free(pTerm->pTerm); | |||
| 7334 | sqlite3Fts5IterClose(pTerm->pIter); | |||
| 7335 | for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ | |||
| 7336 | pNext = pSyn->pSynonym; | |||
| 7337 | sqlite3Fts5IterClose(pSyn->pIter); | |||
| 7338 | fts5BufferFree((Fts5Buffer*)&pSyn[1])sqlite3Fts5BufferFree((Fts5Buffer*)&pSyn[1]); | |||
| 7339 | sqlite3_freesqlite3_api->free(pSyn); | |||
| 7340 | } | |||
| 7341 | } | |||
| 7342 | if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist)sqlite3Fts5BufferFree(&pPhrase->poslist); | |||
| 7343 | sqlite3_freesqlite3_api->free(pPhrase); | |||
| 7344 | } | |||
| 7345 | } | |||
| 7346 | ||||
| 7347 | /* | |||
| 7348 | ** Set the "bFirst" flag on the first token of the phrase passed as the | |||
| 7349 | ** only argument. | |||
| 7350 | */ | |||
| 7351 | static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){ | |||
| 7352 | if( pPhrase && pPhrase->nTerm ){ | |||
| 7353 | pPhrase->aTerm[0].bFirst = 1; | |||
| 7354 | } | |||
| 7355 | } | |||
| 7356 | ||||
| 7357 | /* | |||
| 7358 | ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated | |||
| 7359 | ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is | |||
| 7360 | ** appended to it and the results returned. | |||
| 7361 | ** | |||
| 7362 | ** If an OOM error occurs, both the pNear and pPhrase objects are freed and | |||
| 7363 | ** NULL returned. | |||
| 7364 | */ | |||
| 7365 | static Fts5ExprNearset *sqlite3Fts5ParseNearset( | |||
| 7366 | Fts5Parse *pParse, /* Parse context */ | |||
| 7367 | Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ | |||
| 7368 | Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ | |||
| 7369 | ){ | |||
| 7370 | const int SZALLOC = 8; | |||
| 7371 | Fts5ExprNearset *pRet = 0; | |||
| 7372 | ||||
| 7373 | if( pParse->rc==SQLITE_OK0 ){ | |||
| 7374 | if( pNear==0 ){ | |||
| 7375 | sqlite3_int64 nByte; | |||
| 7376 | nByte = SZ_FTS5EXPRNEARSET(SZALLOC+1)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(SZALLOC+1)*sizeof (Fts5ExprPhrase*)); | |||
| 7377 | pRet = sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 7378 | if( pRet==0 ){ | |||
| 7379 | pParse->rc = SQLITE_NOMEM7; | |||
| 7380 | }else{ | |||
| 7381 | memset(pRet, 0, (size_t)nByte); | |||
| 7382 | } | |||
| 7383 | }else if( (pNear->nPhrase % SZALLOC)==0 ){ | |||
| 7384 | int nNew = pNear->nPhrase + SZALLOC; | |||
| 7385 | sqlite3_int64 nByte; | |||
| 7386 | ||||
| 7387 | nByte = SZ_FTS5EXPRNEARSET(nNew+1)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(nNew+1)*sizeof (Fts5ExprPhrase*)); | |||
| 7388 | pRet = (Fts5ExprNearset*)sqlite3_realloc64sqlite3_api->realloc64(pNear, nByte); | |||
| 7389 | if( pRet==0 ){ | |||
| 7390 | pParse->rc = SQLITE_NOMEM7; | |||
| 7391 | } | |||
| 7392 | }else{ | |||
| 7393 | pRet = pNear; | |||
| 7394 | } | |||
| 7395 | } | |||
| 7396 | ||||
| 7397 | if( pRet==0 ){ | |||
| 7398 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | |||
| 7399 | sqlite3Fts5ParseNearsetFree(pNear); | |||
| 7400 | sqlite3Fts5ParsePhraseFree(pPhrase); | |||
| 7401 | }else{ | |||
| 7402 | if( pRet->nPhrase>0 ){ | |||
| 7403 | Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1]; | |||
| 7404 | assert( pParse!=0 )((void) (0)); | |||
| 7405 | assert( pParse->apPhrase!=0 )((void) (0)); | |||
| 7406 | assert( pParse->nPhrase>=2 )((void) (0)); | |||
| 7407 | assert( pLast==pParse->apPhrase[pParse->nPhrase-2] )((void) (0)); | |||
| 7408 | if( pPhrase->nTerm==0 ){ | |||
| 7409 | fts5ExprPhraseFree(pPhrase); | |||
| 7410 | pRet->nPhrase--; | |||
| 7411 | pParse->nPhrase--; | |||
| 7412 | pPhrase = pLast; | |||
| 7413 | }else if( pLast->nTerm==0 ){ | |||
| 7414 | fts5ExprPhraseFree(pLast); | |||
| 7415 | pParse->apPhrase[pParse->nPhrase-2] = pPhrase; | |||
| 7416 | pParse->nPhrase--; | |||
| 7417 | pRet->nPhrase--; | |||
| 7418 | } | |||
| 7419 | } | |||
| 7420 | pRet->apPhrase[pRet->nPhrase++] = pPhrase; | |||
| 7421 | } | |||
| 7422 | return pRet; | |||
| 7423 | } | |||
| 7424 | ||||
| 7425 | typedef struct TokenCtx TokenCtx; | |||
| 7426 | struct TokenCtx { | |||
| 7427 | Fts5ExprPhrase *pPhrase; | |||
| 7428 | Fts5Config *pConfig; | |||
| 7429 | int rc; | |||
| 7430 | }; | |||
| 7431 | ||||
| 7432 | /* | |||
| 7433 | ** Callback for tokenizing terms used by ParseTerm(). | |||
| 7434 | */ | |||
| 7435 | static int fts5ParseTokenize( | |||
| 7436 | void *pContext, /* Pointer to Fts5InsertCtx object */ | |||
| 7437 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
| 7438 | const char *pToken, /* Buffer containing token */ | |||
| 7439 | int nToken, /* Size of token in bytes */ | |||
| 7440 | int iUnused1, /* Start offset of token */ | |||
| 7441 | int iUnused2 /* End offset of token */ | |||
| 7442 | ){ | |||
| 7443 | int rc = SQLITE_OK0; | |||
| 7444 | const int SZALLOC = 8; | |||
| 7445 | TokenCtx *pCtx = (TokenCtx*)pContext; | |||
| 7446 | Fts5ExprPhrase *pPhrase = pCtx->pPhrase; | |||
| 7447 | ||||
| 7448 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
| 7449 | ||||
| 7450 | /* If an error has already occurred, this is a no-op */ | |||
| 7451 | if( pCtx->rc!=SQLITE_OK0 ) return pCtx->rc; | |||
| 7452 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | |||
| 7453 | ||||
| 7454 | if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED0x0001) ){ | |||
| 7455 | Fts5ExprTerm *pSyn; | |||
| 7456 | sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; | |||
| 7457 | pSyn = (Fts5ExprTerm*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 7458 | if( pSyn==0 ){ | |||
| 7459 | rc = SQLITE_NOMEM7; | |||
| 7460 | }else{ | |||
| 7461 | memset(pSyn, 0, (size_t)nByte); | |||
| 7462 | pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); | |||
| 7463 | pSyn->nFullTerm = pSyn->nQueryTerm = nToken; | |||
| 7464 | if( pCtx->pConfig->bTokendata ){ | |||
| 7465 | pSyn->nQueryTerm = (int)strlen(pSyn->pTerm); | |||
| 7466 | } | |||
| 7467 | memcpy(pSyn->pTerm, pToken, nToken); | |||
| 7468 | pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; | |||
| 7469 | pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; | |||
| 7470 | } | |||
| 7471 | }else{ | |||
| 7472 | Fts5ExprTerm *pTerm; | |||
| 7473 | if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ | |||
| 7474 | Fts5ExprPhrase *pNew; | |||
| 7475 | int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); | |||
| 7476 | ||||
| 7477 | pNew = (Fts5ExprPhrase*)sqlite3_realloc64sqlite3_api->realloc64(pPhrase, | |||
| 7478 | SZ_FTS5EXPRPHRASE(nNew+1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (nNew+1)*sizeof( Fts5ExprTerm)) | |||
| 7479 | ); | |||
| 7480 | if( pNew==0 ){ | |||
| 7481 | rc = SQLITE_NOMEM7; | |||
| 7482 | }else{ | |||
| 7483 | if( pPhrase==0 ) memset(pNew, 0, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | |||
| 7484 | pCtx->pPhrase = pPhrase = pNew; | |||
| 7485 | pNew->nTerm = nNew - SZALLOC; | |||
| 7486 | } | |||
| 7487 | } | |||
| 7488 | ||||
| 7489 | if( rc==SQLITE_OK0 ){ | |||
| 7490 | pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; | |||
| 7491 | memset(pTerm, 0, sizeof(Fts5ExprTerm)); | |||
| 7492 | pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); | |||
| 7493 | pTerm->nFullTerm = pTerm->nQueryTerm = nToken; | |||
| 7494 | if( pCtx->pConfig->bTokendata && rc==SQLITE_OK0 ){ | |||
| 7495 | pTerm->nQueryTerm = (int)strlen(pTerm->pTerm); | |||
| 7496 | } | |||
| 7497 | } | |||
| 7498 | } | |||
| 7499 | ||||
| 7500 | pCtx->rc = rc; | |||
| 7501 | return rc; | |||
| 7502 | } | |||
| 7503 | ||||
| 7504 | ||||
| 7505 | /* | |||
| 7506 | ** Free the phrase object passed as the only argument. | |||
| 7507 | */ | |||
| 7508 | static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ | |||
| 7509 | fts5ExprPhraseFree(pPhrase); | |||
| 7510 | } | |||
| 7511 | ||||
| 7512 | /* | |||
| 7513 | ** Free the phrase object passed as the second argument. | |||
| 7514 | */ | |||
| 7515 | static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ | |||
| 7516 | if( pNear ){ | |||
| 7517 | int i; | |||
| 7518 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 7519 | fts5ExprPhraseFree(pNear->apPhrase[i]); | |||
| 7520 | } | |||
| 7521 | sqlite3_freesqlite3_api->free(pNear->pColset); | |||
| 7522 | sqlite3_freesqlite3_api->free(pNear); | |||
| 7523 | } | |||
| 7524 | } | |||
| 7525 | ||||
| 7526 | static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ | |||
| 7527 | assert( pParse->pExpr==0 )((void) (0)); | |||
| 7528 | pParse->pExpr = p; | |||
| 7529 | } | |||
| 7530 | ||||
| 7531 | static int parseGrowPhraseArray(Fts5Parse *pParse){ | |||
| 7532 | if( (pParse->nPhrase % 8)==0 ){ | |||
| 7533 | sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); | |||
| 7534 | Fts5ExprPhrase **apNew; | |||
| 7535 | apNew = (Fts5ExprPhrase**)sqlite3_realloc64sqlite3_api->realloc64(pParse->apPhrase, nByte); | |||
| 7536 | if( apNew==0 ){ | |||
| 7537 | pParse->rc = SQLITE_NOMEM7; | |||
| 7538 | return SQLITE_NOMEM7; | |||
| 7539 | } | |||
| 7540 | pParse->apPhrase = apNew; | |||
| 7541 | } | |||
| 7542 | return SQLITE_OK0; | |||
| 7543 | } | |||
| 7544 | ||||
| 7545 | /* | |||
| 7546 | ** This function is called by the parser to process a string token. The | |||
| 7547 | ** string may or may not be quoted. In any case it is tokenized and a | |||
| 7548 | ** phrase object consisting of all tokens returned. | |||
| 7549 | */ | |||
| 7550 | static Fts5ExprPhrase *sqlite3Fts5ParseTerm( | |||
| 7551 | Fts5Parse *pParse, /* Parse context */ | |||
| 7552 | Fts5ExprPhrase *pAppend, /* Phrase to append to */ | |||
| 7553 | Fts5Token *pToken, /* String to tokenize */ | |||
| 7554 | int bPrefix /* True if there is a trailing "*" */ | |||
| 7555 | ){ | |||
| 7556 | Fts5Config *pConfig = pParse->pConfig; | |||
| 7557 | TokenCtx sCtx; /* Context object passed to callback */ | |||
| 7558 | int rc; /* Tokenize return code */ | |||
| 7559 | char *z = 0; | |||
| 7560 | ||||
| 7561 | memset(&sCtx, 0, sizeof(TokenCtx)); | |||
| 7562 | sCtx.pPhrase = pAppend; | |||
| 7563 | sCtx.pConfig = pConfig; | |||
| 7564 | ||||
| 7565 | rc = fts5ParseStringFromToken(pToken, &z); | |||
| 7566 | if( rc==SQLITE_OK0 ){ | |||
| 7567 | int flags = FTS5_TOKENIZE_QUERY0x0001 | (bPrefix ? FTS5_TOKENIZE_PREFIX0x0002 : 0); | |||
| 7568 | int n; | |||
| 7569 | sqlite3Fts5Dequote(z); | |||
| 7570 | n = (int)strlen(z); | |||
| 7571 | rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); | |||
| 7572 | } | |||
| 7573 | sqlite3_freesqlite3_api->free(z); | |||
| 7574 | if( rc || (rc = sCtx.rc) ){ | |||
| 7575 | pParse->rc = rc; | |||
| 7576 | fts5ExprPhraseFree(sCtx.pPhrase); | |||
| 7577 | sCtx.pPhrase = 0; | |||
| 7578 | }else{ | |||
| 7579 | ||||
| 7580 | if( pAppend==0 ){ | |||
| 7581 | if( parseGrowPhraseArray(pParse) ){ | |||
| 7582 | fts5ExprPhraseFree(sCtx.pPhrase); | |||
| 7583 | return 0; | |||
| 7584 | } | |||
| 7585 | pParse->nPhrase++; | |||
| 7586 | } | |||
| 7587 | ||||
| 7588 | if( sCtx.pPhrase==0 ){ | |||
| 7589 | /* This happens when parsing a token or quoted phrase that contains | |||
| 7590 | ** no token characters at all. (e.g ... MATCH '""'). */ | |||
| 7591 | sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | |||
| 7592 | }else if( sCtx.pPhrase->nTerm ){ | |||
| 7593 | sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix; | |||
| 7594 | } | |||
| 7595 | assert( pParse->apPhrase!=0 )((void) (0)); | |||
| 7596 | pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; | |||
| 7597 | } | |||
| 7598 | ||||
| 7599 | return sCtx.pPhrase; | |||
| 7600 | } | |||
| 7601 | ||||
| 7602 | /* | |||
| 7603 | ** Create a new FTS5 expression by cloning phrase iPhrase of the | |||
| 7604 | ** expression passed as the second argument. | |||
| 7605 | */ | |||
| 7606 | static int sqlite3Fts5ExprClonePhrase( | |||
| 7607 | Fts5Expr *pExpr, | |||
| 7608 | int iPhrase, | |||
| 7609 | Fts5Expr **ppNew | |||
| 7610 | ){ | |||
| 7611 | int rc = SQLITE_OK0; /* Return code */ | |||
| 7612 | Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */ | |||
| 7613 | Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ | |||
| 7614 | TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */ | |||
| 7615 | if( !pExpr || iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | |||
| 7616 | rc = SQLITE_RANGE25; | |||
| 7617 | }else{ | |||
| 7618 | pOrig = pExpr->apExprPhrase[iPhrase]; | |||
| 7619 | pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); | |||
| 7620 | } | |||
| 7621 | if( rc==SQLITE_OK0 ){ | |||
| 7622 | pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, | |||
| 7623 | sizeof(Fts5ExprPhrase*)); | |||
| 7624 | } | |||
| 7625 | if( rc==SQLITE_OK0 ){ | |||
| 7626 | pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, SZ_FTS5EXPRNODE(1)(__builtin_offsetof(Fts5ExprNode, apChild) + (1)*sizeof(Fts5ExprNode *))); | |||
| 7627 | } | |||
| 7628 | if( rc==SQLITE_OK0 ){ | |||
| 7629 | pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, | |||
| 7630 | SZ_FTS5EXPRNEARSET(2)(__builtin_offsetof(Fts5ExprNearset, apPhrase)+(2)*sizeof(Fts5ExprPhrase *))); | |||
| 7631 | } | |||
| 7632 | if( rc==SQLITE_OK0 && ALWAYS(pOrig!=0)(pOrig!=0) ){ | |||
| 7633 | Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset; | |||
| 7634 | if( pColsetOrig ){ | |||
| 7635 | sqlite3_int64 nByte; | |||
| 7636 | Fts5Colset *pColset; | |||
| 7637 | nByte = SZ_FTS5COLSET(pColsetOrig->nCol)(sizeof(i64)*((pColsetOrig->nCol+2)/2)); | |||
| 7638 | pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte); | |||
| 7639 | if( pColset ){ | |||
| 7640 | memcpy(pColset, pColsetOrig, (size_t)nByte); | |||
| 7641 | } | |||
| 7642 | pNew->pRoot->pNear->pColset = pColset; | |||
| 7643 | } | |||
| 7644 | } | |||
| 7645 | ||||
| 7646 | if( rc==SQLITE_OK0 ){ | |||
| 7647 | if( pOrig->nTerm ){ | |||
| 7648 | int i; /* Used to iterate through phrase terms */ | |||
| 7649 | sCtx.pConfig = pExpr->pConfig; | |||
| 7650 | for(i=0; rc==SQLITE_OK0 && i<pOrig->nTerm; i++){ | |||
| 7651 | int tflags = 0; | |||
| 7652 | Fts5ExprTerm *p; | |||
| 7653 | for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK0; p=p->pSynonym){ | |||
| 7654 | rc = fts5ParseTokenize((void*)&sCtx,tflags,p->pTerm,p->nFullTerm,0,0); | |||
| 7655 | tflags = FTS5_TOKEN_COLOCATED0x0001; | |||
| 7656 | } | |||
| 7657 | if( rc==SQLITE_OK0 ){ | |||
| 7658 | sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; | |||
| 7659 | sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst; | |||
| 7660 | } | |||
| 7661 | } | |||
| 7662 | }else{ | |||
| 7663 | /* This happens when parsing a token or quoted phrase that contains | |||
| 7664 | ** no token characters at all. (e.g ... MATCH '""'). */ | |||
| 7665 | sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm ))); | |||
| 7666 | } | |||
| 7667 | } | |||
| 7668 | ||||
| 7669 | if( rc==SQLITE_OK0 && ALWAYS(sCtx.pPhrase)(sCtx.pPhrase) ){ | |||
| 7670 | /* All the allocations succeeded. Put the expression object together. */ | |||
| 7671 | pNew->pIndex = pExpr->pIndex; | |||
| 7672 | pNew->pConfig = pExpr->pConfig; | |||
| 7673 | pNew->nPhrase = 1; | |||
| 7674 | pNew->apExprPhrase[0] = sCtx.pPhrase; | |||
| 7675 | pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; | |||
| 7676 | pNew->pRoot->pNear->nPhrase = 1; | |||
| 7677 | sCtx.pPhrase->pNode = pNew->pRoot; | |||
| 7678 | ||||
| 7679 | if( pOrig->nTerm==1 | |||
| 7680 | && pOrig->aTerm[0].pSynonym==0 | |||
| 7681 | && pOrig->aTerm[0].bFirst==0 | |||
| 7682 | ){ | |||
| 7683 | pNew->pRoot->eType = FTS5_TERM4; | |||
| 7684 | pNew->pRoot->xNext = fts5ExprNodeNext_TERM; | |||
| 7685 | }else{ | |||
| 7686 | pNew->pRoot->eType = FTS5_STRING9; | |||
| 7687 | pNew->pRoot->xNext = fts5ExprNodeNext_STRING; | |||
| 7688 | } | |||
| 7689 | }else{ | |||
| 7690 | sqlite3Fts5ExprFree(pNew); | |||
| 7691 | fts5ExprPhraseFree(sCtx.pPhrase); | |||
| 7692 | pNew = 0; | |||
| 7693 | } | |||
| 7694 | ||||
| 7695 | *ppNew = pNew; | |||
| 7696 | return rc; | |||
| 7697 | } | |||
| 7698 | ||||
| 7699 | ||||
| 7700 | /* | |||
| 7701 | ** Token pTok has appeared in a MATCH expression where the NEAR operator | |||
| 7702 | ** is expected. If token pTok does not contain "NEAR", store an error | |||
| 7703 | ** in the pParse object. | |||
| 7704 | */ | |||
| 7705 | static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ | |||
| 7706 | if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ | |||
| 7707 | sqlite3Fts5ParseError( | |||
| 7708 | pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p | |||
| 7709 | ); | |||
| 7710 | } | |||
| 7711 | } | |||
| 7712 | ||||
| 7713 | static void sqlite3Fts5ParseSetDistance( | |||
| 7714 | Fts5Parse *pParse, | |||
| 7715 | Fts5ExprNearset *pNear, | |||
| 7716 | Fts5Token *p | |||
| 7717 | ){ | |||
| 7718 | if( pNear ){ | |||
| 7719 | int nNear = 0; | |||
| 7720 | int i; | |||
| 7721 | if( p->n ){ | |||
| 7722 | for(i=0; i<p->n; i++){ | |||
| 7723 | char c = (char)p->p[i]; | |||
| 7724 | if( c<'0' || c>'9' ){ | |||
| 7725 | sqlite3Fts5ParseError( | |||
| 7726 | pParse, "expected integer, got \"%.*s\"", p->n, p->p | |||
| 7727 | ); | |||
| 7728 | return; | |||
| 7729 | } | |||
| 7730 | if( nNear<214748363 ) nNear = nNear * 10 + (p->p[i] - '0'); | |||
| 7731 | /* ^^^^^^^^^^^^^^^--- Prevent integer overflow */ | |||
| 7732 | } | |||
| 7733 | }else{ | |||
| 7734 | nNear = FTS5_DEFAULT_NEARDIST10; | |||
| 7735 | } | |||
| 7736 | pNear->nNear = nNear; | |||
| 7737 | } | |||
| 7738 | } | |||
| 7739 | ||||
| 7740 | /* | |||
| 7741 | ** The second argument passed to this function may be NULL, or it may be | |||
| 7742 | ** an existing Fts5Colset object. This function returns a pointer to | |||
| 7743 | ** a new colset object containing the contents of (p) with new value column | |||
| 7744 | ** number iCol appended. | |||
| 7745 | ** | |||
| 7746 | ** If an OOM error occurs, store an error code in pParse and return NULL. | |||
| 7747 | ** The old colset object (if any) is not freed in this case. | |||
| 7748 | */ | |||
| 7749 | static Fts5Colset *fts5ParseColset( | |||
| 7750 | Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ | |||
| 7751 | Fts5Colset *p, /* Existing colset object */ | |||
| 7752 | int iCol /* New column to add to colset object */ | |||
| 7753 | ){ | |||
| 7754 | int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ | |||
| 7755 | Fts5Colset *pNew; /* New colset object to return */ | |||
| 7756 | ||||
| 7757 | assert( pParse->rc==SQLITE_OK )((void) (0)); | |||
| 7758 | assert( iCol>=0 && iCol<pParse->pConfig->nCol )((void) (0)); | |||
| 7759 | ||||
| 7760 | pNew = sqlite3_realloc64sqlite3_api->realloc64(p, SZ_FTS5COLSET(nCol+1)(sizeof(i64)*((nCol+1 +2)/2))); | |||
| 7761 | if( pNew==0 ){ | |||
| 7762 | pParse->rc = SQLITE_NOMEM7; | |||
| 7763 | }else{ | |||
| 7764 | int *aiCol = pNew->aiCol; | |||
| 7765 | int i, j; | |||
| 7766 | for(i=0; i<nCol; i++){ | |||
| 7767 | if( aiCol[i]==iCol ) return pNew; | |||
| 7768 | if( aiCol[i]>iCol ) break; | |||
| 7769 | } | |||
| 7770 | for(j=nCol; j>i; j--){ | |||
| 7771 | aiCol[j] = aiCol[j-1]; | |||
| 7772 | } | |||
| 7773 | aiCol[i] = iCol; | |||
| 7774 | pNew->nCol = nCol+1; | |||
| 7775 | ||||
| 7776 | #ifndef NDEBUG1 | |||
| 7777 | /* Check that the array is in order and contains no duplicate entries. */ | |||
| 7778 | for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] )((void) (0)); | |||
| 7779 | #endif | |||
| 7780 | } | |||
| 7781 | ||||
| 7782 | return pNew; | |||
| 7783 | } | |||
| 7784 | ||||
| 7785 | /* | |||
| 7786 | ** Allocate and return an Fts5Colset object specifying the inverse of | |||
| 7787 | ** the colset passed as the second argument. Free the colset passed | |||
| 7788 | ** as the second argument before returning. | |||
| 7789 | */ | |||
| 7790 | static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){ | |||
| 7791 | Fts5Colset *pRet; | |||
| 7792 | int nCol = pParse->pConfig->nCol; | |||
| 7793 | ||||
| 7794 | pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc, | |||
| 7795 | SZ_FTS5COLSET(nCol+1)(sizeof(i64)*((nCol+1 +2)/2)) | |||
| 7796 | ); | |||
| 7797 | if( pRet ){ | |||
| 7798 | int i; | |||
| 7799 | int iOld = 0; | |||
| 7800 | for(i=0; i<nCol; i++){ | |||
| 7801 | if( iOld>=p->nCol || p->aiCol[iOld]!=i ){ | |||
| 7802 | pRet->aiCol[pRet->nCol++] = i; | |||
| 7803 | }else{ | |||
| 7804 | iOld++; | |||
| 7805 | } | |||
| 7806 | } | |||
| 7807 | } | |||
| 7808 | ||||
| 7809 | sqlite3_freesqlite3_api->free(p); | |||
| 7810 | return pRet; | |||
| 7811 | } | |||
| 7812 | ||||
| 7813 | static Fts5Colset *sqlite3Fts5ParseColset( | |||
| 7814 | Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ | |||
| 7815 | Fts5Colset *pColset, /* Existing colset object */ | |||
| 7816 | Fts5Token *p | |||
| 7817 | ){ | |||
| 7818 | Fts5Colset *pRet = 0; | |||
| 7819 | int iCol; | |||
| 7820 | char *z; /* Dequoted copy of token p */ | |||
| 7821 | ||||
| 7822 | z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); | |||
| 7823 | if( pParse->rc==SQLITE_OK0 ){ | |||
| 7824 | Fts5Config *pConfig = pParse->pConfig; | |||
| 7825 | sqlite3Fts5Dequote(z); | |||
| 7826 | for(iCol=0; iCol<pConfig->nCol; iCol++){ | |||
| 7827 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(pConfig->azCol[iCol], z) ) break; | |||
| 7828 | } | |||
| 7829 | if( iCol==pConfig->nCol ){ | |||
| 7830 | sqlite3Fts5ParseError(pParse, "no such column: %s", z); | |||
| 7831 | }else{ | |||
| 7832 | pRet = fts5ParseColset(pParse, pColset, iCol); | |||
| 7833 | } | |||
| 7834 | sqlite3_freesqlite3_api->free(z); | |||
| 7835 | } | |||
| 7836 | ||||
| 7837 | if( pRet==0 ){ | |||
| 7838 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | |||
| 7839 | sqlite3_freesqlite3_api->free(pColset); | |||
| 7840 | } | |||
| 7841 | ||||
| 7842 | return pRet; | |||
| 7843 | } | |||
| 7844 | ||||
| 7845 | /* | |||
| 7846 | ** If argument pOrig is NULL, or if (*pRc) is set to anything other than | |||
| 7847 | ** SQLITE_OK when this function is called, NULL is returned. | |||
| 7848 | ** | |||
| 7849 | ** Otherwise, a copy of (*pOrig) is made into memory obtained from | |||
| 7850 | ** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation | |||
| 7851 | ** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned. | |||
| 7852 | */ | |||
| 7853 | static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){ | |||
| 7854 | Fts5Colset *pRet; | |||
| 7855 | if( pOrig ){ | |||
| 7856 | sqlite3_int64 nByte = SZ_FTS5COLSET(pOrig->nCol)(sizeof(i64)*((pOrig->nCol+2)/2)); | |||
| 7857 | pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte); | |||
| 7858 | if( pRet ){ | |||
| 7859 | memcpy(pRet, pOrig, (size_t)nByte); | |||
| 7860 | } | |||
| 7861 | }else{ | |||
| 7862 | pRet = 0; | |||
| 7863 | } | |||
| 7864 | return pRet; | |||
| 7865 | } | |||
| 7866 | ||||
| 7867 | /* | |||
| 7868 | ** Remove from colset pColset any columns that are not also in colset pMerge. | |||
| 7869 | */ | |||
| 7870 | static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){ | |||
| 7871 | int iIn = 0; /* Next input in pColset */ | |||
| 7872 | int iMerge = 0; /* Next input in pMerge */ | |||
| 7873 | int iOut = 0; /* Next output slot in pColset */ | |||
| 7874 | ||||
| 7875 | while( iIn<pColset->nCol && iMerge<pMerge->nCol ){ | |||
| 7876 | int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge]; | |||
| 7877 | if( iDiff==0 ){ | |||
| 7878 | pColset->aiCol[iOut++] = pMerge->aiCol[iMerge]; | |||
| 7879 | iMerge++; | |||
| 7880 | iIn++; | |||
| 7881 | }else if( iDiff>0 ){ | |||
| 7882 | iMerge++; | |||
| 7883 | }else{ | |||
| 7884 | iIn++; | |||
| 7885 | } | |||
| 7886 | } | |||
| 7887 | pColset->nCol = iOut; | |||
| 7888 | } | |||
| 7889 | ||||
| 7890 | /* | |||
| 7891 | ** Recursively apply colset pColset to expression node pNode and all of | |||
| 7892 | ** its decendents. If (*ppFree) is not NULL, it contains a spare copy | |||
| 7893 | ** of pColset. This function may use the spare copy and set (*ppFree) to | |||
| 7894 | ** zero, or it may create copies of pColset using fts5CloneColset(). | |||
| 7895 | */ | |||
| 7896 | static void fts5ParseSetColset( | |||
| 7897 | Fts5Parse *pParse, | |||
| 7898 | Fts5ExprNode *pNode, | |||
| 7899 | Fts5Colset *pColset, | |||
| 7900 | Fts5Colset **ppFree | |||
| 7901 | ){ | |||
| 7902 | if( pParse->rc==SQLITE_OK0 ){ | |||
| 7903 | assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING((void) (0)) | |||
| 7904 | || pNode->eType==FTS5_AND || pNode->eType==FTS5_OR((void) (0)) | |||
| 7905 | || pNode->eType==FTS5_NOT || pNode->eType==FTS5_EOF((void) (0)) | |||
| 7906 | )((void) (0)); | |||
| 7907 | if( pNode->eType==FTS5_STRING9 || pNode->eType==FTS5_TERM4 ){ | |||
| 7908 | Fts5ExprNearset *pNear = pNode->pNear; | |||
| 7909 | if( pNear->pColset ){ | |||
| 7910 | fts5MergeColset(pNear->pColset, pColset); | |||
| 7911 | if( pNear->pColset->nCol==0 ){ | |||
| 7912 | pNode->eType = FTS5_EOF0; | |||
| 7913 | pNode->xNext = 0; | |||
| 7914 | } | |||
| 7915 | }else if( *ppFree ){ | |||
| 7916 | pNear->pColset = pColset; | |||
| 7917 | *ppFree = 0; | |||
| 7918 | }else{ | |||
| 7919 | pNear->pColset = fts5CloneColset(&pParse->rc, pColset); | |||
| 7920 | } | |||
| 7921 | }else{ | |||
| 7922 | int i; | |||
| 7923 | assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 )((void) (0)); | |||
| 7924 | for(i=0; i<pNode->nChild; i++){ | |||
| 7925 | fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree); | |||
| 7926 | } | |||
| 7927 | } | |||
| 7928 | } | |||
| 7929 | } | |||
| 7930 | ||||
| 7931 | /* | |||
| 7932 | ** Apply colset pColset to expression node pExpr and all of its descendents. | |||
| 7933 | */ | |||
| 7934 | static void sqlite3Fts5ParseSetColset( | |||
| 7935 | Fts5Parse *pParse, | |||
| 7936 | Fts5ExprNode *pExpr, | |||
| 7937 | Fts5Colset *pColset | |||
| 7938 | ){ | |||
| 7939 | Fts5Colset *pFree = pColset; | |||
| 7940 | if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 7941 | sqlite3Fts5ParseError(pParse, | |||
| 7942 | "fts5: column queries are not supported (detail=none)" | |||
| 7943 | ); | |||
| 7944 | }else{ | |||
| 7945 | fts5ParseSetColset(pParse, pExpr, pColset, &pFree); | |||
| 7946 | } | |||
| 7947 | sqlite3_freesqlite3_api->free(pFree); | |||
| 7948 | } | |||
| 7949 | ||||
| 7950 | static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ | |||
| 7951 | switch( pNode->eType ){ | |||
| 7952 | case FTS5_STRING9: { | |||
| 7953 | Fts5ExprNearset *pNear = pNode->pNear; | |||
| 7954 | if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 | |||
| 7955 | && pNear->apPhrase[0]->aTerm[0].pSynonym==0 | |||
| 7956 | && pNear->apPhrase[0]->aTerm[0].bFirst==0 | |||
| 7957 | ){ | |||
| 7958 | pNode->eType = FTS5_TERM4; | |||
| 7959 | pNode->xNext = fts5ExprNodeNext_TERM; | |||
| 7960 | }else{ | |||
| 7961 | pNode->xNext = fts5ExprNodeNext_STRING; | |||
| 7962 | } | |||
| 7963 | break; | |||
| 7964 | }; | |||
| 7965 | ||||
| 7966 | case FTS5_OR1: { | |||
| 7967 | pNode->xNext = fts5ExprNodeNext_OR; | |||
| 7968 | break; | |||
| 7969 | }; | |||
| 7970 | ||||
| 7971 | case FTS5_AND2: { | |||
| 7972 | pNode->xNext = fts5ExprNodeNext_AND; | |||
| 7973 | break; | |||
| 7974 | }; | |||
| 7975 | ||||
| 7976 | default: assert( pNode->eType==FTS5_NOT )((void) (0)); { | |||
| 7977 | pNode->xNext = fts5ExprNodeNext_NOT; | |||
| 7978 | break; | |||
| 7979 | }; | |||
| 7980 | } | |||
| 7981 | } | |||
| 7982 | ||||
| 7983 | /* | |||
| 7984 | ** Add pSub as a child of p. | |||
| 7985 | */ | |||
| 7986 | static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ | |||
| 7987 | int ii = p->nChild; | |||
| 7988 | if( p->eType!=FTS5_NOT3 && pSub->eType==p->eType ){ | |||
| 7989 | int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; | |||
| 7990 | memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); | |||
| 7991 | p->nChild += pSub->nChild; | |||
| 7992 | sqlite3_freesqlite3_api->free(pSub); | |||
| 7993 | }else{ | |||
| 7994 | p->apChild[p->nChild++] = pSub; | |||
| 7995 | } | |||
| 7996 | for( ; ii<p->nChild; ii++){ | |||
| 7997 | p->iHeight = MAX(p->iHeight, p->apChild[ii]->iHeight + 1)(((p->iHeight) > (p->apChild[ii]->iHeight + 1)) ? (p->iHeight) : (p->apChild[ii]->iHeight + 1)); | |||
| 7998 | } | |||
| 7999 | } | |||
| 8000 | ||||
| 8001 | /* | |||
| 8002 | ** This function is used when parsing LIKE or GLOB patterns against | |||
| 8003 | ** trigram indexes that specify either detail=column or detail=none. | |||
| 8004 | ** It converts a phrase: | |||
| 8005 | ** | |||
| 8006 | ** abc + def + ghi | |||
| 8007 | ** | |||
| 8008 | ** into an AND tree: | |||
| 8009 | ** | |||
| 8010 | ** abc AND def AND ghi | |||
| 8011 | */ | |||
| 8012 | static Fts5ExprNode *fts5ParsePhraseToAnd( | |||
| 8013 | Fts5Parse *pParse, | |||
| 8014 | Fts5ExprNearset *pNear | |||
| 8015 | ){ | |||
| 8016 | int nTerm = pNear->apPhrase[0]->nTerm; | |||
| 8017 | int ii; | |||
| 8018 | int nByte; | |||
| 8019 | Fts5ExprNode *pRet; | |||
| 8020 | ||||
| 8021 | assert( pNear->nPhrase==1 )((void) (0)); | |||
| 8022 | assert( pParse->bPhraseToAnd )((void) (0)); | |||
| 8023 | ||||
| 8024 | nByte = SZ_FTS5EXPRNODE(nTerm+1)(__builtin_offsetof(Fts5ExprNode, apChild) + (nTerm+1)*sizeof (Fts5ExprNode*)); | |||
| 8025 | pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); | |||
| 8026 | if( pRet ){ | |||
| 8027 | pRet->eType = FTS5_AND2; | |||
| 8028 | pRet->nChild = nTerm; | |||
| 8029 | pRet->iHeight = 1; | |||
| 8030 | fts5ExprAssignXNext(pRet); | |||
| 8031 | pParse->nPhrase--; | |||
| 8032 | for(ii=0; ii<nTerm; ii++){ | |||
| 8033 | Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero( | |||
| 8034 | &pParse->rc, SZ_FTS5EXPRPHRASE(1)(__builtin_offsetof(Fts5ExprPhrase, aTerm) + (1)*sizeof(Fts5ExprTerm )) | |||
| 8035 | ); | |||
| 8036 | if( pPhrase ){ | |||
| 8037 | if( parseGrowPhraseArray(pParse) ){ | |||
| 8038 | fts5ExprPhraseFree(pPhrase); | |||
| 8039 | }else{ | |||
| 8040 | Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii]; | |||
| 8041 | Fts5ExprTerm *pTo = &pPhrase->aTerm[0]; | |||
| 8042 | pParse->apPhrase[pParse->nPhrase++] = pPhrase; | |||
| 8043 | pPhrase->nTerm = 1; | |||
| 8044 | pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm); | |||
| 8045 | pTo->nQueryTerm = p->nQueryTerm; | |||
| 8046 | pTo->nFullTerm = p->nFullTerm; | |||
| 8047 | pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING9, | |||
| 8048 | 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) | |||
| 8049 | ); | |||
| 8050 | } | |||
| 8051 | } | |||
| 8052 | } | |||
| 8053 | ||||
| 8054 | if( pParse->rc ){ | |||
| 8055 | sqlite3Fts5ParseNodeFree(pRet); | |||
| 8056 | pRet = 0; | |||
| 8057 | }else{ | |||
| 8058 | sqlite3Fts5ParseNearsetFree(pNear); | |||
| 8059 | } | |||
| 8060 | } | |||
| 8061 | ||||
| 8062 | return pRet; | |||
| 8063 | } | |||
| 8064 | ||||
| 8065 | /* | |||
| 8066 | ** Allocate and return a new expression object. If anything goes wrong (i.e. | |||
| 8067 | ** OOM error), leave an error code in pParse and return NULL. | |||
| 8068 | */ | |||
| 8069 | static Fts5ExprNode *sqlite3Fts5ParseNode( | |||
| 8070 | Fts5Parse *pParse, /* Parse context */ | |||
| 8071 | int eType, /* FTS5_STRING, AND, OR or NOT */ | |||
| 8072 | Fts5ExprNode *pLeft, /* Left hand child expression */ | |||
| 8073 | Fts5ExprNode *pRight, /* Right hand child expression */ | |||
| 8074 | Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ | |||
| 8075 | ){ | |||
| 8076 | Fts5ExprNode *pRet = 0; | |||
| 8077 | ||||
| 8078 | if( pParse->rc==SQLITE_OK0 ){ | |||
| 8079 | int nChild = 0; /* Number of children of returned node */ | |||
| 8080 | sqlite3_int64 nByte; /* Bytes of space to allocate for this node */ | |||
| 8081 | ||||
| 8082 | assert( (eType!=FTS5_STRING && !pNear)((void) (0)) | |||
| 8083 | || (eType==FTS5_STRING && !pLeft && !pRight)((void) (0)) | |||
| 8084 | )((void) (0)); | |||
| 8085 | if( eType==FTS5_STRING9 && pNear==0 ) return 0; | |||
| 8086 | if( eType!=FTS5_STRING9 && pLeft==0 ) return pRight; | |||
| 8087 | if( eType!=FTS5_STRING9 && pRight==0 ) return pLeft; | |||
| 8088 | ||||
| 8089 | if( eType==FTS5_STRING9 | |||
| 8090 | && pParse->bPhraseToAnd | |||
| 8091 | && pNear->apPhrase[0]->nTerm>1 | |||
| 8092 | ){ | |||
| 8093 | pRet = fts5ParsePhraseToAnd(pParse, pNear); | |||
| 8094 | }else{ | |||
| 8095 | if( eType==FTS5_NOT3 ){ | |||
| 8096 | nChild = 2; | |||
| 8097 | }else if( eType==FTS5_AND2 || eType==FTS5_OR1 ){ | |||
| 8098 | nChild = 2; | |||
| 8099 | if( pLeft->eType==eType ) nChild += pLeft->nChild-1; | |||
| 8100 | if( pRight->eType==eType ) nChild += pRight->nChild-1; | |||
| 8101 | } | |||
| 8102 | ||||
| 8103 | nByte = SZ_FTS5EXPRNODE(nChild)(__builtin_offsetof(Fts5ExprNode, apChild) + (nChild)*sizeof( Fts5ExprNode*)); | |||
| 8104 | pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); | |||
| 8105 | ||||
| 8106 | if( pRet ){ | |||
| 8107 | pRet->eType = eType; | |||
| 8108 | pRet->pNear = pNear; | |||
| 8109 | fts5ExprAssignXNext(pRet); | |||
| 8110 | if( eType==FTS5_STRING9 ){ | |||
| 8111 | int iPhrase; | |||
| 8112 | for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ | |||
| 8113 | pNear->apPhrase[iPhrase]->pNode = pRet; | |||
| 8114 | if( pNear->apPhrase[iPhrase]->nTerm==0 ){ | |||
| 8115 | pRet->xNext = 0; | |||
| 8116 | pRet->eType = FTS5_EOF0; | |||
| 8117 | } | |||
| 8118 | } | |||
| 8119 | ||||
| 8120 | if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | |||
| 8121 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; | |||
| 8122 | if( pNear->nPhrase!=1 | |||
| 8123 | || pPhrase->nTerm>1 | |||
| 8124 | || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst) | |||
| 8125 | ){ | |||
| 8126 | sqlite3Fts5ParseError(pParse, | |||
| 8127 | "fts5: %s queries are not supported (detail!=full)", | |||
| 8128 | pNear->nPhrase==1 ? "phrase": "NEAR" | |||
| 8129 | ); | |||
| 8130 | sqlite3Fts5ParseNodeFree(pRet); | |||
| 8131 | pRet = 0; | |||
| 8132 | pNear = 0; | |||
| 8133 | assert( pLeft==0 && pRight==0 )((void) (0)); | |||
| 8134 | } | |||
| 8135 | } | |||
| 8136 | }else{ | |||
| 8137 | assert( pNear==0 )((void) (0)); | |||
| 8138 | fts5ExprAddChildren(pRet, pLeft); | |||
| 8139 | fts5ExprAddChildren(pRet, pRight); | |||
| 8140 | pLeft = pRight = 0; | |||
| 8141 | if( pRet->iHeight>SQLITE_FTS5_MAX_EXPR_DEPTH256 ){ | |||
| 8142 | sqlite3Fts5ParseError(pParse, | |||
| 8143 | "fts5 expression tree is too large (maximum depth %d)", | |||
| 8144 | SQLITE_FTS5_MAX_EXPR_DEPTH256 | |||
| 8145 | ); | |||
| 8146 | sqlite3Fts5ParseNodeFree(pRet); | |||
| 8147 | pRet = 0; | |||
| 8148 | } | |||
| 8149 | } | |||
| 8150 | } | |||
| 8151 | } | |||
| 8152 | } | |||
| 8153 | ||||
| 8154 | if( pRet==0 ){ | |||
| 8155 | assert( pParse->rc!=SQLITE_OK )((void) (0)); | |||
| 8156 | sqlite3Fts5ParseNodeFree(pLeft); | |||
| 8157 | sqlite3Fts5ParseNodeFree(pRight); | |||
| 8158 | sqlite3Fts5ParseNearsetFree(pNear); | |||
| 8159 | } | |||
| 8160 | return pRet; | |||
| 8161 | } | |||
| 8162 | ||||
| 8163 | static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( | |||
| 8164 | Fts5Parse *pParse, /* Parse context */ | |||
| 8165 | Fts5ExprNode *pLeft, /* Left hand child expression */ | |||
| 8166 | Fts5ExprNode *pRight /* Right hand child expression */ | |||
| 8167 | ){ | |||
| 8168 | Fts5ExprNode *pRet = 0; | |||
| 8169 | Fts5ExprNode *pPrev; | |||
| 8170 | ||||
| 8171 | if( pParse->rc ){ | |||
| 8172 | sqlite3Fts5ParseNodeFree(pLeft); | |||
| 8173 | sqlite3Fts5ParseNodeFree(pRight); | |||
| 8174 | }else{ | |||
| 8175 | ||||
| 8176 | assert( pLeft->eType==FTS5_STRING((void) (0)) | |||
| 8177 | || pLeft->eType==FTS5_TERM((void) (0)) | |||
| 8178 | || pLeft->eType==FTS5_EOF((void) (0)) | |||
| 8179 | || pLeft->eType==FTS5_AND((void) (0)) | |||
| 8180 | )((void) (0)); | |||
| 8181 | assert( pRight->eType==FTS5_STRING((void) (0)) | |||
| 8182 | || pRight->eType==FTS5_TERM((void) (0)) | |||
| 8183 | || pRight->eType==FTS5_EOF((void) (0)) | |||
| 8184 | || (pRight->eType==FTS5_AND && pParse->bPhraseToAnd)((void) (0)) | |||
| 8185 | )((void) (0)); | |||
| 8186 | ||||
| 8187 | if( pLeft->eType==FTS5_AND2 ){ | |||
| 8188 | pPrev = pLeft->apChild[pLeft->nChild-1]; | |||
| 8189 | }else{ | |||
| 8190 | pPrev = pLeft; | |||
| 8191 | } | |||
| 8192 | assert( pPrev->eType==FTS5_STRING((void) (0)) | |||
| 8193 | || pPrev->eType==FTS5_TERM((void) (0)) | |||
| 8194 | || pPrev->eType==FTS5_EOF((void) (0)) | |||
| 8195 | )((void) (0)); | |||
| 8196 | ||||
| 8197 | if( pRight->eType==FTS5_EOF0 ){ | |||
| 8198 | assert( pParse->apPhrase!=0 )((void) (0)); | |||
| 8199 | assert( pParse->nPhrase>0 )((void) (0)); | |||
| 8200 | assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] )((void) (0)); | |||
| 8201 | sqlite3Fts5ParseNodeFree(pRight); | |||
| 8202 | pRet = pLeft; | |||
| 8203 | pParse->nPhrase--; | |||
| 8204 | } | |||
| 8205 | else if( pPrev->eType==FTS5_EOF0 ){ | |||
| 8206 | Fts5ExprPhrase **ap; | |||
| 8207 | ||||
| 8208 | if( pPrev==pLeft ){ | |||
| 8209 | pRet = pRight; | |||
| 8210 | }else{ | |||
| 8211 | pLeft->apChild[pLeft->nChild-1] = pRight; | |||
| 8212 | pRet = pLeft; | |||
| 8213 | } | |||
| 8214 | ||||
| 8215 | ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase]; | |||
| 8216 | assert( ap[0]==pPrev->pNear->apPhrase[0] )((void) (0)); | |||
| 8217 | memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase); | |||
| 8218 | pParse->nPhrase--; | |||
| 8219 | ||||
| 8220 | sqlite3Fts5ParseNodeFree(pPrev); | |||
| 8221 | } | |||
| 8222 | else{ | |||
| 8223 | pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND2, pLeft, pRight, 0); | |||
| 8224 | } | |||
| 8225 | } | |||
| 8226 | ||||
| 8227 | return pRet; | |||
| 8228 | } | |||
| 8229 | ||||
| 8230 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 8231 | static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ | |||
| 8232 | sqlite3_int64 nByte = 0; | |||
| 8233 | Fts5ExprTerm *p; | |||
| 8234 | char *zQuoted; | |||
| 8235 | ||||
| 8236 | /* Determine the maximum amount of space required. */ | |||
| 8237 | for(p=pTerm; p; p=p->pSynonym){ | |||
| 8238 | nByte += pTerm->nQueryTerm * 2 + 3 + 2; | |||
| 8239 | } | |||
| 8240 | zQuoted = sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 8241 | ||||
| 8242 | if( zQuoted ){ | |||
| 8243 | int i = 0; | |||
| 8244 | for(p=pTerm; p; p=p->pSynonym){ | |||
| 8245 | char *zIn = p->pTerm; | |||
| 8246 | char *zEnd = &zIn[p->nQueryTerm]; | |||
| 8247 | zQuoted[i++] = '"'; | |||
| 8248 | while( zIn<zEnd ){ | |||
| 8249 | if( *zIn=='"' ) zQuoted[i++] = '"'; | |||
| 8250 | zQuoted[i++] = *zIn++; | |||
| 8251 | } | |||
| 8252 | zQuoted[i++] = '"'; | |||
| 8253 | if( p->pSynonym ) zQuoted[i++] = '|'; | |||
| 8254 | } | |||
| 8255 | if( pTerm->bPrefix ){ | |||
| 8256 | zQuoted[i++] = ' '; | |||
| 8257 | zQuoted[i++] = '*'; | |||
| 8258 | } | |||
| 8259 | zQuoted[i++] = '\0'; | |||
| 8260 | } | |||
| 8261 | return zQuoted; | |||
| 8262 | } | |||
| 8263 | ||||
/*
** Format (zFmt, ...) with sqlite3_vmprintf() and append the result to
** string zApp, returning a newly allocated string. zApp is always freed.
**
** If zApp is NULL, the formatted text alone is returned. If formatting or
** concatenation fails, NULL is returned.
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  char *zSuffix;                  /* Formatted text to append */
  char *zResult;                  /* Return value */
  va_list ap;

  va_start(ap, zFmt);
  zSuffix = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  zResult = zSuffix;
  if( zApp!=0 && zSuffix!=0 ){
    zResult = sqlite3_mprintf("%s%s", zApp, zSuffix);
    sqlite3_free(zSuffix);
  }
  sqlite3_free(zApp);
  return zResult;
}
| 8278 | ||||
| 8279 | /* | |||
| 8280 | ** Compose a tcl-readable representation of expression pExpr. Return a | |||
| 8281 | ** pointer to a buffer containing that representation. It is the | |||
| 8282 | ** responsibility of the caller to at some point free the buffer using | |||
| 8283 | ** sqlite3_free(). | |||
| 8284 | */ | |||
| 8285 | static char *fts5ExprPrintTcl( | |||
| 8286 | Fts5Config *pConfig, | |||
| 8287 | const char *zNearsetCmd, | |||
| 8288 | Fts5ExprNode *pExpr | |||
| 8289 | ){ | |||
| 8290 | char *zRet = 0; | |||
| 8291 | if( pExpr->eType==FTS5_STRING9 || pExpr->eType==FTS5_TERM4 ){ | |||
| 8292 | Fts5ExprNearset *pNear = pExpr->pNear; | |||
| 8293 | int i; | |||
| 8294 | int iTerm; | |||
| 8295 | ||||
| 8296 | zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd); | |||
| 8297 | if( zRet==0 ) return 0; | |||
| 8298 | if( pNear->pColset ){ | |||
| 8299 | int *aiCol = pNear->pColset->aiCol; | |||
| 8300 | int nCol = pNear->pColset->nCol; | |||
| 8301 | if( nCol==1 ){ | |||
| 8302 | zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); | |||
| 8303 | }else{ | |||
| 8304 | zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); | |||
| 8305 | for(i=1; i<pNear->pColset->nCol; i++){ | |||
| 8306 | zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); | |||
| 8307 | } | |||
| 8308 | zRet = fts5PrintfAppend(zRet, "} "); | |||
| 8309 | } | |||
| 8310 | if( zRet==0 ) return 0; | |||
| 8311 | } | |||
| 8312 | ||||
| 8313 | if( pNear->nPhrase>1 ){ | |||
| 8314 | zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); | |||
| 8315 | if( zRet==0 ) return 0; | |||
| 8316 | } | |||
| 8317 | ||||
| 8318 | zRet = fts5PrintfAppend(zRet, "--"); | |||
| 8319 | if( zRet==0 ) return 0; | |||
| 8320 | ||||
| 8321 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 8322 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
| 8323 | ||||
| 8324 | zRet = fts5PrintfAppend(zRet, " {"); | |||
| 8325 | for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ | |||
| 8326 | Fts5ExprTerm *p = &pPhrase->aTerm[iTerm]; | |||
| 8327 | zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ", | |||
| 8328 | p->nQueryTerm, p->pTerm | |||
| 8329 | ); | |||
| 8330 | if( pPhrase->aTerm[iTerm].bPrefix ){ | |||
| 8331 | zRet = fts5PrintfAppend(zRet, "*"); | |||
| 8332 | } | |||
| 8333 | } | |||
| 8334 | ||||
| 8335 | if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); | |||
| 8336 | if( zRet==0 ) return 0; | |||
| 8337 | } | |||
| 8338 | ||||
| 8339 | }else if( pExpr->eType==0 ){ | |||
| 8340 | zRet = sqlite3_mprintfsqlite3_api->mprintf("{}"); | |||
| 8341 | }else{ | |||
| 8342 | char const *zOp = 0; | |||
| 8343 | int i; | |||
| 8344 | switch( pExpr->eType ){ | |||
| 8345 | case FTS5_AND2: zOp = "AND"; break; | |||
| 8346 | case FTS5_NOT3: zOp = "NOT"; break; | |||
| 8347 | default: | |||
| 8348 | assert( pExpr->eType==FTS5_OR )((void) (0)); | |||
| 8349 | zOp = "OR"; | |||
| 8350 | break; | |||
| 8351 | } | |||
| 8352 | ||||
| 8353 | zRet = sqlite3_mprintfsqlite3_api->mprintf("%s", zOp); | |||
| 8354 | for(i=0; zRet && i<pExpr->nChild; i++){ | |||
| 8355 | char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]); | |||
| 8356 | if( !z ){ | |||
| 8357 | sqlite3_freesqlite3_api->free(zRet); | |||
| 8358 | zRet = 0; | |||
| 8359 | }else{ | |||
| 8360 | zRet = fts5PrintfAppend(zRet, " [%z]", z); | |||
| 8361 | } | |||
| 8362 | } | |||
| 8363 | } | |||
| 8364 | ||||
| 8365 | return zRet; | |||
| 8366 | } | |||
| 8367 | ||||
| 8368 | static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ | |||
| 8369 | char *zRet = 0; | |||
| 8370 | if( pExpr->eType==0 ){ | |||
| 8371 | return sqlite3_mprintfsqlite3_api->mprintf("\"\""); | |||
| 8372 | }else | |||
| 8373 | if( pExpr->eType==FTS5_STRING9 || pExpr->eType==FTS5_TERM4 ){ | |||
| 8374 | Fts5ExprNearset *pNear = pExpr->pNear; | |||
| 8375 | int i; | |||
| 8376 | int iTerm; | |||
| 8377 | ||||
| 8378 | if( pNear->pColset ){ | |||
| 8379 | int ii; | |||
| 8380 | Fts5Colset *pColset = pNear->pColset; | |||
| 8381 | if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{"); | |||
| 8382 | for(ii=0; ii<pColset->nCol; ii++){ | |||
| 8383 | zRet = fts5PrintfAppend(zRet, "%s%s", | |||
| 8384 | pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " " | |||
| 8385 | ); | |||
| 8386 | } | |||
| 8387 | if( zRet ){ | |||
| 8388 | zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : ""); | |||
| 8389 | } | |||
| 8390 | if( zRet==0 ) return 0; | |||
| 8391 | } | |||
| 8392 | ||||
| 8393 | if( pNear->nPhrase>1 ){ | |||
| 8394 | zRet = fts5PrintfAppend(zRet, "NEAR("); | |||
| 8395 | if( zRet==0 ) return 0; | |||
| 8396 | } | |||
| 8397 | ||||
| 8398 | for(i=0; i<pNear->nPhrase; i++){ | |||
| 8399 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | |||
| 8400 | if( i!=0 ){ | |||
| 8401 | zRet = fts5PrintfAppend(zRet, " "); | |||
| 8402 | if( zRet==0 ) return 0; | |||
| 8403 | } | |||
| 8404 | for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ | |||
| 8405 | char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); | |||
| 8406 | if( zTerm ){ | |||
| 8407 | zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); | |||
| 8408 | sqlite3_freesqlite3_api->free(zTerm); | |||
| 8409 | } | |||
| 8410 | if( zTerm==0 || zRet==0 ){ | |||
| 8411 | sqlite3_freesqlite3_api->free(zRet); | |||
| 8412 | return 0; | |||
| 8413 | } | |||
| 8414 | } | |||
| 8415 | } | |||
| 8416 | ||||
| 8417 | if( pNear->nPhrase>1 ){ | |||
| 8418 | zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); | |||
| 8419 | if( zRet==0 ) return 0; | |||
| 8420 | } | |||
| 8421 | ||||
| 8422 | }else{ | |||
| 8423 | char const *zOp = 0; | |||
| 8424 | int i; | |||
| 8425 | ||||
| 8426 | switch( pExpr->eType ){ | |||
| 8427 | case FTS5_AND2: zOp = " AND "; break; | |||
| 8428 | case FTS5_NOT3: zOp = " NOT "; break; | |||
| 8429 | default: | |||
| 8430 | assert( pExpr->eType==FTS5_OR )((void) (0)); | |||
| 8431 | zOp = " OR "; | |||
| 8432 | break; | |||
| 8433 | } | |||
| 8434 | ||||
| 8435 | for(i=0; i<pExpr->nChild; i++){ | |||
| 8436 | char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]); | |||
| 8437 | if( z==0 ){ | |||
| 8438 | sqlite3_freesqlite3_api->free(zRet); | |||
| 8439 | zRet = 0; | |||
| 8440 | }else{ | |||
| 8441 | int e = pExpr->apChild[i]->eType; | |||
| 8442 | int b = (e!=FTS5_STRING9 && e!=FTS5_TERM4 && e!=FTS5_EOF0); | |||
| 8443 | zRet = fts5PrintfAppend(zRet, "%s%s%z%s", | |||
| 8444 | (i==0 ? "" : zOp), | |||
| 8445 | (b?"(":""), z, (b?")":"") | |||
| 8446 | ); | |||
| 8447 | } | |||
| 8448 | if( zRet==0 ) break; | |||
| 8449 | } | |||
| 8450 | } | |||
| 8451 | ||||
| 8452 | return zRet; | |||
| 8453 | } | |||
| 8454 | ||||
| 8455 | /* | |||
| 8456 | ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) | |||
| 8457 | ** and fts5_expr_tcl() (bTcl!=0). | |||
| 8458 | */ | |||
| 8459 | static void fts5ExprFunction( | |||
| 8460 | sqlite3_context *pCtx, /* Function call context */ | |||
| 8461 | int nArg, /* Number of args */ | |||
| 8462 | sqlite3_value **apVal, /* Function arguments */ | |||
| 8463 | int bTcl | |||
| 8464 | ){ | |||
| 8465 | Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | |||
| 8466 | sqlite3 *db = sqlite3_context_db_handlesqlite3_api->context_db_handle(pCtx); | |||
| 8467 | const char *zExpr = 0; | |||
| 8468 | char *zErr = 0; | |||
| 8469 | Fts5Expr *pExpr = 0; | |||
| 8470 | int rc; | |||
| 8471 | int i; | |||
| 8472 | ||||
| 8473 | const char **azConfig; /* Array of arguments for Fts5Config */ | |||
| 8474 | const char *zNearsetCmd = "nearset"; | |||
| 8475 | int nConfig; /* Size of azConfig[] */ | |||
| 8476 | Fts5Config *pConfig = 0; | |||
| 8477 | int iArg = 1; | |||
| 8478 | ||||
| 8479 | if( nArg<1 ){ | |||
| 8480 | zErr = sqlite3_mprintfsqlite3_api->mprintf("wrong number of arguments to function %s", | |||
| 8481 | bTcl ? "fts5_expr_tcl" : "fts5_expr" | |||
| 8482 | ); | |||
| 8483 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
| 8484 | sqlite3_freesqlite3_api->free(zErr); | |||
| 8485 | return; | |||
| 8486 | } | |||
| 8487 | ||||
| 8488 | if( bTcl && nArg>1 ){ | |||
| 8489 | zNearsetCmd = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[1]); | |||
| 8490 | iArg = 2; | |||
| 8491 | } | |||
| 8492 | ||||
| 8493 | nConfig = 3 + (nArg-iArg); | |||
| 8494 | azConfig = (const char**)sqlite3_malloc64sqlite3_api->malloc64(sizeof(char*) * nConfig); | |||
| 8495 | if( azConfig==0 ){ | |||
| 8496 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(pCtx); | |||
| 8497 | return; | |||
| 8498 | } | |||
| 8499 | azConfig[0] = 0; | |||
| 8500 | azConfig[1] = "main"; | |||
| 8501 | azConfig[2] = "tbl"; | |||
| 8502 | for(i=3; iArg<nArg; iArg++){ | |||
| 8503 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[iArg]); | |||
| 8504 | azConfig[i++] = (z ? z : ""); | |||
| 8505 | } | |||
| 8506 | ||||
| 8507 | zExpr = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[0]); | |||
| 8508 | if( zExpr==0 ) zExpr = ""; | |||
| 8509 | ||||
| 8510 | rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); | |||
| 8511 | if( rc==SQLITE_OK0 ){ | |||
| 8512 | rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr); | |||
| 8513 | } | |||
| 8514 | if( rc==SQLITE_OK0 ){ | |||
| 8515 | char *zText; | |||
| 8516 | if( pExpr->pRoot->xNext==0 ){ | |||
| 8517 | zText = sqlite3_mprintfsqlite3_api->mprintf(""); | |||
| 8518 | }else if( bTcl ){ | |||
| 8519 | zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); | |||
| 8520 | }else{ | |||
| 8521 | zText = fts5ExprPrint(pConfig, pExpr->pRoot); | |||
| 8522 | } | |||
| 8523 | if( zText==0 ){ | |||
| 8524 | rc = SQLITE_NOMEM7; | |||
| 8525 | }else{ | |||
| 8526 | sqlite3_result_textsqlite3_api->result_text(pCtx, zText, -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 8527 | sqlite3_freesqlite3_api->free(zText); | |||
| 8528 | } | |||
| 8529 | } | |||
| 8530 | ||||
| 8531 | if( rc!=SQLITE_OK0 ){ | |||
| 8532 | if( zErr ){ | |||
| 8533 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
| 8534 | sqlite3_freesqlite3_api->free(zErr); | |||
| 8535 | }else{ | |||
| 8536 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
| 8537 | } | |||
| 8538 | } | |||
| 8539 | sqlite3_freesqlite3_api->free((void *)azConfig); | |||
| 8540 | sqlite3Fts5ConfigFree(pConfig); | |||
| 8541 | sqlite3Fts5ExprFree(pExpr); | |||
| 8542 | } | |||
| 8543 | ||||
| 8544 | static void fts5ExprFunctionHr( | |||
| 8545 | sqlite3_context *pCtx, /* Function call context */ | |||
| 8546 | int nArg, /* Number of args */ | |||
| 8547 | sqlite3_value **apVal /* Function arguments */ | |||
| 8548 | ){ | |||
| 8549 | fts5ExprFunction(pCtx, nArg, apVal, 0); | |||
| 8550 | } | |||
| 8551 | static void fts5ExprFunctionTcl( | |||
| 8552 | sqlite3_context *pCtx, /* Function call context */ | |||
| 8553 | int nArg, /* Number of args */ | |||
| 8554 | sqlite3_value **apVal /* Function arguments */ | |||
| 8555 | ){ | |||
| 8556 | fts5ExprFunction(pCtx, nArg, apVal, 1); | |||
| 8557 | } | |||
| 8558 | ||||
| 8559 | /* | |||
| 8560 | ** The implementation of an SQLite user-defined-function that accepts a | |||
| 8561 | ** single integer as an argument. If the integer is an alpha-numeric | |||
| 8562 | ** unicode code point, 1 is returned. Otherwise 0. | |||
| 8563 | */ | |||
| 8564 | static void fts5ExprIsAlnum( | |||
| 8565 | sqlite3_context *pCtx, /* Function call context */ | |||
| 8566 | int nArg, /* Number of args */ | |||
| 8567 | sqlite3_value **apVal /* Function arguments */ | |||
| 8568 | ){ | |||
| 8569 | int iCode; | |||
| 8570 | u8 aArr[32]; | |||
| 8571 | if( nArg!=1 ){ | |||
| 8572 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | |||
| 8573 | "wrong number of arguments to function fts5_isalnum", -1 | |||
| 8574 | ); | |||
| 8575 | return; | |||
| 8576 | } | |||
| 8577 | memset(aArr, 0, sizeof(aArr)); | |||
| 8578 | sqlite3Fts5UnicodeCatParse("L*", aArr); | |||
| 8579 | sqlite3Fts5UnicodeCatParse("N*", aArr); | |||
| 8580 | sqlite3Fts5UnicodeCatParse("Co", aArr); | |||
| 8581 | iCode = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
| 8582 | sqlite3_result_intsqlite3_api->result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]); | |||
| 8583 | } | |||
| 8584 | ||||
| 8585 | static void fts5ExprFold( | |||
| 8586 | sqlite3_context *pCtx, /* Function call context */ | |||
| 8587 | int nArg, /* Number of args */ | |||
| 8588 | sqlite3_value **apVal /* Function arguments */ | |||
| 8589 | ){ | |||
| 8590 | if( nArg!=1 && nArg!=2 ){ | |||
| 8591 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | |||
| 8592 | "wrong number of arguments to function fts5_fold", -1 | |||
| 8593 | ); | |||
| 8594 | }else{ | |||
| 8595 | int iCode; | |||
| 8596 | int bRemoveDiacritics = 0; | |||
| 8597 | iCode = sqlite3_value_intsqlite3_api->value_int(apVal[0]); | |||
| 8598 | if( nArg==2 ) bRemoveDiacritics = sqlite3_value_intsqlite3_api->value_int(apVal[1]); | |||
| 8599 | sqlite3_result_intsqlite3_api->result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); | |||
| 8600 | } | |||
| 8601 | } | |||
| 8602 | #endif /* if SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 8603 | ||||
| 8604 | /* | |||
| 8605 | ** This is called during initialization to register the fts5_expr() scalar | |||
| 8606 | ** UDF with the SQLite handle passed as the only argument. | |||
| 8607 | */ | |||
| 8608 | static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ | |||
| 8609 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 8610 | struct Fts5ExprFunc { | |||
| 8611 | const char *z; | |||
| 8612 | void (*x)(sqlite3_context*,int,sqlite3_value**); | |||
| 8613 | } aFunc[] = { | |||
| 8614 | { "fts5_expr", fts5ExprFunctionHr }, | |||
| 8615 | { "fts5_expr_tcl", fts5ExprFunctionTcl }, | |||
| 8616 | { "fts5_isalnum", fts5ExprIsAlnum }, | |||
| 8617 | { "fts5_fold", fts5ExprFold }, | |||
| 8618 | }; | |||
| 8619 | int i; | |||
| 8620 | int rc = SQLITE_OK0; | |||
| 8621 | void *pCtx = (void*)pGlobal; | |||
| 8622 | ||||
| 8623 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aFunc)((int)(sizeof(aFunc) / sizeof(aFunc[0]))); i++){ | |||
| 8624 | struct Fts5ExprFunc *p = &aFunc[i]; | |||
| 8625 | rc = sqlite3_create_functionsqlite3_api->create_function(db, p->z, -1, SQLITE_UTF81, pCtx, p->x, 0, 0); | |||
| 8626 | } | |||
| 8627 | #else | |||
| 8628 | int rc = SQLITE_OK0; | |||
| 8629 | UNUSED_PARAM2(pGlobal,db)(void)(pGlobal), (void)(db); | |||
| 8630 | #endif | |||
| 8631 | ||||
| 8632 | /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and | |||
| 8633 | ** sqlite3Fts5ParserFallback() are unused */ | |||
| 8634 | #ifndef NDEBUG1 | |||
| 8635 | (void)sqlite3Fts5ParserTrace; | |||
| 8636 | #endif | |||
| 8637 | (void)sqlite3Fts5ParserFallback; | |||
| 8638 | ||||
| 8639 | return rc; | |||
| 8640 | } | |||
| 8641 | ||||
| 8642 | /* | |||
| 8643 | ** Return the number of phrases in expression pExpr. | |||
| 8644 | */ | |||
| 8645 | static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ | |||
| 8646 | return (pExpr ? pExpr->nPhrase : 0); | |||
| 8647 | } | |||
| 8648 | ||||
| 8649 | /* | |||
| 8650 | ** Return the number of terms in the iPhrase'th phrase in pExpr. | |||
| 8651 | */ | |||
| 8652 | static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ | |||
| 8653 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; | |||
| 8654 | return pExpr->apExprPhrase[iPhrase]->nTerm; | |||
| 8655 | } | |||
| 8656 | ||||
| 8657 | /* | |||
| 8658 | ** This function is used to access the current position list for phrase | |||
| 8659 | ** iPhrase. | |||
| 8660 | */ | |||
| 8661 | static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ | |||
| 8662 | int nRet; | |||
| 8663 | Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; | |||
| 8664 | Fts5ExprNode *pNode = pPhrase->pNode; | |||
| 8665 | if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ | |||
| 8666 | *pa = pPhrase->poslist.p; | |||
| 8667 | nRet = pPhrase->poslist.n; | |||
| 8668 | }else{ | |||
| 8669 | *pa = 0; | |||
| 8670 | nRet = 0; | |||
| 8671 | } | |||
| 8672 | return nRet; | |||
| 8673 | } | |||
| 8674 | ||||
| 8675 | struct Fts5PoslistPopulator { | |||
| 8676 | Fts5PoslistWriter writer; | |||
| 8677 | int bOk; /* True if ok to populate */ | |||
| 8678 | int bMiss; | |||
| 8679 | }; | |||
| 8680 | ||||
| 8681 | /* | |||
| 8682 | ** Clear the position lists associated with all phrases in the expression | |||
| 8683 | ** passed as the first argument. Argument bLive is true if the expression | |||
| 8684 | ** might be pointing to a real entry, otherwise it has just been reset. | |||
| 8685 | ** | |||
| 8686 | ** At present this function is only used for detail=col and detail=none | |||
| 8687 | ** fts5 tables. This implies that all phrases must be at most 1 token | |||
| 8688 | ** in size, as phrase matches are not supported without detail=full. | |||
| 8689 | */ | |||
| 8690 | static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){ | |||
| 8691 | Fts5PoslistPopulator *pRet; | |||
| 8692 | pRet = sqlite3_malloc64sqlite3_api->malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); | |||
| 8693 | if( pRet ){ | |||
| 8694 | int i; | |||
| 8695 | memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); | |||
| 8696 | for(i=0; i<pExpr->nPhrase; i++){ | |||
| 8697 | Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist; | |||
| 8698 | Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; | |||
| 8699 | assert( pExpr->apExprPhrase[i]->nTerm<=1 )((void) (0)); | |||
| 8700 | if( bLive && | |||
| 8701 | (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof) | |||
| 8702 | ){ | |||
| 8703 | pRet[i].bMiss = 1; | |||
| 8704 | }else{ | |||
| 8705 | pBuf->n = 0; | |||
| 8706 | } | |||
| 8707 | } | |||
| 8708 | } | |||
| 8709 | return pRet; | |||
| 8710 | } | |||
| 8711 | ||||
| 8712 | struct Fts5ExprCtx { | |||
| 8713 | Fts5Expr *pExpr; | |||
| 8714 | Fts5PoslistPopulator *aPopulator; | |||
| 8715 | i64 iOff; | |||
| 8716 | }; | |||
| 8717 | typedef struct Fts5ExprCtx Fts5ExprCtx; | |||
| 8718 | ||||
| 8719 | /* | |||
| 8720 | ** TODO: Make this more efficient! | |||
| 8721 | */ | |||
| 8722 | static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ | |||
| 8723 | int i; | |||
| 8724 | for(i=0; i<pColset->nCol; i++){ | |||
| 8725 | if( pColset->aiCol[i]==iCol ) return 1; | |||
| 8726 | } | |||
| 8727 | return 0; | |||
| 8728 | } | |||
| 8729 | ||||
/*
** pToken is a buffer nToken bytes in size that may or may not contain
** an embedded 0x00 byte. Return the number of bytes that precede the
** first 0x00 byte, or nToken if the buffer contains no 0x00 byte at all.
** Zero is returned if nToken is zero or negative.
*/
static int fts5QueryTerm(const char *pToken, int nToken){
  int nTerm = 0;
  while( nTerm<nToken && pToken[nTerm]!='\0' ){
    nTerm++;
  }
  return nTerm;
}
| 8740 | ||||
| 8741 | static int fts5ExprPopulatePoslistsCb( | |||
| 8742 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ | |||
| 8743 | int tflags, /* Mask of FTS5_TOKEN_* flags */ | |||
| 8744 | const char *pToken, /* Pointer to buffer containing token */ | |||
| 8745 | int nToken, /* Size of token in bytes */ | |||
| 8746 | int iUnused1, /* Byte offset of token within input text */ | |||
| 8747 | int iUnused2 /* Byte offset of end of token within input text */ | |||
| 8748 | ){ | |||
| 8749 | Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; | |||
| 8750 | Fts5Expr *pExpr = p->pExpr; | |||
| 8751 | int i; | |||
| 8752 | int nQuery = nToken; | |||
| 8753 | i64 iRowid = pExpr->pRoot->iRowid; | |||
| 8754 | ||||
| 8755 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
| 8756 | ||||
| 8757 | if( nQuery>FTS5_MAX_TOKEN_SIZE32768 ) nQuery = FTS5_MAX_TOKEN_SIZE32768; | |||
| 8758 | if( pExpr->pConfig->bTokendata ){ | |||
| 8759 | nQuery = fts5QueryTerm(pToken, nQuery); | |||
| 8760 | } | |||
| 8761 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ) p->iOff++; | |||
| 8762 | for(i=0; i<pExpr->nPhrase; i++){ | |||
| 8763 | Fts5ExprTerm *pT; | |||
| 8764 | if( p->aPopulator[i].bOk==0 ) continue; | |||
| 8765 | for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){ | |||
| 8766 | if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix)) | |||
| 8767 | && memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0 | |||
| 8768 | ){ | |||
| 8769 | int rc = sqlite3Fts5PoslistWriterAppend( | |||
| 8770 | &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff | |||
| 8771 | ); | |||
| 8772 | if( rc==SQLITE_OK0 && (pExpr->pConfig->bTokendata || pT->bPrefix) ){ | |||
| 8773 | int iCol = p->iOff>>32; | |||
| 8774 | int iTokOff = p->iOff & 0x7FFFFFFF; | |||
| 8775 | rc = sqlite3Fts5IndexIterWriteTokendata( | |||
| 8776 | pT->pIter, pToken, nToken, iRowid, iCol, iTokOff | |||
| 8777 | ); | |||
| 8778 | } | |||
| 8779 | if( rc ) return rc; | |||
| 8780 | break; | |||
| 8781 | } | |||
| 8782 | } | |||
| 8783 | } | |||
| 8784 | return SQLITE_OK0; | |||
| 8785 | } | |||
| 8786 | ||||
| 8787 | static int sqlite3Fts5ExprPopulatePoslists( | |||
| 8788 | Fts5Config *pConfig, | |||
| 8789 | Fts5Expr *pExpr, | |||
| 8790 | Fts5PoslistPopulator *aPopulator, | |||
| 8791 | int iCol, | |||
| 8792 | const char *z, int n | |||
| 8793 | ){ | |||
| 8794 | int i; | |||
| 8795 | Fts5ExprCtx sCtx; | |||
| 8796 | sCtx.pExpr = pExpr; | |||
| 8797 | sCtx.aPopulator = aPopulator; | |||
| 8798 | sCtx.iOff = (((i64)iCol) << 32) - 1; | |||
| 8799 | ||||
| 8800 | for(i=0; i<pExpr->nPhrase; i++){ | |||
| 8801 | Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; | |||
| 8802 | Fts5Colset *pColset = pNode->pNear->pColset; | |||
| 8803 | if( (pColset && 0==fts5ExprColsetTest(pColset, iCol)) | |||
| 8804 | || aPopulator[i].bMiss | |||
| 8805 | ){ | |||
| 8806 | aPopulator[i].bOk = 0; | |||
| 8807 | }else{ | |||
| 8808 | aPopulator[i].bOk = 1; | |||
| 8809 | } | |||
| 8810 | } | |||
| 8811 | ||||
| 8812 | return sqlite3Fts5Tokenize(pConfig, | |||
| 8813 | FTS5_TOKENIZE_DOCUMENT0x0004, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb | |||
| 8814 | ); | |||
| 8815 | } | |||
| 8816 | ||||
| 8817 | static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ | |||
| 8818 | if( pNode->eType==FTS5_TERM4 || pNode->eType==FTS5_STRING9 ){ | |||
| 8819 | pNode->pNear->apPhrase[0]->poslist.n = 0; | |||
| 8820 | }else{ | |||
| 8821 | int i; | |||
| 8822 | for(i=0; i<pNode->nChild; i++){ | |||
| 8823 | fts5ExprClearPoslists(pNode->apChild[i]); | |||
| 8824 | } | |||
| 8825 | } | |||
| 8826 | } | |||
| 8827 | ||||
| 8828 | static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ | |||
| 8829 | pNode->iRowid = iRowid; | |||
| 8830 | pNode->bEof = 0; | |||
| 8831 | switch( pNode->eType ){ | |||
| 8832 | case 0: | |||
| 8833 | case FTS5_TERM4: | |||
| 8834 | case FTS5_STRING9: | |||
| 8835 | return (pNode->pNear->apPhrase[0]->poslist.n>0); | |||
| 8836 | ||||
| 8837 | case FTS5_AND2: { | |||
| 8838 | int i; | |||
| 8839 | for(i=0; i<pNode->nChild; i++){ | |||
| 8840 | if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){ | |||
| 8841 | fts5ExprClearPoslists(pNode); | |||
| 8842 | return 0; | |||
| 8843 | } | |||
| 8844 | } | |||
| 8845 | break; | |||
| 8846 | } | |||
| 8847 | ||||
| 8848 | case FTS5_OR1: { | |||
| 8849 | int i; | |||
| 8850 | int bRet = 0; | |||
| 8851 | for(i=0; i<pNode->nChild; i++){ | |||
| 8852 | if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){ | |||
| 8853 | bRet = 1; | |||
| 8854 | } | |||
| 8855 | } | |||
| 8856 | return bRet; | |||
| 8857 | } | |||
| 8858 | ||||
| 8859 | default: { | |||
| 8860 | assert( pNode->eType==FTS5_NOT )((void) (0)); | |||
| 8861 | if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid) | |||
| 8862 | || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid) | |||
| 8863 | ){ | |||
| 8864 | fts5ExprClearPoslists(pNode); | |||
| 8865 | return 0; | |||
| 8866 | } | |||
| 8867 | break; | |||
| 8868 | } | |||
| 8869 | } | |||
| 8870 | return 1; | |||
| 8871 | } | |||
| 8872 | ||||
| 8873 | static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ | |||
| 8874 | fts5ExprCheckPoslists(pExpr->pRoot, iRowid); | |||
| 8875 | } | |||
| 8876 | ||||
| 8877 | /* | |||
| 8878 | ** This function is only called for detail=columns tables. | |||
| 8879 | */ | |||
| 8880 | static int sqlite3Fts5ExprPhraseCollist( | |||
| 8881 | Fts5Expr *pExpr, | |||
| 8882 | int iPhrase, | |||
| 8883 | const u8 **ppCollist, | |||
| 8884 | int *pnCollist | |||
| 8885 | ){ | |||
| 8886 | Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; | |||
| 8887 | Fts5ExprNode *pNode = pPhrase->pNode; | |||
| 8888 | int rc = SQLITE_OK0; | |||
| 8889 | ||||
| 8890 | assert( iPhrase>=0 && iPhrase<pExpr->nPhrase )((void) (0)); | |||
| 8891 | assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | |||
| 8892 | ||||
| 8893 | if( pNode->bEof==0 | |||
| 8894 | && pNode->iRowid==pExpr->pRoot->iRowid | |||
| 8895 | && pPhrase->poslist.n>0 | |||
| 8896 | ){ | |||
| 8897 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; | |||
| 8898 | if( pTerm->pSynonym ){ | |||
| 8899 | Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; | |||
| 8900 | rc = fts5ExprSynonymList( | |||
| 8901 | pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist | |||
| 8902 | ); | |||
| 8903 | }else{ | |||
| 8904 | *ppCollist = pPhrase->aTerm[0].pIter->pData; | |||
| 8905 | *pnCollist = pPhrase->aTerm[0].pIter->nData; | |||
| 8906 | } | |||
| 8907 | }else{ | |||
| 8908 | *ppCollist = 0; | |||
| 8909 | *pnCollist = 0; | |||
| 8910 | } | |||
| 8911 | ||||
| 8912 | return rc; | |||
| 8913 | } | |||
| 8914 | ||||
| 8915 | /* | |||
| 8916 | ** Does the work of the fts5_api.xQueryToken() API method. | |||
| 8917 | */ | |||
| 8918 | static int sqlite3Fts5ExprQueryToken( | |||
| 8919 | Fts5Expr *pExpr, | |||
| 8920 | int iPhrase, | |||
| 8921 | int iToken, | |||
| 8922 | const char **ppOut, | |||
| 8923 | int *pnOut | |||
| 8924 | ){ | |||
| 8925 | Fts5ExprPhrase *pPhrase = 0; | |||
| 8926 | ||||
| 8927 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | |||
| 8928 | return SQLITE_RANGE25; | |||
| 8929 | } | |||
| 8930 | pPhrase = pExpr->apExprPhrase[iPhrase]; | |||
| 8931 | if( iToken<0 || iToken>=pPhrase->nTerm ){ | |||
| 8932 | return SQLITE_RANGE25; | |||
| 8933 | } | |||
| 8934 | ||||
| 8935 | *ppOut = pPhrase->aTerm[iToken].pTerm; | |||
| 8936 | *pnOut = pPhrase->aTerm[iToken].nFullTerm; | |||
| 8937 | return SQLITE_OK0; | |||
| 8938 | } | |||
| 8939 | ||||
| 8940 | /* | |||
| 8941 | ** Does the work of the fts5_api.xInstToken() API method. | |||
| 8942 | */ | |||
| 8943 | static int sqlite3Fts5ExprInstToken( | |||
| 8944 | Fts5Expr *pExpr, | |||
| 8945 | i64 iRowid, | |||
| 8946 | int iPhrase, | |||
| 8947 | int iCol, | |||
| 8948 | int iOff, | |||
| 8949 | int iToken, | |||
| 8950 | const char **ppOut, | |||
| 8951 | int *pnOut | |||
| 8952 | ){ | |||
| 8953 | Fts5ExprPhrase *pPhrase = 0; | |||
| 8954 | Fts5ExprTerm *pTerm = 0; | |||
| 8955 | int rc = SQLITE_OK0; | |||
| 8956 | ||||
| 8957 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){ | |||
| 8958 | return SQLITE_RANGE25; | |||
| 8959 | } | |||
| 8960 | pPhrase = pExpr->apExprPhrase[iPhrase]; | |||
| 8961 | if( iToken<0 || iToken>=pPhrase->nTerm ){ | |||
| 8962 | return SQLITE_RANGE25; | |||
| 8963 | } | |||
| 8964 | pTerm = &pPhrase->aTerm[iToken]; | |||
| 8965 | if( pExpr->pConfig->bTokendata || pTerm->bPrefix ){ | |||
| 8966 | rc = sqlite3Fts5IterToken( | |||
| 8967 | pTerm->pIter, pTerm->pTerm, pTerm->nQueryTerm, | |||
| 8968 | iRowid, iCol, iOff+iToken, ppOut, pnOut | |||
| 8969 | ); | |||
| 8970 | }else{ | |||
| 8971 | *ppOut = pTerm->pTerm; | |||
| 8972 | *pnOut = pTerm->nFullTerm; | |||
| 8973 | } | |||
| 8974 | return rc; | |||
| 8975 | } | |||
| 8976 | ||||
| 8977 | /* | |||
| 8978 | ** Clear the token mappings for all Fts5IndexIter objects managed by | |||
| 8979 | ** the expression passed as the only argument. | |||
| 8980 | */ | |||
| 8981 | static void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){ | |||
| 8982 | int ii; | |||
| 8983 | for(ii=0; ii<pExpr->nPhrase; ii++){ | |||
| 8984 | Fts5ExprTerm *pT; | |||
| 8985 | for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){ | |||
| 8986 | sqlite3Fts5IndexIterClearTokendata(pT->pIter); | |||
| 8987 | } | |||
| 8988 | } | |||
| 8989 | } | |||
| 8990 | ||||
| 8991 | #line 1 "fts5_hash.c" | |||
| 8992 | /* | |||
| 8993 | ** 2014 August 11 | |||
| 8994 | ** | |||
| 8995 | ** The author disclaims copyright to this source code. In place of | |||
| 8996 | ** a legal notice, here is a blessing: | |||
| 8997 | ** | |||
| 8998 | ** May you do good and not evil. | |||
| 8999 | ** May you find forgiveness for yourself and forgive others. | |||
| 9000 | ** May you share freely, never taking more than you give. | |||
| 9001 | ** | |||
| 9002 | ****************************************************************************** | |||
| 9003 | ** | |||
| 9004 | */ | |||
| 9005 | ||||
| 9006 | ||||
| 9007 | ||||
| 9008 | /* #include "fts5Int.h" */ | |||
| 9009 | ||||
| 9010 | typedef struct Fts5HashEntry Fts5HashEntry; | |||
| 9011 | ||||
| 9012 | /* | |||
| 9013 | ** This file contains the implementation of an in-memory hash table used | |||
| 9014 | ** to accumulate "term -> doclist" content before it is flushed to a level-0 | |||
| 9015 | ** segment. | |||
| 9016 | */ | |||
| 9017 | ||||
| 9018 | ||||
| 9019 | struct Fts5Hash { | |||
| 9020 | int eDetail; /* Copy of Fts5Config.eDetail */ | |||
| 9021 | int *pnByte; /* Pointer to bytes counter */ | |||
| 9022 | int nEntry; /* Number of entries currently in hash */ | |||
| 9023 | int nSlot; /* Size of aSlot[] array */ | |||
| 9024 | Fts5HashEntry *pScan; /* Current ordered scan item */ | |||
| 9025 | Fts5HashEntry **aSlot; /* Array of hash slots */ | |||
| 9026 | }; | |||
| 9027 | ||||
/*
** Each entry in the hash table is represented by an object of the
** following type. Each object, its key, and its current data are stored
** in a single memory allocation. The key immediately follows the object
** in memory. The position list data immediately follows the key data
** in memory.
**
** The key is Fts5HashEntry.nKey bytes in size. It consists of a single
** byte identifying the index (either the main term index or a prefix-index),
** followed by the term data. For example: "0token". There is no
** nul-terminator - in this case nKey=6.
**
** The data that follows the key is in a similar, but not identical format
** to the doclist data stored in the database. For each rowid it is:
**
**   * Rowid, as a varint. The first rowid is stored as is, each
**     subsequent rowid as the difference from the previous one.
**   * Unless detail=none, the position list size field. A single byte is
**     reserved for it when the rowid is written. It is filled in later by
**     fts5HashAddPoslistSize() with the value (nSz*2 + bDel), where nSz
**     is the size of the position list in bytes, and is widened to a
**     multi-byte varint if that value is greater than 127.
**   * Position list, without 0x00 terminator.
**
** For detail=none no size field is reserved. Instead,
** fts5HashAddPoslistSize() appends a single 0x00 byte if the delete-flag
** is set, and a second 0x00 byte if the content-flag is set as well.
**
** iSzPoslist:
**   Offset of the position list size field for the last rowid written
**   to the data area. Relative to first byte of structure. Set to zero
**   once the size field has been filled in.
**
** nData:
**   Number of bytes of the allocation in use, counted from the first
**   byte of the structure (so it includes the structure and the key).
*/
| 9055 | struct Fts5HashEntry { | |||
| 9056 | Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ | |||
| 9057 | Fts5HashEntry *pScanNext; /* Next entry in sorted order */ | |||
| 9058 | ||||
| 9059 | int nAlloc; /* Total size of allocation */ | |||
| 9060 | int iSzPoslist; /* Offset of space for 4-byte poslist size */ | |||
| 9061 | int nData; /* Total bytes of data (incl. structure) */ | |||
| 9062 | int nKey; /* Length of key in bytes */ | |||
| 9063 | u8 bDel; /* Set delete-flag @ iSzPoslist */ | |||
| 9064 | u8 bContent; /* Set content-flag (detail=none mode) */ | |||
| 9065 | i16 iCol; /* Column of last value written */ | |||
| 9066 | int iPos; /* Position of last value written */ | |||
| 9067 | i64 iRowid; /* Rowid of last value written */ | |||
| 9068 | }; | |||
| 9069 | ||||
/*
** Evaluates to a (char*) pointing at the key of hash entry p. The key is
** stored immediately after the Fts5HashEntry structure itself. Equivalent
** to:
**
**   char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
*/
#define fts5EntryKey(p) ( (char *)((p) + 1) )
| 9076 | ||||
| 9077 | ||||
| 9078 | /* | |||
| 9079 | ** Allocate a new hash table. | |||
| 9080 | */ | |||
| 9081 | static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ | |||
| 9082 | int rc = SQLITE_OK0; | |||
| 9083 | Fts5Hash *pNew; | |||
| 9084 | ||||
| 9085 | *ppNew = pNew = (Fts5Hash*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Hash)); | |||
| 9086 | if( pNew==0 ){ | |||
| 9087 | rc = SQLITE_NOMEM7; | |||
| 9088 | }else{ | |||
| 9089 | sqlite3_int64 nByte; | |||
| 9090 | memset(pNew, 0, sizeof(Fts5Hash)); | |||
| 9091 | pNew->pnByte = pnByte; | |||
| 9092 | pNew->eDetail = pConfig->eDetail; | |||
| 9093 | ||||
| 9094 | pNew->nSlot = 1024; | |||
| 9095 | nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; | |||
| 9096 | pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 9097 | if( pNew->aSlot==0 ){ | |||
| 9098 | sqlite3_freesqlite3_api->free(pNew); | |||
| 9099 | *ppNew = 0; | |||
| 9100 | rc = SQLITE_NOMEM7; | |||
| 9101 | }else{ | |||
| 9102 | memset(pNew->aSlot, 0, (size_t)nByte); | |||
| 9103 | } | |||
| 9104 | } | |||
| 9105 | return rc; | |||
| 9106 | } | |||
| 9107 | ||||
| 9108 | /* | |||
| 9109 | ** Free a hash table object. | |||
| 9110 | */ | |||
| 9111 | static void sqlite3Fts5HashFree(Fts5Hash *pHash){ | |||
| 9112 | if( pHash ){ | |||
| 9113 | sqlite3Fts5HashClear(pHash); | |||
| 9114 | sqlite3_freesqlite3_api->free(pHash->aSlot); | |||
| 9115 | sqlite3_freesqlite3_api->free(pHash); | |||
| 9116 | } | |||
| 9117 | } | |||
| 9118 | ||||
| 9119 | /* | |||
| 9120 | ** Empty (but do not delete) a hash table. | |||
| 9121 | */ | |||
| 9122 | static void sqlite3Fts5HashClear(Fts5Hash *pHash){ | |||
| 9123 | int i; | |||
| 9124 | for(i=0; i<pHash->nSlot; i++){ | |||
| 9125 | Fts5HashEntry *pNext; | |||
| 9126 | Fts5HashEntry *pSlot; | |||
| 9127 | for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ | |||
| 9128 | pNext = pSlot->pHashNext; | |||
| 9129 | sqlite3_freesqlite3_api->free(pSlot); | |||
| 9130 | } | |||
| 9131 | } | |||
| 9132 | memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); | |||
| 9133 | pHash->nEntry = 0; | |||
| 9134 | } | |||
| 9135 | ||||
| 9136 | static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){ | |||
| 9137 | int i; | |||
| 9138 | unsigned int h = 13; | |||
| 9139 | for(i=n-1; i>=0; i--){ | |||
| 9140 | h = (h << 3) ^ h ^ p[i]; | |||
| 9141 | } | |||
| 9142 | return (h % nSlot); | |||
| 9143 | } | |||
| 9144 | ||||
| 9145 | static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){ | |||
| 9146 | int i; | |||
| 9147 | unsigned int h = 13; | |||
| 9148 | for(i=n-1; i>=0; i--){ | |||
| 9149 | h = (h << 3) ^ h ^ p[i]; | |||
| 9150 | } | |||
| 9151 | h = (h << 3) ^ h ^ b; | |||
| 9152 | return (h % nSlot); | |||
| 9153 | } | |||
| 9154 | ||||
| 9155 | /* | |||
| 9156 | ** Resize the hash table by doubling the number of slots. | |||
| 9157 | */ | |||
| 9158 | static int fts5HashResize(Fts5Hash *pHash){ | |||
| 9159 | int nNew = pHash->nSlot*2; | |||
| 9160 | int i; | |||
| 9161 | Fts5HashEntry **apNew; | |||
| 9162 | Fts5HashEntry **apOld = pHash->aSlot; | |||
| 9163 | ||||
| 9164 | apNew = (Fts5HashEntry**)sqlite3_malloc64sqlite3_api->malloc64(nNew*sizeof(Fts5HashEntry*)); | |||
| 9165 | if( !apNew ) return SQLITE_NOMEM7; | |||
| 9166 | memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); | |||
| 9167 | ||||
| 9168 | for(i=0; i<pHash->nSlot; i++){ | |||
| 9169 | while( apOld[i] ){ | |||
| 9170 | unsigned int iHash; | |||
| 9171 | Fts5HashEntry *p = apOld[i]; | |||
| 9172 | apOld[i] = p->pHashNext; | |||
| 9173 | iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p)( ((char *)(&(p)[1])) ), p->nKey); | |||
| 9174 | p->pHashNext = apNew[iHash]; | |||
| 9175 | apNew[iHash] = p; | |||
| 9176 | } | |||
| 9177 | } | |||
| 9178 | ||||
| 9179 | sqlite3_freesqlite3_api->free(apOld); | |||
| 9180 | pHash->nSlot = nNew; | |||
| 9181 | pHash->aSlot = apNew; | |||
| 9182 | return SQLITE_OK0; | |||
| 9183 | } | |||
| 9184 | ||||
| 9185 | static int fts5HashAddPoslistSize( | |||
| 9186 | Fts5Hash *pHash, | |||
| 9187 | Fts5HashEntry *p, | |||
| 9188 | Fts5HashEntry *p2 | |||
| 9189 | ){ | |||
| 9190 | int nRet = 0; | |||
| 9191 | if( p->iSzPoslist ){ | |||
| 9192 | u8 *pPtr = p2 ? (u8*)p2 : (u8*)p; | |||
| 9193 | int nData = p->nData; | |||
| 9194 | if( pHash->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 9195 | assert( nData==p->iSzPoslist )((void) (0)); | |||
| 9196 | if( p->bDel ){ | |||
| 9197 | pPtr[nData++] = 0x00; | |||
| 9198 | if( p->bContent ){ | |||
| 9199 | pPtr[nData++] = 0x00; | |||
| 9200 | } | |||
| 9201 | } | |||
| 9202 | }else{ | |||
| 9203 | int nSz = (nData - p->iSzPoslist - 1); /* Size in bytes */ | |||
| 9204 | int nPos = nSz*2 + p->bDel; /* Value of nPos field */ | |||
| 9205 | ||||
| 9206 | assert( p->bDel==0 || p->bDel==1 )((void) (0)); | |||
| 9207 | if( nPos<=127 ){ | |||
| 9208 | pPtr[p->iSzPoslist] = (u8)nPos; | |||
| 9209 | }else{ | |||
| 9210 | int nByte = sqlite3Fts5GetVarintLen((u32)nPos); | |||
| 9211 | memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); | |||
| 9212 | sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); | |||
| 9213 | nData += (nByte-1); | |||
| 9214 | } | |||
| 9215 | } | |||
| 9216 | ||||
| 9217 | nRet = nData - p->nData; | |||
| 9218 | if( p2==0 ){ | |||
| 9219 | p->iSzPoslist = 0; | |||
| 9220 | p->bDel = 0; | |||
| 9221 | p->bContent = 0; | |||
| 9222 | p->nData = nData; | |||
| 9223 | } | |||
| 9224 | } | |||
| 9225 | return nRet; | |||
| 9226 | } | |||
| 9227 | ||||
| 9228 | /* | |||
| 9229 | ** Add an entry to the in-memory hash table. The key is the concatenation | |||
| 9230 | ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). | |||
| 9231 | ** | |||
| 9232 | ** (bByte || pToken) -> (iRowid,iCol,iPos) | |||
| 9233 | ** | |||
| 9234 | ** Or, if iCol is negative, then the value is a delete marker. | |||
| 9235 | */ | |||
| 9236 | static int sqlite3Fts5HashWrite( | |||
| 9237 | Fts5Hash *pHash, | |||
| 9238 | i64 iRowid, /* Rowid for this entry */ | |||
| 9239 | int iCol, /* Column token appears in (-ve -> delete) */ | |||
| 9240 | int iPos, /* Position of token within column */ | |||
| 9241 | char bByte, /* First byte of token */ | |||
| 9242 | const char *pToken, int nToken /* Token to add or remove to or from index */ | |||
| 9243 | ){ | |||
| 9244 | unsigned int iHash; | |||
| 9245 | Fts5HashEntry *p; | |||
| 9246 | u8 *pPtr; | |||
| 9247 | int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ | |||
| 9248 | int bNew; /* If non-delete entry should be written */ | |||
| 9249 | ||||
| 9250 | bNew = (pHash->eDetail==FTS5_DETAIL_FULL0); | |||
| 9251 | ||||
| 9252 | /* Attempt to locate an existing hash entry */ | |||
| 9253 | iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); | |||
| 9254 | for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ | |||
| 9255 | char *zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | |||
| 9256 | if( zKey[0]==bByte | |||
| 9257 | && p->nKey==nToken+1 | |||
| 9258 | && memcmp(&zKey[1], pToken, nToken)==0 | |||
| 9259 | ){ | |||
| 9260 | break; | |||
| 9261 | } | |||
| 9262 | } | |||
| 9263 | ||||
| 9264 | /* If an existing hash entry cannot be found, create a new one. */ | |||
| 9265 | if( p==0 ){ | |||
| 9266 | /* Figure out how much space to allocate */ | |||
| 9267 | char *zKey; | |||
| 9268 | sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64; | |||
| 9269 | if( nByte<128 ) nByte = 128; | |||
| 9270 | ||||
| 9271 | /* Grow the Fts5Hash.aSlot[] array if necessary. */ | |||
| 9272 | if( (pHash->nEntry*2)>=pHash->nSlot ){ | |||
| 9273 | int rc = fts5HashResize(pHash); | |||
| 9274 | if( rc!=SQLITE_OK0 ) return rc; | |||
| 9275 | iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); | |||
| 9276 | } | |||
| 9277 | ||||
| 9278 | /* Allocate new Fts5HashEntry and add it to the hash table. */ | |||
| 9279 | p = (Fts5HashEntry*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 9280 | if( !p ) return SQLITE_NOMEM7; | |||
| 9281 | memset(p, 0, sizeof(Fts5HashEntry)); | |||
| 9282 | p->nAlloc = (int)nByte; | |||
| 9283 | zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | |||
| 9284 | zKey[0] = bByte; | |||
| 9285 | memcpy(&zKey[1], pToken, nToken); | |||
| 9286 | assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) )((void) (0)); | |||
| 9287 | p->nKey = nToken+1; | |||
| 9288 | zKey[nToken+1] = '\0'; | |||
| 9289 | p->nData = nToken+1 + sizeof(Fts5HashEntry); | |||
| 9290 | p->pHashNext = pHash->aSlot[iHash]; | |||
| 9291 | pHash->aSlot[iHash] = p; | |||
| 9292 | pHash->nEntry++; | |||
| 9293 | ||||
| 9294 | /* Add the first rowid field to the hash-entry */ | |||
| 9295 | p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); | |||
| 9296 | p->iRowid = iRowid; | |||
| 9297 | ||||
| 9298 | p->iSzPoslist = p->nData; | |||
| 9299 | if( pHash->eDetail!=FTS5_DETAIL_NONE1 ){ | |||
| 9300 | p->nData += 1; | |||
| 9301 | p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL0 ? 0 : -1); | |||
| 9302 | } | |||
| 9303 | ||||
| 9304 | }else{ | |||
| 9305 | ||||
| 9306 | /* Appending to an existing hash-entry. Check that there is enough | |||
| 9307 | ** space to append the largest possible new entry. Worst case scenario | |||
| 9308 | ** is: | |||
| 9309 | ** | |||
| 9310 | ** + 9 bytes for a new rowid, | |||
| 9311 | ** + 4 bytes reserved for the "poslist size" varint. | |||
| 9312 | ** + 1 byte for a "new column" byte, | |||
| 9313 | ** + 3 bytes for a new column number (16-bit max) as a varint, | |||
| 9314 | ** + 5 bytes for the new position offset (32-bit max). | |||
| 9315 | */ | |||
| 9316 | if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ | |||
| 9317 | sqlite3_int64 nNew = p->nAlloc * 2; | |||
| 9318 | Fts5HashEntry *pNew; | |||
| 9319 | Fts5HashEntry **pp; | |||
| 9320 | pNew = (Fts5HashEntry*)sqlite3_realloc64sqlite3_api->realloc64(p, nNew); | |||
| 9321 | if( pNew==0 ) return SQLITE_NOMEM7; | |||
| 9322 | pNew->nAlloc = (int)nNew; | |||
| 9323 | for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); | |||
| 9324 | *pp = pNew; | |||
| 9325 | p = pNew; | |||
| 9326 | } | |||
| 9327 | nIncr -= p->nData; | |||
| 9328 | } | |||
| 9329 | assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) )((void) (0)); | |||
| 9330 | ||||
| 9331 | pPtr = (u8*)p; | |||
| 9332 | ||||
| 9333 | /* If this is a new rowid, append the 4-byte size field for the previous | |||
| 9334 | ** entry, and the new rowid for this entry. */ | |||
| 9335 | if( iRowid!=p->iRowid ){ | |||
| 9336 | u64 iDiff = (u64)iRowid - (u64)p->iRowid; | |||
| 9337 | fts5HashAddPoslistSize(pHash, p, 0); | |||
| 9338 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff); | |||
| 9339 | p->iRowid = iRowid; | |||
| 9340 | bNew = 1; | |||
| 9341 | p->iSzPoslist = p->nData; | |||
| 9342 | if( pHash->eDetail!=FTS5_DETAIL_NONE1 ){ | |||
| 9343 | p->nData += 1; | |||
| 9344 | p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL0 ? 0 : -1); | |||
| 9345 | p->iPos = 0; | |||
| 9346 | } | |||
| 9347 | } | |||
| 9348 | ||||
| 9349 | if( iCol>=0 ){ | |||
| 9350 | if( pHash->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 9351 | p->bContent = 1; | |||
| 9352 | }else{ | |||
| 9353 | /* Append a new column value, if necessary */ | |||
| 9354 | assert_nc( iCol>=p->iCol )((void) (0)); | |||
| 9355 | if( iCol!=p->iCol ){ | |||
| 9356 | if( pHash->eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 9357 | pPtr[p->nData++] = 0x01; | |||
| 9358 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); | |||
| 9359 | p->iCol = (i16)iCol; | |||
| 9360 | p->iPos = 0; | |||
| 9361 | }else{ | |||
| 9362 | bNew = 1; | |||
| 9363 | p->iCol = (i16)(iPos = iCol); | |||
| 9364 | } | |||
| 9365 | } | |||
| 9366 | ||||
| 9367 | /* Append the new position offset, if necessary */ | |||
| 9368 | if( bNew ){ | |||
| 9369 | p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); | |||
| 9370 | p->iPos = iPos; | |||
| 9371 | } | |||
| 9372 | } | |||
| 9373 | }else{ | |||
| 9374 | /* This is a delete. Set the delete flag. */ | |||
| 9375 | p->bDel = 1; | |||
| 9376 | } | |||
| 9377 | ||||
| 9378 | nIncr += p->nData; | |||
| 9379 | *pHash->pnByte += nIncr; | |||
| 9380 | return SQLITE_OK0; | |||
| 9381 | } | |||
| 9382 | ||||
| 9383 | ||||
| 9384 | /* | |||
| 9385 | ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, | |||
| 9386 | ** each sorted in key order. This function merges the two lists into a | |||
| 9387 | ** single list and returns a pointer to its first element. | |||
| 9388 | */ | |||
| 9389 | static Fts5HashEntry *fts5HashEntryMerge( | |||
| 9390 | Fts5HashEntry *pLeft, | |||
| 9391 | Fts5HashEntry *pRight | |||
| 9392 | ){ | |||
| 9393 | Fts5HashEntry *p1 = pLeft; | |||
| 9394 | Fts5HashEntry *p2 = pRight; | |||
| 9395 | Fts5HashEntry *pRet = 0; | |||
| 9396 | Fts5HashEntry **ppOut = &pRet; | |||
| 9397 | ||||
| 9398 | while( p1 || p2 ){ | |||
| 9399 | if( p1==0 ){ | |||
| 9400 | *ppOut = p2; | |||
| 9401 | p2 = 0; | |||
| 9402 | }else if( p2==0 ){ | |||
| 9403 | *ppOut = p1; | |||
| 9404 | p1 = 0; | |||
| 9405 | }else{ | |||
| 9406 | char *zKey1 = fts5EntryKey(p1)( ((char *)(&(p1)[1])) ); | |||
| 9407 | char *zKey2 = fts5EntryKey(p2)( ((char *)(&(p2)[1])) ); | |||
| 9408 | int nMin = MIN(p1->nKey, p2->nKey)(((p1->nKey) < (p2->nKey)) ? (p1->nKey) : (p2-> nKey)); | |||
| 9409 | ||||
| 9410 | int cmp = memcmp(zKey1, zKey2, nMin); | |||
| 9411 | if( cmp==0 ){ | |||
| 9412 | cmp = p1->nKey - p2->nKey; | |||
| 9413 | } | |||
| 9414 | assert( cmp!=0 )((void) (0)); | |||
| 9415 | ||||
| 9416 | if( cmp>0 ){ | |||
| 9417 | /* p2 is smaller */ | |||
| 9418 | *ppOut = p2; | |||
| 9419 | ppOut = &p2->pScanNext; | |||
| 9420 | p2 = p2->pScanNext; | |||
| 9421 | }else{ | |||
| 9422 | /* p1 is smaller */ | |||
| 9423 | *ppOut = p1; | |||
| 9424 | ppOut = &p1->pScanNext; | |||
| 9425 | p1 = p1->pScanNext; | |||
| 9426 | } | |||
| 9427 | *ppOut = 0; | |||
| 9428 | } | |||
| 9429 | } | |||
| 9430 | ||||
| 9431 | return pRet; | |||
| 9432 | } | |||
| 9433 | ||||
| 9434 | /* | |||
| 9435 | ** Link all tokens from hash table pHash into a list in sorted order. The | |||
| 9436 | ** tokens are not removed from the hash table. | |||
| 9437 | */ | |||
| 9438 | static int fts5HashEntrySort( | |||
| 9439 | Fts5Hash *pHash, | |||
| 9440 | const char *pTerm, int nTerm, /* Query prefix, if any */ | |||
| 9441 | Fts5HashEntry **ppSorted | |||
| 9442 | ){ | |||
| 9443 | const int nMergeSlot = 32; | |||
| 9444 | Fts5HashEntry **ap; | |||
| 9445 | Fts5HashEntry *pList; | |||
| 9446 | int iSlot; | |||
| 9447 | int i; | |||
| 9448 | ||||
| 9449 | *ppSorted = 0; | |||
| 9450 | ap = sqlite3_malloc64sqlite3_api->malloc64(sizeof(Fts5HashEntry*) * nMergeSlot); | |||
| 9451 | if( !ap ) return SQLITE_NOMEM7; | |||
| 9452 | memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); | |||
| 9453 | ||||
| 9454 | for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ | |||
| 9455 | Fts5HashEntry *pIter; | |||
| 9456 | for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ | |||
| 9457 | if( pTerm==0 | |||
| 9458 | || (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter)( ((char *)(&(pIter)[1])) ), pTerm, nTerm)) | |||
| 9459 | ){ | |||
| 9460 | Fts5HashEntry *pEntry = pIter; | |||
| 9461 | pEntry->pScanNext = 0; | |||
| 9462 | for(i=0; ap[i]; i++){ | |||
| 9463 | pEntry = fts5HashEntryMerge(pEntry, ap[i]); | |||
| 9464 | ap[i] = 0; | |||
| 9465 | } | |||
| 9466 | ap[i] = pEntry; | |||
| 9467 | } | |||
| 9468 | } | |||
| 9469 | } | |||
| 9470 | ||||
| 9471 | pList = 0; | |||
| 9472 | for(i=0; i<nMergeSlot; i++){ | |||
| 9473 | pList = fts5HashEntryMerge(pList, ap[i]); | |||
| 9474 | } | |||
| 9475 | ||||
| 9476 | sqlite3_freesqlite3_api->free(ap); | |||
| 9477 | *ppSorted = pList; | |||
| 9478 | return SQLITE_OK0; | |||
| 9479 | } | |||
| 9480 | ||||
| 9481 | /* | |||
| 9482 | ** Query the hash table for a doclist associated with term pTerm/nTerm. | |||
| 9483 | */ | |||
| 9484 | static int sqlite3Fts5HashQuery( | |||
| 9485 | Fts5Hash *pHash, /* Hash table to query */ | |||
| 9486 | int nPre, | |||
| 9487 | const char *pTerm, int nTerm, /* Query term */ | |||
| 9488 | void **ppOut, /* OUT: Pointer to new object */ | |||
| 9489 | int *pnDoclist /* OUT: Size of doclist in bytes */ | |||
| 9490 | ){ | |||
| 9491 | unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); | |||
| 9492 | char *zKey = 0; | |||
| 9493 | Fts5HashEntry *p; | |||
| 9494 | ||||
| 9495 | for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ | |||
| 9496 | zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | |||
| 9497 | if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break; | |||
| 9498 | } | |||
| 9499 | ||||
| 9500 | if( p ){ | |||
| 9501 | int nHashPre = sizeof(Fts5HashEntry) + nTerm; | |||
| 9502 | int nList = p->nData - nHashPre; | |||
| 9503 | u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64sqlite3_api->malloc64(nPre + nList + 10)); | |||
| 9504 | if( pRet ){ | |||
| 9505 | Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre]; | |||
| 9506 | memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList); | |||
| 9507 | nList += fts5HashAddPoslistSize(pHash, p, pFaux); | |||
| 9508 | *pnDoclist = nList; | |||
| 9509 | }else{ | |||
| 9510 | *pnDoclist = 0; | |||
| 9511 | return SQLITE_NOMEM7; | |||
| 9512 | } | |||
| 9513 | }else{ | |||
| 9514 | *ppOut = 0; | |||
| 9515 | *pnDoclist = 0; | |||
| 9516 | } | |||
| 9517 | ||||
| 9518 | return SQLITE_OK0; | |||
| 9519 | } | |||
| 9520 | ||||
| 9521 | static int sqlite3Fts5HashScanInit( | |||
| 9522 | Fts5Hash *p, /* Hash table to query */ | |||
| 9523 | const char *pTerm, int nTerm /* Query prefix */ | |||
| 9524 | ){ | |||
| 9525 | return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); | |||
| 9526 | } | |||
| 9527 | ||||
| 9528 | #ifdef SQLITE_DEBUG | |||
| 9529 | static int fts5HashCount(Fts5Hash *pHash){ | |||
| 9530 | int nEntry = 0; | |||
| 9531 | int ii; | |||
| 9532 | for(ii=0; ii<pHash->nSlot; ii++){ | |||
| 9533 | Fts5HashEntry *p = 0; | |||
| 9534 | for(p=pHash->aSlot[ii]; p; p=p->pHashNext){ | |||
| 9535 | nEntry++; | |||
| 9536 | } | |||
| 9537 | } | |||
| 9538 | return nEntry; | |||
| 9539 | } | |||
| 9540 | #endif | |||
| 9541 | ||||
| 9542 | /* | |||
| 9543 | ** Return true if the hash table is empty, false otherwise. | |||
| 9544 | */ | |||
| 9545 | static int sqlite3Fts5HashIsEmpty(Fts5Hash *pHash){ | |||
| 9546 | assert( pHash->nEntry==fts5HashCount(pHash) )((void) (0)); | |||
| 9547 | return pHash->nEntry==0; | |||
| 9548 | } | |||
| 9549 | ||||
| 9550 | static void sqlite3Fts5HashScanNext(Fts5Hash *p){ | |||
| 9551 | assert( !sqlite3Fts5HashScanEof(p) )((void) (0)); | |||
| 9552 | p->pScan = p->pScan->pScanNext; | |||
| 9553 | } | |||
| 9554 | ||||
| 9555 | static int sqlite3Fts5HashScanEof(Fts5Hash *p){ | |||
| 9556 | return (p->pScan==0); | |||
| 9557 | } | |||
| 9558 | ||||
| 9559 | static void sqlite3Fts5HashScanEntry( | |||
| 9560 | Fts5Hash *pHash, | |||
| 9561 | const char **pzTerm, /* OUT: term (nul-terminated) */ | |||
| 9562 | int *pnTerm, /* OUT: Size of term in bytes */ | |||
| 9563 | const u8 **ppDoclist, /* OUT: pointer to doclist */ | |||
| 9564 | int *pnDoclist /* OUT: size of doclist in bytes */ | |||
| 9565 | ){ | |||
| 9566 | Fts5HashEntry *p; | |||
| 9567 | if( (p = pHash->pScan) ){ | |||
| 9568 | char *zKey = fts5EntryKey(p)( ((char *)(&(p)[1])) ); | |||
| 9569 | int nTerm = p->nKey; | |||
| 9570 | fts5HashAddPoslistSize(pHash, p, 0); | |||
| 9571 | *pzTerm = zKey; | |||
| 9572 | *pnTerm = nTerm; | |||
| 9573 | *ppDoclist = (const u8*)&zKey[nTerm]; | |||
| 9574 | *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm); | |||
| 9575 | }else{ | |||
| 9576 | *pzTerm = 0; | |||
| 9577 | *pnTerm = 0; | |||
| 9578 | *ppDoclist = 0; | |||
| 9579 | *pnDoclist = 0; | |||
| 9580 | } | |||
| 9581 | } | |||
| 9582 | ||||
| 9583 | #line 1 "fts5_index.c" | |||
| 9584 | /* | |||
| 9585 | ** 2014 May 31 | |||
| 9586 | ** | |||
| 9587 | ** The author disclaims copyright to this source code. In place of | |||
| 9588 | ** a legal notice, here is a blessing: | |||
| 9589 | ** | |||
| 9590 | ** May you do good and not evil. | |||
| 9591 | ** May you find forgiveness for yourself and forgive others. | |||
| 9592 | ** May you share freely, never taking more than you give. | |||
| 9593 | ** | |||
| 9594 | ****************************************************************************** | |||
| 9595 | ** | |||
| 9596 | ** Low level access to the FTS index stored in the database file. The | |||
| 9597 | ** routines in this file implement all read and write access to the | |||
| 9598 | ** %_data table. Other parts of the system access this functionality via | |||
| 9599 | ** the interface defined in fts5Int.h. | |||
| 9600 | */ | |||
| 9601 | ||||
| 9602 | ||||
| 9603 | /* #include "fts5Int.h" */ | |||
| 9604 | ||||
| 9605 | /* | |||
| 9606 | ** Overview: | |||
| 9607 | ** | |||
| 9608 | ** The %_data table contains all the FTS indexes for an FTS5 virtual table. | |||
| 9609 | ** As well as the main term index, there may be up to 31 prefix indexes. | |||
| 9610 | ** The format is similar to FTS3/4, except that: | |||
| 9611 | ** | |||
| 9612 | ** * all segment b-tree leaf data is stored in fixed size page records | |||
| 9613 | ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is | |||
| 9614 | ** taken to ensure it is possible to iterate in either direction through | |||
| 9615 | ** the entries in a doclist, or to seek to a specific entry within a | |||
| 9616 | ** doclist, without loading it into memory. | |||
| 9617 | ** | |||
| 9618 | ** * large doclists that span many pages have associated "doclist index" | |||
| 9619 | ** records that contain a copy of the first rowid on each page spanned by | |||
| 9620 | ** the doclist. This is used to speed up seek operations, and merges of | |||
| 9621 | ** large doclists with very small doclists. | |||
| 9622 | ** | |||
| 9623 | ** * extra fields in the "structure record" record the state of ongoing | |||
| 9624 | ** incremental merge operations. | |||
| 9625 | ** | |||
| 9626 | */ | |||
| 9627 | ||||
| 9628 | ||||
| 9629 | #define FTS5_OPT_WORK_UNIT1000 1000 /* Number of leaf pages per optimize step */ | |||
| 9630 | #define FTS5_WORK_UNIT64 64 /* Number of leaf pages in unit of work */ | |||
| 9631 | ||||
| 9632 | #define FTS5_MIN_DLIDX_SIZE4 4 /* Add dlidx if this many empty pages */ | |||
| 9633 | ||||
| 9634 | #define FTS5_MAIN_PREFIX'0' '0' | |||
| 9635 | ||||
| 9636 | #if FTS5_MAX_PREFIX_INDEXES31 > 31 | |||
| 9637 | # error "FTS5_MAX_PREFIX_INDEXES is too large" | |||
| 9638 | #endif | |||
| 9639 | ||||
| 9640 | #define FTS5_MAX_LEVEL64 64 | |||
| 9641 | ||||
| 9642 | /* | |||
| 9643 | ** There are two versions of the format used for the structure record: | |||
| 9644 | ** | |||
| 9645 | ** 1. the legacy format, that may be read by all fts5 versions, and | |||
| 9646 | ** | |||
| 9647 | ** 2. the V2 format, which is used by contentless_delete=1 databases. | |||
| 9648 | ** | |||
| 9649 | ** Both begin with a 4-byte "configuration cookie" value. Then, a legacy | |||
| 9650 | ** format structure record contains a varint - the number of levels in | |||
| 9651 | ** the structure. Whereas a V2 structure record contains the constant | |||
| 9652 | ** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a | |||
| 9653 | ** varint has to be at least 16256 to begin with "0xFF". And the default | |||
| 9654 | ** maximum number of levels is 64. | |||
| 9655 | ** | |||
| 9656 | ** See below for more on structure record formats. | |||
| 9657 | */ | |||
| 9658 | #define FTS5_STRUCTURE_V2"\xFF\x00\x00\x01" "\xFF\x00\x00\x01" | |||
| 9659 | ||||
| 9660 | /* | |||
| 9661 | ** Details: | |||
| 9662 | ** | |||
| 9663 | ** The %_data table managed by this module, | |||
| 9664 | ** | |||
| 9665 | ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); | |||
| 9666 | ** | |||
| 9667 | ** , contains the following 6 types of records. See the comments surrounding | |||
| 9668 | ** the FTS5_*_ROWID macros below for a description of how %_data rowids are | |||
| 9669 | ** assigned to each of them. | |||
| 9670 | ** | |||
| 9671 | ** 1. Structure Records: | |||
| 9672 | ** | |||
| 9673 | ** The set of segments that make up an index - the index structure - are | |||
| 9674 | ** recorded in a single record within the %_data table. The record consists | |||
| 9675 | ** of a single 32-bit configuration cookie value followed by a list of | |||
| 9676 | ** SQLite varints. | |||
| 9677 | ** | |||
| 9678 | ** If the structure record is a V2 record, the configuration cookie is | |||
| 9679 | ** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01]. | |||
| 9680 | ** | |||
| 9681 | ** Next, the record continues with three varints: | |||
| 9682 | ** | |||
| 9683 | ** + number of levels, | |||
| 9684 | ** + total number of segments on all levels, | |||
| 9685 | ** + value of write counter. | |||
| 9686 | ** | |||
| 9687 | ** Then, for each level from 0 to nMax: | |||
| 9688 | ** | |||
| 9689 | ** + number of input segments in ongoing merge. | |||
| 9690 | ** + total number of segments in level. | |||
| 9691 | ** + for each segment from oldest to newest: | |||
| 9692 | ** + segment id (always > 0) | |||
| 9693 | ** + first leaf page number (often 1, always greater than 0) | |||
| 9694 | ** + final leaf page number | |||
| 9695 | ** | |||
| 9696 | ** Then, for V2 structures only: | |||
| 9697 | ** | |||
| 9698 | ** + lower origin counter value, | |||
| 9699 | ** + upper origin counter value, | |||
| 9700 | ** + the number of tombstone hash pages. | |||
| 9701 | ** | |||
| 9702 | ** 2. The Averages Record: | |||
| 9703 | ** | |||
| 9704 | ** A single record within the %_data table. The data is a list of varints. | |||
| 9705 | ** The first value is the number of rows in the index. Then, for each column | |||
| 9706 | ** from left to right, the total number of tokens in the column for all | |||
| 9707 | ** rows of the table. | |||
| 9708 | ** | |||
| 9709 | ** 3. Segment leaves: | |||
| 9710 | ** | |||
| 9711 | ** TERM/DOCLIST FORMAT: | |||
| 9712 | ** | |||
| 9713 | ** Most of each segment leaf is taken up by term/doclist data. The | |||
| 9714 | ** general format of term/doclist, starting with the first term | |||
| 9715 | ** on the leaf page, is: | |||
| 9716 | ** | |||
| 9717 | ** varint : size of first term | |||
| 9718 | ** blob: first term data | |||
| 9719 | ** doclist: first doclist | |||
| 9720 | ** zero-or-more { | |||
| 9721 | ** varint: number of bytes in common with previous term | |||
| 9722 | ** varint: number of bytes of new term data (nNew) | |||
| 9723 | ** blob: nNew bytes of new term data | |||
| 9724 | ** doclist: next doclist | |||
| 9725 | ** } | |||
| 9726 | ** | |||
| 9727 | ** doclist format: | |||
| 9728 | ** | |||
| 9729 | ** varint: first rowid | |||
| 9730 | ** poslist: first poslist | |||
| 9731 | ** zero-or-more { | |||
| 9732 | ** varint: rowid delta (always > 0) | |||
| 9733 | ** poslist: next poslist | |||
| 9734 | ** } | |||
| 9735 | ** | |||
| 9736 | ** poslist format: | |||
| 9737 | ** | |||
| 9738 | ** varint: size of poslist in bytes multiplied by 2, not including | |||
| 9739 | ** this field. Plus 1 if this entry carries the "delete" flag. | |||
| 9740 | ** collist: collist for column 0 | |||
| 9741 | ** zero-or-more { | |||
| 9742 | ** 0x01 byte | |||
| 9743 | ** varint: column number (I) | |||
| 9744 | ** collist: collist for column I | |||
| 9745 | ** } | |||
| 9746 | ** | |||
| 9747 | ** collist format: | |||
| 9748 | ** | |||
| 9749 | ** varint: first offset + 2 | |||
| 9750 | ** zero-or-more { | |||
| 9751 | ** varint: offset delta + 2 | |||
| 9752 | ** } | |||
| 9753 | ** | |||
| 9754 | ** PAGE FORMAT | |||
| 9755 | ** | |||
| 9756 | ** Each leaf page begins with a 4-byte header containing 2 16-bit | |||
| 9757 | ** unsigned integer fields in big-endian format. They are: | |||
| 9758 | ** | |||
| 9759 | ** * The byte offset of the first rowid on the page, if it exists | |||
| 9760 | ** and occurs before the first term (otherwise 0). | |||
| 9761 | ** | |||
| 9762 | ** * The byte offset of the start of the page footer. If the page | |||
| 9763 | ** footer is 0 bytes in size, then this field is the same as the | |||
| 9764 | ** size of the leaf page in bytes. | |||
| 9765 | ** | |||
| 9766 | ** The page footer consists of a single varint for each term located | |||
| 9767 | ** on the page. Each varint is the byte offset of the current term | |||
| 9768 | ** within the page, delta-compressed against the previous value. In | |||
| 9769 | ** other words, the first varint in the footer is the byte offset of | |||
| 9770 | ** the first term, the second is the byte offset of the second less that | |||
| 9771 | ** of the first, and so on. | |||
| 9772 | ** | |||
| 9773 | ** The term/doclist format described above is accurate if the entire | |||
| 9774 | ** term/doclist data fits on a single leaf page. If this is not the case, | |||
| 9775 | ** the format is changed in two ways: | |||
| 9776 | ** | |||
| 9777 | ** + if the first rowid on a page occurs before the first term, it | |||
| 9778 | ** is stored as a literal value: | |||
| 9779 | ** | |||
| 9780 | ** varint: first rowid | |||
| 9781 | ** | |||
| 9782 | ** + the first term on each page is stored in the same way as the | |||
| 9783 | ** very first term of the segment: | |||
| 9784 | ** | |||
| 9785 | ** varint : size of first term | |||
| 9786 | ** blob: first term data | |||
| 9787 | ** | |||
| 9788 | ** 5. Segment doclist indexes: | |||
| 9789 | ** | |||
| 9790 | ** Doclist indexes are themselves b-trees, however they usually consist of | |||
| 9791 | ** a single leaf record only. The format of each doclist index leaf page | |||
| 9792 | ** is: | |||
| 9793 | ** | |||
| 9794 | ** * Flags byte. Bits are: | |||
| 9795 | ** 0x01: Clear if leaf is also the root page, otherwise set. | |||
| 9796 | ** | |||
| 9797 | ** * Page number of fts index leaf page. As a varint. | |||
| 9798 | ** | |||
| 9799 | ** * First rowid on page indicated by previous field. As a varint. | |||
| 9800 | ** | |||
| 9801 | ** * A list of varints, one for each subsequent termless page. A | |||
| 9802 | ** positive delta if the termless page contains at least one rowid, | |||
| 9803 | ** or an 0x00 byte otherwise. | |||
| 9804 | ** | |||
| 9805 | ** Internal doclist index nodes are: | |||
| 9806 | ** | |||
| 9807 | ** * Flags byte. Bits are: | |||
| 9808 | ** 0x01: Clear for root page, otherwise set. | |||
| 9809 | ** | |||
| 9810 | ** * Page number of first child page. As a varint. | |||
| 9811 | ** | |||
| 9812 | ** * Copy of first rowid on page indicated by previous field. As a varint. | |||
| 9813 | ** | |||
| 9814 | ** * A list of delta-encoded varints - the first rowid on each subsequent | |||
| 9815 | ** child page. | |||
| 9816 | ** | |||
| 9817 | ** 6. Tombstone Hash Page | |||
| 9818 | ** | |||
| 9819 | ** These records are only ever present in contentless_delete=1 tables. | |||
| 9820 | ** There are zero or more of these associated with each segment. They | |||
| 9821 | ** are used to store the tombstone rowids for rows contained in the | |||
| 9822 | ** associated segments. | |||
| 9823 | ** | |||
| 9824 | ** The set of nHashPg tombstone hash pages associated with a single | |||
| 9825 | ** segment together form a single hash table containing tombstone rowids. | |||
| 9826 | ** To find the page of the hash on which a key might be stored: | |||
| 9827 | ** | |||
| 9828 | ** iPg = (rowid % nHashPg) | |||
| 9829 | ** | |||
| 9830 | ** Then, within page iPg, which has nSlot slots: | |||
| 9831 | ** | |||
| 9832 | ** iSlot = (rowid / nHashPg) % nSlot | |||
| 9833 | ** | |||
| 9834 | ** Each tombstone hash page begins with an 8 byte header: | |||
| 9835 | ** | |||
| 9836 | ** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8. | |||
| 9837 | ** 1-byte: rowid-0-tombstone flag. This flag is only valid on the | |||
| 9838 | ** first tombstone hash page for each segment (iPg=0). If set, | |||
| 9839 | ** the hash table contains rowid 0. If clear, it does not. | |||
| 9840 | ** Rowid 0 is handled specially. | |||
| 9841 | ** 2-bytes: unused. | |||
| 9842 | ** 4-bytes: Big-endian integer containing number of entries on page. | |||
| 9843 | ** | |||
| 9844 | ** Following this are nSlot 4 or 8 byte slots (depending on the key-size | |||
| 9845 | ** in the first byte of the page header). The number of slots may be | |||
| 9846 | ** determined based on the size of the page record and the key-size: | |||
| 9847 | ** | |||
| 9848 | ** nSlot = (nByte - 8) / key-size | |||
| 9849 | */ | |||
| 9850 | ||||
| 9851 | /* | |||
| 9852 | ** Rowids for the averages and structure records in the %_data table. | |||
| 9853 | */ | |||
| 9854 | #define FTS5_AVERAGES_ROWID1 1 /* Rowid used for the averages record */ | |||
| 9855 | #define FTS5_STRUCTURE_ROWID10 10 /* The structure record */ | |||
| 9856 | ||||
| 9857 | /* | |||
| 9858 | ** Macros determining the rowids used by segment leaves and dlidx leaves | |||
| 9859 | ** and nodes. All nodes and leaves are stored in the %_data table with large | |||
| 9860 | ** positive rowids. | |||
| 9861 | ** | |||
| 9862 | ** Each segment has a unique non-zero 16-bit id. | |||
| 9863 | ** | |||
| 9864 | ** The rowid for each segment leaf is found by passing the segment id and | |||
| 9865 | ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered | |||
| 9866 | ** sequentially starting from 1. | |||
| 9867 | */ | |||
| 9868 | #define FTS5_DATA_ID_B16 16 /* Max seg id number 65535 */ | |||
| 9869 | #define FTS5_DATA_DLI_B1 1 /* Doclist-index flag (1 bit) */ | |||
| 9870 | #define FTS5_DATA_HEIGHT_B5 5 /* Max dlidx tree height of 32 */ | |||
| 9871 | #define FTS5_DATA_PAGE_B31 31 /* Max page number of 2147483648 */ | |||
| 9872 | ||||
| 9873 | #define fts5_dri(segid, dlidx, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(dlidx) << (31 + 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) ) ( \ | |||
| 9874 | ((i64)(segid) << (FTS5_DATA_PAGE_B31+FTS5_DATA_HEIGHT_B5+FTS5_DATA_DLI_B1)) + \ | |||
| 9875 | ((i64)(dlidx) << (FTS5_DATA_PAGE_B31 + FTS5_DATA_HEIGHT_B5)) + \ | |||
| 9876 | ((i64)(height) << (FTS5_DATA_PAGE_B31)) + \ | |||
| 9877 | ((i64)(pgno)) \ | |||
| 9878 | ) | |||
| 9879 | ||||
| 9880 | #define FTS5_SEGMENT_ROWID(segid, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ) fts5_dri(segid, 0, 0, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ) | |||
| 9881 | #define FTS5_DLIDX_ROWID(segid, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) ) fts5_dri(segid, 1, height, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(height) << (31)) + ((i64)(pgno)) ) | |||
| 9882 | #define FTS5_TOMBSTONE_ROWID(segid,ipg)( ((i64)(segid+(1<<16)) << (31 +5 +1)) + ((i64)(0 ) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(ipg) ) ) fts5_dri(segid+(1<<16), 0, 0, ipg)( ((i64)(segid+(1<<16)) << (31 +5 +1)) + ((i64)(0 ) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(ipg) ) ) | |||
| 9883 | ||||
| 9884 | #ifdef SQLITE_DEBUG | |||
| 9885 | static int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB(11 | (1<<8)); } | |||
| 9886 | #endif | |||
| 9887 | ||||
| 9888 | ||||
| 9889 | /* | |||
| 9890 | ** Each time a blob is read from the %_data table, it is padded with this | |||
| 9891 | ** many zero bytes. This makes it easier to decode the various record formats | |||
| 9892 | ** without overreading if the records are corrupt. | |||
| 9893 | */ | |||
| 9894 | #define FTS5_DATA_ZERO_PADDING8 8 | |||
| 9895 | #define FTS5_DATA_PADDING20 20 | |||
| 9896 | ||||
| 9897 | typedef struct Fts5Data Fts5Data; | |||
| 9898 | typedef struct Fts5DlidxIter Fts5DlidxIter; | |||
| 9899 | typedef struct Fts5DlidxLvl Fts5DlidxLvl; | |||
| 9900 | typedef struct Fts5DlidxWriter Fts5DlidxWriter; | |||
| 9901 | typedef struct Fts5Iter Fts5Iter; | |||
| 9902 | typedef struct Fts5PageWriter Fts5PageWriter; | |||
| 9903 | typedef struct Fts5SegIter Fts5SegIter; | |||
| 9904 | typedef struct Fts5DoclistIter Fts5DoclistIter; | |||
| 9905 | typedef struct Fts5SegWriter Fts5SegWriter; | |||
| 9906 | typedef struct Fts5Structure Fts5Structure; | |||
| 9907 | typedef struct Fts5StructureLevel Fts5StructureLevel; | |||
| 9908 | typedef struct Fts5StructureSegment Fts5StructureSegment; | |||
| 9909 | typedef struct Fts5TokenDataIter Fts5TokenDataIter; | |||
| 9910 | typedef struct Fts5TokenDataMap Fts5TokenDataMap; | |||
| 9911 | typedef struct Fts5TombstoneArray Fts5TombstoneArray; | |||
| 9912 | ||||
| 9913 | struct Fts5Data { | |||
| 9914 | u8 *p; /* Pointer to buffer containing record */ | |||
| 9915 | int nn; /* Size of record in bytes */ | |||
| 9916 | int szLeaf; /* Size of leaf without page-index */ | |||
| 9917 | }; | |||
| 9918 | ||||
| 9919 | /* | |||
| 9920 | ** One object per %_data table. | |||
| 9921 | ** | |||
| 9922 | ** nContentlessDelete: | |||
| 9923 | ** The number of contentless delete operations since the most recent | |||
| 9924 | ** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked | |||
| 9925 | ** so that extra auto-merge work can be done by fts5IndexFlush() to | |||
| 9926 | ** account for the delete operations. | |||
| 9927 | */ | |||
| 9928 | struct Fts5Index { | |||
| 9929 | Fts5Config *pConfig; /* Virtual table configuration */ | |||
| 9930 | char *zDataTbl; /* Name of %_data table */ | |||
| 9931 | int nWorkUnit; /* Leaf pages in a "unit" of work */ | |||
| 9932 | ||||
| 9933 | /* | |||
| 9934 | ** Variables related to the accumulation of tokens and doclists within the | |||
| 9935 | ** in-memory hash tables before they are flushed to disk. | |||
| 9936 | */ | |||
| 9937 | Fts5Hash *pHash; /* Hash table for in-memory data */ | |||
| 9938 | int nPendingData; /* Current bytes of pending data */ | |||
| 9939 | i64 iWriteRowid; /* Rowid for current doc being written */ | |||
| 9940 | int bDelete; /* Current write is a delete */ | |||
| 9941 | int nContentlessDelete; /* Number of contentless delete ops */ | |||
| 9942 | int nPendingRow; /* Number of INSERT in hash table */ | |||
| 9943 | ||||
| 9944 | /* Error state. */ | |||
| 9945 | int rc; /* Current error code */ | |||
| 9946 | int flushRc; | |||
| 9947 | ||||
| 9948 | /* State used by the fts5DataXXX() functions. */ | |||
| 9949 | sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ | |||
| 9950 | sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ | |||
| 9951 | sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ | |||
| 9952 | sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ | |||
| 9953 | sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */ | |||
| 9954 | sqlite3_stmt *pIdxSelect; | |||
| 9955 | sqlite3_stmt *pIdxNextSelect; | |||
| 9956 | int nRead; /* Total number of blocks read */ | |||
| 9957 | ||||
| 9958 | sqlite3_stmt *pDeleteFromIdx; | |||
| 9959 | ||||
| 9960 | sqlite3_stmt *pDataVersion; | |||
| 9961 | i64 iStructVersion; /* data_version when pStruct read */ | |||
| 9962 | Fts5Structure *pStruct; /* Current db structure (or NULL) */ | |||
| 9963 | }; | |||
| 9964 | ||||
| 9965 | struct Fts5DoclistIter { | |||
| 9966 | u8 *aEof; /* Pointer to 1 byte past end of doclist */ | |||
| 9967 | ||||
| 9968 | /* Output variables. aPoslist==0 at EOF */ | |||
| 9969 | i64 iRowid; | |||
| 9970 | u8 *aPoslist; | |||
| 9971 | int nPoslist; | |||
| 9972 | int nSize; | |||
| 9973 | }; | |||
| 9974 | ||||
| 9975 | /* | |||
| 9976 | ** The contents of the "structure" record for each index are represented | |||
| 9977 | ** using an Fts5Structure record in memory. Which uses instances of the | |||
| 9978 | ** other Fts5StructureXXX types as components. | |||
| 9979 | ** | |||
| 9980 | ** nOriginCntr: | |||
| 9981 | ** This value is set to non-zero for structure records created for | |||
| 9982 | ** contentless_delete=1 tables only. In that case it represents the | |||
| 9983 | ** origin value to apply to the next top-level segment created. | |||
| 9984 | */ | |||
| 9985 | struct Fts5StructureSegment { | |||
| 9986 | int iSegid; /* Segment id */ | |||
| 9987 | int pgnoFirst; /* First leaf page number in segment */ | |||
| 9988 | int pgnoLast; /* Last leaf page number in segment */ | |||
| 9989 | ||||
| 9990 | /* contentless_delete=1 tables only: */ | |||
| 9991 | u64 iOrigin1; | |||
| 9992 | u64 iOrigin2; | |||
| 9993 | int nPgTombstone; /* Number of tombstone hash table pages */ | |||
| 9994 | u64 nEntryTombstone; /* Number of tombstone entries that "count" */ | |||
| 9995 | u64 nEntry; /* Number of rows in this segment */ | |||
| 9996 | }; | |||
| 9997 | struct Fts5StructureLevel { | |||
| 9998 | int nMerge; /* Number of segments in incr-merge */ | |||
| 9999 | int nSeg; /* Total number of segments on level */ | |||
| 10000 | Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ | |||
| 10001 | }; | |||
| 10002 | struct Fts5Structure { | |||
| 10003 | int nRef; /* Object reference count */ | |||
| 10004 | u64 nWriteCounter; /* Total leaves written to level 0 */ | |||
| 10005 | u64 nOriginCntr; /* Origin value for next top-level segment */ | |||
| 10006 | int nSegment; /* Total segments in this structure */ | |||
| 10007 | int nLevel; /* Number of levels in this index */ | |||
| 10008 | Fts5StructureLevel aLevel[FLEXARRAY]; /* Array of nLevel level objects */ | |||
| 10009 | }; | |||
| 10010 | ||||
| 10011 | /* Size (in bytes) of an Fts5Structure object holding up to N levels */ | |||
| 10012 | #define SZ_FTS5STRUCTURE(N)(__builtin_offsetof(Fts5Structure, aLevel) + (N)*sizeof(Fts5StructureLevel )) \ | |||
| 10013 | (offsetof(Fts5Structure,aLevel)__builtin_offsetof(Fts5Structure, aLevel) + (N)*sizeof(Fts5StructureLevel)) | |||
| 10014 | ||||
| 10015 | /* | |||
| 10016 | ** An object of type Fts5SegWriter is used to write to segments. | |||
| 10017 | */ | |||
| 10018 | struct Fts5PageWriter { | |||
| 10019 | int pgno; /* Page number for this page */ | |||
| 10020 | int iPrevPgidx; /* Previous value written into pgidx */ | |||
| 10021 | Fts5Buffer buf; /* Buffer containing leaf data */ | |||
| 10022 | Fts5Buffer pgidx; /* Buffer containing page-index */ | |||
| 10023 | Fts5Buffer term; /* Buffer containing previous term on page */ | |||
| 10024 | }; | |||
| 10025 | struct Fts5DlidxWriter { | |||
| 10026 | int pgno; /* Page number for this page */ | |||
| 10027 | int bPrevValid; /* True if iPrev is valid */ | |||
| 10028 | i64 iPrev; /* Previous rowid value written to page */ | |||
| 10029 | Fts5Buffer buf; /* Buffer containing page data */ | |||
| 10030 | }; | |||
| 10031 | struct Fts5SegWriter { | |||
| 10032 | int iSegid; /* Segid to write to */ | |||
| 10033 | Fts5PageWriter writer; /* PageWriter object */ | |||
| 10034 | i64 iPrevRowid; /* Previous rowid written to current leaf */ | |||
| 10035 | u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ | |||
| 10036 | u8 bFirstRowidInPage; /* True if next rowid is first in page */ | |||
| 10037 | /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ | |||
| 10038 | u8 bFirstTermInPage; /* True if next term will be first in leaf */ | |||
| 10039 | int nLeafWritten; /* Number of leaf pages written */ | |||
| 10040 | int nEmpty; /* Number of contiguous term-less nodes */ | |||
| 10041 | ||||
| 10042 | int nDlidx; /* Allocated size of aDlidx[] array */ | |||
| 10043 | Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ | |||
| 10044 | ||||
| 10045 | /* Values to insert into the %_idx table */ | |||
| 10046 | Fts5Buffer btterm; /* Next term to insert into %_idx table */ | |||
| 10047 | int iBtPage; /* Page number corresponding to btterm */ | |||
| 10048 | }; | |||
| 10049 | ||||
| 10050 | typedef struct Fts5CResult Fts5CResult; | |||
| 10051 | struct Fts5CResult { | |||
| 10052 | u16 iFirst; /* aSeg[] index of firstest iterator */ | |||
| 10053 | u8 bTermEq; /* True if the terms are equal */ | |||
| 10054 | }; | |||
| 10055 | ||||
| 10056 | /* | |||
| 10057 | ** Object for iterating through a single segment, visiting each term/rowid | |||
| 10058 | ** pair in the segment. | |||
| 10059 | ** | |||
| 10060 | ** pSeg: | |||
| 10061 | ** The segment to iterate through. | |||
| 10062 | ** | |||
| 10063 | ** iLeafPgno: | |||
| 10064 | ** Current leaf page number within segment. | |||
| 10065 | ** | |||
| 10066 | ** iLeafOffset: | |||
| 10067 | ** Byte offset within the current leaf that is the first byte of the | |||
| 10068 | ** position list data (one byte past the position-list size field). | |||
| 10069 | ** | |||
| 10070 | ** pLeaf: | |||
| 10071 | ** Buffer containing current leaf page data. Set to NULL at EOF. | |||
| 10072 | ** | |||
| 10073 | ** iTermLeafPgno, iTermLeafOffset: | |||
| 10074 | ** Leaf page number containing the last term read from the segment. And | |||
| 10075 | ** the offset immediately following the term data. | |||
| 10076 | ** | |||
| 10077 | ** flags: | |||
| 10078 | ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: | |||
| 10079 | ** | |||
| 10080 | ** FTS5_SEGITER_ONETERM: | |||
| 10081 | ** If set, set the iterator to point to EOF after the current doclist | |||
| 10082 | ** has been exhausted. Do not proceed to the next term in the segment. | |||
| 10083 | ** | |||
| 10084 | ** FTS5_SEGITER_REVERSE: | |||
| 10085 | ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If | |||
| 10086 | ** it is set, iterate through rowid in descending order instead of the | |||
| 10087 | ** default ascending order. | |||
| 10088 | ** | |||
| 10089 | ** iRowidOffset/nRowidOffset/aRowidOffset: | |||
| 10090 | ** These are used if the FTS5_SEGITER_REVERSE flag is set. | |||
| 10091 | ** | |||
| 10092 | ** For each rowid on the page corresponding to the current term, the | |||
| 10093 | ** corresponding aRowidOffset[] entry is set to the byte offset of the | |||
| 10094 | ** start of the "position-list-size" field within the page. | |||
| 10095 | ** | |||
| 10096 | ** iTermIdx: | |||
| 10097 | ** Index of current term on iTermLeafPgno. | |||
| 10098 | ** | |||
| 10099 | ** apTombstone/nTombstone: | |||
| 10100 | ** These are used for contentless_delete=1 tables only. When the cursor | |||
| 10101 | ** is first allocated, the apTombstone[] array is allocated so that it | |||
| 10102 | ** is large enough for all tombstones hash pages associated with the | |||
| 10103 | ** segment. The pages themselves are loaded lazily from the database as | |||
| 10104 | ** they are required. | |||
| 10105 | */ | |||
| 10106 | struct Fts5SegIter { | |||
| 10107 | Fts5StructureSegment *pSeg; /* Segment to iterate through */ | |||
| 10108 | int flags; /* Mask of configuration flags */ | |||
| 10109 | int iLeafPgno; /* Current leaf page number */ | |||
| 10110 | Fts5Data *pLeaf; /* Current leaf data */ | |||
| 10111 | Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ | |||
| 10112 | i64 iLeafOffset; /* Byte offset within current leaf */ | |||
| 10113 | Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */ | |||
| 10114 | ||||
| 10115 | /* Next method */ | |||
| 10116 | void (*xNext)(Fts5Index*, Fts5SegIter*, int*); | |||
| 10117 | ||||
| 10118 | /* The page and offset from which the current term was read. The offset | |||
| 10119 | ** is the offset of the first rowid in the current doclist. */ | |||
| 10120 | int iTermLeafPgno; | |||
| 10121 | int iTermLeafOffset; | |||
| 10122 | ||||
| 10123 | int iPgidxOff; /* Next offset in pgidx */ | |||
| 10124 | int iEndofDoclist; | |||
| 10125 | ||||
| 10126 | /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ | |||
| 10127 | int iRowidOffset; /* Current entry in aRowidOffset[] */ | |||
| 10128 | int nRowidOffset; /* Allocated size of aRowidOffset[] array */ | |||
| 10129 | int *aRowidOffset; /* Array of offset to rowid fields */ | |||
| 10130 | ||||
| 10131 | Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ | |||
| 10132 | ||||
| 10133 | /* Variables populated based on current entry. */ | |||
| 10134 | Fts5Buffer term; /* Current term */ | |||
| 10135 | i64 iRowid; /* Current rowid */ | |||
| 10136 | int nPos; /* Number of bytes in current position list */ | |||
| 10137 | u8 bDel; /* True if the delete flag is set */ | |||
| 10138 | }; | |||
| 10139 | ||||
| 10140 | /* | |||
| 10141 | ** Array of tombstone pages. Reference counted. | |||
| 10142 | */ | |||
| 10143 | struct Fts5TombstoneArray { | |||
| 10144 | int nRef; /* Number of pointers to this object */ | |||
| 10145 | int nTombstone; | |||
| 10146 | Fts5Data *apTombstone[FLEXARRAY]; /* Array of tombstone pages */ | |||
| 10147 | }; | |||
| 10148 | ||||
| 10149 | /* Size (in bytes) of an Fts5TombstoneArray holding up to N tombstones */ | |||
| 10150 | #define SZ_FTS5TOMBSTONEARRAY(N)(__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(N)*sizeof (Fts5Data*)) \ | |||
| 10151 | (offsetof(Fts5TombstoneArray,apTombstone)__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(N)*sizeof(Fts5Data*)) | |||
| 10152 | ||||
/*
** Sanity check for an Fts5Data structure (x) holding a leaf page: its
** szLeaf field must equal either the page size (nn) or the 16-bit value
** stored at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert(                                   \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2])       \
)

/* Values for the Fts5SegIter.flags mask */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** (x) points to an Fts5Data structure holding a leaf page. Evaluates to
** true if the leaf contains no terms, or false if it contains at least one.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* 16-bit value stored (i*2) bytes after offset szLeaf of leaf page (x) */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* 16-bit value stored in the first two bytes of leaf page (x) */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
| 10174 | ||||
| 10175 | /* | |||
| 10176 | ** Object for iterating through the merged results of one or more segments, | |||
| 10177 | ** visiting each term/rowid pair in the merged data. | |||
| 10178 | ** | |||
| 10179 | ** nSeg is always a power of two greater than or equal to the number of | |||
| 10180 | ** segments that this object is merging data from. Both the aSeg[] and | |||
| 10181 | ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded | |||
| 10182 | ** with zeroed objects - these are handled as if they were iterators opened | |||
| 10183 | ** on empty segments. | |||
| 10184 | ** | |||
| 10185 | ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an | |||
| 10186 | ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the | |||
| 10187 | ** comparison in this context is the index of the iterator that currently | |||
| 10188 | ** points to the smaller term/rowid combination. Iterators at EOF are | |||
| 10189 | ** considered to be greater than all other iterators. | |||
| 10190 | ** | |||
| 10191 | ** aFirst[1] contains the index in aSeg[] of the iterator that points to | |||
| 10192 | ** the smallest key overall. aFirst[0] is unused. | |||
| 10193 | ** | |||
| 10194 | ** poslist: | |||
| 10195 | ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. | |||
| 10196 | ** There is no way to tell if this is populated or not. | |||
| 10197 | ** | |||
| 10198 | ** pColset: | |||
| 10199 | ** If not NULL, points to an object containing a set of column indices. | |||
| 10200 | ** Only matches that occur in one of these columns will be returned. | |||
| 10201 | ** The Fts5Iter does not own the Fts5Colset object, and so it is not | |||
| 10202 | ** freed when the iterator is closed - it is owned by the upper layer. | |||
| 10203 | */ | |||
| 10204 | struct Fts5Iter { | |||
| 10205 | Fts5IndexIter base; /* Base class containing output vars */ | |||
| 10206 | Fts5TokenDataIter *pTokenDataIter; | |||
| 10207 | ||||
| 10208 | Fts5Index *pIndex; /* Index that owns this iterator */ | |||
| 10209 | Fts5Buffer poslist; /* Buffer containing current poslist */ | |||
| 10210 | Fts5Colset *pColset; /* Restrict matches to these columns */ | |||
| 10211 | ||||
| 10212 | /* Invoked to set output variables. */ | |||
| 10213 | void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); | |||
| 10214 | ||||
| 10215 | int nSeg; /* Size of aSeg[] array */ | |||
| 10216 | int bRev; /* True to iterate in reverse order */ | |||
| 10217 | u8 bSkipEmpty; /* True to skip deleted entries */ | |||
| 10218 | ||||
| 10219 | i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ | |||
| 10220 | Fts5CResult *aFirst; /* Current merge state (see above) */ | |||
| 10221 | Fts5SegIter aSeg[FLEXARRAY]; /* Array of segment iterators */ | |||
| 10222 | }; | |||
| 10223 | ||||
| 10224 | /* Size (in bytes) of an Fts5Iter object holding up to N segment iterators */ | |||
| 10225 | #define SZ_FTS5ITER(N)(__builtin_offsetof(Fts5Iter, aSeg)+(N)*sizeof(Fts5SegIter)) (offsetof(Fts5Iter,aSeg)__builtin_offsetof(Fts5Iter, aSeg)+(N)*sizeof(Fts5SegIter)) | |||
| 10226 | ||||
| 10227 | /* | |||
| 10228 | ** An instance of the following type is used to iterate through the contents | |||
| 10229 | ** of a doclist-index record. | |||
| 10230 | ** | |||
| 10231 | ** pData: | |||
| 10232 | ** Record containing the doclist-index data. | |||
| 10233 | ** | |||
| 10234 | ** bEof: | |||
| 10235 | ** Set to true once iterator has reached EOF. | |||
| 10236 | ** | |||
| 10237 | ** iOff: | |||
| 10238 | ** Set to the current offset within record pData. | |||
| 10239 | */ | |||
| 10240 | struct Fts5DlidxLvl { | |||
| 10241 | Fts5Data *pData; /* Data for current page of this level */ | |||
| 10242 | int iOff; /* Current offset into pData */ | |||
| 10243 | int bEof; /* At EOF already */ | |||
| 10244 | int iFirstOff; /* Used by reverse iterators */ | |||
| 10245 | ||||
| 10246 | /* Output variables */ | |||
| 10247 | int iLeafPgno; /* Page number of current leaf page */ | |||
| 10248 | i64 iRowid; /* First rowid on leaf iLeafPgno */ | |||
| 10249 | }; | |||
| 10250 | struct Fts5DlidxIter { | |||
| 10251 | int nLvl; | |||
| 10252 | int iSegid; | |||
| 10253 | Fts5DlidxLvl aLvl[FLEXARRAY]; | |||
| 10254 | }; | |||
| 10255 | ||||
| 10256 | /* Size (in bytes) of an Fts5DlidxIter object with up to N levels */ | |||
| 10257 | #define SZ_FTS5DLIDXITER(N)(__builtin_offsetof(Fts5DlidxIter, aLvl)+(N)*sizeof(Fts5DlidxLvl )) \ | |||
| 10258 | (offsetof(Fts5DlidxIter,aLvl)__builtin_offsetof(Fts5DlidxIter, aLvl)+(N)*sizeof(Fts5DlidxLvl)) | |||
| 10259 | ||||
| 10260 | static void fts5PutU16(u8 *aOut, u16 iVal){ | |||
| 10261 | aOut[0] = (iVal>>8); | |||
| 10262 | aOut[1] = (iVal&0xFF); | |||
| 10263 | } | |||
| 10264 | ||||
| 10265 | static u16 fts5GetU16(const u8 *aIn){ | |||
| 10266 | return ((u16)aIn[0] << 8) + aIn[1]; | |||
| 10267 | } | |||
| 10268 | ||||
| 10269 | /* | |||
| 10270 | ** The only argument points to a buffer at least 8 bytes in size. This | |||
| 10271 | ** function interprets the first 8 bytes of the buffer as a 64-bit big-endian | |||
| 10272 | ** unsigned integer and returns the result. | |||
| 10273 | */ | |||
| 10274 | static u64 fts5GetU64(u8 *a){ | |||
| 10275 | return ((u64)a[0] << 56) | |||
| 10276 | + ((u64)a[1] << 48) | |||
| 10277 | + ((u64)a[2] << 40) | |||
| 10278 | + ((u64)a[3] << 32) | |||
| 10279 | + ((u64)a[4] << 24) | |||
| 10280 | + ((u64)a[5] << 16) | |||
| 10281 | + ((u64)a[6] << 8) | |||
| 10282 | + ((u64)a[7] << 0); | |||
| 10283 | } | |||
| 10284 | ||||
| 10285 | /* | |||
| 10286 | ** The only argument points to a buffer at least 4 bytes in size. This | |||
| 10287 | ** function interprets the first 4 bytes of the buffer as a 32-bit big-endian | |||
| 10288 | ** unsigned integer and returns the result. | |||
| 10289 | */ | |||
| 10290 | static u32 fts5GetU32(const u8 *a){ | |||
| 10291 | return ((u32)a[0] << 24) | |||
| 10292 | + ((u32)a[1] << 16) | |||
| 10293 | + ((u32)a[2] << 8) | |||
| 10294 | + ((u32)a[3] << 0); | |||
| 10295 | } | |||
| 10296 | ||||
| 10297 | /* | |||
| 10298 | ** Write iVal, formated as a 64-bit big-endian unsigned integer, to the | |||
| 10299 | ** buffer indicated by the first argument. | |||
| 10300 | */ | |||
| 10301 | static void fts5PutU64(u8 *a, u64 iVal){ | |||
| 10302 | a[0] = ((iVal >> 56) & 0xFF); | |||
| 10303 | a[1] = ((iVal >> 48) & 0xFF); | |||
| 10304 | a[2] = ((iVal >> 40) & 0xFF); | |||
| 10305 | a[3] = ((iVal >> 32) & 0xFF); | |||
| 10306 | a[4] = ((iVal >> 24) & 0xFF); | |||
| 10307 | a[5] = ((iVal >> 16) & 0xFF); | |||
| 10308 | a[6] = ((iVal >> 8) & 0xFF); | |||
| 10309 | a[7] = ((iVal >> 0) & 0xFF); | |||
| 10310 | } | |||
| 10311 | ||||
| 10312 | /* | |||
| 10313 | ** Write iVal, formated as a 32-bit big-endian unsigned integer, to the | |||
| 10314 | ** buffer indicated by the first argument. | |||
| 10315 | */ | |||
| 10316 | static void fts5PutU32(u8 *a, u32 iVal){ | |||
| 10317 | a[0] = ((iVal >> 24) & 0xFF); | |||
| 10318 | a[1] = ((iVal >> 16) & 0xFF); | |||
| 10319 | a[2] = ((iVal >> 8) & 0xFF); | |||
| 10320 | a[3] = ((iVal >> 0) & 0xFF); | |||
| 10321 | } | |||
| 10322 | ||||
| 10323 | /* | |||
| 10324 | ** Allocate and return a buffer at least nByte bytes in size. | |||
| 10325 | ** | |||
| 10326 | ** If an OOM error is encountered, return NULL and set the error code in | |||
| 10327 | ** the Fts5Index handle passed as the first argument. | |||
| 10328 | */ | |||
| 10329 | static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){ | |||
| 10330 | return sqlite3Fts5MallocZero(&p->rc, nByte); | |||
| 10331 | } | |||
| 10332 | ||||
| 10333 | /* | |||
| 10334 | ** Compare the contents of the pLeft buffer with the pRight/nRight blob. | |||
| 10335 | ** | |||
| 10336 | ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or | |||
| 10337 | ** +ve if pRight is smaller than pLeft. In other words: | |||
| 10338 | ** | |||
| 10339 | ** res = *pLeft - *pRight | |||
| 10340 | */ | |||
#ifdef SQLITE_DEBUG
/*
** Debug-only helper: compare buffer pLeft with the blob pRight/nRight.
** The common prefix is compared bytewise; if it is identical, the shorter
** operand is the lesser. Returns <0, 0 or >0.
**
** memcmp() is skipped when there is nothing to compare, because an empty
** buffer may have a NULL data pointer and passing NULL to memcmp() is
** undefined even for a zero length. fts5BufferCompare() guards the same way.
*/
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = (nCmp<=0 ? 0 : memcmp(pLeft->p, pRight, nCmp));
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
| 10351 | ||||
| 10352 | /* | |||
| 10353 | ** Compare the contents of the two buffers using memcmp(). If one buffer | |||
| 10354 | ** is a prefix of the other, it is considered the lesser. | |||
| 10355 | ** | |||
| 10356 | ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or | |||
| 10357 | ** +ve if pRight is smaller than pLeft. In other words: | |||
| 10358 | ** | |||
| 10359 | ** res = *pLeft - *pRight | |||
| 10360 | */ | |||
| 10361 | static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ | |||
| 10362 | int nCmp, res; | |||
| 10363 | nCmp = MIN(pLeft->n, pRight->n)(((pLeft->n) < (pRight->n)) ? (pLeft->n) : (pRight ->n)); | |||
| 10364 | assert( nCmp<=0 || pLeft->p!=0 )((void) (0)); | |||
| 10365 | assert( nCmp<=0 || pRight->p!=0 )((void) (0)); | |||
| 10366 | res = fts5Memcmp(pLeft->p, pRight->p, nCmp)((nCmp)<=0 ? 0 : memcmp((pLeft->p), (pRight->p), (nCmp ))); | |||
| 10367 | return (res==0 ? (pLeft->n - pRight->n) : res); | |||
| 10368 | } | |||
| 10369 | ||||
| 10370 | static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ | |||
| 10371 | int ret; | |||
| 10372 | fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret)sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(ret)); | |||
| 10373 | return ret; | |||
| 10374 | } | |||
| 10375 | ||||
| 10376 | /* | |||
| 10377 | ** Close the read-only blob handle, if it is open. | |||
| 10378 | */ | |||
| 10379 | static void fts5IndexCloseReader(Fts5Index *p){ | |||
| 10380 | if( p->pReader ){ | |||
| 10381 | int rc; | |||
| 10382 | sqlite3_blob *pReader = p->pReader; | |||
| 10383 | p->pReader = 0; | |||
| 10384 | rc = sqlite3_blob_closesqlite3_api->blob_close(pReader); | |||
| 10385 | if( p->rc==SQLITE_OK0 ) p->rc = rc; | |||
| 10386 | } | |||
| 10387 | } | |||
| 10388 | ||||
| 10389 | /* | |||
| 10390 | ** Retrieve a record from the %_data table. | |||
| 10391 | ** | |||
| 10392 | ** If an error occurs, NULL is returned and an error left in the | |||
| 10393 | ** Fts5Index object. | |||
| 10394 | */ | |||
| 10395 | static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ | |||
| 10396 | Fts5Data *pRet = 0; | |||
| 10397 | if( p->rc==SQLITE_OK0 ){ | |||
| 10398 | int rc = SQLITE_OK0; | |||
| 10399 | ||||
| 10400 | if( p->pReader ){ | |||
| 10401 | /* This call may return SQLITE_ABORT if there has been a savepoint | |||
| 10402 | ** rollback since it was last used. In this case a new blob handle | |||
| 10403 | ** is required. */ | |||
| 10404 | sqlite3_blob *pBlob = p->pReader; | |||
| 10405 | p->pReader = 0; | |||
| 10406 | rc = sqlite3_blob_reopensqlite3_api->blob_reopen(pBlob, iRowid); | |||
| 10407 | assert( p->pReader==0 )((void) (0)); | |||
| 10408 | p->pReader = pBlob; | |||
| 10409 | if( rc!=SQLITE_OK0 ){ | |||
| 10410 | fts5IndexCloseReader(p); | |||
| 10411 | } | |||
| 10412 | if( rc==SQLITE_ABORT4 ) rc = SQLITE_OK0; | |||
| 10413 | } | |||
| 10414 | ||||
| 10415 | /* If the blob handle is not open at this point, open it and seek | |||
| 10416 | ** to the requested entry. */ | |||
| 10417 | if( p->pReader==0 && rc==SQLITE_OK0 ){ | |||
| 10418 | Fts5Config *pConfig = p->pConfig; | |||
| 10419 | rc = sqlite3_blob_opensqlite3_api->blob_open(pConfig->db, | |||
| 10420 | pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader | |||
| 10421 | ); | |||
| 10422 | } | |||
| 10423 | ||||
| 10424 | /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls | |||
| 10425 | ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. | |||
| 10426 | ** All the reasons those functions might return SQLITE_ERROR - missing | |||
| 10427 | ** table, missing row, non-blob/text in block column - indicate | |||
| 10428 | ** backing store corruption. */ | |||
| 10429 | if( rc==SQLITE_ERROR1 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 10430 | ||||
| 10431 | if( rc==SQLITE_OK0 ){ | |||
| 10432 | u8 *aOut = 0; /* Read blob data into this buffer */ | |||
| 10433 | int nByte = sqlite3_blob_bytessqlite3_api->blob_bytes(p->pReader); | |||
| 10434 | int szData = (sizeof(Fts5Data) + 7) & ~7; | |||
| 10435 | sqlite3_int64 nAlloc = szData + nByte + FTS5_DATA_PADDING20; | |||
| 10436 | pRet = (Fts5Data*)sqlite3_malloc64sqlite3_api->malloc64(nAlloc); | |||
| 10437 | if( pRet ){ | |||
| 10438 | pRet->nn = nByte; | |||
| 10439 | aOut = pRet->p = (u8*)pRet + szData; | |||
| 10440 | }else{ | |||
| 10441 | rc = SQLITE_NOMEM7; | |||
| 10442 | } | |||
| 10443 | ||||
| 10444 | if( rc==SQLITE_OK0 ){ | |||
| 10445 | rc = sqlite3_blob_readsqlite3_api->blob_read(p->pReader, aOut, nByte, 0); | |||
| 10446 | } | |||
| 10447 | if( rc!=SQLITE_OK0 ){ | |||
| 10448 | sqlite3_freesqlite3_api->free(pRet); | |||
| 10449 | pRet = 0; | |||
| 10450 | }else{ | |||
| 10451 | /* TODO1: Fix this */ | |||
| 10452 | pRet->p[nByte] = 0x00; | |||
| 10453 | pRet->p[nByte+1] = 0x00; | |||
| 10454 | pRet->szLeaf = fts5GetU16(&pRet->p[2]); | |||
| 10455 | } | |||
| 10456 | } | |||
| 10457 | p->rc = rc; | |||
| 10458 | p->nRead++; | |||
| 10459 | } | |||
| 10460 | ||||
| 10461 | assert( (pRet==0)==(p->rc!=SQLITE_OK) )((void) (0)); | |||
| 10462 | assert( pRet==0 || EIGHT_BYTE_ALIGNMENT( pRet->p ) )((void) (0)); | |||
| 10463 | return pRet; | |||
| 10464 | } | |||
| 10465 | ||||
| 10466 | ||||
| 10467 | /* | |||
| 10468 | ** Release a reference to data record returned by an earlier call to | |||
| 10469 | ** fts5DataRead(). | |||
| 10470 | */ | |||
| 10471 | static void fts5DataRelease(Fts5Data *pData){ | |||
| 10472 | sqlite3_freesqlite3_api->free(pData); | |||
| 10473 | } | |||
| 10474 | ||||
| 10475 | static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){ | |||
| 10476 | Fts5Data *pRet = fts5DataRead(p, iRowid); | |||
| 10477 | if( pRet ){ | |||
| 10478 | if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){ | |||
| 10479 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 10480 | fts5DataRelease(pRet); | |||
| 10481 | pRet = 0; | |||
| 10482 | } | |||
| 10483 | } | |||
| 10484 | return pRet; | |||
| 10485 | } | |||
| 10486 | ||||
| 10487 | static int fts5IndexPrepareStmt( | |||
| 10488 | Fts5Index *p, | |||
| 10489 | sqlite3_stmt **ppStmt, | |||
| 10490 | char *zSql | |||
| 10491 | ){ | |||
| 10492 | if( p->rc==SQLITE_OK0 ){ | |||
| 10493 | if( zSql ){ | |||
| 10494 | int rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(p->pConfig->db, zSql, -1, | |||
| 10495 | SQLITE_PREPARE_PERSISTENT0x01|SQLITE_PREPARE_NO_VTAB0x04, | |||
| 10496 | ppStmt, 0); | |||
| 10497 | /* If this prepare() call fails with SQLITE_ERROR, then one of the | |||
| 10498 | ** %_idx or %_data tables has been removed or modified. Call this | |||
| 10499 | ** corruption. */ | |||
| 10500 | p->rc = (rc==SQLITE_ERROR1 ? SQLITE_CORRUPT11 : rc); | |||
| 10501 | }else{ | |||
| 10502 | p->rc = SQLITE_NOMEM7; | |||
| 10503 | } | |||
| 10504 | } | |||
| 10505 | sqlite3_freesqlite3_api->free(zSql); | |||
| 10506 | return p->rc; | |||
| 10507 | } | |||
| 10508 | ||||
| 10509 | ||||
| 10510 | /* | |||
| 10511 | ** INSERT OR REPLACE a record into the %_data table. | |||
| 10512 | */ | |||
| 10513 | static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ | |||
| 10514 | if( p->rc!=SQLITE_OK0 ) return; | |||
| 10515 | ||||
| 10516 | if( p->pWriter==0 ){ | |||
| 10517 | Fts5Config *pConfig = p->pConfig; | |||
| 10518 | fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintfsqlite3_api->mprintf( | |||
| 10519 | "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", | |||
| 10520 | pConfig->zDb, pConfig->zName | |||
| 10521 | )); | |||
| 10522 | if( p->rc ) return; | |||
| 10523 | } | |||
| 10524 | ||||
| 10525 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pWriter, 1, iRowid); | |||
| 10526 | sqlite3_bind_blobsqlite3_api->bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 10527 | sqlite3_stepsqlite3_api->step(p->pWriter); | |||
| 10528 | p->rc = sqlite3_resetsqlite3_api->reset(p->pWriter); | |||
| 10529 | sqlite3_bind_nullsqlite3_api->bind_null(p->pWriter, 2); | |||
| 10530 | } | |||
| 10531 | ||||
| 10532 | /* | |||
| 10533 | ** Execute the following SQL: | |||
| 10534 | ** | |||
| 10535 | ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast | |||
| 10536 | */ | |||
| 10537 | static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ | |||
| 10538 | if( p->rc!=SQLITE_OK0 ) return; | |||
| 10539 | ||||
| 10540 | if( p->pDeleter==0 ){ | |||
| 10541 | Fts5Config *pConfig = p->pConfig; | |||
| 10542 | char *zSql = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 10543 | "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?", | |||
| 10544 | pConfig->zDb, pConfig->zName | |||
| 10545 | ); | |||
| 10546 | if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return; | |||
| 10547 | } | |||
| 10548 | ||||
| 10549 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pDeleter, 1, iFirst); | |||
| 10550 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pDeleter, 2, iLast); | |||
| 10551 | sqlite3_stepsqlite3_api->step(p->pDeleter); | |||
| 10552 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDeleter); | |||
| 10553 | } | |||
| 10554 | ||||
| 10555 | /* | |||
| 10556 | ** Remove all records associated with segment iSegid. | |||
| 10557 | */ | |||
| 10558 | static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){ | |||
| 10559 | int iSegid = pSeg->iSegid; | |||
| 10560 | i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) ); | |||
| 10561 | i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)( ((i64)(iSegid+1) << (31 +5 +1)) + ((i64)(0) << ( 31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) )-1; | |||
| 10562 | fts5DataDelete(p, iFirst, iLast); | |||
| 10563 | ||||
| 10564 | if( pSeg->nPgTombstone ){ | |||
| 10565 | i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0)( ((i64)(iSegid+(1<<16)) << (31 +5 +1)) + ((i64)( 0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(0)) ); | |||
| 10566 | i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1)( ((i64)(iSegid+(1<<16)) << (31 +5 +1)) + ((i64)( 0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg ->nPgTombstone-1)) ); | |||
| 10567 | fts5DataDelete(p, iTomb1, iTomb2); | |||
| 10568 | } | |||
| 10569 | if( p->pIdxDeleter==0 ){ | |||
| 10570 | Fts5Config *pConfig = p->pConfig; | |||
| 10571 | fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintfsqlite3_api->mprintf( | |||
| 10572 | "DELETE FROM '%q'.'%q_idx' WHERE segid=?", | |||
| 10573 | pConfig->zDb, pConfig->zName | |||
| 10574 | )); | |||
| 10575 | } | |||
| 10576 | if( p->rc==SQLITE_OK0 ){ | |||
| 10577 | sqlite3_bind_intsqlite3_api->bind_int(p->pIdxDeleter, 1, iSegid); | |||
| 10578 | sqlite3_stepsqlite3_api->step(p->pIdxDeleter); | |||
| 10579 | p->rc = sqlite3_resetsqlite3_api->reset(p->pIdxDeleter); | |||
| 10580 | } | |||
| 10581 | } | |||
| 10582 | ||||
| 10583 | /* | |||
| 10584 | ** Release a reference to an Fts5Structure object returned by an earlier | |||
| 10585 | ** call to fts5StructureRead() or fts5StructureDecode(). | |||
| 10586 | */ | |||
| 10587 | static void fts5StructureRelease(Fts5Structure *pStruct){ | |||
| 10588 | if( pStruct && 0>=(--pStruct->nRef) ){ | |||
| 10589 | int i; | |||
| 10590 | assert( pStruct->nRef==0 )((void) (0)); | |||
| 10591 | for(i=0; i<pStruct->nLevel; i++){ | |||
| 10592 | sqlite3_freesqlite3_api->free(pStruct->aLevel[i].aSeg); | |||
| 10593 | } | |||
| 10594 | sqlite3_freesqlite3_api->free(pStruct); | |||
| 10595 | } | |||
| 10596 | } | |||
| 10597 | ||||
| 10598 | static void fts5StructureRef(Fts5Structure *pStruct){ | |||
| 10599 | pStruct->nRef++; | |||
| 10600 | } | |||
| 10601 | ||||
| 10602 | static void *sqlite3Fts5StructureRef(Fts5Index *p){ | |||
| 10603 | fts5StructureRef(p->pStruct); | |||
| 10604 | return (void*)p->pStruct; | |||
| 10605 | } | |||
| 10606 | static void sqlite3Fts5StructureRelease(void *p){ | |||
| 10607 | if( p ){ | |||
| 10608 | fts5StructureRelease((Fts5Structure*)p); | |||
| 10609 | } | |||
| 10610 | } | |||
| 10611 | static int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){ | |||
| 10612 | if( p->pStruct!=(Fts5Structure*)pStruct ){ | |||
| 10613 | return SQLITE_ABORT4; | |||
| 10614 | } | |||
| 10615 | return SQLITE_OK0; | |||
| 10616 | } | |||
| 10617 | ||||
| 10618 | /* | |||
| 10619 | ** Ensure that structure object (*pp) is writable. | |||
| 10620 | ** | |||
| 10621 | ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If | |||
| 10622 | ** an error occurs, (*pRc) is set to an SQLite error code before returning. | |||
| 10623 | */ | |||
| 10624 | static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){ | |||
| 10625 | Fts5Structure *p = *pp; | |||
| 10626 | if( *pRc==SQLITE_OK0 && p->nRef>1 ){ | |||
| 10627 | i64 nByte = SZ_FTS5STRUCTURE(p->nLevel)(__builtin_offsetof(Fts5Structure, aLevel) + (p->nLevel)*sizeof (Fts5StructureLevel)); | |||
| 10628 | Fts5Structure *pNew; | |||
| 10629 | pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte); | |||
| 10630 | if( pNew ){ | |||
| 10631 | int i; | |||
| 10632 | memcpy(pNew, p, nByte); | |||
| 10633 | for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0; | |||
| 10634 | for(i=0; i<p->nLevel; i++){ | |||
| 10635 | Fts5StructureLevel *pLvl = &pNew->aLevel[i]; | |||
| 10636 | nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg; | |||
| 10637 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte); | |||
| 10638 | if( pLvl->aSeg==0 ){ | |||
| 10639 | for(i=0; i<p->nLevel; i++){ | |||
| 10640 | sqlite3_freesqlite3_api->free(pNew->aLevel[i].aSeg); | |||
| 10641 | } | |||
| 10642 | sqlite3_freesqlite3_api->free(pNew); | |||
| 10643 | return; | |||
| 10644 | } | |||
| 10645 | memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte); | |||
| 10646 | } | |||
| 10647 | p->nRef--; | |||
| 10648 | pNew->nRef = 1; | |||
| 10649 | } | |||
| 10650 | *pp = pNew; | |||
| 10651 | } | |||
| 10652 | } | |||
| 10653 | ||||
| 10654 | /* | |||
| 10655 | ** Deserialize and return the structure record currently stored in serialized | |||
| 10656 | ** form within buffer pData/nData. | |||
| 10657 | ** | |||
| 10658 | ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array | |||
| 10659 | ** are described here as over-allocated by one slot so that the structure | |||
| 10660 | ** contents can be more easily edited. | |||
| | ** NOTE(review): the allocations in this function are sized for exactly | |||
| | ** nLevel levels and nTotal segments; no extra slot is visible here. | |||
| | ** | |||
| | ** Record layout as read below: a 4-byte cookie, an optional 4-byte | |||
| | ** FTS5_STRUCTURE_V2 marker, varints nLevel, nSegment and nWriteCounter, | |||
| | ** then for each level varints nMerge and nTotal followed by nTotal | |||
| | ** segment entries (iSegid, pgnoFirst, pgnoLast, plus iOrigin1, iOrigin2, | |||
| | ** nPgTombstone, nEntryTombstone and nEntry for V2 records). | |||
| | ** | |||
| | ** If piCookie is not NULL, *piCookie is set to the cookie value. | |||
| 10661 | ** | |||
| 10662 | ** If an error occurs while decoding, *ppOut is set to NULL and an SQLite | |||
| 10663 | ** error code returned. Otherwise, *ppOut is set to point to the new object | |||
| 10664 | ** and SQLITE_OK returned. | |||
| | ** | |||
| | ** Exception: when nLevel or nSegment is out of range the function returns | |||
| | ** FTS5_CORRUPT immediately and does not write to *ppOut, so callers should | |||
| | ** initialize their output pointer before calling. | |||
| 10665 | */ | |||
| 10666 | static int fts5StructureDecode( | |||
| 10667 | const u8 *pData, /* Buffer containing serialized structure */ | |||
| 10668 | int nData, /* Size of buffer pData in bytes */ | |||
| 10669 | int *piCookie, /* Configuration cookie value */ | |||
| 10670 | Fts5Structure **ppOut /* OUT: Deserialized object */ | |||
| 10671 | ){ | |||
| 10672 | int rc = SQLITE_OK0; | |||
| 10673 | int i = 0; | |||
| 10674 | int iLvl; | |||
| 10675 | int nLevel = 0; | |||
| 10676 | int nSegment = 0; | |||
| 10677 | sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */ | |||
| 10678 | Fts5Structure *pRet = 0; /* Structure object to return */ | |||
| 10679 | int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */ | |||
| 10680 | u64 nOriginCntr = 0; /* Largest origin value seen so far */ | |||
| 10681 | | |||
| 10682 | /* Grab the cookie value (the first 4 bytes of the record) */ | |||
| 10683 | if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); | |||
| 10684 | i = 4; | |||
| 10685 | | |||
| 10686 | /* Check if this is a V2 structure record. Set bStructureV2 if it is. */ | |||
| 10687 | if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2"\xFF\x00\x00\x01", 4) ){ /* NOTE(review): the header reads up to the first per-level check are not compared against nData; the visible caller zero-pads the buffer beforehand */ | |||
| 10688 | i += 4; | |||
| 10689 | bStructureV2 = 1; | |||
| 10690 | } | |||
| 10691 | | |||
| 10692 | /* Read the total number of levels and segments from the start of the | |||
| 10693 | ** structure record. */ | |||
| 10694 | i += fts5GetVarint32(&pData[i], nLevel)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nLevel)); | |||
| 10695 | i += fts5GetVarint32(&pData[i], nSegment)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nSegment)); | |||
| 10696 | if( nLevel>FTS5_MAX_SEGMENT2000 || nLevel<0 | |||
| 10697 | || nSegment>FTS5_MAX_SEGMENT2000 || nSegment<0 | |||
| 10698 | ){ | |||
| 10699 | return FTS5_CORRUPT(11 | (1<<8)); /* Early exit: *ppOut is left untouched */ | |||
| 10700 | } | |||
| 10701 | nByte = SZ_FTS5STRUCTURE(nLevel)(__builtin_offsetof(Fts5Structure, aLevel) + (nLevel)*sizeof( Fts5StructureLevel)); | |||
| 10702 | pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); | |||
| 10703 | | |||
| 10704 | if( pRet ){ | |||
| 10705 | pRet->nRef = 1; | |||
| 10706 | pRet->nLevel = nLevel; | |||
| 10707 | pRet->nSegment = nSegment; | |||
| 10708 | i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); | |||
| 10709 | | |||
| 10710 | for(iLvl=0; rc==SQLITE_OK0 && iLvl<nLevel; iLvl++){ | |||
| 10711 | Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; | |||
| 10712 | int nTotal = 0; | |||
| 10713 | int iSeg; | |||
| 10714 | | |||
| 10715 | if( i>=nData ){ /* Ran out of input before this level's header */ | |||
| 10716 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 10717 | }else{ | |||
| 10718 | i += fts5GetVarint32(&pData[i], pLvl->nMerge)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pLvl->nMerge )); | |||
| 10719 | i += fts5GetVarint32(&pData[i], nTotal)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(nTotal)); | |||
| 10720 | if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT(11 | (1<<8)); /* nMerge may not exceed the number of segments on the level */ | |||
| 10721 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, | |||
| 10722 | nTotal * sizeof(Fts5StructureSegment) | |||
| 10723 | ); | |||
| 10724 | nSegment -= nTotal; /* Running remainder; checked against zero after the level loop */ | |||
| 10725 | } | |||
| 10726 | | |||
| 10727 | if( rc==SQLITE_OK0 ){ | |||
| 10728 | pLvl->nSeg = nTotal; | |||
| 10729 | for(iSeg=0; iSeg<nTotal; iSeg++){ | |||
| 10730 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | |||
| 10731 | if( i>=nData ){ /* Ran out of input before this segment entry */ | |||
| 10732 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 10733 | break; | |||
| 10734 | } | |||
| 10735 | assert( pSeg!=0 )((void) (0)); | |||
| 10736 | i += fts5GetVarint32(&pData[i], pSeg->iSegid)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->iSegid )); | |||
| 10737 | i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->pgnoFirst )); | |||
| 10738 | i += fts5GetVarint32(&pData[i], pSeg->pgnoLast)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->pgnoLast )); | |||
| 10739 | if( bStructureV2 ){ /* Extra per-segment fields present only in V2 records */ | |||
| 10740 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->iOrigin1); | |||
| 10741 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->iOrigin2); | |||
| 10742 | i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone)sqlite3Fts5GetVarint32(&pData[i],(u32*)&(pSeg->nPgTombstone )); | |||
| 10743 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->nEntryTombstone); | |||
| 10744 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData[i], &pSeg->nEntry); | |||
| 10745 | nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2)(((nOriginCntr) > (pSeg->iOrigin2)) ? (nOriginCntr) : ( pSeg->iOrigin2)); | |||
| 10746 | } | |||
| 10747 | if( pSeg->pgnoLast<pSeg->pgnoFirst ){ /* A segment's page range must not be inverted */ | |||
| 10748 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 10749 | break; | |||
| 10750 | } | |||
| 10751 | } | |||
| 10752 | if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT(11 | (1<<8)); /* Previous level has nMerge set but this level is empty */ | |||
| 10753 | if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT(11 | (1<<8)); /* The last level may not have nMerge set */ | |||
| 10754 | } | |||
| 10755 | } | |||
| 10756 | if( nSegment!=0 && rc==SQLITE_OK0 ) rc = FTS5_CORRUPT(11 | (1<<8)); /* Per-level counts must add up to the header's nSegment */ | |||
| 10757 | if( bStructureV2 ){ | |||
| 10758 | pRet->nOriginCntr = nOriginCntr+1; /* One more than the largest iOrigin2 read above */ | |||
| 10759 | } | |||
| 10760 | | |||
| 10761 | if( rc!=SQLITE_OK0 ){ | |||
| 10762 | fts5StructureRelease(pRet); | |||
| 10763 | pRet = 0; | |||
| 10764 | } | |||
| 10765 | } | |||
| 10766 | | |||
| 10767 | *ppOut = pRet; | |||
| 10768 | return rc; | |||
| 10769 | } | |||
| 10770 | ||||
| 10771 | /* | |||
| 10772 | ** Add a level to the Fts5Structure.aLevel[] array of structure object | |||
| 10773 | ** (*ppStruct). | |||
| | ** | |||
| | ** The object is first passed to fts5StructureMakeWritable(). It is then | |||
| | ** reallocated, so *ppStruct may change; the new level is zeroed. If the | |||
| | ** reallocation fails, *pRc is set to SQLITE_NOMEM and *ppStruct is left | |||
| | ** as it was. This function is a no-op if *pRc is already an error. | |||
| 10774 | */ | |||
| 10775 | static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ | |||
| 10776 | fts5StructureMakeWritable(pRc, ppStruct); | |||
| 10777 | assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK )((void) (0)); | |||
| 10778 | if( *pRc==SQLITE_OK0 ){ | |||
| 10779 | Fts5Structure *pStruct = *ppStruct; | |||
| 10780 | int nLevel = pStruct->nLevel; | |||
| 10781 | sqlite3_int64 nByte = SZ_FTS5STRUCTURE(nLevel+2)(__builtin_offsetof(Fts5Structure, aLevel) + (nLevel+2)*sizeof (Fts5StructureLevel)); /* Room for nLevel+2 levels, although only one is added */ | |||
| 10782 | | |||
| 10783 | pStruct = sqlite3_realloc64sqlite3_api->realloc64(pStruct, nByte); /* Result goes to the local only, so *ppStruct survives a failure */ | |||
| 10784 | if( pStruct ){ | |||
| 10785 | memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); /* Only the newly exposed level is zeroed */ | |||
| 10786 | pStruct->nLevel++; | |||
| 10787 | *ppStruct = pStruct; | |||
| 10788 | }else{ | |||
| 10789 | *pRc = SQLITE_NOMEM7; | |||
| 10790 | } | |||
| 10791 | } | |||
| 10792 | } | |||
| 10793 | ||||
| 10794 | /* | |||
| 10795 | ** Extend level iLvl so that there is room for at least nExtra more | |||
| 10796 | ** segments. | |||
| | ** | |||
| | ** If bInsert is zero the nExtra new slots are appended after the existing | |||
| | ** entries. Otherwise the existing entries are shifted up by nExtra | |||
| | ** positions and the new slots occupy the start of the array. In both | |||
| | ** cases the new slots are zeroed, and pLvl->nSeg is NOT modified; | |||
| | ** updating it is left to the caller. | |||
| | ** | |||
| | ** On allocation failure *pRc is set to SQLITE_NOMEM and the level is left | |||
| | ** unchanged. This function is a no-op if *pRc is already an error. | |||
| 10797 | */ | |||
| 10798 | static void fts5StructureExtendLevel( | |||
| 10799 | int *pRc, | |||
| 10800 | Fts5Structure *pStruct, | |||
| 10801 | int iLvl, | |||
| 10802 | int nExtra, | |||
| 10803 | int bInsert | |||
| 10804 | ){ | |||
| 10805 | if( *pRc==SQLITE_OK0 ){ | |||
| 10806 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | |||
| 10807 | Fts5StructureSegment *aNew; | |||
| 10808 | sqlite3_int64 nByte; | |||
| 10809 | | |||
| 10810 | nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); | |||
| 10811 | aNew = sqlite3_realloc64sqlite3_api->realloc64(pLvl->aSeg, nByte); /* pLvl->aSeg is only overwritten on success */ | |||
| 10812 | if( aNew ){ | |||
| 10813 | if( bInsert==0 ){ | |||
| 10814 | memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); /* Append: zero the tail */ | |||
| 10815 | }else{ | |||
| 10816 | int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); | |||
| 10817 | memmove(&aNew[nExtra], aNew, nMove); /* Insert: regions overlap, hence memmove */ | |||
| 10818 | memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); | |||
| 10819 | } | |||
| 10820 | pLvl->aSeg = aNew; | |||
| 10821 | }else{ | |||
| 10822 | *pRc = SQLITE_NOMEM7; | |||
| 10823 | } | |||
| 10824 | } | |||
| 10825 | } | |||
| 10826 | ||||
| 10827 | static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){ | |||
| 10828 | Fts5Structure *pRet = 0; | |||
| 10829 | Fts5Config *pConfig = p->pConfig; | |||
| 10830 | int iCookie; /* Configuration cookie */ | |||
| 10831 | Fts5Data *pData; | |||
| 10832 | ||||
| 10833 | pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID10); | |||
| 10834 | if( p->rc==SQLITE_OK0 ){ | |||
| 10835 | /* TODO: Do we need this if the leaf-index is appended? Probably... */ | |||
| 10836 | memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING20); | |||
| 10837 | p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); | |||
| 10838 | if( p->rc==SQLITE_OK0 && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){ | |||
| 10839 | p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); | |||
| 10840 | } | |||
| 10841 | fts5DataRelease(pData); | |||
| 10842 | if( p->rc!=SQLITE_OK0 ){ | |||
| 10843 | fts5StructureRelease(pRet); | |||
| 10844 | pRet = 0; | |||
| 10845 | } | |||
| 10846 | } | |||
| 10847 | ||||
| 10848 | return pRet; | |||
| 10849 | } | |||
| 10850 | ||||
| 10851 | static i64 fts5IndexDataVersion(Fts5Index *p){ | |||
| 10852 | i64 iVersion = 0; | |||
| 10853 | ||||
| 10854 | if( p->rc==SQLITE_OK0 ){ | |||
| 10855 | if( p->pDataVersion==0 ){ | |||
| 10856 | p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, | |||
| 10857 | sqlite3_mprintfsqlite3_api->mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) | |||
| 10858 | ); | |||
| 10859 | if( p->rc ) return 0; | |||
| 10860 | } | |||
| 10861 | ||||
| 10862 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(p->pDataVersion) ){ | |||
| 10863 | iVersion = sqlite3_column_int64sqlite3_api->column_int64(p->pDataVersion, 0); | |||
| 10864 | } | |||
| 10865 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDataVersion); | |||
| 10866 | } | |||
| 10867 | ||||
| 10868 | return iVersion; | |||
| 10869 | } | |||
| 10870 | ||||
| 10871 | /* | |||
| 10872 | ** Read, deserialize and return the structure record. | |||
| 10873 | ** | |||
| | ** The decoded object is cached in p->pStruct. If no cached object exists, | |||
| | ** the current data version is stored in p->iStructVersion and the record | |||
| | ** is loaded with fts5StructureReadUncached(). The returned pointer is the | |||
| | ** cached object with one extra reference taken via fts5StructureRef(). | |||
| | ** | |||
| 10874 | ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array | |||
| 10875 | ** are over-allocated as described for function fts5StructureDecode() | |||
| 10876 | ** above. | |||
| 10877 | ** | |||
| 10878 | ** If an error occurs, NULL is returned and an error code left in the | |||
| 10879 | ** Fts5Index handle. If an error has already occurred when this function | |||
| 10880 | ** is called, NULL is returned. | |||
| 10881 | */ | |||
| 10882 | static Fts5Structure *fts5StructureRead(Fts5Index *p){ | |||
| 10883 | | |||
| 10884 | if( p->pStruct==0 ){ | |||
| 10885 | p->iStructVersion = fts5IndexDataVersion(p); | |||
| 10886 | if( p->rc==SQLITE_OK0 ){ | |||
| 10887 | p->pStruct = fts5StructureReadUncached(p); | |||
| 10888 | } | |||
| 10889 | } | |||
| 10890 | | |||
| 10891 | #if 0 | |||
| 10892 | else{ | |||
| 10893 | Fts5Structure *pTest = fts5StructureReadUncached(p); | |||
| 10894 | if( pTest ){ | |||
| 10895 | int i, j; | |||
| 10896 | assert_nc( p->pStruct->nSegment==pTest->nSegment )((void) (0)); | |||
| 10897 | assert_nc( p->pStruct->nLevel==pTest->nLevel )((void) (0)); | |||
| 10898 | for(i=0; i<pTest->nLevel; i++){ | |||
| 10899 | assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge )((void) (0)); | |||
| 10900 | assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg )((void) (0)); | |||
| 10901 | for(j=0; j<pTest->aLevel[i].nSeg; j++){ | |||
| 10902 | Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j]; | |||
| 10903 | Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j]; | |||
| 10904 | assert_nc( p1->iSegid==p2->iSegid )((void) (0)); | |||
| 10905 | assert_nc( p1->pgnoFirst==p2->pgnoFirst )((void) (0)); | |||
| 10906 | assert_nc( p1->pgnoLast==p2->pgnoLast )((void) (0)); | |||
| 10907 | } | |||
| 10908 | } | |||
| 10909 | fts5StructureRelease(pTest); | |||
| 10910 | } | |||
| 10911 | } | |||
| 10912 | #endif | |||
| 10913 | | |||
| 10914 | if( p->rc!=SQLITE_OK0 ) return 0; | |||
| 10915 | assert( p->iStructVersion!=0 )((void) (0)); | |||
| 10916 | assert( p->pStruct!=0 )((void) (0)); | |||
| 10917 | fts5StructureRef(p->pStruct); /* The caller receives its own reference to the cached object */ | |||
| 10918 | return p->pStruct; | |||
| 10919 | } | |||
| 10920 | ||||
| 10921 | static void fts5StructureInvalidate(Fts5Index *p){ | |||
| 10922 | if( p->pStruct ){ | |||
| 10923 | fts5StructureRelease(p->pStruct); | |||
| 10924 | p->pStruct = 0; | |||
| 10925 | } | |||
| 10926 | } | |||
| 10927 | ||||
| 10928 | /* | |||
| 10929 | ** Return the total number of segments in index structure pStruct, computed | |||
| 10930 | ** by summing nSeg over every level. Returns 0 if pStruct is NULL. This | |||
| | ** function is only ever used as part of assert() conditions, so it is | |||
| | ** compiled only when SQLITE_DEBUG is defined. | |||
| 10931 | */ | |||
| 10932 | #ifdef SQLITE_DEBUG | |||
| 10933 | static int fts5StructureCountSegments(Fts5Structure *pStruct){ | |||
| 10934 | int nSegment = 0; /* Total number of segments */ | |||
| 10935 | if( pStruct ){ | |||
| 10936 | int iLvl; /* Used to iterate through levels */ | |||
| 10937 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
| 10938 | nSegment += pStruct->aLevel[iLvl].nSeg; | |||
| 10939 | } | |||
| 10940 | } | |||
| 10941 | | |||
| 10942 | return nSegment; | |||
| 10943 | } | |||
| 10944 | #endif | |||
| 10945 | ||||
| 10946 | #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pBlob , nBlob); (pBuf)->n += nBlob; } { \ | |||
| 10947 | assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) )((void) (0)); \ | |||
| 10948 | memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \ | |||
| 10949 | (pBuf)->n += nBlob; \ | |||
| 10950 | } | |||
| 10951 | ||||
| 10952 | #define fts5BufferSafeAppendVarint(pBuf, iVal){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf )->n], (iVal)); ((void) (0)); } { \ | |||
| 10953 | (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \ | |||
| 10954 | assert( (pBuf)->nSpace>=(pBuf)->n )((void) (0)); \ | |||
| 10955 | } | |||
| 10956 | ||||
| 10957 | ||||
| 10958 | /* | |||
| 10959 | ** Serialize and store the "structure" record. | |||
| 10960 | ** | |||
| | ** The record is written in the layout read by fts5StructureDecode(): the | |||
| | ** 4-byte configuration cookie, the FTS5_STRUCTURE_V2 marker if | |||
| | ** pStruct->nOriginCntr>0, varints nLevel, nSegment and nWriteCounter, and | |||
| | ** then for each level nMerge, nSeg and the segment entries. The extra | |||
| | ** per-segment origin/tombstone/entry fields are written only when | |||
| | ** pStruct->nOriginCntr>0. | |||
| | ** | |||
| 10961 | ** If an error occurs, leave an error code in the Fts5Index object. If an | |||
| 10962 | ** error has already occurred, this function is a no-op. | |||
| 10963 | */ | |||
| 10964 | static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ | |||
| 10965 | if( p->rc==SQLITE_OK0 ){ | |||
| 10966 | Fts5Buffer buf; /* Buffer to serialize record into */ | |||
| 10967 | int iLvl; /* Used to iterate through levels */ | |||
| 10968 | int iCookie; /* Cookie value to store */ | |||
| 10969 | int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9)); /* Space reserved for the header. NOTE(review): the non-V2 size has two 9-byte slots while three varints are appended below -- presumably fine because nLevel and nSegment encode short; confirm */ | |||
| 10970 | | |||
| 10971 | assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) )((void) (0)); | |||
| 10972 | memset(&buf, 0, sizeof(Fts5Buffer)); | |||
| 10973 | | |||
| 10974 | /* Append the current configuration cookie (negative values are stored as 0) */ | |||
| 10975 | iCookie = p->pConfig->iCookie; | |||
| 10976 | if( iCookie<0 ) iCookie = 0; | |||
| 10977 | | |||
| 10978 | if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){ /* Header is written with the unchecked "Safe" macros into the space reserved here */ | |||
| 10979 | sqlite3Fts5Put32(buf.p, iCookie); | |||
| 10980 | buf.n = 4; | |||
| 10981 | if( pStruct->nOriginCntr>0 ){ | |||
| 10982 | fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4){ ((void) (0)); memcpy(&(&buf)->p[(&buf)->n ], "\xFF\x00\x00\x01", 4); (&buf)->n += 4; }; | |||
| 10983 | } | |||
| 10984 | fts5BufferSafeAppendVarint(&buf, pStruct->nLevel){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], (pStruct->nLevel)); ((void) (0)); }; | |||
| 10985 | fts5BufferSafeAppendVarint(&buf, pStruct->nSegment){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], (pStruct->nSegment)); ((void) (0)); }; | |||
| 10986 | fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter){ (&buf)->n += sqlite3Fts5PutVarint(&(&buf)-> p[(&buf)->n], ((i64)pStruct->nWriteCounter)); ((void ) (0)); }; | |||
| 10987 | } | |||
| 10988 | | |||
| 10989 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ /* Per-level data uses the error-propagating append, which is handed p->rc */ | |||
| 10990 | int iSeg; /* Used to iterate through segments */ | |||
| 10991 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | |||
| 10992 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pLvl ->nMerge); | |||
| 10993 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pLvl ->nSeg); | |||
| 10994 | assert( pLvl->nMerge<=pLvl->nSeg )((void) (0)); | |||
| 10995 | | |||
| 10996 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | |||
| 10997 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | |||
| 10998 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iSegid); | |||
| 10999 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->pgnoFirst); | |||
| 11000 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->pgnoLast); | |||
| 11001 | if( pStruct->nOriginCntr>0 ){ /* Same condition that selected the V2 marker above */ | |||
| 11002 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iOrigin1); | |||
| 11003 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->iOrigin2); | |||
| 11004 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nPgTombstone); | |||
| 11005 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nEntryTombstone); | |||
| 11006 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->nEntry); | |||
| 11007 | } | |||
| 11008 | } | |||
| 11009 | } | |||
| 11010 | | |||
| 11011 | fts5DataWrite(p, FTS5_STRUCTURE_ROWID10, buf.p, buf.n); /* Presumably a no-op if p->rc was set above -- confirm against fts5DataWrite() */ | |||
| 11012 | fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf); | |||
| 11013 | } | |||
| 11014 | } | |||
| 11015 | ||||
| 11016 | #if 0 /* Disabled debugging aid: prints zCaption followed by the text produced by fts5DebugStructure() to stdout */ | |||
| 11017 | static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*); | |||
| 11018 | static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){ | |||
| 11019 | int rc = SQLITE_OK0; | |||
| 11020 | Fts5Buffer buf; | |||
| 11021 | memset(&buf, 0, sizeof(buf)); | |||
| 11022 | fts5DebugStructure(&rc, &buf, pStruct); | |||
| 11023 | fprintf(stdout, "%s: %s\n", zCaption, buf.p); | |||
| 11024 | fflush(stdout); | |||
| 11025 | fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf); | |||
| 11026 | } | |||
| 11027 | #else /* Normal builds: calls to fts5PrintStructure() expand to nothing */ | |||
| 11028 | # define fts5PrintStructure(x,y) | |||
| 11029 | #endif | |||
| 11030 | ||||
| 11031 | static int fts5SegmentSize(Fts5StructureSegment *pSeg){ | |||
| 11032 | return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; | |||
| 11033 | } | |||
| 11034 | ||||
| 11035 | /* | |||
| 11036 | ** Promote as many segments as possible to level iPromote of index | |||
| 11037 | ** structure pStruct. The structure is modified in place and nothing is | |||
| 11038 | ** returned. | |||
| | ** | |||
| | ** Nothing is done if level iPromote has nMerge set. Otherwise the levels | |||
| | ** after iPromote are visited in order, and within each level segments are | |||
| | ** taken from the end of aSeg[] backwards. Each segment of at most | |||
| | ** szPromote pages is inserted at the front of level iPromote. The scan | |||
| | ** stops at the first level that has nMerge set or the first segment | |||
| | ** larger than szPromote. | |||
| | ** | |||
| | ** If growing level iPromote fails, the error code is left in p->rc (set | |||
| | ** by fts5StructureExtendLevel()) and the function returns immediately. | |||
| 11039 | */ | |||
| 11040 | static void fts5StructurePromoteTo( | |||
| 11041 | Fts5Index *p, | |||
| 11042 | int iPromote, | |||
| 11043 | int szPromote, | |||
| 11044 | Fts5Structure *pStruct | |||
| 11045 | ){ | |||
| 11046 | int il, is; | |||
| 11047 | Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; | |||
| 11048 | | |||
| 11049 | if( pOut->nMerge==0 ){ | |||
| 11050 | for(il=iPromote+1; il<pStruct->nLevel; il++){ | |||
| 11051 | Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; | |||
| 11052 | if( pLvl->nMerge ) return; | |||
| 11053 | for(is=pLvl->nSeg-1; is>=0; is--){ | |||
| 11054 | int sz = fts5SegmentSize(&pLvl->aSeg[is]); | |||
| 11055 | if( sz>szPromote ) return; | |||
| 11056 | fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); /* bInsert=1: opens a zeroed slot at pOut->aSeg[0] */ | |||
| 11057 | if( p->rc ) return; | |||
| 11058 | memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); | |||
| 11059 | pOut->nSeg++; | |||
| 11060 | pLvl->nSeg--; /* The moved entry was the last one counted on its old level */ | |||
| 11061 | } | |||
| 11062 | } | |||
| 11063 | } | |||
| 11064 | } | |||
| 11065 | ||||
| 11066 | /* | |||
| 11067 | ** A new segment has just been written to level iLvl of index structure | |||
| 11068 | ** pStruct. This function determines if any segments should be promoted | |||
| 11069 | ** as a result. Segments are promoted in two scenarios: | |||
| 11070 | ** | |||
| 11071 | ** a) If the segment just written is smaller than one or more segments | |||
| 11072 | ** within the previous populated level, it is promoted to the previous | |||
| 11073 | ** populated level. | |||
| 11074 | ** | |||
| 11075 | ** b) If the segment just written is larger than the newest segment on | |||
| 11076 | ** the next populated level, then that segment, and any other adjacent | |||
| 11077 | ** segments that are also smaller than the one just written, are | |||
| 11078 | ** promoted. | |||
| 11079 | ** | |||
| 11080 | ** If one or more segments are promoted, the structure object is updated | |||
| 11081 | ** to reflect this. | |||
| | ** | |||
| | ** This function is a no-op if p->rc is already set or if level iLvl | |||
| | ** contains no segments. The segment "just written" is taken to be the | |||
| | ** last entry of level iLvl. | |||
| 11082 | */ | |||
| 11083 | static void fts5StructurePromote( | |||
| 11084 | Fts5Index *p, /* FTS5 backend object */ | |||
| 11085 | int iLvl, /* Index level just updated */ | |||
| 11086 | Fts5Structure *pStruct /* Index structure */ | |||
| 11087 | ){ | |||
| 11088 | if( p->rc==SQLITE_OK0 ){ | |||
| 11089 | int iTst; | |||
| 11090 | int iPromote = -1; | |||
| 11091 | int szPromote = 0; /* Promote anything this size or smaller */ | |||
| 11092 | Fts5StructureSegment *pSeg; /* Segment just written */ | |||
| 11093 | int szSeg; /* Size of segment just written */ | |||
| 11094 | int nSeg = pStruct->aLevel[iLvl].nSeg; | |||
| 11095 | | |||
| 11096 | if( nSeg==0 ) return; | |||
| 11097 | pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; | |||
| 11098 | szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); /* Same page count that fts5SegmentSize() computes */ | |||
| 11099 | | |||
| 11100 | /* Check for condition (a) */ | |||
| 11101 | for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); /* Empty body: walk back to the nearest non-empty level, or to -1 */ | |||
| 11102 | if( iTst>=0 ){ | |||
| 11103 | int i; | |||
| 11104 | int szMax = 0; | |||
| 11105 | Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; | |||
| 11106 | assert( pTst->nMerge==0 )((void) (0)); | |||
| 11107 | for(i=0; i<pTst->nSeg; i++){ /* szMax = page count of the largest segment on level iTst */ | |||
| 11108 | int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; | |||
| 11109 | if( sz>szMax ) szMax = sz; | |||
| 11110 | } | |||
| 11111 | if( szMax>=szSeg ){ | |||
| 11112 | /* Condition (a) is true. Promote the newest segment on level | |||
| 11113 | ** iLvl to level iTst. */ | |||
| 11114 | iPromote = iTst; | |||
| 11115 | szPromote = szMax; | |||
| 11116 | } | |||
| 11117 | } | |||
| 11118 | | |||
| 11119 | /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() | |||
| 11120 | ** is a no-op if it is not. */ | |||
| 11121 | if( iPromote<0 ){ | |||
| 11122 | iPromote = iLvl; | |||
| 11123 | szPromote = szSeg; | |||
| 11124 | } | |||
| 11125 | fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); | |||
| 11126 | } | |||
| 11127 | } | |||
| 11128 | ||||
| 11129 | ||||
| 11130 | /* | |||
| 11131 | ** Advance the iterator passed as the only argument. If the end of the | |||
| 11132 | ** doclist-index page is reached, return non-zero. | |||
| | ** | |||
| | ** On the first call for a page (pLvl->iOff==0) the leaf page number | |||
| | ** varint at offset 1 and the rowid varint that follows it are read, and | |||
| | ** the resulting offset is remembered in pLvl->iFirstOff. On later calls | |||
| | ** any run of zero bytes is skipped, the leaf page number is advanced by | |||
| | ** one plus the number of zero bytes skipped, and the next varint is added | |||
| | ** to the current rowid. If only zero bytes (or nothing) remain before | |||
| | ** pData->nn, pLvl->bEof is set instead. | |||
| 11133 | */ | |||
| 11134 | static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ | |||
| 11135 | Fts5Data *pData = pLvl->pData; | |||
| 11136 | | |||
| 11137 | if( pLvl->iOff==0 ){ | |||
| 11138 | assert( pLvl->bEof==0 )((void) (0)); | |||
| 11139 | pLvl->iOff = 1; /* Byte 0 is not decoded here */ | |||
| 11140 | pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno)sqlite3Fts5GetVarint32(&pData->p[1],(u32*)&(pLvl-> iLeafPgno)); | |||
| 11141 | pLvl->iOff += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); | |||
| 11142 | pLvl->iFirstOff = pLvl->iOff; | |||
| 11143 | }else{ | |||
| 11144 | int iOff; | |||
| 11145 | for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ | |||
| 11146 | if( pData->p[iOff] ) break; | |||
| 11147 | } | |||
| 11148 | | |||
| 11149 | if( iOff<pData->nn ){ | |||
| 11150 | u64 iVal; | |||
| 11151 | pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; /* One page per zero byte skipped, plus one for this entry */ | |||
| 11152 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[iOff], &iVal); | |||
| 11153 | pLvl->iRowid += iVal; /* Stored value is a delta from the previous rowid */ | |||
| 11154 | pLvl->iOff = iOff; | |||
| 11155 | }else{ | |||
| 11156 | pLvl->bEof = 1; | |||
| 11157 | } | |||
| 11158 | } | |||
| 11159 | | |||
| 11160 | return pLvl->bEof; | |||
| 11161 | } | |||
| 11162 | ||||
| 11163 | /* | |||
| 11164 | ** Advance level iLvl of iterator pIter to its next entry. | |||
| | ** | |||
| | ** If that level's page is exhausted and a parent level (iLvl+1) exists, | |||
| | ** the parent is advanced recursively. If the parent is not at EOF | |||
| | ** afterwards, this level's page is released, its state is zeroed, the | |||
| | ** page the parent now points at (pLvl[1].iLeafPgno) is loaded, and the | |||
| | ** level is positioned on that page's first entry. | |||
| | ** | |||
| | ** Returns the EOF flag of level 0. | |||
| 11165 | */ | |||
| 11166 | static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ | |||
| 11167 | Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; | |||
| 11168 | | |||
| 11169 | assert( iLvl<pIter->nLvl )((void) (0)); | |||
| 11170 | if( fts5DlidxLvlNext(pLvl) ){ | |||
| 11171 | if( (iLvl+1) < pIter->nLvl ){ | |||
| 11172 | fts5DlidxIterNextR(p, pIter, iLvl+1); | |||
| 11173 | if( pLvl[1].bEof==0 ){ | |||
| 11174 | fts5DataRelease(pLvl->pData); | |||
| 11175 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); /* Also clears bEof and iOff, so the next call decodes the page header */ | |||
| 11176 | pLvl->pData = fts5DataRead(p, | |||
| 11177 | FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(iLvl) << (31)) + ((i64)(pLvl[1].iLeafPgno )) ) | |||
| 11178 | ); | |||
| 11179 | if( pLvl->pData ) fts5DlidxLvlNext(pLvl); | |||
| 11180 | } | |||
| 11181 | } | |||
| 11182 | } | |||
| 11183 | | |||
| 11184 | return pIter->aLvl[0].bEof; | |||
| 11185 | } | |||
| 11186 | static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ /* Advance pIter starting at level 0; returns level 0's EOF flag */ | |||
| 11187 | return fts5DlidxIterNextR(p, pIter, 0); | |||
| 11188 | } | |||
| 11189 | ||||
| 11190 | /* | |||
| 11191 | ** The iterator passed as the first argument has the following fields set | |||
| 11192 | ** as follows. This function sets up the rest of the iterator so that it | |||
| 11193 | ** points to the first rowid in the doclist-index. | |||
| 11194 | ** | |||
| 11195 | ** pData: | |||
| 11196 | ** pointer to doclist-index record, | |||
| 11197 | ** | |||
| 11198 | ** When this function is called pIter->iLeafPgno is the page number the | |||
| 11199 | ** doclist is associated with (the one featuring the term). | |||
| | ** | |||
| | ** Each level is advanced once with fts5DlidxLvlNext(). The return value | |||
| | ** is the EOF flag of level 0. | |||
| 11200 | */ | |||
| 11201 | static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ | |||
| 11202 | int i; | |||
| 11203 | for(i=0; i<pIter->nLvl; i++){ | |||
| 11204 | fts5DlidxLvlNext(&pIter->aLvl[i]); | |||
| 11205 | } | |||
| 11206 | return pIter->aLvl[0].bEof; | |||
| 11207 | } | |||
| 11208 | ||||
| 11209 | ||||
| 11210 | static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ | |||
| 11211 | return p->rc!=SQLITE_OK0 || pIter->aLvl[0].bEof; | |||
| 11212 | } | |||
| 11213 | ||||
| 11214 | static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ | |||
| 11215 | int i; | |||
| 11216 | ||||
| 11217 | /* Advance each level to the last entry on the last page */ | |||
| 11218 | for(i=pIter->nLvl-1; p->rc==SQLITE_OK0 && i>=0; i--){ | |||
| 11219 | Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; | |||
| 11220 | while( fts5DlidxLvlNext(pLvl)==0 ); | |||
| 11221 | pLvl->bEof = 0; | |||
| 11222 | ||||
| 11223 | if( i>0 ){ | |||
| 11224 | Fts5DlidxLvl *pChild = &pLvl[-1]; | |||
| 11225 | fts5DataRelease(pChild->pData); | |||
| 11226 | memset(pChild, 0, sizeof(Fts5DlidxLvl)); | |||
| 11227 | pChild->pData = fts5DataRead(p, | |||
| 11228 | FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i-1) << (31)) + ((i64)(pLvl->iLeafPgno )) ) | |||
| 11229 | ); | |||
| 11230 | } | |||
| 11231 | } | |||
| 11232 | } | |||
| 11233 | ||||
| 11234 | /* | |||
| 11235 | ** Move the iterator passed as the only argument to the previous entry. | |||
| | ** | |||
| | ** If the iterator is already on the first entry of its page | |||
| | ** (iOff<=iFirstOff), bEof is set. Otherwise the level is rewound to the | |||
| | ** first entry and stepped forward until the next entry would end at or | |||
| | ** beyond the offset the iterator had on entry; entries can only be decoded | |||
| | ** in the forward direction. Returns pLvl->bEof. | |||
| 11236 | */ | |||
| 11237 | static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ | |||
| 11238 | int iOff = pLvl->iOff; | |||
| 11239 | | |||
| 11240 | assert( pLvl->bEof==0 )((void) (0)); | |||
| 11241 | if( iOff<=pLvl->iFirstOff ){ | |||
| 11242 | pLvl->bEof = 1; | |||
| 11243 | }else{ | |||
| 11244 | u8 *a = pLvl->pData->p; | |||
| 11245 | | |||
| 11246 | pLvl->iOff = 0; /* iOff==0 makes fts5DlidxLvlNext() re-read the page header and first entry */ | |||
| 11247 | fts5DlidxLvlNext(pLvl); | |||
| 11248 | while( 1 ){ | |||
| 11249 | int nZero = 0; | |||
| 11250 | int ii = pLvl->iOff; | |||
| 11251 | u64 delta = 0; | |||
| 11252 | | |||
| 11253 | while( a[ii]==0 ){ /* No explicit bound: presumably stops at the non-zero byte that starts the entry ending at the saved offset -- confirm */ | |||
| 11254 | nZero++; | |||
| 11255 | ii++; | |||
| 11256 | } | |||
| 11257 | ii += sqlite3Fts5GetVarint(&a[ii], &delta); | |||
| 11258 | | |||
| 11259 | if( ii>=iOff ) break; /* The entry just peeked at is the one we started on; stay on its predecessor */ | |||
| 11260 | pLvl->iLeafPgno += nZero+1; | |||
| 11261 | pLvl->iRowid += delta; | |||
| 11262 | pLvl->iOff = ii; | |||
| 11263 | } | |||
| 11264 | } | |||
| 11265 | | |||
| 11266 | return pLvl->bEof; | |||
| 11267 | } | |||
| 11268 | ||||
| 11269 | static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ | |||
| 11270 | Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; | |||
| 11271 | ||||
| 11272 | assert( iLvl<pIter->nLvl )((void) (0)); | |||
| 11273 | if( fts5DlidxLvlPrev(pLvl) ){ | |||
| 11274 | if( (iLvl+1) < pIter->nLvl ){ | |||
| 11275 | fts5DlidxIterPrevR(p, pIter, iLvl+1); | |||
| 11276 | if( pLvl[1].bEof==0 ){ | |||
| 11277 | fts5DataRelease(pLvl->pData); | |||
| 11278 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); | |||
| 11279 | pLvl->pData = fts5DataRead(p, | |||
| 11280 | FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)( ((i64)(pIter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(iLvl) << (31)) + ((i64)(pLvl[1].iLeafPgno )) ) | |||
| 11281 | ); | |||
| 11282 | if( pLvl->pData ){ | |||
| 11283 | while( fts5DlidxLvlNext(pLvl)==0 ); | |||
| 11284 | pLvl->bEof = 0; | |||
| 11285 | } | |||
| 11286 | } | |||
| 11287 | } | |||
| 11288 | } | |||
| 11289 | ||||
| 11290 | return pIter->aLvl[0].bEof; | |||
| 11291 | } | |||
| 11292 | static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ /* Step pIter back starting at level 0; returns level 0's EOF flag */ | |||
| 11293 | return fts5DlidxIterPrevR(p, pIter, 0); | |||
| 11294 | } | |||
| 11295 | ||||
| 11296 | /* | |||
| 11297 | ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). | |||
| | ** The page held by each level is released before the iterator itself is | |||
| | ** freed. Passing NULL is a harmless no-op. | |||
| 11298 | */ | |||
| 11299 | static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ | |||
| 11300 | if( pIter ){ | |||
| 11301 | int i; | |||
| 11302 | for(i=0; i<pIter->nLvl; i++){ | |||
| 11303 | fts5DataRelease(pIter->aLvl[i].pData); | |||
| 11304 | } | |||
| 11305 | sqlite3_freesqlite3_api->free(pIter); | |||
| 11306 | } | |||
| 11307 | } | |||
| 11308 | ||||
| 11309 | static Fts5DlidxIter *fts5DlidxIterInit( | |||
| 11310 | Fts5Index *p, /* Fts5 Backend to iterate within */ | |||
| 11311 | int bRev, /* True for ORDER BY ASC */ | |||
| 11312 | int iSegid, /* Segment id */ | |||
| 11313 | int iLeafPg /* Leaf page number to load dlidx for */ | |||
| 11314 | ){ | |||
| 11315 | Fts5DlidxIter *pIter = 0; | |||
| 11316 | int i; | |||
| 11317 | int bDone = 0; | |||
| 11318 | ||||
| 11319 | for(i=0; p->rc==SQLITE_OK0 && bDone==0; i++){ | |||
| 11320 | sqlite3_int64 nByte = SZ_FTS5DLIDXITER(i+1)(__builtin_offsetof(Fts5DlidxIter, aLvl)+(i+1)*sizeof(Fts5DlidxLvl )); | |||
| 11321 | Fts5DlidxIter *pNew; | |||
| 11322 | ||||
| 11323 | pNew = (Fts5DlidxIter*)sqlite3_realloc64sqlite3_api->realloc64(pIter, nByte); | |||
| 11324 | if( pNew==0 ){ | |||
| 11325 | p->rc = SQLITE_NOMEM7; | |||
| 11326 | }else{ | |||
| 11327 | i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(iLeafPg)) ); | |||
| 11328 | Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; | |||
| 11329 | pIter = pNew; | |||
| 11330 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); | |||
| 11331 | pLvl->pData = fts5DataRead(p, iRowid); | |||
| 11332 | if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ | |||
| 11333 | bDone = 1; | |||
| 11334 | } | |||
| 11335 | pIter->nLvl = i+1; | |||
| 11336 | } | |||
| 11337 | } | |||
| 11338 | ||||
| 11339 | if( p->rc==SQLITE_OK0 ){ | |||
| 11340 | pIter->iSegid = iSegid; | |||
| 11341 | if( bRev==0 ){ | |||
| 11342 | fts5DlidxIterFirst(pIter); | |||
| 11343 | }else{ | |||
| 11344 | fts5DlidxIterLast(p, pIter); | |||
| 11345 | } | |||
| 11346 | } | |||
| 11347 | ||||
| 11348 | if( p->rc!=SQLITE_OK0 ){ | |||
| 11349 | fts5DlidxIterFree(pIter); | |||
| 11350 | pIter = 0; | |||
| 11351 | } | |||
| 11352 | ||||
| 11353 | return pIter; | |||
| 11354 | } | |||
| 11355 | ||||
| 11356 | static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ | |||
| 11357 | return pIter->aLvl[0].iRowid; | |||
| 11358 | } | |||
| 11359 | static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ /* Return the leaf page number held by level 0 of the iterator */ | |||
| 11360 | return pIter->aLvl[0].iLeafPgno; | |||
| 11361 | } | |||
| 11362 | ||||
| 11363 | /* | |||
| 11364 | ** Load the next leaf page into the segment iterator. | |||
| 11365 | */ | |||
| 11366 | static void fts5SegIterNextPage( | |||
| 11367 | Fts5Index *p, /* FTS5 backend object */ | |||
| 11368 | Fts5SegIter *pIter /* Iterator to advance to next page */ | |||
| 11369 | ){ | |||
| 11370 | Fts5Data *pLeaf; | |||
| 11371 | Fts5StructureSegment *pSeg = pIter->pSeg; | |||
| 11372 | fts5DataRelease(pIter->pLeaf); | |||
| 11373 | pIter->iLeafPgno++; | |||
| 11374 | if( pIter->pNextLeaf ){ | |||
| 11375 | pIter->pLeaf = pIter->pNextLeaf; | |||
| 11376 | pIter->pNextLeaf = 0; | |||
| 11377 | }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ | |||
| 11378 | pIter->pLeaf = fts5LeafRead(p, | |||
| 11379 | FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pIter->iLeafPgno )) ) | |||
| 11380 | ); | |||
| 11381 | }else{ | |||
| 11382 | pIter->pLeaf = 0; | |||
| 11383 | } | |||
| 11384 | pLeaf = pIter->pLeaf; | |||
| 11385 | ||||
| 11386 | if( pLeaf ){ | |||
| 11387 | pIter->iPgidxOff = pLeaf->szLeaf; | |||
| 11388 | if( fts5LeafIsTermless(pLeaf)((pLeaf)->szLeaf >= (pLeaf)->nn) ){ | |||
| 11389 | pIter->iEndofDoclist = pLeaf->nn+1; | |||
| 11390 | }else{ | |||
| 11391 | pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],sqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)) | |||
| 11392 | pIter->iEndofDoclistsqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)) | |||
| 11393 | )sqlite3Fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], (u32*)&(pIter->iEndofDoclist)); | |||
| 11394 | } | |||
| 11395 | } | |||
| 11396 | } | |||
| 11397 | ||||
| 11398 | /* | |||
| 11399 | ** Argument p points to a buffer containing a varint to be interpreted as a | |||
| 11400 | ** position list size field. Read the varint and return the number of bytes | |||
| 11401 | ** read. Before returning, set *pnSz to the number of bytes in the position | |||
| 11402 | ** list, and *pbDel to true if the delete flag is set, or false otherwise. | |||
| 11403 | */ | |||
| 11404 | static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ | |||
| 11405 | int nSz; | |||
| 11406 | int n = 0; | |||
| 11407 | fts5FastGetVarint32(p, n, nSz){ nSz = (p)[n++]; if( nSz & 0x80 ){ n--; n += sqlite3Fts5GetVarint32 (&(p)[n],(u32*)&(nSz)); } }; | |||
| 11408 | assert_nc( nSz>=0 )((void) (0)); | |||
| 11409 | *pnSz = nSz/2; | |||
| 11410 | *pbDel = nSz & 0x0001; | |||
| 11411 | return n; | |||
| 11412 | } | |||
| 11413 | ||||
| 11414 | /* | |||
| 11415 | ** Fts5SegIter.iLeafOffset currently points to the first byte of a | |||
| 11416 | ** position-list size field. Read the value of the field and store it | |||
| 11417 | ** in the following variables: | |||
| 11418 | ** | |||
| 11419 | ** Fts5SegIter.nPos | |||
| 11420 | ** Fts5SegIter.bDel | |||
| 11421 | ** | |||
| 11422 | ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the | |||
| 11423 | ** position list content (if any). | |||
| 11424 | */ | |||
| 11425 | static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ | |||
| 11426 | if( p->rc==SQLITE_OK0 ){ | |||
| 11427 | int iOff = pIter->iLeafOffset; /* Offset to read at */ | |||
| 11428 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | |||
| 11429 | if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 11430 | int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf)(((pIter->iEndofDoclist) < (pIter->pLeaf->szLeaf) ) ? (pIter->iEndofDoclist) : (pIter->pLeaf->szLeaf)); | |||
| 11431 | pIter->bDel = 0; | |||
| 11432 | pIter->nPos = 1; | |||
| 11433 | if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ | |||
| 11434 | pIter->bDel = 1; | |||
| 11435 | iOff++; | |||
| 11436 | if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ | |||
| 11437 | pIter->nPos = 1; | |||
| 11438 | iOff++; | |||
| 11439 | }else{ | |||
| 11440 | pIter->nPos = 0; | |||
| 11441 | } | |||
| 11442 | } | |||
| 11443 | }else{ | |||
| 11444 | int nSz; | |||
| 11445 | fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz){ nSz = (pIter->pLeaf->p)[iOff++]; if( nSz & 0x80 ) { iOff--; iOff += sqlite3Fts5GetVarint32(&(pIter->pLeaf ->p)[iOff],(u32*)&(nSz)); } }; | |||
| 11446 | pIter->bDel = (nSz & 0x0001); | |||
| 11447 | pIter->nPos = nSz>>1; | |||
| 11448 | assert_nc( pIter->nPos>=0 )((void) (0)); | |||
| 11449 | } | |||
| 11450 | pIter->iLeafOffset = iOff; | |||
| 11451 | } | |||
| 11452 | } | |||
| 11453 | ||||
| 11454 | static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ | |||
| 11455 | u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ | |||
| 11456 | i64 iOff = pIter->iLeafOffset; | |||
| 11457 | ||||
| 11458 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | |||
| 11459 | while( iOff>=pIter->pLeaf->szLeaf ){ | |||
| 11460 | fts5SegIterNextPage(p, pIter); | |||
| 11461 | if( pIter->pLeaf==0 ){ | |||
| 11462 | if( p->rc==SQLITE_OK0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 11463 | return; | |||
| 11464 | } | |||
| 11465 | iOff = 4; | |||
| 11466 | a = pIter->pLeaf->p; | |||
| 11467 | } | |||
| 11468 | iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); | |||
| 11469 | pIter->iLeafOffset = iOff; | |||
| 11470 | } | |||
| 11471 | ||||
| 11472 | /* | |||
| 11473 | ** Fts5SegIter.iLeafOffset currently points to the first byte of the | |||
| 11474 | ** "nSuffix" field of a term. Function parameter nKeep contains the value | |||
| 11475 | ** of the "nPrefix" field (if there was one - it is passed 0 if this is | |||
| 11476 | ** the first term in the segment). | |||
| 11477 | ** | |||
| 11478 | ** This function populates: | |||
| 11479 | ** | |||
| 11480 | ** Fts5SegIter.term | |||
| 11481 | ** Fts5SegIter.rowid | |||
| 11482 | ** | |||
| 11483 | ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of | |||
| 11484 | ** the first position list. The position list belonging to document | |||
| 11485 | ** (Fts5SegIter.iRowid). | |||
| 11486 | */ | |||
| 11487 | static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ | |||
| 11488 | u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ | |||
| 11489 | i64 iOff = pIter->iLeafOffset; /* Offset to read at */ | |||
| 11490 | int nNew; /* Bytes of new data */ | |||
| 11491 | ||||
| 11492 | iOff += fts5GetVarint32(&a[iOff], nNew)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nNew)); | |||
| 11493 | if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){ | |||
| 11494 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 11495 | return; | |||
| 11496 | } | |||
| 11497 | pIter->term.n = nKeep; | |||
| 11498 | fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&pIter->term ,nNew,&a[iOff]); | |||
| 11499 | assert( pIter->term.n<=pIter->term.nSpace )((void) (0)); | |||
| 11500 | iOff += nNew; | |||
| 11501 | pIter->iTermLeafOffset = iOff; | |||
| 11502 | pIter->iTermLeafPgno = pIter->iLeafPgno; | |||
| 11503 | pIter->iLeafOffset = iOff; | |||
| 11504 | ||||
| 11505 | if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ | |||
| 11506 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | |||
| 11507 | }else{ | |||
| 11508 | int nExtra; | |||
| 11509 | pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra)sqlite3Fts5GetVarint32(&a[pIter->iPgidxOff],(u32*)& (nExtra)); | |||
| 11510 | pIter->iEndofDoclist += nExtra; | |||
| 11511 | } | |||
| 11512 | ||||
| 11513 | fts5SegIterLoadRowid(p, pIter); | |||
| 11514 | } | |||
| 11515 | ||||
| 11516 | static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); | |||
| 11517 | static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); | |||
| 11518 | static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); | |||
| 11519 | ||||
| 11520 | static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ | |||
| 11521 | if( pIter->flags & FTS5_SEGITER_REVERSE0x02 ){ | |||
| 11522 | pIter->xNext = fts5SegIterNext_Reverse; | |||
| 11523 | }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 11524 | pIter->xNext = fts5SegIterNext_None; | |||
| 11525 | }else{ | |||
| 11526 | pIter->xNext = fts5SegIterNext; | |||
| 11527 | } | |||
| 11528 | } | |||
| 11529 | ||||
| 11530 | /* | |||
| 11531 | ** Allocate a tombstone hash page array object (pIter->pTombArray) for | |||
| 11532 | ** the iterator passed as the second argument. If an OOM error occurs, | |||
| 11533 | ** leave an error in the Fts5Index object. | |||
| 11534 | */ | |||
| 11535 | static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){ | |||
| 11536 | const int nTomb = pIter->pSeg->nPgTombstone; | |||
| 11537 | if( nTomb>0 ){ | |||
| 11538 | int nByte = SZ_FTS5TOMBSTONEARRAY(nTomb+1)(__builtin_offsetof(Fts5TombstoneArray, apTombstone)+(nTomb+1 )*sizeof(Fts5Data*)); | |||
| 11539 | Fts5TombstoneArray *pNew; | |||
| 11540 | pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte); | |||
| 11541 | if( pNew ){ | |||
| 11542 | pNew->nTombstone = nTomb; | |||
| 11543 | pNew->nRef = 1; | |||
| 11544 | pIter->pTombArray = pNew; | |||
| 11545 | } | |||
| 11546 | } | |||
| 11547 | } | |||
| 11548 | ||||
| 11549 | /* | |||
| 11550 | ** Initialize the iterator object pIter to iterate through the entries in | |||
| 11551 | ** segment pSeg. The iterator is left pointing to the first entry when | |||
| 11552 | ** this function returns. | |||
| 11553 | ** | |||
| 11554 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | |||
| 11555 | ** an error has already occurred when this function is called, it is a no-op. | |||
| 11556 | */ | |||
| 11557 | static void fts5SegIterInit( | |||
| 11558 | Fts5Index *p, /* FTS index object */ | |||
| 11559 | Fts5StructureSegment *pSeg, /* Description of segment */ | |||
| 11560 | Fts5SegIter *pIter /* Object to populate */ | |||
| 11561 | ){ | |||
| 11562 | if( pSeg->pgnoFirst==0 ){ | |||
| 11563 | /* This happens if the segment is being used as an input to an incremental | |||
| 11564 | ** merge and all data has already been "trimmed". See function | |||
| 11565 | ** fts5TrimSegments() for details. In this case leave the iterator empty. | |||
| 11566 | ** The caller will see the (pIter->pLeaf==0) and assume the iterator is | |||
| 11567 | ** at EOF already. */ | |||
| 11568 | assert( pIter->pLeaf==0 )((void) (0)); | |||
| 11569 | return; | |||
| 11570 | } | |||
| 11571 | ||||
| 11572 | if( p->rc==SQLITE_OK0 ){ | |||
| 11573 | memset(pIter, 0, sizeof(*pIter)); | |||
| 11574 | fts5SegIterSetNext(p, pIter); | |||
| 11575 | pIter->pSeg = pSeg; | |||
| 11576 | pIter->iLeafPgno = pSeg->pgnoFirst-1; | |||
| 11577 | do { | |||
| 11578 | fts5SegIterNextPage(p, pIter); | |||
| 11579 | }while( p->rc==SQLITE_OK0 && pIter->pLeaf && pIter->pLeaf->nn==4 ); | |||
| 11580 | } | |||
| 11581 | ||||
| 11582 | if( p->rc==SQLITE_OK0 && pIter->pLeaf ){ | |||
| 11583 | pIter->iLeafOffset = 4; | |||
| 11584 | assert( pIter->pLeaf!=0 )((void) (0)); | |||
| 11585 | assert_nc( pIter->pLeaf->nn>4 )((void) (0)); | |||
| 11586 | assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 )((void) (0)); | |||
| 11587 | pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; | |||
| 11588 | fts5SegIterLoadTerm(p, pIter, 0); | |||
| 11589 | fts5SegIterLoadNPos(p, pIter); | |||
| 11590 | fts5SegIterAllocTombstone(p, pIter); | |||
| 11591 | } | |||
| 11592 | } | |||
| 11593 | ||||
| 11594 | /* | |||
| 11595 | ** This function is only ever called on iterators created by calls to | |||
| 11596 | ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. | |||
| 11597 | ** | |||
| 11598 | ** The iterator is in an unusual state when this function is called: the | |||
| 11599 | ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of | |||
| 11600 | ** the position-list size field for the first relevant rowid on the page. | |||
| 11601 | ** Fts5SegIter.rowid is set, but nPos and bDel are not. | |||
| 11602 | ** | |||
| 11603 | ** This function advances the iterator so that it points to the last | |||
| 11604 | ** relevant rowid on the page and, if necessary, initializes the | |||
| 11605 | ** aRowidOffset[] and iRowidOffset variables. At this point the iterator | |||
| 11606 | ** is in its regular state - Fts5SegIter.iLeafOffset points to the first | |||
| 11607 | ** byte of the position list content associated with said rowid. | |||
| 11608 | */ | |||
| 11609 | static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ | |||
| 11610 | int eDetail = p->pConfig->eDetail; | |||
| 11611 | int n = pIter->pLeaf->szLeaf; | |||
| 11612 | int i = pIter->iLeafOffset; | |||
| 11613 | u8 *a = pIter->pLeaf->p; | |||
| 11614 | int iRowidOffset = 0; | |||
| 11615 | ||||
| 11616 | if( n>pIter->iEndofDoclist ){ | |||
| 11617 | n = pIter->iEndofDoclist; | |||
| 11618 | } | |||
| 11619 | ||||
| 11620 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | |||
| 11621 | while( 1 ){ | |||
| 11622 | u64 iDelta = 0; | |||
| 11623 | ||||
| 11624 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 11625 | /* todo */ | |||
| 11626 | if( i<n && a[i]==0 ){ | |||
| 11627 | i++; | |||
| 11628 | if( i<n && a[i]==0 ) i++; | |||
| 11629 | } | |||
| 11630 | }else{ | |||
| 11631 | int nPos; | |||
| 11632 | int bDummy; | |||
| 11633 | i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); | |||
| 11634 | i += nPos; | |||
| 11635 | } | |||
| 11636 | if( i>=n ) break; | |||
| 11637 | i += fts5GetVarintsqlite3Fts5GetVarint(&a[i], &iDelta); | |||
| 11638 | pIter->iRowid += iDelta; | |||
| 11639 | ||||
| 11640 | /* If necessary, grow the pIter->aRowidOffset[] array. */ | |||
| 11641 | if( iRowidOffset>=pIter->nRowidOffset ){ | |||
| 11642 | int nNew = pIter->nRowidOffset + 8; | |||
| 11643 | int *aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(pIter->aRowidOffset,nNew*sizeof(int)); | |||
| 11644 | if( aNew==0 ){ | |||
| 11645 | p->rc = SQLITE_NOMEM7; | |||
| 11646 | break; | |||
| 11647 | } | |||
| 11648 | pIter->aRowidOffset = aNew; | |||
| 11649 | pIter->nRowidOffset = nNew; | |||
| 11650 | } | |||
| 11651 | ||||
| 11652 | pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; | |||
| 11653 | pIter->iLeafOffset = i; | |||
| 11654 | } | |||
| 11655 | pIter->iRowidOffset = iRowidOffset; | |||
| 11656 | fts5SegIterLoadNPos(p, pIter); | |||
| 11657 | } | |||
| 11658 | ||||
| 11659 | /* | |||
| 11660 | ** | |||
| 11661 | */ | |||
| 11662 | static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ | |||
| 11663 | assert( pIter->flags & FTS5_SEGITER_REVERSE )((void) (0)); | |||
| 11664 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | |||
| 11665 | ||||
| 11666 | fts5DataRelease(pIter->pLeaf); | |||
| 11667 | pIter->pLeaf = 0; | |||
| 11668 | while( p->rc==SQLITE_OK0 && pIter->iLeafPgno>pIter->iTermLeafPgno ){ | |||
| 11669 | Fts5Data *pNew; | |||
| 11670 | pIter->iLeafPgno--; | |||
| 11671 | pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) ) | |||
| 11672 | pIter->pSeg->iSegid, pIter->iLeafPgno( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) ) | |||
| 11673 | )( ((i64)(pIter->pSeg->iSegid) << (31 +5 +1)) + (( i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64 )(pIter->iLeafPgno)) )); | |||
| 11674 | if( pNew ){ | |||
| 11675 | /* iTermLeafOffset may be equal to szLeaf if the term is the last | |||
| 11676 | ** thing on the page - i.e. the first rowid is on the following page. | |||
| 11677 | ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */ | |||
| 11678 | if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ | |||
| 11679 | assert( pIter->pLeaf==0 )((void) (0)); | |||
| 11680 | if( pIter->iTermLeafOffset<pNew->szLeaf ){ | |||
| 11681 | pIter->pLeaf = pNew; | |||
| 11682 | pIter->iLeafOffset = pIter->iTermLeafOffset; | |||
| 11683 | } | |||
| 11684 | }else{ | |||
| 11685 | int iRowidOff; | |||
| 11686 | iRowidOff = fts5LeafFirstRowidOff(pNew)(fts5GetU16((pNew)->p)); | |||
| 11687 | if( iRowidOff ){ | |||
| 11688 | if( iRowidOff>=pNew->szLeaf ){ | |||
| 11689 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 11690 | }else{ | |||
| 11691 | pIter->pLeaf = pNew; | |||
| 11692 | pIter->iLeafOffset = iRowidOff; | |||
| 11693 | } | |||
| 11694 | } | |||
| 11695 | } | |||
| 11696 | ||||
| 11697 | if( pIter->pLeaf ){ | |||
| 11698 | u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; | |||
| 11699 | pIter->iLeafOffset += fts5GetVarintsqlite3Fts5GetVarint(a, (u64*)&pIter->iRowid); | |||
| 11700 | break; | |||
| 11701 | }else{ | |||
| 11702 | fts5DataRelease(pNew); | |||
| 11703 | } | |||
| 11704 | } | |||
| 11705 | } | |||
| 11706 | ||||
| 11707 | if( pIter->pLeaf ){ | |||
| 11708 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | |||
| 11709 | fts5SegIterReverseInitPage(p, pIter); | |||
| 11710 | } | |||
| 11711 | } | |||
| 11712 | ||||
| 11713 | /* | |||
| 11714 | ** Return true if the iterator passed as the second argument currently | |||
| 11715 | ** points to a delete marker. A delete marker is an entry with a 0 byte | |||
| 11716 | ** position-list. | |||
| 11717 | */ | |||
| 11718 | static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ | |||
| 11719 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | |||
| 11720 | return (p->rc==SQLITE_OK0 && pSeg->pLeaf && pSeg->nPos==0); | |||
| 11721 | } | |||
| 11722 | ||||
| 11723 | /* | |||
| 11724 | ** Advance iterator pIter to the next entry. | |||
| 11725 | ** | |||
| 11726 | ** This version of fts5SegIterNext() is only used by reverse iterators. | |||
| 11727 | */ | |||
| 11728 | static void fts5SegIterNext_Reverse( | |||
| 11729 | Fts5Index *p, /* FTS5 backend object */ | |||
| 11730 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
| 11731 | int *pbUnused /* Unused */ | |||
| 11732 | ){ | |||
| 11733 | assert( pIter->flags & FTS5_SEGITER_REVERSE )((void) (0)); | |||
| 11734 | assert( pIter->pNextLeaf==0 )((void) (0)); | |||
| 11735 | UNUSED_PARAM(pbUnused)(void)(pbUnused); | |||
| 11736 | ||||
| 11737 | if( pIter->iRowidOffset>0 ){ | |||
| 11738 | u8 *a = pIter->pLeaf->p; | |||
| 11739 | int iOff; | |||
| 11740 | u64 iDelta; | |||
| 11741 | ||||
| 11742 | pIter->iRowidOffset--; | |||
| 11743 | pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; | |||
| 11744 | fts5SegIterLoadNPos(p, pIter); | |||
| 11745 | iOff = pIter->iLeafOffset; | |||
| 11746 | if( p->pConfig->eDetail!=FTS5_DETAIL_NONE1 ){ | |||
| 11747 | iOff += pIter->nPos; | |||
| 11748 | } | |||
| 11749 | fts5GetVarintsqlite3Fts5GetVarint(&a[iOff], &iDelta); | |||
| 11750 | pIter->iRowid -= iDelta; | |||
| 11751 | }else{ | |||
| 11752 | fts5SegIterReverseNewPage(p, pIter); | |||
| 11753 | } | |||
| 11754 | } | |||
| 11755 | ||||
| 11756 | /* | |||
| 11757 | ** Advance iterator pIter to the next entry. | |||
| 11758 | ** | |||
| 11759 | ** This version of fts5SegIterNext() is only used if detail=none and the | |||
| 11760 | ** iterator is not a reverse direction iterator. | |||
| 11761 | */ | |||
| 11762 | static void fts5SegIterNext_None( | |||
| 11763 | Fts5Index *p, /* FTS5 backend object */ | |||
| 11764 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
| 11765 | int *pbNewTerm /* OUT: Set for new term */ | |||
| 11766 | ){ | |||
| 11767 | int iOff; | |||
| 11768 | ||||
| 11769 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 11770 | assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 )((void) (0)); | |||
| 11771 | assert( p->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | |||
| 11772 | ||||
| 11773 | ASSERT_SZLEAF_OK(pIter->pLeaf)((void) (0)); | |||
| 11774 | iOff = pIter->iLeafOffset; | |||
| 11775 | ||||
| 11776 | /* Next entry is on the next page */ | |||
| 11777 | while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ | |||
| 11778 | fts5SegIterNextPage(p, pIter); | |||
| 11779 | if( p->rc || pIter->pLeaf==0 ) return; | |||
| 11780 | pIter->iRowid = 0; | |||
| 11781 | iOff = 4; | |||
| 11782 | } | |||
| 11783 | ||||
| 11784 | if( iOff<pIter->iEndofDoclist ){ | |||
| 11785 | /* Next entry is on the current page */ | |||
| 11786 | u64 iDelta; | |||
| 11787 | iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta); | |||
| 11788 | pIter->iLeafOffset = iOff; | |||
| 11789 | pIter->iRowid += iDelta; | |||
| 11790 | }else if( (pIter->flags & FTS5_SEGITER_ONETERM0x01)==0 ){ | |||
| 11791 | if( pIter->pSeg ){ | |||
| 11792 | int nKeep = 0; | |||
| 11793 | if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ | |||
| 11794 | iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep)sqlite3Fts5GetVarint32(&pIter->pLeaf->p[iOff],(u32* )&(nKeep)); | |||
| 11795 | } | |||
| 11796 | pIter->iLeafOffset = iOff; | |||
| 11797 | fts5SegIterLoadTerm(p, pIter, nKeep); | |||
| 11798 | }else{ | |||
| 11799 | const u8 *pList = 0; | |||
| 11800 | const char *zTerm = 0; | |||
| 11801 | int nTerm = 0; | |||
| 11802 | int nList; | |||
| 11803 | sqlite3Fts5HashScanNext(p->pHash); | |||
| 11804 | sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); | |||
| 11805 | if( pList==0 ) goto next_none_eof; | |||
| 11806 | pIter->pLeaf->p = (u8*)pList; | |||
| 11807 | pIter->pLeaf->nn = nList; | |||
| 11808 | pIter->pLeaf->szLeaf = nList; | |||
| 11809 | pIter->iEndofDoclist = nList; | |||
| 11810 | sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm); | |||
| 11811 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pList, (u64*)&pIter->iRowid); | |||
| 11812 | } | |||
| 11813 | ||||
| 11814 | if( pbNewTerm ) *pbNewTerm = 1; | |||
| 11815 | }else{ | |||
| 11816 | goto next_none_eof; | |||
| 11817 | } | |||
| 11818 | ||||
| 11819 | fts5SegIterLoadNPos(p, pIter); | |||
| 11820 | ||||
| 11821 | return; | |||
| 11822 | next_none_eof: | |||
| 11823 | fts5DataRelease(pIter->pLeaf); | |||
| 11824 | pIter->pLeaf = 0; | |||
| 11825 | } | |||
| 11826 | ||||
| 11827 | ||||
| 11828 | /* | |||
| 11829 | ** Advance iterator pIter to the next entry. | |||
| 11830 | ** | |||
| 11831 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It | |||
| 11832 | ** is not considered an error if the iterator reaches EOF. If an error has | |||
| 11833 | ** already occurred when this function is called, it is a no-op. | |||
| 11834 | */ | |||
| 11835 | static void fts5SegIterNext( | |||
| 11836 | Fts5Index *p, /* FTS5 backend object */ | |||
| 11837 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
| 11838 | int *pbNewTerm /* OUT: Set for new term */ | |||
| 11839 | ){ | |||
| 11840 | Fts5Data *pLeaf = pIter->pLeaf; | |||
| 11841 | int iOff; | |||
| 11842 | int bNewTerm = 0; | |||
| 11843 | int nKeep = 0; | |||
| 11844 | u8 *a; | |||
| 11845 | int n; | |||
| 11846 | ||||
| 11847 | assert( pbNewTerm==0 || *pbNewTerm==0 )((void) (0)); | |||
| 11848 | assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | |||
| 11849 | ||||
| 11850 | /* Search for the end of the position list within the current page. */ | |||
| 11851 | a = pLeaf->p; | |||
| 11852 | n = pLeaf->szLeaf; | |||
| 11853 | ||||
| 11854 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | |||
| 11855 | iOff = pIter->iLeafOffset + pIter->nPos; | |||
| 11856 | ||||
| 11857 | if( iOff<n ){ | |||
| 11858 | /* The next entry is on the current page. */ | |||
| 11859 | assert_nc( iOff<=pIter->iEndofDoclist )((void) (0)); | |||
| 11860 | if( iOff>=pIter->iEndofDoclist ){ | |||
| 11861 | bNewTerm = 1; | |||
| 11862 | if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ | |||
| 11863 | iOff += fts5GetVarint32(&a[iOff], nKeep)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nKeep)); | |||
| 11864 | } | |||
| 11865 | }else{ | |||
| 11866 | u64 iDelta; | |||
| 11867 | iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); | |||
| 11868 | pIter->iRowid += iDelta; | |||
| 11869 | assert_nc( iDelta>0 )((void) (0)); | |||
| 11870 | } | |||
| 11871 | pIter->iLeafOffset = iOff; | |||
| 11872 | ||||
| 11873 | }else if( pIter->pSeg==0 ){ | |||
| 11874 | const u8 *pList = 0; | |||
| 11875 | const char *zTerm = 0; | |||
| 11876 | int nTerm = 0; | |||
| 11877 | int nList = 0; | |||
| 11878 | assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm )((void) (0)); | |||
| 11879 | if( 0==(pIter->flags & FTS5_SEGITER_ONETERM0x01) ){ | |||
| 11880 | sqlite3Fts5HashScanNext(p->pHash); | |||
| 11881 | sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList); | |||
| 11882 | } | |||
| 11883 | if( pList==0 ){ | |||
| 11884 | fts5DataRelease(pIter->pLeaf); | |||
| 11885 | pIter->pLeaf = 0; | |||
| 11886 | }else{ | |||
| 11887 | pIter->pLeaf->p = (u8*)pList; | |||
| 11888 | pIter->pLeaf->nn = nList; | |||
| 11889 | pIter->pLeaf->szLeaf = nList; | |||
| 11890 | pIter->iEndofDoclist = nList+1; | |||
| 11891 | sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm); | |||
| 11892 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pList, (u64*)&pIter->iRowid); | |||
| 11893 | *pbNewTerm = 1; | |||
| 11894 | } | |||
| 11895 | }else{ | |||
| 11896 | iOff = 0; | |||
| 11897 | /* Next entry is not on the current page */ | |||
| 11898 | while( iOff==0 ){ | |||
| 11899 | fts5SegIterNextPage(p, pIter); | |||
| 11900 | pLeaf = pIter->pLeaf; | |||
| 11901 | if( pLeaf==0 ) break; | |||
| 11902 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | |||
| 11903 | if( (iOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p))) && iOff<pLeaf->szLeaf ){ | |||
| 11904 | iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); | |||
| 11905 | pIter->iLeafOffset = iOff; | |||
| 11906 | ||||
| 11907 | if( pLeaf->nn>pLeaf->szLeaf ){ | |||
| 11908 | pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)) | |||
| 11909 | &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclistsqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)) | |||
| 11910 | )sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(pIter->iEndofDoclist)); | |||
| 11911 | } | |||
| 11912 | } | |||
| 11913 | else if( pLeaf->nn>pLeaf->szLeaf ){ | |||
| 11914 | pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)) | |||
| 11915 | &pLeaf->p[pLeaf->szLeaf], iOffsqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)) | |||
| 11916 | )sqlite3Fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf],(u32 *)&(iOff)); | |||
| 11917 | pIter->iLeafOffset = iOff; | |||
| 11918 | pIter->iEndofDoclist = iOff; | |||
| 11919 | bNewTerm = 1; | |||
| 11920 | } | |||
| 11921 | assert_nc( iOff<pLeaf->szLeaf )((void) (0)); | |||
| 11922 | if( iOff>pLeaf->szLeaf ){ | |||
| 11923 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 11924 | return; | |||
| 11925 | } | |||
| 11926 | } | |||
| 11927 | } | |||
| 11928 | ||||
| 11929 | /* Check if the iterator is now at EOF. If so, return early. */ | |||
| 11930 | if( pIter->pLeaf ){ | |||
| 11931 | if( bNewTerm ){ | |||
| 11932 | if( pIter->flags & FTS5_SEGITER_ONETERM0x01 ){ | |||
| 11933 | fts5DataRelease(pIter->pLeaf); | |||
| 11934 | pIter->pLeaf = 0; | |||
| 11935 | }else{ | |||
| 11936 | fts5SegIterLoadTerm(p, pIter, nKeep); | |||
| 11937 | fts5SegIterLoadNPos(p, pIter); | |||
| 11938 | if( pbNewTerm ) *pbNewTerm = 1; | |||
| 11939 | } | |||
| 11940 | }else{ | |||
| 11941 | /* The following could be done by calling fts5SegIterLoadNPos(). But | |||
| 11942 | ** this block is particularly performance critical, so equivalent | |||
| 11943 | ** code is inlined. */ | |||
| 11944 | int nSz; | |||
| 11945 | assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn )((void) (0)); | |||
| 11946 | fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz){ nSz = (pIter->pLeaf->p)[pIter->iLeafOffset++]; if( nSz & 0x80 ){ pIter->iLeafOffset--; pIter->iLeafOffset += sqlite3Fts5GetVarint32(&(pIter->pLeaf->p)[pIter ->iLeafOffset],(u32*)&(nSz)); } }; | |||
| 11947 | pIter->bDel = (nSz & 0x0001); | |||
| 11948 | pIter->nPos = nSz>>1; | |||
| 11949 | assert_nc( pIter->nPos>=0 )((void) (0)); | |||
| 11950 | } | |||
| 11951 | } | |||
| 11952 | } | |||
| 11953 | ||||
/*
** Exchange the values of a and b, using a temporary of type T. Both a
** and b are evaluated more than once and must be assignable.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }

/*
** Advance integer variable iOff past the bytes in array a[], starting at
** a[iOff], that have the 0x80 bit set, and past the first byte that
** follows them. At most 9 bytes are consumed.
*/
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = iOff+9;                              \
  while( (a[iOff++] & 0x80) && iOff<iEnd );       \
}
| 11960 | ||||
| 11961 | /* | |||
| 11962 | ** Iterator pIter currently points to the first rowid in a doclist. This | |||
| 11963 | ** function sets the iterator up so that iterates in reverse order through | |||
| 11964 | ** the doclist. | |||
| 11965 | */ | |||
| 11966 | static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ | |||
| 11967 | Fts5DlidxIter *pDlidx = pIter->pDlidx; | |||
| 11968 | Fts5Data *pLast = 0; | |||
| 11969 | int pgnoLast = 0; | |||
| 11970 | ||||
| 11971 | if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION4 ){ | |||
| 11972 | int iSegid = pIter->pSeg->iSegid; | |||
| 11973 | pgnoLast = fts5DlidxIterPgno(pDlidx); | |||
| 11974 | pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgnoLast)) )); | |||
| 11975 | }else{ | |||
| 11976 | Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ | |||
| 11977 | ||||
| 11978 | /* Currently, Fts5SegIter.iLeafOffset points to the first byte of | |||
| 11979 | ** position-list content for the current rowid. Back it up so that it | |||
| 11980 | ** points to the start of the position-list size field. */ | |||
| 11981 | int iPoslist; | |||
| 11982 | if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ | |||
| 11983 | iPoslist = pIter->iTermLeafOffset; | |||
| 11984 | }else{ | |||
| 11985 | iPoslist = 4; | |||
| 11986 | } | |||
| 11987 | fts5IndexSkipVarint(pLeaf->p, iPoslist){ int iEnd = iPoslist+9; while( (pLeaf->p[iPoslist++] & 0x80) && iPoslist<iEnd ); }; | |||
| 11988 | pIter->iLeafOffset = iPoslist; | |||
| 11989 | ||||
| 11990 | /* If this condition is true then the largest rowid for the current | |||
| 11991 | ** term may not be stored on the current page. So search forward to | |||
| 11992 | ** see where said rowid really is. */ | |||
| 11993 | if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ | |||
| 11994 | int pgno; | |||
| 11995 | Fts5StructureSegment *pSeg = pIter->pSeg; | |||
| 11996 | ||||
| 11997 | /* The last rowid in the doclist may not be on the current page. Search | |||
| 11998 | ** forward to find the page containing the last rowid. */ | |||
| 11999 | for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ | |||
| 12000 | i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | |||
| 12001 | Fts5Data *pNew = fts5LeafRead(p, iAbs); | |||
| 12002 | if( pNew ){ | |||
| 12003 | int iRowid, bTermless; | |||
| 12004 | iRowid = fts5LeafFirstRowidOff(pNew)(fts5GetU16((pNew)->p)); | |||
| 12005 | bTermless = fts5LeafIsTermless(pNew)((pNew)->szLeaf >= (pNew)->nn); | |||
| 12006 | if( iRowid ){ | |||
| 12007 | SWAPVAL(Fts5Data*, pNew, pLast){ Fts5Data* tmp; tmp=pNew; pNew=pLast; pLast=tmp; }; | |||
| 12008 | pgnoLast = pgno; | |||
| 12009 | } | |||
| 12010 | fts5DataRelease(pNew); | |||
| 12011 | if( bTermless==0 ) break; | |||
| 12012 | } | |||
| 12013 | } | |||
| 12014 | } | |||
| 12015 | } | |||
| 12016 | ||||
| 12017 | /* If pLast is NULL at this point, then the last rowid for this doclist | |||
| 12018 | ** lies on the page currently indicated by the iterator. In this case | |||
| 12019 | ** pIter->iLeafOffset is already set to point to the position-list size | |||
| 12020 | ** field associated with the first relevant rowid on the page. | |||
| 12021 | ** | |||
| 12022 | ** Or, if pLast is non-NULL, then it is the page that contains the last | |||
| 12023 | ** rowid. In this case configure the iterator so that it points to the | |||
| 12024 | ** first rowid on this page. | |||
| 12025 | */ | |||
| 12026 | if( pLast ){ | |||
| 12027 | int iOff; | |||
| 12028 | fts5DataRelease(pIter->pLeaf); | |||
| 12029 | pIter->pLeaf = pLast; | |||
| 12030 | pIter->iLeafPgno = pgnoLast; | |||
| 12031 | iOff = fts5LeafFirstRowidOff(pLast)(fts5GetU16((pLast)->p)); | |||
| 12032 | if( iOff>pLast->szLeaf ){ | |||
| 12033 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 12034 | return; | |||
| 12035 | } | |||
| 12036 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); | |||
| 12037 | pIter->iLeafOffset = iOff; | |||
| 12038 | ||||
| 12039 | if( fts5LeafIsTermless(pLast)((pLast)->szLeaf >= (pLast)->nn) ){ | |||
| 12040 | pIter->iEndofDoclist = pLast->nn+1; | |||
| 12041 | }else{ | |||
| 12042 | pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); | |||
| 12043 | } | |||
| 12044 | } | |||
| 12045 | ||||
| 12046 | fts5SegIterReverseInitPage(p, pIter); | |||
| 12047 | } | |||
| 12048 | ||||
| 12049 | /* | |||
| 12050 | ** Iterator pIter currently points to the first rowid of a doclist. | |||
| 12051 | ** There is a doclist-index associated with the final term on the current | |||
| 12052 | ** page. If the current term is the last term on the page, load the | |||
| 12053 | ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). | |||
| 12054 | */ | |||
| 12055 | static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ | |||
| 12056 | int iSeg = pIter->pSeg->iSegid; | |||
| 12057 | int bRev = (pIter->flags & FTS5_SEGITER_REVERSE0x02); | |||
| 12058 | Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ | |||
| 12059 | ||||
| 12060 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | |||
| 12061 | assert( pIter->pDlidx==0 )((void) (0)); | |||
| 12062 | ||||
| 12063 | /* Check if the current doclist ends on this page. If it does, return | |||
| 12064 | ** early without loading the doclist-index (as it belongs to a different | |||
| 12065 | ** term. */ | |||
| 12066 | if( pIter->iTermLeafPgno==pIter->iLeafPgno | |||
| 12067 | && pIter->iEndofDoclist<pLeaf->szLeaf | |||
| 12068 | ){ | |||
| 12069 | return; | |||
| 12070 | } | |||
| 12071 | ||||
| 12072 | pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); | |||
| 12073 | } | |||
| 12074 | ||||
| 12075 | /* | |||
| 12076 | ** The iterator object passed as the second argument currently contains | |||
| 12077 | ** no valid values except for the Fts5SegIter.pLeaf member variable. This | |||
| 12078 | ** function searches the leaf page for a term matching (pTerm/nTerm). | |||
| 12079 | ** | |||
| 12080 | ** If the specified term is found on the page, then the iterator is left | |||
| 12081 | ** pointing to it. If argument bGe is zero and the term is not found, | |||
| 12082 | ** the iterator is left pointing at EOF. | |||
| 12083 | ** | |||
| 12084 | ** If bGe is non-zero and the specified term is not found, then the | |||
| 12085 | ** iterator is left pointing to the smallest term in the segment that | |||
| 12086 | ** is larger than the specified term, even if this term is not on the | |||
| 12087 | ** current page. | |||
| 12088 | */ | |||
| 12089 | static void fts5LeafSeek( | |||
| 12090 | Fts5Index *p, /* Leave any error code here */ | |||
| 12091 | int bGe, /* True for a >= search */ | |||
| 12092 | Fts5SegIter *pIter, /* Iterator to seek */ | |||
| 12093 | const u8 *pTerm, int nTerm /* Term to search for */ | |||
| 12094 | ){ | |||
| 12095 | u32 iOff; | |||
| 12096 | const u8 *a = pIter->pLeaf->p; | |||
| 12097 | u32 n = (u32)pIter->pLeaf->nn; | |||
| 12098 | ||||
| 12099 | u32 nMatch = 0; | |||
| 12100 | u32 nKeep = 0; | |||
| 12101 | u32 nNew = 0; | |||
| 12102 | u32 iTermOff; | |||
| 12103 | u32 iPgidx; /* Current offset in pgidx */ | |||
| 12104 | int bEndOfPage = 0; | |||
| 12105 | ||||
| 12106 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 12107 | ||||
| 12108 | iPgidx = (u32)pIter->pLeaf->szLeaf; | |||
| 12109 | iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(iTermOff)); | |||
| 12110 | iOff = iTermOff; | |||
| 12111 | if( iOff>n ){ | |||
| 12112 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 12113 | return; | |||
| 12114 | } | |||
| 12115 | ||||
| 12116 | while( 1 ){ | |||
| 12117 | ||||
| 12118 | /* Figure out how many new bytes are in this term */ | |||
| 12119 | fts5FastGetVarint32(a, iOff, nNew){ nNew = (a)[iOff++]; if( nNew & 0x80 ){ iOff--; iOff += sqlite3Fts5GetVarint32 (&(a)[iOff],(u32*)&(nNew)); } }; | |||
| 12120 | if( nKeep<nMatch ){ | |||
| 12121 | goto search_failed; | |||
| 12122 | } | |||
| 12123 | ||||
| 12124 | assert( nKeep>=nMatch )((void) (0)); | |||
| 12125 | if( nKeep==nMatch ){ | |||
| 12126 | u32 nCmp; | |||
| 12127 | u32 i; | |||
| 12128 | nCmp = (u32)MIN(nNew, nTerm-nMatch)(((nNew) < (nTerm-nMatch)) ? (nNew) : (nTerm-nMatch)); | |||
| 12129 | for(i=0; i<nCmp; i++){ | |||
| 12130 | if( a[iOff+i]!=pTerm[nMatch+i] ) break; | |||
| 12131 | } | |||
| 12132 | nMatch += i; | |||
| 12133 | ||||
| 12134 | if( (u32)nTerm==nMatch ){ | |||
| 12135 | if( i==nNew ){ | |||
| 12136 | goto search_success; | |||
| 12137 | }else{ | |||
| 12138 | goto search_failed; | |||
| 12139 | } | |||
| 12140 | }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ | |||
| 12141 | goto search_failed; | |||
| 12142 | } | |||
| 12143 | } | |||
| 12144 | ||||
| 12145 | if( iPgidx>=n ){ | |||
| 12146 | bEndOfPage = 1; | |||
| 12147 | break; | |||
| 12148 | } | |||
| 12149 | ||||
| 12150 | iPgidx += fts5GetVarint32(&a[iPgidx], nKeep)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(nKeep)); | |||
| 12151 | iTermOff += nKeep; | |||
| 12152 | iOff = iTermOff; | |||
| 12153 | ||||
| 12154 | if( iOff>=n ){ | |||
| 12155 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 12156 | return; | |||
| 12157 | } | |||
| 12158 | ||||
| 12159 | /* Read the nKeep field of the next term. */ | |||
| 12160 | fts5FastGetVarint32(a, iOff, nKeep){ nKeep = (a)[iOff++]; if( nKeep & 0x80 ){ iOff--; iOff += sqlite3Fts5GetVarint32(&(a)[iOff],(u32*)&(nKeep)); } }; | |||
| 12161 | } | |||
| 12162 | ||||
| 12163 | search_failed: | |||
| 12164 | if( bGe==0 ){ | |||
| 12165 | fts5DataRelease(pIter->pLeaf); | |||
| 12166 | pIter->pLeaf = 0; | |||
| 12167 | return; | |||
| 12168 | }else if( bEndOfPage ){ | |||
| 12169 | do { | |||
| 12170 | fts5SegIterNextPage(p, pIter); | |||
| 12171 | if( pIter->pLeaf==0 ) return; | |||
| 12172 | a = pIter->pLeaf->p; | |||
| 12173 | if( fts5LeafIsTermless(pIter->pLeaf)((pIter->pLeaf)->szLeaf >= (pIter->pLeaf)->nn)==0 ){ | |||
| 12174 | iPgidx = (u32)pIter->pLeaf->szLeaf; | |||
| 12175 | iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff)sqlite3Fts5GetVarint32(&pIter->pLeaf->p[iPgidx],(u32 *)&(iOff)); | |||
| 12176 | if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){ | |||
| 12177 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 12178 | return; | |||
| 12179 | }else{ | |||
| 12180 | nKeep = 0; | |||
| 12181 | iTermOff = iOff; | |||
| 12182 | n = (u32)pIter->pLeaf->nn; | |||
| 12183 | iOff += fts5GetVarint32(&a[iOff], nNew)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nNew)); | |||
| 12184 | break; | |||
| 12185 | } | |||
| 12186 | } | |||
| 12187 | }while( 1 ); | |||
| 12188 | } | |||
| 12189 | ||||
| 12190 | search_success: | |||
| 12191 | if( (i64)iOff+nNew>n || nNew<1 ){ | |||
| 12192 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 12193 | return; | |||
| 12194 | } | |||
| 12195 | pIter->iLeafOffset = iOff + nNew; | |||
| 12196 | pIter->iTermLeafOffset = pIter->iLeafOffset; | |||
| 12197 | pIter->iTermLeafPgno = pIter->iLeafPgno; | |||
| 12198 | ||||
| 12199 | fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm)sqlite3Fts5BufferSet(&p->rc,&pIter->term,nKeep, pTerm); | |||
| 12200 | fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&pIter->term ,nNew,&a[iOff]); | |||
| 12201 | ||||
| 12202 | if( iPgidx>=n ){ | |||
| 12203 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; | |||
| 12204 | }else{ | |||
| 12205 | int nExtra; | |||
| 12206 | iPgidx += fts5GetVarint32(&a[iPgidx], nExtra)sqlite3Fts5GetVarint32(&a[iPgidx],(u32*)&(nExtra)); | |||
| 12207 | pIter->iEndofDoclist = iTermOff + nExtra; | |||
| 12208 | } | |||
| 12209 | pIter->iPgidxOff = iPgidx; | |||
| 12210 | ||||
| 12211 | fts5SegIterLoadRowid(p, pIter); | |||
| 12212 | fts5SegIterLoadNPos(p, pIter); | |||
| 12213 | } | |||
| 12214 | ||||
| 12215 | static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ | |||
| 12216 | if( p->pIdxSelect==0 ){ | |||
| 12217 | Fts5Config *pConfig = p->pConfig; | |||
| 12218 | fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintfsqlite3_api->mprintf( | |||
| 12219 | "SELECT pgno FROM '%q'.'%q_idx' WHERE " | |||
| 12220 | "segid=? AND term<=? ORDER BY term DESC LIMIT 1", | |||
| 12221 | pConfig->zDb, pConfig->zName | |||
| 12222 | )); | |||
| 12223 | } | |||
| 12224 | return p->pIdxSelect; | |||
| 12225 | } | |||
| 12226 | ||||
| 12227 | /* | |||
| 12228 | ** Initialize the object pIter to point to term pTerm/nTerm within segment | |||
| 12229 | ** pSeg. If there is no such term in the index, the iterator is set to EOF. | |||
| 12230 | ** | |||
| 12231 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | |||
| 12232 | ** an error has already occurred when this function is called, it is a no-op. | |||
| 12233 | */ | |||
| 12234 | static void fts5SegIterSeekInit( | |||
| 12235 | Fts5Index *p, /* FTS5 backend */ | |||
| 12236 | const u8 *pTerm, int nTerm, /* Term to seek to */ | |||
| 12237 | int flags, /* Mask of FTS5INDEX_XXX flags */ | |||
| 12238 | Fts5StructureSegment *pSeg, /* Description of segment */ | |||
| 12239 | Fts5SegIter *pIter /* Object to populate */ | |||
| 12240 | ){ | |||
| 12241 | int iPg = 1; | |||
| 12242 | int bGe = (flags & FTS5INDEX_QUERY_SCAN0x0008); | |||
| 12243 | int bDlidx = 0; /* True if there is a doclist-index */ | |||
| 12244 | sqlite3_stmt *pIdxSelect = 0; | |||
| 12245 | ||||
| 12246 | assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 )((void) (0)); | |||
| 12247 | assert( pTerm && nTerm )((void) (0)); | |||
| 12248 | memset(pIter, 0, sizeof(*pIter)); | |||
| 12249 | pIter->pSeg = pSeg; | |||
| 12250 | ||||
| 12251 | /* This block sets stack variable iPg to the leaf page number that may | |||
| 12252 | ** contain term (pTerm/nTerm), if it is present in the segment. */ | |||
| 12253 | pIdxSelect = fts5IdxSelectStmt(p); | |||
| 12254 | if( p->rc ) return; | |||
| 12255 | sqlite3_bind_intsqlite3_api->bind_int(pIdxSelect, 1, pSeg->iSegid); | |||
| 12256 | sqlite3_bind_blobsqlite3_api->bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 12257 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pIdxSelect) ){ | |||
| 12258 | i64 val = sqlite3_column_intsqlite3_api->column_int(pIdxSelect, 0); | |||
| 12259 | iPg = (int)(val>>1); | |||
| 12260 | bDlidx = (val & 0x0001); | |||
| 12261 | } | |||
| 12262 | p->rc = sqlite3_resetsqlite3_api->reset(pIdxSelect); | |||
| 12263 | sqlite3_bind_nullsqlite3_api->bind_null(pIdxSelect, 2); | |||
| 12264 | ||||
| 12265 | if( iPg<pSeg->pgnoFirst ){ | |||
| 12266 | iPg = pSeg->pgnoFirst; | |||
| 12267 | bDlidx = 0; | |||
| 12268 | } | |||
| 12269 | ||||
| 12270 | pIter->iLeafPgno = iPg - 1; | |||
| 12271 | fts5SegIterNextPage(p, pIter); | |||
| 12272 | ||||
| 12273 | if( pIter->pLeaf ){ | |||
| 12274 | fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); | |||
| 12275 | } | |||
| 12276 | ||||
| 12277 | if( p->rc==SQLITE_OK0 && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM0x0100)) ){ | |||
| 12278 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | |||
| 12279 | if( pIter->pLeaf ){ | |||
| 12280 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | |||
| 12281 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | |||
| 12282 | } | |||
| 12283 | if( bDlidx ){ | |||
| 12284 | fts5SegIterLoadDlidx(p, pIter); | |||
| 12285 | } | |||
| 12286 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | |||
| 12287 | fts5SegIterReverse(p, pIter); | |||
| 12288 | } | |||
| 12289 | } | |||
| 12290 | } | |||
| 12291 | ||||
| 12292 | fts5SegIterSetNext(p, pIter); | |||
| 12293 | if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM0x0100) ){ | |||
| 12294 | fts5SegIterAllocTombstone(p, pIter); | |||
| 12295 | } | |||
| 12296 | ||||
| 12297 | /* Either: | |||
| 12298 | ** | |||
| 12299 | ** 1) an error has occurred, or | |||
| 12300 | ** 2) the iterator points to EOF, or | |||
| 12301 | ** 3) the iterator points to an entry with term (pTerm/nTerm), or | |||
| 12302 | ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points | |||
| 12303 | ** to an entry with a term greater than or equal to (pTerm/nTerm). | |||
| 12304 | */ | |||
| 12305 | assert_nc( p->rc!=SQLITE_OK /* 1 */((void) (0)) | |||
| 12306 | || pIter->pLeaf==0 /* 2 */((void) (0)) | |||
| 12307 | || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */((void) (0)) | |||
| 12308 | || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */((void) (0)) | |||
| 12309 | )((void) (0)); | |||
| 12310 | } | |||
| 12311 | ||||
| 12312 | ||||
| 12313 | /* | |||
| 12314 | ** SQL used by fts5SegIterNextInit() to find the page to open. | |||
| 12315 | */ | |||
| 12316 | static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){ | |||
| 12317 | if( p->pIdxNextSelect==0 ){ | |||
| 12318 | Fts5Config *pConfig = p->pConfig; | |||
| 12319 | fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintfsqlite3_api->mprintf( | |||
| 12320 | "SELECT pgno FROM '%q'.'%q_idx' WHERE " | |||
| 12321 | "segid=? AND term>? ORDER BY term ASC LIMIT 1", | |||
| 12322 | pConfig->zDb, pConfig->zName | |||
| 12323 | )); | |||
| 12324 | ||||
| 12325 | } | |||
| 12326 | return p->pIdxNextSelect; | |||
| 12327 | } | |||
| 12328 | ||||
| 12329 | /* | |||
| 12330 | ** This is similar to fts5SegIterSeekInit(), except that it initializes | |||
| 12331 | ** the segment iterator to point to the first term following the page | |||
| 12332 | ** with pToken/nToken on it. | |||
| 12333 | */ | |||
| 12334 | static void fts5SegIterNextInit( | |||
| 12335 | Fts5Index *p, | |||
| 12336 | const char *pTerm, int nTerm, | |||
| 12337 | Fts5StructureSegment *pSeg, /* Description of segment */ | |||
| 12338 | Fts5SegIter *pIter /* Object to populate */ | |||
| 12339 | ){ | |||
| 12340 | int iPg = -1; /* Page of segment to open */ | |||
| 12341 | int bDlidx = 0; | |||
| 12342 | sqlite3_stmt *pSel = 0; /* SELECT to find iPg */ | |||
| 12343 | ||||
| 12344 | pSel = fts5IdxNextStmt(p); | |||
| 12345 | if( pSel ){ | |||
| 12346 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 12347 | sqlite3_bind_intsqlite3_api->bind_int(pSel, 1, pSeg->iSegid); | |||
| 12348 | sqlite3_bind_blobsqlite3_api->bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 12349 | ||||
| 12350 | if( sqlite3_stepsqlite3_api->step(pSel)==SQLITE_ROW100 ){ | |||
| 12351 | i64 val = sqlite3_column_int64sqlite3_api->column_int64(pSel, 0); | |||
| 12352 | iPg = (int)(val>>1); | |||
| 12353 | bDlidx = (val & 0x0001); | |||
| 12354 | } | |||
| 12355 | p->rc = sqlite3_resetsqlite3_api->reset(pSel); | |||
| 12356 | sqlite3_bind_nullsqlite3_api->bind_null(pSel, 2); | |||
| 12357 | if( p->rc ) return; | |||
| 12358 | } | |||
| 12359 | ||||
| 12360 | memset(pIter, 0, sizeof(*pIter)); | |||
| 12361 | pIter->pSeg = pSeg; | |||
| 12362 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | |||
| 12363 | if( iPg>=0 ){ | |||
| 12364 | pIter->iLeafPgno = iPg - 1; | |||
| 12365 | fts5SegIterNextPage(p, pIter); | |||
| 12366 | fts5SegIterSetNext(p, pIter); | |||
| 12367 | } | |||
| 12368 | if( pIter->pLeaf ){ | |||
| 12369 | const u8 *a = pIter->pLeaf->p; | |||
| 12370 | int iTermOff = 0; | |||
| 12371 | ||||
| 12372 | pIter->iPgidxOff = pIter->pLeaf->szLeaf; | |||
| 12373 | pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[pIter->iPgidxOff],(u32*)& (iTermOff)); | |||
| 12374 | pIter->iLeafOffset = iTermOff; | |||
| 12375 | fts5SegIterLoadTerm(p, pIter, 0); | |||
| 12376 | fts5SegIterLoadNPos(p, pIter); | |||
| 12377 | if( bDlidx ) fts5SegIterLoadDlidx(p, pIter); | |||
| 12378 | ||||
| 12379 | assert( p->rc!=SQLITE_OK ||((void) (0)) | |||
| 12380 | fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0((void) (0)) | |||
| 12381 | )((void) (0)); | |||
| 12382 | } | |||
| 12383 | } | |||
| 12384 | ||||
| 12385 | /* | |||
| 12386 | ** Initialize the object pIter to point to term pTerm/nTerm within the | |||
| 12387 | ** in-memory hash table. If there is no such term in the hash-table, the | |||
| 12388 | ** iterator is set to EOF. | |||
| 12389 | ** | |||
| 12390 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If | |||
| 12391 | ** an error has already occurred when this function is called, it is a no-op. | |||
| 12392 | */ | |||
| 12393 | static void fts5SegIterHashInit( | |||
| 12394 | Fts5Index *p, /* FTS5 backend */ | |||
| 12395 | const u8 *pTerm, int nTerm, /* Term to seek to */ | |||
| 12396 | int flags, /* Mask of FTS5INDEX_XXX flags */ | |||
| 12397 | Fts5SegIter *pIter /* Object to populate */ | |||
| 12398 | ){ | |||
| 12399 | int nList = 0; | |||
| 12400 | const u8 *z = 0; | |||
| 12401 | int n = 0; | |||
| 12402 | Fts5Data *pLeaf = 0; | |||
| 12403 | ||||
| 12404 | assert( p->pHash )((void) (0)); | |||
| 12405 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 12406 | ||||
| 12407 | if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN0x0008) ){ | |||
| 12408 | const u8 *pList = 0; | |||
| 12409 | ||||
| 12410 | p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); | |||
| 12411 | sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList); | |||
| 12412 | if( pList ){ | |||
| 12413 | pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); | |||
| 12414 | if( pLeaf ){ | |||
| 12415 | pLeaf->p = (u8*)pList; | |||
| 12416 | } | |||
| 12417 | } | |||
| 12418 | ||||
| 12419 | /* The call to sqlite3Fts5HashScanInit() causes the hash table to | |||
| 12420 | ** fill the size field of all existing position lists. This means they | |||
| 12421 | ** can no longer be appended to. Since the only scenario in which they | |||
| 12422 | ** can be appended to is if the previous operation on this table was | |||
| 12423 | ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this | |||
| 12424 | ** possibility altogether. */ | |||
| 12425 | p->bDelete = 0; | |||
| 12426 | }else{ | |||
| 12427 | p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data), | |||
| 12428 | (const char*)pTerm, nTerm, (void**)&pLeaf, &nList | |||
| 12429 | ); | |||
| 12430 | if( pLeaf ){ | |||
| 12431 | pLeaf->p = (u8*)&pLeaf[1]; | |||
| 12432 | } | |||
| 12433 | z = pTerm; | |||
| 12434 | n = nTerm; | |||
| 12435 | pIter->flags |= FTS5_SEGITER_ONETERM0x01; | |||
| 12436 | } | |||
| 12437 | ||||
| 12438 | if( pLeaf ){ | |||
| 12439 | sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); | |||
| 12440 | pLeaf->nn = pLeaf->szLeaf = nList; | |||
| 12441 | pIter->pLeaf = pLeaf; | |||
| 12442 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); | |||
| 12443 | pIter->iEndofDoclist = pLeaf->nn; | |||
| 12444 | ||||
| 12445 | if( flags & FTS5INDEX_QUERY_DESC0x0002 ){ | |||
| 12446 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | |||
| 12447 | fts5SegIterReverseInitPage(p, pIter); | |||
| 12448 | }else{ | |||
| 12449 | fts5SegIterLoadNPos(p, pIter); | |||
| 12450 | } | |||
| 12451 | } | |||
| 12452 | ||||
| 12453 | fts5SegIterSetNext(p, pIter); | |||
| 12454 | } | |||
| 12455 | ||||
| 12456 | /* | |||
| 12457 | ** Array ap[] contains n elements. Release each of these elements using | |||
| 12458 | ** fts5DataRelease(). Then free the array itself using sqlite3_free(). | |||
| 12459 | */ | |||
| 12460 | static void fts5IndexFreeArray(Fts5Data **ap, int n){ | |||
| 12461 | if( ap ){ | |||
| 12462 | int ii; | |||
| 12463 | for(ii=0; ii<n; ii++){ | |||
| 12464 | fts5DataRelease(ap[ii]); | |||
| 12465 | } | |||
| 12466 | sqlite3_freesqlite3_api->free(ap); | |||
| 12467 | } | |||
| 12468 | } | |||
| 12469 | ||||
| 12470 | /* | |||
| 12471 | ** Decrement the ref-count of the object passed as the only argument. If it | |||
| 12472 | ** reaches 0, free it and its contents. | |||
| 12473 | */ | |||
| 12474 | static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){ | |||
| 12475 | if( p ){ | |||
| 12476 | p->nRef--; | |||
| 12477 | if( p->nRef<=0 ){ | |||
| 12478 | int ii; | |||
| 12479 | for(ii=0; ii<p->nTombstone; ii++){ | |||
| 12480 | fts5DataRelease(p->apTombstone[ii]); | |||
| 12481 | } | |||
| 12482 | sqlite3_freesqlite3_api->free(p); | |||
| 12483 | } | |||
| 12484 | } | |||
| 12485 | } | |||
| 12486 | ||||
| 12487 | /* | |||
| 12488 | ** Zero the iterator passed as the only argument. | |||
| 12489 | */ | |||
| 12490 | static void fts5SegIterClear(Fts5SegIter *pIter){ | |||
| 12491 | fts5BufferFree(&pIter->term)sqlite3Fts5BufferFree(&pIter->term); | |||
| 12492 | fts5DataRelease(pIter->pLeaf); | |||
| 12493 | fts5DataRelease(pIter->pNextLeaf); | |||
| 12494 | fts5TombstoneArrayDelete(pIter->pTombArray); | |||
| 12495 | fts5DlidxIterFree(pIter->pDlidx); | |||
| 12496 | sqlite3_freesqlite3_api->free(pIter->aRowidOffset); | |||
| 12497 | memset(pIter, 0, sizeof(Fts5SegIter)); | |||
| 12498 | } | |||
| 12499 | ||||
| 12500 | #ifdef SQLITE_DEBUG | |||
| 12501 | ||||
| 12502 | /* | |||
| 12503 | ** This function is used as part of the big assert() procedure implemented by | |||
| 12504 | ** fts5AssertMultiIterSetup(). It ensures that the result currently stored | |||
| 12505 | ** in *pRes is the correct result of comparing the current positions of the | |||
| 12506 | ** two iterators. | |||
| 12507 | */ | |||
| 12508 | static void fts5AssertComparisonResult( | |||
| 12509 | Fts5Iter *pIter, | |||
| 12510 | Fts5SegIter *p1, | |||
| 12511 | Fts5SegIter *p2, | |||
| 12512 | Fts5CResult *pRes | |||
| 12513 | ){ | |||
| 12514 | int i1 = p1 - pIter->aSeg; | |||
| 12515 | int i2 = p2 - pIter->aSeg; | |||
| 12516 | ||||
| 12517 | if( p1->pLeaf || p2->pLeaf ){ | |||
| 12518 | if( p1->pLeaf==0 ){ | |||
| 12519 | assert( pRes->iFirst==i2 )((void) (0)); | |||
| 12520 | }else if( p2->pLeaf==0 ){ | |||
| 12521 | assert( pRes->iFirst==i1 )((void) (0)); | |||
| 12522 | }else{ | |||
| 12523 | int nMin = MIN(p1->term.n, p2->term.n)(((p1->term.n) < (p2->term.n)) ? (p1->term.n) : ( p2->term.n)); | |||
| 12524 | int res = fts5Memcmp(p1->term.p, p2->term.p, nMin)((nMin)<=0 ? 0 : memcmp((p1->term.p), (p2->term.p), ( nMin))); | |||
| 12525 | if( res==0 ) res = p1->term.n - p2->term.n; | |||
| 12526 | ||||
| 12527 | if( res==0 ){ | |||
| 12528 | assert( pRes->bTermEq==1 )((void) (0)); | |||
| 12529 | assert( p1->iRowid!=p2->iRowid )((void) (0)); | |||
| 12530 | res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; | |||
| 12531 | }else{ | |||
| 12532 | assert( pRes->bTermEq==0 )((void) (0)); | |||
| 12533 | } | |||
| 12534 | ||||
| 12535 | if( res<0 ){ | |||
| 12536 | assert( pRes->iFirst==i1 )((void) (0)); | |||
| 12537 | }else{ | |||
| 12538 | assert( pRes->iFirst==i2 )((void) (0)); | |||
| 12539 | } | |||
| 12540 | } | |||
| 12541 | } | |||
| 12542 | } | |||
| 12543 | ||||
| 12544 | /* | |||
| 12545 | ** This function is a no-op unless SQLITE_DEBUG is defined when this module | |||
| 12546 | ** is compiled. In that case, this function is essentially an assert() | |||
| 12547 | ** statement used to verify that the contents of the pIter->aFirst[] array | |||
| 12548 | ** are correct. | |||
| 12549 | */ | |||
| 12550 | static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ | |||
| 12551 | if( p->rc==SQLITE_OK0 ){ | |||
| 12552 | Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
| 12553 | int i; | |||
| 12554 | ||||
| 12555 | assert( (pFirst->pLeaf==0)==pIter->base.bEof )((void) (0)); | |||
| 12556 | ||||
| 12557 | /* Check that pIter->iSwitchRowid is set correctly. */ | |||
| 12558 | for(i=0; i<pIter->nSeg; i++){ | |||
| 12559 | Fts5SegIter *p1 = &pIter->aSeg[i]; | |||
| 12560 | assert( p1==pFirst((void) (0)) | |||
| 12561 | || p1->pLeaf==0((void) (0)) | |||
| 12562 | || fts5BufferCompare(&pFirst->term, &p1->term)((void) (0)) | |||
| 12563 | || p1->iRowid==pIter->iSwitchRowid((void) (0)) | |||
| 12564 | || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev((void) (0)) | |||
| 12565 | )((void) (0)); | |||
| 12566 | } | |||
| 12567 | ||||
| 12568 | for(i=0; i<pIter->nSeg; i+=2){ | |||
| 12569 | Fts5SegIter *p1 = &pIter->aSeg[i]; | |||
| 12570 | Fts5SegIter *p2 = &pIter->aSeg[i+1]; | |||
| 12571 | Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; | |||
| 12572 | fts5AssertComparisonResult(pIter, p1, p2, pRes); | |||
| 12573 | } | |||
| 12574 | ||||
| 12575 | for(i=1; i<(pIter->nSeg / 2); i+=2){ | |||
| 12576 | Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; | |||
| 12577 | Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; | |||
| 12578 | Fts5CResult *pRes = &pIter->aFirst[i]; | |||
| 12579 | fts5AssertComparisonResult(pIter, p1, p2, pRes); | |||
| 12580 | } | |||
| 12581 | } | |||
| 12582 | } | |||
| 12583 | #else | |||
| 12584 | # define fts5AssertMultiIterSetup(x,y) | |||
| 12585 | #endif | |||
| 12586 | ||||
| 12587 | /* | |||
| 12588 | ** Do the comparison necessary to populate pIter->aFirst[iOut]. | |||
| 12589 | ** | |||
| 12590 | ** If the returned value is non-zero, then it is the index of an entry | |||
| 12591 | ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing | |||
| 12592 | ** to a key that is a duplicate of another, higher priority, | |||
| 12593 | ** segment-iterator in the pSeg->aSeg[] array. | |||
| 12594 | */ | |||
| 12595 | static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ | |||
| 12596 | int i1; /* Index of left-hand Fts5SegIter */ | |||
| 12597 | int i2; /* Index of right-hand Fts5SegIter */ | |||
| 12598 | int iRes; | |||
| 12599 | Fts5SegIter *p1; /* Left-hand Fts5SegIter */ | |||
| 12600 | Fts5SegIter *p2; /* Right-hand Fts5SegIter */ | |||
| 12601 | Fts5CResult *pRes = &pIter->aFirst[iOut]; | |||
| 12602 | ||||
| 12603 | assert( iOut<pIter->nSeg && iOut>0 )((void) (0)); | |||
| 12604 | assert( pIter->bRev==0 || pIter->bRev==1 )((void) (0)); | |||
| 12605 | ||||
| 12606 | if( iOut>=(pIter->nSeg/2) ){ | |||
| 12607 | i1 = (iOut - pIter->nSeg/2) * 2; | |||
| 12608 | i2 = i1 + 1; | |||
| 12609 | }else{ | |||
| 12610 | i1 = pIter->aFirst[iOut*2].iFirst; | |||
| 12611 | i2 = pIter->aFirst[iOut*2+1].iFirst; | |||
| 12612 | } | |||
| 12613 | p1 = &pIter->aSeg[i1]; | |||
| 12614 | p2 = &pIter->aSeg[i2]; | |||
| 12615 | ||||
| 12616 | pRes->bTermEq = 0; | |||
| 12617 | if( p1->pLeaf==0 ){ /* If p1 is at EOF */ | |||
| 12618 | iRes = i2; | |||
| 12619 | }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ | |||
| 12620 | iRes = i1; | |||
| 12621 | }else{ | |||
| 12622 | int res = fts5BufferCompare(&p1->term, &p2->term); | |||
| 12623 | if( res==0 ){ | |||
| 12624 | assert_nc( i2>i1 )((void) (0)); | |||
| 12625 | assert_nc( i2!=0 )((void) (0)); | |||
| 12626 | pRes->bTermEq = 1; | |||
| 12627 | if( p1->iRowid==p2->iRowid ){ | |||
| 12628 | return i2; | |||
| 12629 | } | |||
| 12630 | res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; | |||
| 12631 | } | |||
| 12632 | assert( res!=0 )((void) (0)); | |||
| 12633 | if( res<0 ){ | |||
| 12634 | iRes = i1; | |||
| 12635 | }else{ | |||
| 12636 | iRes = i2; | |||
| 12637 | } | |||
| 12638 | } | |||
| 12639 | ||||
| 12640 | pRes->iFirst = (u16)iRes; | |||
| 12641 | return 0; | |||
| 12642 | } | |||
| 12643 | ||||
| 12644 | /* | |||
| 12645 | ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. | |||
| 12646 | ** It is an error if leaf iLeafPgno does not exist. Unless the db is | |||
| 12647 | ** a 'secure-delete' db, if it contains no rowids then this is also an error. | |||
| 12648 | */ | |||
| 12649 | static void fts5SegIterGotoPage( | |||
| 12650 | Fts5Index *p, /* FTS5 backend object */ | |||
| 12651 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
| 12652 | int iLeafPgno | |||
| 12653 | ){ | |||
| 12654 | assert( iLeafPgno>pIter->iLeafPgno )((void) (0)); | |||
| 12655 | ||||
| 12656 | if( iLeafPgno>pIter->pSeg->pgnoLast ){ | |||
| 12657 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 12658 | }else{ | |||
| 12659 | fts5DataRelease(pIter->pNextLeaf); | |||
| 12660 | pIter->pNextLeaf = 0; | |||
| 12661 | pIter->iLeafPgno = iLeafPgno-1; | |||
| 12662 | ||||
| 12663 | while( p->rc==SQLITE_OK0 ){ | |||
| 12664 | int iOff; | |||
| 12665 | fts5SegIterNextPage(p, pIter); | |||
| 12666 | if( pIter->pLeaf==0 ) break; | |||
| 12667 | iOff = fts5LeafFirstRowidOff(pIter->pLeaf)(fts5GetU16((pIter->pLeaf)->p)); | |||
| 12668 | if( iOff>0 ){ | |||
| 12669 | u8 *a = pIter->pLeaf->p; | |||
| 12670 | int n = pIter->pLeaf->szLeaf; | |||
| 12671 | if( iOff<4 || iOff>=n ){ | |||
| 12672 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 12673 | }else{ | |||
| 12674 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); | |||
| 12675 | pIter->iLeafOffset = iOff; | |||
| 12676 | fts5SegIterLoadNPos(p, pIter); | |||
| 12677 | } | |||
| 12678 | break; | |||
| 12679 | } | |||
| 12680 | } | |||
| 12681 | } | |||
| 12682 | } | |||
| 12683 | ||||
| 12684 | /* | |||
| 12685 | ** Advance the iterator passed as the second argument until it is at or | |||
| 12686 | ** past rowid iFrom. Regardless of the value of iFrom, the iterator is | |||
| 12687 | ** always advanced at least once. | |||
| 12688 | */ | |||
| 12689 | static void fts5SegIterNextFrom( | |||
| 12690 | Fts5Index *p, /* FTS5 backend object */ | |||
| 12691 | Fts5SegIter *pIter, /* Iterator to advance */ | |||
| 12692 | i64 iMatch /* Advance iterator at least this far */ | |||
| 12693 | ){ | |||
| 12694 | int bRev = (pIter->flags & FTS5_SEGITER_REVERSE0x02); | |||
| 12695 | Fts5DlidxIter *pDlidx = pIter->pDlidx; | |||
| 12696 | int iLeafPgno = pIter->iLeafPgno; | |||
| 12697 | int bMove = 1; | |||
| 12698 | ||||
| 12699 | assert( pIter->flags & FTS5_SEGITER_ONETERM )((void) (0)); | |||
| 12700 | assert( pIter->pDlidx )((void) (0)); | |||
| 12701 | assert( pIter->pLeaf )((void) (0)); | |||
| 12702 | ||||
| 12703 | if( bRev==0 ){ | |||
| 12704 | while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ | |||
| 12705 | iLeafPgno = fts5DlidxIterPgno(pDlidx); | |||
| 12706 | fts5DlidxIterNext(p, pDlidx); | |||
| 12707 | } | |||
| 12708 | assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc )((void) (0)); | |||
| 12709 | if( iLeafPgno>pIter->iLeafPgno ){ | |||
| 12710 | fts5SegIterGotoPage(p, pIter, iLeafPgno); | |||
| 12711 | bMove = 0; | |||
| 12712 | } | |||
| 12713 | }else{ | |||
| 12714 | assert( pIter->pNextLeaf==0 )((void) (0)); | |||
| 12715 | assert( iMatch<pIter->iRowid )((void) (0)); | |||
| 12716 | while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ | |||
| 12717 | fts5DlidxIterPrev(p, pDlidx); | |||
| 12718 | } | |||
| 12719 | iLeafPgno = fts5DlidxIterPgno(pDlidx); | |||
| 12720 | ||||
| 12721 | assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno )((void) (0)); | |||
| 12722 | ||||
| 12723 | if( iLeafPgno<pIter->iLeafPgno ){ | |||
| 12724 | pIter->iLeafPgno = iLeafPgno+1; | |||
| 12725 | fts5SegIterReverseNewPage(p, pIter); | |||
| 12726 | bMove = 0; | |||
| 12727 | } | |||
| 12728 | } | |||
| 12729 | ||||
| 12730 | do{ | |||
| 12731 | if( bMove && p->rc==SQLITE_OK0 ) pIter->xNext(p, pIter, 0); | |||
| 12732 | if( pIter->pLeaf==0 ) break; | |||
| 12733 | if( bRev==0 && pIter->iRowid>=iMatch ) break; | |||
| 12734 | if( bRev!=0 && pIter->iRowid<=iMatch ) break; | |||
| 12735 | bMove = 1; | |||
| 12736 | }while( p->rc==SQLITE_OK0 ); | |||
| 12737 | } | |||
| 12738 | ||||
| 12739 | /* | |||
| 12740 | ** Free the iterator object passed as the second argument. | |||
| 12741 | */ | |||
| 12742 | static void fts5MultiIterFree(Fts5Iter *pIter){ | |||
| 12743 | if( pIter ){ | |||
| 12744 | int i; | |||
| 12745 | for(i=0; i<pIter->nSeg; i++){ | |||
| 12746 | fts5SegIterClear(&pIter->aSeg[i]); | |||
| 12747 | } | |||
| 12748 | fts5BufferFree(&pIter->poslist)sqlite3Fts5BufferFree(&pIter->poslist); | |||
| 12749 | sqlite3_freesqlite3_api->free(pIter); | |||
| 12750 | } | |||
| 12751 | } | |||
| 12752 | ||||
| 12753 | static void fts5MultiIterAdvanced( | |||
| 12754 | Fts5Index *p, /* FTS5 backend to iterate within */ | |||
| 12755 | Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ | |||
| 12756 | int iChanged, /* Index of sub-iterator just advanced */ | |||
| 12757 | int iMinset /* Minimum entry in aFirst[] to set */ | |||
| 12758 | ){ | |||
| 12759 | int i; | |||
| 12760 | for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK0; i=i/2){ | |||
| 12761 | int iEq; | |||
| 12762 | if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ | |||
| 12763 | Fts5SegIter *pSeg = &pIter->aSeg[iEq]; | |||
| 12764 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 12765 | pSeg->xNext(p, pSeg, 0); | |||
| 12766 | i = pIter->nSeg + iEq; | |||
| 12767 | } | |||
| 12768 | } | |||
| 12769 | } | |||
| 12770 | ||||
| 12771 | /* | |||
| 12772 | ** Sub-iterator iChanged of iterator pIter has just been advanced. It still | |||
| 12773 | ** points to the same term though - just a different rowid. This function | |||
| 12774 | ** attempts to update the contents of the pIter->aFirst[] accordingly. | |||
| 12775 | ** If it does so successfully, 0 is returned. Otherwise 1. | |||
| 12776 | ** | |||
| 12777 | ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() | |||
| 12778 | ** on the iterator instead. That function does the same as this one, except | |||
| 12779 | ** that it deals with more complicated cases as well. | |||
| 12780 | */ | |||
| 12781 | static int fts5MultiIterAdvanceRowid( | |||
| 12782 | Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ | |||
| 12783 | int iChanged, /* Index of sub-iterator just advanced */ | |||
| 12784 | Fts5SegIter **ppFirst | |||
| 12785 | ){ | |||
| 12786 | Fts5SegIter *pNew = &pIter->aSeg[iChanged]; | |||
| 12787 | ||||
| 12788 | if( pNew->iRowid==pIter->iSwitchRowid | |||
| 12789 | || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev | |||
| 12790 | ){ | |||
| 12791 | int i; | |||
| 12792 | Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; | |||
| 12793 | pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))) : LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | |||
| 12794 | for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ | |||
| 12795 | Fts5CResult *pRes = &pIter->aFirst[i]; | |||
| 12796 | ||||
| 12797 | assert( pNew->pLeaf )((void) (0)); | |||
| 12798 | assert( pRes->bTermEq==0 || pOther->pLeaf )((void) (0)); | |||
| 12799 | ||||
| 12800 | if( pRes->bTermEq ){ | |||
| 12801 | if( pNew->iRowid==pOther->iRowid ){ | |||
| 12802 | return 1; | |||
| 12803 | }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ | |||
| 12804 | pIter->iSwitchRowid = pOther->iRowid; | |||
| 12805 | pNew = pOther; | |||
| 12806 | }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){ | |||
| 12807 | pIter->iSwitchRowid = pOther->iRowid; | |||
| 12808 | } | |||
| 12809 | } | |||
| 12810 | pRes->iFirst = (u16)(pNew - pIter->aSeg); | |||
| 12811 | if( i==1 ) break; | |||
| 12812 | ||||
| 12813 | pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; | |||
| 12814 | } | |||
| 12815 | } | |||
| 12816 | ||||
| 12817 | *ppFirst = pNew; | |||
| 12818 | return 0; | |||
| 12819 | } | |||
| 12820 | ||||
| 12821 | /* | |||
| 12822 | ** Set the pIter->bEof variable based on the state of the sub-iterators. | |||
| 12823 | */ | |||
| 12824 | static void fts5MultiIterSetEof(Fts5Iter *pIter){ | |||
| 12825 | Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
| 12826 | pIter->base.bEof = pSeg->pLeaf==0; | |||
| 12827 | pIter->iSwitchRowid = pSeg->iRowid; | |||
| 12828 | } | |||
| 12829 | ||||
| 12830 | /* | |||
| 12831 | ** The argument to this macro must be an Fts5Data structure containing a | |||
| 12832 | ** tombstone hash page. This macro returns the key-size of the hash-page. | |||
| 12833 | */ | |||
| 12834 | #define TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8) (pPg->p[0]==4 ? 4 : 8) | |||
| 12835 | ||||
| 12836 | #define TOMBSTONE_NSLOT(pPg)((pPg->nn > 16) ? ((pPg->nn-8) / (pPg->p[0]==4 ? 4 : 8)) : 1) \ | |||
| 12837 | ((pPg->nn > 16) ? ((pPg->nn-8) / TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8)) : 1) | |||
| 12838 | ||||
| 12839 | /* | |||
| 12840 | ** Query a single tombstone hash table for rowid iRowid. Return true if | |||
| 12841 | ** it is found or false otherwise. The tombstone hash table is one of | |||
| 12842 | ** nHashTable tables. | |||
| 12843 | */ | |||
| 12844 | static int fts5IndexTombstoneQuery( | |||
| 12845 | Fts5Data *pHash, /* Hash table page to query */ | |||
| 12846 | int nHashTable, /* Number of pages attached to segment */ | |||
| 12847 | u64 iRowid /* Rowid to query hash for */ | |||
| 12848 | ){ | |||
| 12849 | const int szKey = TOMBSTONE_KEYSIZE(pHash)(pHash->p[0]==4 ? 4 : 8); | |||
| 12850 | const int nSlot = TOMBSTONE_NSLOT(pHash)((pHash->nn > 16) ? ((pHash->nn-8) / (pHash->p[0] ==4 ? 4 : 8)) : 1); | |||
| 12851 | int iSlot = (iRowid / nHashTable) % nSlot; | |||
| 12852 | int nCollide = nSlot; | |||
| 12853 | ||||
| 12854 | if( iRowid==0 ){ | |||
| 12855 | return pHash->p[1]; | |||
| 12856 | }else if( szKey==4 ){ | |||
| 12857 | u32 *aSlot = (u32*)&pHash->p[8]; | |||
| 12858 | while( aSlot[iSlot] ){ | |||
| 12859 | if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1; | |||
| 12860 | if( nCollide--==0 ) break; | |||
| 12861 | iSlot = (iSlot+1)%nSlot; | |||
| 12862 | } | |||
| 12863 | }else{ | |||
| 12864 | u64 *aSlot = (u64*)&pHash->p[8]; | |||
| 12865 | while( aSlot[iSlot] ){ | |||
| 12866 | if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1; | |||
| 12867 | if( nCollide--==0 ) break; | |||
| 12868 | iSlot = (iSlot+1)%nSlot; | |||
| 12869 | } | |||
| 12870 | } | |||
| 12871 | ||||
| 12872 | return 0; | |||
| 12873 | } | |||
| 12874 | ||||
| 12875 | /* | |||
| 12876 | ** Return true if the iterator passed as the only argument points | |||
| 12877 | ** to a segment entry for which there is a tombstone. Return false | |||
| 12878 | ** if there is no tombstone or if the iterator is already at EOF. | |||
| 12879 | */ | |||
| 12880 | static int fts5MultiIterIsDeleted(Fts5Iter *pIter){ | |||
| 12881 | int iFirst = pIter->aFirst[1].iFirst; | |||
| 12882 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | |||
| 12883 | Fts5TombstoneArray *pArray = pSeg->pTombArray; | |||
| 12884 | ||||
| 12885 | if( pSeg->pLeaf && pArray ){ | |||
| 12886 | /* Figure out which page the rowid might be present on. */ | |||
| 12887 | int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone; | |||
| 12888 | assert( iPg>=0 )((void) (0)); | |||
| 12889 | ||||
| 12890 | /* If tombstone hash page iPg has not yet been loaded from the | |||
| 12891 | ** database, load it now. */ | |||
| 12892 | if( pArray->apTombstone[iPg]==0 ){ | |||
| 12893 | pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex, | |||
| 12894 | FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)( ((i64)(pSeg->pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << ( 31)) + ((i64)(iPg)) ) | |||
| 12895 | ); | |||
| 12896 | if( pArray->apTombstone[iPg]==0 ) return 0; | |||
| 12897 | } | |||
| 12898 | ||||
| 12899 | return fts5IndexTombstoneQuery( | |||
| 12900 | pArray->apTombstone[iPg], | |||
| 12901 | pArray->nTombstone, | |||
| 12902 | pSeg->iRowid | |||
| 12903 | ); | |||
| 12904 | } | |||
| 12905 | ||||
| 12906 | return 0; | |||
| 12907 | } | |||
| 12908 | ||||
| 12909 | /* | |||
| 12910 | ** Move the iterator to the next entry. | |||
| 12911 | ** | |||
| 12912 | ** If an error occurs, an error code is left in Fts5Index.rc. It is not | |||
| 12913 | ** considered an error if the iterator reaches EOF, or if it is already at | |||
| 12914 | ** EOF when this function is called. | |||
| 12915 | */ | |||
| 12916 | static void fts5MultiIterNext( | |||
| 12917 | Fts5Index *p, | |||
| 12918 | Fts5Iter *pIter, | |||
| 12919 | int bFrom, /* True if argument iFrom is valid */ | |||
| 12920 | i64 iFrom /* Advance at least as far as this */ | |||
| 12921 | ){ | |||
| 12922 | int bUseFrom = bFrom; | |||
| 12923 | assert( pIter->base.bEof==0 )((void) (0)); | |||
| 12924 | while( p->rc==SQLITE_OK0 ){ | |||
| 12925 | int iFirst = pIter->aFirst[1].iFirst; | |||
| 12926 | int bNewTerm = 0; | |||
| 12927 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | |||
| 12928 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 12929 | if( bUseFrom && pSeg->pDlidx ){ | |||
| 12930 | fts5SegIterNextFrom(p, pSeg, iFrom); | |||
| 12931 | }else{ | |||
| 12932 | pSeg->xNext(p, pSeg, &bNewTerm); | |||
| 12933 | } | |||
| 12934 | ||||
| 12935 | if( pSeg->pLeaf==0 || bNewTerm | |||
| 12936 | || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) | |||
| 12937 | ){ | |||
| 12938 | fts5MultiIterAdvanced(p, pIter, iFirst, 1); | |||
| 12939 | fts5MultiIterSetEof(pIter); | |||
| 12940 | pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | |||
| 12941 | if( pSeg->pLeaf==0 ) return; | |||
| 12942 | } | |||
| 12943 | ||||
| 12944 | fts5AssertMultiIterSetup(p, pIter); | |||
| 12945 | assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf )((void) (0)); | |||
| 12946 | if( (pIter->bSkipEmpty==0 || pSeg->nPos) | |||
| 12947 | && 0==fts5MultiIterIsDeleted(pIter) | |||
| 12948 | ){ | |||
| 12949 | pIter->xSetOutputs(pIter, pSeg); | |||
| 12950 | return; | |||
| 12951 | } | |||
| 12952 | bUseFrom = 0; | |||
| 12953 | } | |||
| 12954 | } | |||
| 12955 | ||||
| 12956 | static void fts5MultiIterNext2( | |||
| 12957 | Fts5Index *p, | |||
| 12958 | Fts5Iter *pIter, | |||
| 12959 | int *pbNewTerm /* OUT: True if *might* be new term */ | |||
| 12960 | ){ | |||
| 12961 | assert( pIter->bSkipEmpty )((void) (0)); | |||
| 12962 | if( p->rc==SQLITE_OK0 ){ | |||
| 12963 | *pbNewTerm = 0; | |||
| 12964 | do{ | |||
| 12965 | int iFirst = pIter->aFirst[1].iFirst; | |||
| 12966 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; | |||
| 12967 | int bNewTerm = 0; | |||
| 12968 | ||||
| 12969 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 12970 | pSeg->xNext(p, pSeg, &bNewTerm); | |||
| 12971 | if( pSeg->pLeaf==0 || bNewTerm | |||
| 12972 | || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) | |||
| 12973 | ){ | |||
| 12974 | fts5MultiIterAdvanced(p, pIter, iFirst, 1); | |||
| 12975 | fts5MultiIterSetEof(pIter); | |||
| 12976 | *pbNewTerm = 1; | |||
| 12977 | } | |||
| 12978 | fts5AssertMultiIterSetup(p, pIter); | |||
| 12979 | ||||
| 12980 | }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter)) | |||
| 12981 | && (p->rc==SQLITE_OK0) | |||
| 12982 | ); | |||
| 12983 | } | |||
| 12984 | } | |||
| 12985 | ||||
| 12986 | static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){ | |||
| 12987 | UNUSED_PARAM2(pUnused1, pUnused2)(void)(pUnused1), (void)(pUnused2); | |||
| 12988 | } | |||
| 12989 | ||||
| 12990 | static Fts5Iter *fts5MultiIterAlloc( | |||
| 12991 | Fts5Index *p, /* FTS5 backend to iterate within */ | |||
| 12992 | int nSeg | |||
| 12993 | ){ | |||
| 12994 | Fts5Iter *pNew; | |||
| 12995 | i64 nSlot; /* Power of two >= nSeg */ | |||
| 12996 | ||||
| 12997 | for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); | |||
| 12998 | pNew = fts5IdxMalloc(p, | |||
| 12999 | SZ_FTS5ITER(nSlot)(__builtin_offsetof(Fts5Iter, aSeg)+(nSlot)*sizeof(Fts5SegIter )) + /* pNew + pNew->aSeg[] */ | |||
| 13000 | sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ | |||
| 13001 | ); | |||
| 13002 | if( pNew ){ | |||
| 13003 | pNew->nSeg = nSlot; | |||
| 13004 | pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; | |||
| 13005 | pNew->pIndex = p; | |||
| 13006 | pNew->xSetOutputs = fts5IterSetOutputs_Noop; | |||
| 13007 | } | |||
| 13008 | return pNew; | |||
| 13009 | } | |||
| 13010 | ||||
| 13011 | static void fts5PoslistCallback( | |||
| 13012 | Fts5Index *pUnused, | |||
| 13013 | void *pContext, | |||
| 13014 | const u8 *pChunk, int nChunk | |||
| 13015 | ){ | |||
| 13016 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
| 13017 | assert_nc( nChunk>=0 )((void) (0)); | |||
| 13018 | if( nChunk>0 ){ | |||
| 13019 | fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk){ ((void) (0)); memcpy(&((Fts5Buffer*)pContext)->p[((Fts5Buffer *)pContext)->n], pChunk, nChunk); ((Fts5Buffer*)pContext)-> n += nChunk; }; | |||
| 13020 | } | |||
| 13021 | } | |||
| 13022 | ||||
| 13023 | typedef struct PoslistCallbackCtx PoslistCallbackCtx; | |||
| 13024 | struct PoslistCallbackCtx { | |||
| 13025 | Fts5Buffer *pBuf; /* Append to this buffer */ | |||
| 13026 | Fts5Colset *pColset; /* Restrict matches to this column */ | |||
| 13027 | int eState; /* See above */ | |||
| 13028 | }; | |||
| 13029 | ||||
| 13030 | typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; | |||
| 13031 | struct PoslistOffsetsCtx { | |||
| 13032 | Fts5Buffer *pBuf; /* Append to this buffer */ | |||
| 13033 | Fts5Colset *pColset; /* Restrict matches to this column */ | |||
| 13034 | int iRead; | |||
| 13035 | int iWrite; | |||
| 13036 | }; | |||
| 13037 | ||||
| 13038 | /* | |||
| 13039 | ** TODO: Make this more efficient! | |||
| 13040 | */ | |||
| 13041 | static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ | |||
| 13042 | int i; | |||
| 13043 | for(i=0; i<pColset->nCol; i++){ | |||
| 13044 | if( pColset->aiCol[i]==iCol ) return 1; | |||
| 13045 | } | |||
| 13046 | return 0; | |||
| 13047 | } | |||
| 13048 | ||||
| 13049 | static void fts5PoslistOffsetsCallback( | |||
| 13050 | Fts5Index *pUnused, | |||
| 13051 | void *pContext, | |||
| 13052 | const u8 *pChunk, int nChunk | |||
| 13053 | ){ | |||
| 13054 | PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; | |||
| 13055 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
| 13056 | assert_nc( nChunk>=0 )((void) (0)); | |||
| 13057 | if( nChunk>0 ){ | |||
| 13058 | int i = 0; | |||
| 13059 | while( i<nChunk ){ | |||
| 13060 | int iVal; | |||
| 13061 | i += fts5GetVarint32(&pChunk[i], iVal)sqlite3Fts5GetVarint32(&pChunk[i],(u32*)&(iVal)); | |||
| 13062 | iVal += pCtx->iRead - 2; | |||
| 13063 | pCtx->iRead = iVal; | |||
| 13064 | if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ | |||
| 13065 | fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite){ (pCtx->pBuf)->n += sqlite3Fts5PutVarint(&(pCtx-> pBuf)->p[(pCtx->pBuf)->n], (iVal + 2 - pCtx->iWrite )); ((void) (0)); }; | |||
| 13066 | pCtx->iWrite = iVal; | |||
| 13067 | } | |||
| 13068 | } | |||
| 13069 | } | |||
| 13070 | } | |||
| 13071 | ||||
| 13072 | static void fts5PoslistFilterCallback( | |||
| 13073 | Fts5Index *pUnused, | |||
| 13074 | void *pContext, | |||
| 13075 | const u8 *pChunk, int nChunk | |||
| 13076 | ){ | |||
| 13077 | PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; | |||
| 13078 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
| 13079 | assert_nc( nChunk>=0 )((void) (0)); | |||
| 13080 | if( nChunk>0 ){ | |||
| 13081 | /* Search through to find the first varint with value 1. This is the | |||
| 13082 | ** start of the next column's hits. */ | |||
| 13083 | int i = 0; | |||
| 13084 | int iStart = 0; | |||
| 13085 | ||||
| 13086 | if( pCtx->eState==2 ){ | |||
| 13087 | int iCol; | |||
| 13088 | fts5FastGetVarint32(pChunk, i, iCol){ iCol = (pChunk)[i++]; if( iCol & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(pChunk)[i],(u32*)&(iCol)); } }; | |||
| 13089 | if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ | |||
| 13090 | pCtx->eState = 1; | |||
| 13091 | fts5BufferSafeAppendVarint(pCtx->pBuf, 1){ (pCtx->pBuf)->n += sqlite3Fts5PutVarint(&(pCtx-> pBuf)->p[(pCtx->pBuf)->n], (1)); ((void) (0)); }; | |||
| 13092 | }else{ | |||
| 13093 | pCtx->eState = 0; | |||
| 13094 | } | |||
| 13095 | } | |||
| 13096 | ||||
| 13097 | do { | |||
| 13098 | while( i<nChunk && pChunk[i]!=0x01 ){ | |||
| 13099 | while( pChunk[i] & 0x80 ) i++; | |||
| 13100 | i++; | |||
| 13101 | } | |||
| 13102 | if( pCtx->eState ){ | |||
| 13103 | fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart){ ((void) (0)); memcpy(&(pCtx->pBuf)->p[(pCtx->pBuf )->n], &pChunk[iStart], i-iStart); (pCtx->pBuf)-> n += i-iStart; }; | |||
| 13104 | } | |||
| 13105 | if( i<nChunk ){ | |||
| 13106 | int iCol; | |||
| 13107 | iStart = i; | |||
| 13108 | i++; | |||
| 13109 | if( i>=nChunk ){ | |||
| 13110 | pCtx->eState = 2; | |||
| 13111 | }else{ | |||
| 13112 | fts5FastGetVarint32(pChunk, i, iCol){ iCol = (pChunk)[i++]; if( iCol & 0x80 ){ i--; i += sqlite3Fts5GetVarint32 (&(pChunk)[i],(u32*)&(iCol)); } }; | |||
| 13113 | pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); | |||
| 13114 | if( pCtx->eState ){ | |||
| 13115 | fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart){ ((void) (0)); memcpy(&(pCtx->pBuf)->p[(pCtx->pBuf )->n], &pChunk[iStart], i-iStart); (pCtx->pBuf)-> n += i-iStart; }; | |||
| 13116 | iStart = i; | |||
| 13117 | } | |||
| 13118 | } | |||
| 13119 | } | |||
| 13120 | }while( i<nChunk ); | |||
| 13121 | } | |||
| 13122 | } | |||
| 13123 | ||||
| 13124 | static void fts5ChunkIterate( | |||
| 13125 | Fts5Index *p, /* Index object */ | |||
| 13126 | Fts5SegIter *pSeg, /* Poslist of this iterator */ | |||
| 13127 | void *pCtx, /* Context pointer for xChunk callback */ | |||
| 13128 | void (*xChunk)(Fts5Index*, void*, const u8*, int) | |||
| 13129 | ){ | |||
| 13130 | int nRem = pSeg->nPos; /* Number of bytes still to come */ | |||
| 13131 | Fts5Data *pData = 0; | |||
| 13132 | u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | |||
| 13133 | int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset)(((nRem) < (pSeg->pLeaf->szLeaf - pSeg->iLeafOffset )) ? (nRem) : (pSeg->pLeaf->szLeaf - pSeg->iLeafOffset )); | |||
| 13134 | int pgno = pSeg->iLeafPgno; | |||
| 13135 | int pgnoSave = 0; | |||
| 13136 | ||||
| 13137 | /* This function does not work with detail=none databases. */ | |||
| 13138 | assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | |||
| 13139 | ||||
| 13140 | if( (pSeg->flags & FTS5_SEGITER_REVERSE0x02)==0 ){ | |||
| 13141 | pgnoSave = pgno+1; | |||
| 13142 | } | |||
| 13143 | ||||
| 13144 | while( 1 ){ | |||
| 13145 | xChunk(p, pCtx, pChunk, nChunk); | |||
| 13146 | nRem -= nChunk; | |||
| 13147 | fts5DataRelease(pData); | |||
| 13148 | if( nRem<=0 ){ | |||
| 13149 | break; | |||
| 13150 | }else if( pSeg->pSeg==0 ){ | |||
| 13151 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 13152 | return; | |||
| 13153 | }else{ | |||
| 13154 | pgno++; | |||
| 13155 | pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)( ((i64)(pSeg->pSeg->iSegid) << (31 +5 +1)) + ((i64 )(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno )) )); | |||
| 13156 | if( pData==0 ) break; | |||
| 13157 | pChunk = &pData->p[4]; | |||
| 13158 | nChunk = MIN(nRem, pData->szLeaf - 4)(((nRem) < (pData->szLeaf - 4)) ? (nRem) : (pData->szLeaf - 4)); | |||
| 13159 | if( pgno==pgnoSave ){ | |||
| 13160 | assert( pSeg->pNextLeaf==0 )((void) (0)); | |||
| 13161 | pSeg->pNextLeaf = pData; | |||
| 13162 | pData = 0; | |||
| 13163 | } | |||
| 13164 | } | |||
| 13165 | } | |||
| 13166 | } | |||
| 13167 | ||||
| 13168 | /* | |||
| 13169 | ** Iterator pSeg currently points to a valid entry (not EOF). This | |||
| 13170 | ** function appends the position list data for the current entry to | |||
| 13171 | ** buffer pBuf. It does not make a copy of the position-list size | |||
| 13172 | ** field. | |||
| 13173 | */ | |||
| 13174 | static void fts5SegiterPoslist( | |||
| 13175 | Fts5Index *p, | |||
| 13176 | Fts5SegIter *pSeg, | |||
| 13177 | Fts5Colset *pColset, | |||
| 13178 | Fts5Buffer *pBuf | |||
| 13179 | ){ | |||
| 13180 | assert( pBuf!=0 )((void) (0)); | |||
| 13181 | assert( pSeg!=0 )((void) (0)); | |||
| 13182 | if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING)( (u32)((pBuf)->n) + (u32)(pSeg->nPos+8) <= (u32)((pBuf )->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->rc),(pBuf ),(pSeg->nPos+8)+(pBuf)->n) ) ){ | |||
| 13183 | assert( pBuf->p!=0 )((void) (0)); | |||
| 13184 | assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING )((void) (0)); | |||
| 13185 | memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING8); | |||
| 13186 | if( pColset==0 ){ | |||
| 13187 | fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); | |||
| 13188 | }else{ | |||
| 13189 | if( p->pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 13190 | PoslistCallbackCtx sCtx; | |||
| 13191 | sCtx.pBuf = pBuf; | |||
| 13192 | sCtx.pColset = pColset; | |||
| 13193 | sCtx.eState = fts5IndexColsetTest(pColset, 0); | |||
| 13194 | assert( sCtx.eState==0 || sCtx.eState==1 )((void) (0)); | |||
| 13195 | fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); | |||
| 13196 | }else{ | |||
| 13197 | PoslistOffsetsCtx sCtx; | |||
| 13198 | memset(&sCtx, 0, sizeof(sCtx)); | |||
| 13199 | sCtx.pBuf = pBuf; | |||
| 13200 | sCtx.pColset = pColset; | |||
| 13201 | fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); | |||
| 13202 | } | |||
| 13203 | } | |||
| 13204 | } | |||
| 13205 | } | |||
| 13206 | ||||
| 13207 | /* | |||
| 13208 | ** Parameter pPos points to a buffer containing a position list, size nPos. | |||
| 13209 | ** This function filters it according to pColset (which must be non-NULL) | |||
| 13210 | ** and sets pIter->base.pData/nData to point to the new position list. | |||
| 13211 | ** If memory is required for the new position list, use buffer pIter->poslist. | |||
| 13212 | ** Or, if the new position list is a contiguous subset of the input, set | |||
| 13213 | ** pIter->base.pData/nData to point directly to it. | |||
| 13214 | ** | |||
| 13215 | ** This function is a no-op if *pRc is other than SQLITE_OK when it is | |||
| 13216 | ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM | |||
| 13217 | ** before returning. | |||
| 13218 | */ | |||
| 13219 | static void fts5IndexExtractColset( | |||
| 13220 | int *pRc, | |||
| 13221 | Fts5Colset *pColset, /* Colset to filter on */ | |||
| 13222 | const u8 *pPos, int nPos, /* Position list */ | |||
| 13223 | Fts5Iter *pIter | |||
| 13224 | ){ | |||
| 13225 | if( *pRc==SQLITE_OK0 ){ | |||
| 13226 | const u8 *p = pPos; | |||
| 13227 | const u8 *aCopy = p; | |||
| 13228 | const u8 *pEnd = &p[nPos]; /* One byte past end of position list */ | |||
| 13229 | int i = 0; | |||
| 13230 | int iCurrent = 0; | |||
| 13231 | ||||
| 13232 | if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){ | |||
| 13233 | return; | |||
| 13234 | } | |||
| 13235 | ||||
| 13236 | while( 1 ){ | |||
| 13237 | while( pColset->aiCol[i]<iCurrent ){ | |||
| 13238 | i++; | |||
| 13239 | if( i==pColset->nCol ){ | |||
| 13240 | pIter->base.pData = pIter->poslist.p; | |||
| 13241 | pIter->base.nData = pIter->poslist.n; | |||
| 13242 | return; | |||
| 13243 | } | |||
| 13244 | } | |||
| 13245 | ||||
| 13246 | /* Advance pointer p until it points to pEnd or an 0x01 byte that is | |||
| 13247 | ** not part of a varint */ | |||
| 13248 | while( p<pEnd && *p!=0x01 ){ | |||
| 13249 | while( *p++ & 0x80 ); | |||
| 13250 | } | |||
| 13251 | ||||
| 13252 | if( pColset->aiCol[i]==iCurrent ){ | |||
| 13253 | if( pColset->nCol==1 ){ | |||
| 13254 | pIter->base.pData = aCopy; | |||
| 13255 | pIter->base.nData = p-aCopy; | |||
| 13256 | return; | |||
| 13257 | } | |||
| 13258 | fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy){ ((void) (0)); memcpy(&(&pIter->poslist)->p[(& pIter->poslist)->n], aCopy, p-aCopy); (&pIter->poslist )->n += p-aCopy; }; | |||
| 13259 | } | |||
| 13260 | if( p>=pEnd ){ | |||
| 13261 | pIter->base.pData = pIter->poslist.p; | |||
| 13262 | pIter->base.nData = pIter->poslist.n; | |||
| 13263 | return; | |||
| 13264 | } | |||
| 13265 | aCopy = p++; | |||
| 13266 | iCurrent = *p++; | |||
| 13267 | if( iCurrent & 0x80 ){ | |||
| 13268 | p--; | |||
| 13269 | p += fts5GetVarint32(p, iCurrent)sqlite3Fts5GetVarint32(p,(u32*)&(iCurrent)); | |||
| 13270 | } | |||
| 13271 | } | |||
| 13272 | } | |||
| 13273 | ||||
| 13274 | } | |||
| 13275 | ||||
| 13276 | /* | |||
| 13277 | ** xSetOutputs callback used by detail=none tables. | |||
| 13278 | */ | |||
| 13279 | static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
| 13280 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | |||
| 13281 | pIter->base.iRowid = pSeg->iRowid; | |||
| 13282 | pIter->base.nData = pSeg->nPos; | |||
| 13283 | } | |||
| 13284 | ||||
| 13285 | /* | |||
| 13286 | ** xSetOutputs callback used by detail=full and detail=col tables when no | |||
| 13287 | ** column filters are specified. | |||
| 13288 | */ | |||
| 13289 | static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
| 13290 | pIter->base.iRowid = pSeg->iRowid; | |||
| 13291 | pIter->base.nData = pSeg->nPos; | |||
| 13292 | ||||
| 13293 | assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE )((void) (0)); | |||
| 13294 | assert( pIter->pColset==0 )((void) (0)); | |||
| 13295 | ||||
| 13296 | if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ | |||
| 13297 | /* All data is stored on the current page. Populate the output | |||
| 13298 | ** variables to point into the body of the page object. */ | |||
| 13299 | pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | |||
| 13300 | }else{ | |||
| 13301 | /* The data is distributed over two or more pages. Copy it into the | |||
| 13302 | ** Fts5Iter.poslist buffer and then set the output pointer to point | |||
| 13303 | ** to this buffer. */ | |||
| 13304 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | |||
| 13305 | fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); | |||
| 13306 | pIter->base.pData = pIter->poslist.p; | |||
| 13307 | } | |||
| 13308 | } | |||
| 13309 | ||||
| 13310 | /* | |||
| 13311 | ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match | |||
| 13312 | ** against no columns at all). | |||
| 13313 | */ | |||
| 13314 | static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
| 13315 | UNUSED_PARAM(pSeg)(void)(pSeg); | |||
| 13316 | pIter->base.nData = 0; | |||
| 13317 | } | |||
| 13318 | ||||
| 13319 | /* | |||
| 13320 | ** xSetOutputs callback used by detail=col when there is a column filter | |||
| 13321 | ** and there are more than 100 columns. Also called as a fallback from | |||
| 13322 | ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. | |||
| 13323 | */ | |||
| 13324 | static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
| 13325 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | |||
| 13326 | fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); | |||
| 13327 | pIter->base.iRowid = pSeg->iRowid; | |||
| 13328 | pIter->base.pData = pIter->poslist.p; | |||
| 13329 | pIter->base.nData = pIter->poslist.n; | |||
| 13330 | } | |||
| 13331 | ||||
| 13332 | /* | |||
| 13333 | ** xSetOutputs callback used when: | |||
| 13334 | ** | |||
| 13335 | ** * detail=col, | |||
| 13336 | ** * there is a column filter, and | |||
| 13337 | ** * the table contains 100 or fewer columns. | |||
| 13338 | ** | |||
| 13339 | ** The last point is to ensure all column numbers are stored as | |||
| 13340 | ** single-byte varints. | |||
| 13341 | */ | |||
| 13342 | static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
| 13343 | ||||
| 13344 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | |||
| 13345 | assert( pIter->pColset )((void) (0)); | |||
| 13346 | ||||
| 13347 | if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ | |||
| 13348 | fts5IterSetOutputs_Col(pIter, pSeg); | |||
| 13349 | }else{ | |||
| 13350 | u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; | |||
| 13351 | u8 *pEnd = (u8*)&a[pSeg->nPos]; | |||
| 13352 | int iPrev = 0; | |||
| 13353 | int *aiCol = pIter->pColset->aiCol; | |||
| 13354 | int *aiColEnd = &aiCol[pIter->pColset->nCol]; | |||
| 13355 | ||||
| 13356 | u8 *aOut = pIter->poslist.p; | |||
| 13357 | int iPrevOut = 0; | |||
| 13358 | ||||
| 13359 | pIter->base.iRowid = pSeg->iRowid; | |||
| 13360 | ||||
| 13361 | while( a<pEnd ){ | |||
| 13362 | iPrev += (int)a++[0] - 2; | |||
| 13363 | while( *aiCol<iPrev ){ | |||
| 13364 | aiCol++; | |||
| 13365 | if( aiCol==aiColEnd ) goto setoutputs_col_out; | |||
| 13366 | } | |||
| 13367 | if( *aiCol==iPrev ){ | |||
| 13368 | *aOut++ = (u8)((iPrev - iPrevOut) + 2); | |||
| 13369 | iPrevOut = iPrev; | |||
| 13370 | } | |||
| 13371 | } | |||
| 13372 | ||||
| 13373 | setoutputs_col_out: | |||
| 13374 | pIter->base.pData = pIter->poslist.p; | |||
| 13375 | pIter->base.nData = aOut - pIter->poslist.p; | |||
| 13376 | } | |||
| 13377 | } | |||
| 13378 | ||||
| 13379 | /* | |||
| 13380 | ** xSetOutputs callback used by detail=full when there is a column filter. | |||
| 13381 | */ | |||
| 13382 | static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ | |||
| 13383 | Fts5Colset *pColset = pIter->pColset; | |||
| 13384 | pIter->base.iRowid = pSeg->iRowid; | |||
| 13385 | ||||
| 13386 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL )((void) (0)); | |||
| 13387 | assert( pColset )((void) (0)); | |||
| 13388 | ||||
| 13389 | if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ | |||
| 13390 | /* All data is stored on the current page. Populate the output | |||
| 13391 | ** variables to point into the body of the page object. */ | |||
| 13392 | const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; | |||
| 13393 | int *pRc = &pIter->pIndex->rc; | |||
| 13394 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | |||
| 13395 | fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter); | |||
| 13396 | }else{ | |||
| 13397 | /* The data is distributed over two or more pages. Copy it into the | |||
| 13398 | ** Fts5Iter.poslist buffer and then set the output pointer to point | |||
| 13399 | ** to this buffer. */ | |||
| 13400 | fts5BufferZero(&pIter->poslist)sqlite3Fts5BufferZero(&pIter->poslist); | |||
| 13401 | fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); | |||
| 13402 | pIter->base.pData = pIter->poslist.p; | |||
| 13403 | pIter->base.nData = pIter->poslist.n; | |||
| 13404 | } | |||
| 13405 | } | |||
| 13406 | ||||
| 13407 | static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ | |||
| 13408 | assert( pIter!=0 || (*pRc)!=SQLITE_OK )((void) (0)); | |||
| 13409 | if( *pRc==SQLITE_OK0 ){ | |||
| 13410 | Fts5Config *pConfig = pIter->pIndex->pConfig; | |||
| 13411 | if( pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 13412 | pIter->xSetOutputs = fts5IterSetOutputs_None; | |||
| 13413 | } | |||
| 13414 | ||||
| 13415 | else if( pIter->pColset==0 ){ | |||
| 13416 | pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; | |||
| 13417 | } | |||
| 13418 | ||||
| 13419 | else if( pIter->pColset->nCol==0 ){ | |||
| 13420 | pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset; | |||
| 13421 | } | |||
| 13422 | ||||
| 13423 | else if( pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 13424 | pIter->xSetOutputs = fts5IterSetOutputs_Full; | |||
| 13425 | } | |||
| 13426 | ||||
| 13427 | else{ | |||
| 13428 | assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS )((void) (0)); | |||
| 13429 | if( pConfig->nCol<=100 ){ | |||
| 13430 | pIter->xSetOutputs = fts5IterSetOutputs_Col100; | |||
| 13431 | sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); | |||
| 13432 | }else{ | |||
| 13433 | pIter->xSetOutputs = fts5IterSetOutputs_Col; | |||
| 13434 | } | |||
| 13435 | } | |||
| 13436 | } | |||
| 13437 | } | |||
| 13438 | ||||
| 13439 | /* | |||
| 13440 | ** All the component segment-iterators of pIter have been set up. This | |||
| 13441 | ** function finishes setup for iterator pIter itself. | |||
| 13442 | */ | |||
| 13443 | static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){ | |||
| 13444 | int iIter; | |||
| 13445 | for(iIter=pIter->nSeg-1; iIter>0; iIter--){ | |||
| 13446 | int iEq; | |||
| 13447 | if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){ | |||
| 13448 | Fts5SegIter *pSeg = &pIter->aSeg[iEq]; | |||
| 13449 | if( p->rc==SQLITE_OK0 ) pSeg->xNext(p, pSeg, 0); | |||
| 13450 | fts5MultiIterAdvanced(p, pIter, iEq, iIter); | |||
| 13451 | } | |||
| 13452 | } | |||
| 13453 | fts5MultiIterSetEof(pIter); | |||
| 13454 | fts5AssertMultiIterSetup(p, pIter); | |||
| 13455 | ||||
| 13456 | if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter)) | |||
| 13457 | || fts5MultiIterIsDeleted(pIter) | |||
| 13458 | ){ | |||
| 13459 | fts5MultiIterNext(p, pIter, 0, 0); | |||
| 13460 | }else if( pIter->base.bEof==0 ){ | |||
| 13461 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | |||
| 13462 | pIter->xSetOutputs(pIter, pSeg); | |||
| 13463 | } | |||
| 13464 | } | |||
| 13465 | ||||
| 13466 | /* | |||
| 13467 | ** Allocate a new Fts5Iter object. | |||
| 13468 | ** | |||
| 13469 | ** The new object will be used to iterate through data in structure pStruct. | |||
| 13470 | ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel | |||
| 13471 | ** is zero or greater, data from the first nSegment segments on level iLevel | |||
| 13472 | ** is merged. | |||
| 13473 | ** | |||
| 13474 | ** The iterator initially points to the first term/rowid entry in the | |||
| 13475 | ** iterated data. | |||
| | ** | |||
| | ** When iLevel is -ve and the index has a hash table (p->pHash), an extra | |||
| | ** segment iterator over the hash table is added unless the | |||
| | ** FTS5INDEX_QUERY_SKIPHASH flag is set. A non-NULL pTerm is only permitted | |||
| | ** when iLevel is -ve (see the assert() below). | |||
| | ** | |||
| | ** The new iterator is returned via *ppOut. If p->rc is not SQLITE_OK by | |||
| | ** the time setup would complete, the iterator is freed (or was never | |||
| | ** allocated) and *ppOut is set to NULL. | |||
| 13476 | */ | |||
| 13477 | static void fts5MultiIterNew( | |||
| 13478 | Fts5Index *p, /* FTS5 backend to iterate within */ | |||
| 13479 | Fts5Structure *pStruct, /* Structure of specific index */ | |||
| 13480 | int flags, /* FTS5INDEX_QUERY_XXX flags */ | |||
| 13481 | Fts5Colset *pColset, /* Colset to filter on (or NULL) */ | |||
| 13482 | const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ | |||
| 13483 | int iLevel, /* Level to iterate (-1 for all) */ | |||
| 13484 | int nSegment, /* Number of segments to merge (iLevel>=0) */ | |||
| 13485 | Fts5Iter **ppOut /* New object */ | |||
| 13486 | ){ | |||
| 13487 | int nSeg = 0; /* Number of segment-iters in use */ | |||
| 13488 | int iIter = 0; /* Next unused slot in pNew->aSeg[] */ | |||
| 13489 | int iSeg; /* Used to iterate through segments */ | |||
| 13490 | Fts5StructureLevel *pLvl; | |||
| 13491 | Fts5Iter *pNew; | |||
| 13492 | ||||
| 13493 | assert( (pTerm==0 && nTerm==0) || iLevel<0 )((void) (0)); | |||
| 13494 | ||||
| 13495 | /* Allocate space for the new multi-seg-iterator. */ | |||
| 13496 | if( p->rc==SQLITE_OK0 ){ | |||
| 13497 | if( iLevel<0 ){ | |||
| 13498 | assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) )((void) (0)); | |||
| 13499 | nSeg = pStruct->nSegment; | |||
| 13500 | nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH0x0040)); /* +1 for the hash table */ | |||
| 13501 | }else{ | |||
| 13502 | nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment)(((pStruct->aLevel[iLevel].nSeg) < (nSegment)) ? (pStruct ->aLevel[iLevel].nSeg) : (nSegment)); | |||
| 13503 | } | |||
| 13504 | } | |||
| 13505 | *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); | |||
| 13506 | if( pNew==0 ){ | |||
| 13507 | assert( p->rc!=SQLITE_OK )((void) (0)); | |||
| 13508 | goto fts5MultiIterNew_post_check; | |||
| 13509 | } | |||
| 13510 | pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC0x0002)); | |||
| 13511 | pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY0x0010)); | |||
| 13512 | pNew->pColset = pColset; | |||
| 13513 | if( (flags & FTS5INDEX_QUERY_NOOUTPUT0x0020)==0 ){ | |||
| 13514 | fts5IterSetOutputCb(&p->rc, pNew); | |||
| 13515 | } | |||
| 13516 | ||||
| 13517 | /* Initialize each of the component segment iterators. */ | |||
| 13518 | if( p->rc==SQLITE_OK0 ){ | |||
| 13519 | if( iLevel<0 ){ | |||
| 13520 | Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; | |||
| 13521 | if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH0x0040) ){ | |||
| 13522 | /* Add a segment iterator for the current contents of the hash table. */ | |||
| 13523 | Fts5SegIter *pIter = &pNew->aSeg[iIter++]; | |||
| 13524 | fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); | |||
| 13525 | } | |||
| 13526 | for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){ | |||
| 13527 | for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ /* last segment of each level first */ | |||
| 13528 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | |||
| 13529 | Fts5SegIter *pIter = &pNew->aSeg[iIter++]; | |||
| 13530 | if( pTerm==0 ){ | |||
| 13531 | fts5SegIterInit(p, pSeg, pIter); | |||
| 13532 | }else{ | |||
| 13533 | fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter); | |||
| 13534 | } | |||
| 13535 | } | |||
| 13536 | } | |||
| 13537 | }else{ | |||
| 13538 | pLvl = &pStruct->aLevel[iLevel]; | |||
| 13539 | for(iSeg=nSeg-1; iSeg>=0; iSeg--){ | |||
| 13540 | fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); | |||
| 13541 | } | |||
| 13542 | } | |||
| 13543 | assert( iIter==nSeg )((void) (0)); | |||
| 13544 | } | |||
| 13545 | ||||
| 13546 | /* If the above was successful, each component iterator now points | |||
| 13547 | ** to the first entry in its segment. In this case initialize the | |||
| 13548 | ** aFirst[] array. Or, if an error has occurred, free the iterator | |||
| 13549 | ** object and set the output variable to NULL. */ | |||
| 13550 | if( p->rc==SQLITE_OK0 ){ | |||
| 13551 | fts5MultiIterFinishSetup(p, pNew); | |||
| 13552 | }else{ | |||
| 13553 | fts5MultiIterFree(pNew); | |||
| 13554 | *ppOut = 0; | |||
| 13555 | } | |||
| 13556 | ||||
| 13557 | fts5MultiIterNew_post_check: | |||
| 13558 | assert( (*ppOut)!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
| 13559 | return; | |||
| 13560 | } | |||
| 13561 | ||||
| 13562 | /* | |||
| 13563 | ** Create an Fts5Iter that iterates through the doclist provided | |||
| 13564 | ** as the second argument. | |||
| | ** | |||
| | ** The iterator is allocated with two segment slots and only aSeg[1] is | |||
| | ** used. If pData->szLeaf is greater than zero, aSeg[1] keeps pData as its | |||
| | ** leaf and the local pData pointer is cleared, so the fts5DataRelease() | |||
| | ** call at the end is handed NULL. Otherwise the iterator is marked EOF | |||
| | ** and pData is passed to fts5DataRelease(). | |||
| | ** | |||
| | ** If the allocation fails, *ppOut is not written and pData is still | |||
| | ** passed to fts5DataRelease(). | |||
| 13565 | */ | |||
| 13566 | static void fts5MultiIterNew2( | |||
| 13567 | Fts5Index *p, /* FTS5 backend to iterate within */ | |||
| 13568 | Fts5Data *pData, /* Doclist to iterate through */ | |||
| 13569 | int bDesc, /* True for descending rowid order */ | |||
| 13570 | Fts5Iter **ppOut /* New object */ | |||
| 13571 | ){ | |||
| 13572 | Fts5Iter *pNew; | |||
| 13573 | pNew = fts5MultiIterAlloc(p, 2); | |||
| 13574 | if( pNew ){ | |||
| 13575 | Fts5SegIter *pIter = &pNew->aSeg[1]; | |||
| 13576 | pIter->flags = FTS5_SEGITER_ONETERM0x01; | |||
| 13577 | if( pData->szLeaf>0 ){ | |||
| 13578 | pIter->pLeaf = pData; | |||
| 13579 | pIter->iLeafOffset = fts5GetVarintsqlite3Fts5GetVarint(pData->p, (u64*)&pIter->iRowid); /* first rowid sits at offset 0 */ | |||
| 13580 | pIter->iEndofDoclist = pData->nn; | |||
| 13581 | pNew->aFirst[1].iFirst = 1; | |||
| 13582 | if( bDesc ){ | |||
| 13583 | pNew->bRev = 1; | |||
| 13584 | pIter->flags |= FTS5_SEGITER_REVERSE0x02; | |||
| 13585 | fts5SegIterReverseInitPage(p, pIter); | |||
| 13586 | }else{ | |||
| 13587 | fts5SegIterLoadNPos(p, pIter); | |||
| 13588 | } | |||
| 13589 | pData = 0; /* pIter->pLeaf now refers to the buffer; skip the release below */ | |||
| 13590 | }else{ | |||
| 13591 | pNew->base.bEof = 1; | |||
| 13592 | } | |||
| 13593 | fts5SegIterSetNext(p, pIter); | |||
| 13594 | ||||
| 13595 | *ppOut = pNew; | |||
| 13596 | } | |||
| 13597 | ||||
| 13598 | fts5DataRelease(pData); | |||
| 13599 | } | |||
| 13600 | ||||
| 13601 | /* | |||
| 13602 | ** Return true if the iterator is at EOF or if an error has occurred. | |||
| 13603 | ** False otherwise. | |||
| | ** | |||
| | ** "An error has occurred" means that p->rc is non-zero. pIter may be NULL | |||
| | ** only in that case; the expression below then short-circuits on p->rc | |||
| | ** and pIter is not dereferenced. | |||
| 13604 | */ | |||
| 13605 | static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ | |||
| 13606 | assert( pIter!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
| 13607 | assert( p->rc!=SQLITE_OK((void) (0)) | |||
| 13608 | || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof((void) (0)) | |||
| 13609 | )((void) (0)); | |||
| 13610 | return (p->rc || pIter->base.bEof); | |||
| 13611 | } | |||
| 13612 | ||||
| 13613 | /* | |||
| 13614 | ** Return the rowid of the entry that the iterator currently points | |||
| 13615 | ** to. If the iterator points to EOF when this function is called the | |||
| 13616 | ** results are undefined. | |||
| | ** | |||
| | ** The value is read from the segment iterator selected by | |||
| | ** pIter->aFirst[1].iFirst, which is asserted to have a leaf loaded. | |||
| 13617 | */ | |||
| 13618 | static i64 fts5MultiIterRowid(Fts5Iter *pIter){ | |||
| 13619 | assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf )((void) (0)); | |||
| 13620 | return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; | |||
| 13621 | } | |||
| 13622 | ||||
| 13623 | /* | |||
| 13624 | ** Move the iterator to the next entry at or following iMatch. | |||
| | ** | |||
| | ** "Following" is relative to the iteration direction: for a forward | |||
| | ** iterator (bRev==0) the loop stops at the first rowid >= iMatch, for a | |||
| | ** reverse iterator at the first rowid <= iMatch. The iterator is always | |||
| | ** advanced at least once, and the loop also stops when fts5MultiIterEof() | |||
| | ** reports EOF or an error. | |||
| 13625 | */ | |||
| 13626 | static void fts5MultiIterNextFrom( | |||
| 13627 | Fts5Index *p, | |||
| 13628 | Fts5Iter *pIter, | |||
| 13629 | i64 iMatch | |||
| 13630 | ){ | |||
| 13631 | while( 1 ){ | |||
| 13632 | i64 iRowid; | |||
| 13633 | fts5MultiIterNext(p, pIter, 1, iMatch); | |||
| 13634 | if( fts5MultiIterEof(p, pIter) ) break; | |||
| 13635 | iRowid = fts5MultiIterRowid(pIter); | |||
| 13636 | if( pIter->bRev==0 && iRowid>=iMatch ) break; | |||
| 13637 | if( pIter->bRev!=0 && iRowid<=iMatch ) break; | |||
| 13638 | } | |||
| 13639 | } | |||
| 13640 | ||||
| 13641 | /* | |||
| 13642 | ** Return a pointer to a buffer containing the term associated with the | |||
| 13643 | ** entry that the iterator currently points to. | |||
| | ** | |||
| | ** The size of the term in bytes is written to *pn. The returned pointer | |||
| | ** is the term buffer of the segment iterator at aFirst[1].iFirst itself, | |||
| | ** not a copy. | |||
| 13644 | */ | |||
| 13645 | static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ | |||
| 13646 | Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
| 13647 | *pn = p->term.n; | |||
| 13648 | return p->term.p; | |||
| 13649 | } | |||
| 13650 | ||||
| 13651 | /* | |||
| 13652 | ** Allocate a new segment-id for the structure pStruct. The new segment | |||
| 13653 | ** id must be between 1 and FTS5_MAX_SEGMENT inclusive, and must not be | |||
| 13654 | ** used by any currently existing segment. If the structure already holds | |||
| 13655 | ** FTS5_MAX_SEGMENT or more segments, Fts5Index.rc is set to SQLITE_FULL | |||
| | ** and 0 is returned. | |||
| 13656 | ** | |||
| 13657 | ** If an error has already occurred, this function is a no-op. 0 is | |||
| 13658 | ** returned in this case. | |||
| 13659 | */ | |||
| 13660 | static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ | |||
| 13661 | int iSegid = 0; | |||
| 13662 | ||||
| 13663 | if( p->rc==SQLITE_OK0 ){ | |||
| 13664 | if( pStruct->nSegment>=FTS5_MAX_SEGMENT2000 ){ | |||
| 13665 | p->rc = SQLITE_FULL13; | |||
| 13666 | }else{ | |||
| 13667 | /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following | |||
| 13668 | ** array is 63 elements, or 252 bytes, in size. It is a bitmap in which | |||
| | ** bit (iId-1) is set for each segment id iId in use. Ids greater than | |||
| | ** FTS5_MAX_SEGMENT never set a bit, so the top 16 bits of the last | |||
| | ** element stay clear and the scan below always stops inside the | |||
| | ** array. */ | |||
| 13669 | u32 aUsed[(FTS5_MAX_SEGMENT2000+31) / 32]; | |||
| 13670 | int iLvl, iSeg; | |||
| 13671 | int i; | |||
| 13672 | u32 mask; | |||
| 13673 | memset(aUsed, 0, sizeof(aUsed)); | |||
| 13674 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
| 13675 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | |||
| 13676 | int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; | |||
| 13677 | if( iId<=FTS5_MAX_SEGMENT2000 && iId>0 ){ | |||
| 13678 | aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32); | |||
| 13679 | } | |||
| 13680 | } | |||
| 13681 | } | |||
| 13682 | ||||
| 13683 | for(i=0; aUsed[i]==0xFFFFFFFF; i++); /* first element with a clear bit */ | |||
| 13684 | mask = aUsed[i]; | |||
| 13685 | for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++); /* lowest clear bit in it */ | |||
| 13686 | iSegid += 1 + i*32; /* bit index -> 1-based segment id */ | |||
| 13687 | ||||
| 13688 | #ifdef SQLITE_DEBUG | |||
| 13689 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
| 13690 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | |||
| 13691 | assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid )((void) (0)); | |||
| 13692 | } | |||
| 13693 | } | |||
| 13694 | assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT )((void) (0)); | |||
| 13695 | ||||
| 13696 | { | |||
| 13697 | sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); | |||
| 13698 | if( p->rc==SQLITE_OK0 ){ | |||
| 13699 | u8 aBlob[2] = {0xff, 0xff}; | |||
| 13700 | sqlite3_bind_intsqlite3_api->bind_int(pIdxSelect, 1, iSegid); | |||
| 13701 | sqlite3_bind_blobsqlite3_api->bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 13702 | assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW )((void) (0)); | |||
| 13703 | p->rc = sqlite3_resetsqlite3_api->reset(pIdxSelect); | |||
| 13704 | sqlite3_bind_nullsqlite3_api->bind_null(pIdxSelect, 2); | |||
| 13705 | } | |||
| 13706 | } | |||
| 13707 | #endif | |||
| 13708 | } | |||
| 13709 | } | |||
| 13710 | ||||
| 13711 | return iSegid; | |||
| 13712 | } | |||
| 13713 | ||||
| 13714 | /* | |||
| 13715 | ** Discard all data currently cached in the hash-tables. | |||
| | ** | |||
| | ** If a hash table exists it is cleared, and the pending-data and | |||
| | ** pending-row counters and flushRc are reset. The nContentlessDelete | |||
| | ** counter is reset whether or not a hash table exists. | |||
| 13716 | */ | |||
| 13717 | static void fts5IndexDiscardData(Fts5Index *p){ | |||
| 13718 | assert( p->pHash || p->nPendingData==0 )((void) (0)); | |||
| 13719 | if( p->pHash ){ | |||
| 13720 | sqlite3Fts5HashClear(p->pHash); | |||
| 13721 | p->nPendingData = 0; | |||
| 13722 | p->nPendingRow = 0; | |||
| 13723 | p->flushRc = SQLITE_OK0; | |||
| 13724 | } | |||
| 13725 | p->nContentlessDelete = 0; | |||
| 13726 | } | |||
| 13727 | ||||
| 13728 | /* | |||
| 13729 | ** Return the size of the prefix, in bytes, that buffer | |||
| 13730 | ** (pNew/<length-unknown>) shares with buffer (pOld/nOld). | |||
| 13731 | ** | |||
| 13732 | ** Buffer (pNew/<length-unknown>) is guaranteed to be greater | |||
| 13733 | ** than buffer (pOld/nOld). | |||
| | ** | |||
| | ** At most nOld bytes are compared, so the return value is in the range | |||
| | ** 0..nOld. No length is supplied for pNew: up to nOld bytes of it are read | |||
| | ** unless a mismatch is found sooner. | |||
| 13734 | */ | |||
| 13735 | static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){ | |||
| 13736 | int i; | |||
| 13737 | for(i=0; i<nOld; i++){ | |||
| 13738 | if( pOld[i]!=pNew[i] ) break; | |||
| 13739 | } | |||
| 13740 | return i; | |||
| 13741 | } | |||
| 13742 | /* | |||
| | ** Reset the doclist-index levels accumulated in pWriter->aDlidx[]. | |||
| | ** | |||
| | ** Levels are visited from index 0 upwards and the loop stops at the first | |||
| | ** level whose buffer is empty. For each level visited the buffer is | |||
| | ** zeroed and bPrevValid is cleared. If bFlush is true, the buffer is | |||
| | ** first written out with fts5DataWrite() under the rowid produced by | |||
| | ** FTS5_DLIDX_ROWID(iSegid, level, pgno). | |||
| | ** | |||
| | ** bFlush may only be true if level 0 holds data (see the assert()). | |||
| | */||||
| 13743 | static void fts5WriteDlidxClear( | |||
| 13744 | Fts5Index *p, | |||
| 13745 | Fts5SegWriter *pWriter, | |||
| 13746 | int bFlush /* If true, write dlidx to disk */ | |||
| 13747 | ){ | |||
| 13748 | int i; | |||
| 13749 | assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) )((void) (0)); | |||
| 13750 | for(i=0; i<pWriter->nDlidx; i++){ | |||
| 13751 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; | |||
| 13752 | if( pDlidx->buf.n==0 ) break; /* this level is unused; stop here */ | |||
| 13753 | if( bFlush ){ | |||
| 13754 | assert( pDlidx->pgno!=0 )((void) (0)); | |||
| 13755 | fts5DataWrite(p, | |||
| 13756 | FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(pDlidx ->pgno)) ), | |||
| 13757 | pDlidx->buf.p, pDlidx->buf.n | |||
| 13758 | ); | |||
| 13759 | } | |||
| 13760 | sqlite3Fts5BufferZero(&pDlidx->buf); | |||
| 13761 | pDlidx->bPrevValid = 0; | |||
| 13762 | } | |||
| 13763 | } | |||
| 13764 | ||||
| 13765 | /* | |||
| 13766 | ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. | |||
| 13767 | ** Any new array elements are zeroed before returning. | |||
| | ** | |||
| | ** The array is reallocated whenever nLvl>=pWriter->nDlidx, which includes | |||
| | ** the case where it already has exactly nLvl elements (zero bytes are | |||
| | ** then cleared). On allocation failure p->rc is set to SQLITE_NOMEM and | |||
| | ** the existing array and count are left unchanged. This is a no-op if | |||
| | ** p->rc is already set. | |||
| | ** | |||
| | ** Returns the value of p->rc. | |||
| 13768 | */ | |||
| 13769 | static int fts5WriteDlidxGrow( | |||
| 13770 | Fts5Index *p, | |||
| 13771 | Fts5SegWriter *pWriter, | |||
| 13772 | int nLvl | |||
| 13773 | ){ | |||
| 13774 | if( p->rc==SQLITE_OK0 && nLvl>=pWriter->nDlidx ){ | |||
| 13775 | Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64sqlite3_api->realloc64( | |||
| 13776 | pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl | |||
| 13777 | ); | |||
| 13778 | if( aDlidx==0 ){ | |||
| 13779 | p->rc = SQLITE_NOMEM7; | |||
| 13780 | }else{ | |||
| 13781 | size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); | |||
| 13782 | memset(&aDlidx[pWriter->nDlidx], 0, nByte); /* zero only the newly added tail */ | |||
| 13783 | pWriter->aDlidx = aDlidx; | |||
| 13784 | pWriter->nDlidx = nLvl; | |||
| 13785 | } | |||
| 13786 | } | |||
| 13787 | return p->rc; | |||
| 13788 | } | |||
| 13789 | ||||
| 13790 | /* | |||
| 13791 | ** If the current doclist-index accumulating in pWriter->aDlidx[] is large | |||
| 13792 | ** enough, flush it to disk and return 1. Otherwise discard it and return | |||
| 13793 | ** zero. | |||
| | ** | |||
| | ** "Large enough" means that level 0 of the doclist-index holds data and | |||
| | ** pWriter->nEmpty is at least FTS5_MIN_DLIDX_SIZE. In both cases | |||
| | ** pWriter->nEmpty is reset to zero before returning. | |||
| 13794 | */ | |||
| 13795 | static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ | |||
| 13796 | int bFlag = 0; | |||
| 13797 | ||||
| 13798 | /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written | |||
| 13799 | ** to the database, also write the doclist-index to disk. */ | |||
| 13800 | if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE4 ){ | |||
| 13801 | bFlag = 1; | |||
| 13802 | } | |||
| 13803 | fts5WriteDlidxClear(p, pWriter, bFlag); | |||
| 13804 | pWriter->nEmpty = 0; | |||
| 13805 | return bFlag; | |||
| 13806 | } | |||
| 13807 | ||||
| 13808 | /* | |||
| 13809 | ** This function is called whenever processing of the doclist for the | |||
| 13810 | ** last term on leaf page (pWriter->iBtPage) is completed. | |||
| 13811 | ** | |||
| 13812 | ** The doclist-index for that term is currently stored in-memory within the | |||
| 13813 | ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function | |||
| 13814 | ** writes it out to disk. Or, if it is too small to bother with, discards | |||
| 13815 | ** it. | |||
| 13816 | ** | |||
| 13817 | ** Fts5SegWriter.btterm currently contains the first term on page iBtPage. | |||
| | ** | |||
| | ** If no error has occurred, a row is then written through the | |||
| | ** p->pIdxWriter statement: parameter 2 is bound to the btterm blob and | |||
| | ** parameter 3 to (iBtPage<<1) plus the doclist-index flag in the low bit. | |||
| | ** pWriter->iBtPage is reset to zero before returning. The function | |||
| | ** returns immediately if iBtPage is already zero. | |||
| 13818 | */ | |||
| 13819 | static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ | |||
| 13820 | int bFlag; | |||
| 13821 | ||||
| 13822 | assert( pWriter->iBtPage || pWriter->nEmpty==0 )((void) (0)); | |||
| 13823 | if( pWriter->iBtPage==0 ) return; | |||
| 13824 | bFlag = fts5WriteFlushDlidx(p, pWriter); | |||
| 13825 | ||||
| 13826 | if( p->rc==SQLITE_OK0 ){ | |||
| 13827 | const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:""); /* never bind a NULL pointer */ | |||
| 13828 | /* The following was already done in fts5WriteInit(): */ | |||
| 13829 | /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ | |||
| 13830 | sqlite3_bind_blobsqlite3_api->bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 13831 | sqlite3_bind_int64sqlite3_api->bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1)); | |||
| 13832 | sqlite3_stepsqlite3_api->step(p->pIdxWriter); | |||
| 13833 | p->rc = sqlite3_resetsqlite3_api->reset(p->pIdxWriter); | |||
| 13834 | sqlite3_bind_nullsqlite3_api->bind_null(p->pIdxWriter, 2); | |||
| 13835 | } | |||
| 13836 | pWriter->iBtPage = 0; | |||
| 13837 | } | |||
| 13838 | ||||
| 13839 | /* | |||
| 13840 | ** This is called once for each leaf page except the first that contains | |||
| 13841 | ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that | |||
| 13842 | ** is larger than all terms written to earlier leaves, and equal to or | |||
| 13843 | ** smaller than the first term on the new leaf. | |||
| 13844 | ** | |||
| 13845 | ** If an error occurs, an error code is left in Fts5Index.rc. If an error | |||
| 13846 | ** has already occurred when this function is called, it is a no-op. | |||
| | ** | |||
| | ** Any pending b-tree entry is flushed first with fts5WriteFlushBtree(). | |||
| | ** The split-key is then copied into pWriter->btterm and the current leaf | |||
| | ** page number (pWriter->writer.pgno) is recorded in pWriter->iBtPage. | |||
| 13847 | */ | |||
| 13848 | static void fts5WriteBtreeTerm( | |||
| 13849 | Fts5Index *p, /* FTS5 backend object */ | |||
| 13850 | Fts5SegWriter *pWriter, /* Writer object */ | |||
| 13851 | int nTerm, const u8 *pTerm /* First term on new page */ | |||
| 13852 | ){ | |||
| 13853 | fts5WriteFlushBtree(p, pWriter); | |||
| 13854 | if( p->rc==SQLITE_OK0 ){ | |||
| 13855 | fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&pWriter->btterm,nTerm ,pTerm); | |||
| 13856 | pWriter->iBtPage = pWriter->writer.pgno; | |||
| 13857 | } | |||
| 13858 | } | |||
| 13859 | ||||
| 13860 | /* | |||
| 13861 | ** This function is called when flushing a leaf page that contains no | |||
| 13862 | ** terms at all to disk. | |||
| | ** | |||
| | ** It always increments pWriter->nEmpty. Additionally, if the page holds | |||
| | ** no rowid either (bFirstRowidInPage is still set) and level 0 of the | |||
| | ** doclist-index already holds data, a zero varint is appended to that | |||
| | ** level. | |||
| 13863 | */ | |||
| 13864 | static void fts5WriteBtreeNoTerm( | |||
| 13865 | Fts5Index *p, /* FTS5 backend object */ | |||
| 13866 | Fts5SegWriter *pWriter /* Writer object */ | |||
| 13867 | ){ | |||
| 13868 | /* If there were no rowids on the leaf page either and the doclist-index | |||
| 13869 | ** has already been started, append an 0x00 byte to it. */ | |||
| 13870 | if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ | |||
| 13871 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; | |||
| 13872 | assert( pDlidx->bPrevValid )((void) (0)); | |||
| 13873 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); | |||
| 13874 | } | |||
| 13875 | ||||
| 13876 | /* Increment the "number of sequential leaves without a term" counter. */ | |||
| 13877 | pWriter->nEmpty++; | |||
| 13878 | } | |||
| 13879 | /* | |||
| | ** Return the first rowid stored in doclist-index buffer pBuf. | |||
| | ** | |||
| | ** The byte at pBuf->p[0] is skipped, the varint that follows it is decoded | |||
| | ** and discarded, and the varint after that is decoded and returned. This | |||
| | ** matches the order in which fts5WriteDlidxAppend() starts a buffer: a | |||
| | ** flags value, a page number, then the first rowid. The flags value is | |||
| | ** assumed to occupy exactly one byte. | |||
| | */||||
| 13880 | static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ | |||
| 13881 | i64 iRowid; | |||
| 13882 | int iOff; | |||
| 13883 | ||||
| 13884 | iOff = 1 + fts5GetVarintsqlite3Fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); /* skip the page number; value unused */ | |||
| 13885 | fts5GetVarintsqlite3Fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); | |||
| 13886 | return iRowid; | |||
| 13887 | } | |||
| 13888 | ||||
| 13889 | /* | |||
| 13890 | ** Rowid iRowid has just been appended to the current leaf page. It is the | |||
| 13891 | ** first on the page. This function appends an appropriate entry to the current | |||
| 13892 | ** doclist-index. | |||
| | ** | |||
| | ** The loop starts at level 0 of pWriter->aDlidx[]. If the level's buffer | |||
| | ** has reached the configured page size it is written to disk and reset, | |||
| | ** and the loop goes on to the next level up so that iRowid is recorded | |||
| | ** there as well. The loop ends after the first level that is not full, or | |||
| | ** as soon as p->rc is set. | |||
| | ** | |||
| | ** Within a level, the first entry is preceded by a flags varint and a page | |||
| | ** number and stores iRowid itself. Later entries store the difference | |||
| | ** from the previous rowid, computed with unsigned arithmetic. | |||
| 13893 | */ | |||
| 13894 | static void fts5WriteDlidxAppend( | |||
| 13895 | Fts5Index *p, | |||
| 13896 | Fts5SegWriter *pWriter, | |||
| 13897 | i64 iRowid | |||
| 13898 | ){ | |||
| 13899 | int i; | |||
| 13900 | int bDone = 0; | |||
| 13901 | ||||
| 13902 | for(i=0; p->rc==SQLITE_OK0 && bDone==0; i++){ | |||
| 13903 | i64 iVal; | |||
| 13904 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; | |||
| 13905 | ||||
| 13906 | if( pDlidx->buf.n>=p->pConfig->pgsz ){ | |||
| 13907 | /* The current doclist-index page is full. Write it to disk and push | |||
| 13908 | ** a copy of iRowid (which will become the first rowid on the next | |||
| 13909 | ** doclist-index leaf page) up into the next level of the b-tree | |||
| 13910 | ** hierarchy. If the node being flushed is currently the root node, | |||
| 13911 | ** also push its first rowid upwards. */ | |||
| 13912 | pDlidx->buf.p[0] = 0x01; /* Not the root node */ | |||
| 13913 | fts5DataWrite(p, | |||
| 13914 | FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(1) << (31 + 5)) + ((i64)(i) << (31)) + ((i64)(pDlidx ->pgno)) ), | |||
| 13915 | pDlidx->buf.p, pDlidx->buf.n | |||
| 13916 | ); | |||
| 13917 | fts5WriteDlidxGrow(p, pWriter, i+2); | |||
| 13918 | pDlidx = &pWriter->aDlidx[i]; /* re-fetch: the array may have been reallocated */ | |||
| 13919 | if( p->rc==SQLITE_OK0 && pDlidx[1].buf.n==0 ){ | |||
| 13920 | i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); | |||
| 13921 | ||||
| 13922 | /* This was the root node. Push its first rowid up to the new root. */ | |||
| 13923 | pDlidx[1].pgno = pDlidx->pgno; | |||
| 13924 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); | |||
| 13925 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); | |||
| 13926 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); | |||
| 13927 | pDlidx[1].bPrevValid = 1; | |||
| 13928 | pDlidx[1].iPrev = iFirst; | |||
| 13929 | } | |||
| 13930 | ||||
| 13931 | sqlite3Fts5BufferZero(&pDlidx->buf); | |||
| 13932 | pDlidx->bPrevValid = 0; | |||
| 13933 | pDlidx->pgno++; | |||
| 13934 | }else{ | |||
| 13935 | bDone = 1; | |||
| 13936 | } | |||
| 13937 | ||||
| 13938 | if( pDlidx->bPrevValid ){ | |||
| 13939 | iVal = (u64)iRowid - (u64)pDlidx->iPrev; | |||
| 13940 | }else{ | |||
| 13941 | i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno); | |||
| 13942 | assert( pDlidx->buf.n==0 )((void) (0)); | |||
| 13943 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); | |||
| 13944 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); | |||
| 13945 | iVal = iRowid; | |||
| 13946 | } | |||
| 13947 | ||||
| 13948 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); | |||
| 13949 | pDlidx->bPrevValid = 1; | |||
| 13950 | pDlidx->iPrev = iRowid; | |||
| 13951 | } | |||
| 13952 | } | |||
| 13953 | /* | |||
| | ** Write the leaf page currently held in pWriter->writer to disk and start | |||
| | ** a new, empty page. | |||
| | ** | |||
| | ** The current size of the page buffer is stored as a 16-bit value at byte | |||
| | ** offset 2 of the page. If the page contains at least one term, the | |||
| | ** page-index (pgidx) is appended to the page buffer; otherwise | |||
| | ** fts5WriteBtreeNoTerm() is called. The page is then written with | |||
| | ** fts5DataWrite() under the rowid FTS5_SEGMENT_ROWID(iSegid, pgno). | |||
| | ** | |||
| | ** Afterwards both buffers are emptied, a 4-byte all-zero header is | |||
| | ** appended to the page buffer, pgno and nLeafWritten are incremented and | |||
| | ** the bFirstTermInPage/bFirstRowidInPage flags are set again. | |||
| | */||||
| 13954 | static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ | |||
| 13955 | static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; | |||
| 13956 | Fts5PageWriter *pPage = &pWriter->writer; | |||
| 13957 | i64 iRowid; | |||
| 13958 | ||||
| 13959 | assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) )((void) (0)); | |||
| 13960 | ||||
| 13961 | /* Set the szLeaf header field. */ | |||
| 13962 | assert( 0==fts5GetU16(&pPage->buf.p[2]) )((void) (0)); | |||
| 13963 | fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); | |||
| 13964 | ||||
| 13965 | if( pWriter->bFirstTermInPage ){ | |||
| 13966 | /* No term was written to this page. */ | |||
| 13967 | assert( pPage->pgidx.n==0 )((void) (0)); | |||
| 13968 | fts5WriteBtreeNoTerm(p, pWriter); | |||
| 13969 | }else{ | |||
| 13970 | /* Append the pgidx to the page buffer. The szLeaf header field was | |||
| | ** already set above, before the page buffer grows. */ | |||
| 13971 | fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, pPage->pgidx.n,pPage->pgidx.p); | |||
| 13972 | } | |||
| 13973 | ||||
| 13974 | /* Write the page out to disk */ | |||
| 13975 | iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno)( ((i64)(pWriter->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pPage ->pgno)) ); | |||
| 13976 | fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); | |||
| 13977 | ||||
| 13978 | /* Initialize the next page. */ | |||
| 13979 | fts5BufferZero(&pPage->buf)sqlite3Fts5BufferZero(&pPage->buf); | |||
| 13980 | fts5BufferZero(&pPage->pgidx)sqlite3Fts5BufferZero(&pPage->pgidx); | |||
| 13981 | fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, 4,zero); | |||
| 13982 | pPage->iPrevPgidx = 0; | |||
| 13983 | pPage->pgno++; | |||
| 13984 | ||||
| 13985 | /* Increase the leaves written counter */ | |||
| 13986 | pWriter->nLeafWritten++; | |||
| 13987 | ||||
| 13988 | /* The new leaf holds no terms or rowids */ | |||
| 13989 | pWriter->bFirstTermInPage = 1; | |||
| 13990 | pWriter->bFirstRowidInPage = 1; | |||
| 13991 | } | |||
| 13992 | ||||
| 13993 | /* | |||
| 13994 | ** Append term pTerm/nTerm to the segment being written by the writer passed | |||
| 13995 | ** as the second argument. | |||
| 13996 | ** | |||
| 13997 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | |||
| 13998 | ** already occurred, this function is a no-op. | |||
| | ** | |||
| | ** NOTE(review): the body asserts p->rc==SQLITE_OK on entry rather than | |||
| | ** testing it, so the "no-op" statement above appears to rely on callers | |||
| | ** never invoking this function after an error - confirm. | |||
| | ** | |||
| | ** Outline: flush the current leaf if the term would not fit, record the | |||
| | ** term's offset in the page-index as a delta from the previous entry, and | |||
| | ** append the term. The first term on a page is stored whole, later terms | |||
| | ** are prefix-compressed against the previous term. The first term on any | |||
| | ** page other than page 1 also produces a b-tree split-key. | |||
| 13999 | */ | |||
| 14000 | static void fts5WriteAppendTerm( | |||
| 14001 | Fts5Index *p, | |||
| 14002 | Fts5SegWriter *pWriter, | |||
| 14003 | int nTerm, const u8 *pTerm | |||
| 14004 | ){ | |||
| 14005 | int nPrefix; /* Bytes of prefix compression for term */ | |||
| 14006 | Fts5PageWriter *pPage = &pWriter->writer; | |||
| 14007 | Fts5Buffer *pPgidx = &pWriter->writer.pgidx; | |||
| 14008 | int nMin = MIN(pPage->term.n, nTerm)(((pPage->term.n) < (nTerm)) ? (pPage->term.n) : (nTerm )); | |||
| 14009 | ||||
| 14010 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 14011 | assert( pPage->buf.n>=4 )((void) (0)); | |||
| 14012 | assert( pPage->buf.n>4 || pWriter->bFirstTermInPage )((void) (0)); | |||
| 14013 | ||||
| 14014 | /* If the current leaf page is full, flush it to disk. */ | |||
| 14015 | if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ | |||
| 14016 | if( pPage->buf.n>4 ){ | |||
| 14017 | fts5WriteFlushLeaf(p, pWriter); | |||
| 14018 | if( p->rc!=SQLITE_OK0 ) return; | |||
| 14019 | } | |||
| 14020 | fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING)( (u32)((&pPage->buf)->n) + (u32)(nTerm+20) <= ( u32)((&pPage->buf)->nSpace) ? 0 : sqlite3Fts5BufferSize ((&p->rc),(&pPage->buf),(nTerm+20)+(&pPage-> buf)->n) ); | |||
| 14021 | } | |||
| 14022 | ||||
| 14023 | /* TODO1: Updating pgidx here. The varint is written directly at | |||
| | ** pPgidx->p[pPgidx->n] with no grow call in this function, so space in | |||
| | ** the pgidx buffer is presumably reserved elsewhere - verify. */ | |||
| 14024 | pPgidx->n += sqlite3Fts5PutVarint( | |||
| 14025 | &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx | |||
| 14026 | ); | |||
| 14027 | pPage->iPrevPgidx = pPage->buf.n; | |||
| 14028 | #if 0 | |||
| 14029 | fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); | |||
| 14030 | pPgidx->n += 2; | |||
| 14031 | #endif | |||
| 14032 | ||||
| 14033 | if( pWriter->bFirstTermInPage ){ | |||
| 14034 | nPrefix = 0; | |||
| 14035 | if( pPage->pgno!=1 ){ | |||
| 14036 | /* This is the first term on a leaf that is not the leftmost leaf in | |||
| 14037 | ** the segment b-tree. In this case it is necessary to add a term to | |||
| 14038 | ** the b-tree hierarchy that is (a) larger than the largest term | |||
| 14039 | ** already written to the segment and (b) smaller than or equal to | |||
| 14040 | ** this term. In other words, a prefix of (pTerm/nTerm) that is one | |||
| 14041 | ** byte longer than the longest prefix (pTerm/nTerm) shares with the | |||
| 14042 | ** previous term. | |||
| 14043 | ** | |||
| 14044 | ** Usually, the previous term is available in pPage->term. The exception | |||
| 14045 | ** is if this is the first term written in an incremental-merge step. | |||
| 14046 | ** In this case the previous term is not available, so just write a | |||
| 14047 | ** copy of (pTerm/nTerm) into the parent node. This is slightly | |||
| 14048 | ** inefficient, but still correct. */ | |||
| 14049 | int n = nTerm; | |||
| 14050 | if( pPage->term.n ){ | |||
| 14051 | n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm); | |||
| 14052 | } | |||
| 14053 | fts5WriteBtreeTerm(p, pWriter, n, pTerm); | |||
| 14054 | if( p->rc!=SQLITE_OK0 ) return; | |||
| 14055 | pPage = &pWriter->writer; /* same address as the initial value of pPage */ | |||
| 14056 | } | |||
| 14057 | }else{ | |||
| 14058 | nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm); | |||
| 14059 | fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)nPrefix); | |||
| 14060 | } | |||
| 14061 | ||||
| 14062 | /* Append the number of bytes of new data, then the term data itself | |||
| 14063 | ** to the page. */ | |||
| 14064 | fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)nTerm - nPrefix); | |||
| 14065 | fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix])sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, nTerm - nPrefix,&pTerm[nPrefix]); | |||
| 14066 | ||||
| 14067 | /* Update the Fts5PageWriter.term field. */ | |||
| 14068 | fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&pPage->term,nTerm, pTerm); | |||
| 14069 | pWriter->bFirstTermInPage = 0; | |||
| 14070 | ||||
| 14071 | pWriter->bFirstRowidInPage = 0; | |||
| 14072 | pWriter->bFirstRowidInDoclist = 1; | |||
| 14073 | ||||
| 14074 | assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) )((void) (0)); | |||
| 14075 | pWriter->aDlidx[0].pgno = pPage->pgno; | |||
| 14076 | } | |||
| 14077 | ||||
| 14078 | /* | |||
| 14079 | ** Append a rowid and position-list size field to the writers output. | |||
| | ** | |||
| | ** This is a no-op if p->rc is already set. Only the rowid varint is | |||
| | ** appended by this function. The page is flushed first if the page buffer | |||
| | ** plus page-index has reached the configured page size. | |||
| | ** | |||
| | ** The rowid is stored in full if it is the first in its doclist or the | |||
| | ** first on the page. Otherwise the difference from the previously written | |||
| | ** rowid is stored, computed with unsigned arithmetic. | |||
| 14080 | */ | |||
| 14081 | static void fts5WriteAppendRowid( | |||
| 14082 | Fts5Index *p, | |||
| 14083 | Fts5SegWriter *pWriter, | |||
| 14084 | i64 iRowid | |||
| 14085 | ){ | |||
| 14086 | if( p->rc==SQLITE_OK0 ){ | |||
| 14087 | Fts5PageWriter *pPage = &pWriter->writer; | |||
| 14088 | ||||
| 14089 | if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ | |||
| 14090 | fts5WriteFlushLeaf(p, pWriter); | |||
| 14091 | } | |||
| 14092 | ||||
| 14093 | /* If this is to be the first rowid written to the page, set the | |||
| 14094 | ** rowid-pointer in the page-header. Also append a value to the dlidx | |||
| 14095 | ** buffer, in case a doclist-index is required. */ | |||
| 14096 | if( pWriter->bFirstRowidInPage ){ | |||
| 14097 | fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); | |||
| 14098 | fts5WriteDlidxAppend(p, pWriter, iRowid); | |||
| 14099 | } | |||
| 14100 | ||||
| 14101 | /* Write the rowid. */ | |||
| 14102 | if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ | |||
| 14103 | fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid)sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)iRowid); | |||
| 14104 | }else{ | |||
| 14105 | assert_nc( p->rc || iRowid>pWriter->iPrevRowid )((void) (0)); | |||
| 14106 | fts5BufferAppendVarint(&p->rc, &pPage->buf,sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid) | |||
| 14107 | (u64)iRowid - (u64)pWriter->iPrevRowidsqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid) | |||
| 14108 | )sqlite3Fts5BufferAppendVarint(&p->rc,&pPage->buf ,(i64)(u64)iRowid - (u64)pWriter->iPrevRowid); | |||
| 14109 | } | |||
| 14110 | pWriter->iPrevRowid = iRowid; | |||
| 14111 | pWriter->bFirstRowidInDoclist = 0; | |||
| 14112 | pWriter->bFirstRowidInPage = 0; | |||
| 14113 | } | |||
| 14114 | } | |||
| 14115 | ||||
| 14116 | static void fts5WriteAppendPoslistData( | |||
| 14117 | Fts5Index *p, | |||
| 14118 | Fts5SegWriter *pWriter, | |||
| 14119 | const u8 *aData, | |||
| 14120 | int nData | |||
| 14121 | ){ | |||
| 14122 | Fts5PageWriter *pPage = &pWriter->writer; | |||
| 14123 | const u8 *a = aData; | |||
| 14124 | int n = nData; | |||
| 14125 | ||||
| 14126 | assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK )((void) (0)); | |||
| 14127 | while( p->rc==SQLITE_OK0 | |||
| 14128 | && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz | |||
| 14129 | ){ | |||
| 14130 | int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; | |||
| 14131 | int nCopy = 0; | |||
| 14132 | while( nCopy<nReq ){ | |||
| 14133 | i64 dummy; | |||
| 14134 | nCopy += fts5GetVarintsqlite3Fts5GetVarint(&a[nCopy], (u64*)&dummy); | |||
| 14135 | } | |||
| 14136 | fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, nCopy,a); | |||
| 14137 | a += nCopy; | |||
| 14138 | n -= nCopy; | |||
| 14139 | fts5WriteFlushLeaf(p, pWriter); | |||
| 14140 | } | |||
| 14141 | if( n>0 ){ | |||
| 14142 | fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a)sqlite3Fts5BufferAppendBlob(&p->rc,&pPage->buf, n,a); | |||
| 14143 | } | |||
| 14144 | } | |||
| 14145 | ||||
| 14146 | /* | |||
| 14147 | ** Flush any data cached by the writer object to the database. Free any | |||
| 14148 | ** allocations associated with the writer. | |||
| 14149 | */ | |||
| 14150 | static void fts5WriteFinish( | |||
| 14151 | Fts5Index *p, | |||
| 14152 | Fts5SegWriter *pWriter, /* Writer object */ | |||
| 14153 | int *pnLeaf /* OUT: Number of leaf pages in b-tree */ | |||
| 14154 | ){ | |||
| 14155 | int i; | |||
| 14156 | Fts5PageWriter *pLeaf = &pWriter->writer; | |||
| 14157 | if( p->rc==SQLITE_OK0 ){ | |||
| 14158 | assert( pLeaf->pgno>=1 )((void) (0)); | |||
| 14159 | if( pLeaf->buf.n>4 ){ | |||
| 14160 | fts5WriteFlushLeaf(p, pWriter); | |||
| 14161 | } | |||
| 14162 | *pnLeaf = pLeaf->pgno-1; | |||
| 14163 | if( pLeaf->pgno>1 ){ | |||
| 14164 | fts5WriteFlushBtree(p, pWriter); | |||
| 14165 | } | |||
| 14166 | } | |||
| 14167 | fts5BufferFree(&pLeaf->term)sqlite3Fts5BufferFree(&pLeaf->term); | |||
| 14168 | fts5BufferFree(&pLeaf->buf)sqlite3Fts5BufferFree(&pLeaf->buf); | |||
| 14169 | fts5BufferFree(&pLeaf->pgidx)sqlite3Fts5BufferFree(&pLeaf->pgidx); | |||
| 14170 | fts5BufferFree(&pWriter->btterm)sqlite3Fts5BufferFree(&pWriter->btterm); | |||
| 14171 | ||||
| 14172 | for(i=0; i<pWriter->nDlidx; i++){ | |||
| 14173 | sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); | |||
| 14174 | } | |||
| 14175 | sqlite3_freesqlite3_api->free(pWriter->aDlidx); | |||
| 14176 | } | |||
| 14177 | ||||
| 14178 | static void fts5WriteInit( | |||
| 14179 | Fts5Index *p, | |||
| 14180 | Fts5SegWriter *pWriter, | |||
| 14181 | int iSegid | |||
| 14182 | ){ | |||
| 14183 | const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING20; | |||
| 14184 | ||||
| 14185 | memset(pWriter, 0, sizeof(Fts5SegWriter)); | |||
| 14186 | pWriter->iSegid = iSegid; | |||
| 14187 | ||||
| 14188 | fts5WriteDlidxGrow(p, pWriter, 1); | |||
| 14189 | pWriter->writer.pgno = 1; | |||
| 14190 | pWriter->bFirstTermInPage = 1; | |||
| 14191 | pWriter->iBtPage = 1; | |||
| 14192 | ||||
| 14193 | assert( pWriter->writer.buf.n==0 )((void) (0)); | |||
| 14194 | assert( pWriter->writer.pgidx.n==0 )((void) (0)); | |||
| 14195 | ||||
| 14196 | /* Grow the two buffers to pgsz + padding bytes in size. */ | |||
| 14197 | sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer); | |||
| 14198 | sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer); | |||
| 14199 | ||||
| 14200 | if( p->pIdxWriter==0 ){ | |||
| 14201 | Fts5Config *pConfig = p->pConfig; | |||
| 14202 | fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintfsqlite3_api->mprintf( | |||
| 14203 | "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", | |||
| 14204 | pConfig->zDb, pConfig->zName | |||
| 14205 | )); | |||
| 14206 | } | |||
| 14207 | ||||
| 14208 | if( p->rc==SQLITE_OK0 ){ | |||
| 14209 | /* Initialize the 4-byte leaf-page header to 0x00. */ | |||
| 14210 | memset(pWriter->writer.buf.p, 0, 4); | |||
| 14211 | pWriter->writer.buf.n = 4; | |||
| 14212 | ||||
| 14213 | /* Bind the current output segment id to the index-writer. This is an | |||
| 14214 | ** optimization over binding the same value over and over as rows are | |||
| 14215 | ** inserted into %_idx by the current writer. */ | |||
| 14216 | sqlite3_bind_intsqlite3_api->bind_int(p->pIdxWriter, 1, pWriter->iSegid); | |||
| 14217 | } | |||
| 14218 | } | |||
| 14219 | ||||
| 14220 | /* | |||
| 14221 | ** Iterator pIter was used to iterate through the input segments of on an | |||
| 14222 | ** incremental merge operation. This function is called if the incremental | |||
| 14223 | ** merge step has finished but the input has not been completely exhausted. | |||
| 14224 | */ | |||
| 14225 | static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ | |||
| 14226 | int i; | |||
| 14227 | Fts5Buffer buf; | |||
| 14228 | memset(&buf, 0, sizeof(Fts5Buffer)); | |||
| 14229 | for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK0; i++){ | |||
| 14230 | Fts5SegIter *pSeg = &pIter->aSeg[i]; | |||
| 14231 | if( pSeg->pSeg==0 ){ | |||
| 14232 | /* no-op */ | |||
| 14233 | }else if( pSeg->pLeaf==0 ){ | |||
| 14234 | /* All keys from this input segment have been transfered to the output. | |||
| 14235 | ** Set both the first and last page-numbers to 0 to indicate that the | |||
| 14236 | ** segment is now empty. */ | |||
| 14237 | pSeg->pSeg->pgnoLast = 0; | |||
| 14238 | pSeg->pSeg->pgnoFirst = 0; | |||
| 14239 | }else{ | |||
| 14240 | int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ | |||
| 14241 | i64 iLeafRowid; | |||
| 14242 | Fts5Data *pData; | |||
| 14243 | int iId = pSeg->pSeg->iSegid; | |||
| 14244 | u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; | |||
| 14245 | ||||
| 14246 | iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno)( ((i64)(iId) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iTermLeafPgno )) ); | |||
| 14247 | pData = fts5LeafRead(p, iLeafRowid); | |||
| 14248 | if( pData ){ | |||
| 14249 | if( iOff>pData->szLeaf ){ | |||
| 14250 | /* This can occur if the pages that the segments occupy overlap - if | |||
| 14251 | ** a single page has been assigned to more than one segment. In | |||
| 14252 | ** this case a prior iteration of this loop may have corrupted the | |||
| 14253 | ** segment currently being trimmed. */ | |||
| 14254 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 14255 | }else{ | |||
| 14256 | fts5BufferZero(&buf)sqlite3Fts5BufferZero(&buf); | |||
| 14257 | fts5BufferGrow(&p->rc, &buf, pData->nn)( (u32)((&buf)->n) + (u32)(pData->nn) <= (u32)(( &buf)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p-> rc),(&buf),(pData->nn)+(&buf)->n) ); | |||
| 14258 | fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr)sqlite3Fts5BufferAppendBlob(&p->rc,&buf,sizeof(aHdr ),aHdr); | |||
| 14259 | fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)pSeg ->term.n); | |||
| 14260 | fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p)sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pSeg->term .n,pSeg->term.p); | |||
| 14261 | fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> szLeaf-iOff,&pData->p[iOff]); | |||
| 14262 | if( p->rc==SQLITE_OK0 ){ | |||
| 14263 | /* Set the szLeaf field */ | |||
| 14264 | fts5PutU16(&buf.p[2], (u16)buf.n); | |||
| 14265 | } | |||
| 14266 | ||||
| 14267 | /* Set up the new page-index array */ | |||
| 14268 | fts5BufferAppendVarint(&p->rc, &buf, 4)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)4); | |||
| 14269 | if( pSeg->iLeafPgno==pSeg->iTermLeafPgno | |||
| 14270 | && pSeg->iEndofDoclist<pData->szLeaf | |||
| 14271 | && pSeg->iPgidxOff<=pData->nn | |||
| 14272 | ){ | |||
| 14273 | int nDiff = pData->szLeaf - pSeg->iEndofDoclist; | |||
| 14274 | fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4)sqlite3Fts5BufferAppendVarint(&p->rc,&buf,(i64)buf .n - 1 - nDiff - 4); | |||
| 14275 | fts5BufferAppendBlob(&p->rc, &buf,sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]) | |||
| 14276 | pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]) | |||
| 14277 | )sqlite3Fts5BufferAppendBlob(&p->rc,&buf,pData-> nn - pSeg->iPgidxOff,&pData->p[pSeg->iPgidxOff]); | |||
| 14278 | } | |||
| 14279 | ||||
| 14280 | pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; | |||
| 14281 | fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1)( ((i64)(iId) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(1)) ), iLeafRowid); | |||
| 14282 | fts5DataWrite(p, iLeafRowid, buf.p, buf.n); | |||
| 14283 | } | |||
| 14284 | fts5DataRelease(pData); | |||
| 14285 | } | |||
| 14286 | } | |||
| 14287 | } | |||
| 14288 | fts5BufferFree(&buf)sqlite3Fts5BufferFree(&buf); | |||
| 14289 | } | |||
| 14290 | ||||
| 14291 | static void fts5MergeChunkCallback( | |||
| 14292 | Fts5Index *p, | |||
| 14293 | void *pCtx, | |||
| 14294 | const u8 *pChunk, int nChunk | |||
| 14295 | ){ | |||
| 14296 | Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; | |||
| 14297 | fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); | |||
| 14298 | } | |||
| 14299 | ||||
| 14300 | /* | |||
| 14301 | ** | |||
| 14302 | */ | |||
| 14303 | static void fts5IndexMergeLevel( | |||
| 14304 | Fts5Index *p, /* FTS5 backend object */ | |||
| 14305 | Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ | |||
| 14306 | int iLvl, /* Level to read input from */ | |||
| 14307 | int *pnRem /* Write up to this many output leaves */ | |||
| 14308 | ){ | |||
| 14309 | Fts5Structure *pStruct = *ppStruct; | |||
| 14310 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | |||
| 14311 | Fts5StructureLevel *pLvlOut; | |||
| 14312 | Fts5Iter *pIter = 0; /* Iterator to read input data */ | |||
| 14313 | int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ | |||
| 14314 | int nInput; /* Number of input segments */ | |||
| 14315 | Fts5SegWriter writer; /* Writer object */ | |||
| 14316 | Fts5StructureSegment *pSeg; /* Output segment */ | |||
| 14317 | Fts5Buffer term; | |||
| 14318 | int bOldest; /* True if the output segment is the oldest */ | |||
| 14319 | int eDetail = p->pConfig->eDetail; | |||
| 14320 | const int flags = FTS5INDEX_QUERY_NOOUTPUT0x0020; | |||
| 14321 | int bTermWritten = 0; /* True if current term already output */ | |||
| 14322 | ||||
| 14323 | assert( iLvl<pStruct->nLevel )((void) (0)); | |||
| 14324 | assert( pLvl->nMerge<=pLvl->nSeg )((void) (0)); | |||
| 14325 | ||||
| 14326 | memset(&writer, 0, sizeof(Fts5SegWriter)); | |||
| 14327 | memset(&term, 0, sizeof(Fts5Buffer)); | |||
| 14328 | if( pLvl->nMerge ){ | |||
| 14329 | pLvlOut = &pStruct->aLevel[iLvl+1]; | |||
| 14330 | assert( pLvlOut->nSeg>0 )((void) (0)); | |||
| 14331 | nInput = pLvl->nMerge; | |||
| 14332 | pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; | |||
| 14333 | ||||
| 14334 | fts5WriteInit(p, &writer, pSeg->iSegid); | |||
| 14335 | writer.writer.pgno = pSeg->pgnoLast+1; | |||
| 14336 | writer.iBtPage = 0; | |||
| 14337 | }else{ | |||
| 14338 | int iSegid = fts5AllocateSegid(p, pStruct); | |||
| 14339 | ||||
| 14340 | /* Extend the Fts5Structure object as required to ensure the output | |||
| 14341 | ** segment exists. */ | |||
| 14342 | if( iLvl==pStruct->nLevel-1 ){ | |||
| 14343 | fts5StructureAddLevel(&p->rc, ppStruct); | |||
| 14344 | pStruct = *ppStruct; | |||
| 14345 | } | |||
| 14346 | fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); | |||
| 14347 | if( p->rc ) return; | |||
| 14348 | pLvl = &pStruct->aLevel[iLvl]; | |||
| 14349 | pLvlOut = &pStruct->aLevel[iLvl+1]; | |||
| 14350 | ||||
| 14351 | fts5WriteInit(p, &writer, iSegid); | |||
| 14352 | ||||
| 14353 | /* Add the new segment to the output level */ | |||
| 14354 | pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; | |||
| 14355 | pLvlOut->nSeg++; | |||
| 14356 | pSeg->pgnoFirst = 1; | |||
| 14357 | pSeg->iSegid = iSegid; | |||
| 14358 | pStruct->nSegment++; | |||
| 14359 | ||||
| 14360 | /* Read input from all segments in the input level */ | |||
| 14361 | nInput = pLvl->nSeg; | |||
| 14362 | ||||
| 14363 | /* Set the range of origins that will go into the output segment. */ | |||
| 14364 | if( pStruct->nOriginCntr>0 ){ | |||
| 14365 | pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1; | |||
| 14366 | pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2; | |||
| 14367 | } | |||
| 14368 | } | |||
| 14369 | bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); | |||
| 14370 | ||||
| 14371 | assert( iLvl>=0 )((void) (0)); | |||
| 14372 | for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); | |||
| 14373 | fts5MultiIterEof(p, pIter)==0; | |||
| 14374 | fts5MultiIterNext(p, pIter, 0, 0) | |||
| 14375 | ){ | |||
| 14376 | Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
| 14377 | int nPos; /* position-list size field value */ | |||
| 14378 | int nTerm; | |||
| 14379 | const u8 *pTerm; | |||
| 14380 | ||||
| 14381 | pTerm = fts5MultiIterTerm(pIter, &nTerm); | |||
| 14382 | if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm)((nTerm)<=0 ? 0 : memcmp((pTerm), (term.p), (nTerm))) ){ | |||
| 14383 | if( pnRem && writer.nLeafWritten>nRem ){ | |||
| 14384 | break; | |||
| 14385 | } | |||
| 14386 | fts5BufferSet(&p->rc, &term, nTerm, pTerm)sqlite3Fts5BufferSet(&p->rc,&term,nTerm,pTerm); | |||
| 14387 | bTermWritten =0; | |||
| 14388 | } | |||
| 14389 | ||||
| 14390 | /* Check for key annihilation. */ | |||
| 14391 | if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue; | |||
| 14392 | ||||
| 14393 | if( p->rc==SQLITE_OK0 && bTermWritten==0 ){ | |||
| 14394 | /* This is a new term. Append a term to the output segment. */ | |||
| 14395 | fts5WriteAppendTerm(p, &writer, nTerm, pTerm); | |||
| 14396 | bTermWritten = 1; | |||
| 14397 | } | |||
| 14398 | ||||
| 14399 | /* Append the rowid to the output */ | |||
| 14400 | /* WRITEPOSLISTSIZE */ | |||
| 14401 | fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); | |||
| 14402 | ||||
| 14403 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 14404 | if( pSegIter->bDel ){ | |||
| 14405 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)0); | |||
| 14406 | if( pSegIter->nPos>0 ){ | |||
| 14407 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)0); | |||
| 14408 | } | |||
| 14409 | } | |||
| 14410 | }else{ | |||
| 14411 | /* Append the position-list data to the output */ | |||
| 14412 | nPos = pSegIter->nPos*2 + pSegIter->bDel; | |||
| 14413 | fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos)sqlite3Fts5BufferAppendVarint(&p->rc,&writer.writer .buf,(i64)nPos); | |||
| 14414 | fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); | |||
| 14415 | } | |||
| 14416 | } | |||
| 14417 | ||||
| 14418 | /* Flush the last leaf page to disk. Set the output segment b-tree height | |||
| 14419 | ** and last leaf page number at the same time. */ | |||
| 14420 | fts5WriteFinish(p, &writer, &pSeg->pgnoLast); | |||
| 14421 | ||||
| 14422 | assert( pIter!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
| 14423 | if( fts5MultiIterEof(p, pIter) ){ | |||
| 14424 | int i; | |||
| 14425 | ||||
| 14426 | /* Remove the redundant segments from the %_data table */ | |||
| 14427 | assert( pSeg->nEntry==0 )((void) (0)); | |||
| 14428 | for(i=0; i<nInput; i++){ | |||
| 14429 | Fts5StructureSegment *pOld = &pLvl->aSeg[i]; | |||
| 14430 | pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone); | |||
| 14431 | fts5DataRemoveSegment(p, pOld); | |||
| 14432 | } | |||
| 14433 | ||||
| 14434 | /* Remove the redundant segments from the input level */ | |||
| 14435 | if( pLvl->nSeg!=nInput ){ | |||
| 14436 | int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); | |||
| 14437 | memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); | |||
| 14438 | } | |||
| 14439 | pStruct->nSegment -= nInput; | |||
| 14440 | pLvl->nSeg -= nInput; | |||
| 14441 | pLvl->nMerge = 0; | |||
| 14442 | if( pSeg->pgnoLast==0 ){ | |||
| 14443 | pLvlOut->nSeg--; | |||
| 14444 | pStruct->nSegment--; | |||
| 14445 | } | |||
| 14446 | }else{ | |||
| 14447 | assert( pSeg->pgnoLast>0 )((void) (0)); | |||
| 14448 | fts5TrimSegments(p, pIter); | |||
| 14449 | pLvl->nMerge = nInput; | |||
| 14450 | } | |||
| 14451 | ||||
| 14452 | fts5MultiIterFree(pIter); | |||
| 14453 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | |||
| 14454 | if( pnRem ) *pnRem -= writer.nLeafWritten; | |||
| 14455 | } | |||
| 14456 | ||||
| 14457 | /* | |||
| 14458 | ** If this is not a contentless_delete=1 table, or if the 'deletemerge' | |||
| 14459 | ** configuration option is set to 0, then this function always returns -1. | |||
| 14460 | ** Otherwise, it searches the structure object passed as the second argument | |||
| 14461 | ** for a level suitable for merging due to having a large number of | |||
| 14462 | ** tombstones in the tombstone hash. If one is found, its index is returned. | |||
| 14463 | ** Otherwise, if there is no suitable level, -1. | |||
| 14464 | */ | |||
| 14465 | static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){ | |||
| 14466 | Fts5Config *pConfig = p->pConfig; | |||
| 14467 | int iRet = -1; | |||
| 14468 | if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){ | |||
| 14469 | int ii; | |||
| 14470 | int nBest = 0; | |||
| 14471 | ||||
| 14472 | for(ii=0; ii<pStruct->nLevel; ii++){ | |||
| 14473 | Fts5StructureLevel *pLvl = &pStruct->aLevel[ii]; | |||
| 14474 | i64 nEntry = 0; | |||
| 14475 | i64 nTomb = 0; | |||
| 14476 | int iSeg; | |||
| 14477 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | |||
| 14478 | nEntry += pLvl->aSeg[iSeg].nEntry; | |||
| 14479 | nTomb += pLvl->aSeg[iSeg].nEntryTombstone; | |||
| 14480 | } | |||
| 14481 | assert_nc( nEntry>0 || pLvl->nSeg==0 )((void) (0)); | |||
| 14482 | if( nEntry>0 ){ | |||
| 14483 | int nPercent = (nTomb * 100) / nEntry; | |||
| 14484 | if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){ | |||
| 14485 | iRet = ii; | |||
| 14486 | nBest = nPercent; | |||
| 14487 | } | |||
| 14488 | } | |||
| 14489 | ||||
| 14490 | /* If pLvl is already the input level to an ongoing merge, look no | |||
| 14491 | ** further for a merge candidate. The caller should be allowed to | |||
| 14492 | ** continue merging from pLvl first. */ | |||
| 14493 | if( pLvl->nMerge ) break; | |||
| 14494 | } | |||
| 14495 | } | |||
| 14496 | return iRet; | |||
| 14497 | } | |||
| 14498 | ||||
| 14499 | /* | |||
| 14500 | ** Do up to nPg pages of automerge work on the index. | |||
| 14501 | ** | |||
| 14502 | ** Return true if any changes were actually made, or false otherwise. | |||
| 14503 | */ | |||
| 14504 | static int fts5IndexMerge( | |||
| 14505 | Fts5Index *p, /* FTS5 backend object */ | |||
| 14506 | Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ | |||
| 14507 | int nPg, /* Pages of work to do */ | |||
| 14508 | int nMin /* Minimum number of segments to merge */ | |||
| 14509 | ){ | |||
| 14510 | int nRem = nPg; | |||
| 14511 | int bRet = 0; | |||
| 14512 | Fts5Structure *pStruct = *ppStruct; | |||
| 14513 | while( nRem>0 && p->rc==SQLITE_OK0 ){ | |||
| 14514 | int iLvl; /* To iterate through levels */ | |||
| 14515 | int iBestLvl = 0; /* Level offering the most input segments */ | |||
| 14516 | int nBest = 0; /* Number of input segments on best level */ | |||
| 14517 | ||||
| 14518 | /* Set iBestLvl to the level to read input segments from. Or to -1 if | |||
| 14519 | ** there is no level suitable to merge segments from. */ | |||
| 14520 | assert( pStruct->nLevel>0 )((void) (0)); | |||
| 14521 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
| 14522 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; | |||
| 14523 | if( pLvl->nMerge ){ | |||
| 14524 | if( pLvl->nMerge>nBest ){ | |||
| 14525 | iBestLvl = iLvl; | |||
| 14526 | nBest = nMin; | |||
| 14527 | } | |||
| 14528 | break; | |||
| 14529 | } | |||
| 14530 | if( pLvl->nSeg>nBest ){ | |||
| 14531 | nBest = pLvl->nSeg; | |||
| 14532 | iBestLvl = iLvl; | |||
| 14533 | } | |||
| 14534 | } | |||
| 14535 | if( nBest<nMin ){ | |||
| 14536 | iBestLvl = fts5IndexFindDeleteMerge(p, pStruct); | |||
| 14537 | } | |||
| 14538 | ||||
| 14539 | if( iBestLvl<0 ) break; | |||
| 14540 | bRet = 1; | |||
| 14541 | fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); | |||
| 14542 | if( p->rc==SQLITE_OK0 && pStruct->aLevel[iBestLvl].nMerge==0 ){ | |||
| 14543 | fts5StructurePromote(p, iBestLvl+1, pStruct); | |||
| 14544 | } | |||
| 14545 | ||||
| 14546 | if( nMin==1 ) nMin = 2; | |||
| 14547 | } | |||
| 14548 | *ppStruct = pStruct; | |||
| 14549 | return bRet; | |||
| 14550 | } | |||
| 14551 | ||||
| 14552 | /* | |||
| 14553 | ** A total of nLeaf leaf pages of data has just been flushed to a level-0 | |||
| 14554 | ** segment. This function updates the write-counter accordingly and, if | |||
| 14555 | ** necessary, performs incremental merge work. | |||
| 14556 | ** | |||
| 14557 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | |||
| 14558 | ** already occurred, this function is a no-op. | |||
| 14559 | */ | |||
| 14560 | static void fts5IndexAutomerge( | |||
| 14561 | Fts5Index *p, /* FTS5 backend object */ | |||
| 14562 | Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ | |||
| 14563 | int nLeaf /* Number of output leaves just written */ | |||
| 14564 | ){ | |||
| 14565 | if( p->rc==SQLITE_OK0 && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0)((*ppStruct)!=0) ){ | |||
| 14566 | Fts5Structure *pStruct = *ppStruct; | |||
| 14567 | u64 nWrite; /* Initial value of write-counter */ | |||
| 14568 | int nWork; /* Number of work-quanta to perform */ | |||
| 14569 | int nRem; /* Number of leaf pages left to write */ | |||
| 14570 | ||||
| 14571 | /* Update the write-counter. While doing so, set nWork. */ | |||
| 14572 | nWrite = pStruct->nWriteCounter; | |||
| 14573 | nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); | |||
| 14574 | pStruct->nWriteCounter += nLeaf; | |||
| 14575 | nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); | |||
| 14576 | ||||
| 14577 | fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); | |||
| 14578 | } | |||
| 14579 | } | |||
| 14580 | ||||
| 14581 | static void fts5IndexCrisismerge( | |||
| 14582 | Fts5Index *p, /* FTS5 backend object */ | |||
| 14583 | Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ | |||
| 14584 | ){ | |||
| 14585 | const int nCrisis = p->pConfig->nCrisisMerge; | |||
| 14586 | Fts5Structure *pStruct = *ppStruct; | |||
| 14587 | if( pStruct && pStruct->nLevel>0 ){ | |||
| 14588 | int iLvl = 0; | |||
| 14589 | while( p->rc==SQLITE_OK0 && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ | |||
| 14590 | fts5IndexMergeLevel(p, &pStruct, iLvl, 0); | |||
| 14591 | assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) )((void) (0)); | |||
| 14592 | fts5StructurePromote(p, iLvl+1, pStruct); | |||
| 14593 | iLvl++; | |||
| 14594 | } | |||
| 14595 | *ppStruct = pStruct; | |||
| 14596 | } | |||
| 14597 | } | |||
| 14598 | ||||
| 14599 | static int fts5IndexReturn(Fts5Index *p){ | |||
| 14600 | int rc = p->rc; | |||
| 14601 | p->rc = SQLITE_OK0; | |||
| 14602 | return rc; | |||
| 14603 | } | |||
| 14604 | ||||
| 14605 | /* | |||
| 14606 | ** Close the read-only blob handle, if it is open. | |||
| 14607 | */ | |||
| 14608 | static void sqlite3Fts5IndexCloseReader(Fts5Index *p){ | |||
| 14609 | fts5IndexCloseReader(p); | |||
| 14610 | fts5IndexReturn(p); | |||
| 14611 | } | |||
| 14612 | ||||
| 14613 | typedef struct Fts5FlushCtx Fts5FlushCtx; | |||
| 14614 | struct Fts5FlushCtx { | |||
| 14615 | Fts5Index *pIdx; | |||
| 14616 | Fts5SegWriter writer; | |||
| 14617 | }; | |||
| 14618 | ||||
| 14619 | /* | |||
| 14620 | ** Buffer aBuf[] contains a list of varints, all small enough to fit | |||
| 14621 | ** in a 32-bit integer. Return the size of the largest prefix of this | |||
| 14622 | ** list nMax bytes or less in size. | |||
| 14623 | */ | |||
| 14624 | static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ | |||
| 14625 | int ret; | |||
| 14626 | u32 dummy; | |||
| 14627 | ret = fts5GetVarint32(aBuf, dummy)sqlite3Fts5GetVarint32(aBuf,(u32*)&(dummy)); | |||
| 14628 | if( ret<nMax ){ | |||
| 14629 | while( 1 ){ | |||
| 14630 | int i = fts5GetVarint32(&aBuf[ret], dummy)sqlite3Fts5GetVarint32(&aBuf[ret],(u32*)&(dummy)); | |||
| 14631 | if( (ret + i) > nMax ) break; | |||
| 14632 | ret += i; | |||
| 14633 | } | |||
| 14634 | } | |||
| 14635 | return ret; | |||
| 14636 | } | |||
| 14637 | ||||
| 14638 | /* | |||
| 14639 | ** Execute the SQL statement: | |||
| 14640 | ** | |||
| 14641 | ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno); | |||
| 14642 | ** | |||
| 14643 | ** This is used when a secure-delete operation removes the last term | |||
| 14644 | ** from a segment leaf page. In that case the %_idx entry is removed | |||
| 14645 | ** too. This is done to ensure that if all instances of a token are | |||
| 14646 | ** removed from an fts5 database in secure-delete mode, no trace of | |||
| 14647 | ** the token itself remains in the database. | |||
| 14648 | */ | |||
| 14649 | static void fts5SecureDeleteIdxEntry( | |||
| 14650 | Fts5Index *p, /* FTS5 backend object */ | |||
| 14651 | int iSegid, /* Id of segment to delete entry for */ | |||
| 14652 | int iPgno /* Page number within segment */ | |||
| 14653 | ){ | |||
| 14654 | if( iPgno!=1 ){ | |||
| 14655 | assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE )((void) (0)); | |||
| 14656 | if( p->pDeleteFromIdx==0 ){ | |||
| 14657 | fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintfsqlite3_api->mprintf( | |||
| 14658 | "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)", | |||
| 14659 | p->pConfig->zDb, p->pConfig->zName | |||
| 14660 | )); | |||
| 14661 | } | |||
| 14662 | if( p->rc==SQLITE_OK0 ){ | |||
| 14663 | sqlite3_bind_intsqlite3_api->bind_int(p->pDeleteFromIdx, 1, iSegid); | |||
| 14664 | sqlite3_bind_intsqlite3_api->bind_int(p->pDeleteFromIdx, 2, iPgno); | |||
| 14665 | sqlite3_stepsqlite3_api->step(p->pDeleteFromIdx); | |||
| 14666 | p->rc = sqlite3_resetsqlite3_api->reset(p->pDeleteFromIdx); | |||
| 14667 | } | |||
| 14668 | } | |||
| 14669 | } | |||
| 14670 | ||||
| 14671 | /* | |||
| 14672 | ** This is called when a secure-delete operation removes a position-list | |||
| 14673 | ** that overflows onto segment page iPgno of segment pSeg. This function | |||
| 14674 | ** rewrites node iPgno, and possibly one or more of its right-hand peers, | |||
| 14675 | ** to remove this portion of the position list. | |||
| 14676 | ** | |||
| 14677 | ** Output variable (*pbLastInDoclist) is set to true if the position-list | |||
| 14678 | ** removed is followed by a new term or the end-of-segment, or false if | |||
| 14679 | ** it is followed by another rowid/position list. | |||
| 14680 | */ | |||
| 14681 | static void fts5SecureDeleteOverflow( | |||
| 14682 | Fts5Index *p, | |||
| 14683 | Fts5StructureSegment *pSeg, | |||
| 14684 | int iPgno, | |||
| 14685 | int *pbLastInDoclist | |||
| 14686 | ){ | |||
| 14687 | const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE1); | |||
| 14688 | int pgno; | |||
| 14689 | Fts5Data *pLeaf = 0; | |||
| 14690 | assert( iPgno!=1 )((void) (0)); | |||
| 14691 | ||||
| 14692 | *pbLastInDoclist = 1; | |||
| 14693 | for(pgno=iPgno; p->rc==SQLITE_OK0 && pgno<=pSeg->pgnoLast; pgno++){ | |||
| 14694 | i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | |||
| 14695 | int iNext = 0; | |||
| 14696 | u8 *aPg = 0; | |||
| 14697 | ||||
| 14698 | pLeaf = fts5DataRead(p, iRowid); | |||
| 14699 | if( pLeaf==0 ) break; | |||
| 14700 | aPg = pLeaf->p; | |||
| 14701 | ||||
| 14702 | iNext = fts5GetU16(&aPg[0]); | |||
| 14703 | if( iNext!=0 ){ | |||
| 14704 | *pbLastInDoclist = 0; | |||
| 14705 | } | |||
| 14706 | if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){ | |||
| 14707 | fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext)sqlite3Fts5GetVarint32(&aPg[pLeaf->szLeaf],(u32*)& (iNext)); | |||
| 14708 | } | |||
| 14709 | ||||
| 14710 | if( iNext==0 ){ | |||
| 14711 | /* The page contains no terms or rowids. Replace it with an empty | |||
| 14712 | ** page and move on to the right-hand peer. */ | |||
| 14713 | const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04}; | |||
| 14714 | assert_nc( bDetailNone==0 || pLeaf->nn==4 )((void) (0)); | |||
| 14715 | if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty)); | |||
| 14716 | fts5DataRelease(pLeaf); | |||
| 14717 | pLeaf = 0; | |||
| 14718 | }else if( bDetailNone ){ | |||
| 14719 | break; | |||
| 14720 | }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){ | |||
| 14721 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 14722 | break; | |||
| 14723 | }else{ | |||
| 14724 | int nShift = iNext - 4; | |||
| 14725 | int nPg; | |||
| 14726 | ||||
| 14727 | int nIdx = 0; | |||
| 14728 | u8 *aIdx = 0; | |||
| 14729 | ||||
| 14730 | /* Unless the current page footer is 0 bytes in size (in which case | |||
| 14731 | ** the new page footer will be as well), allocate and populate a | |||
| 14732 | ** buffer containing the new page footer. Set stack variables aIdx | |||
| 14733 | ** and nIdx accordingly. */ | |||
| 14734 | if( pLeaf->nn>pLeaf->szLeaf ){ | |||
| 14735 | int iFirst = 0; | |||
| 14736 | int i1 = pLeaf->szLeaf; | |||
| 14737 | int i2 = 0; | |||
| 14738 | ||||
| 14739 | i1 += fts5GetVarint32(&aPg[i1], iFirst)sqlite3Fts5GetVarint32(&aPg[i1],(u32*)&(iFirst)); | |||
| 14740 | if( iFirst<iNext ){ | |||
| 14741 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 14742 | break; | |||
| 14743 | } | |||
| 14744 | aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2); | |||
| 14745 | if( aIdx==0 ) break; | |||
| 14746 | i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift); | |||
| 14747 | if( i1<pLeaf->nn ){ | |||
| 14748 | memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1); | |||
| 14749 | i2 += (pLeaf->nn-i1); | |||
| 14750 | } | |||
| 14751 | nIdx = i2; | |||
| 14752 | } | |||
| 14753 | ||||
| 14754 | /* Modify the contents of buffer aPg[]. Set nPg to the new size | |||
| 14755 | ** in bytes. The new page is always smaller than the old. */ | |||
| 14756 | nPg = pLeaf->szLeaf - nShift; | |||
| 14757 | memmove(&aPg[4], &aPg[4+nShift], nPg-4); | |||
| 14758 | fts5PutU16(&aPg[2], nPg); | |||
| 14759 | if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4); | |||
| 14760 | if( nIdx>0 ){ | |||
| 14761 | memcpy(&aPg[nPg], aIdx, nIdx); | |||
| 14762 | nPg += nIdx; | |||
| 14763 | } | |||
| 14764 | sqlite3_freesqlite3_api->free(aIdx); | |||
| 14765 | ||||
| 14766 | /* Write the new page to disk and exit the loop */ | |||
| 14767 | assert( nPg>4 || fts5GetU16(aPg)==0 )((void) (0)); | |||
| 14768 | fts5DataWrite(p, iRowid, aPg, nPg); | |||
| 14769 | break; | |||
| 14770 | } | |||
| 14771 | } | |||
| 14772 | fts5DataRelease(pLeaf); | |||
| 14773 | } | |||
| 14774 | ||||
| 14775 | /* | |||
| 14776 | ** Completely remove the entry that pSeg currently points to from | |||
| 14777 | ** the database. | |||
| | ** | |||
| | ** The leaf page held by pSeg->pLeaf is edited in place and then written | |||
| | ** back with fts5DataWrite(). The page footer is copied into a scratch | |||
| | ** buffer first and re-encoded from that copy once the page body has been | |||
| | ** shifted. | |||
| | ** | |||
| | ** If pSeg->bDel is set the entry is not dropped outright: its rowid | |||
| | ** varint is rewritten and followed by a single 0x01 byte. | |||
| | ** | |||
| | ** There is no return value. Errors are left in p->rc. | |||
| 14778 | */ | |||
| 14779 | static void fts5DoSecureDelete( | |||
| 14780 | Fts5Index *p, /* Index handle; errors are left in p->rc */ | |||
| 14781 | Fts5SegIter *pSeg /* Iterator positioned on the entry to remove */ | |||
| 14782 | ){ | |||
| 14783 | const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE1); | |||
| 14784 | int iSegid = pSeg->pSeg->iSegid; | |||
| 14785 | u8 *aPg = pSeg->pLeaf->p; /* Leaf page buffer, edited in place */ | |||
| 14786 | int nPg = pSeg->pLeaf->nn; /* Initial total size of the leaf page */ | |||
| 14787 | int iPgIdx = pSeg->pLeaf->szLeaf; /* Initial offset of the page footer */ | |||
| 14788 | ||||
| 14789 | u64 iDelta = 0; | |||
| 14790 | int iNextOff = 0; | |||
| 14791 | int iOff = 0; | |||
| 14792 | int nIdx = 0; /* Size of the page footer in bytes */ | |||
| 14793 | u8 *aIdx = 0; /* Scratch copy of the page footer */ | |||
| 14794 | int bLastInDoclist = 0; | |||
| 14795 | int iIdx = 0; | |||
| 14796 | int iStart = 0; | |||
| 14797 | int iDelKeyOff = 0; /* Offset of deleted key, if any */ | |||
| 14798 | ||||
| 14799 | nIdx = nPg-iPgIdx; | |||
| 14800 | aIdx = sqlite3Fts5MallocZero(&p->rc, ((i64)nIdx)+16); | |||
| 14801 | if( p->rc ) return; /* Stop if an error is pending at this point */ | |||
| 14802 | memcpy(aIdx, &aPg[iPgIdx], nIdx); | |||
| 14803 | ||||
| 14804 | /* At this point segment iterator pSeg points to the entry | |||
| 14805 | ** this function should remove from the b-tree segment. | |||
| 14806 | ** | |||
| 14807 | ** In detail=full or detail=column mode, pSeg->iLeafOffset is the | |||
| 14808 | ** offset of the first byte in the position-list for the entry to | |||
| 14809 | ** remove. Immediately before this comes two varints that will also | |||
| 14810 | ** need to be removed: | |||
| 14811 | ** | |||
| 14812 | ** + the rowid or delta rowid value for the entry, and | |||
| 14813 | ** + the size of the position list in bytes. | |||
| 14814 | ** | |||
| 14815 | ** Or, in detail=none mode, there is a single varint prior to | |||
| 14816 | ** pSeg->iLeafOffset - the rowid or delta rowid value. | |||
| 14817 | ** | |||
| 14818 | ** This block sets the following variables: | |||
| 14819 | ** | |||
| 14820 | ** iStart: | |||
| 14821 | ** The offset of the first byte of the rowid or delta-rowid | |||
| 14822 | ** value for the doclist entry being removed. | |||
| 14823 | ** | |||
| 14824 | ** iDelta: | |||
| 14825 | ** The value of the rowid or delta-rowid value for the doclist | |||
| 14826 | ** entry being removed. | |||
| 14827 | ** | |||
| 14828 | ** iNextOff: | |||
| 14829 | ** The offset of the next entry following the position list | |||
| 14830 | ** for the one being removed. If the position list for this | |||
| 14831 | ** entry overflows onto the next leaf page, this value will be | |||
| 14832 | ** greater than pLeaf->szLeaf. | |||
| 14833 | */ | |||
| 14834 | { | |||
| 14835 | int iSOP; /* Start-Of-Position-list */ | |||
| 14836 | if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){ | |||
| 14837 | iStart = pSeg->iTermLeafOffset; | |||
| 14838 | }else{ | |||
| 14839 | iStart = fts5GetU16(&aPg[0]); | |||
| 14840 | } | |||
| 14841 | ||||
| 14842 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | |||
| 14843 | assert_nc( iSOP<=pSeg->iLeafOffset )((void) (0)); | |||
| 14844 | ||||
| 14845 | if( bDetailNone ){ | |||
| 14846 | while( iSOP<pSeg->iLeafOffset ){ | |||
| 14847 | if( aPg[iSOP]==0x00 ) iSOP++; | |||
| 14848 | if( aPg[iSOP]==0x00 ) iSOP++; | |||
| 14849 | iStart = iSOP; | |||
| 14850 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | |||
| 14851 | } | |||
| 14852 | ||||
| 14853 | iNextOff = iSOP; | |||
| 14854 | if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++; | |||
| 14855 | if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++; | |||
| 14856 | ||||
| 14857 | }else{ | |||
| 14858 | int nPos = 0; | |||
| 14859 | iSOP += fts5GetVarint32(&aPg[iSOP], nPos)sqlite3Fts5GetVarint32(&aPg[iSOP],(u32*)&(nPos)); | |||
| 14860 | while( iSOP<pSeg->iLeafOffset ){ | |||
| 14861 | iStart = iSOP + (nPos/2); | |||
| 14862 | iSOP = iStart + fts5GetVarintsqlite3Fts5GetVarint(&aPg[iStart], &iDelta); | |||
| 14863 | iSOP += fts5GetVarint32(&aPg[iSOP], nPos)sqlite3Fts5GetVarint32(&aPg[iSOP],(u32*)&(nPos)); | |||
| 14864 | } | |||
| 14865 | assert_nc( iSOP==pSeg->iLeafOffset )((void) (0)); | |||
| 14866 | iNextOff = pSeg->iLeafOffset + pSeg->nPos; | |||
| 14867 | } | |||
| 14868 | } | |||
| 14869 | ||||
| 14870 | iOff = iStart; | |||
| 14871 | ||||
| 14872 | /* If the position-list for the entry being removed flows over past | |||
| 14873 | ** the end of this page, delete the portion of the position-list on the | |||
| 14874 | ** next page and beyond. | |||
| 14875 | ** | |||
| 14876 | ** Set variable bLastInDoclist to true if this entry happens | |||
| 14877 | ** to be the last rowid in the doclist for its term. */ | |||
| 14878 | if( iNextOff>=iPgIdx ){ | |||
| 14879 | int pgno = pSeg->iLeafPgno+1; | |||
| 14880 | fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist); | |||
| 14881 | iNextOff = iPgIdx; | |||
| 14882 | } | |||
| 14883 | ||||
| 14884 | if( pSeg->bDel==0 ){ | |||
| 14885 | if( iNextOff!=iPgIdx ){ | |||
| 14886 | /* Loop through the page-footer. If iNextOff (offset of the | |||
| 14887 | ** entry following the one we are removing) is equal to the | |||
| 14888 | ** offset of a key on this page, then the entry is the last | |||
| 14889 | ** in its doclist. */ | |||
| 14890 | int iKeyOff = 0; | |||
| 14891 | for(iIdx=0; iIdx<nIdx; /* no-op */){ | |||
| 14892 | u32 iVal = 0; | |||
| 14893 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | |||
| 14894 | iKeyOff += iVal; | |||
| 14895 | if( iKeyOff==iNextOff ){ | |||
| 14896 | bLastInDoclist = 1; | |||
| 14897 | } | |||
| 14898 | } | |||
| 14899 | } | |||
| 14900 | ||||
| 14901 | /* If this is (a) the first rowid on a page and (b) is not followed by | |||
| 14902 | ** another position list on the same page, set the "first-rowid" field | |||
| 14903 | ** of the header to 0. */ | |||
| 14904 | if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist || iNextOff==iPgIdx) ){ | |||
| 14905 | fts5PutU16(&aPg[0], 0); | |||
| 14906 | } | |||
| 14907 | } | |||
| 14908 | ||||
| 14909 | if( pSeg->bDel ){ | |||
| 14910 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta); | |||
| 14911 | aPg[iOff++] = 0x01; /* NOTE(review): looks like "size 0, delete flag set" - confirm */ | |||
| 14912 | }else if( bLastInDoclist==0 ){ | |||
| 14913 | if( iNextOff!=iPgIdx ){ | |||
| 14914 | u64 iNextDelta = 0; | |||
| 14915 | iNextOff += fts5GetVarintsqlite3Fts5GetVarint(&aPg[iNextOff], &iNextDelta); | |||
| 14916 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta); | |||
| 14917 | } | |||
| 14918 | }else if( | |||
| 14919 | pSeg->iLeafPgno==pSeg->iTermLeafPgno | |||
| 14920 | && iStart==pSeg->iTermLeafOffset | |||
| 14921 | ){ | |||
| 14922 | /* The entry being removed was the only position list in its | |||
| 14923 | ** doclist. Therefore the term needs to be removed as well. */ | |||
| 14924 | int iKey = 0; | |||
| 14925 | int iKeyOff = 0; | |||
| 14926 | ||||
| 14927 | /* Set iKeyOff to the offset of the term that will be removed - the | |||
| 14928 | ** last offset in the footer that is not greater than iStart. */ | |||
| 14929 | for(iIdx=0; iIdx<nIdx; iKey++){ | |||
| 14930 | u32 iVal = 0; | |||
| 14931 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | |||
| 14932 | if( (iKeyOff+iVal)>(u32)iStart ) break; | |||
| 14933 | iKeyOff += iVal; | |||
| 14934 | } | |||
| 14935 | assert_nc( iKey>=1 )((void) (0)); | |||
| 14936 | ||||
| 14937 | /* Set iDelKeyOff to the value of the footer entry to remove from | |||
| 14938 | ** the page. */ | |||
| 14939 | iDelKeyOff = iOff = iKeyOff; | |||
| 14940 | ||||
| 14941 | if( iNextOff!=iPgIdx ){ | |||
| 14942 | /* This is the only position-list associated with the term, and there | |||
| 14943 | ** is another term following it on this page. So the subsequent term | |||
| 14944 | ** needs to be moved to replace the term associated with the entry | |||
| 14945 | ** being removed. */ | |||
| 14946 | int nPrefix = 0; | |||
| 14947 | int nSuffix = 0; | |||
| 14948 | int nPrefix2 = 0; | |||
| 14949 | int nSuffix2 = 0; | |||
| 14950 | ||||
| 14951 | iDelKeyOff = iNextOff; | |||
| 14952 | iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2)sqlite3Fts5GetVarint32(&aPg[iNextOff],(u32*)&(nPrefix2 )); | |||
| 14953 | iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2)sqlite3Fts5GetVarint32(&aPg[iNextOff],(u32*)&(nSuffix2 )); | |||
| 14954 | ||||
| 14955 | if( iKey!=1 ){ | |||
| 14956 | iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix)sqlite3Fts5GetVarint32(&aPg[iKeyOff],(u32*)&(nPrefix) ); | |||
| 14957 | } | |||
| 14958 | iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix)sqlite3Fts5GetVarint32(&aPg[iKeyOff],(u32*)&(nSuffix) ); | |||
| 14959 | ||||
| 14960 | nPrefix = MIN(nPrefix, nPrefix2)(((nPrefix) < (nPrefix2)) ? (nPrefix) : (nPrefix2)); | |||
| 14961 | nSuffix = (nPrefix2 + nSuffix2) - nPrefix; | |||
| 14962 | ||||
| 14963 | if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){ | |||
| 14964 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 14965 | }else{ | |||
| 14966 | if( iKey!=1 ){ | |||
| 14967 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix); | |||
| 14968 | } | |||
| 14969 | iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix); | |||
| 14970 | if( nPrefix2>pSeg->term.n ){ | |||
| 14971 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 14972 | }else if( nPrefix2>nPrefix ){ | |||
| 14973 | memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix); | |||
| 14974 | iOff += (nPrefix2-nPrefix); | |||
| 14975 | } | |||
| 14976 | memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2); | |||
| 14977 | iOff += nSuffix2; | |||
| 14978 | iNextOff += nSuffix2; | |||
| 14979 | } | |||
| 14980 | } | |||
| 14981 | }else if( iStart==4 ){ | |||
| 14982 | int iPgno; | |||
| 14983 | ||||
| 14984 | assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno )((void) (0)); | |||
| 14985 | /* The entry being removed may be the only position list in | |||
| 14986 | ** its doclist. */ | |||
| 14987 | for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){ | |||
| 14988 | Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPgno)) )); | |||
| 14989 | int bEmpty = (pPg && pPg->nn==4); | |||
| 14990 | fts5DataRelease(pPg); | |||
| 14991 | if( bEmpty==0 ) break; | |||
| 14992 | } | |||
| 14993 | ||||
| 14994 | if( iPgno==pSeg->iTermLeafPgno ){ | |||
| 14995 | i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iTermLeafPgno )) ); | |||
| 14996 | Fts5Data *pTerm = fts5DataRead(p, iId); | |||
| 14997 | if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){ | |||
| 14998 | u8 *aTermIdx = &pTerm->p[pTerm->szLeaf]; | |||
| 14999 | int nTermIdx = pTerm->nn - pTerm->szLeaf; | |||
| 15000 | int iTermIdx = 0; | |||
| 15001 | int iTermOff = 0; | |||
| 15002 | ||||
| 15003 | while( 1 ){ /* Sum every footer varint; iTermIdx stops at the last one */ | |||
| 15004 | u32 iVal = 0; | |||
| 15005 | int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal)sqlite3Fts5GetVarint32(&aTermIdx[iTermIdx],(u32*)&(iVal )); | |||
| 15006 | iTermOff += iVal; | |||
| 15007 | if( (iTermIdx+nByte)>=nTermIdx ) break; | |||
| 15008 | iTermIdx += nByte; | |||
| 15009 | } | |||
| 15010 | nTermIdx = iTermIdx; /* Footer size with its final entry dropped */ | |||
| 15011 | ||||
| 15012 | memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx); | |||
| 15013 | fts5PutU16(&pTerm->p[2], iTermOff); | |||
| 15014 | ||||
| 15015 | fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx); | |||
| 15016 | if( nTermIdx==0 ){ | |||
| 15017 | fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno); | |||
| 15018 | } | |||
| 15019 | } | |||
| 15020 | fts5DataRelease(pTerm); | |||
| 15021 | } | |||
| 15022 | } | |||
| 15023 | ||||
| 15024 | /* Assuming no error has occurred, this block does final edits to the | |||
| 15025 | ** leaf page before writing it back to disk. Input variables are: | |||
| 15026 | ** | |||
| 15027 | ** nPg: Total initial size of leaf page. | |||
| 15028 | ** iPgIdx: Initial offset of page footer. | |||
| 15029 | ** | |||
| 15030 | ** iOff: Offset to move data to | |||
| 15031 | ** iNextOff: Offset to move data from | |||
| 15032 | */ | |||
| 15033 | if( p->rc==SQLITE_OK0 ){ | |||
| 15034 | const int nMove = nPg - iNextOff; /* Number of bytes to move */ | |||
| 15035 | int nShift = iNextOff - iOff; /* Distance to move them */ | |||
| 15036 | ||||
| 15037 | int iPrevKeyOut = 0; | |||
| 15038 | int iKeyIn = 0; | |||
| 15039 | ||||
| 15040 | memmove(&aPg[iOff], &aPg[iNextOff], nMove); | |||
| 15041 | iPgIdx -= nShift; | |||
| 15042 | nPg = iPgIdx; | |||
| 15043 | fts5PutU16(&aPg[2], iPgIdx); | |||
| 15044 | ||||
| 15045 | for(iIdx=0; iIdx<nIdx; /* no-op */){ | |||
| 15046 | u32 iVal = 0; | |||
| 15047 | iIdx += fts5GetVarint32(&aIdx[iIdx], iVal)sqlite3Fts5GetVarint32(&aIdx[iIdx],(u32*)&(iVal)); | |||
| 15048 | iKeyIn += iVal; | |||
| 15049 | if( iKeyIn!=iDelKeyOff ){ | |||
| 15050 | int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0)); | |||
| 15051 | nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut); | |||
| 15052 | iPrevKeyOut = iKeyOut; | |||
| 15053 | } | |||
| 15054 | } | |||
| 15055 | ||||
| 15056 | if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){ /* Footer had entries but is now empty */ | |||
| 15057 | fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno); | |||
| 15058 | } | |||
| 15059 | ||||
| 15060 | assert_nc( nPg>4 || fts5GetU16(aPg)==0 )((void) (0)); | |||
| 15061 | fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pSeg->iLeafPgno )) ), aPg, nPg); | |||
| 15062 | } | |||
| 15063 | sqlite3_freesqlite3_api->free(aIdx); | |||
| 15064 | } | |||
| 15065 | ||||
| 15066 | /* | |||
| 15067 | ** This is called as part of flushing a delete to disk in 'secure-delete' | |||
| 15068 | ** mode. It edits the segments within the database described by argument | |||
| 15069 | ** pStruct to remove the entries for term zTerm, rowid iRowid. | |||
| 15070 | ** | |||
| | ** zTerm/nTerm give the term as a buffer and its size in bytes. If the | |||
| | ** iterator opened on the term does not end up on an entry whose rowid is | |||
| | ** exactly iRowid, fts5DoSecureDelete() is not called. | |||
| | ** | |||
| | ** As a side effect, unless p->pConfig->iVersion already equals the | |||
| | ** SECUREDELETE version, the 'version' row of the %_config table is | |||
| | ** replaced. If the statement was prepared without error, iVersion is | |||
| | ** updated and iCookie is incremented. | |||
| | ** | |||
| 15071 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
| 15072 | ** has occurred. Any error code is also stored in the Fts5Index handle. | |||
| 15073 | */ | |||
| 15074 | static int fts5FlushSecureDelete( | |||
| 15075 | Fts5Index *p, | |||
| 15076 | Fts5Structure *pStruct, | |||
| 15077 | const char *zTerm, | |||
| 15078 | int nTerm, | |||
| 15079 | i64 iRowid | |||
| 15080 | ){ | |||
| 15081 | const int f = FTS5INDEX_QUERY_SKIPHASH0x0040; | |||
| 15082 | Fts5Iter *pIter = 0; /* Used to find term instance */ | |||
| 15083 | ||||
| 15084 | /* If the version number has not been set to SECUREDELETE, do so now. */ | |||
| 15085 | if( p->pConfig->iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE5 ){ | |||
| 15086 | Fts5Config *pConfig = p->pConfig; | |||
| 15087 | sqlite3_stmt *pStmt = 0; | |||
| 15088 | fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintfsqlite3_api->mprintf( | |||
| 15089 | "REPLACE INTO %Q.'%q_config' VALUES ('version', %d)", | |||
| 15090 | pConfig->zDb, pConfig->zName, FTS5_CURRENT_VERSION_SECUREDELETE5 | |||
| 15091 | )); | |||
| 15092 | if( p->rc==SQLITE_OK0 ){ | |||
| 15093 | int rc; | |||
| 15094 | sqlite3_stepsqlite3_api->step(pStmt); /* Return value unused; the finalize call below supplies rc */ | |||
| 15095 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
| 15096 | if( p->rc==SQLITE_OK0 ) p->rc = rc; | |||
| 15097 | pConfig->iCookie++; | |||
| 15098 | pConfig->iVersion = FTS5_CURRENT_VERSION_SECUREDELETE5; | |||
| 15099 | } | |||
| 15100 | } | |||
| 15101 | ||||
| 15102 | fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter); | |||
| 15103 | if( fts5MultiIterEof(p, pIter)==0 ){ | |||
| 15104 | i64 iThis = fts5MultiIterRowid(pIter); | |||
| 15105 | if( iThis<iRowid ){ | |||
| 15106 | fts5MultiIterNextFrom(p, pIter, iRowid); | |||
| 15107 | } | |||
| 15108 | ||||
| 15109 | if( p->rc==SQLITE_OK0 | |||
| 15110 | && fts5MultiIterEof(p, pIter)==0 | |||
| 15111 | && iRowid==fts5MultiIterRowid(pIter) | |||
| 15112 | ){ | |||
| 15113 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; | |||
| 15114 | fts5DoSecureDelete(p, pSeg); | |||
| 15115 | } | |||
| 15116 | } | |||
| 15117 | ||||
| 15118 | fts5MultiIterFree(pIter); | |||
| 15119 | return p->rc; | |||
| 15120 | } | |||
| 15121 | ||||
| 15122 | ||||
| 15123 | /* | |||
| 15124 | ** Flush the contents of in-memory hash table iHash to a new level-0 | |||
| 15125 | ** segment on disk. Also update the corresponding structure record. | |||
| 15126 | ** | |||
| | ** When p->pConfig->bSecureDelete is set, hash entries that are deletes | |||
| | ** are applied to the existing segments through fts5FlushSecureDelete() | |||
| | ** and a term is only appended to the new segment once an entry for it | |||
| | ** is actually written. | |||
| | ** | |||
| | ** The new segment is added to level 0 of the structure only if at least | |||
| | ** one leaf page was written (pgnoLast>0). The function always finishes | |||
| | ** by calling fts5IndexAutomerge(), fts5IndexCrisismerge(), | |||
| | ** fts5StructureWrite() and fts5StructureRelease(). | |||
| | ** | |||
| 15127 | ** If an error occurs, set the Fts5Index.rc error code. If an error has | |||
| 15128 | ** already occurred, this function is a no-op. | |||
| 15129 | */ | |||
| 15130 | static void fts5FlushOneHash(Fts5Index *p){ | |||
| 15131 | Fts5Hash *pHash = p->pHash; | |||
| 15132 | Fts5Structure *pStruct; | |||
| 15133 | int iSegid; | |||
| 15134 | int pgnoLast = 0; /* Last leaf page number in segment */ | |||
| 15135 | ||||
| 15136 | /* Obtain a reference to the index structure and allocate a new segment-id | |||
| 15137 | ** for the new level-0 segment. */ | |||
| 15138 | pStruct = fts5StructureRead(p); | |||
| 15139 | fts5StructureInvalidate(p); | |||
| 15140 | ||||
| 15141 | if( sqlite3Fts5HashIsEmpty(pHash)==0 ){ | |||
| 15142 | iSegid = fts5AllocateSegid(p, pStruct); | |||
| 15143 | if( iSegid ){ | |||
| 15144 | const int pgsz = p->pConfig->pgsz; | |||
| 15145 | int eDetail = p->pConfig->eDetail; | |||
| 15146 | int bSecureDelete = p->pConfig->bSecureDelete; | |||
| 15147 | Fts5StructureSegment *pSeg; /* New segment within pStruct */ | |||
| 15148 | Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ | |||
| 15149 | Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ | |||
| 15150 | ||||
| 15151 | Fts5SegWriter writer; | |||
| 15152 | fts5WriteInit(p, &writer, iSegid); | |||
| 15153 | ||||
| 15154 | pBuf = &writer.writer.buf; | |||
| 15155 | pPgidx = &writer.writer.pgidx; | |||
| 15156 | ||||
| 15157 | /* fts5WriteInit() should have initialized the buffers to (most likely) | |||
| 15158 | ** the maximum space required. */ | |||
| 15159 | assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) )((void) (0)); | |||
| 15160 | assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) )((void) (0)); | |||
| 15161 | ||||
| 15162 | /* Begin scanning through hash table entries. This loop runs once for each | |||
| 15163 | ** term/doclist currently stored within the hash table. */ | |||
| 15164 | if( p->rc==SQLITE_OK0 ){ | |||
| 15165 | p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); | |||
| 15166 | } | |||
| 15167 | while( p->rc==SQLITE_OK0 && 0==sqlite3Fts5HashScanEof(pHash) ){ | |||
| 15168 | const char *zTerm; /* Buffer containing term */ | |||
| 15169 | int nTerm; /* Size of zTerm in bytes */ | |||
| 15170 | const u8 *pDoclist; /* Pointer to doclist for this term */ | |||
| 15171 | int nDoclist; /* Size of doclist in bytes */ | |||
| 15172 | ||||
| 15173 | /* Get the term and doclist for this entry. */ | |||
| 15174 | sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist); | |||
| 15175 | if( bSecureDelete==0 ){ | |||
| 15176 | fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); | |||
| 15177 | if( p->rc!=SQLITE_OK0 ) break; | |||
| 15178 | assert( writer.bFirstRowidInPage==0 )((void) (0)); | |||
| 15179 | } | |||
| 15180 | ||||
| 15181 | if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ | |||
| 15182 | /* The entire doclist will fit on the current leaf. */ | |||
| 15183 | fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pDoclist , nDoclist); (pBuf)->n += nDoclist; }; | |||
| 15184 | }else{ | |||
| 15185 | int bTermWritten = !bSecureDelete; /* Term was already appended above unless secure-delete */ | |||
| 15186 | i64 iRowid = 0; /* Rowid of the current doclist entry */ | |||
| 15187 | i64 iPrev = 0; /* Rowid most recently written to pBuf */ | |||
| 15188 | int iOff = 0; /* Read offset into pDoclist[] */ | |||
| 15189 | ||||
| 15190 | /* The entire doclist will not fit on this leaf. The following | |||
| 15191 | ** loop iterates through the poslists that make up the current | |||
| 15192 | ** doclist. */ | |||
| 15193 | while( p->rc==SQLITE_OK0 && iOff<nDoclist ){ | |||
| 15194 | u64 iDelta = 0; | |||
| 15195 | iOff += fts5GetVarintsqlite3Fts5GetVarint(&pDoclist[iOff], &iDelta); | |||
| 15196 | iRowid += iDelta; | |||
| 15197 | ||||
| 15198 | /* If in secure delete mode, and if this entry in the poslist is | |||
| 15199 | ** in fact a delete, then edit the existing segments directly | |||
| 15200 | ** using fts5FlushSecureDelete(). */ | |||
| 15201 | if( bSecureDelete ){ | |||
| 15202 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 15203 | if( iOff<nDoclist && pDoclist[iOff]==0x00 | |||
| 15204 | && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid) | |||
| 15205 | ){ | |||
| 15206 | iOff++; | |||
| 15207 | if( iOff<nDoclist && pDoclist[iOff]==0x00 ){ | |||
| 15208 | iOff++; | |||
| 15209 | nDoclist = 0; /* Makes this the final pass of the enclosing loop */ | |||
| 15210 | }else{ | |||
| 15211 | continue; | |||
| 15212 | } | |||
| 15213 | } | |||
| 15214 | }else if( (pDoclist[iOff] & 0x01) | |||
| 15215 | && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid) | |||
| 15216 | ){ | |||
| 15217 | if( p->rc!=SQLITE_OK0 || pDoclist[iOff]==0x01 ){ | |||
| 15218 | iOff++; | |||
| 15219 | continue; | |||
| 15220 | } | |||
| 15221 | } | |||
| 15222 | } | |||
| 15223 | ||||
| 15224 | if( p->rc==SQLITE_OK0 && bTermWritten==0 ){ | |||
| 15225 | fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); | |||
| 15226 | bTermWritten = 1; | |||
| 15227 | assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 )((void) (0)); | |||
| 15228 | } | |||
| 15229 | ||||
| 15230 | if( writer.bFirstRowidInPage ){ | |||
| 15231 | fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ | |||
| 15232 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); | |||
| 15233 | writer.bFirstRowidInPage = 0; | |||
| 15234 | fts5WriteDlidxAppend(p, &writer, iRowid); | |||
| 15235 | }else{ | |||
| 15236 | u64 iRowidDelta = (u64)iRowid - (u64)iPrev; | |||
| 15237 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta); | |||
| 15238 | } | |||
| 15239 | if( p->rc!=SQLITE_OK0 ) break; | |||
| 15240 | assert( pBuf->n<=pBuf->nSpace )((void) (0)); | |||
| 15241 | iPrev = iRowid; | |||
| 15242 | ||||
| 15243 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 15244 | if( iOff<nDoclist && pDoclist[iOff]==0 ){ | |||
| 15245 | pBuf->p[pBuf->n++] = 0; | |||
| 15246 | iOff++; | |||
| 15247 | if( iOff<nDoclist && pDoclist[iOff]==0 ){ | |||
| 15248 | pBuf->p[pBuf->n++] = 0; | |||
| 15249 | iOff++; | |||
| 15250 | } | |||
| 15251 | } | |||
| 15252 | if( (pBuf->n + pPgidx->n)>=pgsz ){ | |||
| 15253 | fts5WriteFlushLeaf(p, &writer); | |||
| 15254 | } | |||
| 15255 | }else{ | |||
| 15256 | int bDel = 0; | |||
| 15257 | int nPos = 0; | |||
| 15258 | int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel); | |||
| 15259 | if( bDel && bSecureDelete ){ | |||
| 15260 | fts5BufferAppendVarint(&p->rc, pBuf, nPos*2)sqlite3Fts5BufferAppendVarint(&p->rc,pBuf,(i64)nPos*2); | |||
| 15261 | iOff += nCopy; | |||
| 15262 | nCopy = nPos; | |||
| 15263 | }else{ | |||
| 15264 | nCopy += nPos; | |||
| 15265 | } | |||
| 15266 | if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ | |||
| 15267 | /* The entire poslist will fit on the current leaf. So copy | |||
| 15268 | ** it in one go. */ | |||
| 15269 | fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], & pDoclist[iOff], nCopy); (pBuf)->n += nCopy; }; | |||
| 15270 | }else{ | |||
| 15271 | /* The entire poslist will not fit on this leaf. So it needs | |||
| 15272 | ** to be broken into sections. The only qualification being | |||
| 15273 | ** that each varint must be stored contiguously. */ | |||
| 15274 | const u8 *pPoslist = &pDoclist[iOff]; | |||
| 15275 | int iPos = 0; | |||
| 15276 | while( p->rc==SQLITE_OK0 ){ | |||
| 15277 | int nSpace = pgsz - pBuf->n - pPgidx->n; | |||
| 15278 | int n = 0; | |||
| 15279 | if( (nCopy - iPos)<=nSpace ){ | |||
| 15280 | n = nCopy - iPos; | |||
| 15281 | }else{ | |||
| 15282 | n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); | |||
| 15283 | } | |||
| 15284 | assert( n>0 )((void) (0)); | |||
| 15285 | fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], & pPoslist[iPos], n); (pBuf)->n += n; }; | |||
| 15286 | iPos += n; | |||
| 15287 | if( (pBuf->n + pPgidx->n)>=pgsz ){ | |||
| 15288 | fts5WriteFlushLeaf(p, &writer); | |||
| 15289 | } | |||
| 15290 | if( iPos>=nCopy ) break; | |||
| 15291 | } | |||
| 15292 | } | |||
| 15293 | iOff += nCopy; | |||
| 15294 | } | |||
| 15295 | } | |||
| 15296 | } | |||
| 15297 | ||||
| 15298 | /* TODO2: Doclist terminator written here. */ | |||
| 15299 | /* pBuf->p[pBuf->n++] = '\0'; */ | |||
| 15300 | assert( pBuf->n<=pBuf->nSpace )((void) (0)); | |||
| 15301 | if( p->rc==SQLITE_OK0 ) sqlite3Fts5HashScanNext(pHash); | |||
| 15302 | } | |||
| 15303 | fts5WriteFinish(p, &writer, &pgnoLast); | |||
| 15304 | ||||
| 15305 | assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 )((void) (0)); | |||
| 15306 | if( pgnoLast>0 ){ | |||
| 15307 | /* Update the Fts5Structure. It is written back to the database by the | |||
| 15308 | ** fts5StructureRelease() call below. */ | |||
| 15309 | if( pStruct->nLevel==0 ){ | |||
| 15310 | fts5StructureAddLevel(&p->rc, &pStruct); | |||
| 15311 | } | |||
| 15312 | fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); | |||
| 15313 | if( p->rc==SQLITE_OK0 ){ | |||
| 15314 | pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; | |||
| 15315 | pSeg->iSegid = iSegid; | |||
| 15316 | pSeg->pgnoFirst = 1; | |||
| 15317 | pSeg->pgnoLast = pgnoLast; | |||
| 15318 | if( pStruct->nOriginCntr>0 ){ | |||
| 15319 | pSeg->iOrigin1 = pStruct->nOriginCntr; | |||
| 15320 | pSeg->iOrigin2 = pStruct->nOriginCntr; | |||
| 15321 | pSeg->nEntry = p->nPendingRow; | |||
| 15322 | pStruct->nOriginCntr++; | |||
| 15323 | } | |||
| 15324 | pStruct->nSegment++; | |||
| 15325 | } | |||
| 15326 | fts5StructurePromote(p, 0, pStruct); | |||
| 15327 | } | |||
| 15328 | } | |||
| 15329 | } | |||
| 15330 | ||||
| 15331 | fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete); | |||
| 15332 | fts5IndexCrisismerge(p, &pStruct); | |||
| 15333 | fts5StructureWrite(p, pStruct); | |||
| 15334 | fts5StructureRelease(pStruct); | |||
| 15335 | } | |||
| 15336 | ||||
| 15337 | /* | |||
| 15338 | ** Flush any data stored in the in-memory hash tables to the database. | |||
| | ** | |||
| | ** If an earlier flush failed (p->flushRc is set), that code is copied | |||
| | ** into p->rc and nothing else is done. If this flush fails while data is | |||
| | ** still pending, the error is saved in p->flushRc. On success the hash | |||
| | ** table is cleared and the pending counters are reset to zero. | |||
| 15339 | */ | |||
| 15340 | static void fts5IndexFlush(Fts5Index *p){ | |||
| 15341 | /* Unless it is empty, flush the hash table to disk */ | |||
| 15342 | if( p->flushRc ){ | |||
| 15343 | p->rc = p->flushRc; | |||
| 15344 | return; | |||
| 15345 | } | |||
| 15346 | if( p->nPendingData || p->nContentlessDelete ){ | |||
| 15347 | assert( p->pHash )((void) (0)); | |||
| 15348 | fts5FlushOneHash(p); | |||
| 15349 | if( p->rc==SQLITE_OK0 ){ | |||
| 15350 | sqlite3Fts5HashClear(p->pHash); | |||
| 15351 | p->nPendingData = 0; | |||
| 15352 | p->nPendingRow = 0; | |||
| 15353 | p->nContentlessDelete = 0; | |||
| 15354 | }else if( p->nPendingData || p->nContentlessDelete ){ | |||
| 15355 | p->flushRc = p->rc; /* Remembered so that later flush attempts fail the same way */ | |||
| 15356 | } | |||
| 15357 | } | |||
| 15358 | } | |||
| 15359 | ||||
| 15360 | static Fts5Structure *fts5IndexOptimizeStruct( /* Return the structure an optimize merge should run on */ | |||
| 15361 | Fts5Index *p, /* Index handle; allocation errors go to p->rc */ | |||
| 15362 | Fts5Structure *pStruct /* Structure to examine; its contents are not modified here */ | |||
| 15363 | ){ | |||
| 15364 | Fts5Structure *pNew = 0; | |||
| 15365 | sqlite3_int64 nByte = SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel )); | |||
| 15366 | int nSeg = pStruct->nSegment; | |||
| 15367 | int i; | |||
| 15368 | ||||
| 15369 | /* Figure out if this structure requires optimization. A structure does | |||
| 15370 | ** not require optimization if either: | |||
| 15371 | ** | |||
| 15372 | ** 1. it consists of fewer than two segments, or | |||
| 15373 | ** 2. all segments are on the same level, or | |||
| 15374 | ** 3. all segments except one are currently inputs to a merge operation. | |||
| 15375 | ** | |||
| 15376 | ** In the first case, if there are no tombstone hash pages, return NULL. In | |||
| 15377 | ** the second, increment the ref-count on *pStruct and return a copy of the | |||
| 15378 | ** pointer to it. | |||
| | ** | |||
| | ** The third case shares the code path of the second. A structure with no | |||
| | ** segments at all also yields NULL. | |||
| 15379 | */ | |||
| 15380 | if( nSeg==0 ) return 0; | |||
| 15381 | for(i=0; i<pStruct->nLevel; i++){ | |||
| 15382 | int nThis = pStruct->aLevel[i].nSeg; | |||
| 15383 | int nMerge = pStruct->aLevel[i].nMerge; | |||
| 15384 | if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){ | |||
| 15385 | if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){ | |||
| 15386 | return 0; | |||
| 15387 | } | |||
| 15388 | fts5StructureRef(pStruct); | |||
| 15389 | return pStruct; | |||
| 15390 | } | |||
| 15391 | assert( pStruct->aLevel[i].nMerge<=nThis )((void) (0)); | |||
| 15392 | } | |||
| 15393 | ||||
| 15394 | nByte += (((i64)pStruct->nLevel)+1) * sizeof(Fts5StructureLevel); | |||
| 15395 | assert( nByte==SZ_FTS5STRUCTURE(pStruct->nLevel+2) )((void) (0)); | |||
| 15396 | pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); | |||
| 15397 | ||||
| 15398 | if( pNew ){ | |||
| 15399 | Fts5StructureLevel *pLvl; | |||
| 15400 | nByte = nSeg * sizeof(Fts5StructureSegment); | |||
| 15401 | pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL)(((pStruct->nLevel+1) < (64)) ? (pStruct->nLevel+1) : (64)); | |||
| 15402 | pNew->nRef = 1; | |||
| 15403 | pNew->nWriteCounter = pStruct->nWriteCounter; | |||
| 15404 | pNew->nOriginCntr = pStruct->nOriginCntr; | |||
| 15405 | pLvl = &pNew->aLevel[pNew->nLevel-1]; /* Every segment is gathered on this last level */ | |||
| 15406 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); | |||
| 15407 | if( pLvl->aSeg ){ | |||
| 15408 | int iLvl, iSeg; | |||
| 15409 | int iSegOut = 0; | |||
| 15410 | /* Iterate through all segments, from oldest to newest. Add them to | |||
| 15411 | ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest | |||
| 15412 | ** segment in the data structure. */ | |||
| 15413 | for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ | |||
| 15414 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | |||
| 15415 | pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; | |||
| 15416 | iSegOut++; | |||
| 15417 | } | |||
| 15418 | } | |||
| 15419 | pNew->nSegment = pLvl->nSeg = nSeg; | |||
| 15420 | }else{ | |||
| 15421 | sqlite3_freesqlite3_api->free(pNew); | |||
| 15422 | pNew = 0; | |||
| 15423 | } | |||
| 15424 | } | |||
| 15425 | ||||
| 15426 | return pNew; /* 0 here if either allocation above returned 0 */ | |||
| 15427 | } | |||
| 15428 | ||||
| 15429 | static int sqlite3Fts5IndexOptimize(Fts5Index *p){ /* Flush pending data, then merge one level until it is empty */ | |||
| 15430 | Fts5Structure *pStruct; | |||
| 15431 | Fts5Structure *pNew = 0; | |||
| 15432 | ||||
| 15433 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 15434 | fts5IndexFlush(p); | |||
| 15435 | assert( p->rc!=SQLITE_OK || p->nContentlessDelete==0 )((void) (0)); | |||
| 15436 | pStruct = fts5StructureRead(p); | |||
| 15437 | assert( p->rc!=SQLITE_OK || pStruct!=0 )((void) (0)); | |||
| 15438 | fts5StructureInvalidate(p); | |||
| 15439 | ||||
| 15440 | if( pStruct ){ | |||
| 15441 | pNew = fts5IndexOptimizeStruct(p, pStruct); | |||
| 15442 | } | |||
| 15443 | fts5StructureRelease(pStruct); | |||
| 15444 | ||||
| 15445 | assert( pNew==0 || pNew->nSegment>0 )((void) (0)); | |||
| 15446 | if( pNew ){ | |||
| 15447 | int iLvl; | |||
| 15448 | for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){} /* Find the first level holding any segment */ | |||
| 15449 | while( p->rc==SQLITE_OK0 && pNew->aLevel[iLvl].nSeg>0 ){ | |||
| 15450 | int nRem = FTS5_OPT_WORK_UNIT1000; | |||
| 15451 | fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); | |||
| 15452 | } | |||
| 15453 | ||||
| 15454 | fts5StructureWrite(p, pNew); | |||
| 15455 | fts5StructureRelease(pNew); | |||
| 15456 | } | |||
| 15457 | ||||
| 15458 | return fts5IndexReturn(p); | |||
| 15459 | } | |||
| 15460 | ||||
| 15461 | /* | |||
| 15462 | ** This is called to implement the special "VALUES('merge', $nMerge)" | |||
| 15463 | ** INSERT command. | |||
| | ** | |||
| | ** Pending data is flushed first. If nMerge is negative, the structure is | |||
| | ** replaced by the result of fts5IndexOptimizeStruct(), nMin becomes 1 and | |||
| | ** nMerge is negated before fts5IndexMerge() is called. Otherwise nMin is | |||
| | ** taken from p->pConfig->nUsermerge. The structure record is written back | |||
| | ** only if fts5IndexMerge() returns non-zero. | |||
| | ** | |||
| | ** The return value is whatever fts5IndexReturn() yields for p. | |||
| 15464 | */ | |||
| 15465 | static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ | |||
| 15466 | Fts5Structure *pStruct = 0; | |||
| 15467 | ||||
| 15468 | fts5IndexFlush(p); | |||
| 15469 | pStruct = fts5StructureRead(p); | |||
| 15470 | if( pStruct ){ | |||
| 15471 | int nMin = p->pConfig->nUsermerge; | |||
| 15472 | fts5StructureInvalidate(p); | |||
| 15473 | if( nMerge<0 ){ | |||
| 15474 | Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); | |||
| 15475 | fts5StructureRelease(pStruct); | |||
| 15476 | pStruct = pNew; /* May be 0; checked before use below */ | |||
| 15477 | nMin = 1; | |||
| 15478 | nMerge = nMerge*-1; | |||
| 15479 | } | |||
| 15480 | if( pStruct && pStruct->nLevel ){ | |||
| 15481 | if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){ | |||
| 15482 | fts5StructureWrite(p, pStruct); | |||
| 15483 | } | |||
| 15484 | } | |||
| 15485 | fts5StructureRelease(pStruct); | |||
| 15486 | } | |||
| 15487 | return fts5IndexReturn(p); | |||
| 15488 | } | |||
| 15489 | ||||
| 15490 | static void fts5AppendRowid( | |||
| 15491 | Fts5Index *p, | |||
| 15492 | u64 iDelta, | |||
| 15493 | Fts5Iter *pUnused, | |||
| 15494 | Fts5Buffer *pBuf | |||
| 15495 | ){ | |||
| 15496 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
| 15497 | fts5BufferAppendVarint(&p->rc, pBuf, iDelta)sqlite3Fts5BufferAppendVarint(&p->rc,pBuf,(i64)iDelta); | |||
| 15498 | } | |||
| 15499 | ||||
| 15500 | static void fts5AppendPoslist( | |||
| 15501 | Fts5Index *p, | |||
| 15502 | u64 iDelta, | |||
| 15503 | Fts5Iter *pMulti, | |||
| 15504 | Fts5Buffer *pBuf | |||
| 15505 | ){ | |||
| 15506 | int nData = pMulti->base.nData; | |||
| 15507 | int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING8; | |||
| 15508 | assert( nData>0 )((void) (0)); | |||
| 15509 | if( p->rc==SQLITE_OK0 && 0==fts5BufferGrow(&p->rc, pBuf, nByte)( (u32)((pBuf)->n) + (u32)(nByte) <= (u32)((pBuf)->nSpace ) ? 0 : sqlite3Fts5BufferSize((&p->rc),(pBuf),(nByte)+ (pBuf)->n) ) ){ | |||
| 15510 | fts5BufferSafeAppendVarint(pBuf, iDelta){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf )->n], (iDelta)); ((void) (0)); }; | |||
| 15511 | fts5BufferSafeAppendVarint(pBuf, nData*2){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf )->n], (nData*2)); ((void) (0)); }; | |||
| 15512 | fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData){ ((void) (0)); memcpy(&(pBuf)->p[(pBuf)->n], pMulti ->base.pData, nData); (pBuf)->n += nData; }; | |||
| 15513 | memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING8); | |||
| 15514 | } | |||
| 15515 | } | |||
| 15516 | ||||
| 15517 | ||||
| 15518 | static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ | |||
| 15519 | u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; | |||
| 15520 | ||||
| 15521 | assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) )((void) (0)); | |||
| 15522 | if( p>=pIter->aEof ){ | |||
| 15523 | pIter->aPoslist = 0; | |||
| 15524 | }else{ | |||
| 15525 | i64 iDelta; | |||
| 15526 | ||||
| 15527 | p += fts5GetVarintsqlite3Fts5GetVarint(p, (u64*)&iDelta); | |||
| 15528 | pIter->iRowid += iDelta; | |||
| 15529 | ||||
| 15530 | /* Read position list size */ | |||
| 15531 | if( p[0] & 0x80 ){ | |||
| 15532 | int nPos; | |||
| 15533 | pIter->nSize = fts5GetVarint32(p, nPos)sqlite3Fts5GetVarint32(p,(u32*)&(nPos)); | |||
| 15534 | pIter->nPoslist = (nPos>>1); | |||
| 15535 | }else{ | |||
| 15536 | pIter->nPoslist = ((int)(p[0])) >> 1; | |||
| 15537 | pIter->nSize = 1; | |||
| 15538 | } | |||
| 15539 | ||||
| 15540 | pIter->aPoslist = p; | |||
| 15541 | if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){ | |||
| 15542 | pIter->aPoslist = 0; | |||
| 15543 | } | |||
| 15544 | } | |||
| 15545 | } | |||
| 15546 | ||||
| 15547 | static void fts5DoclistIterInit( | |||
| 15548 | Fts5Buffer *pBuf, | |||
| 15549 | Fts5DoclistIter *pIter | |||
| 15550 | ){ | |||
| 15551 | memset(pIter, 0, sizeof(*pIter)); | |||
| 15552 | if( pBuf->n>0 ){ | |||
| 15553 | pIter->aPoslist = pBuf->p; | |||
| 15554 | pIter->aEof = &pBuf->p[pBuf->n]; | |||
| 15555 | fts5DoclistIterNext(pIter); | |||
| 15556 | } | |||
| 15557 | } | |||
| 15558 | ||||
| 15559 | #if 0 | |||
| 15560 | /* | |||
| 15561 | ** Append a doclist to buffer pBuf. | |||
| 15562 | ** | |||
| 15563 | ** This function assumes that space within the buffer has already been | |||
| 15564 | ** allocated. | |||
| 15565 | */ | |||
| 15566 | static void fts5MergeAppendDocid({ ((void) (0)); { ((Fts5Buffer *pBuf))->n += sqlite3Fts5PutVarint (&((Fts5Buffer *pBuf))->p[((Fts5Buffer *pBuf))->n], ((u64)(i64 iRowid) - (u64)(i64 *piLastRowid))); ((void) (0)) ; }; (i64 *piLastRowid) = (i64 iRowid); } | |||
| 15567 | Fts5Buffer *pBuf, /* Buffer to write to */{ ((void) (0)); { ((Fts5Buffer *pBuf))->n += sqlite3Fts5PutVarint (&((Fts5Buffer *pBuf))->p[((Fts5Buffer *pBuf))->n], ((u64)(i64 iRowid) - (u64)(i64 *piLastRowid))); ((void) (0)) ; }; (i64 *piLastRowid) = (i64 iRowid); } | |||
| 15568 | i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */{ ((void) (0)); { ((Fts5Buffer *pBuf))->n += sqlite3Fts5PutVarint (&((Fts5Buffer *pBuf))->p[((Fts5Buffer *pBuf))->n], ((u64)(i64 iRowid) - (u64)(i64 *piLastRowid))); ((void) (0)) ; }; (i64 *piLastRowid) = (i64 iRowid); } | |||
| 15569 | i64 iRowid /* Rowid to append */{ ((void) (0)); { ((Fts5Buffer *pBuf))->n += sqlite3Fts5PutVarint (&((Fts5Buffer *pBuf))->p[((Fts5Buffer *pBuf))->n], ((u64)(i64 iRowid) - (u64)(i64 *piLastRowid))); ((void) (0)) ; }; (i64 *piLastRowid) = (i64 iRowid); } | |||
| 15570 | ){ ((void) (0)); { ((Fts5Buffer *pBuf))->n += sqlite3Fts5PutVarint (&((Fts5Buffer *pBuf))->p[((Fts5Buffer *pBuf))->n], ((u64)(i64 iRowid) - (u64)(i64 *piLastRowid))); ((void) (0)) ; }; (i64 *piLastRowid) = (i64 iRowid); }{ | |||
| 15571 | assert( pBuf->n!=0 || (*piLastRowid)==0 )((void) (0)); | |||
| 15572 | fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid){ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf )->n], (iRowid - *piLastRowid)); ((void) (0)); }; | |||
| 15573 | *piLastRowid = iRowid; | |||
| 15574 | } | |||
| 15575 | #endif | |||
| 15576 | ||||
/*
** Append rowid iRowid to the doclist in buffer pBuf, encoded as a varint
** delta against iLastRowid, then update iLastRowid to iRowid. The delta is
** computed in unsigned arithmetic so that it cannot overflow. Space in the
** buffer must already have been allocated by the caller.
**
** The body is wrapped in do{ }while(0) so that the macro followed by a
** semicolon forms exactly one statement (safe in an unbraced if/else).
** Note that the arguments are evaluated more than once.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) do {                \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                               \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid));   \
  (iLastRowid) = (iRowid);                                                 \
} while( 0 )
| 15582 | ||||
| 15583 | /* | |||
| 15584 | ** Swap the contents of buffer *p1 with that of *p2. | |||
| 15585 | */ | |||
| 15586 | static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ | |||
| 15587 | Fts5Buffer tmp = *p1; | |||
| 15588 | *p1 = *p2; | |||
| 15589 | *p2 = tmp; | |||
| 15590 | } | |||
| 15591 | ||||
| 15592 | static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ | |||
| 15593 | int i = *piOff; | |||
| 15594 | if( i>=pBuf->n ){ | |||
| 15595 | *piOff = -1; | |||
| 15596 | }else{ | |||
| 15597 | u64 iVal; | |||
| 15598 | *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); | |||
| 15599 | *piRowid += iVal; | |||
| 15600 | } | |||
| 15601 | } | |||
| 15602 | ||||
| 15603 | /* | |||
| 15604 | ** This is the equivalent of fts5MergePrefixLists() for detail=none mode. | |||
| 15605 | ** In this case the buffers consist of a delta-encoded list of rowids only. | |||
| 15606 | */ | |||
| 15607 | static void fts5MergeRowidLists( | |||
| 15608 | Fts5Index *p, /* FTS5 backend object */ | |||
| 15609 | Fts5Buffer *p1, /* First list to merge */ | |||
| 15610 | int nBuf, /* Number of entries in apBuf[] */ | |||
| 15611 | Fts5Buffer *aBuf /* Array of other lists to merge into p1 */ | |||
| 15612 | ){ | |||
| 15613 | int i1 = 0; | |||
| 15614 | int i2 = 0; | |||
| 15615 | i64 iRowid1 = 0; | |||
| 15616 | i64 iRowid2 = 0; | |||
| 15617 | i64 iOut = 0; | |||
| 15618 | Fts5Buffer *p2 = &aBuf[0]; | |||
| 15619 | Fts5Buffer out; | |||
| 15620 | ||||
| 15621 | (void)nBuf; | |||
| 15622 | memset(&out, 0, sizeof(out)); | |||
| 15623 | assert( nBuf==1 )((void) (0)); | |||
| 15624 | sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); | |||
| 15625 | if( p->rc ) return; | |||
| 15626 | ||||
| 15627 | fts5NextRowid(p1, &i1, &iRowid1); | |||
| 15628 | fts5NextRowid(p2, &i2, &iRowid2); | |||
| 15629 | while( i1>=0 || i2>=0 ){ | |||
| 15630 | if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){ | |||
| 15631 | assert( iOut==0 || iRowid1>iOut )((void) (0)); | |||
| 15632 | fts5BufferSafeAppendVarint(&out, iRowid1 - iOut){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], (iRowid1 - iOut)); ((void) (0)); }; | |||
| 15633 | iOut = iRowid1; | |||
| 15634 | fts5NextRowid(p1, &i1, &iRowid1); | |||
| 15635 | }else{ | |||
| 15636 | assert( iOut==0 || iRowid2>iOut )((void) (0)); | |||
| 15637 | fts5BufferSafeAppendVarint(&out, iRowid2 - iOut){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], (iRowid2 - iOut)); ((void) (0)); }; | |||
| 15638 | iOut = iRowid2; | |||
| 15639 | if( i1>=0 && iRowid1==iRowid2 ){ | |||
| 15640 | fts5NextRowid(p1, &i1, &iRowid1); | |||
| 15641 | } | |||
| 15642 | fts5NextRowid(p2, &i2, &iRowid2); | |||
| 15643 | } | |||
| 15644 | } | |||
| 15645 | ||||
| 15646 | fts5BufferSwap(&out, p1); | |||
| 15647 | fts5BufferFree(&out)sqlite3Fts5BufferFree(&out); | |||
| 15648 | } | |||
| 15649 | ||||
| 15650 | typedef struct PrefixMerger PrefixMerger; | |||
| 15651 | struct PrefixMerger { | |||
| 15652 | Fts5DoclistIter iter; /* Doclist iterator */ | |||
| 15653 | i64 iPos; /* For iterating through a position list */ | |||
| 15654 | int iOff; | |||
| 15655 | u8 *aPos; | |||
| 15656 | PrefixMerger *pNext; /* Next in docid/poslist order */ | |||
| 15657 | }; | |||
| 15658 | ||||
| 15659 | static void fts5PrefixMergerInsertByRowid( | |||
| 15660 | PrefixMerger **ppHead, | |||
| 15661 | PrefixMerger *p | |||
| 15662 | ){ | |||
| 15663 | if( p->iter.aPoslist ){ | |||
| 15664 | PrefixMerger **pp = ppHead; | |||
| 15665 | while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){ | |||
| 15666 | pp = &(*pp)->pNext; | |||
| 15667 | } | |||
| 15668 | p->pNext = *pp; | |||
| 15669 | *pp = p; | |||
| 15670 | } | |||
| 15671 | } | |||
| 15672 | ||||
| 15673 | static void fts5PrefixMergerInsertByPosition( | |||
| 15674 | PrefixMerger **ppHead, | |||
| 15675 | PrefixMerger *p | |||
| 15676 | ){ | |||
| 15677 | if( p->iPos>=0 ){ | |||
| 15678 | PrefixMerger **pp = ppHead; | |||
| 15679 | while( *pp && p->iPos>(*pp)->iPos ){ | |||
| 15680 | pp = &(*pp)->pNext; | |||
| 15681 | } | |||
| 15682 | p->pNext = *pp; | |||
| 15683 | *pp = p; | |||
| 15684 | } | |||
| 15685 | } | |||
| 15686 | ||||
| 15687 | ||||
| 15688 | /* | |||
| 15689 | ** Array aBuf[] contains nBuf doclists. These are all merged in with the | |||
| 15690 | ** doclist in buffer p1. | |||
| 15691 | */ | |||
/*
** On a normal return *p1 holds the merged doclist: the previous p1
** allocation is freed and replaced by the newly built output buffer,
** followed by FTS5_DATA_ZERO_PADDING zero bytes. If every buffer in aBuf[]
** is empty, or if sizing the output buffer fails, the function returns
** early and p1 is left untouched. p->rc is set to FTS5_CORRUPT when the
** position lists being merged are found to be inconsistent. At most
** FTS5_MERGE_NLIST inputs (nBuf plus p1) are supported.
*/
| 15692 | static void fts5MergePrefixLists( | |||
| 15693 | Fts5Index *p, /* FTS5 backend object */ | |||
| 15694 | Fts5Buffer *p1, /* First list to merge */ | |||
| 15695 | int nBuf, /* Number of buffers in array aBuf[] */ | |||
| 15696 | Fts5Buffer *aBuf /* Other lists to merge in */ | |||
| 15697 | ){ | |||
| 15698 | #define fts5PrefixMergerNextPosition(p)sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,& (p)->iOff,&(p)->iPos) \ | |||
| 15699 | sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos) | |||
| 15700 | #define FTS5_MERGE_NLIST16 16 | |||
| 15701 | PrefixMerger aMerger[FTS5_MERGE_NLIST16]; | |||
| 15702 | PrefixMerger *pHead = 0; | |||
| 15703 | int i; | |||
| 15704 | int nOut = 0; | |||
| 15705 | Fts5Buffer out = {0, 0, 0}; | |||
| 15706 | Fts5Buffer tmp = {0, 0, 0}; | |||
| 15707 | i64 iLastRowid = 0; | |||
| 15708 | ||||
| 15709 | /* Initialize a doclist-iterator for each input buffer. Arrange them in | |||
| 15710 | ** a linked-list starting at pHead in ascending order of rowid. Avoid | |||
| 15711 | ** linking any iterators already at EOF into the linked list at all. */ | |||
| 15712 | assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) )((void) (0)); | |||
| 15713 | memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1)); | |||
  /* The iterator for p1 lives in the last used slot, aMerger[nBuf] */
| 15714 | pHead = &aMerger[nBuf]; | |||
| 15715 | fts5DoclistIterInit(p1, &pHead->iter); | |||
| 15716 | for(i=0; i<nBuf; i++){ | |||
| 15717 | fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter); | |||
| 15718 | fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]); | |||
| 15719 | nOut += aBuf[i].n; | |||
| 15720 | } | |||
  /* At this point nOut counts only the aBuf[] inputs, so zero means there
  ** is nothing to merge into p1. */
| 15721 | if( nOut==0 ) return; | |||
| 15722 | nOut += p1->n + 9 + 10*nBuf; | |||
| 15723 | ||||
| 15724 | /* The maximum size of the output is equal to the sum of the | |||
| 15725 | ** input sizes + 1 varint (9 bytes). The extra varint is because if the | |||
| 15726 | ** first rowid in one input is a large negative number, and the first in | |||
| 15727 | ** the other a non-negative number, the delta for the non-negative | |||
| 15728 | ** number will be larger on disk than the literal integer value | |||
| 15729 | ** was. | |||
| 15730 | ** | |||
| 15731 | ** Or, if the input position-lists are corrupt, then the output might | |||
| 15732 | ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1 | |||
| 15733 | ** (the value PoslistNext64() uses for EOF) as a position and appending | |||
| 15734 | ** it to the output. This can happen at most once for each input | |||
| 15735 | ** position-list, hence (nBuf+1) 10 byte paddings. */ | |||
| 15736 | if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return; | |||
| 15737 | ||||
  /* Each pass emits the entry for the smallest remaining rowid, which is
  ** the one at the head of the list. */
| 15738 | while( pHead ){ | |||
| 15739 | fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid){ ((void) (0)); { ((&out))->n += sqlite3Fts5PutVarint( &((&out))->p[((&out))->n], ((u64)(pHead-> iter.iRowid) - (u64)(iLastRowid))); ((void) (0)); }; (iLastRowid ) = (pHead->iter.iRowid); }; | |||
| 15740 | ||||
| 15741 | if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){ | |||
| 15742 | /* Merge data from two or more poslists */ | |||
| 15743 | i64 iPrev = 0; | |||
| 15744 | int nTmp = FTS5_DATA_ZERO_PADDING8; | |||
| 15745 | int nMerge = 0; | |||
| 15746 | PrefixMerger *pSave = pHead; | |||
| 15747 | PrefixMerger *pThis = 0; | |||
| 15748 | int nTail = 0; | |||
| 15749 | ||||
      /* Take every iterator positioned on iLastRowid off the rowid-ordered
      ** list and relink it through pHead in order of its first position.
      ** When the loop ends pSave is the first iterator on a different
      ** rowid (or NULL). */
| 15750 | pHead = 0; | |||
| 15751 | while( pSave && pSave->iter.iRowid==iLastRowid ){ | |||
| 15752 | PrefixMerger *pNext = pSave->pNext; | |||
| 15753 | pSave->iOff = 0; | |||
| 15754 | pSave->iPos = 0; | |||
| 15755 | pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize]; | |||
| 15756 | fts5PrefixMergerNextPosition(pSave)sqlite3Fts5PoslistNext64((pSave)->aPos,(pSave)->iter.nPoslist ,&(pSave)->iOff,&(pSave)->iPos); | |||
| 15757 | nTmp += pSave->iter.nPoslist + 10; | |||
| 15758 | nMerge++; | |||
| 15759 | fts5PrefixMergerInsertByPosition(&pHead, pSave); | |||
| 15760 | pSave = pNext; | |||
| 15761 | } | |||
| 15762 | ||||
      /* Fewer than two iterators produced a position: report corruption */
| 15763 | if( pHead==0 || pHead->pNext==0 ){ | |||
| 15764 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 15765 | break; | |||
| 15766 | } | |||
| 15767 | ||||
| 15768 | /* See the earlier comment in this function for an explanation of why | |||
| 15769 | ** corrupt input position lists might cause the output to consume | |||
| 15770 | ** at most nMerge*10 bytes of unexpected space. */ | |||
| 15771 | if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){ | |||
| 15772 | break; | |||
| 15773 | } | |||
| 15774 | fts5BufferZero(&tmp)sqlite3Fts5BufferZero(&tmp); | |||
| 15775 | ||||
| 15776 | pThis = pHead; | |||
| 15777 | pHead = pThis->pNext; | |||
| 15778 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos); | |||
| 15779 | fts5PrefixMergerNextPosition(pThis)sqlite3Fts5PoslistNext64((pThis)->aPos,(pThis)->iter.nPoslist ,&(pThis)->iOff,&(pThis)->iPos); | |||
| 15780 | fts5PrefixMergerInsertByPosition(&pHead, pThis); | |||
| 15781 | ||||
      /* Merge positions into tmp, skipping duplicates of the previous
      ** position, until only one iterator remains on the list. */
| 15782 | while( pHead->pNext ){ | |||
| 15783 | pThis = pHead; | |||
| 15784 | if( pThis->iPos!=iPrev ){ | |||
| 15785 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos); | |||
| 15786 | } | |||
| 15787 | fts5PrefixMergerNextPosition(pThis)sqlite3Fts5PoslistNext64((pThis)->aPos,(pThis)->iter.nPoslist ,&(pThis)->iOff,&(pThis)->iPos); | |||
| 15788 | pHead = pThis->pNext; | |||
| 15789 | fts5PrefixMergerInsertByPosition(&pHead, pThis); | |||
| 15790 | } | |||
| 15791 | ||||
      /* The unread remainder (nTail bytes) of the last iterator's position
      ** list is copied to the output as-is further below. */
| 15792 | if( pHead->iPos!=iPrev ){ | |||
| 15793 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos); | |||
| 15794 | } | |||
| 15795 | nTail = pHead->iter.nPoslist - pHead->iOff; | |||
| 15796 | ||||
| 15797 | /* WRITEPOSLISTSIZE */ | |||
| 15798 | assert_nc( tmp.n+nTail<=nTmp )((void) (0)); | |||
| 15799 | assert( tmp.n+nTail<=nTmp+nMerge*10 )((void) (0)); | |||
| 15800 | if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING8 ){ | |||
| 15801 | if( p->rc==SQLITE_OK0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 15802 | break; | |||
| 15803 | } | |||
| 15804 | fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2){ (&out)->n += sqlite3Fts5PutVarint(&(&out)-> p[(&out)->n], ((tmp.n+nTail) * 2)); ((void) (0)); }; | |||
| 15805 | fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], tmp.p, tmp.n); (&out)->n += tmp.n; }; | |||
| 15806 | if( nTail>0 ){ | |||
| 15807 | fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], &pHead->aPos[pHead->iOff], nTail); (&out)-> n += nTail; }; | |||
| 15808 | } | |||
| 15809 | ||||
      /* Go back to the rowid-ordered list: advance every iterator that was
      ** on iLastRowid and re-insert those that still have an entry. */
| 15810 | pHead = pSave; | |||
| 15811 | for(i=0; i<nBuf+1; i++){ | |||
| 15812 | PrefixMerger *pX = &aMerger[i]; | |||
| 15813 | if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){ | |||
| 15814 | fts5DoclistIterNext(&pX->iter); | |||
| 15815 | fts5PrefixMergerInsertByRowid(&pHead, pX); | |||
| 15816 | } | |||
| 15817 | } | |||
| 15818 | ||||
| 15819 | }else{ | |||
| 15820 | /* Copy poslist from pHead to output */ | |||
| 15821 | PrefixMerger *pThis = pHead; | |||
| 15822 | Fts5DoclistIter *pI = &pThis->iter; | |||
| 15823 | fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize){ ((void) (0)); memcpy(&(&out)->p[(&out)->n ], pI->aPoslist, pI->nPoslist+pI->nSize); (&out) ->n += pI->nPoslist+pI->nSize; }; | |||
| 15824 | fts5DoclistIterNext(pI); | |||
| 15825 | pHead = pThis->pNext; | |||
| 15826 | fts5PrefixMergerInsertByRowid(&pHead, pThis); | |||
| 15827 | } | |||
| 15828 | } | |||
| 15829 | ||||
  /* Replace the contents of p1 with the zero-padded output buffer */
| 15830 | fts5BufferFree(p1)sqlite3Fts5BufferFree(p1); | |||
| 15831 | fts5BufferFree(&tmp)sqlite3Fts5BufferFree(&tmp); | |||
| 15832 | memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING8); | |||
| 15833 | *p1 = out; | |||
| 15834 | } | |||
| 15835 | ||||
| 15836 | ||||
| 15837 | /* | |||
| 15838 | ** Iterate through a range of entries in the FTS index, invoking the xVisit | |||
| 15839 | ** callback for each of them. | |||
| 15840 | ** | |||
| 15841 | ** Parameter pToken points to an nToken buffer containing an FTS index term | |||
| 15842 | ** (i.e. a document term with the preceding 1 byte index identifier - | |||
| 15843 | ** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits | |||
| 15844 | ** all entries for terms that have pToken/nToken as a prefix. If bPrefix | |||
| 15845 | ** is false, then only entries with pToken/nToken as the entire key are | |||
| 15846 | ** visited. | |||
| 15847 | ** | |||
| 15848 | ** If the current table is a tokendata=1 table, then if bPrefix is true then | |||
| 15849 | ** each index term is treated separately. However, if bPrefix is false, then | |||
| 15850 | ** all index terms corresponding to pToken/nToken are collapsed into a single | |||
| 15851 | ** term before the callback is invoked. | |||
| 15852 | ** | |||
| 15853 | ** The callback invoked for each entry visited is specified by paramter xVisit. | |||
| 15854 | ** Each time it is invoked, it is passed a pointer to the Fts5Index object, | |||
| 15855 | ** a copy of the 7th paramter to this function (pCtx) and a pointer to the | |||
| 15856 | ** iterator that indicates the current entry. If the current entry is the | |||
| 15857 | ** first with a new term (i.e. different from that of the previous entry, | |||
| 15858 | ** including the very first term), then the final two parameters are passed | |||
| 15859 | ** a pointer to the term and its size in bytes, respectively. If the current | |||
| 15860 | ** entry is not the first associated with its term, these two parameters | |||
| 15861 | ** are passed 0. | |||
| 15862 | ** | |||
| 15863 | ** If parameter pColset is not NULL, then it is used to filter entries before | |||
| 15864 | ** the callback is invoked. | |||
| 15865 | */ | |||
| 15866 | static int fts5VisitEntries( | |||
| 15867 | Fts5Index *p, /* Fts5 index object */ | |||
| 15868 | Fts5Colset *pColset, /* Columns filter to apply, or NULL */ | |||
| 15869 | u8 *pToken, /* Buffer containing token */ | |||
| 15870 | int nToken, /* Size of buffer pToken in bytes */ | |||
| 15871 | int bPrefix, /* True for a prefix scan */ | |||
| 15872 | void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int), | |||
| 15873 | void *pCtx /* Passed as second argument to xVisit() */ | |||
| 15874 | ){ | |||
| 15875 | const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN0x0008 : 0) | |||
| 15876 | | FTS5INDEX_QUERY_SKIPEMPTY0x0010 | |||
| 15877 | | FTS5INDEX_QUERY_NOOUTPUT0x0020; | |||
| 15878 | Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ | |||
| 15879 | int bNewTerm = 1; | |||
| 15880 | Fts5Structure *pStruct = fts5StructureRead(p); | |||
| 15881 | ||||
| 15882 | fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); | |||
| 15883 | fts5IterSetOutputCb(&p->rc, p1); | |||
| 15884 | for( /* no-op */ ; | |||
| 15885 | fts5MultiIterEof(p, p1)==0; | |||
| 15886 | fts5MultiIterNext2(p, p1, &bNewTerm) | |||
| 15887 | ){ | |||
| 15888 | Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; | |||
| 15889 | int nNew = 0; | |||
| 15890 | const u8 *pNew = 0; | |||
| 15891 | ||||
| 15892 | p1->xSetOutputs(p1, pSeg); | |||
| 15893 | if( p->rc ) break; | |||
| 15894 | ||||
| 15895 | if( bNewTerm ){ | |||
| 15896 | nNew = pSeg->term.n; | |||
| 15897 | pNew = pSeg->term.p; | |||
| 15898 | if( nNew<nToken || memcmp(pToken, pNew, nToken) ) break; | |||
| 15899 | } | |||
| 15900 | ||||
| 15901 | xVisit(p, pCtx, p1, pNew, nNew); | |||
| 15902 | } | |||
| 15903 | fts5MultiIterFree(p1); | |||
| 15904 | ||||
| 15905 | fts5StructureRelease(pStruct); | |||
| 15906 | return p->rc; | |||
| 15907 | } | |||
| 15908 | ||||
| 15909 | ||||
| 15910 | /* | |||
| 15911 | ** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an | |||
| 15912 | ** array of these for each row it visits (so all iRowid fields are the same). | |||
| 15913 | ** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an | |||
| 15914 | ** array of these for the entire query (in which case iRowid fields may take | |||
| 15915 | ** a variety of values). | |||
| 15916 | ** | |||
| 15917 | ** Each instance in the array indicates the iterator (and therefore term) | |||
| 15918 | ** associated with position iPos of rowid iRowid. This is used by the | |||
| 15919 | ** xInstToken() API. | |||
| 15920 | ** | |||
| 15921 | ** iRowid: | |||
| 15922 | ** Rowid for the current entry. | |||
| 15923 | ** | |||
| 15924 | ** iPos: | |||
| 15925 | ** Position of current entry within row. In the usual ((iCol<<32)+iOff) | |||
| 15926 | ** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()). | |||
| 15927 | ** | |||
| 15928 | ** iIter: | |||
| 15929 | ** If the Fts5TokenDataIter iterator that the entry is part of is | |||
| 15930 | ** actually an iterator (i.e. with nIter>0, not just a container for | |||
| 15931 | ** Fts5TokenDataMap structures), then this variable is an index into | |||
| 15932 | ** the apIter[] array. The corresponding term is that which the iterator | |||
| 15933 | ** at apIter[iIter] currently points to. | |||
| 15934 | ** | |||
| 15935 | ** Or, if the Fts5TokenDataIter iterator is just a container object | |||
| 15936 | ** (nIter==0), then iIter is an index into the term.p[] buffer where | |||
| 15937 | ** the term is stored. | |||
| 15938 | ** | |||
| 15939 | ** nByte: | |||
| 15940 | ** In the case where iIter is an index into term.p[], this variable | |||
| 15941 | ** is the size of the term in bytes. If iIter is an index into apIter[], | |||
| 15942 | ** this variable is unused. | |||
| 15943 | */ | |||
| 15944 | struct Fts5TokenDataMap { | |||
| 15945 | i64 iRowid; /* Row this token is located in */ | |||
| 15946 | i64 iPos; /* Position of token */ | |||
| 15947 | int iIter; /* Iterator token was read from */ | |||
| 15948 | int nByte; /* Length of token in bytes (or 0) */ | |||
| 15949 | }; | |||
| 15950 | ||||
| 15951 | /* | |||
| 15952 | ** An object used to supplement Fts5Iter for tokendata=1 iterators. | |||
| 15953 | ** | |||
| 15954 | ** This object serves two purposes. The first is as a container for an array | |||
| 15955 | ** of Fts5TokenDataMap structures, which are used to find the token required | |||
| 15956 | ** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and | |||
| 15957 | ** aMap[] variables. | |||
| 15958 | */ | |||
| 15959 | struct Fts5TokenDataIter { | |||
| 15960 | int nMapAlloc; /* Allocated size of aMap[] in entries */ | |||
| 15961 | int nMap; /* Number of valid entries in aMap[] */ | |||
| 15962 | Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */ | |||
| 15963 | ||||
| 15964 | /* The following are used for prefix-queries only. */ | |||
| 15965 | Fts5Buffer terms; | |||
| 15966 | ||||
| 15967 | /* The following are used for other full-token tokendata queries only. */ | |||
| 15968 | int nIter; | |||
| 15969 | int nIterAlloc; | |||
| 15970 | Fts5PoslistReader *aPoslistReader; | |||
| 15971 | int *aPoslistToIter; | |||
| 15972 | Fts5Iter *apIter[FLEXARRAY]; | |||
| 15973 | }; | |||
| 15974 | ||||
/* Size in bytes of an Fts5TokenDataIter object holding up to N iterators.
** The trailing apIter[] array stores pointers to Fts5Iter objects, so each
** slot is sizeof(Fts5Iter*) bytes, not sizeof(Fts5Iter). */
#define SZ_FTS5TOKENDATAITER(N) \
    (offsetof(Fts5TokenDataIter,apIter) + (N)*sizeof(Fts5Iter*))
| 15978 | ||||
| 15979 | /* | |||
| 15980 | ** The two input arrays - a1[] and a2[] - are in sorted order. This function | |||
| 15981 | ** merges the two arrays together and writes the result to output array | |||
| 15982 | ** aOut[]. aOut[] is guaranteed to be large enough to hold the result. | |||
| 15983 | ** | |||
| 15984 | ** Duplicate entries are copied into the output. So the size of the output | |||
| 15985 | ** array is always (n1+n2) entries. | |||
| 15986 | */ | |||
| 15987 | static void fts5TokendataMerge( | |||
| 15988 | Fts5TokenDataMap *a1, int n1, /* Input array 1 */ | |||
| 15989 | Fts5TokenDataMap *a2, int n2, /* Input array 2 */ | |||
| 15990 | Fts5TokenDataMap *aOut /* Output array */ | |||
| 15991 | ){ | |||
| 15992 | int i1 = 0; | |||
| 15993 | int i2 = 0; | |||
| 15994 | ||||
| 15995 | assert( n1>=0 && n2>=0 )((void) (0)); | |||
| 15996 | while( i1<n1 || i2<n2 ){ | |||
| 15997 | Fts5TokenDataMap *pOut = &aOut[i1+i2]; | |||
| 15998 | if( i2>=n2 || (i1<n1 && ( | |||
| 15999 | a1[i1].iRowid<a2[i2].iRowid | |||
| 16000 | || (a1[i1].iRowid==a2[i2].iRowid && a1[i1].iPos<=a2[i2].iPos) | |||
| 16001 | ))){ | |||
| 16002 | memcpy(pOut, &a1[i1], sizeof(Fts5TokenDataMap)); | |||
| 16003 | i1++; | |||
| 16004 | }else{ | |||
| 16005 | memcpy(pOut, &a2[i2], sizeof(Fts5TokenDataMap)); | |||
| 16006 | i2++; | |||
| 16007 | } | |||
| 16008 | } | |||
| 16009 | } | |||
| 16010 | ||||
| 16011 | ||||
| 16012 | /* | |||
| 16013 | ** Append a mapping to the token-map belonging to object pT. | |||
| 16014 | */ | |||
| 16015 | static void fts5TokendataIterAppendMap( | |||
| 16016 | Fts5Index *p, | |||
| 16017 | Fts5TokenDataIter *pT, | |||
| 16018 | int iIter, | |||
| 16019 | int nByte, | |||
| 16020 | i64 iRowid, | |||
| 16021 | i64 iPos | |||
| 16022 | ){ | |||
| 16023 | if( p->rc==SQLITE_OK0 ){ | |||
| 16024 | if( pT->nMap==pT->nMapAlloc ){ | |||
| 16025 | int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64; | |||
| 16026 | int nAlloc = nNew * sizeof(Fts5TokenDataMap); | |||
| 16027 | Fts5TokenDataMap *aNew; | |||
| 16028 | ||||
| 16029 | aNew = (Fts5TokenDataMap*)sqlite3_reallocsqlite3_api->realloc(pT->aMap, nAlloc); | |||
| 16030 | if( aNew==0 ){ | |||
| 16031 | p->rc = SQLITE_NOMEM7; | |||
| 16032 | return; | |||
| 16033 | } | |||
| 16034 | ||||
| 16035 | pT->aMap = aNew; | |||
| 16036 | pT->nMapAlloc = nNew; | |||
| 16037 | } | |||
| 16038 | ||||
| 16039 | pT->aMap[pT->nMap].iRowid = iRowid; | |||
| 16040 | pT->aMap[pT->nMap].iPos = iPos; | |||
| 16041 | pT->aMap[pT->nMap].iIter = iIter; | |||
| 16042 | pT->aMap[pT->nMap].nByte = nByte; | |||
| 16043 | pT->nMap++; | |||
| 16044 | } | |||
| 16045 | } | |||
| 16046 | ||||
| 16047 | /* | |||
| 16048 | ** Sort the contents of the pT->aMap[] array. | |||
| 16049 | ** | |||
| 16050 | ** The sorting algorithm requires a malloc(). If this fails, an error code | |||
| 16051 | ** is left in Fts5Index.rc before returning. | |||
| 16052 | */ | |||
| 16053 | static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){ | |||
| 16054 | Fts5TokenDataMap *aTmp = 0; | |||
| 16055 | int nByte = pT->nMap * sizeof(Fts5TokenDataMap); | |||
| 16056 | ||||
| 16057 | aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte); | |||
| 16058 | if( aTmp ){ | |||
| 16059 | Fts5TokenDataMap *a1 = pT->aMap; | |||
| 16060 | Fts5TokenDataMap *a2 = aTmp; | |||
| 16061 | i64 nHalf; | |||
| 16062 | ||||
| 16063 | for(nHalf=1; nHalf<pT->nMap; nHalf=nHalf*2){ | |||
| 16064 | int i1; | |||
| 16065 | for(i1=0; i1<pT->nMap; i1+=(nHalf*2)){ | |||
| 16066 | int n1 = MIN(nHalf, pT->nMap-i1)(((nHalf) < (pT->nMap-i1)) ? (nHalf) : (pT->nMap-i1) ); | |||
| 16067 | int n2 = MIN(nHalf, pT->nMap-i1-n1)(((nHalf) < (pT->nMap-i1-n1)) ? (nHalf) : (pT->nMap- i1-n1)); | |||
| 16068 | fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]); | |||
| 16069 | } | |||
| 16070 | SWAPVAL(Fts5TokenDataMap*, a1, a2){ Fts5TokenDataMap* tmp; tmp=a1; a1=a2; a2=tmp; }; | |||
| 16071 | } | |||
| 16072 | ||||
| 16073 | if( a1!=pT->aMap ){ | |||
| 16074 | memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap)); | |||
| 16075 | } | |||
| 16076 | sqlite3_freesqlite3_api->free(aTmp); | |||
| 16077 | ||||
| 16078 | #ifdef SQLITE_DEBUG | |||
| 16079 | { | |||
| 16080 | int ii; | |||
| 16081 | for(ii=1; ii<pT->nMap; ii++){ | |||
| 16082 | Fts5TokenDataMap *p1 = &pT->aMap[ii-1]; | |||
| 16083 | Fts5TokenDataMap *p2 = &pT->aMap[ii]; | |||
| 16084 | assert( p1->iRowid<p2->iRowid((void) (0)) | |||
| 16085 | || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos)((void) (0)) | |||
| 16086 | )((void) (0)); | |||
| 16087 | } | |||
| 16088 | } | |||
| 16089 | #endif | |||
| 16090 | } | |||
| 16091 | } | |||
| 16092 | ||||
| 16093 | /* | |||
| 16094 | ** Delete an Fts5TokenDataIter structure and its contents. | |||
| 16095 | */ | |||
| 16096 | static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){ | |||
| 16097 | if( pSet ){ | |||
| 16098 | int ii; | |||
| 16099 | for(ii=0; ii<pSet->nIter; ii++){ | |||
| 16100 | fts5MultiIterFree(pSet->apIter[ii]); | |||
| 16101 | } | |||
| 16102 | fts5BufferFree(&pSet->terms)sqlite3Fts5BufferFree(&pSet->terms); | |||
| 16103 | sqlite3_freesqlite3_api->free(pSet->aPoslistReader); | |||
| 16104 | sqlite3_freesqlite3_api->free(pSet->aMap); | |||
| 16105 | sqlite3_freesqlite3_api->free(pSet); | |||
| 16106 | } | |||
| 16107 | } | |||
| 16108 | ||||
| 16109 | ||||
| 16110 | /* | |||
| 16111 | ** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata() | |||
| 16112 | ** to pass data to prefixIterSetupTokendataCb(). | |||
| 16113 | */ | |||
| 16114 | typedef struct TokendataSetupCtx TokendataSetupCtx; | |||
| 16115 | struct TokendataSetupCtx { | |||
| 16116 | Fts5TokenDataIter *pT; /* Object being populated with mappings */ | |||
| 16117 | int iTermOff; /* Offset of current term in terms.p[] */ | |||
| 16118 | int nTermByte; /* Size of current term in bytes */ | |||
| 16119 | }; | |||
| 16120 | ||||
| 16121 | /* | |||
| 16122 | ** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This | |||
| 16123 | ** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each | |||
| 16124 | ** position in the current position-list. It doesn't matter that some of | |||
| 16125 | ** these may be out of order - they will be sorted later. | |||
| 16126 | */ | |||
| 16127 | static void prefixIterSetupTokendataCb( | |||
| 16128 | Fts5Index *p, | |||
| 16129 | void *pCtx, | |||
| 16130 | Fts5Iter *p1, | |||
| 16131 | const u8 *pNew, | |||
| 16132 | int nNew | |||
| 16133 | ){ | |||
| 16134 | TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx; | |||
| 16135 | int iPosOff = 0; | |||
| 16136 | i64 iPos = 0; | |||
| 16137 | ||||
| 16138 | if( pNew ){ | |||
| 16139 | pSetup->nTermByte = nNew-1; | |||
| 16140 | pSetup->iTermOff = pSetup->pT->terms.n; | |||
| 16141 | fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1)sqlite3Fts5BufferAppendBlob(&p->rc,&pSetup->pT-> terms,nNew-1,pNew+1); | |||
| 16142 | } | |||
| 16143 | ||||
| 16144 | while( 0==sqlite3Fts5PoslistNext64( | |||
| 16145 | p1->base.pData, p1->base.nData, &iPosOff, &iPos | |||
| 16146 | ) ){ | |||
| 16147 | fts5TokendataIterAppendMap(p, | |||
| 16148 | pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos | |||
| 16149 | ); | |||
| 16150 | } | |||
| 16151 | } | |||
| 16152 | ||||
| 16153 | ||||
| 16154 | /* | |||
| 16155 | ** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries(). | |||
| 16156 | */ | |||
| 16157 | typedef struct PrefixSetupCtx PrefixSetupCtx; | |||
| 16158 | struct PrefixSetupCtx { | |||
| 16159 | void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); | |||
| 16160 | void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); | |||
| 16161 | i64 iLastRowid; | |||
| 16162 | int nMerge; | |||
| 16163 | Fts5Buffer *aBuf; | |||
| 16164 | int nBuf; | |||
| 16165 | Fts5Buffer doclist; | |||
| 16166 | TokendataSetupCtx *pTokendata; | |||
| 16167 | }; | |||
| 16168 | ||||
| 16169 | /* | |||
| 16170 | ** fts5VisitEntries() callback used by fts5SetupPrefixIter() | |||
| 16171 | */ | |||
| 16172 | static void prefixIterSetupCb( | |||
| 16173 | Fts5Index *p, | |||
| 16174 | void *pCtx, | |||
| 16175 | Fts5Iter *p1, | |||
| 16176 | const u8 *pNew, | |||
| 16177 | int nNew | |||
| 16178 | ){ | |||
| 16179 | PrefixSetupCtx *pSetup = (PrefixSetupCtx*)pCtx; | |||
| 16180 | const int nMerge = pSetup->nMerge; | |||
| 16181 | ||||
| 16182 | if( p1->base.nData>0 ){ | |||
| 16183 | if( p1->base.iRowid<=pSetup->iLastRowid && pSetup->doclist.n>0 ){ | |||
| 16184 | int i; | |||
| 16185 | for(i=0; p->rc==SQLITE_OK0 && pSetup->doclist.n; i++){ | |||
| 16186 | int i1 = i*nMerge; | |||
| 16187 | int iStore; | |||
| 16188 | assert( i1+nMerge<=pSetup->nBuf )((void) (0)); | |||
| 16189 | for(iStore=i1; iStore<i1+nMerge; iStore++){ | |||
| 16190 | if( pSetup->aBuf[iStore].n==0 ){ | |||
| 16191 | fts5BufferSwap(&pSetup->doclist, &pSetup->aBuf[iStore]); | |||
| 16192 | fts5BufferZero(&pSetup->doclist)sqlite3Fts5BufferZero(&pSetup->doclist); | |||
| 16193 | break; | |||
| 16194 | } | |||
| 16195 | } | |||
| 16196 | if( iStore==i1+nMerge ){ | |||
| 16197 | pSetup->xMerge(p, &pSetup->doclist, nMerge, &pSetup->aBuf[i1]); | |||
| 16198 | for(iStore=i1; iStore<i1+nMerge; iStore++){ | |||
| 16199 | fts5BufferZero(&pSetup->aBuf[iStore])sqlite3Fts5BufferZero(&pSetup->aBuf[iStore]); | |||
| 16200 | } | |||
| 16201 | } | |||
| 16202 | } | |||
| 16203 | pSetup->iLastRowid = 0; | |||
| 16204 | } | |||
| 16205 | ||||
| 16206 | pSetup->xAppend( | |||
| 16207 | p, (u64)p1->base.iRowid-(u64)pSetup->iLastRowid, p1, &pSetup->doclist | |||
| 16208 | ); | |||
| 16209 | pSetup->iLastRowid = p1->base.iRowid; | |||
| 16210 | } | |||
| 16211 | ||||
| 16212 | if( pSetup->pTokendata ){ | |||
| 16213 | prefixIterSetupTokendataCb(p, (void*)pSetup->pTokendata, p1, pNew, nNew); | |||
| 16214 | } | |||
| 16215 | } | |||
| 16216 | ||||
| 16217 | static void fts5SetupPrefixIter( | |||
| 16218 | Fts5Index *p, /* Index to read from */ | |||
| 16219 | int bDesc, /* True for "ORDER BY rowid DESC" */ | |||
| 16220 | int iIdx, /* Index to scan for data */ | |||
| 16221 | u8 *pToken, /* Buffer containing prefix to match */ | |||
| 16222 | int nToken, /* Size of buffer pToken in bytes */ | |||
| 16223 | Fts5Colset *pColset, /* Restrict matches to these columns */ | |||
| 16224 | Fts5Iter **ppIter /* OUT: New iterator */ | |||
| 16225 | ){ | |||
| 16226 | Fts5Structure *pStruct; | |||
| 16227 | PrefixSetupCtx s; | |||
| 16228 | TokendataSetupCtx s2; | |||
| 16229 | ||||
| 16230 | memset(&s, 0, sizeof(s)); | |||
| 16231 | memset(&s2, 0, sizeof(s2)); | |||
| 16232 | ||||
| 16233 | s.nMerge = 1; | |||
| 16234 | s.iLastRowid = 0; | |||
| 16235 | s.nBuf = 32; | |||
| 16236 | if( iIdx==0 | |||
| 16237 | && p->pConfig->eDetail==FTS5_DETAIL_FULL0 | |||
| 16238 | && p->pConfig->bPrefixInsttoken | |||
| 16239 | ){ | |||
| 16240 | s.pTokendata = &s2; | |||
| 16241 | s2.pT = (Fts5TokenDataIter*)fts5IdxMalloc(p, SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | |||
| 16242 | } | |||
| 16243 | ||||
| 16244 | if( p->pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 16245 | s.xMerge = fts5MergeRowidLists; | |||
| 16246 | s.xAppend = fts5AppendRowid; | |||
| 16247 | }else{ | |||
| 16248 | s.nMerge = FTS5_MERGE_NLIST16-1; | |||
| 16249 | s.nBuf = s.nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */ | |||
| 16250 | s.xMerge = fts5MergePrefixLists; | |||
| 16251 | s.xAppend = fts5AppendPoslist; | |||
| 16252 | } | |||
| 16253 | ||||
| 16254 | s.aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*s.nBuf); | |||
| 16255 | pStruct = fts5StructureRead(p); | |||
| 16256 | assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) )((void) (0)); | |||
| 16257 | ||||
| 16258 | if( p->rc==SQLITE_OK0 ){ | |||
| 16259 | void *pCtx = (void*)&s; | |||
| 16260 | int i; | |||
| 16261 | Fts5Data *pData; | |||
| 16262 | ||||
| 16263 | /* If iIdx is non-zero, then it is the number of a prefix-index for | |||
| 16264 | ** prefixes 1 character longer than the prefix being queried for. That | |||
| 16265 | ** index contains all the doclists required, except for the one | |||
| 16266 | ** corresponding to the prefix itself. That one is extracted from the | |||
| 16267 | ** main term index here. */ | |||
| 16268 | if( iIdx!=0 ){ | |||
| 16269 | pToken[0] = FTS5_MAIN_PREFIX'0'; | |||
| 16270 | fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx); | |||
| 16271 | } | |||
| 16272 | ||||
| 16273 | pToken[0] = FTS5_MAIN_PREFIX'0' + iIdx; | |||
| 16274 | fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx); | |||
| 16275 | ||||
| 16276 | assert( (s.nBuf%s.nMerge)==0 )((void) (0)); | |||
| 16277 | for(i=0; i<s.nBuf; i+=s.nMerge){ | |||
| 16278 | int iFree; | |||
| 16279 | if( p->rc==SQLITE_OK0 ){ | |||
| 16280 | s.xMerge(p, &s.doclist, s.nMerge, &s.aBuf[i]); | |||
| 16281 | } | |||
| 16282 | for(iFree=i; iFree<i+s.nMerge; iFree++){ | |||
| 16283 | fts5BufferFree(&s.aBuf[iFree])sqlite3Fts5BufferFree(&s.aBuf[iFree]); | |||
| 16284 | } | |||
| 16285 | } | |||
| 16286 | ||||
| 16287 | pData = fts5IdxMalloc(p, sizeof(*pData) | |||
| 16288 | + ((i64)s.doclist.n)+FTS5_DATA_ZERO_PADDING8); | |||
| 16289 | assert( pData!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
| 16290 | if( pData ){ | |||
| 16291 | pData->p = (u8*)&pData[1]; | |||
| 16292 | pData->nn = pData->szLeaf = s.doclist.n; | |||
| 16293 | if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n); | |||
| 16294 | fts5MultiIterNew2(p, pData, bDesc, ppIter); | |||
| 16295 | } | |||
| 16296 | ||||
| 16297 | assert( (*ppIter)!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
| 16298 | if( p->rc==SQLITE_OK0 && s.pTokendata ){ | |||
| 16299 | fts5TokendataIterSortMap(p, s2.pT); | |||
| 16300 | (*ppIter)->pTokenDataIter = s2.pT; | |||
| 16301 | s2.pT = 0; | |||
| 16302 | } | |||
| 16303 | } | |||
| 16304 | ||||
| 16305 | fts5TokendataIterDelete(s2.pT); | |||
| 16306 | fts5BufferFree(&s.doclist)sqlite3Fts5BufferFree(&s.doclist); | |||
| 16307 | fts5StructureRelease(pStruct); | |||
| 16308 | sqlite3_freesqlite3_api->free(s.aBuf); | |||
| 16309 | } | |||
| 16310 | ||||
| 16311 | ||||
| 16312 | /* | |||
| 16313 | ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain | |||
| 16314 | ** to the document with rowid iRowid. | |||
| 16315 | */ | |||
| 16316 | static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ | |||
| 16317 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 16318 | ||||
| 16319 | /* Allocate the hash table if it has not already been allocated */ | |||
| 16320 | if( p->pHash==0 ){ | |||
| 16321 | p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); | |||
| 16322 | } | |||
| 16323 | ||||
| 16324 | /* Flush the hash table to disk if required */ | |||
| 16325 | if( iRowid<p->iWriteRowid | |||
| 16326 | || (iRowid==p->iWriteRowid && p->bDelete==0) | |||
| 16327 | || (p->nPendingData > p->pConfig->nHashSize) | |||
| 16328 | ){ | |||
| 16329 | fts5IndexFlush(p); | |||
| 16330 | } | |||
| 16331 | ||||
| 16332 | p->iWriteRowid = iRowid; | |||
| 16333 | p->bDelete = bDelete; | |||
| 16334 | if( bDelete==0 ){ | |||
| 16335 | p->nPendingRow++; | |||
| 16336 | } | |||
| 16337 | return fts5IndexReturn(p); | |||
| 16338 | } | |||
| 16339 | ||||
| 16340 | /* | |||
| 16341 | ** Commit data to disk. | |||
| 16342 | */ | |||
| 16343 | static int sqlite3Fts5IndexSync(Fts5Index *p){ | |||
| 16344 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 16345 | fts5IndexFlush(p); | |||
| 16346 | fts5IndexCloseReader(p); | |||
| 16347 | return fts5IndexReturn(p); | |||
| 16348 | } | |||
| 16349 | ||||
| 16350 | /* | |||
| 16351 | ** Discard any data stored in the in-memory hash tables. Do not write it | |||
| 16352 | ** to the database. Additionally, assume that the contents of the %_data | |||
| 16353 | ** table may have changed on disk. So any in-memory caches of %_data | |||
| 16354 | ** records must be invalidated. | |||
| 16355 | */ | |||
| 16356 | static int sqlite3Fts5IndexRollback(Fts5Index *p){ | |||
| 16357 | fts5IndexCloseReader(p); | |||
| 16358 | fts5IndexDiscardData(p); | |||
| 16359 | fts5StructureInvalidate(p); | |||
| 16360 | return fts5IndexReturn(p); | |||
| 16361 | } | |||
| 16362 | ||||
| 16363 | /* | |||
| 16364 | ** The %_data table is completely empty when this function is called. This | |||
| 16365 | ** function populates it with the initial structure objects for each index, | |||
| 16366 | ** and the initial version of the "averages" record (a zero-byte blob). | |||
| 16367 | */ | |||
| 16368 | static int sqlite3Fts5IndexReinit(Fts5Index *p){ | |||
| 16369 | Fts5Structure *pTmp; | |||
| 16370 | u8 tmpSpace[SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel ))]; | |||
| 16371 | fts5StructureInvalidate(p); | |||
| 16372 | fts5IndexDiscardData(p); | |||
| 16373 | pTmp = (Fts5Structure*)tmpSpace; | |||
| 16374 | memset(pTmp, 0, SZ_FTS5STRUCTURE(1)(__builtin_offsetof(Fts5Structure, aLevel) + (1)*sizeof(Fts5StructureLevel ))); | |||
| 16375 | if( p->pConfig->bContentlessDelete ){ | |||
| 16376 | pTmp->nOriginCntr = 1; | |||
| 16377 | } | |||
| 16378 | fts5DataWrite(p, FTS5_AVERAGES_ROWID1, (const u8*)"", 0); | |||
| 16379 | fts5StructureWrite(p, pTmp); | |||
| 16380 | return fts5IndexReturn(p); | |||
| 16381 | } | |||
| 16382 | ||||
| 16383 | /* | |||
| 16384 | ** Open a new Fts5Index handle. If the bCreate argument is true, create | |||
| 16385 | ** and initialize the underlying %_data table. | |||
| 16386 | ** | |||
| 16387 | ** If successful, set *pp to point to the new object and return SQLITE_OK. | |||
| 16388 | ** Otherwise, set *pp to NULL and return an SQLite error code. | |||
| 16389 | */ | |||
| 16390 | static int sqlite3Fts5IndexOpen( | |||
| 16391 | Fts5Config *pConfig, | |||
| 16392 | int bCreate, | |||
| 16393 | Fts5Index **pp, | |||
| 16394 | char **pzErr | |||
| 16395 | ){ | |||
| 16396 | int rc = SQLITE_OK0; | |||
| 16397 | Fts5Index *p; /* New object */ | |||
| 16398 | ||||
| 16399 | *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); | |||
| 16400 | if( rc==SQLITE_OK0 ){ | |||
| 16401 | p->pConfig = pConfig; | |||
| 16402 | p->nWorkUnit = FTS5_WORK_UNIT64; | |||
| 16403 | p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); | |||
| 16404 | if( p->zDataTbl && bCreate ){ | |||
| 16405 | rc = sqlite3Fts5CreateTable( | |||
| 16406 | pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr | |||
| 16407 | ); | |||
| 16408 | if( rc==SQLITE_OK0 ){ | |||
| 16409 | rc = sqlite3Fts5CreateTable(pConfig, "idx", | |||
| 16410 | "segid, term, pgno, PRIMARY KEY(segid, term)", | |||
| 16411 | 1, pzErr | |||
| 16412 | ); | |||
| 16413 | } | |||
| 16414 | if( rc==SQLITE_OK0 ){ | |||
| 16415 | rc = sqlite3Fts5IndexReinit(p); | |||
| 16416 | } | |||
| 16417 | } | |||
| 16418 | } | |||
| 16419 | ||||
| 16420 | assert( rc!=SQLITE_OK || p->rc==SQLITE_OK )((void) (0)); | |||
| 16421 | if( rc ){ | |||
| 16422 | sqlite3Fts5IndexClose(p); | |||
| 16423 | *pp = 0; | |||
| 16424 | } | |||
| 16425 | return rc; | |||
| 16426 | } | |||
| 16427 | ||||
| 16428 | /* | |||
| 16429 | ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). | |||
| 16430 | */ | |||
| 16431 | static int sqlite3Fts5IndexClose(Fts5Index *p){ | |||
| 16432 | int rc = SQLITE_OK0; | |||
| 16433 | if( p ){ | |||
| 16434 | assert( p->pReader==0 )((void) (0)); | |||
| 16435 | fts5StructureInvalidate(p); | |||
| 16436 | sqlite3_finalizesqlite3_api->finalize(p->pWriter); | |||
| 16437 | sqlite3_finalizesqlite3_api->finalize(p->pDeleter); | |||
| 16438 | sqlite3_finalizesqlite3_api->finalize(p->pIdxWriter); | |||
| 16439 | sqlite3_finalizesqlite3_api->finalize(p->pIdxDeleter); | |||
| 16440 | sqlite3_finalizesqlite3_api->finalize(p->pIdxSelect); | |||
| 16441 | sqlite3_finalizesqlite3_api->finalize(p->pIdxNextSelect); | |||
| 16442 | sqlite3_finalizesqlite3_api->finalize(p->pDataVersion); | |||
| 16443 | sqlite3_finalizesqlite3_api->finalize(p->pDeleteFromIdx); | |||
| 16444 | sqlite3Fts5HashFree(p->pHash); | |||
| 16445 | sqlite3_freesqlite3_api->free(p->zDataTbl); | |||
| 16446 | sqlite3_freesqlite3_api->free(p); | |||
| 16447 | } | |||
| 16448 | return rc; | |||
| 16449 | } | |||
| 16450 | ||||
/*
** Argument p points to a buffer of nByte bytes of utf-8 text. Return the
** size in bytes of the prefix of that buffer that is nChar characters
** long, or 0 if the buffer contains fewer than nChar characters in total.
**
** A byte with value 0xC0 or greater starts a multi-byte character; each
** following byte of the form 10xxxxxx is counted as part of the same
** character.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int iByte = 0;                  /* Bytes consumed so far */
  int nLeft = nChar;              /* Characters still to be consumed */

  while( nLeft>0 ){
    unsigned char c;
    if( iByte>=nByte ) return 0;  /* Input contains fewer than nChar chars */
    c = (unsigned char)p[iByte++];
    nLeft--;
    if( c<0xc0 ) continue;

    /* Lead byte of a multi-byte character. It must be followed by at
    ** least one more byte of input. */
    if( iByte>=nByte ) return 0;
    while( (p[iByte] & 0xc0)==0x80 ){
      iByte++;
      if( iByte>=nByte ){
        /* Running out of input is only acceptable when this was the
        ** final character requested. */
        if( nLeft==0 ) break;
        return 0;
      }
    }
  }
  return iByte;
}
| 16478 | ||||
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string. A byte of value 0xC0 or greater,
** together with any 10xxxxxx bytes that follow it, counts as a single
** character; every other byte counts as one character by itself.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nCount = 0;
  int iOff;

  for(iOff=0; iOff<nIn; nCount++){
    unsigned char c = (unsigned char)pIn[iOff];
    iOff++;
    if( c>=0xc0 ){
      /* Skip over the continuation bytes of this character */
      for(; iOff<nIn; iOff++){
        if( (pIn[iOff] & 0xc0)!=0x80 ) break;
      }
    }
  }
  return nCount;
}
| 16494 | ||||
| 16495 | /* | |||
| 16496 | ** Insert or remove data to or from the index. Each time a document is | |||
| 16497 | ** added to or removed from the index, this function is called one or more | |||
| 16498 | ** times. | |||
| 16499 | ** | |||
| 16500 | ** For an insert, it must be called once for each token in the new document. | |||
| 16501 | ** If the operation is a delete, it must be called (at least) once for each | |||
| 16502 | ** unique token in the document with an iCol value less than zero. The iPos | |||
| 16503 | ** argument is ignored for a delete. | |||
| 16504 | */ | |||
| 16505 | static int sqlite3Fts5IndexWrite( | |||
| 16506 | Fts5Index *p, /* Index to write to */ | |||
| 16507 | int iCol, /* Column token appears in (-ve -> delete) */ | |||
| 16508 | int iPos, /* Position of token within column */ | |||
| 16509 | const char *pToken, int nToken /* Token to add or remove to or from index */ | |||
| 16510 | ){ | |||
| 16511 | int i; /* Used to iterate through indexes */ | |||
| 16512 | int rc = SQLITE_OK0; /* Return code */ | |||
| 16513 | Fts5Config *pConfig = p->pConfig; | |||
| 16514 | ||||
| 16515 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 16516 | assert( (iCol<0)==p->bDelete )((void) (0)); | |||
| 16517 | ||||
| 16518 | /* Add the entry to the main terms index. */ | |||
| 16519 | rc = sqlite3Fts5HashWrite( | |||
| 16520 | p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX'0', pToken, nToken | |||
| 16521 | ); | |||
| 16522 | ||||
| 16523 | for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK0; i++){ | |||
| 16524 | const int nChar = pConfig->aPrefix[i]; | |||
| 16525 | int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); | |||
| 16526 | if( nByte ){ | |||
| 16527 | rc = sqlite3Fts5HashWrite(p->pHash, | |||
| 16528 | p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX'0'+i+1), pToken, | |||
| 16529 | nByte | |||
| 16530 | ); | |||
| 16531 | } | |||
| 16532 | } | |||
| 16533 | ||||
| 16534 | return rc; | |||
| 16535 | } | |||
| 16536 | ||||
| 16537 | /* | |||
| 16538 | ** pToken points to a buffer of size nToken bytes containing a search | |||
| 16539 | ** term, including the index number at the start, used on a tokendata=1 | |||
| 16540 | ** table. This function returns true if the term in buffer pBuf matches | |||
| 16541 | ** token pToken/nToken. | |||
| 16542 | */ | |||
| 16543 | static int fts5IsTokendataPrefix( | |||
| 16544 | Fts5Buffer *pBuf, | |||
| 16545 | const u8 *pToken, | |||
| 16546 | int nToken | |||
| 16547 | ){ | |||
| 16548 | return ( | |||
| 16549 | pBuf->n>=nToken | |||
| 16550 | && 0==memcmp(pBuf->p, pToken, nToken) | |||
| 16551 | && (pBuf->n==nToken || pBuf->p[nToken]==0x00) | |||
| 16552 | ); | |||
| 16553 | } | |||
| 16554 | ||||
| 16555 | /* | |||
| 16556 | ** Ensure the segment-iterator passed as the only argument points to EOF. | |||
| 16557 | */ | |||
| 16558 | static void fts5SegIterSetEOF(Fts5SegIter *pSeg){ | |||
| 16559 | fts5DataRelease(pSeg->pLeaf); | |||
| 16560 | pSeg->pLeaf = 0; | |||
| 16561 | } | |||
| 16562 | ||||
| 16563 | static void fts5IterClose(Fts5IndexIter *pIndexIter){ | |||
| 16564 | if( pIndexIter ){ | |||
| 16565 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
| 16566 | Fts5Index *pIndex = pIter->pIndex; | |||
| 16567 | fts5TokendataIterDelete(pIter->pTokenDataIter); | |||
| 16568 | fts5MultiIterFree(pIter); | |||
| 16569 | fts5IndexCloseReader(pIndex); | |||
| 16570 | } | |||
| 16571 | } | |||
| 16572 | ||||
| 16573 | /* | |||
| 16574 | ** This function appends iterator pAppend to Fts5TokenDataIter pIn and | |||
| 16575 | ** returns the result. | |||
| 16576 | */ | |||
| 16577 | static Fts5TokenDataIter *fts5AppendTokendataIter( | |||
| 16578 | Fts5Index *p, /* Index object (for error code) */ | |||
| 16579 | Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */ | |||
| 16580 | Fts5Iter *pAppend /* Append this iterator */ | |||
| 16581 | ){ | |||
| 16582 | Fts5TokenDataIter *pRet = pIn; | |||
| 16583 | ||||
| 16584 | if( p->rc==SQLITE_OK0 ){ | |||
| 16585 | if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){ | |||
| 16586 | int nAlloc = pIn ? pIn->nIterAlloc*2 : 16; | |||
| 16587 | int nByte = SZ_FTS5TOKENDATAITER(nAlloc+1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (nAlloc+1)*sizeof (Fts5Iter)); | |||
| 16588 | Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_reallocsqlite3_api->realloc(pIn, nByte); | |||
| 16589 | ||||
| 16590 | if( pNew==0 ){ | |||
| 16591 | p->rc = SQLITE_NOMEM7; | |||
| 16592 | }else{ | |||
| 16593 | if( pIn==0 ) memset(pNew, 0, nByte); | |||
| 16594 | pRet = pNew; | |||
| 16595 | pNew->nIterAlloc = nAlloc; | |||
| 16596 | } | |||
| 16597 | } | |||
| 16598 | } | |||
| 16599 | if( p->rc ){ | |||
| 16600 | fts5IterClose((Fts5IndexIter*)pAppend); | |||
| 16601 | }else{ | |||
| 16602 | pRet->apIter[pRet->nIter++] = pAppend; | |||
| 16603 | } | |||
| 16604 | assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc )((void) (0)); | |||
| 16605 | ||||
| 16606 | return pRet; | |||
| 16607 | } | |||
| 16608 | ||||
| 16609 | /* | |||
| 16610 | ** The iterator passed as the only argument must be a tokendata=1 iterator | |||
| 16611 | ** (pIter->pTokenDataIter!=0). This function sets the iterator output | |||
| 16612 | ** variables (pIter->base.*) according to the contents of the current | |||
| 16613 | ** row. | |||
| 16614 | */ | |||
| 16615 | static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){ | |||
| 16616 | int ii; | |||
| 16617 | int nHit = 0; | |||
| 16618 | i64 iRowid = SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))); | |||
| 16619 | int iMin = 0; | |||
| 16620 | ||||
| 16621 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | |||
| 16622 | ||||
| 16623 | pIter->base.nData = 0; | |||
| 16624 | pIter->base.pData = 0; | |||
| 16625 | ||||
| 16626 | for(ii=0; ii<pT->nIter; ii++){ | |||
| 16627 | Fts5Iter *p = pT->apIter[ii]; | |||
| 16628 | if( p->base.bEof==0 ){ | |||
| 16629 | if( nHit==0 || p->base.iRowid<iRowid ){ | |||
| 16630 | iRowid = p->base.iRowid; | |||
| 16631 | nHit = 1; | |||
| 16632 | pIter->base.pData = p->base.pData; | |||
| 16633 | pIter->base.nData = p->base.nData; | |||
| 16634 | iMin = ii; | |||
| 16635 | }else if( p->base.iRowid==iRowid ){ | |||
| 16636 | nHit++; | |||
| 16637 | } | |||
| 16638 | } | |||
| 16639 | } | |||
| 16640 | ||||
| 16641 | if( nHit==0 ){ | |||
| 16642 | pIter->base.bEof = 1; | |||
| 16643 | }else{ | |||
| 16644 | int eDetail = pIter->pIndex->pConfig->eDetail; | |||
| 16645 | pIter->base.bEof = 0; | |||
| 16646 | pIter->base.iRowid = iRowid; | |||
| 16647 | ||||
| 16648 | if( nHit==1 && eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 16649 | fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, 0, iRowid, -1); | |||
| 16650 | }else | |||
| 16651 | if( nHit>1 && eDetail!=FTS5_DETAIL_NONE1 ){ | |||
| 16652 | int nReader = 0; | |||
| 16653 | int nByte = 0; | |||
| 16654 | i64 iPrev = 0; | |||
| 16655 | ||||
| 16656 | /* Allocate array of iterators if they are not already allocated. */ | |||
| 16657 | if( pT->aPoslistReader==0 ){ | |||
| 16658 | pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero( | |||
| 16659 | &pIter->pIndex->rc, | |||
| 16660 | pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int)) | |||
| 16661 | ); | |||
| 16662 | if( pT->aPoslistReader==0 ) return; | |||
| 16663 | pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter]; | |||
| 16664 | } | |||
| 16665 | ||||
| 16666 | /* Populate an iterator for each poslist that will be merged */ | |||
| 16667 | for(ii=0; ii<pT->nIter; ii++){ | |||
| 16668 | Fts5Iter *p = pT->apIter[ii]; | |||
| 16669 | if( iRowid==p->base.iRowid ){ | |||
| 16670 | pT->aPoslistToIter[nReader] = ii; | |||
| 16671 | sqlite3Fts5PoslistReaderInit( | |||
| 16672 | p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++] | |||
| 16673 | ); | |||
| 16674 | nByte += p->base.nData; | |||
| 16675 | } | |||
| 16676 | } | |||
| 16677 | ||||
| 16678 | /* Ensure the output buffer is large enough */ | |||
| 16679 | if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10)( (u32)((&pIter->poslist)->n) + (u32)(nByte+nHit*10 ) <= (u32)((&pIter->poslist)->nSpace) ? 0 : sqlite3Fts5BufferSize ((&pIter->pIndex->rc),(&pIter->poslist),(nByte +nHit*10)+(&pIter->poslist)->n) ) ){ | |||
| 16680 | return; | |||
| 16681 | } | |||
| 16682 | ||||
| 16683 | /* Ensure the token-mapping is large enough */ | |||
| 16684 | if( eDetail==FTS5_DETAIL_FULL0 && pT->nMapAlloc<(pT->nMap + nByte) ){ | |||
| 16685 | int nNew = (pT->nMapAlloc + nByte) * 2; | |||
| 16686 | Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_reallocsqlite3_api->realloc( | |||
| 16687 | pT->aMap, nNew*sizeof(Fts5TokenDataMap) | |||
| 16688 | ); | |||
| 16689 | if( aNew==0 ){ | |||
| 16690 | pIter->pIndex->rc = SQLITE_NOMEM7; | |||
| 16691 | return; | |||
| 16692 | } | |||
| 16693 | pT->aMap = aNew; | |||
| 16694 | pT->nMapAlloc = nNew; | |||
| 16695 | } | |||
| 16696 | ||||
| 16697 | pIter->poslist.n = 0; | |||
| 16698 | ||||
| 16699 | while( 1 ){ | |||
| 16700 | i64 iMinPos = LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | |||
| 16701 | ||||
| 16702 | /* Find smallest position */ | |||
| 16703 | iMin = 0; | |||
| 16704 | for(ii=0; ii<nReader; ii++){ | |||
| 16705 | Fts5PoslistReader *pReader = &pT->aPoslistReader[ii]; | |||
| 16706 | if( pReader->bEof==0 ){ | |||
| 16707 | if( pReader->iPos<iMinPos ){ | |||
| 16708 | iMinPos = pReader->iPos; | |||
| 16709 | iMin = ii; | |||
| 16710 | } | |||
| 16711 | } | |||
| 16712 | } | |||
| 16713 | ||||
| 16714 | /* If all readers were at EOF, break out of the loop. */ | |||
| 16715 | if( iMinPos==LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)) ) break; | |||
| 16716 | ||||
| 16717 | sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos); | |||
| 16718 | sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]); | |||
| 16719 | ||||
| 16720 | if( eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 16721 | pT->aMap[pT->nMap].iPos = iMinPos; | |||
| 16722 | pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin]; | |||
| 16723 | pT->aMap[pT->nMap].iRowid = iRowid; | |||
| 16724 | pT->nMap++; | |||
| 16725 | } | |||
| 16726 | } | |||
| 16727 | ||||
| 16728 | pIter->base.pData = pIter->poslist.p; | |||
| 16729 | pIter->base.nData = pIter->poslist.n; | |||
| 16730 | } | |||
| 16731 | } | |||
| 16732 | } | |||
| 16733 | ||||
| 16734 | /* | |||
| 16735 | ** The iterator passed as the only argument must be a tokendata=1 iterator | |||
| 16736 | ** (pIter->pTokenDataIter!=0). This function advances the iterator. If | |||
| 16737 | ** argument bFrom is false, then the iterator is advanced to the next | |||
| 16738 | ** entry. Or, if bFrom is true, it is advanced to the first entry with | |||
| 16739 | ** a rowid of iFrom or greater. | |||
| 16740 | */ | |||
| 16741 | static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ | |||
| 16742 | int ii; | |||
| 16743 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | |||
| 16744 | Fts5Index *pIndex = pIter->pIndex; | |||
| 16745 | ||||
| 16746 | for(ii=0; ii<pT->nIter; ii++){ | |||
| 16747 | Fts5Iter *p = pT->apIter[ii]; | |||
| 16748 | if( p->base.bEof==0 | |||
| 16749 | && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom)) | |||
| 16750 | ){ | |||
| 16751 | fts5MultiIterNext(pIndex, p, bFrom, iFrom); | |||
| 16752 | while( bFrom && p->base.bEof==0 | |||
| 16753 | && p->base.iRowid<iFrom | |||
| 16754 | && pIndex->rc==SQLITE_OK0 | |||
| 16755 | ){ | |||
| 16756 | fts5MultiIterNext(pIndex, p, 0, 0); | |||
| 16757 | } | |||
| 16758 | } | |||
| 16759 | } | |||
| 16760 | ||||
| 16761 | if( pIndex->rc==SQLITE_OK0 ){ | |||
| 16762 | fts5IterSetOutputsTokendata(pIter); | |||
| 16763 | } | |||
| 16764 | } | |||
| 16765 | ||||
| 16766 | /* | |||
| 16767 | ** If the segment-iterator passed as the first argument is at EOF, then | |||
| 16768 | ** set pIter->term to a copy of buffer pTerm. | |||
| 16769 | */ | |||
| 16770 | static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){ | |||
| 16771 | if( pIter && pIter->aSeg[0].pLeaf==0 ){ | |||
| 16772 | fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p)sqlite3Fts5BufferSet(&pIter->pIndex->rc,&pIter-> aSeg[0].term,pTerm->n,pTerm->p); | |||
| 16773 | } | |||
| 16774 | } | |||
| 16775 | ||||
| 16776 | /* | |||
| 16777 | ** This function sets up an iterator to use for a non-prefix query on a | |||
| 16778 | ** tokendata=1 table. | |||
| 16779 | */ | |||
| 16780 | static Fts5Iter *fts5SetupTokendataIter( | |||
| 16781 | Fts5Index *p, /* FTS index to query */ | |||
| 16782 | const u8 *pToken, /* Buffer containing query term */ | |||
| 16783 | int nToken, /* Size of buffer pToken in bytes */ | |||
| 16784 | Fts5Colset *pColset /* Colset to filter on */ | |||
| 16785 | ){ | |||
| 16786 | Fts5Iter *pRet = 0; | |||
| 16787 | Fts5TokenDataIter *pSet = 0; | |||
| 16788 | Fts5Structure *pStruct = 0; | |||
| 16789 | const int flags = FTS5INDEX_QUERY_SCANONETERM0x0100 | FTS5INDEX_QUERY_SCAN0x0008; | |||
| 16790 | ||||
| 16791 | Fts5Buffer bSeek = {0, 0, 0}; | |||
| 16792 | Fts5Buffer *pSmall = 0; | |||
| 16793 | ||||
| 16794 | fts5IndexFlush(p); | |||
| 16795 | pStruct = fts5StructureRead(p); | |||
| 16796 | ||||
| 16797 | while( p->rc==SQLITE_OK0 ){ | |||
| 16798 | Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0; | |||
| 16799 | Fts5Iter *pNew = 0; | |||
| 16800 | Fts5SegIter *pNewIter = 0; | |||
| 16801 | Fts5SegIter *pPrevIter = 0; | |||
| 16802 | ||||
| 16803 | int iLvl, iSeg, ii; | |||
| 16804 | ||||
| 16805 | pNew = fts5MultiIterAlloc(p, pStruct->nSegment); | |||
| 16806 | if( pSmall ){ | |||
| 16807 | fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p)sqlite3Fts5BufferSet(&p->rc,&bSeek,pSmall->n,pSmall ->p); | |||
| 16808 | fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0")sqlite3Fts5BufferAppendBlob(&p->rc,&bSeek,1,(const u8*)"\0"); | |||
| 16809 | }else{ | |||
| 16810 | fts5BufferSet(&p->rc, &bSeek, nToken, pToken)sqlite3Fts5BufferSet(&p->rc,&bSeek,nToken,pToken); | |||
| 16811 | } | |||
| 16812 | if( p->rc ){ | |||
| 16813 | fts5IterClose((Fts5IndexIter*)pNew); | |||
| 16814 | break; | |||
| 16815 | } | |||
| 16816 | ||||
| 16817 | pNewIter = &pNew->aSeg[0]; | |||
| 16818 | pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0); | |||
| 16819 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
| 16820 | for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ | |||
| 16821 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | |||
| 16822 | int bDone = 0; | |||
| 16823 | ||||
| 16824 | if( pPrevIter ){ | |||
| 16825 | if( fts5BufferCompare(pSmall, &pPrevIter->term) ){ | |||
| 16826 | memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter)); | |||
| 16827 | memset(pPrevIter, 0, sizeof(Fts5SegIter)); | |||
| 16828 | bDone = 1; | |||
| 16829 | }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){ | |||
| 16830 | fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter); | |||
| 16831 | bDone = 1; | |||
| 16832 | } | |||
| 16833 | } | |||
| 16834 | ||||
| 16835 | if( bDone==0 ){ | |||
| 16836 | fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter); | |||
| 16837 | } | |||
| 16838 | ||||
| 16839 | if( pPrevIter ){ | |||
| 16840 | if( pPrevIter->pTombArray ){ | |||
| 16841 | pNewIter->pTombArray = pPrevIter->pTombArray; | |||
| 16842 | pNewIter->pTombArray->nRef++; | |||
| 16843 | } | |||
| 16844 | }else{ | |||
| 16845 | fts5SegIterAllocTombstone(p, pNewIter); | |||
| 16846 | } | |||
| 16847 | ||||
| 16848 | pNewIter++; | |||
| 16849 | if( pPrevIter ) pPrevIter++; | |||
| 16850 | if( p->rc ) break; | |||
| 16851 | } | |||
| 16852 | } | |||
| 16853 | fts5TokendataSetTermIfEof(pPrev, pSmall); | |||
| 16854 | ||||
| 16855 | pNew->bSkipEmpty = 1; | |||
| 16856 | pNew->pColset = pColset; | |||
| 16857 | fts5IterSetOutputCb(&p->rc, pNew); | |||
| 16858 | ||||
| 16859 | /* Loop through all segments in the new iterator. Find the smallest | |||
| 16860 | ** term that any segment-iterator points to. Iterator pNew will be | |||
| 16861 | ** used for this term. Also, set any iterator that points to a term that | |||
| 16862 | ** does not match pToken/nToken to point to EOF */ | |||
| 16863 | pSmall = 0; | |||
| 16864 | for(ii=0; ii<pNew->nSeg; ii++){ | |||
| 16865 | Fts5SegIter *pII = &pNew->aSeg[ii]; | |||
| 16866 | if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){ | |||
| 16867 | fts5SegIterSetEOF(pII); | |||
| 16868 | } | |||
| 16869 | if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){ | |||
| 16870 | pSmall = &pII->term; | |||
| 16871 | } | |||
| 16872 | } | |||
| 16873 | ||||
| 16874 | /* If pSmall is still NULL at this point, then the new iterator does | |||
| 16875 | ** not point to any terms that match the query. So delete it and break | |||
| 16876 | ** out of the loop - all required iterators have been collected. */ | |||
| 16877 | if( pSmall==0 ){ | |||
| 16878 | fts5IterClose((Fts5IndexIter*)pNew); | |||
| 16879 | break; | |||
| 16880 | } | |||
| 16881 | ||||
| 16882 | /* Append this iterator to the set and continue. */ | |||
| 16883 | pSet = fts5AppendTokendataIter(p, pSet, pNew); | |||
| 16884 | } | |||
| 16885 | ||||
| 16886 | if( p->rc==SQLITE_OK0 && pSet ){ | |||
| 16887 | int ii; | |||
| 16888 | for(ii=0; ii<pSet->nIter; ii++){ | |||
| 16889 | Fts5Iter *pIter = pSet->apIter[ii]; | |||
| 16890 | int iSeg; | |||
| 16891 | for(iSeg=0; iSeg<pIter->nSeg; iSeg++){ | |||
| 16892 | pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM0x01; | |||
| 16893 | } | |||
| 16894 | fts5MultiIterFinishSetup(p, pIter); | |||
| 16895 | } | |||
| 16896 | } | |||
| 16897 | ||||
| 16898 | if( p->rc==SQLITE_OK0 ){ | |||
| 16899 | pRet = fts5MultiIterAlloc(p, 0); | |||
| 16900 | } | |||
| 16901 | if( pRet ){ | |||
| 16902 | pRet->nSeg = 0; | |||
| 16903 | pRet->pTokenDataIter = pSet; | |||
| 16904 | if( pSet ){ | |||
| 16905 | fts5IterSetOutputsTokendata(pRet); | |||
| 16906 | }else{ | |||
| 16907 | pRet->base.bEof = 1; | |||
| 16908 | } | |||
| 16909 | }else{ | |||
| 16910 | fts5TokendataIterDelete(pSet); | |||
| 16911 | } | |||
| 16912 | ||||
| 16913 | fts5StructureRelease(pStruct); | |||
| 16914 | fts5BufferFree(&bSeek)sqlite3Fts5BufferFree(&bSeek); | |||
| 16915 | return pRet; | |||
| 16916 | } | |||
| 16917 | ||||
| 16918 | /* | |||
| 16919 | ** Open a new iterator to iterate though all rowid that match the | |||
| 16920 | ** specified token or token prefix. | |||
| 16921 | */ | |||
| 16922 | static int sqlite3Fts5IndexQuery( | |||
| 16923 | Fts5Index *p, /* FTS index to query */ | |||
| 16924 | const char *pToken, int nToken, /* Token (or prefix) to query for */ | |||
| 16925 | int flags, /* Mask of FTS5INDEX_QUERY_X flags */ | |||
| 16926 | Fts5Colset *pColset, /* Match these columns only */ | |||
| 16927 | Fts5IndexIter **ppIter /* OUT: New iterator object */ | |||
| 16928 | ){ | |||
| 16929 | Fts5Config *pConfig = p->pConfig; | |||
| 16930 | Fts5Iter *pRet = 0; | |||
| 16931 | Fts5Buffer buf = {0, 0, 0}; | |||
| 16932 | ||||
| 16933 | /* If the QUERY_SCAN flag is set, all other flags must be clear. */ | |||
| 16934 | assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN )((void) (0)); | |||
| 16935 | ||||
| 16936 | if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ | |||
| 16937 | int iIdx = 0; /* Index to search */ | |||
| 16938 | int iPrefixIdx = 0; /* +1 prefix index */ | |||
| 16939 | int bTokendata = pConfig->bTokendata; | |||
| 16940 | assert( buf.p!=0 )((void) (0)); | |||
| 16941 | if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); | |||
| 16942 | ||||
| 16943 | /* The NOTOKENDATA flag is set when each token in a tokendata=1 table | |||
| 16944 | ** should be treated individually, instead of merging all those with | |||
| 16945 | ** a common prefix into a single entry. This is used, for example, by | |||
| 16946 | ** queries performed as part of an integrity-check, or by the fts5vocab | |||
| 16947 | ** module. */ | |||
| 16948 | if( flags & (FTS5INDEX_QUERY_NOTOKENDATA0x0080|FTS5INDEX_QUERY_SCAN0x0008) ){ | |||
| 16949 | bTokendata = 0; | |||
| 16950 | } | |||
| 16951 | ||||
| 16952 | /* Figure out which index to search and set iIdx accordingly. If this | |||
| 16953 | ** is a prefix query for which there is no prefix index, set iIdx to | |||
| 16954 | ** greater than pConfig->nPrefix to indicate that the query will be | |||
| 16955 | ** satisfied by scanning multiple terms in the main index. | |||
| 16956 | ** | |||
| 16957 | ** If the QUERY_TEST_NOIDX flag was specified, then this must be a | |||
| 16958 | ** prefix-query. Instead of using a prefix-index (if one exists), | |||
| 16959 | ** evaluate the prefix query using the main FTS index. This is used | |||
| 16960 | ** for internal sanity checking by the integrity-check in debug | |||
| 16961 | ** mode only. */ | |||
| 16962 | #ifdef SQLITE_DEBUG | |||
| 16963 | if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX0x0004) ){ | |||
| 16964 | assert( flags & FTS5INDEX_QUERY_PREFIX )((void) (0)); | |||
| 16965 | iIdx = 1+pConfig->nPrefix; | |||
| 16966 | }else | |||
| 16967 | #endif | |||
| 16968 | if( flags & FTS5INDEX_QUERY_PREFIX0x0001 ){ | |||
| 16969 | int nChar = fts5IndexCharlen(pToken, nToken); | |||
| 16970 | for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ | |||
| 16971 | int nIdxChar = pConfig->aPrefix[iIdx-1]; | |||
| 16972 | if( nIdxChar==nChar ) break; | |||
| 16973 | if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx; | |||
| 16974 | } | |||
| 16975 | } | |||
| 16976 | ||||
| 16977 | if( bTokendata && iIdx==0 ){ | |||
| 16978 | buf.p[0] = FTS5_MAIN_PREFIX'0'; | |||
| 16979 | pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset); | |||
| 16980 | }else if( iIdx<=pConfig->nPrefix ){ | |||
| 16981 | /* Straight index lookup */ | |||
| 16982 | Fts5Structure *pStruct = fts5StructureRead(p); | |||
| 16983 | buf.p[0] = (u8)(FTS5_MAIN_PREFIX'0' + iIdx); | |||
| 16984 | if( pStruct ){ | |||
| 16985 | fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY0x0010, | |||
| 16986 | pColset, buf.p, nToken+1, -1, 0, &pRet | |||
| 16987 | ); | |||
| 16988 | fts5StructureRelease(pStruct); | |||
| 16989 | } | |||
| 16990 | }else{ | |||
| 16991 | /* Scan multiple terms in the main index for a prefix query. */ | |||
| 16992 | int bDesc = (flags & FTS5INDEX_QUERY_DESC0x0002)!=0; | |||
| 16993 | fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); | |||
| 16994 | if( pRet==0 ){ | |||
| 16995 | assert( p->rc!=SQLITE_OK )((void) (0)); | |||
| 16996 | }else{ | |||
| 16997 | assert( pRet->pColset==0 )((void) (0)); | |||
| 16998 | fts5IterSetOutputCb(&p->rc, pRet); | |||
| 16999 | if( p->rc==SQLITE_OK0 ){ | |||
| 17000 | Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; | |||
| 17001 | if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); | |||
| 17002 | } | |||
| 17003 | } | |||
| 17004 | } | |||
| 17005 | ||||
| 17006 | if( p->rc ){ | |||
| 17007 | fts5IterClose((Fts5IndexIter*)pRet); | |||
| 17008 | pRet = 0; | |||
| 17009 | fts5IndexCloseReader(p); | |||
| 17010 | } | |||
| 17011 | ||||
| 17012 | *ppIter = (Fts5IndexIter*)pRet; | |||
| 17013 | sqlite3Fts5BufferFree(&buf); | |||
| 17014 | } | |||
| 17015 | return fts5IndexReturn(p); | |||
| 17016 | } | |||
| 17017 | ||||
| 17018 | /* | |||
| 17019 | ** Return true if the iterator passed as the only argument is at EOF. | |||
| 17020 | */ | |||
| 17021 | /* | |||
| 17022 | ** Move to the next matching rowid. | |||
| 17023 | */ | |||
| 17024 | static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ | |||
| 17025 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
| 17026 | assert( pIter->pIndex->rc==SQLITE_OK )((void) (0)); | |||
| 17027 | if( pIter->nSeg==0 ){ | |||
| 17028 | assert( pIter->pTokenDataIter )((void) (0)); | |||
| 17029 | fts5TokendataIterNext(pIter, 0, 0); | |||
| 17030 | }else{ | |||
| 17031 | fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); | |||
| 17032 | } | |||
| 17033 | return fts5IndexReturn(pIter->pIndex); | |||
| 17034 | } | |||
| 17035 | ||||
| 17036 | /* | |||
| 17037 | ** Move to the next matching term/rowid. Used by the fts5vocab module. | |||
| 17038 | */ | |||
| 17039 | static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ | |||
| 17040 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
| 17041 | Fts5Index *p = pIter->pIndex; | |||
| 17042 | ||||
| 17043 | assert( pIter->pIndex->rc==SQLITE_OK )((void) (0)); | |||
| 17044 | ||||
| 17045 | fts5MultiIterNext(p, pIter, 0, 0); | |||
| 17046 | if( p->rc==SQLITE_OK0 ){ | |||
| 17047 | Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; | |||
| 17048 | if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX'0' ){ | |||
| 17049 | fts5DataRelease(pSeg->pLeaf); | |||
| 17050 | pSeg->pLeaf = 0; | |||
| 17051 | pIter->base.bEof = 1; | |||
| 17052 | } | |||
| 17053 | } | |||
| 17054 | ||||
| 17055 | return fts5IndexReturn(pIter->pIndex); | |||
| 17056 | } | |||
| 17057 | ||||
| 17058 | /* | |||
| 17059 | ** Move to the next matching rowid that occurs at or after iMatch. The | |||
| 17060 | ** definition of "at or after" depends on whether this iterator iterates | |||
| 17061 | ** in ascending or descending rowid order. | |||
| 17062 | */ | |||
| 17063 | static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ | |||
| 17064 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
| 17065 | if( pIter->nSeg==0 ){ | |||
| 17066 | assert( pIter->pTokenDataIter )((void) (0)); | |||
| 17067 | fts5TokendataIterNext(pIter, 1, iMatch); | |||
| 17068 | }else{ | |||
| 17069 | fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); | |||
| 17070 | } | |||
| 17071 | return fts5IndexReturn(pIter->pIndex); | |||
| 17072 | } | |||
| 17073 | ||||
| 17074 | /* | |||
| 17075 | ** Return the current term. | |||
| 17076 | */ | |||
| 17077 | static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ | |||
| 17078 | int n; | |||
| 17079 | const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); | |||
| 17080 | assert_nc( z || n<=1 )((void) (0)); | |||
| 17081 | *pn = n-1; | |||
| 17082 | return (z ? &z[1] : 0); | |||
| 17083 | } | |||
| 17084 | ||||
| 17085 | /* | |||
| 17086 | ** pIter is a prefix query. This function populates pIter->pTokenDataIter | |||
| 17087 | ** with an Fts5TokenDataIter object containing mappings for all rows | |||
| 17088 | ** matched by the query. | |||
| 17089 | */ | |||
| 17090 | static int fts5SetupPrefixIterTokendata( | |||
| 17091 | Fts5Iter *pIter, | |||
| 17092 | const char *pToken, /* Token prefix to search for */ | |||
| 17093 | int nToken /* Size of pToken in bytes */ | |||
| 17094 | ){ | |||
| 17095 | Fts5Index *p = pIter->pIndex; | |||
| 17096 | Fts5Buffer token = {0, 0, 0}; | |||
| 17097 | TokendataSetupCtx ctx; | |||
| 17098 | ||||
| 17099 | memset(&ctx, 0, sizeof(ctx)); | |||
| 17100 | ||||
| 17101 | fts5BufferGrow(&p->rc, &token, nToken+1)( (u32)((&token)->n) + (u32)(nToken+1) <= (u32)((& token)->nSpace) ? 0 : sqlite3Fts5BufferSize((&p->rc ),(&token),(nToken+1)+(&token)->n) ); | |||
| 17102 | assert( token.p!=0 || p->rc!=SQLITE_OK )((void) (0)); | |||
| 17103 | ctx.pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, | |||
| 17104 | SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | |||
| 17105 | ||||
| 17106 | if( p->rc==SQLITE_OK0 ){ | |||
| 17107 | ||||
| 17108 | /* Fill in the token prefix to search for */ | |||
| 17109 | token.p[0] = FTS5_MAIN_PREFIX'0'; | |||
| 17110 | memcpy(&token.p[1], pToken, nToken); | |||
| 17111 | token.n = nToken+1; | |||
| 17112 | ||||
| 17113 | fts5VisitEntries( | |||
| 17114 | p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx | |||
| 17115 | ); | |||
| 17116 | ||||
| 17117 | fts5TokendataIterSortMap(p, ctx.pT); | |||
| 17118 | } | |||
| 17119 | ||||
| 17120 | if( p->rc==SQLITE_OK0 ){ | |||
| 17121 | pIter->pTokenDataIter = ctx.pT; | |||
| 17122 | }else{ | |||
| 17123 | fts5TokendataIterDelete(ctx.pT); | |||
| 17124 | } | |||
| 17125 | fts5BufferFree(&token)sqlite3Fts5BufferFree(&token); | |||
| 17126 | ||||
| 17127 | return fts5IndexReturn(p); | |||
| 17128 | } | |||
| 17129 | ||||
| 17130 | /* | |||
| 17131 | ** This is used by xInstToken() to access the token at offset iOff, column | |||
| 17132 | ** iCol of row iRowid. The token is returned via output variables *ppOut | |||
| 17133 | ** and *pnOut. The iterator passed as the first argument must be a tokendata=1 | |||
| 17134 | ** iterator (pIter->pTokenDataIter!=0). | |||
| 17135 | ** | |||
| 17136 | ** pToken/nToken: | |||
| 17137 | */ | |||
| 17138 | static int sqlite3Fts5IterToken( | |||
| 17139 | Fts5IndexIter *pIndexIter, | |||
| 17140 | const char *pToken, int nToken, | |||
| 17141 | i64 iRowid, | |||
| 17142 | int iCol, | |||
| 17143 | int iOff, | |||
| 17144 | const char **ppOut, int *pnOut | |||
| 17145 | ){ | |||
| 17146 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
| 17147 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | |||
| 17148 | i64 iPos = (((i64)iCol)<<32) + iOff; | |||
| 17149 | Fts5TokenDataMap *aMap = 0; | |||
| 17150 | int i1 = 0; | |||
| 17151 | int i2 = 0; | |||
| 17152 | int iTest = 0; | |||
| 17153 | ||||
| 17154 | assert( pT || (pToken && pIter->nSeg>0) )((void) (0)); | |||
| 17155 | if( pT==0 ){ | |||
| 17156 | int rc = fts5SetupPrefixIterTokendata(pIter, pToken, nToken); | |||
| 17157 | if( rc!=SQLITE_OK0 ) return rc; | |||
| 17158 | pT = pIter->pTokenDataIter; | |||
| 17159 | } | |||
| 17160 | ||||
| 17161 | i2 = pT->nMap; | |||
| 17162 | aMap = pT->aMap; | |||
| 17163 | ||||
| 17164 | while( i2>i1 ){ | |||
| 17165 | iTest = (i1 + i2) / 2; | |||
| 17166 | ||||
| 17167 | if( aMap[iTest].iRowid<iRowid ){ | |||
| 17168 | i1 = iTest+1; | |||
| 17169 | }else if( aMap[iTest].iRowid>iRowid ){ | |||
| 17170 | i2 = iTest; | |||
| 17171 | }else{ | |||
| 17172 | if( aMap[iTest].iPos<iPos ){ | |||
| 17173 | if( aMap[iTest].iPos<0 ){ | |||
| 17174 | break; | |||
| 17175 | } | |||
| 17176 | i1 = iTest+1; | |||
| 17177 | }else if( aMap[iTest].iPos>iPos ){ | |||
| 17178 | i2 = iTest; | |||
| 17179 | }else{ | |||
| 17180 | break; | |||
| 17181 | } | |||
| 17182 | } | |||
| 17183 | } | |||
| 17184 | ||||
| 17185 | if( i2>i1 ){ | |||
| 17186 | if( pIter->nSeg==0 ){ | |||
| 17187 | Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter]; | |||
| 17188 | *ppOut = (const char*)pMap->aSeg[0].term.p+1; | |||
| 17189 | *pnOut = pMap->aSeg[0].term.n-1; | |||
| 17190 | }else{ | |||
| 17191 | Fts5TokenDataMap *p = &aMap[iTest]; | |||
| 17192 | *ppOut = (const char*)&pT->terms.p[p->iIter]; | |||
| 17193 | *pnOut = aMap[iTest].nByte; | |||
| 17194 | } | |||
| 17195 | } | |||
| 17196 | ||||
| 17197 | return SQLITE_OK0; | |||
| 17198 | } | |||
| 17199 | ||||
| 17200 | /* | |||
| 17201 | ** Clear any existing entries from the token-map associated with the | |||
| 17202 | ** iterator passed as the only argument. | |||
| 17203 | */ | |||
| 17204 | static void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){ | |||
| 17205 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
| 17206 | if( pIter && pIter->pTokenDataIter | |||
| 17207 | && (pIter->nSeg==0 || pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL0) | |||
| 17208 | ){ | |||
| 17209 | pIter->pTokenDataIter->nMap = 0; | |||
| 17210 | } | |||
| 17211 | } | |||
| 17212 | ||||
| 17213 | /* | |||
| 17214 | ** Set a token-mapping for the iterator passed as the first argument. This | |||
| 17215 | ** is used in detail=column or detail=none mode when a token is requested | |||
| 17216 | ** using the xInstToken() API. In this case the caller tokenizers the | |||
| 17217 | ** current row and configures the token-mapping via multiple calls to this | |||
| 17218 | ** function. | |||
| 17219 | */ | |||
| 17220 | static int sqlite3Fts5IndexIterWriteTokendata( | |||
| 17221 | Fts5IndexIter *pIndexIter, | |||
| 17222 | const char *pToken, int nToken, | |||
| 17223 | i64 iRowid, int iCol, int iOff | |||
| 17224 | ){ | |||
| 17225 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; | |||
| 17226 | Fts5TokenDataIter *pT = pIter->pTokenDataIter; | |||
| 17227 | Fts5Index *p = pIter->pIndex; | |||
| 17228 | i64 iPos = (((i64)iCol)<<32) + iOff; | |||
| 17229 | ||||
| 17230 | assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL )((void) (0)); | |||
| 17231 | assert( pIter->pTokenDataIter || pIter->nSeg>0 )((void) (0)); | |||
| 17232 | if( pIter->nSeg>0 ){ | |||
| 17233 | /* This is a prefix term iterator. */ | |||
| 17234 | if( pT==0 ){ | |||
| 17235 | pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc, | |||
| 17236 | SZ_FTS5TOKENDATAITER(1)(__builtin_offsetof(Fts5TokenDataIter, apIter) + (1)*sizeof(Fts5Iter ))); | |||
| 17237 | pIter->pTokenDataIter = pT; | |||
| 17238 | } | |||
| 17239 | if( pT ){ | |||
| 17240 | fts5TokendataIterAppendMap(p, pT, pT->terms.n, nToken, iRowid, iPos); | |||
| 17241 | fts5BufferAppendBlob(&p->rc, &pT->terms, nToken, (const u8*)pToken)sqlite3Fts5BufferAppendBlob(&p->rc,&pT->terms,nToken ,(const u8*)pToken); | |||
| 17242 | } | |||
| 17243 | }else{ | |||
| 17244 | int ii; | |||
| 17245 | for(ii=0; ii<pT->nIter; ii++){ | |||
| 17246 | Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term; | |||
| 17247 | if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break; | |||
| 17248 | } | |||
| 17249 | if( ii<pT->nIter ){ | |||
| 17250 | fts5TokendataIterAppendMap(p, pT, ii, 0, iRowid, iPos); | |||
| 17251 | } | |||
| 17252 | } | |||
| 17253 | return fts5IndexReturn(p); | |||
| 17254 | } | |||
| 17255 | ||||
| 17256 | /* | |||
| 17257 | ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). | |||
| 17258 | */ | |||
| 17259 | static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ | |||
| 17260 | if( pIndexIter ){ | |||
| 17261 | Fts5Index *pIndex = ((Fts5Iter*)pIndexIter)->pIndex; | |||
| 17262 | fts5IterClose(pIndexIter); | |||
| 17263 | fts5IndexReturn(pIndex); | |||
| 17264 | } | |||
| 17265 | } | |||
| 17266 | ||||
| 17267 | /* | |||
| 17268 | ** Read and decode the "averages" record from the database. | |||
| 17269 | ** | |||
| 17270 | ** Parameter anSize must point to an array of size nCol, where nCol is | |||
| 17271 | ** the number of user defined columns in the FTS table. | |||
| 17272 | */ | |||
| 17273 | static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ | |||
| 17274 | int nCol = p->pConfig->nCol; | |||
| 17275 | Fts5Data *pData; | |||
| 17276 | ||||
| 17277 | *pnRow = 0; | |||
| 17278 | memset(anSize, 0, sizeof(i64) * nCol); | |||
| 17279 | pData = fts5DataRead(p, FTS5_AVERAGES_ROWID1); | |||
| 17280 | if( p->rc==SQLITE_OK0 && pData->nn ){ | |||
| 17281 | int i = 0; | |||
| 17282 | int iCol; | |||
| 17283 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[i], (u64*)pnRow); | |||
| 17284 | for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ | |||
| 17285 | i += fts5GetVarintsqlite3Fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); | |||
| 17286 | } | |||
| 17287 | } | |||
| 17288 | ||||
| 17289 | fts5DataRelease(pData); | |||
| 17290 | return fts5IndexReturn(p); | |||
| 17291 | } | |||
| 17292 | ||||
| 17293 | /* | |||
| 17294 | ** Replace the current "averages" record with the contents of the buffer | |||
| 17295 | ** supplied as the second argument. | |||
| 17296 | */ | |||
| 17297 | static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ | |||
| 17298 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 17299 | fts5DataWrite(p, FTS5_AVERAGES_ROWID1, pData, nData); | |||
| 17300 | return fts5IndexReturn(p); | |||
| 17301 | } | |||
| 17302 | ||||
| 17303 | /* | |||
| 17304 | ** Return the total number of blocks this module has read from the %_data | |||
| 17305 | ** table since it was created. | |||
| 17306 | */ | |||
| 17307 | static int sqlite3Fts5IndexReads(Fts5Index *p){ | |||
| 17308 | return p->nRead; | |||
| 17309 | } | |||
| 17310 | ||||
| 17311 | /* | |||
| 17312 | ** Set the 32-bit cookie value stored at the start of all structure | |||
| 17313 | ** records to the value passed as the second argument. | |||
| 17314 | ** | |||
| 17315 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
| 17316 | ** occurs. | |||
| 17317 | */ | |||
| 17318 | static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ | |||
| 17319 | int rc; /* Return code */ | |||
| 17320 | Fts5Config *pConfig = p->pConfig; /* Configuration object */ | |||
| 17321 | u8 aCookie[4]; /* Binary representation of iNew */ | |||
| 17322 | sqlite3_blob *pBlob = 0; | |||
| 17323 | ||||
| 17324 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 17325 | sqlite3Fts5Put32(aCookie, iNew); | |||
| 17326 | ||||
| 17327 | rc = sqlite3_blob_opensqlite3_api->blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, | |||
| 17328 | "block", FTS5_STRUCTURE_ROWID10, 1, &pBlob | |||
| 17329 | ); | |||
| 17330 | if( rc==SQLITE_OK0 ){ | |||
| 17331 | sqlite3_blob_writesqlite3_api->blob_write(pBlob, aCookie, 4, 0); | |||
| 17332 | rc = sqlite3_blob_closesqlite3_api->blob_close(pBlob); | |||
| 17333 | } | |||
| 17334 | ||||
| 17335 | return rc; | |||
| 17336 | } | |||
| 17337 | ||||
| 17338 | static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ | |||
| 17339 | Fts5Structure *pStruct; | |||
| 17340 | pStruct = fts5StructureRead(p); | |||
| 17341 | fts5StructureRelease(pStruct); | |||
| 17342 | return fts5IndexReturn(p); | |||
| 17343 | } | |||
| 17344 | ||||
| 17345 | /* | |||
| 17346 | ** Retrieve the origin value that will be used for the segment currently | |||
| 17347 | ** being accumulated in the in-memory hash table when it is flushed to | |||
| 17348 | ** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to | |||
| 17349 | ** the queried value. Or, if an error occurs, an error code is returned | |||
| 17350 | ** and the final value of (*piOrigin) is undefined. | |||
| 17351 | */ | |||
| 17352 | static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){ | |||
| 17353 | Fts5Structure *pStruct; | |||
| 17354 | pStruct = fts5StructureRead(p); | |||
| 17355 | if( pStruct ){ | |||
| 17356 | *piOrigin = pStruct->nOriginCntr; | |||
| 17357 | fts5StructureRelease(pStruct); | |||
| 17358 | } | |||
| 17359 | return fts5IndexReturn(p); | |||
| 17360 | } | |||
| 17361 | ||||
| 17362 | /* | |||
| 17363 | ** Buffer pPg contains a page of a tombstone hash table - one of nPg pages | |||
| 17364 | ** associated with the same segment. This function adds rowid iRowid to | |||
| 17365 | ** the hash table. The caller is required to guarantee that there is at | |||
| 17366 | ** least one free slot on the page. | |||
| 17367 | ** | |||
| 17368 | ** If parameter bForce is false and the hash table is deemed to be full | |||
| 17369 | ** (more than half of the slots are occupied), then non-zero is returned | |||
| 17370 | ** and iRowid not inserted. Or, if bForce is true or if the hash table page | |||
| 17371 | ** is not full, iRowid is inserted and zero returned. | |||
| 17372 | */ | |||
| 17373 | static int fts5IndexTombstoneAddToPage( | |||
| 17374 | Fts5Data *pPg, | |||
| 17375 | int bForce, | |||
| 17376 | int nPg, | |||
| 17377 | u64 iRowid | |||
| 17378 | ){ | |||
| 17379 | const int szKey = TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8); | |||
| 17380 | const int nSlot = TOMBSTONE_NSLOT(pPg)((pPg->nn > 16) ? ((pPg->nn-8) / (pPg->p[0]==4 ? 4 : 8)) : 1); | |||
| 17381 | const int nElem = fts5GetU32(&pPg->p[4]); | |||
| 17382 | int iSlot = (iRowid / nPg) % nSlot; | |||
| 17383 | int nCollide = nSlot; | |||
| 17384 | ||||
| 17385 | if( szKey==4 && iRowid>0xFFFFFFFF ) return 2; | |||
| 17386 | if( iRowid==0 ){ | |||
| 17387 | pPg->p[1] = 0x01; | |||
| 17388 | return 0; | |||
| 17389 | } | |||
| 17390 | ||||
| 17391 | if( bForce==0 && nElem>=(nSlot/2) ){ | |||
| 17392 | return 1; | |||
| 17393 | } | |||
| 17394 | ||||
| 17395 | fts5PutU32(&pPg->p[4], nElem+1); | |||
| 17396 | if( szKey==4 ){ | |||
| 17397 | u32 *aSlot = (u32*)&pPg->p[8]; | |||
| 17398 | while( aSlot[iSlot] ){ | |||
| 17399 | iSlot = (iSlot + 1) % nSlot; | |||
| 17400 | if( nCollide--==0 ) return 0; | |||
| 17401 | } | |||
| 17402 | fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid); | |||
| 17403 | }else{ | |||
| 17404 | u64 *aSlot = (u64*)&pPg->p[8]; | |||
| 17405 | while( aSlot[iSlot] ){ | |||
| 17406 | iSlot = (iSlot + 1) % nSlot; | |||
| 17407 | if( nCollide--==0 ) return 0; | |||
| 17408 | } | |||
| 17409 | fts5PutU64((u8*)&aSlot[iSlot], iRowid); | |||
| 17410 | } | |||
| 17411 | ||||
| 17412 | return 0; | |||
| 17413 | } | |||
| 17414 | ||||
| 17415 | /* | |||
| 17416 | ** This function attempts to build a new hash containing all the keys | |||
| 17417 | ** currently in the tombstone hash table for segment pSeg. The new | |||
| 17418 | ** hash will be stored in the nOut buffers passed in array apOut[]. | |||
| 17419 | ** All pages of the new hash use key-size szKey (4 or 8). | |||
| 17420 | ** | |||
| 17421 | ** Return 0 if the hash is successfully rebuilt into the nOut pages. | |||
| 17422 | ** Or non-zero if it is not (because one page became overfull). In this | |||
| 17423 | ** case the caller should retry with a larger nOut parameter. | |||
| 17424 | ** | |||
| 17425 | ** Parameter pData1 is page iPg1 of the hash table being rebuilt. | |||
| 17426 | */ | |||
| 17427 | static int fts5IndexTombstoneRehash( | |||
| 17428 | Fts5Index *p, | |||
| 17429 | Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */ | |||
| 17430 | Fts5Data *pData1, /* One page of current hash - or NULL */ | |||
| 17431 | int iPg1, /* Which page of the current hash is pData1 */ | |||
| 17432 | int szKey, /* 4 or 8, the keysize */ | |||
| 17433 | int nOut, /* Number of output pages */ | |||
| 17434 | Fts5Data **apOut /* Array of output hash pages */ | |||
| 17435 | ){ | |||
| 17436 | int ii; | |||
| 17437 | int res = 0; | |||
| 17438 | ||||
| 17439 | /* Initialize the headers of all the output pages */ | |||
| 17440 | for(ii=0; ii<nOut; ii++){ | |||
| 17441 | apOut[ii]->p[0] = szKey; | |||
| 17442 | fts5PutU32(&apOut[ii]->p[4], 0); | |||
| 17443 | } | |||
| 17444 | ||||
| 17445 | /* Loop through the current pages of the hash table. */ | |||
| 17446 | for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){ | |||
| 17447 | Fts5Data *pData = 0; /* Page ii of the current hash table */ | |||
| 17448 | Fts5Data *pFree = 0; /* Free this at the end of the loop */ | |||
| 17449 | ||||
| 17450 | if( iPg1==ii ){ | |||
| 17451 | pData = pData1; | |||
| 17452 | }else{ | |||
| 17453 | pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(ii)) )); | |||
| 17454 | } | |||
| 17455 | ||||
| 17456 | if( pData ){ | |||
| 17457 | int szKeyIn = TOMBSTONE_KEYSIZE(pData)(pData->p[0]==4 ? 4 : 8); | |||
| 17458 | int nSlotIn = (pData->nn - 8) / szKeyIn; | |||
| 17459 | int iIn; | |||
| 17460 | for(iIn=0; iIn<nSlotIn; iIn++){ | |||
| 17461 | u64 iVal = 0; | |||
| 17462 | ||||
| 17463 | /* Read the value from slot iIn of the input page into iVal. */ | |||
| 17464 | if( szKeyIn==4 ){ | |||
| 17465 | u32 *aSlot = (u32*)&pData->p[8]; | |||
| 17466 | if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]); | |||
| 17467 | }else{ | |||
| 17468 | u64 *aSlot = (u64*)&pData->p[8]; | |||
| 17469 | if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]); | |||
| 17470 | } | |||
| 17471 | ||||
| 17472 | /* If iVal is not 0 at this point, insert it into the new hash table */ | |||
| 17473 | if( iVal ){ | |||
| 17474 | Fts5Data *pPg = apOut[(iVal % nOut)]; | |||
| 17475 | res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal); | |||
| 17476 | if( res ) break; | |||
| 17477 | } | |||
| 17478 | } | |||
| 17479 | ||||
| 17480 | /* If this is page 0 of the old hash, copy the rowid-0-flag from the | |||
| 17481 | ** old hash to the new. */ | |||
| 17482 | if( ii==0 ){ | |||
| 17483 | apOut[0]->p[1] = pData->p[1]; | |||
| 17484 | } | |||
| 17485 | } | |||
| 17486 | fts5DataRelease(pFree); | |||
| 17487 | } | |||
| 17488 | ||||
| 17489 | return res; | |||
| 17490 | } | |||
| 17491 | ||||
| 17492 | /* | |||
| 17493 | ** This is called to rebuild the hash table belonging to segment pSeg. | |||
| 17494 | ** If parameter pData1 is not NULL, then one page of the existing hash table | |||
| 17495 | ** has already been loaded - pData1, which is page iPg1. The key-size for | |||
| 17496 | ** the new hash table is szKey (4 or 8). | |||
| 17497 | ** | |||
| 17498 | ** If successful, the new hash table is not written to disk. Instead, | |||
| 17499 | ** output parameter (*pnOut) is set to the number of pages in the new | |||
| 17500 | ** hash table, and (*papOut) to point to an array of buffers containing | |||
| 17501 | ** the new page data. | |||
| 17502 | ** | |||
| 17503 | ** If an error occurs, an error code is left in the Fts5Index object and | |||
| 17504 | ** both output parameters set to 0 before returning. | |||
| 17505 | */ | |||
| 17506 | static void fts5IndexTombstoneRebuild( | |||
| 17507 | Fts5Index *p, | |||
| 17508 | Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */ | |||
| 17509 | Fts5Data *pData1, /* One page of current hash - or NULL */ | |||
| 17510 | int iPg1, /* Which page of the current hash is pData1 */ | |||
| 17511 | int szKey, /* 4 or 8, the keysize */ | |||
| 17512 | int *pnOut, /* OUT: Number of output pages */ | |||
| 17513 | Fts5Data ***papOut /* OUT: Output hash pages */ | |||
| 17514 | ){ | |||
| 17515 | const int MINSLOT = 32; | |||
| 17516 | int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey)(((MINSLOT) > ((p->pConfig->pgsz - 8) / szKey)) ? (MINSLOT ) : ((p->pConfig->pgsz - 8) / szKey)); | |||
| 17517 | int nSlot = 0; /* Number of slots in each output page */ | |||
| 17518 | int nOut = 0; | |||
| 17519 | ||||
| 17520 | /* Figure out how many output pages (nOut) and how many slots per | |||
| 17521 | ** page (nSlot). There are three possibilities: | |||
| 17522 | ** | |||
| 17523 | ** 1. The hash table does not yet exist. In this case the new hash | |||
| 17524 | ** table will consist of a single page with MINSLOT slots. | |||
| 17525 | ** | |||
| 17526 | ** 2. The hash table exists but is currently a single page. In this | |||
| 17527 | ** case an attempt is made to grow the page to accommodate the new | |||
| 17528 | ** entry. The page is allowed to grow up to nSlotPerPage (see above) | |||
| 17529 | ** slots. | |||
| 17530 | ** | |||
| 17531 | ** 3. The hash table already consists of more than one page, or of | |||
| 17532 | ** a single page already so large that it cannot be grown. In this | |||
| 17533 | ** case the new hash consists of (nPg*2+1) pages of nSlotPerPage | |||
| 17534 | ** slots each, where nPg is the current number of pages in the | |||
| 17535 | ** hash table. | |||
| 17536 | */ | |||
| 17537 | if( pSeg->nPgTombstone==0 ){ | |||
| 17538 | /* Case 1. */ | |||
| 17539 | nOut = 1; | |||
| 17540 | nSlot = MINSLOT; | |||
| 17541 | }else if( pSeg->nPgTombstone==1 ){ | |||
| 17542 | /* Case 2. */ | |||
| 17543 | int nElem = (int)fts5GetU32(&pData1->p[4]); | |||
| 17544 | assert( pData1 && iPg1==0 )((void) (0)); | |||
| 17545 | nOut = 1; | |||
| 17546 | nSlot = MAX(nElem*4, MINSLOT)(((nElem*4) > (MINSLOT)) ? (nElem*4) : (MINSLOT)); | |||
| 17547 | if( nSlot>nSlotPerPage ) nOut = 0; | |||
| 17548 | } | |||
| 17549 | if( nOut==0 ){ | |||
| 17550 | /* Case 3. */ | |||
| 17551 | nOut = (pSeg->nPgTombstone * 2 + 1); | |||
| 17552 | nSlot = nSlotPerPage; | |||
| 17553 | } | |||
| 17554 | ||||
| 17555 | /* Allocate the required array and output pages */ | |||
| 17556 | while( 1 ){ | |||
| 17557 | int res = 0; | |||
| 17558 | int ii = 0; | |||
| 17559 | int szPage = 0; | |||
| 17560 | Fts5Data **apOut = 0; | |||
| 17561 | ||||
| 17562 | /* Allocate space for the new hash table */ | |||
| 17563 | assert( nSlot>=MINSLOT )((void) (0)); | |||
| 17564 | apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut); | |||
| 17565 | szPage = 8 + nSlot*szKey; | |||
| 17566 | for(ii=0; ii<nOut; ii++){ | |||
| 17567 | Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc, | |||
| 17568 | sizeof(Fts5Data)+szPage | |||
| 17569 | ); | |||
| 17570 | if( pNew ){ | |||
| 17571 | pNew->nn = szPage; | |||
| 17572 | pNew->p = (u8*)&pNew[1]; | |||
| 17573 | apOut[ii] = pNew; | |||
| 17574 | } | |||
| 17575 | } | |||
| 17576 | ||||
| 17577 | /* Rebuild the hash table. */ | |||
| 17578 | if( p->rc==SQLITE_OK0 ){ | |||
| 17579 | res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut); | |||
| 17580 | } | |||
| 17581 | if( res==0 ){ | |||
| 17582 | if( p->rc ){ | |||
| 17583 | fts5IndexFreeArray(apOut, nOut); | |||
| 17584 | apOut = 0; | |||
| 17585 | nOut = 0; | |||
| 17586 | } | |||
| 17587 | *pnOut = nOut; | |||
| 17588 | *papOut = apOut; | |||
| 17589 | break; | |||
| 17590 | } | |||
| 17591 | ||||
| 17592 | /* If control flows to here, it was not possible to rebuild the hash | |||
| 17593 | ** table. Free all buffers and then try again with more pages. */ | |||
| 17594 | assert( p->rc==SQLITE_OK )((void) (0)); | |||
| 17595 | fts5IndexFreeArray(apOut, nOut); | |||
| 17596 | nSlot = nSlotPerPage; | |||
| 17597 | nOut = nOut*2 + 1; | |||
| 17598 | } | |||
| 17599 | } | |||
| 17600 | ||||
| 17601 | ||||
| 17602 | /* | |||
| 17603 | ** Add a tombstone for rowid iRowid to segment pSeg. | |||
| | ** | |||
| | ** If the segment already has tombstone pages, the page with index | |||
| | ** (iRowid % pSeg->nPgTombstone) is read and the rowid is offered to it. | |||
| | ** When fts5IndexTombstoneAddToPage() returns 0 that page is written back | |||
| | ** and the function returns. Otherwise (or when the segment has no | |||
| | ** tombstone pages yet) the hash table is rebuilt, the rowid is added to | |||
| | ** the rebuilt pages, every page is written out and the structure record | |||
| | ** is rewritten with the new page count. | |||
| 17604 | */ | |||
| 17605 | static void fts5IndexTombstoneAdd( | |||
| 17606 | Fts5Index *p, | |||
| 17607 | Fts5StructureSegment *pSeg, | |||
| 17608 | u64 iRowid | |||
| 17609 | ){ | |||
| 17610 | Fts5Data *pPg = 0; /* Existing tombstone page read for iRowid, if any */ | |||
| 17611 | int iPg = -1; /* Index of pPg, or -1 if no page was read */ | |||
| 17612 | int szKey = 0; /* Key size for the rebuilt hash: 4 or 8 */ | |||
| 17613 | int nHash = 0; /* Number of pages in the rebuilt hash */ | |||
| 17614 | Fts5Data **apHash = 0; /* Pages of the rebuilt hash */ | |||
| 17615 | ||||
| 17616 | p->nContentlessDelete++; | |||
| 17617 | ||||
| 17618 | if( pSeg->nPgTombstone>0 ){ | |||
| 17619 | iPg = iRowid % pSeg->nPgTombstone; | |||
| 17620 | pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(iPg)) )); | |||
| 17621 | if( pPg==0 ){ | |||
| 17622 | assert( p->rc!=SQLITE_OK )((void) (0)); | |||
| 17623 | return; | |||
| 17624 | } | |||
| 17625 | ||||
| 17626 | if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){ /* 0 return: write the page back, no rebuild needed */ | |||
| 17627 | fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(iPg)) ), pPg->p, pPg->nn); | |||
| 17628 | fts5DataRelease(pPg); | |||
| 17629 | return; | |||
| 17630 | } | |||
| 17631 | } | |||
| 17632 | ||||
| 17633 | /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */ | |||
| 17634 | szKey = pPg ? TOMBSTONE_KEYSIZE(pPg)(pPg->p[0]==4 ? 4 : 8) : 4; | |||
| 17635 | if( iRowid>0xFFFFFFFF ) szKey = 8; /* rowid does not fit in 32 bits */ | |||
| 17636 | ||||
| 17637 | /* Rebuild the hash table */ | |||
| 17638 | fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash); | |||
| 17639 | assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) )((void) (0)); | |||
| 17640 | ||||
| 17641 | /* If all has succeeded, write the new rowid into one of the new hash | |||
| 17642 | ** table pages, then write them all out to disk. */ | |||
| 17643 | if( nHash ){ | |||
| 17644 | int ii = 0; | |||
| 17645 | fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid); | |||
| 17646 | for(ii=0; ii<nHash; ii++){ | |||
| 17647 | i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii)( ((i64)(pSeg->iSegid+(1<<16)) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + (( i64)(ii)) ); | |||
| 17648 | fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn); | |||
| 17649 | } | |||
| 17650 | pSeg->nPgTombstone = nHash; | |||
| 17651 | fts5StructureWrite(p, p->pStruct); | |||
| 17652 | } | |||
| 17653 | ||||
| 17654 | fts5DataRelease(pPg); | |||
| 17655 | fts5IndexFreeArray(apHash, nHash); | |||
| 17656 | } | |||
| 17657 | ||||
| 17658 | /* | |||
| 17659 | ** Add iRowid to the tombstone list of the segment or segments that contain | |||
| 17660 | ** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite | |||
| 17661 | ** error code otherwise. | |||
| | ** | |||
| | ** Segments are visited from the highest level to the lowest and, within a | |||
| | ** level, from the last segment to the first. Every segment whose | |||
| | ** [iOrigin1, iOrigin2] range includes iOrigin receives the tombstone, but | |||
| | ** nEntryTombstone is incremented only on the first such segment visited. | |||
| 17662 | */ | |||
| 17663 | static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){ | |||
| 17664 | Fts5Structure *pStruct; | |||
| 17665 | pStruct = fts5StructureRead(p); | |||
| 17666 | if( pStruct ){ | |||
| 17667 | int bFound = 0; /* True after pSeg->nEntryTombstone incr. */ | |||
| 17668 | int iLvl; | |||
| 17669 | for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ | |||
| 17670 | int iSeg; | |||
| 17671 | for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){ | |||
| 17672 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | |||
| 17673 | if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){ /* iOrigin lies inside this segment's origin range */ | |||
| 17674 | if( bFound==0 ){ | |||
| 17675 | pSeg->nEntryTombstone++; | |||
| 17676 | bFound = 1; | |||
| 17677 | } | |||
| 17678 | fts5IndexTombstoneAdd(p, pSeg, iRowid); | |||
| 17679 | } | |||
| 17680 | } | |||
| 17681 | } | |||
| 17682 | fts5StructureRelease(pStruct); | |||
| 17683 | } | |||
| 17684 | return fts5IndexReturn(p); | |||
| 17685 | } | |||
| 17686 | ||||
| 17687 | /************************************************************************* | |||
| 17688 | ************************************************************************** | |||
| 17689 | ** Below this point is the implementation of the integrity-check | |||
| 17690 | ** functionality. | |||
| 17691 | */ | |||
| 17692 | ||||
| 17693 | /* | |||
| 17694 | ** Return a simple checksum value based on the arguments. | |||
| | ** | |||
| | ** The value starts as iRowid and each further component is folded in as | |||
| | ** ret += (ret<<3) + value, in this order: iCol, iPos, the index prefix | |||
| | ** byte (FTS5_MAIN_PREFIX + iIdx, only when iIdx>=0), then each of the | |||
| | ** nTerm bytes of pTerm. | |||
| 17695 | */ | |||
| 17696 | static u64 sqlite3Fts5IndexEntryCksum( | |||
| 17697 | i64 iRowid, | |||
| 17698 | int iCol, | |||
| 17699 | int iPos, | |||
| 17700 | int iIdx, | |||
| 17701 | const char *pTerm, | |||
| 17702 | int nTerm | |||
| 17703 | ){ | |||
| 17704 | int i; | |||
| 17705 | u64 ret = iRowid; | |||
| 17706 | ret += (ret<<3) + iCol; | |||
| 17707 | ret += (ret<<3) + iPos; | |||
| 17708 | if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX'0' + iIdx); /* callers pass -1 to leave the index out of the checksum */ | |||
| 17709 | for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; | |||
| 17710 | return ret; | |||
| 17711 | } | |||
| 17712 | ||||
| 17713 | #ifdef SQLITE_DEBUG | |||
| 17714 | /* | |||
| 17715 | ** This function is purely an internal test. It does not contribute to | |||
| 17716 | ** FTS functionality, or even the integrity-check, in any way. | |||
| 17717 | ** | |||
| 17718 | ** Instead, it tests that the same set of pgno/rowid combinations are | |||
| 17719 | ** visited regardless of whether the doclist-index identified by parameters | |||
| 17720 | ** iSegid/iLeaf is iterated in forwards or reverse order. | |||
| | ** | |||
| | ** One checksum is accumulated per direction. If the two differ and no | |||
| | ** error has been recorded yet, p->rc is set to FTS5_CORRUPT. | |||
| 17721 | */ | |||
| 17722 | static void fts5TestDlidxReverse( | |||
| 17723 | Fts5Index *p, | |||
| 17724 | int iSegid, /* Segment id to load from */ | |||
| 17725 | int iLeaf /* Load doclist-index for this leaf */ | |||
| 17726 | ){ | |||
| 17727 | Fts5DlidxIter *pDlidx = 0; | |||
| 17728 | u64 cksum1 = 13; /* Checksum of the forward iteration */ | |||
| 17729 | u64 cksum2 = 13; /* Checksum of the reverse iteration */ | |||
| 17730 | ||||
| 17731 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); | |||
| 17732 | fts5DlidxIterEof(p, pDlidx)==0; | |||
| 17733 | fts5DlidxIterNext(p, pDlidx) | |||
| 17734 | ){ | |||
| 17735 | i64 iRowid = fts5DlidxIterRowid(pDlidx); | |||
| 17736 | int pgno = fts5DlidxIterPgno(pDlidx); | |||
| 17737 | assert( pgno>iLeaf )((void) (0)); | |||
| 17738 | cksum1 += iRowid + ((i64)pgno<<32); | |||
| 17739 | } | |||
| 17740 | fts5DlidxIterFree(pDlidx); | |||
| 17741 | pDlidx = 0; | |||
| 17742 | ||||
| 17743 | for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); | |||
| 17744 | fts5DlidxIterEof(p, pDlidx)==0; | |||
| 17745 | fts5DlidxIterPrev(p, pDlidx) | |||
| 17746 | ){ | |||
| 17747 | i64 iRowid = fts5DlidxIterRowid(pDlidx); | |||
| 17748 | int pgno = fts5DlidxIterPgno(pDlidx); | |||
| 17749 | assert( fts5DlidxIterPgno(pDlidx)>iLeaf )((void) (0)); | |||
| 17750 | cksum2 += iRowid + ((i64)pgno<<32); | |||
| 17751 | } | |||
| 17752 | fts5DlidxIterFree(pDlidx); | |||
| 17753 | pDlidx = 0; | |||
| 17754 | ||||
| 17755 | if( p->rc==SQLITE_OK0 && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 17756 | } | |||
| 17757 | /* Query index iIdx for key z/n with the given flags (FTS5INDEX_QUERY_NOTOKENDATA is always added) and XOR a checksum of every entry returned into *pCksum. Returns the error code from the query or from advancing the iterator. */ ||||
| 17758 | static int fts5QueryCksum( | |||
| 17759 | Fts5Index *p, /* Fts5 index object */ | |||
| 17760 | int iIdx, /* Index number, passed through to the checksum */ | |||
| 17761 | const char *z, /* Index key to query for */ | |||
| 17762 | int n, /* Size of index key in bytes */ | |||
| 17763 | int flags, /* Flags for Fts5IndexQuery */ | |||
| 17764 | u64 *pCksum /* IN/OUT: Checksum value */ | |||
| 17765 | ){ | |||
| 17766 | int eDetail = p->pConfig->eDetail; | |||
| 17767 | u64 cksum = *pCksum; | |||
| 17768 | Fts5IndexIter *pIter = 0; | |||
| 17769 | int rc = sqlite3Fts5IndexQuery( | |||
| 17770 | p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA0x0080), 0, &pIter | |||
| 17771 | ); | |||
| 17772 | ||||
| 17773 | while( rc==SQLITE_OK0 && ALWAYS(pIter!=0)(pIter!=0) && 0==sqlite3Fts5IterEof(pIter)((pIter)->bEof) ){ | |||
| 17774 | i64 rowid = pIter->iRowid; | |||
| 17775 | ||||
| 17776 | if( eDetail==FTS5_DETAIL_NONE1 ){ /* one entry per rowid, column and offset taken as 0 */ | |||
| 17777 | cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); | |||
| 17778 | }else{ | |||
| 17779 | Fts5PoslistReader sReader; | |||
| 17780 | for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); | |||
| 17781 | sReader.bEof==0; | |||
| 17782 | sqlite3Fts5PoslistReaderNext(&sReader) | |||
| 17783 | ){ | |||
| 17784 | int iCol = FTS5_POS2COLUMN(sReader.iPos)(int)((sReader.iPos >> 32) & 0x7FFFFFFF); | |||
| 17785 | int iOff = FTS5_POS2OFFSET(sReader.iPos)(int)(sReader.iPos & 0x7FFFFFFF); | |||
| 17786 | cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); | |||
| 17787 | } | |||
| 17788 | } | |||
| 17789 | if( rc==SQLITE_OK0 ){ | |||
| 17790 | rc = sqlite3Fts5IterNext(pIter); | |||
| 17791 | } | |||
| 17792 | } | |||
| 17793 | fts5IterClose(pIter); | |||
| 17794 | ||||
| 17795 | *pCksum = cksum; | |||
| 17796 | return rc; | |||
| 17797 | } | |||
| 17798 | ||||
/*
** Check if buffer z[], size n bytes, contains a series of structurally
** valid utf-8 encoded codepoints. If so, return 0. Otherwise, if the buffer
** does not contain valid utf-8, return non-zero.
**
** "Structurally valid" means that every lead byte announces a 1, 2, 3 or
** 4 byte sequence, that the whole sequence fits inside the buffer, and that
** every byte following the lead byte is a continuation byte (10xxxxxx).
** Overlong encodings and surrogate values are not rejected.
**
** Callers are expected to pass n>0; an empty buffer contains nothing to
** reject and so is reported as valid (0).
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  while( i<n ){
    int nCont;                    /* Continuation bytes after the lead byte */
    int j;

    if( (z[i] & 0x80)==0x00 ){
      nCont = 0;                  /* 0xxxxxxx */
    }else if( (z[i] & 0xE0)==0xC0 ){
      nCont = 1;                  /* 110xxxxx */
    }else if( (z[i] & 0xF0)==0xE0 ){
      nCont = 2;                  /* 1110xxxx */
    }else if( (z[i] & 0xF8)==0xF0 ){
      nCont = 3;                  /* 11110xxx */
    }else{
      return 1;                   /* Stray continuation or invalid lead byte */
    }

    /* The sequence must lie entirely within the buffer. */
    if( i+nCont>=n ) return 1;

    /* Every trailing byte - including the fourth byte of a four byte
    ** sequence - must be a continuation byte. */
    for(j=1; j<=nCont; j++){
      if( (z[i+j] & 0xC0)!=0x80 ) return 1;
    }

    /* Step over the complete sequence: lead byte plus continuations. */
    i += nCont + 1;
  }

  return 0;
}
| 17830 | ||||
| 17831 | /* | |||
| 17832 | ** This function is also purely an internal test. It does not contribute to | |||
| 17833 | ** FTS functionality, or even the integrity-check, in any way. | |||
| | ** | |||
| | ** pPrev holds the term passed on the previous call. If it is empty it is | |||
| | ** simply set to (z,n). Otherwise, when (z,n) differs from it, the previous | |||
| | ** term is queried in ASC and DESC order (and, for prefix indexes, again | |||
| | ** with the index disabled), the checksum of the ASC query is XORed into | |||
| | ** *pCksum and the result is compared with expected. Any mismatch is | |||
| | ** reported as FTS5_CORRUPT. The outcome is stored in p->rc. | |||
| 17834 | */ | |||
| 17835 | static void fts5TestTerm( | |||
| 17836 | Fts5Index *p, | |||
| 17837 | Fts5Buffer *pPrev, /* Previous term */ | |||
| 17838 | const char *z, int n, /* Possibly new term to test */ | |||
| 17839 | u64 expected, | |||
| 17840 | u64 *pCksum | |||
| 17841 | ){ | |||
| 17842 | int rc = p->rc; | |||
| 17843 | if( pPrev->n==0 ){ | |||
| 17844 | fts5BufferSet(&rc, pPrev, n, (const u8*)z)sqlite3Fts5BufferSet(&rc,pPrev,n,(const u8*)z); | |||
| 17845 | }else | |||
| 17846 | if( rc==SQLITE_OK0 && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ | |||
| 17847 | u64 cksum3 = *pCksum; | |||
| 17848 | const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ | |||
| 17849 | int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ | |||
| 17850 | int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX'0'); | |||
| 17851 | int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX0x0001); | |||
| 17852 | u64 ck1 = 0; /* Checksum of the ASC query */ | |||
| 17853 | u64 ck2 = 0; /* Checksum of each comparison query */ | |||
| 17854 | ||||
| 17855 | /* Check that the results returned for ASC and DESC queries are | |||
| 17856 | ** the same. If not, call this corruption. */ | |||
| 17857 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); | |||
| 17858 | if( rc==SQLITE_OK0 ){ | |||
| 17859 | int f = flags|FTS5INDEX_QUERY_DESC0x0002; | |||
| 17860 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | |||
| 17861 | } | |||
| 17862 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 17863 | ||||
| 17864 | /* If this is a prefix query, check that the results returned if | |||
| 17865 | ** the index is disabled are the same. In both ASC and DESC order. | |||
| 17866 | ** | |||
| 17867 | ** This check may only be performed if the hash table is empty. This | |||
| 17868 | ** is because the hash table only supports a single scan query at | |||
| 17869 | ** a time, and the multi-iter loop from which this function is called | |||
| 17870 | ** is already performing such a scan. | |||
| 17871 | ** | |||
| 17872 | ** Also only do this if buffer zTerm contains nTerm bytes of valid | |||
| 17873 | ** utf-8. Otherwise, the last part of the buffer contents might contain | |||
| 17874 | ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8 | |||
| 17875 | ** character stored in the main fts index, which will cause the | |||
| 17876 | ** test to fail. */ | |||
| 17877 | if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){ | |||
| 17878 | if( iIdx>0 && rc==SQLITE_OK0 ){ | |||
| 17879 | int f = flags|FTS5INDEX_QUERY_TEST_NOIDX0x0004; | |||
| 17880 | ck2 = 0; | |||
| 17881 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | |||
| 17882 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 17883 | } | |||
| 17884 | if( iIdx>0 && rc==SQLITE_OK0 ){ | |||
| 17885 | int f = flags|FTS5INDEX_QUERY_TEST_NOIDX0x0004|FTS5INDEX_QUERY_DESC0x0002; | |||
| 17886 | ck2 = 0; | |||
| 17887 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); | |||
| 17888 | if( rc==SQLITE_OK0 && ck1!=ck2 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 17889 | } | |||
| 17890 | } | |||
| 17891 | ||||
| 17892 | cksum3 ^= ck1; | |||
| 17893 | fts5BufferSet(&rc, pPrev, n, (const u8*)z)sqlite3Fts5BufferSet(&rc,pPrev,n,(const u8*)z); | |||
| 17894 | ||||
| 17895 | if( rc==SQLITE_OK0 && cksum3!=expected ){ | |||
| 17896 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 17897 | } | |||
| 17898 | *pCksum = cksum3; | |||
| 17899 | } | |||
| 17900 | p->rc = rc; | |||
| 17901 | } | |||
| 17902 | ||||
| 17903 | #else /* SQLITE_DEBUG not defined: the internal tests above compile away */ | |||
| 17904 | # define fts5TestDlidxReverse(x,y,z) /* expands to nothing */ | |||
| 17905 | # define fts5TestTerm(u,v,w,x,y,z) /* expands to nothing */ | |||
| 17906 | #endif | |||
| 17907 | ||||
| 17908 | /* | |||
| 17909 | ** Check that: | |||
| 17910 | ** | |||
| 17911 | ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and | |||
| 17912 | ** contain zero terms. | |||
| 17913 | ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and | |||
| 17914 | ** contain zero rowids. | |||
| | ** | |||
| | ** A leaf that violates either rule sets p->rc to FTS5_CORRUPT. The loop | |||
| | ** stops as soon as p->rc is no longer SQLITE_OK. | |||
| 17915 | */ | |||
| 17916 | static void fts5IndexIntegrityCheckEmpty( | |||
| 17917 | Fts5Index *p, | |||
| 17918 | Fts5StructureSegment *pSeg, /* Segment to check internal consistency */ | |||
| 17919 | int iFirst, | |||
| 17920 | int iNoRowid, | |||
| 17921 | int iLast | |||
| 17922 | ){ | |||
| 17923 | int i; | |||
| 17924 | ||||
| 17925 | /* Now check that the iter.nEmpty leaves following the current leaf | |||
| 17926 | ** (a) exist and (b) contain no terms. */ | |||
| 17927 | for(i=iFirst; p->rc==SQLITE_OK0 && i<=iLast; i++){ | |||
| 17928 | Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(i)) )); | |||
| 17929 | if( pLeaf ){ | |||
| 17930 | if( !fts5LeafIsTermless(pLeaf)((pLeaf)->szLeaf >= (pLeaf)->nn) ) p->rc = FTS5_CORRUPT(11 | (1<<8)); /* rule 1: leaf holds a term */ | |||
| 17931 | if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)) ) p->rc = FTS5_CORRUPT(11 | (1<<8)); /* rule 2: leaf holds a rowid */ | |||
| 17932 | } | |||
| 17933 | fts5DataRelease(pLeaf); | |||
| 17934 | } | |||
| 17935 | } | |||
| 17936 | /* Walk the bytes of pLeaf from offset szLeaf up to nn (presumably the page-index, going by the function name). Each varint is an increment to the term offset; every offset must lie below szLeaf and the terms must be strictly increasing, otherwise p->rc is set to FTS5_CORRUPT. */ ||||
| 17937 | static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ | |||
| 17938 | i64 iTermOff = 0; /* Running sum of the offset increments */ | |||
| 17939 | int ii; | |||
| 17940 | ||||
| 17941 | Fts5Buffer buf1 = {0,0,0}; /* Term at the current offset */ | |||
| 17942 | Fts5Buffer buf2 = {0,0,0}; /* Term at the previous offset */ | |||
| 17943 | ||||
| 17944 | ii = pLeaf->szLeaf; | |||
| 17945 | while( ii<pLeaf->nn && p->rc==SQLITE_OK0 ){ | |||
| 17946 | int res; | |||
| 17947 | i64 iOff; | |||
| 17948 | int nIncr; | |||
| 17949 | ||||
| 17950 | ii += fts5GetVarint32(&pLeaf->p[ii], nIncr)sqlite3Fts5GetVarint32(&pLeaf->p[ii],(u32*)&(nIncr )); | |||
| 17951 | iTermOff += nIncr; | |||
| 17952 | iOff = iTermOff; | |||
| 17953 | ||||
| 17954 | if( iOff>=pLeaf->szLeaf ){ | |||
| 17955 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 17956 | }else if( iTermOff==nIncr ){ /* first entry: the term is stored in full */ | |||
| 17957 | int nByte; | |||
| 17958 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nByte )); | |||
| 17959 | if( (iOff+nByte)>pLeaf->szLeaf ){ | |||
| 17960 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 17961 | }else{ | |||
| 17962 | fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff])sqlite3Fts5BufferSet(&p->rc,&buf1,nByte,&pLeaf ->p[iOff]); | |||
| 17963 | } | |||
| 17964 | }else{ /* later entries: nKeep bytes kept from the previous term, then nByte new bytes */ | |||
| 17965 | int nKeep, nByte; | |||
| 17966 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nKeep )); | |||
| 17967 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nByte )); | |||
| 17968 | if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ | |||
| 17969 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 17970 | }else{ | |||
| 17971 | buf1.n = nKeep; | |||
| 17972 | fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff])sqlite3Fts5BufferAppendBlob(&p->rc,&buf1,nByte,& pLeaf->p[iOff]); | |||
| 17973 | } | |||
| 17974 | ||||
| 17975 | if( p->rc==SQLITE_OK0 ){ | |||
| 17976 | res = fts5BufferCompare(&buf1, &buf2); | |||
| 17977 | if( res<=0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); /* term is not larger than the previous one */ | |||
| 17978 | } | |||
| 17979 | } | |||
| 17980 | fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p)sqlite3Fts5BufferSet(&p->rc,&buf2,buf1.n,buf1.p); | |||
| 17981 | } | |||
| 17982 | ||||
| 17983 | fts5BufferFree(&buf1)sqlite3Fts5BufferFree(&buf1); | |||
| 17984 | fts5BufferFree(&buf2)sqlite3Fts5BufferFree(&buf2); | |||
| 17985 | } | |||
| 17986 | /* Check segment pSeg for internal consistency: step through its %_idx rows in term order and, for each one, verify the referenced leaf, the term-less leaves between entries and any doclist-index. Problems are reported by setting p->rc (FTS5_CORRUPT for inconsistencies). Does nothing if pSeg->pgnoFirst is 0. */ ||||
| 17987 | static void fts5IndexIntegrityCheckSegment( | |||
| 17988 | Fts5Index *p, /* FTS5 backend object */ | |||
| 17989 | Fts5StructureSegment *pSeg /* Segment to check internal consistency */ | |||
| 17990 | ){ | |||
| 17991 | Fts5Config *pConfig = p->pConfig; | |||
| 17992 | int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE5); | |||
| 17993 | sqlite3_stmt *pStmt = 0; | |||
| 17994 | int rc2; | |||
| 17995 | int iIdxPrevLeaf = pSeg->pgnoFirst-1; /* Leaf of the previous %_idx entry */ | |||
| 17996 | int iDlidxPrevLeaf = pSeg->pgnoLast; /* Passed (+1) as iNoRowid to the empty-leaf check */ | |||
| 17997 | ||||
| 17998 | if( pSeg->pgnoFirst==0 ) return; | |||
| 17999 | ||||
| 18000 | fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintfsqlite3_api->mprintf( | |||
| 18001 | "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d " | |||
| 18002 | "ORDER BY 1, 2", | |||
| 18003 | pConfig->zDb, pConfig->zName, pSeg->iSegid | |||
| 18004 | )); | |||
| 18005 | ||||
| 18006 | /* Iterate through the b-tree hierarchy. */ | |||
| 18007 | while( p->rc==SQLITE_OK0 && SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pStmt) ){ | |||
| 18008 | i64 iRow; /* Rowid for this leaf */ | |||
| 18009 | Fts5Data *pLeaf; /* Data for this leaf */ | |||
| 18010 | ||||
| 18011 | const char *zIdxTerm = (const char*)sqlite3_column_blobsqlite3_api->column_blob(pStmt, 1); | |||
| 18012 | int nIdxTerm = sqlite3_column_bytessqlite3_api->column_bytes(pStmt, 1); | |||
| 18013 | int iIdxLeaf = sqlite3_column_intsqlite3_api->column_int(pStmt, 2); /* pgno>>1: leaf page number */ | |||
| 18014 | int bIdxDlidx = sqlite3_column_intsqlite3_api->column_int(pStmt, 3); /* pgno&1: doclist-index flag */ | |||
| 18015 | ||||
| 18016 | /* If the leaf in question has already been trimmed from the segment, | |||
| 18017 | ** ignore this b-tree entry. Otherwise, load it into memory. */ | |||
| 18018 | if( iIdxLeaf<pSeg->pgnoFirst ) continue; | |||
| 18019 | iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf)( ((i64)(pSeg->iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iIdxLeaf)) ); | |||
| 18020 | pLeaf = fts5LeafRead(p, iRow); | |||
| 18021 | if( pLeaf==0 ) break; | |||
| 18022 | ||||
| 18023 | /* Check that the leaf contains at least one term, and that it is equal | |||
| 18024 | ** to or larger than the split-key in zIdxTerm. Also check that if there | |||
| 18025 | ** is also a rowid pointer within the leaf page header, it points to a | |||
| 18026 | ** location before the term. */ | |||
| 18027 | if( pLeaf->nn<=pLeaf->szLeaf ){ | |||
| 18028 | ||||
| 18029 | if( nIdxTerm==0 | |||
| 18030 | && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE5 | |||
| 18031 | && pLeaf->nn==pLeaf->szLeaf | |||
| 18032 | && pLeaf->nn==4 | |||
| 18033 | ){ | |||
| 18034 | /* special case - the very first page in a segment keeps its %_idx | |||
| 18035 | ** entry even if all the terms are removed from it by secure-delete | |||
| 18036 | ** operations. */ | |||
| 18037 | }else{ | |||
| 18038 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18039 | } | |||
| 18040 | ||||
| 18041 | }else{ | |||
| 18042 | int iOff; /* Offset of first term on leaf */ | |||
| 18043 | int iRowidOff; /* Offset of first rowid on leaf */ | |||
| 18044 | int nTerm; /* Size of term on leaf in bytes */ | |||
| 18045 | int res; /* Comparison of term and split-key */ | |||
| 18046 | ||||
| 18047 | iOff = fts5LeafFirstTermOff(pLeaf); | |||
| 18048 | iRowidOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)); | |||
| 18049 | if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){ | |||
| 18050 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18051 | }else{ | |||
| 18052 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm)sqlite3Fts5GetVarint32(&pLeaf->p[iOff],(u32*)&(nTerm )); | |||
| 18053 | res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm))(((((nTerm) < (nIdxTerm)) ? (nTerm) : (nIdxTerm)))<=0 ? 0 : memcmp((&pLeaf->p[iOff]), (zIdxTerm), ((((nTerm) < (nIdxTerm)) ? (nTerm) : (nIdxTerm))))); | |||
| 18054 | if( res==0 ) res = nTerm - nIdxTerm; | |||
| 18055 | if( res<0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); /* first term on the leaf is smaller than the split-key */ | |||
| 18056 | } | |||
| 18057 | ||||
| 18058 | fts5IntegrityCheckPgidx(p, pLeaf); | |||
| 18059 | } | |||
| 18060 | fts5DataRelease(pLeaf); | |||
| 18061 | if( p->rc ) break; | |||
| 18062 | ||||
| 18063 | /* Now check that the iter.nEmpty leaves following the current leaf | |||
| 18064 | ** (a) exist and (b) contain no terms. */ | |||
| 18065 | fts5IndexIntegrityCheckEmpty( | |||
| 18066 | p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1 | |||
| 18067 | ); | |||
| 18068 | if( p->rc ) break; | |||
| 18069 | ||||
| 18070 | /* If there is a doclist-index, check that it looks right. */ | |||
| 18071 | if( bIdxDlidx ){ | |||
| 18072 | Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ | |||
| 18073 | int iPrevLeaf = iIdxLeaf; | |||
| 18074 | int iSegid = pSeg->iSegid; | |||
| 18075 | int iPg = 0; | |||
| 18076 | i64 iKey; | |||
| 18077 | ||||
| 18078 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); | |||
| 18079 | fts5DlidxIterEof(p, pDlidx)==0; | |||
| 18080 | fts5DlidxIterNext(p, pDlidx) | |||
| 18081 | ){ | |||
| 18082 | ||||
| 18083 | /* Check any rowid-less pages that occur before the current leaf. */ | |||
| 18084 | for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ | |||
| 18085 | iKey = FTS5_SEGMENT_ROWID(iSegid, iPg)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPg)) ); | |||
| 18086 | pLeaf = fts5DataRead(p, iKey); | |||
| 18087 | if( pLeaf ){ | |||
| 18088 | if( fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p))!=0 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18089 | fts5DataRelease(pLeaf); | |||
| 18090 | } | |||
| 18091 | } | |||
| 18092 | iPrevLeaf = fts5DlidxIterPgno(pDlidx); | |||
| 18093 | ||||
| 18094 | /* Check that the leaf page indicated by the iterator really does | |||
| 18095 | ** contain the rowid suggested by the same. */ | |||
| 18096 | iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf)( ((i64)(iSegid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(iPrevLeaf)) ); | |||
| 18097 | pLeaf = fts5DataRead(p, iKey); | |||
| 18098 | if( pLeaf ){ | |||
| 18099 | i64 iRowid; | |||
| 18100 | int iRowidOff = fts5LeafFirstRowidOff(pLeaf)(fts5GetU16((pLeaf)->p)); | |||
| 18101 | ASSERT_SZLEAF_OK(pLeaf)((void) (0)); | |||
| 18102 | if( iRowidOff>=pLeaf->szLeaf ){ | |||
| 18103 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18104 | }else if( bSecureDelete==0 || iRowidOff>0 ){ | |||
| 18105 | i64 iDlRowid = fts5DlidxIterRowid(pDlidx); | |||
| 18106 | fts5GetVarintsqlite3Fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); | |||
| 18107 | if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){ /* exact match required unless secure-delete is in use */ | |||
| 18108 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18109 | } | |||
| 18110 | } | |||
| 18111 | fts5DataRelease(pLeaf); | |||
| 18112 | } | |||
| 18113 | } | |||
| 18114 | ||||
| 18115 | iDlidxPrevLeaf = iPg; | |||
| 18116 | fts5DlidxIterFree(pDlidx); | |||
| 18117 | fts5TestDlidxReverse(p, iSegid, iIdxLeaf); | |||
| 18118 | }else{ | |||
| 18119 | iDlidxPrevLeaf = pSeg->pgnoLast; | |||
| 18120 | /* TODO: Check there is no doclist index */ | |||
| 18121 | } | |||
| 18122 | ||||
| 18123 | iIdxPrevLeaf = iIdxLeaf; | |||
| 18124 | } | |||
| 18125 | ||||
| 18126 | rc2 = sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
| 18127 | if( p->rc==SQLITE_OK0 ) p->rc = rc2; /* keep the first error; otherwise report the finalize result */ | |||
| 18128 | ||||
| 18129 | /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ | |||
| 18130 | #if 0 | |||
| 18131 | if( p->rc==SQLITE_OK0 && iter.iLeaf!=pSeg->pgnoLast ){ | |||
| 18132 | p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18133 | } | |||
| 18134 | #endif | |||
| 18135 | } | |||
| 18136 | ||||
| 18137 | ||||
| 18138 | /* | |||
| 18139 | ** Run internal checks to ensure that the FTS index (a) is internally | |||
| 18140 | ** consistent and (b) contains entries for which the XOR of the checksums | |||
| 18141 | ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. | |||
| 18142 | ** | |||
| 18143 | ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the | |||
| 18144 | ** checksum does not match. Return SQLITE_OK if all checks pass without | |||
| 18145 | ** error, or some other SQLite error code if another error (e.g. OOM) | |||
| 18146 | ** occurs. | |||
| | ** | |||
| | ** The checksum comparison against cksum is only made when bUseCksum is | |||
| | ** non-zero; the per-segment structural checks always run. | |||
| 18147 | */ | |||
| 18148 | static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){ | |||
| 18149 | int eDetail = p->pConfig->eDetail; | |||
| 18150 | u64 cksum2 = 0; /* Checksum based on contents of indexes */ | |||
| 18151 | Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ | |||
| 18152 | Fts5Iter *pIter; /* Used to iterate through entire index */ | |||
| 18153 | Fts5Structure *pStruct; /* Index structure */ | |||
| 18154 | int iLvl, iSeg; | |||
| 18155 | ||||
| 18156 | #ifdef SQLITE_DEBUG | |||
| 18157 | /* Used by extra internal tests only run if NDEBUG is not defined */ | |||
| 18158 | u64 cksum3 = 0; /* Checksum based on contents of indexes */ | |||
| 18159 | Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ | |||
| 18160 | #endif | |||
| 18161 | const int flags = FTS5INDEX_QUERY_NOOUTPUT0x0020; | |||
| 18162 | ||||
| 18163 | /* Load the FTS index structure */ | |||
| 18164 | pStruct = fts5StructureRead(p); | |||
| 18165 | if( pStruct==0 ){ | |||
| 18166 | assert( p->rc!=SQLITE_OK )((void) (0)); | |||
| 18167 | return fts5IndexReturn(p); | |||
| 18168 | } | |||
| 18169 | ||||
| 18170 | /* Check that the internal nodes of each segment match the leaves */ | |||
| 18171 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ | |||
| 18172 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ | |||
| 18173 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; | |||
| 18174 | fts5IndexIntegrityCheckSegment(p, pSeg); | |||
| 18175 | } | |||
| 18176 | } | |||
| 18177 | ||||
| 18178 | /* The cksum argument passed to this function is a checksum calculated | |||
| 18179 | ** based on all expected entries in the FTS index (including prefix index | |||
| 18180 | ** entries). This block checks that a checksum calculated based on the | |||
| 18181 | ** actual contents of FTS index is identical. | |||
| 18182 | ** | |||
| 18183 | ** Two versions of the same checksum are calculated. The first (stack | |||
| 18184 | ** variable cksum2) based on entries extracted from the full-text index | |||
| 18185 | ** while doing a linear scan of each individual index in turn. | |||
| 18186 | ** | |||
| 18187 | ** As each term is visited by the linear scans, a separate query for the | |||
| 18188 | ** same term is performed. cksum3 is calculated based on the entries | |||
| 18189 | ** extracted by these queries. | |||
| 18190 | */ | |||
| 18191 | for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter); | |||
| 18192 | fts5MultiIterEof(p, pIter)==0; | |||
| 18193 | fts5MultiIterNext(p, pIter, 0, 0) | |||
| 18194 | ){ | |||
| 18195 | int n; /* Size of term in bytes */ | |||
| 18196 | i64 iPos = 0; /* Position read from poslist */ | |||
| 18197 | int iOff = 0; /* Offset within poslist */ | |||
| 18198 | i64 iRowid = fts5MultiIterRowid(pIter); | |||
| 18199 | char *z = (char*)fts5MultiIterTerm(pIter, &n); | |||
| 18200 | ||||
| 18201 | /* If this is a new term, query for it. Update cksum3 with the results. */ | |||
| 18202 | fts5TestTerm(p, &term, z, n, cksum2, &cksum3); | |||
| 18203 | if( p->rc ) break; | |||
| 18204 | ||||
| 18205 | if( eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 18206 | if( 0==fts5MultiIterIsEmpty(p, pIter) ){ | |||
| 18207 | cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); | |||
| 18208 | } | |||
| 18209 | }else{ | |||
| 18210 | poslist.n = 0; | |||
| 18211 | fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist); | |||
| 18212 | fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0")sqlite3Fts5BufferAppendBlob(&p->rc,&poslist,4,(const u8*)"\0\0\0\0"); | |||
| 18213 | while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ | |||
| 18214 | int iCol = FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF); | |||
| 18215 | int iTokOff = FTS5_POS2OFFSET(iPos)(int)(iPos & 0x7FFFFFFF); | |||
| 18216 | cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); | |||
| 18217 | } | |||
| 18218 | } | |||
| 18219 | } | |||
| 18220 | fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); /* final call with an empty term so the last term seen is tested too */ | |||
| 18221 | ||||
| 18222 | fts5MultiIterFree(pIter); | |||
| 18223 | if( p->rc==SQLITE_OK0 && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18224 | ||||
| 18225 | fts5StructureRelease(pStruct); | |||
| 18226 | #ifdef SQLITE_DEBUG | |||
| 18227 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | |||
| 18228 | #endif | |||
| 18229 | fts5BufferFree(&poslist)sqlite3Fts5BufferFree(&poslist); | |||
| 18230 | return fts5IndexReturn(p); | |||
| 18231 | } | |||
| 18232 | ||||
| 18233 | /************************************************************************* | |||
| 18234 | ************************************************************************** | |||
| 18235 | ** Below this point is the implementation of the fts5_decode() scalar | |||
| 18236 | ** function only. | |||
| 18237 | */ | |||
| 18238 | ||||
| 18239 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18240 | /* | |||
| 18241 | ** Decode a segment-data rowid from the %_data table. This function is | |||
| 18242 | ** the opposite of macro FTS5_SEGMENT_ROWID(). | |||
| | ** | |||
| | ** Fields are peeled off iRowid from the least significant end, shifting | |||
| | ** right after each one: page number, height, dlidx flag, segment id and | |||
| | ** finally the tombstone flag. | |||
| 18243 | */ | |||
| 18244 | static void fts5DecodeRowid( | |||
| 18245 | i64 iRowid, /* Rowid from %_data table */ | |||
| 18246 | int *pbTombstone, /* OUT: Tombstone hash flag */ | |||
| 18247 | int *piSegid, /* OUT: Segment id */ | |||
| 18248 | int *pbDlidx, /* OUT: Dlidx flag */ | |||
| 18249 | int *piHeight, /* OUT: Height */ | |||
| 18250 | int *piPgno /* OUT: Page number */ | |||
| 18251 | ){ | |||
| 18252 | *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B31) - 1)); | |||
| 18253 | iRowid >>= FTS5_DATA_PAGE_B31; | |||
| 18254 | ||||
| 18255 | *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B5) - 1)); | |||
| 18256 | iRowid >>= FTS5_DATA_HEIGHT_B5; | |||
| 18257 | ||||
| 18258 | *pbDlidx = (int)(iRowid & 0x0001); | |||
| 18259 | iRowid >>= FTS5_DATA_DLI_B1; | |||
| 18260 | ||||
| 18261 | *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B16) - 1)); | |||
| 18262 | iRowid >>= FTS5_DATA_ID_B16; | |||
| 18263 | ||||
| 18264 | *pbTombstone = (int)(iRowid & 0x0001); | |||
| 18265 | } | |||
| 18266 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18267 | ||||
| 18268 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18269 | static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ | |||
| 18270 | int iSegid, iHeight, iPgno, bDlidx, bTomb; /* Rowid components */ | |||
| 18271 | fts5DecodeRowid(iKey, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno); | |||
| 18272 | ||||
| 18273 | if( iSegid==0 ){ | |||
| 18274 | if( iKey==FTS5_AVERAGES_ROWID1 ){ | |||
| 18275 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} "); | |||
| 18276 | }else{ | |||
| 18277 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}"); | |||
| 18278 | } | |||
| 18279 | } | |||
| 18280 | else{ | |||
| 18281 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}", | |||
| 18282 | bDlidx ? "dlidx " : "", | |||
| 18283 | bTomb ? "tombstone " : "", | |||
| 18284 | iSegid, iHeight, iPgno | |||
| 18285 | ); | |||
| 18286 | } | |||
| 18287 | } | |||
| 18288 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18289 | ||||
| 18290 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18291 | static void fts5DebugStructure( | |||
| 18292 | int *pRc, /* IN/OUT: error code */ | |||
| 18293 | Fts5Buffer *pBuf, | |||
| 18294 | Fts5Structure *p | |||
| 18295 | ){ | |||
| 18296 | int iLvl, iSeg; /* Iterate through levels, segments */ | |||
| 18297 | ||||
| 18298 | for(iLvl=0; iLvl<p->nLevel; iLvl++){ | |||
| 18299 | Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; | |||
| 18300 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, | |||
| 18301 | " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg | |||
| 18302 | ); | |||
| 18303 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ | |||
| 18304 | Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; | |||
| 18305 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d", | |||
| 18306 | pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast | |||
| 18307 | ); | |||
| 18308 | if( pSeg->iOrigin1>0 ){ | |||
| 18309 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld", | |||
| 18310 | pSeg->iOrigin1, pSeg->iOrigin2 | |||
| 18311 | ); | |||
| 18312 | } | |||
| 18313 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); | |||
| 18314 | } | |||
| 18315 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); | |||
| 18316 | } | |||
| 18317 | } | |||
| 18318 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18319 | ||||
| 18320 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18321 | /* | |||
| 18322 | ** This is part of the fts5_decode() debugging aid. | |||
| 18323 | ** | |||
| 18324 | ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This | |||
| 18325 | ** function appends a human-readable representation of the same object | |||
| 18326 | ** to the buffer passed as the second argument. | |||
| 18327 | */ | |||
| 18328 | static void fts5DecodeStructure( | |||
| 18329 | int *pRc, /* IN/OUT: error code */ | |||
| 18330 | Fts5Buffer *pBuf, | |||
| 18331 | const u8 *pBlob, int nBlob | |||
| 18332 | ){ | |||
| 18333 | int rc; /* Return code */ | |||
| 18334 | Fts5Structure *p = 0; /* Decoded structure object */ | |||
| 18335 | ||||
| 18336 | rc = fts5StructureDecode(pBlob, nBlob, 0, &p); | |||
| 18337 | if( rc!=SQLITE_OK0 ){ | |||
| 18338 | *pRc = rc; | |||
| 18339 | return; | |||
| 18340 | } | |||
| 18341 | ||||
| 18342 | fts5DebugStructure(pRc, pBuf, p); | |||
| 18343 | fts5StructureRelease(p); | |||
| 18344 | } | |||
| 18345 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18346 | ||||
| 18347 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18348 | /* | |||
| 18349 | ** This is part of the fts5_decode() debugging aid. | |||
| 18350 | ** | |||
| 18351 | ** Arguments pBlob/nBlob contain an "averages" record. This function | |||
| 18352 | ** appends a human-readable representation of record to the buffer passed | |||
| 18353 | ** as the second argument. | |||
| 18354 | */ | |||
| 18355 | static void fts5DecodeAverages( | |||
| 18356 | int *pRc, /* IN/OUT: error code */ | |||
| 18357 | Fts5Buffer *pBuf, | |||
| 18358 | const u8 *pBlob, int nBlob | |||
| 18359 | ){ | |||
| 18360 | int i = 0; | |||
| 18361 | const char *zSpace = ""; | |||
| 18362 | ||||
| 18363 | while( i<nBlob ){ | |||
| 18364 | u64 iVal; | |||
| 18365 | i += sqlite3Fts5GetVarint(&pBlob[i], &iVal); | |||
| 18366 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal); | |||
| 18367 | zSpace = " "; | |||
| 18368 | } | |||
| 18369 | } | |||
| 18370 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18371 | ||||
| 18372 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18373 | /* | |||
| 18374 | ** Buffer (a/n) is assumed to contain a list of serialized varints. Read | |||
| 18375 | ** each varint and append its string representation to buffer pBuf. Return | |||
| 18376 | ** after either the input buffer is exhausted or a 0 value is read. | |||
| 18377 | ** | |||
| 18378 | ** The return value is the number of bytes read from the input buffer. | |||
| 18379 | */ | |||
| 18380 | static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ | |||
| 18381 | int iOff = 0; | |||
| 18382 | while( iOff<n ){ | |||
| 18383 | int iVal; | |||
| 18384 | iOff += fts5GetVarint32(&a[iOff], iVal)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(iVal)); | |||
| 18385 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); | |||
| 18386 | } | |||
| 18387 | return iOff; | |||
| 18388 | } | |||
| 18389 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18390 | ||||
| 18391 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18392 | /* | |||
| 18393 | ** The start of buffer (a/n) contains the start of a doclist. The doclist | |||
| 18394 | ** may or may not finish within the buffer. This function appends a text | |||
| 18395 | ** representation of the part of the doclist that is present to buffer | |||
| 18396 | ** pBuf. | |||
| 18397 | ** | |||
| 18398 | ** The return value is the number of bytes read from the input buffer. | |||
| 18399 | */ | |||
| 18400 | static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ | |||
| 18401 | i64 iDocid = 0; | |||
| 18402 | int iOff = 0; | |||
| 18403 | ||||
| 18404 | if( n>0 ){ | |||
| 18405 | iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); | |||
| 18406 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); | |||
| 18407 | } | |||
| 18408 | while( iOff<n ){ | |||
| 18409 | int nPos; | |||
| 18410 | int bDel; | |||
| 18411 | iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel); | |||
| 18412 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":""); | |||
| 18413 | iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)(((n-iOff) < (nPos)) ? (n-iOff) : (nPos))); | |||
| 18414 | if( iOff<n ){ | |||
| 18415 | i64 iDelta; | |||
| 18416 | iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); | |||
| 18417 | iDocid += iDelta; | |||
| 18418 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); | |||
| 18419 | } | |||
| 18420 | } | |||
| 18421 | ||||
| 18422 | return iOff; | |||
| 18423 | } | |||
| 18424 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18425 | ||||
| 18426 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18427 | /* | |||
| 18428 | ** This function is part of the fts5_decode() debugging function. It is | |||
| 18429 | ** only ever used with detail=none tables. | |||
| 18430 | ** | |||
| 18431 | ** Buffer (pData/nData) contains a doclist in the format used by detail=none | |||
| 18432 | ** tables. This function appends a human-readable version of that list to | |||
| 18433 | ** buffer pBuf. | |||
| 18434 | ** | |||
| 18435 | ** If *pRc is other than SQLITE_OK when this function is called, it is a | |||
| 18436 | ** no-op. If an OOM or other error occurs within this function, *pRc is | |||
| 18437 | ** set to an SQLite error code before returning. The final state of buffer | |||
| 18438 | ** pBuf is undefined in this case. | |||
| 18439 | */ | |||
| 18440 | static void fts5DecodeRowidList( | |||
| 18441 | int *pRc, /* IN/OUT: Error code */ | |||
| 18442 | Fts5Buffer *pBuf, /* Buffer to append text to */ | |||
| 18443 | const u8 *pData, int nData /* Data to decode list-of-rowids from */ | |||
| 18444 | ){ | |||
| 18445 | int i = 0; | |||
| 18446 | i64 iRowid = 0; | |||
| 18447 | ||||
| 18448 | while( i<nData ){ | |||
| 18449 | const char *zApp = ""; | |||
| 18450 | u64 iVal; | |||
| 18451 | i += sqlite3Fts5GetVarint(&pData[i], &iVal); | |||
| 18452 | iRowid += iVal; | |||
| 18453 | ||||
| 18454 | if( i<nData && pData[i]==0x00 ){ | |||
| 18455 | i++; | |||
| 18456 | if( i<nData && pData[i]==0x00 ){ | |||
| 18457 | i++; | |||
| 18458 | zApp = "+"; | |||
| 18459 | }else{ | |||
| 18460 | zApp = "*"; | |||
| 18461 | } | |||
| 18462 | } | |||
| 18463 | ||||
| 18464 | sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp); | |||
| 18465 | } | |||
| 18466 | } | |||
| 18467 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18468 | ||||
| 18469 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18470 | static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){ | |||
| 18471 | int ii; | |||
| 18472 | fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1)( (u32)((pBuf)->n) + (u32)(pTerm->n*2 + 1) <= (u32)( (pBuf)->nSpace) ? 0 : sqlite3Fts5BufferSize((pRc),(pBuf),( pTerm->n*2 + 1)+(pBuf)->n) ); | |||
| 18473 | if( *pRc==SQLITE_OK0 ){ | |||
| 18474 | for(ii=0; ii<pTerm->n; ii++){ | |||
| 18475 | if( pTerm->p[ii]==0x00 ){ | |||
| 18476 | pBuf->p[pBuf->n++] = '\\'; | |||
| 18477 | pBuf->p[pBuf->n++] = '0'; | |||
| 18478 | }else{ | |||
| 18479 | pBuf->p[pBuf->n++] = pTerm->p[ii]; | |||
| 18480 | } | |||
| 18481 | } | |||
| 18482 | pBuf->p[pBuf->n] = 0x00; | |||
| 18483 | } | |||
| 18484 | } | |||
| 18485 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18486 | ||||
| 18487 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18488 | /* | |||
| 18489 | ** The implementation of user-defined scalar function fts5_decode(). | |||
| 18490 | */ | |||
| 18491 | static void fts5DecodeFunction( | |||
| 18492 | sqlite3_context *pCtx, /* Function call context */ | |||
| 18493 | int nArg, /* Number of args (always 2) */ | |||
| 18494 | sqlite3_value **apVal /* Function arguments */ | |||
| 18495 | ){ | |||
| 18496 | i64 iRowid; /* Rowid for record being decoded */ | |||
| 18497 | int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ | |||
| 18498 | int bTomb; | |||
| 18499 | const u8 *aBlob; int n; /* Record to decode */ | |||
| 18500 | u8 *a = 0; | |||
| 18501 | Fts5Buffer s; /* Build up text to return here */ | |||
| 18502 | int rc = SQLITE_OK0; /* Return code */ | |||
| 18503 | sqlite3_int64 nSpace = 0; | |||
| 18504 | int eDetailNone = (sqlite3_user_datasqlite3_api->user_data(pCtx)!=0); | |||
| 18505 | ||||
| 18506 | assert( nArg==2 )((void) (0)); | |||
| 18507 | UNUSED_PARAM(nArg)(void)(nArg); | |||
| 18508 | memset(&s, 0, sizeof(Fts5Buffer)); | |||
| 18509 | iRowid = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); | |||
| 18510 | ||||
| 18511 | /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] | |||
| 18512 | ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents | |||
| 18513 | ** buffer overreads even if the record is corrupt. */ | |||
| 18514 | n = sqlite3_value_bytessqlite3_api->value_bytes(apVal[1]); | |||
| 18515 | aBlob = sqlite3_value_blobsqlite3_api->value_blob(apVal[1]); | |||
| 18516 | nSpace = ((i64)n) + FTS5_DATA_ZERO_PADDING8; | |||
| 18517 | a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); | |||
| 18518 | if( a==0 ) goto decode_out; | |||
| 18519 | if( n>0 ) memcpy(a, aBlob, n); | |||
| 18520 | ||||
| 18521 | fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno); | |||
| 18522 | ||||
| 18523 | fts5DebugRowid(&rc, &s, iRowid); | |||
| 18524 | if( bDlidx ){ | |||
| 18525 | Fts5Data dlidx; | |||
| 18526 | Fts5DlidxLvl lvl; | |||
| 18527 | ||||
| 18528 | dlidx.p = a; | |||
| 18529 | dlidx.nn = n; | |||
| 18530 | ||||
| 18531 | memset(&lvl, 0, sizeof(Fts5DlidxLvl)); | |||
| 18532 | lvl.pData = &dlidx; | |||
| 18533 | lvl.iLeafPgno = iPgno; | |||
| 18534 | ||||
| 18535 | for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ | |||
| 18536 | sqlite3Fts5BufferAppendPrintf(&rc, &s, | |||
| 18537 | " %d(%lld)", lvl.iLeafPgno, lvl.iRowid | |||
| 18538 | ); | |||
| 18539 | } | |||
| 18540 | }else if( bTomb ){ | |||
| 18541 | u32 nElem = fts5GetU32(&a[4]); | |||
| 18542 | int szKey = (aBlob[0]==4 || aBlob[0]==8) ? aBlob[0] : 8; | |||
| 18543 | int nSlot = (n - 8) / szKey; | |||
| 18544 | int ii; | |||
| 18545 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem); | |||
| 18546 | if( aBlob[1] ){ | |||
| 18547 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0"); | |||
| 18548 | } | |||
| 18549 | for(ii=0; ii<nSlot; ii++){ | |||
| 18550 | u64 iVal = 0; | |||
| 18551 | if( szKey==4 ){ | |||
| 18552 | u32 *aSlot = (u32*)&aBlob[8]; | |||
| 18553 | if( aSlot[ii] ) iVal = fts5GetU32((u8*)&aSlot[ii]); | |||
| 18554 | }else{ | |||
| 18555 | u64 *aSlot = (u64*)&aBlob[8]; | |||
| 18556 | if( aSlot[ii] ) iVal = fts5GetU64((u8*)&aSlot[ii]); | |||
| 18557 | } | |||
| 18558 | if( iVal!=0 ){ | |||
| 18559 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal); | |||
| 18560 | } | |||
| 18561 | } | |||
| 18562 | }else if( iSegid==0 ){ | |||
| 18563 | if( iRowid==FTS5_AVERAGES_ROWID1 ){ | |||
| 18564 | fts5DecodeAverages(&rc, &s, a, n); | |||
| 18565 | }else{ | |||
| 18566 | fts5DecodeStructure(&rc, &s, a, n); | |||
| 18567 | } | |||
| 18568 | }else if( eDetailNone ){ | |||
| 18569 | Fts5Buffer term; /* Current term read from page */ | |||
| 18570 | int szLeaf; | |||
| 18571 | int iPgidxOff = szLeaf = fts5GetU16(&a[2]); | |||
| 18572 | int iTermOff; | |||
| 18573 | int nKeep = 0; | |||
| 18574 | int iOff; | |||
| 18575 | ||||
| 18576 | memset(&term, 0, sizeof(Fts5Buffer)); | |||
| 18577 | ||||
| 18578 | /* Decode any entries that occur before the first term. */ | |||
| 18579 | if( szLeaf<n ){ | |||
| 18580 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(iTermOff )); | |||
| 18581 | }else{ | |||
| 18582 | iTermOff = szLeaf; | |||
| 18583 | } | |||
| 18584 | fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4); | |||
| 18585 | ||||
| 18586 | iOff = iTermOff; | |||
| 18587 | while( iOff<szLeaf && rc==SQLITE_OK0 ){ | |||
| 18588 | int nAppend; | |||
| 18589 | ||||
| 18590 | /* Read the term data for the next term*/ | |||
| 18591 | iOff += fts5GetVarint32(&a[iOff], nAppend)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nAppend)); | |||
| 18592 | term.n = nKeep; | |||
| 18593 | fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff])sqlite3Fts5BufferAppendBlob(&rc,&term,nAppend,&a[ iOff]); | |||
| 18594 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); | |||
| 18595 | fts5BufferAppendTerm(&rc, &s, &term); | |||
| 18596 | iOff += nAppend; | |||
| 18597 | ||||
| 18598 | /* Figure out where the doclist for this term ends */ | |||
| 18599 | if( iPgidxOff<n ){ | |||
| 18600 | int nIncr; | |||
| 18601 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nIncr)); | |||
| 18602 | iTermOff += nIncr; | |||
| 18603 | }else{ | |||
| 18604 | iTermOff = szLeaf; | |||
| 18605 | } | |||
| 18606 | if( iTermOff>szLeaf ){ | |||
| 18607 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18608 | }else{ | |||
| 18609 | fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff); | |||
| 18610 | } | |||
| 18611 | iOff = iTermOff; | |||
| 18612 | if( iOff<szLeaf ){ | |||
| 18613 | iOff += fts5GetVarint32(&a[iOff], nKeep)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nKeep)); | |||
| 18614 | } | |||
| 18615 | } | |||
| 18616 | ||||
| 18617 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | |||
| 18618 | }else{ | |||
| 18619 | Fts5Buffer term; /* Current term read from page */ | |||
| 18620 | int szLeaf; /* Offset of pgidx in a[] */ | |||
| 18621 | int iPgidxOff; | |||
| 18622 | int iPgidxPrev = 0; /* Previous value read from pgidx */ | |||
| 18623 | int iTermOff = 0; | |||
| 18624 | int iRowidOff = 0; | |||
| 18625 | int iOff; | |||
| 18626 | int nDoclist; | |||
| 18627 | ||||
| 18628 | memset(&term, 0, sizeof(Fts5Buffer)); | |||
| 18629 | ||||
| 18630 | if( n<4 ){ | |||
| 18631 | sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt"); | |||
| 18632 | goto decode_out; | |||
| 18633 | }else{ | |||
| 18634 | iRowidOff = fts5GetU16(&a[0]); | |||
| 18635 | iPgidxOff = szLeaf = fts5GetU16(&a[2]); | |||
| 18636 | if( iPgidxOff<n ){ | |||
| 18637 | fts5GetVarint32(&a[iPgidxOff], iTermOff)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(iTermOff )); | |||
| 18638 | }else if( iPgidxOff>n ){ | |||
| 18639 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18640 | goto decode_out; | |||
| 18641 | } | |||
| 18642 | } | |||
| 18643 | ||||
| 18644 | /* Decode the position list tail at the start of the page */ | |||
| 18645 | if( iRowidOff!=0 ){ | |||
| 18646 | iOff = iRowidOff; | |||
| 18647 | }else if( iTermOff!=0 ){ | |||
| 18648 | iOff = iTermOff; | |||
| 18649 | }else{ | |||
| 18650 | iOff = szLeaf; | |||
| 18651 | } | |||
| 18652 | if( iOff>n ){ | |||
| 18653 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18654 | goto decode_out; | |||
| 18655 | } | |||
| 18656 | fts5DecodePoslist(&rc, &s, &a[4], iOff-4); | |||
| 18657 | ||||
| 18658 | /* Decode any more doclist data that appears on the page before the | |||
| 18659 | ** first term. */ | |||
| 18660 | nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff; | |||
| 18661 | if( nDoclist+iOff>n ){ | |||
| 18662 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18663 | goto decode_out; | |||
| 18664 | } | |||
| 18665 | fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); | |||
| 18666 | ||||
| 18667 | while( iPgidxOff<n && rc==SQLITE_OK0 ){ | |||
| 18668 | int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ | |||
| 18669 | int nByte; /* Bytes of data */ | |||
| 18670 | int iEnd; | |||
| 18671 | ||||
| 18672 | iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nByte)); | |||
| 18673 | iPgidxPrev += nByte; | |||
| 18674 | iOff = iPgidxPrev; | |||
| 18675 | ||||
| 18676 | if( iPgidxOff<n ){ | |||
| 18677 | fts5GetVarint32(&a[iPgidxOff], nByte)sqlite3Fts5GetVarint32(&a[iPgidxOff],(u32*)&(nByte)); | |||
| 18678 | iEnd = iPgidxPrev + nByte; | |||
| 18679 | }else{ | |||
| 18680 | iEnd = szLeaf; | |||
| 18681 | } | |||
| 18682 | if( iEnd>szLeaf ){ | |||
| 18683 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18684 | break; | |||
| 18685 | } | |||
| 18686 | ||||
| 18687 | if( bFirst==0 ){ | |||
| 18688 | iOff += fts5GetVarint32(&a[iOff], nByte)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nByte)); | |||
| 18689 | if( nByte>term.n ){ | |||
| 18690 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18691 | break; | |||
| 18692 | } | |||
| 18693 | term.n = nByte; | |||
| 18694 | } | |||
| 18695 | iOff += fts5GetVarint32(&a[iOff], nByte)sqlite3Fts5GetVarint32(&a[iOff],(u32*)&(nByte)); | |||
| 18696 | if( iOff+nByte>n ){ | |||
| 18697 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 18698 | break; | |||
| 18699 | } | |||
| 18700 | fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff])sqlite3Fts5BufferAppendBlob(&rc,&term,nByte,&a[iOff ]); | |||
| 18701 | iOff += nByte; | |||
| 18702 | ||||
| 18703 | sqlite3Fts5BufferAppendPrintf(&rc, &s, " term="); | |||
| 18704 | fts5BufferAppendTerm(&rc, &s, &term); | |||
| 18705 | iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); | |||
| 18706 | } | |||
| 18707 | ||||
| 18708 | fts5BufferFree(&term)sqlite3Fts5BufferFree(&term); | |||
| 18709 | } | |||
| 18710 | ||||
| 18711 | decode_out: | |||
| 18712 | sqlite3_freesqlite3_api->free(a); | |||
| 18713 | if( rc==SQLITE_OK0 ){ | |||
| 18714 | sqlite3_result_textsqlite3_api->result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 18715 | }else{ | |||
| 18716 | sqlite3_result_error_codesqlite3_api->result_error_code(pCtx, rc); | |||
| 18717 | } | |||
| 18718 | fts5BufferFree(&s)sqlite3Fts5BufferFree(&s); | |||
| 18719 | } | |||
| 18720 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18721 | ||||
| 18722 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18723 | /* | |||
| 18724 | ** The implementation of user-defined scalar function fts5_rowid(). | |||
| 18725 | */ | |||
| 18726 | static void fts5RowidFunction( | |||
| 18727 | sqlite3_context *pCtx, /* Function call context */ | |||
| 18728 | int nArg, /* Number of args (always 2) */ | |||
| 18729 | sqlite3_value **apVal /* Function arguments */ | |||
| 18730 | ){ | |||
| 18731 | const char *zArg; | |||
| 18732 | if( nArg==0 ){ | |||
| 18733 | sqlite3_result_errorsqlite3_api->result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); | |||
| 18734 | }else{ | |||
| 18735 | zArg = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[0]); | |||
| 18736 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(zArg, "segment") ){ | |||
| 18737 | i64 iRowid; | |||
| 18738 | int segid, pgno; | |||
| 18739 | if( nArg!=3 ){ | |||
| 18740 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | |||
| 18741 | "should be: fts5_rowid('segment', segid, pgno))", -1 | |||
| 18742 | ); | |||
| 18743 | }else{ | |||
| 18744 | segid = sqlite3_value_intsqlite3_api->value_int(apVal[1]); | |||
| 18745 | pgno = sqlite3_value_intsqlite3_api->value_int(apVal[2]); | |||
| 18746 | iRowid = FTS5_SEGMENT_ROWID(segid, pgno)( ((i64)(segid) << (31 +5 +1)) + ((i64)(0) << (31 + 5)) + ((i64)(0) << (31)) + ((i64)(pgno)) ); | |||
| 18747 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, iRowid); | |||
| 18748 | } | |||
| 18749 | }else{ | |||
| 18750 | sqlite3_result_errorsqlite3_api->result_error(pCtx, | |||
| 18751 | "first arg to fts5_rowid() must be 'segment'" , -1 | |||
| 18752 | ); | |||
| 18753 | } | |||
| 18754 | } | |||
| 18755 | } | |||
| 18756 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18757 | ||||
| 18758 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18759 | ||||
| 18760 | typedef struct Fts5StructVtab Fts5StructVtab; | |||
| 18761 | struct Fts5StructVtab { | |||
| 18762 | sqlite3_vtab base; | |||
| 18763 | }; | |||
| 18764 | ||||
| 18765 | typedef struct Fts5StructVcsr Fts5StructVcsr; | |||
| 18766 | struct Fts5StructVcsr { | |||
| 18767 | sqlite3_vtab_cursor base; | |||
| 18768 | Fts5Structure *pStruct; | |||
| 18769 | int iLevel; | |||
| 18770 | int iSeg; | |||
| 18771 | int iRowid; | |||
| 18772 | }; | |||
| 18773 | ||||
| 18774 | /* | |||
| 18775 | ** Create a new fts5_structure() table-valued function. | |||
| 18776 | */ | |||
| 18777 | static int fts5structConnectMethod( | |||
| 18778 | sqlite3 *db, | |||
| 18779 | void *pAux, | |||
| 18780 | int argc, const char *const*argv, | |||
| 18781 | sqlite3_vtab **ppVtab, | |||
| 18782 | char **pzErr | |||
| 18783 | ){ | |||
| 18784 | Fts5StructVtab *pNew = 0; | |||
| 18785 | int rc = SQLITE_OK0; | |||
| 18786 | ||||
| 18787 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, | |||
| 18788 | "CREATE TABLE xyz(" | |||
| 18789 | "level, segment, merge, segid, leaf1, leaf2, loc1, loc2, " | |||
| 18790 | "npgtombstone, nentrytombstone, nentry, struct HIDDEN);" | |||
| 18791 | ); | |||
| 18792 | if( rc==SQLITE_OK0 ){ | |||
| 18793 | pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | |||
| 18794 | } | |||
| 18795 | ||||
| 18796 | *ppVtab = (sqlite3_vtab*)pNew; | |||
| 18797 | return rc; | |||
| 18798 | } | |||
| 18799 | ||||
| 18800 | /* | |||
| 18801 | ** We must have a single struct=? constraint that will be passed through | |||
| 18802 | ** into the xFilter method. If there is no valid struct=? constraint, | |||
| 18803 | ** then return an SQLITE_CONSTRAINT error. | |||
| 18804 | */ | |||
| 18805 | static int fts5structBestIndexMethod( | |||
| 18806 | sqlite3_vtab *tab, | |||
| 18807 | sqlite3_index_info *pIdxInfo | |||
| 18808 | ){ | |||
| 18809 | int i; | |||
| 18810 | int rc = SQLITE_CONSTRAINT19; | |||
| 18811 | struct sqlite3_index_constraint *p; | |||
| 18812 | pIdxInfo->estimatedCost = (double)100; | |||
| 18813 | pIdxInfo->estimatedRows = 100; | |||
| 18814 | pIdxInfo->idxNum = 0; | |||
| 18815 | for(i=0, p=pIdxInfo->aConstraint; i<pIdxInfo->nConstraint; i++, p++){ | |||
| 18816 | if( p->usable==0 ) continue; | |||
| 18817 | if( p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && p->iColumn==11 ){ | |||
| 18818 | rc = SQLITE_OK0; | |||
| 18819 | pIdxInfo->aConstraintUsage[i].omit = 1; | |||
| 18820 | pIdxInfo->aConstraintUsage[i].argvIndex = 1; | |||
| 18821 | break; | |||
| 18822 | } | |||
| 18823 | } | |||
| 18824 | return rc; | |||
| 18825 | } | |||
| 18826 | ||||
| 18827 | /* | |||
| 18828 | ** This method is the destructor for bytecodevtab objects. | |||
| 18829 | */ | |||
| 18830 | static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){ | |||
| 18831 | Fts5StructVtab *p = (Fts5StructVtab*)pVtab; | |||
| 18832 | sqlite3_freesqlite3_api->free(p); | |||
| 18833 | return SQLITE_OK0; | |||
| 18834 | } | |||
| 18835 | ||||
| 18836 | /* | |||
| 18837 | ** Constructor for a new bytecodevtab_cursor object. | |||
| 18838 | */ | |||
| 18839 | static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){ | |||
| 18840 | int rc = SQLITE_OK0; | |||
| 18841 | Fts5StructVcsr *pNew = 0; | |||
| 18842 | ||||
| 18843 | pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | |||
| 18844 | *ppCsr = (sqlite3_vtab_cursor*)pNew; | |||
| 18845 | ||||
| 18846 | return SQLITE_OK0; | |||
| 18847 | } | |||
| 18848 | ||||
| 18849 | /* | |||
| 18850 | ** Destructor for a bytecodevtab_cursor. | |||
| 18851 | */ | |||
| 18852 | static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){ | |||
| 18853 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
| 18854 | fts5StructureRelease(pCsr->pStruct); | |||
| 18855 | sqlite3_freesqlite3_api->free(pCsr); | |||
| 18856 | return SQLITE_OK0; | |||
| 18857 | } | |||
| 18858 | ||||
| 18859 | ||||
| 18860 | /* | |||
| 18861 | ** Advance a bytecodevtab_cursor to its next row of output. | |||
| 18862 | */ | |||
| 18863 | static int fts5structNextMethod(sqlite3_vtab_cursor *cur){ | |||
| 18864 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
| 18865 | Fts5Structure *p = pCsr->pStruct; | |||
| 18866 | ||||
| 18867 | assert( pCsr->pStruct )((void) (0)); | |||
| 18868 | pCsr->iSeg++; | |||
| 18869 | pCsr->iRowid++; | |||
| 18870 | while( pCsr->iLevel<p->nLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){ | |||
| 18871 | pCsr->iLevel++; | |||
| 18872 | pCsr->iSeg = 0; | |||
| 18873 | } | |||
| 18874 | if( pCsr->iLevel>=p->nLevel ){ | |||
| 18875 | fts5StructureRelease(pCsr->pStruct); | |||
| 18876 | pCsr->pStruct = 0; | |||
| 18877 | } | |||
| 18878 | return SQLITE_OK0; | |||
| 18879 | } | |||
| 18880 | ||||
| 18881 | /* | |||
| 18882 | ** Return TRUE if the cursor has been moved off of the last | |||
| 18883 | ** row of output. | |||
| 18884 | */ | |||
| 18885 | static int fts5structEofMethod(sqlite3_vtab_cursor *cur){ | |||
| 18886 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
| 18887 | return pCsr->pStruct==0; | |||
| 18888 | } | |||
| 18889 | ||||
| 18890 | static int fts5structRowidMethod( | |||
| 18891 | sqlite3_vtab_cursor *cur, | |||
| 18892 | sqlite_int64 *piRowid | |||
| 18893 | ){ | |||
| 18894 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
| 18895 | *piRowid = pCsr->iRowid; | |||
| 18896 | return SQLITE_OK0; | |||
| 18897 | } | |||
| 18898 | ||||
| 18899 | /* | |||
| 18900 | ** Return values of columns for the row at which the bytecodevtab_cursor | |||
| 18901 | ** is currently pointing. | |||
| 18902 | */ | |||
| 18903 | static int fts5structColumnMethod( | |||
| 18904 | sqlite3_vtab_cursor *cur, /* The cursor */ | |||
| 18905 | sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ | |||
| 18906 | int i /* Which column to return */ | |||
| 18907 | ){ | |||
| 18908 | Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; | |||
| 18909 | Fts5Structure *p = pCsr->pStruct; | |||
| 18910 | Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg]; | |||
| 18911 | ||||
| 18912 | switch( i ){ | |||
| 18913 | case 0: /* level */ | |||
| 18914 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iLevel); | |||
| 18915 | break; | |||
| 18916 | case 1: /* segment */ | |||
| 18917 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iSeg); | |||
| 18918 | break; | |||
| 18919 | case 2: /* merge */ | |||
| 18920 | sqlite3_result_intsqlite3_api->result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge); | |||
| 18921 | break; | |||
| 18922 | case 3: /* segid */ | |||
| 18923 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->iSegid); | |||
| 18924 | break; | |||
| 18925 | case 4: /* leaf1 */ | |||
| 18926 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->pgnoFirst); | |||
| 18927 | break; | |||
| 18928 | case 5: /* leaf2 */ | |||
| 18929 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->pgnoLast); | |||
| 18930 | break; | |||
| 18931 | case 6: /* origin1 */ | |||
| 18932 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->iOrigin1); | |||
| 18933 | break; | |||
| 18934 | case 7: /* origin2 */ | |||
| 18935 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->iOrigin2); | |||
| 18936 | break; | |||
| 18937 | case 8: /* npgtombstone */ | |||
| 18938 | sqlite3_result_intsqlite3_api->result_int(ctx, pSeg->nPgTombstone); | |||
| 18939 | break; | |||
| 18940 | case 9: /* nentrytombstone */ | |||
| 18941 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->nEntryTombstone); | |||
| 18942 | break; | |||
| 18943 | case 10: /* nentry */ | |||
| 18944 | sqlite3_result_int64sqlite3_api->result_int64(ctx, pSeg->nEntry); | |||
| 18945 | break; | |||
| 18946 | } | |||
| 18947 | return SQLITE_OK0; | |||
| 18948 | } | |||
| 18949 | ||||
| 18950 | /* | |||
| 18951 | ** Initialize a cursor. | |||
| 18952 | ** | |||
| 18953 | ** idxNum==0 means show all subprograms | |||
| 18954 | ** idxNum==1 means show only the main bytecode and omit subprograms. | |||
| 18955 | */ | |||
| 18956 | static int fts5structFilterMethod( | |||
| 18957 | sqlite3_vtab_cursor *pVtabCursor, | |||
| 18958 | int idxNum, const char *idxStr, | |||
| 18959 | int argc, sqlite3_value **argv | |||
| 18960 | ){ | |||
| 18961 | Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor; | |||
| 18962 | int rc = SQLITE_OK0; | |||
| 18963 | ||||
| 18964 | const u8 *aBlob = 0; | |||
| 18965 | int nBlob = 0; | |||
| 18966 | ||||
| 18967 | assert( argc==1 )((void) (0)); | |||
| 18968 | fts5StructureRelease(pCsr->pStruct); | |||
| 18969 | pCsr->pStruct = 0; | |||
| 18970 | ||||
| 18971 | nBlob = sqlite3_value_bytessqlite3_api->value_bytes(argv[0]); | |||
| 18972 | aBlob = (const u8*)sqlite3_value_blobsqlite3_api->value_blob(argv[0]); | |||
| 18973 | rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct); | |||
| 18974 | if( rc==SQLITE_OK0 ){ | |||
| 18975 | pCsr->iLevel = 0; | |||
| 18976 | pCsr->iRowid = 0; | |||
| 18977 | pCsr->iSeg = -1; | |||
| 18978 | rc = fts5structNextMethod(pVtabCursor); | |||
| 18979 | } | |||
| 18980 | ||||
| 18981 | return rc; | |||
| 18982 | } | |||
| 18983 | ||||
| 18984 | #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ | |||
| 18985 | ||||
| 18986 | /* | |||
| 18987 | ** This is called as part of registering the FTS5 module with database | |||
| 18988 | ** connection db. In SQLITE_TEST or SQLITE_FTS5_DEBUG builds it registers the | |||
| 18989 | ** scalar functions fts5_decode(), fts5_decode_none() and fts5_rowid(), plus | |||
| 18990 | ** the "fts5_structure" virtual table module. In all other builds it is a no-op. | |||
| 18991 | ** If successful, SQLITE_OK is returned. If an error occurs, some other | |||
| 18992 | ** SQLite error code is returned instead. | |||
| 18993 | */ | |||
| 18994 | static int sqlite3Fts5IndexInit(sqlite3 *db){ | |||
| 18995 | #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) | |||
| 18996 | int rc = sqlite3_create_functionsqlite3_api->create_function( | |||
| 18997 | db, "fts5_decode", 2, SQLITE_UTF81, 0, fts5DecodeFunction, 0, 0 | |||
| 18998 | ); | |||
| 18999 | ||||
| 19000 | if( rc==SQLITE_OK0 ){ | |||
| 19001 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
| 19002 | db, "fts5_decode_none", 2, | |||
| 19003 | SQLITE_UTF81, (void*)db, fts5DecodeFunction, 0, 0 /* same callback as fts5_decode, but with db as user data */ | |||
| 19004 | ); | |||
| 19005 | } | |||
| 19006 | ||||
| 19007 | if( rc==SQLITE_OK0 ){ | |||
| 19008 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
| 19009 | db, "fts5_rowid", -1, SQLITE_UTF81, 0, fts5RowidFunction, 0, 0 | |||
| 19010 | ); | |||
| 19011 | } | |||
| 19012 | ||||
| 19013 | if( rc==SQLITE_OK0 ){ | |||
| 19014 | static const sqlite3_module fts5structure_module = { | |||
| 19015 | 0, /* iVersion */ | |||
| 19016 | 0, /* xCreate */ | |||
| 19017 | fts5structConnectMethod, /* xConnect */ | |||
| 19018 | fts5structBestIndexMethod, /* xBestIndex */ | |||
| 19019 | fts5structDisconnectMethod, /* xDisconnect */ | |||
| 19020 | 0, /* xDestroy */ | |||
| 19021 | fts5structOpenMethod, /* xOpen */ | |||
| 19022 | fts5structCloseMethod, /* xClose */ | |||
| 19023 | fts5structFilterMethod, /* xFilter */ | |||
| 19024 | fts5structNextMethod, /* xNext */ | |||
| 19025 | fts5structEofMethod, /* xEof */ | |||
| 19026 | fts5structColumnMethod, /* xColumn */ | |||
| 19027 | fts5structRowidMethod, /* xRowid */ | |||
| 19028 | 0, /* xUpdate */ | |||
| 19029 | 0, /* xBegin */ | |||
| 19030 | 0, /* xSync */ | |||
| 19031 | 0, /* xCommit */ | |||
| 19032 | 0, /* xRollback */ | |||
| 19033 | 0, /* xFindFunction */ | |||
| 19034 | 0, /* xRename */ | |||
| 19035 | 0, /* xSavepoint */ | |||
| 19036 | 0, /* xRelease */ | |||
| 19037 | 0, /* xRollbackTo */ | |||
| 19038 | 0, /* xShadowName */ | |||
| 19039 | 0 /* xIntegrity */ | |||
| 19040 | }; | |||
| 19041 | rc = sqlite3_create_modulesqlite3_api->create_module(db, "fts5_structure", &fts5structure_module, 0); | |||
| 19042 | } | |||
| 19043 | return rc; | |||
| 19044 | #else | |||
| 19045 | return SQLITE_OK0; | |||
| 19046 | UNUSED_PARAM(db)(void)(db); /* never executed (follows the return); NOTE(review): presumably only here to reference db */ | |||
| 19047 | #endif | |||
| 19048 | } | |||
| 19049 | ||||
| 19050 | ||||
| 19051 | static int sqlite3Fts5IndexReset(Fts5Index *p){ /* Invalidate p's structure if its data version is stale */ | |||
| 19052 | assert( p->pStruct==0 || p->iStructVersion!=0 )((void) (0)); | |||
| 19053 | if( fts5IndexDataVersion(p)!=p->iStructVersion ){ /* current data version differs from the recorded one */ | |||
| 19054 | fts5StructureInvalidate(p); | |||
| 19055 | } | |||
| 19056 | return fts5IndexReturn(p); /* result is whatever fts5IndexReturn() reports for p */ | |||
| 19057 | } | |||
| 19058 | ||||
| 19059 | #line 1 "fts5_main.c" | |||
| 19060 | /* | |||
| 19061 | ** 2014 Jun 09 | |||
| 19062 | ** | |||
| 19063 | ** The author disclaims copyright to this source code. In place of | |||
| 19064 | ** a legal notice, here is a blessing: | |||
| 19065 | ** | |||
| 19066 | ** May you do good and not evil. | |||
| 19067 | ** May you find forgiveness for yourself and forgive others. | |||
| 19068 | ** May you share freely, never taking more than you give. | |||
| 19069 | ** | |||
| 19070 | ****************************************************************************** | |||
| 19071 | ** | |||
| 19072 | ** This is an SQLite module implementing full-text search. | |||
| 19073 | */ | |||
| 19074 | ||||
| 19075 | ||||
| 19076 | /* #include "fts5Int.h" */ | |||
| 19077 | ||||
| 19078 | /* | |||
| 19079 | ** This variable is set to false when running tests for which the on disk | |||
| 19080 | ** structures should not be corrupt. Otherwise, true. If it is false, extra | |||
| 19081 | ** assert() conditions in the fts5 code are activated - conditions that are | |||
| 19082 | ** only true if it is guaranteed that the fts5 database is not corrupt. | |||
| 19083 | */ | |||
| 19084 | #ifdef SQLITE_DEBUG | |||
| 19085 | int sqlite3_fts5_may_be_corrupt = 1; | |||
| 19086 | #endif | |||
| 19087 | ||||
| 19088 | ||||
| 19089 | typedef struct Fts5Auxdata Fts5Auxdata; | |||
| 19090 | typedef struct Fts5Auxiliary Fts5Auxiliary; | |||
| 19091 | typedef struct Fts5Cursor Fts5Cursor; | |||
| 19092 | typedef struct Fts5FullTable Fts5FullTable; | |||
| 19093 | typedef struct Fts5Sorter Fts5Sorter; | |||
| 19094 | typedef struct Fts5TokenizerModule Fts5TokenizerModule; | |||
| 19095 | ||||
| 19096 | /* | |||
| 19097 | ** NOTES ON TRANSACTIONS: | |||
| 19098 | ** | |||
| 19099 | ** SQLite invokes the following virtual table methods as transactions are | |||
| 19100 | ** opened and closed by the user: | |||
| 19101 | ** | |||
| 19102 | ** xBegin(): Start of a new transaction. | |||
| 19103 | ** xSync(): Initial part of two-phase commit. | |||
| 19104 | ** xCommit(): Final part of two-phase commit. | |||
| 19105 | ** xRollback(): Rollback the transaction. | |||
| 19106 | ** | |||
| 19107 | ** Anything that is required as part of a commit that may fail is performed | |||
| 19108 | ** in the xSync() callback. Current versions of SQLite ignore any errors | |||
| 19109 | ** returned by xCommit(). | |||
| 19110 | ** | |||
| 19111 | ** And as sub-transactions are opened/closed: | |||
| 19112 | ** | |||
| 19113 | ** xSavepoint(int S): Open savepoint S. | |||
| 19114 | ** xRelease(int S): Commit and close savepoint S. | |||
| 19115 | ** xRollbackTo(int S): Rollback to start of savepoint S. | |||
| 19116 | ** | |||
| 19117 | ** During a write-transaction the fts5_index.c module may cache some data | |||
| 19118 | ** in-memory. It is flushed to disk whenever xSync(), xRelease() or | |||
| 19119 | ** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo() | |||
| 19120 | ** is called. | |||
| 19121 | ** | |||
| 19122 | ** Additionally, if SQLITE_DEBUG is defined, an instance of the following | |||
| 19123 | ** structure is used to record the current transaction state. This information | |||
| 19124 | ** is not required, but it is used in the assert() statements executed by | |||
| 19125 | ** function fts5CheckTransactionState() (see below). | |||
| 19126 | */ | |||
| 19127 | struct Fts5TransactionState { | |||
| 19128 | int eState; /* 0==closed, 1==open, 2==synced */ | |||
| 19129 | int iSavepoint; /* Index of newest open savepoint (-1 -> none) */ | |||
| 19130 | }; | |||
| 19131 | ||||
| 19132 | /* | |||
| 19133 | ** A single object of this type is allocated when the FTS5 module is | |||
| 19134 | ** registered with a database handle. It is used to store pointers to | |||
| 19135 | ** all registered FTS5 extensions - tokenizers and auxiliary functions. | |||
| 19136 | */ | |||
| 19137 | struct Fts5Global { | |||
| 19138 | fts5_api api; /* User visible part of object (see fts5.h) */ | |||
| 19139 | sqlite3 *db; /* Associated database connection */ | |||
| 19140 | i64 iNextId; /* Used to allocate unique cursor ids */ | |||
| 19141 | Fts5Auxiliary *pAux; /* First in list of all aux. functions */ | |||
| 19142 | Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ | |||
| 19143 | Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ | |||
| 19144 | Fts5Cursor *pCsr; /* First in list of all open cursors */ | |||
| 19145 | u32 aLocaleHdr[4]; /* Copy of the header on fts5_locale() values */ | |||
| 19146 | }; | |||
| 19147 | ||||
| 19148 | /* | |||
| 19149 | ** Size of header on fts5_locale() values. And macro to access a buffer | |||
| 19150 | ** containing a copy of the header from an Fts5Config pointer. | |||
| 19151 | */ | |||
| 19152 | #define FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) | |||
| 19153 | #define FTS5_LOCALE_HDR(pConfig)((const u8*)(pConfig->pGlobal->aLocaleHdr)) ((const u8*)(pConfig->pGlobal->aLocaleHdr)) | |||
| 19154 | ||||
| 19155 | #define FTS5_INSTTOKEN_SUBTYPE73 73 | |||
| 19156 | ||||
| 19157 | /* | |||
| 19158 | ** Each auxiliary function registered with the FTS5 module is represented | |||
| 19159 | ** by an object of the following type. All such objects are stored as part | |||
| 19160 | ** of the Fts5Global.pAux list. | |||
| 19161 | */ | |||
| 19162 | struct Fts5Auxiliary { | |||
| 19163 | Fts5Global *pGlobal; /* Global context for this function */ | |||
| 19164 | char *zFunc; /* Function name (nul-terminated) */ | |||
| 19165 | void *pUserData; /* User-data pointer */ | |||
| 19166 | fts5_extension_function xFunc; /* Callback function */ | |||
| 19167 | void (*xDestroy)(void*); /* Destructor function */ | |||
| 19168 | Fts5Auxiliary *pNext; /* Next registered auxiliary function */ | |||
| 19169 | }; | |||
| 19170 | ||||
| 19171 | /* | |||
| 19172 | ** Each tokenizer module registered with the FTS5 module is represented | |||
| 19173 | ** by an object of the following type. All such objects are stored as part | |||
| 19174 | ** of the Fts5Global.pTok list. | |||
| 19175 | ** | |||
| 19176 | ** bV2Native: | |||
| 19177 | ** True if the tokenizer was registered using xCreateTokenizer_v2(), false | |||
| 19178 | ** for xCreateTokenizer(). If this variable is true, then x2 is populated | |||
| 19179 | ** with the routines as supplied by the caller and x1 contains synthesized | |||
| 19180 | ** wrapper routines. In this case the user-data pointer passed to | |||
| 19181 | ** x1.xCreate should be a pointer to the Fts5TokenizerModule structure, | |||
| 19182 | ** not a copy of pUserData. | |||
| 19183 | ** | |||
| 19184 | ** Of course, if bV2Native is false, then x1 contains the real routines and | |||
| 19185 | ** x2 the synthesized ones. In this case a pointer to the Fts5TokenizerModule | |||
| 19186 | ** object should be passed to x2.xCreate. | |||
| 19187 | ** | |||
| 19188 | ** The synthesized wrapper routines are necessary for xFindTokenizer(_v2) | |||
| 19189 | ** calls. | |||
| 19190 | */ | |||
| 19191 | struct Fts5TokenizerModule { | |||
| 19192 | char *zName; /* Name of tokenizer */ | |||
| 19193 | void *pUserData; /* User pointer passed to xCreate() */ | |||
| 19194 | int bV2Native; /* True if v2 native tokenizer */ | |||
| 19195 | fts5_tokenizer x1; /* Tokenizer functions */ | |||
| 19196 | fts5_tokenizer_v2 x2; /* V2 tokenizer functions */ | |||
| 19197 | void (*xDestroy)(void*); /* Destructor function */ | |||
| 19198 | Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ | |||
| 19199 | }; | |||
| 19200 | ||||
| 19201 | struct Fts5FullTable { | |||
| 19202 | Fts5Table p; /* Public class members from fts5Int.h */ | |||
| 19203 | Fts5Storage *pStorage; /* Document store */ | |||
| 19204 | Fts5Global *pGlobal; /* Global (connection wide) data */ | |||
| 19205 | Fts5Cursor *pSortCsr; /* Sort data from this cursor */ | |||
| 19206 | int iSavepoint; /* Successful xSavepoint()+1 */ | |||
| 19207 | ||||
| 19208 | #ifdef SQLITE_DEBUG | |||
| 19209 | struct Fts5TransactionState ts; | |||
| 19210 | #endif | |||
| 19211 | }; | |||
| 19212 | ||||
| 19213 | struct Fts5MatchPhrase { | |||
| 19214 | Fts5Buffer *pPoslist; /* Pointer to current poslist */ | |||
| 19215 | int nTerm; /* Size of phrase in terms */ | |||
| 19216 | }; | |||
| 19217 | ||||
| 19218 | /* | |||
| 19219 | ** pStmt: | |||
| 19220 | ** SELECT rowid, <fts> FROM <fts> ORDER BY +rank; | |||
| 19221 | ** | |||
| 19222 | ** aIdx[]: | |||
| 19223 | ** There is one entry in the aIdx[] array for each phrase in the query, | |||
| 19224 | ** the value of which is the offset within aPoslist[] following the last | |||
| 19225 | ** byte of the position list for the corresponding phrase. | |||
| 19226 | */ | |||
| 19227 | struct Fts5Sorter { | |||
| 19228 | sqlite3_stmt *pStmt; | |||
| 19229 | i64 iRowid; /* Current rowid */ | |||
| 19230 | const u8 *aPoslist; /* Position lists for current row */ | |||
| 19231 | int nIdx; /* Number of entries in aIdx[] */ | |||
| 19232 | int aIdx[FLEXARRAY]; /* Offsets into aPoslist for current row */ | |||
| 19233 | }; | |||
| 19234 | ||||
| 19235 | /* Size (in bytes) of an Fts5Sorter object with N indexes */ | |||
| 19236 | #define SZ_FTS5SORTER(N)(__builtin_offsetof(Fts5Sorter, nIdx)+((N+2)/2)*sizeof(i64)) (offsetof(Fts5Sorter,nIdx)__builtin_offsetof(Fts5Sorter, nIdx)+((N+2)/2)*sizeof(i64)) | |||
| 19237 | ||||
| 19238 | /* | |||
| 19239 | ** Virtual-table cursor object. | |||
| 19240 | ** | |||
| 19241 | ** iSpecial: | |||
| 19242 | ** If this is a 'special' query (refer to function fts5SpecialMatch()), | |||
| 19243 | ** then this variable contains the result of the query. | |||
| 19244 | ** | |||
| 19245 | ** iFirstRowid, iLastRowid: | |||
| 19246 | ** These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the | |||
| 19247 | ** cursor iterates in ascending order of rowids, iFirstRowid is the lower | |||
| 19248 | ** limit of rowids to return, and iLastRowid the upper. In other words, the | |||
| 19249 | ** WHERE clause in the user's query might have been: | |||
| 19250 | ** | |||
| 19251 | ** <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid | |||
| 19252 | ** | |||
| 19253 | ** If the cursor iterates in descending order of rowid, iFirstRowid | |||
| 19254 | ** is the upper limit (i.e. the "first" rowid visited) and iLastRowid | |||
| 19255 | ** the lower. | |||
| 19256 | */ | |||
| 19257 | struct Fts5Cursor { | |||
| 19258 | sqlite3_vtab_cursor base; /* Base class used by SQLite core */ | |||
| 19259 | Fts5Cursor *pNext; /* Next cursor in Fts5Global.pCsr list */ | |||
| 19260 | int *aColumnSize; /* Values for xColumnSize() */ | |||
| 19261 | i64 iCsrId; /* Cursor id */ | |||
| 19262 | ||||
| 19263 | /* Zero from this point onwards on cursor reset */ | |||
| 19264 | int ePlan; /* FTS5_PLAN_XXX value */ | |||
| 19265 | int bDesc; /* True for "ORDER BY rowid DESC" queries */ | |||
| 19266 | i64 iFirstRowid; /* Return no rowids earlier than this */ | |||
| 19267 | i64 iLastRowid; /* Return no rowids later than this */ | |||
| 19268 | sqlite3_stmt *pStmt; /* Statement used to read %_content */ | |||
| 19269 | Fts5Expr *pExpr; /* Expression for MATCH queries */ | |||
| 19270 | Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ | |||
| 19271 | int csrflags; /* Mask of cursor flags (see below) */ | |||
| 19272 | i64 iSpecial; /* Result of special query */ | |||
| 19273 | ||||
| 19274 | /* "rank" function. Populated on demand from vtab.xColumn(). */ | |||
| 19275 | char *zRank; /* Custom rank function */ | |||
| 19276 | char *zRankArgs; /* Custom rank function args */ | |||
| 19277 | Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ | |||
| 19278 | int nRankArg; /* Number of trailing arguments for rank() */ | |||
| 19279 | sqlite3_value **apRankArg; /* Array of trailing arguments */ | |||
| 19280 | sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */ | |||
| 19281 | ||||
| 19282 | /* Auxiliary data storage */ | |||
| 19283 | Fts5Auxiliary *pAux; /* Currently executing extension function */ | |||
| 19284 | Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ | |||
| 19285 | ||||
| 19286 | /* Cache used by auxiliary API functions xInst() and xInstCount() */ | |||
| 19287 | Fts5PoslistReader *aInstIter; /* One for each phrase */ | |||
| 19288 | int nInstAlloc; /* Size of aInst[] array (entries / 3) */ | |||
| 19289 | int nInstCount; /* Number of phrase instances */ | |||
| 19290 | int *aInst; /* 3 integers per phrase instance */ | |||
| 19291 | }; | |||
| 19292 | ||||
| 19293 | /* | |||
| 19294 | ** Bits that make up the "idxNum" parameter passed indirectly by | |||
| 19295 | ** xBestIndex() to xFilter(). | |||
| 19296 | */ | |||
| 19297 | #define FTS5_BI_MATCH0x0001 0x0001 /* <tbl> MATCH ? */ | |||
| 19298 | #define FTS5_BI_RANK0x0002 0x0002 /* rank MATCH ? */ | |||
| 19299 | #define FTS5_BI_ROWID_EQ0x0004 0x0004 /* rowid == ? */ | |||
| 19300 | #define FTS5_BI_ROWID_LE0x0008 0x0008 /* rowid <= ? */ | |||
| 19301 | #define FTS5_BI_ROWID_GE0x0010 0x0010 /* rowid >= ? */ | |||
| 19302 | ||||
| 19303 | #define FTS5_BI_ORDER_RANK0x0020 0x0020 | |||
| 19304 | #define FTS5_BI_ORDER_ROWID0x0040 0x0040 | |||
| 19305 | #define FTS5_BI_ORDER_DESC0x0080 0x0080 | |||
| 19306 | ||||
| 19307 | /* | |||
| 19308 | ** Values for Fts5Cursor.csrflags | |||
| 19309 | */ | |||
| 19310 | #define FTS5CSR_EOF0x01 0x01 | |||
| 19311 | #define FTS5CSR_REQUIRE_CONTENT0x02 0x02 | |||
| 19312 | #define FTS5CSR_REQUIRE_DOCSIZE0x04 0x04 | |||
| 19313 | #define FTS5CSR_REQUIRE_INST0x08 0x08 | |||
| 19314 | #define FTS5CSR_FREE_ZRANK0x10 0x10 | |||
| 19315 | #define FTS5CSR_REQUIRE_RESEEK0x20 0x20 | |||
| 19316 | #define FTS5CSR_REQUIRE_POSLIST0x40 0x40 | |||
| 19317 | ||||
| 19318 | #define BitFlagAllTest(x,y)(((x) & (y))==(y)) (((x) & (y))==(y)) | |||
| 19319 | #define BitFlagTest(x,y)(((x) & (y))!=0) (((x) & (y))!=0) | |||
| 19320 | ||||
| 19321 | ||||
| 19322 | /* | |||
| 19323 | ** Macros to Set(), Clear() and Test() cursor flags. | |||
| 19324 | */ | |||
| 19325 | #define CsrFlagSet(pCsr, flag)((pCsr)->csrflags |= (flag)) ((pCsr)->csrflags |= (flag)) | |||
| 19326 | #define CsrFlagClear(pCsr, flag)((pCsr)->csrflags &= ~(flag)) ((pCsr)->csrflags &= ~(flag)) | |||
| 19327 | #define CsrFlagTest(pCsr, flag)((pCsr)->csrflags & (flag)) ((pCsr)->csrflags & (flag)) | |||
| 19328 | ||||
| 19329 | struct Fts5Auxdata { | |||
| 19330 | Fts5Auxiliary *pAux; /* Extension to which this belongs */ | |||
| 19331 | void *pPtr; /* Pointer value */ | |||
| 19332 | void(*xDelete)(void*); /* Destructor */ | |||
| 19333 | Fts5Auxdata *pNext; /* Next object in linked list */ | |||
| 19334 | }; | |||
| 19335 | ||||
| 19336 | #ifdef SQLITE_DEBUG | |||
| 19337 | #define FTS5_BEGIN 1 | |||
| 19338 | #define FTS5_SYNC 2 | |||
| 19339 | #define FTS5_COMMIT 3 | |||
| 19340 | #define FTS5_ROLLBACK 4 | |||
| 19341 | #define FTS5_SAVEPOINT 5 | |||
| 19342 | #define FTS5_RELEASE 6 | |||
| 19343 | #define FTS5_ROLLBACKTO 7 | |||
| 19344 | static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){ /* Debug only: assert that op (an FTS5_XXX code above) is legal given p->ts, then update p->ts */ | |||
| 19345 | switch( op ){ | |||
| 19346 | case FTS5_BEGIN: | |||
| 19347 | assert( p->ts.eState==0 )((void) (0)); | |||
| 19348 | p->ts.eState = 1; | |||
| 19349 | p->ts.iSavepoint = -1; /* no savepoint open yet */ | |||
| 19350 | break; | |||
| 19351 | ||||
| 19352 | case FTS5_SYNC: | |||
| 19353 | assert( p->ts.eState==1 || p->ts.eState==2 )((void) (0)); | |||
| 19354 | p->ts.eState = 2; | |||
| 19355 | break; | |||
| 19356 | ||||
| 19357 | case FTS5_COMMIT: | |||
| 19358 | assert( p->ts.eState==2 )((void) (0)); | |||
| 19359 | p->ts.eState = 0; | |||
| 19360 | break; | |||
| 19361 | ||||
| 19362 | case FTS5_ROLLBACK: | |||
| 19363 | assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 )((void) (0)); | |||
| 19364 | p->ts.eState = 0; | |||
| 19365 | break; | |||
| 19366 | ||||
| 19367 | case FTS5_SAVEPOINT: | |||
| 19368 | assert( p->ts.eState>=1 )((void) (0)); | |||
| 19369 | assert( iSavepoint>=0 )((void) (0)); | |||
| 19370 | assert( iSavepoint>=p->ts.iSavepoint )((void) (0)); | |||
| 19371 | p->ts.iSavepoint = iSavepoint; | |||
| 19372 | break; | |||
| 19373 | ||||
| 19374 | case FTS5_RELEASE: | |||
| 19375 | assert( p->ts.eState>=1 )((void) (0)); | |||
| 19376 | assert( iSavepoint>=0 )((void) (0)); | |||
| 19377 | assert( iSavepoint<=p->ts.iSavepoint )((void) (0)); | |||
| 19378 | p->ts.iSavepoint = iSavepoint-1; /* savepoint iSavepoint itself is closed by the release */ | |||
| 19379 | break; | |||
| 19380 | ||||
| 19381 | case FTS5_ROLLBACKTO: | |||
| 19382 | assert( p->ts.eState>=1 )((void) (0)); | |||
| 19383 | assert( iSavepoint>=-1 )((void) (0)); | |||
| 19384 | /* The following assert() can fail if another vtab strikes an error | |||
| 19385 | ** within an xSavepoint() call then SQLite calls xRollbackTo() - without | |||
| 19386 | ** having called xSavepoint() on this vtab. */ | |||
| 19387 | /* assert( iSavepoint<=p->ts.iSavepoint ); */ | |||
| 19388 | p->ts.iSavepoint = iSavepoint; | |||
| 19389 | break; | |||
| 19390 | } | |||
| 19391 | } | |||
| 19392 | #else | |||
| 19393 | # define fts5CheckTransactionState(x,y,z) /* expands to nothing when SQLITE_DEBUG is not defined */ | |||
| 19394 | #endif | |||
| 19395 | ||||
| 19396 | /* | |||
| 19397 | ** Return true if pTab is a contentless table. If parameter bIncludeUnindexed | |||
| 19398 | ** is true, this includes contentless tables that store UNINDEXED columns | |||
| 19399 | ** only. | |||
| 19400 | */ | |||
| 19401 | static int fts5IsContentless(Fts5FullTable *pTab, int bIncludeUnindexed){ | |||
| 19402 | int eContent = pTab->p.pConfig->eContent; | |||
| 19403 | return ( | |||
| 19404 | eContent==FTS5_CONTENT_NONE1 | |||
| 19405 | || (bIncludeUnindexed && eContent==FTS5_CONTENT_UNINDEXED3) | |||
| 19406 | ); | |||
| 19407 | } | |||
| 19408 | ||||
| 19409 | /* | |||
| 19410 | ** Delete a virtual table handle allocated by fts5InitVtab(): close its index and storage objects, free its config, then free pTab itself. A NULL pTab is a no-op. | |||
| 19411 | */ | |||
| 19412 | static void fts5FreeVtab(Fts5FullTable *pTab){ | |||
| 19413 | if( pTab ){ | |||
| 19414 | sqlite3Fts5IndexClose(pTab->p.pIndex); | |||
| 19415 | sqlite3Fts5StorageClose(pTab->pStorage); | |||
| 19416 | sqlite3Fts5ConfigFree(pTab->p.pConfig); | |||
| 19417 | sqlite3_freesqlite3_api->free(pTab); | |||
| 19418 | } | |||
| 19419 | } | |||
| 19420 | ||||
| 19421 | /* | |||
| 19422 | ** The xDisconnect() virtual table method. Frees the vtab object via fts5FreeVtab() and always returns SQLITE_OK. | |||
| 19423 | */ | |||
| 19424 | static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ | |||
| 19425 | fts5FreeVtab((Fts5FullTable*)pVtab); | |||
| 19426 | return SQLITE_OK0; | |||
| 19427 | } | |||
| 19428 | ||||
| 19429 | /* | |||
| 19430 | ** The xDestroy() virtual table method. Calls sqlite3Fts5DropAll() and frees the vtab object only if that succeeds; on error the code is returned and the vtab is left allocated. | |||
| 19431 | */ | |||
| 19432 | static int fts5DestroyMethod(sqlite3_vtab *pVtab){ | |||
| 19433 | Fts5Table *pTab = (Fts5Table*)pVtab; | |||
| 19434 | int rc = sqlite3Fts5DropAll(pTab->pConfig); | |||
| 19435 | if( rc==SQLITE_OK0 ){ | |||
| 19436 | fts5FreeVtab((Fts5FullTable*)pVtab); | |||
| 19437 | } | |||
| 19438 | return rc; | |||
| 19439 | } | |||
| 19440 | ||||
| 19441 | /* | |||
| 19442 | ** This function is the implementation of both the xConnect and xCreate | |||
| 19443 | ** methods of the FTS5 virtual table. | |||
| 19444 | ** | |||
| 19445 | ** The argv[] array contains the following: | |||
| 19446 | ** | |||
| 19447 | ** argv[0] -> module name ("fts5") | |||
| 19448 | ** argv[1] -> database name | |||
| 19449 | ** argv[2] -> table name | |||
| 19450 | ** argv[...] -> "column name" and other module argument fields. | |||
| 19451 | */ | |||
| 19452 | static int fts5InitVtab( | |||
| 19453 | int bCreate, /* True for xCreate, false for xConnect */ | |||
| 19454 | sqlite3 *db, /* The SQLite database connection */ | |||
| 19455 | void *pAux, /* Fts5Global object (cast to Fts5Global* below) */ | |||
| 19456 | int argc, /* Number of elements in argv array */ | |||
| 19457 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
| 19458 | sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ | |||
| 19459 | char **pzErr /* Write any error message here */ | |||
| 19460 | ){ | |||
| 19461 | Fts5Global *pGlobal = (Fts5Global*)pAux; | |||
| 19462 | const char **azConfig = (const char**)argv; | |||
| 19463 | int rc = SQLITE_OK0; /* Return code */ | |||
| 19464 | Fts5Config *pConfig = 0; /* Results of parsing argc/argv */ | |||
| 19465 | Fts5FullTable *pTab = 0; /* New virtual table object */ | |||
| 19466 | ||||
| 19467 | /* Allocate the new vtab object and parse the configuration */ | |||
| 19468 | pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable)); | |||
| 19469 | if( rc==SQLITE_OK0 ){ | |||
| 19470 | rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); | |||
| 19471 | assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 )((void) (0)); | |||
| 19472 | } | |||
| 19473 | if( rc==SQLITE_OK0 ){ | |||
| 19474 | pConfig->pzErrmsg = pzErr; /* cleared again before returning (see below) */ | |||
| 19475 | pTab->p.pConfig = pConfig; | |||
| 19476 | pTab->pGlobal = pGlobal; | |||
| 19477 | if( bCreate || sqlite3Fts5TokenizerPreload(&pConfig->t) ){ | |||
| 19478 | rc = sqlite3Fts5LoadTokenizer(pConfig); | |||
| 19479 | } | |||
| 19480 | } | |||
| 19481 | ||||
| 19482 | /* Open the index sub-system */ | |||
| 19483 | if( rc==SQLITE_OK0 ){ | |||
| 19484 | rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr); | |||
| 19485 | } | |||
| 19486 | ||||
| 19487 | /* Open the storage sub-system */ | |||
| 19488 | if( rc==SQLITE_OK0 ){ | |||
| 19489 | rc = sqlite3Fts5StorageOpen( | |||
| 19490 | pConfig, pTab->p.pIndex, bCreate, &pTab->pStorage, pzErr | |||
| 19491 | ); | |||
| 19492 | } | |||
| 19493 | ||||
| 19494 | /* Call sqlite3_declare_vtab() */ | |||
| 19495 | if( rc==SQLITE_OK0 ){ | |||
| 19496 | rc = sqlite3Fts5ConfigDeclareVtab(pConfig); | |||
| 19497 | } | |||
| 19498 | ||||
| 19499 | /* Load the initial configuration */ | |||
| 19500 | if( rc==SQLITE_OK0 ){ | |||
| 19501 | rc = sqlite3Fts5ConfigLoad(pTab->p.pConfig, pTab->p.pConfig->iCookie-1); /* NOTE(review): iCookie-1 presumably guarantees a cookie mismatch so the load is not skipped - confirm */ | |||
| 19502 | } | |||
| 19503 | ||||
| 19504 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
| 19505 | rc = sqlite3_vtab_configsqlite3_api->vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT1, (int)1); | |||
| 19506 | } | |||
| 19507 | if( rc==SQLITE_OK0 ){ | |||
| 19508 | rc = sqlite3_vtab_configsqlite3_api->vtab_config(db, SQLITE_VTAB_INNOCUOUS2); | |||
| 19509 | } | |||
| 19510 | ||||
| 19511 | if( pConfig ) pConfig->pzErrmsg = 0; /* do not keep the caller's pzErr pointer beyond this call */ | |||
| 19512 | if( rc!=SQLITE_OK0 ){ | |||
| 19513 | fts5FreeVtab(pTab); /* also releases the index, storage and config attached to pTab so far */ | |||
| 19514 | pTab = 0; | |||
| 19515 | }else if( bCreate ){ | |||
| 19516 | fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); | |||
| 19517 | } | |||
| 19518 | *ppVTab = (sqlite3_vtab*)pTab; /* NULL on failure */ | |||
| 19519 | return rc; | |||
| 19520 | } | |||
| 19521 | ||||
| 19522 | /* | |||
| 19523 | ** The xConnect() and xCreate() methods for the virtual table. All the | |||
| 19524 | ** work is done in function fts5InitVtab(). | |||
| 19525 | */ | |||
| 19526 | static int fts5ConnectMethod( | |||
| 19527 | sqlite3 *db, /* Database connection */ | |||
| 19528 | void *pAux, /* Fts5Global object, passed on to fts5InitVtab() */ | |||
| 19529 | int argc, /* Number of elements in argv array */ | |||
| 19530 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
| 19531 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | |||
| 19532 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | |||
| 19533 | ){ | |||
| 19534 | return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); /* bCreate==0 */ | |||
| 19535 | } | |||
| 19536 | static int fts5CreateMethod( | |||
| 19537 | sqlite3 *db, /* Database connection */ | |||
| 19538 | void *pAux, /* Fts5Global object, passed on to fts5InitVtab() */ | |||
| 19539 | int argc, /* Number of elements in argv array */ | |||
| 19540 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
| 19541 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | |||
| 19542 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | |||
| 19543 | ){ | |||
| 19544 | return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); /* bCreate==1 */ | |||
| 19545 | } | |||
| 19546 | ||||
| 19547 | /* | |||
| 19548 | ** The different query plans. | |||
| 19549 | */ | |||
| 19550 | #define FTS5_PLAN_MATCH1 1 /* (<tbl> MATCH ?) */ | |||
| 19551 | #define FTS5_PLAN_SOURCE2 2 /* A source cursor for SORTED_MATCH */ | |||
| 19552 | #define FTS5_PLAN_SPECIAL3 3 /* An internal query */ | |||
| 19553 | #define FTS5_PLAN_SORTED_MATCH4 4 /* (<tbl> MATCH ? ORDER BY rank) */ | |||
| 19554 | #define FTS5_PLAN_SCAN5 5 /* No usable constraint */ | |||
| 19555 | #define FTS5_PLAN_ROWID6 6 /* (rowid = ?) */ | |||
| 19556 | ||||
| 19557 | /* | |||
| 19558 | ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this | |||
| 19559 | ** extension is currently being used by a version of SQLite too old to | |||
| 19560 | ** support index-info flags. In that case this function is a no-op. | |||
| 19561 | */ | |||
| 19562 | static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){ | |||
| 19563 | #if SQLITE_VERSION_NUMBER3050001>=3008012 | |||
| 19564 | #ifndef SQLITE_CORE | |||
| 19565 | if( sqlite3_libversion_numbersqlite3_api->libversion_number()>=3008012 ) | |||
| 19566 | #endif | |||
| 19567 | { | |||
| 19568 | pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE0x00000001; | |||
| 19569 | } | |||
| 19570 | #endif | |||
| 19571 | } | |||
| 19572 | ||||
| 19573 | static int fts5UsePatternMatch( /* Return 1 if constraint operator p->op is compatible with pConfig->t.ePattern, else 0 */ | |||
| 19574 | Fts5Config *pConfig, | |||
| 19575 | struct sqlite3_index_constraint *p | |||
| 19576 | ){ | |||
| 19577 | assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB )((void) (0)); | |||
| 19578 | assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE )((void) (0)); | |||
| 19579 | if( pConfig->t.ePattern==FTS5_PATTERN_GLOB66 && p->op==FTS5_PATTERN_GLOB66 ){ /* GLOB setting: only GLOB constraints qualify */ | |||
| 19580 | return 1; | |||
| 19581 | } | |||
| 19582 | if( pConfig->t.ePattern==FTS5_PATTERN_LIKE65 /* LIKE setting: both LIKE and GLOB constraints qualify */ | |||
| 19583 | && (p->op==FTS5_PATTERN_LIKE65 || p->op==FTS5_PATTERN_GLOB66) | |||
| 19584 | ){ | |||
| 19585 | return 1; | |||
| 19586 | } | |||
| 19587 | return 0; | |||
| 19588 | } | |||
| 19589 | ||||
| 19590 | /* | |||
| 19591 | ** Implementation of the xBestIndex method for FTS5 tables. Within the | |||
| 19592 | ** WHERE constraint, it searches for the following: | |||
| 19593 | ** | |||
| 19594 | ** 1. A MATCH constraint against the table column. | |||
| 19595 | ** 2. A MATCH constraint against the "rank" column. | |||
| 19596 | ** 3. A MATCH constraint against some other column. | |||
| 19597 | ** 4. An == constraint against the rowid column. | |||
| 19598 | ** 5. A < or <= constraint against the rowid column. | |||
| 19599 | ** 6. A > or >= constraint against the rowid column. | |||
| 19600 | ** | |||
| 19601 | ** Within the ORDER BY, the following are supported: | |||
| 19602 | ** | |||
| 19603 | ** 7. ORDER BY rank [ASC|DESC] | |||
| 19604 | ** 8. ORDER BY rowid [ASC|DESC] | |||
| 19605 | ** | |||
| 19606 | ** Information for the xFilter call is passed via both the idxNum and | |||
| 19607 | ** idxStr variables. Specifically, idxNum is a bitmask of the following | |||
| 19608 | ** flags used to encode the ORDER BY clause: | |||
| 19609 | ** | |||
| 19610 | ** FTS5_BI_ORDER_RANK | |||
| 19611 | ** FTS5_BI_ORDER_ROWID | |||
| 19612 | ** FTS5_BI_ORDER_DESC | |||
| 19613 | ** | |||
| 19614 | ** idxStr is used to encode data from the WHERE clause. For each argument | |||
| 19615 | ** passed to the xFilter method, the following is appended to idxStr: | |||
| 19616 | ** | |||
| 19617 | ** Match against table column: "m" | |||
| 19618 | ** Match against rank column: "r" | |||
| 19619 | ** Match against other column: "M<column-number>" | |||
| 19620 | ** LIKE against other column: "L<column-number>" | |||
| 19621 | ** GLOB against other column: "G<column-number>" | |||
| 19622 | ** Equality constraint against the rowid: "=" | |||
| 19623 | ** A < or <= against the rowid: "<" | |||
| 19624 | ** A > or >= against the rowid: ">" | |||
| 19625 | ** | |||
| 19626 | ** This function ensures that there is at most one "r" or "=". And that if | |||
| 19627 | ** there exists an "=" then there is no "<" or ">". | |||
| 19628 | ** | |||
| 19629 | ** If an unusable MATCH operator is present in the WHERE clause, then | |||
| 19630 | ** SQLITE_CONSTRAINT is returned. | |||
| 19631 | ** | |||
| 19632 | ** Costs are assigned as follows: | |||
| 19633 | ** | |||
| 19634 | ** a) If a MATCH operator is present, the cost depends on the other | |||
| 19635 | ** constraints also present. As follows: | |||
| 19636 | ** | |||
| 19637 | ** * No other constraints: cost=1000.0 | |||
| 19638 | ** * One rowid range constraint: cost=750.0 | |||
| 19639 | ** * Both rowid range constraints: cost=500.0 | |||
| 19640 | ** * An == rowid constraint: cost=100.0 | |||
| 19641 | ** | |||
| 19642 | ** b) Otherwise, if there is no MATCH: | |||
| 19643 | ** | |||
| 19644 | ** * No other constraints: cost=1000000.0 | |||
| 19645 | ** * One rowid range constraint: cost=750000.0 | |||
| 19646 | ** * Both rowid range constraints: cost=250000.0 | |||
| 19647 | ** * An == rowid constraint: cost=10.0 | |||
| 19648 | ** | |||
| 19649 | ** Costs are not modified by the ORDER BY clause. | |||
| 19650 | */ | |||
| 19651 | static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ | |||
| 19652 | Fts5Table *pTab = (Fts5Table*)pVTab; | |||
| 19653 | Fts5Config *pConfig = pTab->pConfig; | |||
| 19654 | const int nCol = pConfig->nCol; | |||
| 19655 | int idxFlags = 0; /* Parameter passed through to xFilter() */ | |||
| 19656 | int i; | |||
| 19657 | ||||
| 19658 | char *idxStr; | |||
| 19659 | int iIdxStr = 0; | |||
| 19660 | int iCons = 0; | |||
| 19661 | ||||
| 19662 | int bSeenEq = 0; | |||
| 19663 | int bSeenGt = 0; | |||
| 19664 | int bSeenLt = 0; | |||
| 19665 | int nSeenMatch = 0; | |||
| 19666 | int bSeenRank = 0; | |||
| 19667 | ||||
| 19668 | ||||
| 19669 | assert( SQLITE_INDEX_CONSTRAINT_EQ<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
| 19670 | assert( SQLITE_INDEX_CONSTRAINT_GT<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
| 19671 | assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
| 19672 | assert( SQLITE_INDEX_CONSTRAINT_GE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
| 19673 | assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH )((void) (0)); | |||
| 19674 | ||||
| 19675 | if( pConfig->bLock ){ | |||
| 19676 | pTab->base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 19677 | "recursively defined fts5 content table" | |||
| 19678 | ); | |||
| 19679 | return SQLITE_ERROR1; | |||
| 19680 | } | |||
| 19681 | ||||
| 19682 | idxStr = (char*)sqlite3_mallocsqlite3_api->malloc(pInfo->nConstraint * 8 + 1); | |||
| 19683 | if( idxStr==0 ) return SQLITE_NOMEM7; | |||
| 19684 | pInfo->idxStr = idxStr; | |||
| 19685 | pInfo->needToFreeIdxStr = 1; | |||
| 19686 | ||||
| 19687 | for(i=0; i<pInfo->nConstraint; i++){ | |||
| 19688 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | |||
| 19689 | int iCol = p->iColumn; | |||
| 19690 | if( p->op==SQLITE_INDEX_CONSTRAINT_MATCH64 | |||
| 19691 | || (p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && iCol>=nCol) | |||
| 19692 | ){ | |||
| 19693 | /* A MATCH operator or equivalent */ | |||
| 19694 | if( p->usable==0 || iCol<0 ){ | |||
| 19695 | /* As there exists an unusable MATCH constraint this is an | |||
| 19696 | ** unusable plan. Return SQLITE_CONSTRAINT. */ | |||
| 19697 | idxStr[iIdxStr] = 0; | |||
| 19698 | return SQLITE_CONSTRAINT19; | |||
| 19699 | }else{ | |||
| 19700 | if( iCol==nCol+1 ){ | |||
| 19701 | if( bSeenRank ) continue; | |||
| 19702 | idxStr[iIdxStr++] = 'r'; | |||
| 19703 | bSeenRank = 1; | |||
| 19704 | }else{ | |||
| 19705 | nSeenMatch++; | |||
| 19706 | idxStr[iIdxStr++] = 'M'; | |||
| 19707 | sqlite3_snprintfsqlite3_api->xsnprintf(6, &idxStr[iIdxStr], "%d", iCol); | |||
| 19708 | idxStr += strlen(&idxStr[iIdxStr]); | |||
| 19709 | assert( idxStr[iIdxStr]=='\0' )((void) (0)); | |||
| 19710 | } | |||
| 19711 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
| 19712 | pInfo->aConstraintUsage[i].omit = 1; | |||
| 19713 | } | |||
| 19714 | }else if( p->usable ){ | |||
| 19715 | if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){ | |||
| 19716 | assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB )((void) (0)); | |||
| 19717 | idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE65 ? 'L' : 'G'; | |||
| 19718 | sqlite3_snprintfsqlite3_api->xsnprintf(6, &idxStr[iIdxStr], "%d", iCol); | |||
| 19719 | idxStr += strlen(&idxStr[iIdxStr]); | |||
| 19720 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
| 19721 | assert( idxStr[iIdxStr]=='\0' )((void) (0)); | |||
| 19722 | nSeenMatch++; | |||
| 19723 | }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ2 && iCol<0 ){ | |||
| 19724 | idxStr[iIdxStr++] = '='; | |||
| 19725 | bSeenEq = 1; | |||
| 19726 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
| 19727 | } | |||
| 19728 | } | |||
| 19729 | } | |||
| 19730 | ||||
| 19731 | if( bSeenEq==0 ){ | |||
| 19732 | for(i=0; i<pInfo->nConstraint; i++){ | |||
| 19733 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | |||
| 19734 | if( p->iColumn<0 && p->usable ){ | |||
| 19735 | int op = p->op; | |||
| 19736 | if( op==SQLITE_INDEX_CONSTRAINT_LT16 || op==SQLITE_INDEX_CONSTRAINT_LE8 ){ | |||
| 19737 | if( bSeenLt ) continue; | |||
| 19738 | idxStr[iIdxStr++] = '<'; | |||
| 19739 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
| 19740 | bSeenLt = 1; | |||
| 19741 | }else | |||
| 19742 | if( op==SQLITE_INDEX_CONSTRAINT_GT4 || op==SQLITE_INDEX_CONSTRAINT_GE32 ){ | |||
| 19743 | if( bSeenGt ) continue; | |||
| 19744 | idxStr[iIdxStr++] = '>'; | |||
| 19745 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; | |||
| 19746 | bSeenGt = 1; | |||
| 19747 | } | |||
| 19748 | } | |||
| 19749 | } | |||
| 19750 | } | |||
| 19751 | idxStr[iIdxStr] = '\0'; | |||
| 19752 | ||||
| 19753 | /* Set idxFlags flags for the ORDER BY clause | |||
| 19754 | ** | |||
| 19755 | ** Note that tokendata=1 tables cannot currently handle "ORDER BY rowid DESC". | |||
| 19756 | */ | |||
| 19757 | if( pInfo->nOrderBy==1 ){ | |||
| 19758 | int iSort = pInfo->aOrderBy[0].iColumn; | |||
| 19759 | if( iSort==(pConfig->nCol+1) && nSeenMatch>0 ){ | |||
| 19760 | idxFlags |= FTS5_BI_ORDER_RANK0x0020; | |||
| 19761 | }else if( iSort==-1 && (!pInfo->aOrderBy[0].desc || !pConfig->bTokendata) ){ | |||
| 19762 | idxFlags |= FTS5_BI_ORDER_ROWID0x0040; | |||
| 19763 | } | |||
| 19764 | if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID)(((idxFlags) & (0x0020|0x0040))!=0) ){ | |||
| 19765 | pInfo->orderByConsumed = 1; | |||
| 19766 | if( pInfo->aOrderBy[0].desc ){ | |||
| 19767 | idxFlags |= FTS5_BI_ORDER_DESC0x0080; | |||
| 19768 | } | |||
| 19769 | } | |||
| 19770 | } | |||
| 19771 | ||||
| 19772 | /* Calculate the estimated cost based on the flags set in idxFlags. */ | |||
| 19773 | if( bSeenEq ){ | |||
| 19774 | pInfo->estimatedCost = nSeenMatch ? 1000.0 : 10.0; | |||
| 19775 | if( nSeenMatch==0 ) fts5SetUniqueFlag(pInfo); | |||
| 19776 | }else if( bSeenLt && bSeenGt ){ | |||
| 19777 | pInfo->estimatedCost = nSeenMatch ? 5000.0 : 250000.0; | |||
| 19778 | }else if( bSeenLt || bSeenGt ){ | |||
| 19779 | pInfo->estimatedCost = nSeenMatch ? 7500.0 : 750000.0; | |||
| 19780 | }else{ | |||
| 19781 | pInfo->estimatedCost = nSeenMatch ? 10000.0 : 1000000.0; | |||
| 19782 | } | |||
| 19783 | for(i=1; i<nSeenMatch; i++){ | |||
| 19784 | pInfo->estimatedCost *= 0.4; | |||
| 19785 | } | |||
| 19786 | ||||
| 19787 | pInfo->idxNum = idxFlags; | |||
| 19788 | return SQLITE_OK0; | |||
| 19789 | } | |||
| 19790 | ||||
| 19791 | static int fts5NewTransaction(Fts5FullTable *pTab){ | |||
| 19792 | Fts5Cursor *pCsr; | |||
| 19793 | for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | |||
| 19794 | if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK0; | |||
| 19795 | } | |||
| 19796 | return sqlite3Fts5StorageReset(pTab->pStorage); | |||
| 19797 | } | |||
| 19798 | ||||
| 19799 | /* | |||
| 19800 | ** Implementation of xOpen method. | |||
| 19801 | */ | |||
| 19802 | static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ | |||
| 19803 | Fts5FullTable *pTab = (Fts5FullTable*)pVTab; | |||
| 19804 | Fts5Config *pConfig = pTab->p.pConfig; | |||
| 19805 | Fts5Cursor *pCsr = 0; /* New cursor object */ | |||
| 19806 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
| 19807 | int rc; /* Return code */ | |||
| 19808 | ||||
| 19809 | rc = fts5NewTransaction(pTab); | |||
| 19810 | if( rc==SQLITE_OK0 ){ | |||
| 19811 | nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int); | |||
| 19812 | pCsr = (Fts5Cursor*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 19813 | if( pCsr ){ | |||
| 19814 | Fts5Global *pGlobal = pTab->pGlobal; | |||
| 19815 | memset(pCsr, 0, (size_t)nByte); | |||
| 19816 | pCsr->aColumnSize = (int*)&pCsr[1]; | |||
| 19817 | pCsr->pNext = pGlobal->pCsr; | |||
| 19818 | pGlobal->pCsr = pCsr; | |||
| 19819 | pCsr->iCsrId = ++pGlobal->iNextId; | |||
| 19820 | }else{ | |||
| 19821 | rc = SQLITE_NOMEM7; | |||
| 19822 | } | |||
| 19823 | } | |||
| 19824 | *ppCsr = (sqlite3_vtab_cursor*)pCsr; | |||
| 19825 | return rc; | |||
| 19826 | } | |||
| 19827 | ||||
| 19828 | static int fts5StmtType(Fts5Cursor *pCsr){ | |||
| 19829 | if( pCsr->ePlan==FTS5_PLAN_SCAN5 ){ | |||
| 19830 | return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC1 : FTS5_STMT_SCAN_ASC0; | |||
| 19831 | } | |||
| 19832 | return FTS5_STMT_LOOKUP2; | |||
| 19833 | } | |||
| 19834 | ||||
| 19835 | /* | |||
| 19836 | ** This function is called after the cursor passed as the only argument | |||
| 19837 | ** is moved to point at a different row. It clears all cached data | |||
| 19838 | ** specific to the previous row stored by the cursor object. | |||
| 19839 | */ | |||
| 19840 | static void fts5CsrNewrow(Fts5Cursor *pCsr){ | |||
| 19841 | CsrFlagSet(pCsr,((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
| 19842 | FTS5CSR_REQUIRE_CONTENT((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
| 19843 | | FTS5CSR_REQUIRE_DOCSIZE((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
| 19844 | | FTS5CSR_REQUIRE_INST((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
| 19845 | | FTS5CSR_REQUIRE_POSLIST((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)) | |||
| 19846 | )((pCsr)->csrflags |= (0x02 | 0x04 | 0x08 | 0x40)); | |||
| 19847 | } | |||
| 19848 | ||||
| 19849 | static void fts5FreeCursorComponents(Fts5Cursor *pCsr){ | |||
| 19850 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
| 19851 | Fts5Auxdata *pData; | |||
| 19852 | Fts5Auxdata *pNext; | |||
| 19853 | ||||
| 19854 | sqlite3_freesqlite3_api->free(pCsr->aInstIter); | |||
| 19855 | sqlite3_freesqlite3_api->free(pCsr->aInst); | |||
| 19856 | if( pCsr->pStmt ){ | |||
| 19857 | int eStmt = fts5StmtType(pCsr); | |||
| 19858 | sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); | |||
| 19859 | } | |||
| 19860 | if( pCsr->pSorter ){ | |||
| 19861 | Fts5Sorter *pSorter = pCsr->pSorter; | |||
| 19862 | sqlite3_finalizesqlite3_api->finalize(pSorter->pStmt); | |||
| 19863 | sqlite3_freesqlite3_api->free(pSorter); | |||
| 19864 | } | |||
| 19865 | ||||
| 19866 | if( pCsr->ePlan!=FTS5_PLAN_SOURCE2 ){ | |||
| 19867 | sqlite3Fts5ExprFree(pCsr->pExpr); | |||
| 19868 | } | |||
| 19869 | ||||
| 19870 | for(pData=pCsr->pAuxdata; pData; pData=pNext){ | |||
| 19871 | pNext = pData->pNext; | |||
| 19872 | if( pData->xDelete ) pData->xDelete(pData->pPtr); | |||
| 19873 | sqlite3_freesqlite3_api->free(pData); | |||
| 19874 | } | |||
| 19875 | ||||
| 19876 | sqlite3_finalizesqlite3_api->finalize(pCsr->pRankArgStmt); | |||
| 19877 | sqlite3_freesqlite3_api->free(pCsr->apRankArg); | |||
| 19878 | ||||
| 19879 | if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK)((pCsr)->csrflags & (0x10)) ){ | |||
| 19880 | sqlite3_freesqlite3_api->free(pCsr->zRank); | |||
| 19881 | sqlite3_freesqlite3_api->free(pCsr->zRankArgs); | |||
| 19882 | } | |||
| 19883 | ||||
| 19884 | sqlite3Fts5IndexCloseReader(pTab->p.pIndex); | |||
| 19885 | memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr)); | |||
| 19886 | } | |||
| 19887 | ||||
| 19888 | ||||
| 19889 | /* | |||
| 19890 | ** Close the cursor. For additional information see the documentation | |||
| 19891 | ** on the xClose method of the virtual table interface. | |||
| 19892 | */ | |||
| 19893 | static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ | |||
| 19894 | if( pCursor ){ | |||
| 19895 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | |||
| 19896 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
| 19897 | Fts5Cursor **pp; | |||
| 19898 | ||||
| 19899 | fts5FreeCursorComponents(pCsr); | |||
| 19900 | /* Remove the cursor from the Fts5Global.pCsr list */ | |||
| 19901 | for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); | |||
| 19902 | *pp = pCsr->pNext; | |||
| 19903 | ||||
| 19904 | sqlite3_freesqlite3_api->free(pCsr); | |||
| 19905 | } | |||
| 19906 | return SQLITE_OK0; | |||
| 19907 | } | |||
| 19908 | ||||
| 19909 | static int fts5SorterNext(Fts5Cursor *pCsr){ | |||
| 19910 | Fts5Sorter *pSorter = pCsr->pSorter; | |||
| 19911 | int rc; | |||
| 19912 | ||||
| 19913 | rc = sqlite3_stepsqlite3_api->step(pSorter->pStmt); | |||
| 19914 | if( rc==SQLITE_DONE101 ){ | |||
| 19915 | rc = SQLITE_OK0; | |||
| 19916 | CsrFlagSet(pCsr, FTS5CSR_EOF|FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags |= (0x01|0x02)); | |||
| 19917 | }else if( rc==SQLITE_ROW100 ){ | |||
| 19918 | const u8 *a; | |||
| 19919 | const u8 *aBlob; | |||
| 19920 | int nBlob; | |||
| 19921 | int i; | |||
| 19922 | int iOff = 0; | |||
| 19923 | rc = SQLITE_OK0; | |||
| 19924 | ||||
| 19925 | pSorter->iRowid = sqlite3_column_int64sqlite3_api->column_int64(pSorter->pStmt, 0); | |||
| 19926 | nBlob = sqlite3_column_bytessqlite3_api->column_bytes(pSorter->pStmt, 1); | |||
| 19927 | aBlob = a = sqlite3_column_blobsqlite3_api->column_blob(pSorter->pStmt, 1); | |||
| 19928 | ||||
| 19929 | /* nBlob==0 in detail=none mode. */ | |||
| 19930 | if( nBlob>0 ){ | |||
| 19931 | for(i=0; i<(pSorter->nIdx-1); i++){ | |||
| 19932 | int iVal; | |||
| 19933 | a += fts5GetVarint32(a, iVal)sqlite3Fts5GetVarint32(a,(u32*)&(iVal)); | |||
| 19934 | iOff += iVal; | |||
| 19935 | pSorter->aIdx[i] = iOff; | |||
| 19936 | } | |||
| 19937 | pSorter->aIdx[i] = &aBlob[nBlob] - a; | |||
| 19938 | pSorter->aPoslist = a; | |||
| 19939 | } | |||
| 19940 | ||||
| 19941 | fts5CsrNewrow(pCsr); | |||
| 19942 | } | |||
| 19943 | ||||
| 19944 | return rc; | |||
| 19945 | } | |||
| 19946 | ||||
| 19947 | ||||
| 19948 | /* | |||
| 19949 | ** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors | |||
| 19950 | ** open on table pTab. | |||
| 19951 | */ | |||
| 19952 | static void fts5TripCursors(Fts5FullTable *pTab){ | |||
| 19953 | Fts5Cursor *pCsr; | |||
| 19954 | for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | |||
| 19955 | if( pCsr->ePlan==FTS5_PLAN_MATCH1 | |||
| 19956 | && pCsr->base.pVtab==(sqlite3_vtab*)pTab | |||
| 19957 | ){ | |||
| 19958 | CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags |= (0x20)); | |||
| 19959 | } | |||
| 19960 | } | |||
| 19961 | } | |||
| 19962 | ||||
| 19963 | /* | |||
| 19964 | ** If the REQUIRE_RESEEK flag is set on the cursor passed as the first | |||
| 19965 | ** argument, close and reopen all Fts5IndexIter iterators that the cursor | |||
| 19966 | ** is using. Then attempt to move the cursor to a rowid equal to or later | |||
| 19967 | ** (in the cursors sort order - ASC or DESC) than the current rowid. | |||
| 19968 | ** | |||
| 19969 | ** If the new rowid is not equal to the old, set output parameter *pbSkip | |||
| 19970 | ** to 1 before returning. Otherwise, leave it unchanged. | |||
| 19971 | ** | |||
| 19972 | ** Return SQLITE_OK if successful or if no reseek was required, or an | |||
| 19973 | ** error code if an error occurred. | |||
| 19974 | */ | |||
| 19975 | static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ | |||
| 19976 | int rc = SQLITE_OK0; | |||
| 19977 | assert( *pbSkip==0 )((void) (0)); | |||
| 19978 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags & (0x20)) ){ | |||
| 19979 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
| 19980 | int bDesc = pCsr->bDesc; | |||
| 19981 | i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); | |||
| 19982 | ||||
| 19983 | rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->p.pIndex, iRowid, bDesc); | |||
| 19984 | if( rc==SQLITE_OK0 && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){ | |||
| 19985 | *pbSkip = 1; | |||
| 19986 | } | |||
| 19987 | ||||
| 19988 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK)((pCsr)->csrflags &= ~(0x20)); | |||
| 19989 | fts5CsrNewrow(pCsr); | |||
| 19990 | if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ | |||
| 19991 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | |||
| 19992 | *pbSkip = 1; | |||
| 19993 | } | |||
| 19994 | } | |||
| 19995 | return rc; | |||
| 19996 | } | |||
| 19997 | ||||
| 19998 | ||||
| 19999 | /* | |||
| 20000 | ** Advance the cursor to the next row in the table that matches the | |||
| 20001 | ** search criteria. | |||
| 20002 | ** | |||
| 20003 | ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned | |||
| 20004 | ** even if we reach end-of-file. The fts5EofMethod() will be called | |||
| 20005 | ** subsequently to determine whether or not an EOF was hit. | |||
| 20006 | */ | |||
| 20007 | static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ | |||
| 20008 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
| 20009 | int rc; | |||
| 20010 | ||||
| 20011 | assert( (pCsr->ePlan<3)==((void) (0)) | |||
| 20012 | (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE)((void) (0)) | |||
| 20013 | )((void) (0)); | |||
| 20014 | assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) )((void) (0)); | |||
| 20015 | ||||
| 20016 | /* If this cursor uses FTS5_PLAN_MATCH and this is a tokendata=1 table, | |||
| 20017 | ** clear any token mappings accumulated at the fts5_index.c level. In | |||
| 20018 | ** other cases, specifically FTS5_PLAN_SOURCE and FTS5_PLAN_SORTED_MATCH, | |||
| 20019 | ** we need to retain the mappings for the entire query. */ | |||
| 20020 | if( pCsr->ePlan==FTS5_PLAN_MATCH1 | |||
| 20021 | && ((Fts5Table*)pCursor->pVtab)->pConfig->bTokendata | |||
| 20022 | ){ | |||
| 20023 | sqlite3Fts5ExprClearTokens(pCsr->pExpr); | |||
| 20024 | } | |||
| 20025 | ||||
| 20026 | if( pCsr->ePlan<3 ){ | |||
| 20027 | int bSkip = 0; | |||
| 20028 | if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; | |||
| 20029 | rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid); | |||
| 20030 | CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr))((pCsr)->csrflags |= (sqlite3Fts5ExprEof(pCsr->pExpr))); | |||
| 20031 | fts5CsrNewrow(pCsr); | |||
| 20032 | }else{ | |||
| 20033 | switch( pCsr->ePlan ){ | |||
| 20034 | case FTS5_PLAN_SPECIAL3: { | |||
| 20035 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | |||
| 20036 | rc = SQLITE_OK0; | |||
| 20037 | break; | |||
| 20038 | } | |||
| 20039 | ||||
| 20040 | case FTS5_PLAN_SORTED_MATCH4: { | |||
| 20041 | rc = fts5SorterNext(pCsr); | |||
| 20042 | break; | |||
| 20043 | } | |||
| 20044 | ||||
| 20045 | default: { | |||
| 20046 | Fts5Config *pConfig = ((Fts5Table*)pCursor->pVtab)->pConfig; | |||
| 20047 | pConfig->bLock++; | |||
| 20048 | rc = sqlite3_stepsqlite3_api->step(pCsr->pStmt); | |||
| 20049 | pConfig->bLock--; | |||
| 20050 | if( rc!=SQLITE_ROW100 ){ | |||
| 20051 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | |||
| 20052 | rc = sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | |||
| 20053 | if( rc!=SQLITE_OK0 ){ | |||
| 20054 | pCursor->pVtab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 20055 | "%s", sqlite3_errmsgsqlite3_api->errmsg(pConfig->db) | |||
| 20056 | ); | |||
| 20057 | } | |||
| 20058 | }else{ | |||
| 20059 | rc = SQLITE_OK0; | |||
| 20060 | CsrFlagSet(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags |= (0x04)); | |||
| 20061 | } | |||
| 20062 | break; | |||
| 20063 | } | |||
| 20064 | } | |||
| 20065 | } | |||
| 20066 | ||||
| 20067 | return rc; | |||
| 20068 | } | |||
| 20069 | ||||
| 20070 | ||||
| 20071 | static int fts5PrepareStatement( | |||
| 20072 | sqlite3_stmt **ppStmt, | |||
| 20073 | Fts5Config *pConfig, | |||
| 20074 | const char *zFmt, | |||
| 20075 | ... | |||
| 20076 | ){ | |||
| 20077 | sqlite3_stmt *pRet = 0; | |||
| 20078 | int rc; | |||
| 20079 | char *zSql; | |||
| 20080 | va_list ap; | |||
| 20081 | ||||
| 20082 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
| 20083 | zSql = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
| 20084 | if( zSql==0 ){ | |||
| 20085 | rc = SQLITE_NOMEM7; | |||
| 20086 | }else{ | |||
| 20087 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pConfig->db, zSql, -1, | |||
| 20088 | SQLITE_PREPARE_PERSISTENT0x01, &pRet, 0); | |||
| 20089 | if( rc!=SQLITE_OK0 ){ | |||
| 20090 | sqlite3Fts5ConfigErrmsg(pConfig, "%s", sqlite3_errmsgsqlite3_api->errmsg(pConfig->db)); | |||
| 20091 | } | |||
| 20092 | sqlite3_freesqlite3_api->free(zSql); | |||
| 20093 | } | |||
| 20094 | ||||
| 20095 | va_end(ap)__builtin_va_end(ap); | |||
| 20096 | *ppStmt = pRet; | |||
| 20097 | return rc; | |||
| 20098 | } | |||
| 20099 | ||||
| 20100 | static int fts5CursorFirstSorted( | |||
| 20101 | Fts5FullTable *pTab, | |||
| 20102 | Fts5Cursor *pCsr, | |||
| 20103 | int bDesc | |||
| 20104 | ){ | |||
| 20105 | Fts5Config *pConfig = pTab->p.pConfig; | |||
| 20106 | Fts5Sorter *pSorter; | |||
| 20107 | int nPhrase; | |||
| 20108 | sqlite3_int64 nByte; | |||
| 20109 | int rc; | |||
| 20110 | const char *zRank = pCsr->zRank; | |||
| 20111 | const char *zRankArgs = pCsr->zRankArgs; | |||
| 20112 | ||||
| 20113 | nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | |||
| 20114 | nByte = SZ_FTS5SORTER(nPhrase)(__builtin_offsetof(Fts5Sorter, nIdx)+((nPhrase+2)/2)*sizeof( i64)); | |||
| 20115 | pSorter = (Fts5Sorter*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 20116 | if( pSorter==0 ) return SQLITE_NOMEM7; | |||
| 20117 | memset(pSorter, 0, (size_t)nByte); | |||
| 20118 | pSorter->nIdx = nPhrase; | |||
| 20119 | ||||
| 20120 | /* TODO: It would be better to have some system for reusing statement | |||
| 20121 | ** handles here, rather than preparing a new one for each query. But that | |||
| 20122 | ** is not possible as SQLite reference counts the virtual table objects. | |||
| 20123 | ** And since the statement required here reads from this very virtual | |||
| 20124 | ** table, saving it creates a circular reference. | |||
| 20125 | ** | |||
| 20126 | ** If SQLite a built-in statement cache, this wouldn't be a problem. */ | |||
| 20127 | rc = fts5PrepareStatement(&pSorter->pStmt, pConfig, | |||
| 20128 | "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(\"%w\"%s%s) %s", | |||
| 20129 | pConfig->zDb, pConfig->zName, zRank, pConfig->zName, | |||
| 20130 | (zRankArgs ? ", " : ""), | |||
| 20131 | (zRankArgs ? zRankArgs : ""), | |||
| 20132 | bDesc ? "DESC" : "ASC" | |||
| 20133 | ); | |||
| 20134 | ||||
| 20135 | pCsr->pSorter = pSorter; | |||
| 20136 | if( rc==SQLITE_OK0 ){ | |||
| 20137 | assert( pTab->pSortCsr==0 )((void) (0)); | |||
| 20138 | pTab->pSortCsr = pCsr; | |||
| 20139 | rc = fts5SorterNext(pCsr); | |||
| 20140 | pTab->pSortCsr = 0; | |||
| 20141 | } | |||
| 20142 | ||||
| 20143 | if( rc!=SQLITE_OK0 ){ | |||
| 20144 | sqlite3_finalizesqlite3_api->finalize(pSorter->pStmt); | |||
| 20145 | sqlite3_freesqlite3_api->free(pSorter); | |||
| 20146 | pCsr->pSorter = 0; | |||
| 20147 | } | |||
| 20148 | ||||
| 20149 | return rc; | |||
| 20150 | } | |||
| 20151 | ||||
| 20152 | static int fts5CursorFirst(Fts5FullTable *pTab, Fts5Cursor *pCsr, int bDesc){ | |||
| 20153 | int rc; | |||
| 20154 | Fts5Expr *pExpr = pCsr->pExpr; | |||
| 20155 | rc = sqlite3Fts5ExprFirst(pExpr, pTab->p.pIndex, pCsr->iFirstRowid, bDesc); | |||
| 20156 | if( sqlite3Fts5ExprEof(pExpr) ){ | |||
| 20157 | CsrFlagSet(pCsr, FTS5CSR_EOF)((pCsr)->csrflags |= (0x01)); | |||
| 20158 | } | |||
| 20159 | fts5CsrNewrow(pCsr); | |||
| 20160 | return rc; | |||
| 20161 | } | |||
| 20162 | ||||
| 20163 | /* | |||
| 20164 | ** Process a "special" query. A special query is identified as one with a | |||
| 20165 | ** MATCH expression that begins with a '*' character. The remainder of | |||
| 20166 | ** the text passed to the MATCH operator are used as the special query | |||
| 20167 | ** parameters. | |||
| 20168 | */ | |||
| 20169 | static int fts5SpecialMatch( | |||
| 20170 | Fts5FullTable *pTab, | |||
| 20171 | Fts5Cursor *pCsr, | |||
| 20172 | const char *zQuery | |||
| 20173 | ){ | |||
| 20174 | int rc = SQLITE_OK0; /* Return code */ | |||
| 20175 | const char *z = zQuery; /* Special query text */ | |||
| 20176 | int n; /* Number of bytes in text at z */ | |||
| 20177 | ||||
| 20178 | while( z[0]==' ' ) z++; | |||
| 20179 | for(n=0; z[n] && z[n]!=' '; n++); | |||
| 20180 | ||||
| 20181 | assert( pTab->p.base.zErrMsg==0 )((void) (0)); | |||
| 20182 | pCsr->ePlan = FTS5_PLAN_SPECIAL3; | |||
| 20183 | ||||
| 20184 | if( n==5 && 0==sqlite3_strnicmpsqlite3_api->strnicmp("reads", z, n) ){ | |||
| 20185 | pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->p.pIndex); | |||
| 20186 | } | |||
| 20187 | else if( n==2 && 0==sqlite3_strnicmpsqlite3_api->strnicmp("id", z, n) ){ | |||
| 20188 | pCsr->iSpecial = pCsr->iCsrId; | |||
| 20189 | } | |||
| 20190 | else{ | |||
| 20191 | /* An unrecognized directive. Return an error message. */ | |||
| 20192 | pTab->p.base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("unknown special query: %.*s", n, z); | |||
| 20193 | rc = SQLITE_ERROR1; | |||
| 20194 | } | |||
| 20195 | ||||
| 20196 | return rc; | |||
| 20197 | } | |||
| 20198 | ||||
| 20199 | /* | |||
| 20200 | ** Search for an auxiliary function named zName that can be used with table | |||
| 20201 | ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary | |||
| 20202 | ** structure. Otherwise, if no such function exists, return NULL. | |||
| 20203 | */ | |||
| 20204 | static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){ | |||
| 20205 | Fts5Auxiliary *pAux; | |||
| 20206 | ||||
| 20207 | for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ | |||
| 20208 | if( sqlite3_stricmpsqlite3_api->stricmp(zName, pAux->zFunc)==0 ) return pAux; | |||
| 20209 | } | |||
| 20210 | ||||
| 20211 | /* No function of the specified name was found. Return 0. */ | |||
| 20212 | return 0; | |||
| 20213 | } | |||
| 20214 | ||||
| 20215 | ||||
| 20216 | static int fts5FindRankFunction(Fts5Cursor *pCsr){ | |||
| 20217 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
| 20218 | Fts5Config *pConfig = pTab->p.pConfig; | |||
| 20219 | int rc = SQLITE_OK0; | |||
| 20220 | Fts5Auxiliary *pAux = 0; | |||
| 20221 | const char *zRank = pCsr->zRank; | |||
| 20222 | const char *zRankArgs = pCsr->zRankArgs; | |||
| 20223 | ||||
| 20224 | if( zRankArgs ){ | |||
| 20225 | char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs); | |||
| 20226 | if( zSql ){ | |||
| 20227 | sqlite3_stmt *pStmt = 0; | |||
| 20228 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pConfig->db, zSql, -1, | |||
| 20229 | SQLITE_PREPARE_PERSISTENT0x01, &pStmt, 0); | |||
| 20230 | sqlite3_freesqlite3_api->free(zSql); | |||
| 20231 | assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 )((void) (0)); | |||
| 20232 | if( rc==SQLITE_OK0 ){ | |||
| 20233 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pStmt) ){ | |||
| 20234 | sqlite3_int64 nByte; | |||
| 20235 | pCsr->nRankArg = sqlite3_column_countsqlite3_api->column_count(pStmt); | |||
| 20236 | nByte = sizeof(sqlite3_value*)*pCsr->nRankArg; | |||
| 20237 | pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte); | |||
| 20238 | if( rc==SQLITE_OK0 ){ | |||
| 20239 | int i; | |||
| 20240 | for(i=0; i<pCsr->nRankArg; i++){ | |||
| 20241 | pCsr->apRankArg[i] = sqlite3_column_valuesqlite3_api->column_value(pStmt, i); | |||
| 20242 | } | |||
| 20243 | } | |||
| 20244 | pCsr->pRankArgStmt = pStmt; | |||
| 20245 | }else{ | |||
| 20246 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
| 20247 | assert( rc!=SQLITE_OK )((void) (0)); | |||
| 20248 | } | |||
| 20249 | } | |||
| 20250 | } | |||
| 20251 | } | |||
| 20252 | ||||
| 20253 | if( rc==SQLITE_OK0 ){ | |||
| 20254 | pAux = fts5FindAuxiliary(pTab, zRank); | |||
| 20255 | if( pAux==0 ){ | |||
| 20256 | assert( pTab->p.base.zErrMsg==0 )((void) (0)); | |||
| 20257 | pTab->p.base.zErrMsg = sqlite3_mprintfsqlite3_api->mprintf("no such function: %s", zRank); | |||
| 20258 | rc = SQLITE_ERROR1; | |||
| 20259 | } | |||
| 20260 | } | |||
| 20261 | ||||
| 20262 | pCsr->pRank = pAux; | |||
| 20263 | return rc; | |||
| 20264 | } | |||
| 20265 | ||||
| 20266 | ||||
| 20267 | static int fts5CursorParseRank( | |||
| 20268 | Fts5Config *pConfig, | |||
| 20269 | Fts5Cursor *pCsr, | |||
| 20270 | sqlite3_value *pRank | |||
| 20271 | ){ | |||
| 20272 | int rc = SQLITE_OK0; | |||
| 20273 | if( pRank ){ | |||
| 20274 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(pRank); | |||
| 20275 | char *zRank = 0; | |||
| 20276 | char *zRankArgs = 0; | |||
| 20277 | ||||
| 20278 | if( z==0 ){ | |||
| 20279 | if( sqlite3_value_typesqlite3_api->value_type(pRank)==SQLITE_NULL5 ) rc = SQLITE_ERROR1; | |||
| 20280 | }else{ | |||
| 20281 | rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); | |||
| 20282 | } | |||
| 20283 | if( rc==SQLITE_OK0 ){ | |||
| 20284 | pCsr->zRank = zRank; | |||
| 20285 | pCsr->zRankArgs = zRankArgs; | |||
| 20286 | CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK)((pCsr)->csrflags |= (0x10)); | |||
| 20287 | }else if( rc==SQLITE_ERROR1 ){ | |||
| 20288 | pCsr->base.pVtab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 20289 | "parse error in rank function: %s", z | |||
| 20290 | ); | |||
| 20291 | } | |||
| 20292 | }else{ | |||
| 20293 | if( pConfig->zRank ){ | |||
| 20294 | pCsr->zRank = (char*)pConfig->zRank; | |||
| 20295 | pCsr->zRankArgs = (char*)pConfig->zRankArgs; | |||
| 20296 | }else{ | |||
| 20297 | pCsr->zRank = (char*)FTS5_DEFAULT_RANK"bm25"; | |||
| 20298 | pCsr->zRankArgs = 0; | |||
| 20299 | } | |||
| 20300 | } | |||
| 20301 | return rc; | |||
| 20302 | } | |||
| 20303 | ||||
| 20304 | static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){ | |||
| 20305 | if( pVal ){ | |||
| 20306 | int eType = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(pVal); | |||
| 20307 | if( eType==SQLITE_INTEGER1 ){ | |||
| 20308 | return sqlite3_value_int64sqlite3_api->value_int64(pVal); | |||
| 20309 | } | |||
| 20310 | } | |||
| 20311 | return iDefault; | |||
| 20312 | } | |||
| 20313 | ||||
| 20314 | /* | |||
| 20315 | ** Set the error message on the virtual table passed as the first argument. | |||
| 20316 | */ | |||
| 20317 | static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){ | |||
| 20318 | va_list ap; /* ... printf arguments */ | |||
| 20319 | va_start(ap, zFormat)__builtin_va_start(ap, zFormat); | |||
| 20320 | sqlite3_freesqlite3_api->free(p->p.base.zErrMsg); | |||
| 20321 | p->p.base.zErrMsg = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, ap); | |||
| 20322 | va_end(ap)__builtin_va_end(ap); | |||
| 20323 | } | |||
| 20324 | ||||
| 20325 | /* | |||
| 20326 | ** Arrange for subsequent calls to sqlite3Fts5Tokenize() to use the locale | |||
| 20327 | ** specified by pLocale/nLocale. The buffer indicated by pLocale must remain | |||
| 20328 | ** valid until after the final call to sqlite3Fts5Tokenize() that will use | |||
| 20329 | ** the locale. | |||
| 20330 | */ | |||
| 20331 | static void sqlite3Fts5SetLocale( | |||
| 20332 | Fts5Config *pConfig, | |||
| 20333 | const char *zLocale, | |||
| 20334 | int nLocale | |||
| 20335 | ){ | |||
| 20336 | Fts5TokenizerConfig *pT = &pConfig->t; | |||
| 20337 | pT->pLocale = zLocale; | |||
| 20338 | pT->nLocale = nLocale; | |||
| 20339 | } | |||
| 20340 | ||||
| 20341 | /* | |||
| 20342 | ** Clear any locale configured by an earlier call to sqlite3Fts5SetLocale(). | |||
| 20343 | */ | |||
| 20344 | static void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ | |||
| 20345 | sqlite3Fts5SetLocale(pConfig, 0, 0); | |||
| 20346 | } | |||
| 20347 | ||||
| 20348 | /* | |||
| 20349 | ** Return true if the value passed as the only argument is an | |||
| 20350 | ** fts5_locale() value. | |||
| 20351 | */ | |||
| 20352 | static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal){ | |||
| 20353 | int ret = 0; | |||
| 20354 | if( sqlite3_value_typesqlite3_api->value_type(pVal)==SQLITE_BLOB4 ){ | |||
| 20355 | /* Call sqlite3_value_bytes() after sqlite3_value_blob() in this case. | |||
| 20356 | ** If the blob was created using zeroblob(), then sqlite3_value_blob() | |||
| 20357 | ** may call malloc(). If this malloc() fails, then the values returned | |||
| 20358 | ** by both value_blob() and value_bytes() will be 0. If value_bytes() were | |||
| 20359 | ** called first, then the NULL pointer returned by value_blob() might | |||
| 20360 | ** be dereferenced. */ | |||
| 20361 | const u8 *pBlob = sqlite3_value_blobsqlite3_api->value_blob(pVal); | |||
| 20362 | int nBlob = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
| 20363 | if( nBlob>FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) | |||
| 20364 | && 0==memcmp(pBlob, FTS5_LOCALE_HDR(pConfig)((const u8*)(pConfig->pGlobal->aLocaleHdr)), FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))) | |||
| 20365 | ){ | |||
| 20366 | ret = 1; | |||
| 20367 | } | |||
| 20368 | } | |||
| 20369 | return ret; | |||
| 20370 | } | |||
| 20371 | ||||
| 20372 | /* | |||
| 20373 | ** Value pVal is guaranteed to be an fts5_locale() value, according to | |||
| 20374 | ** sqlite3Fts5IsLocaleValue(). This function extracts the text and locale | |||
| 20375 | ** from the value and returns them separately. | |||
| 20376 | ** | |||
| 20377 | ** If successful, SQLITE_OK is returned and (*ppText) and (*ppLoc) set | |||
| 20378 | ** to point to buffers containing the text and locale, as utf-8, | |||
| 20379 | ** respectively. In this case output parameters (*pnText) and (*pnLoc) are | |||
| 20380 | ** set to the sizes in bytes of these two buffers. | |||
| 20381 | ** | |||
| 20382 | ** Or, if an error occurs, then an SQLite error code is returned. The final | |||
| 20383 | ** value of the four output parameters is undefined in this case. | |||
| 20384 | */ | |||
| 20385 | static int sqlite3Fts5DecodeLocaleValue( | |||
| 20386 | sqlite3_value *pVal, | |||
| 20387 | const char **ppText, | |||
| 20388 | int *pnText, | |||
| 20389 | const char **ppLoc, | |||
| 20390 | int *pnLoc | |||
| 20391 | ){ | |||
| 20392 | const char *p = sqlite3_value_blobsqlite3_api->value_blob(pVal); | |||
| 20393 | int n = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
| 20394 | int nLoc = 0; | |||
| 20395 | ||||
| 20396 | assert( sqlite3_value_type(pVal)==SQLITE_BLOB )((void) (0)); | |||
| 20397 | assert( n>FTS5_LOCALE_HDR_SIZE )((void) (0)); | |||
| 20398 | ||||
| 20399 | for(nLoc=FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); p[nLoc]; nLoc++){ | |||
| 20400 | if( nLoc==(n-1) ){ | |||
| 20401 | return SQLITE_MISMATCH20; | |||
| 20402 | } | |||
| 20403 | } | |||
| 20404 | *ppLoc = &p[FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))]; | |||
| 20405 | *pnLoc = nLoc - FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); | |||
| 20406 | ||||
| 20407 | *ppText = &p[nLoc+1]; | |||
| 20408 | *pnText = n - nLoc - 1; | |||
| 20409 | return SQLITE_OK0; | |||
| 20410 | } | |||
| 20411 | ||||
| 20412 | /* | |||
| 20413 | ** Argument pVal is the text of a full-text search expression. It may or | |||
| 20414 | ** may not have been wrapped by fts5_locale(). This function extracts | |||
| 20415 | ** the text of the expression, and sets output variable (*pzText) to | |||
| 20416 | ** point to a nul-terminated buffer containing the expression. | |||
| 20417 | ** | |||
| 20418 | ** If pVal was an fts5_locale() value, then sqlite3Fts5SetLocale() is called | |||
| 20419 | ** to set the tokenizer to use the specified locale. | |||
| 20420 | ** | |||
| 20421 | ** If output variable (*pbFreeAndReset) is set to true, then the caller | |||
| 20422 | ** is required to (a) call sqlite3Fts5ClearLocale() to reset the tokenizer | |||
| 20423 | ** locale, and (b) call sqlite3_free() to free (*pzText). | |||
| 20424 | */ | |||
| 20425 | static int fts5ExtractExprText( | |||
| 20426 | Fts5Config *pConfig, /* Fts5 configuration */ | |||
| 20427 | sqlite3_value *pVal, /* Value to extract expression text from */ | |||
| 20428 | char **pzText, /* OUT: nul-terminated buffer of text */ | |||
| 20429 | int *pbFreeAndReset /* OUT: Free (*pzText) and clear locale */ | |||
| 20430 | ){ | |||
| 20431 | int rc = SQLITE_OK0; | |||
| 20432 | ||||
| 20433 | if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
| 20434 | const char *pText = 0; | |||
| 20435 | int nText = 0; | |||
| 20436 | const char *pLoc = 0; | |||
| 20437 | int nLoc = 0; | |||
| 20438 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
| 20439 | *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, pText); | |||
| 20440 | if( rc==SQLITE_OK0 ){ | |||
| 20441 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
| 20442 | } | |||
| 20443 | *pbFreeAndReset = 1; | |||
| 20444 | }else{ | |||
| 20445 | *pzText = (char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
| 20446 | *pbFreeAndReset = 0; | |||
| 20447 | } | |||
| 20448 | ||||
| 20449 | return rc; | |||
| 20450 | } | |||
| 20451 | ||||
| 20452 | ||||
| 20453 | /* | |||
| 20454 | ** This is the xFilter interface for the virtual table. See | |||
| 20455 | ** the virtual table xFilter method documentation for additional | |||
| 20456 | ** information. | |||
| 20457 | ** | |||
| 20458 | ** There are three possible query strategies: | |||
| 20459 | ** | |||
| 20460 | ** 1. Full-text search using a MATCH operator. | |||
| 20461 | ** 2. A by-rowid lookup. | |||
| 20462 | ** 3. A full-table scan. | |||
| 20463 | */ | |||
| 20464 | static int fts5FilterMethod( | |||
| 20465 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ | |||
| 20466 | int idxNum, /* Strategy index */ | |||
| 20467 | const char *idxStr, /* Unused */ | |||
| 20468 | int nVal, /* Number of elements in apVal */ | |||
| 20469 | sqlite3_value **apVal /* Arguments for the indexing scheme */ | |||
| 20470 | ){ | |||
| 20471 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | |||
| 20472 | Fts5Config *pConfig = pTab->p.pConfig; | |||
| 20473 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
| 20474 | int rc = SQLITE_OK0; /* Error code */ | |||
| 20475 | int bDesc; /* True if ORDER BY [rank|rowid] DESC */ | |||
| 20476 | int bOrderByRank; /* True if ORDER BY rank */ | |||
| 20477 | sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */ | |||
| 20478 | sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */ | |||
| 20479 | sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */ | |||
| 20480 | sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ | |||
| 20481 | int iCol; /* Column on LHS of MATCH operator */ | |||
| 20482 | char **pzErrmsg = pConfig->pzErrmsg; | |||
| 20483 | int bPrefixInsttoken = pConfig->bPrefixInsttoken; | |||
| 20484 | int i; | |||
| 20485 | int iIdxStr = 0; | |||
| 20486 | Fts5Expr *pExpr = 0; | |||
| 20487 | ||||
| 20488 | assert( pConfig->bLock==0 )((void) (0)); | |||
| 20489 | if( pCsr->ePlan ){ | |||
| 20490 | fts5FreeCursorComponents(pCsr); | |||
| 20491 | memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr)); | |||
| 20492 | } | |||
| 20493 | ||||
| 20494 | assert( pCsr->pStmt==0 )((void) (0)); | |||
| 20495 | assert( pCsr->pExpr==0 )((void) (0)); | |||
| 20496 | assert( pCsr->csrflags==0 )((void) (0)); | |||
| 20497 | assert( pCsr->pRank==0 )((void) (0)); | |||
| 20498 | assert( pCsr->zRank==0 )((void) (0)); | |||
| 20499 | assert( pCsr->zRankArgs==0 )((void) (0)); | |||
| 20500 | assert( pTab->pSortCsr==0 || nVal==0 )((void) (0)); | |||
| 20501 | ||||
| 20502 | assert( pzErrmsg==0 || pzErrmsg==&pTab->p.base.zErrMsg )((void) (0)); | |||
| 20503 | pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | |||
| 20504 | ||||
| 20505 | /* Decode the arguments passed through to this function. */ | |||
| 20506 | for(i=0; i<nVal; i++){ | |||
| 20507 | switch( idxStr[iIdxStr++] ){ | |||
| 20508 | case 'r': | |||
| 20509 | pRank = apVal[i]; | |||
| 20510 | break; | |||
| 20511 | case 'M': { | |||
| 20512 | char *zText = 0; | |||
| 20513 | int bFreeAndReset = 0; | |||
| 20514 | int bInternal = 0; | |||
| 20515 | ||||
| 20516 | rc = fts5ExtractExprText(pConfig, apVal[i], &zText, &bFreeAndReset); | |||
| 20517 | if( rc!=SQLITE_OK0 ) goto filter_out; | |||
| 20518 | if( zText==0 ) zText = ""; | |||
| 20519 | if( sqlite3_value_subtypesqlite3_api->value_subtype(apVal[i])==FTS5_INSTTOKEN_SUBTYPE73 ){ | |||
| 20520 | pConfig->bPrefixInsttoken = 1; | |||
| 20521 | } | |||
| 20522 | ||||
| 20523 | iCol = 0; | |||
| 20524 | do{ | |||
| 20525 | iCol = iCol*10 + (idxStr[iIdxStr]-'0'); | |||
| 20526 | iIdxStr++; | |||
| 20527 | }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' ); | |||
| 20528 | ||||
| 20529 | if( zText[0]=='*' ){ | |||
| 20530 | /* The user has issued a query of the form "MATCH '*...'". This | |||
| 20531 | ** indicates that the MATCH expression is not a full text query, | |||
| 20532 | ** but a request for an internal parameter. */ | |||
| 20533 | rc = fts5SpecialMatch(pTab, pCsr, &zText[1]); | |||
| 20534 | bInternal = 1; | |||
| 20535 | }else{ | |||
| 20536 | char **pzErr = &pTab->p.base.zErrMsg; | |||
| 20537 | rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr); | |||
| 20538 | if( rc==SQLITE_OK0 ){ | |||
| 20539 | rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); | |||
| 20540 | pExpr = 0; | |||
| 20541 | } | |||
| 20542 | } | |||
| 20543 | ||||
| 20544 | if( bFreeAndReset ){ | |||
| 20545 | sqlite3_freesqlite3_api->free(zText); | |||
| 20546 | sqlite3Fts5ClearLocale(pConfig); | |||
| 20547 | } | |||
| 20548 | ||||
| 20549 | if( bInternal || rc!=SQLITE_OK0 ) goto filter_out; | |||
| 20550 | ||||
| 20551 | break; | |||
| 20552 | } | |||
| 20553 | case 'L': | |||
| 20554 | case 'G': { | |||
| 20555 | int bGlob = (idxStr[iIdxStr-1]=='G'); | |||
| 20556 | const char *zText = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[i]); | |||
| 20557 | iCol = 0; | |||
| 20558 | do{ | |||
| 20559 | iCol = iCol*10 + (idxStr[iIdxStr]-'0'); | |||
| 20560 | iIdxStr++; | |||
| 20561 | }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' ); | |||
| 20562 | if( zText ){ | |||
| 20563 | rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr); | |||
| 20564 | } | |||
| 20565 | if( rc==SQLITE_OK0 ){ | |||
| 20566 | rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); | |||
| 20567 | pExpr = 0; | |||
| 20568 | } | |||
| 20569 | if( rc!=SQLITE_OK0 ) goto filter_out; | |||
| 20570 | break; | |||
| 20571 | } | |||
| 20572 | case '=': | |||
| 20573 | pRowidEq = apVal[i]; | |||
| 20574 | break; | |||
| 20575 | case '<': | |||
| 20576 | pRowidLe = apVal[i]; | |||
| 20577 | break; | |||
| 20578 | default: assert( idxStr[iIdxStr-1]=='>' )((void) (0)); | |||
| 20579 | pRowidGe = apVal[i]; | |||
| 20580 | break; | |||
| 20581 | } | |||
| 20582 | } | |||
| 20583 | bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK0x0020) ? 1 : 0); | |||
| 20584 | pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC0x0080) ? 1 : 0); | |||
| 20585 | ||||
| 20586 | /* Set the cursor upper and lower rowid limits. Only some strategies | |||
| 20587 | ** actually use them. This is ok, as the xBestIndex() method leaves the | |||
| 20588 | ** sqlite3_index_constraint.omit flag clear for range constraints | |||
| 20589 | ** on the rowid field. */ | |||
| 20590 | if( pRowidEq ){ | |||
| 20591 | pRowidLe = pRowidGe = pRowidEq; | |||
| 20592 | } | |||
| 20593 | if( bDesc ){ | |||
| 20594 | pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))); | |||
| 20595 | pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32)))); | |||
| 20596 | }else{ | |||
| 20597 | pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32))); | |||
| 20598 | pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32)))); | |||
| 20599 | } | |||
| 20600 | ||||
| 20601 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | |||
| 20602 | if( rc!=SQLITE_OK0 ) goto filter_out; | |||
| 20603 | ||||
| 20604 | if( pTab->pSortCsr ){ | |||
| 20605 | /* If pSortCsr is non-NULL, then this call is being made as part of | |||
| 20606 | ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is | |||
| 20607 | ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will | |||
| 20608 | ** return results to the user for this query. The current cursor | |||
| 20609 | ** (pCursor) is used to execute the query issued by function | |||
| 20610 | ** fts5CursorFirstSorted() above. */ | |||
| 20611 | assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 )((void) (0)); | |||
| 20612 | assert( nVal==0 && bOrderByRank==0 && bDesc==0 )((void) (0)); | |||
| 20613 | assert( pCsr->iLastRowid==LARGEST_INT64 )((void) (0)); | |||
| 20614 | assert( pCsr->iFirstRowid==SMALLEST_INT64 )((void) (0)); | |||
| 20615 | if( pTab->pSortCsr->bDesc ){ | |||
| 20616 | pCsr->iLastRowid = pTab->pSortCsr->iFirstRowid; | |||
| 20617 | pCsr->iFirstRowid = pTab->pSortCsr->iLastRowid; | |||
| 20618 | }else{ | |||
| 20619 | pCsr->iLastRowid = pTab->pSortCsr->iLastRowid; | |||
| 20620 | pCsr->iFirstRowid = pTab->pSortCsr->iFirstRowid; | |||
| 20621 | } | |||
| 20622 | pCsr->ePlan = FTS5_PLAN_SOURCE2; | |||
| 20623 | pCsr->pExpr = pTab->pSortCsr->pExpr; | |||
| 20624 | rc = fts5CursorFirst(pTab, pCsr, bDesc); | |||
| 20625 | }else if( pCsr->pExpr ){ | |||
| 20626 | assert( rc==SQLITE_OK )((void) (0)); | |||
| 20627 | rc = fts5CursorParseRank(pConfig, pCsr, pRank); | |||
| 20628 | if( rc==SQLITE_OK0 ){ | |||
| 20629 | if( bOrderByRank ){ | |||
| 20630 | pCsr->ePlan = FTS5_PLAN_SORTED_MATCH4; | |||
| 20631 | rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); | |||
| 20632 | }else{ | |||
| 20633 | pCsr->ePlan = FTS5_PLAN_MATCH1; | |||
| 20634 | rc = fts5CursorFirst(pTab, pCsr, bDesc); | |||
| 20635 | } | |||
| 20636 | } | |||
| 20637 | }else if( pConfig->zContent==0 ){ | |||
| 20638 | fts5SetVtabError(pTab,"%s: table does not support scanning",pConfig->zName); | |||
| 20639 | rc = SQLITE_ERROR1; | |||
| 20640 | }else{ | |||
| 20641 | /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup | |||
| 20642 | ** by rowid (ePlan==FTS5_PLAN_ROWID). */ | |||
| 20643 | pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID6 : FTS5_PLAN_SCAN5); | |||
| 20644 | rc = sqlite3Fts5StorageStmt( | |||
| 20645 | pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->p.base.zErrMsg | |||
| 20646 | ); | |||
| 20647 | if( rc==SQLITE_OK0 ){ | |||
| 20648 | if( pRowidEq!=0 ){ | |||
| 20649 | assert( pCsr->ePlan==FTS5_PLAN_ROWID )((void) (0)); | |||
| 20650 | sqlite3_bind_valuesqlite3_api->bind_value(pCsr->pStmt, 1, pRowidEq); | |||
| 20651 | }else{ | |||
| 20652 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid); | |||
| 20653 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid); | |||
| 20654 | } | |||
| 20655 | rc = fts5NextMethod(pCursor); | |||
| 20656 | } | |||
| 20657 | } | |||
| 20658 | ||||
| 20659 | filter_out: | |||
| 20660 | sqlite3Fts5ExprFree(pExpr); | |||
| 20661 | pConfig->pzErrmsg = pzErrmsg; | |||
| 20662 | pConfig->bPrefixInsttoken = bPrefixInsttoken; | |||
| 20663 | return rc; | |||
| 20664 | } | |||
| 20665 | ||||
| 20666 | /* | |||
| 20667 | ** This is the xEof method of the virtual table. SQLite calls this | |||
| 20668 | ** routine to find out if it has reached the end of a result set. | |||
| 20669 | */ | |||
| 20670 | static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ | |||
| 20671 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
| 20672 | return (CsrFlagTest(pCsr, FTS5CSR_EOF)((pCsr)->csrflags & (0x01)) ? 1 : 0); | |||
| 20673 | } | |||
| 20674 | ||||
| 20675 | /* | |||
| 20676 | ** Return the rowid that the cursor currently points to. | |||
| 20677 | */ | |||
| 20678 | static i64 fts5CursorRowid(Fts5Cursor *pCsr){ | |||
| 20679 | assert( pCsr->ePlan==FTS5_PLAN_MATCH((void) (0)) | |||
| 20680 | || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH((void) (0)) | |||
| 20681 | || pCsr->ePlan==FTS5_PLAN_SOURCE((void) (0)) | |||
| 20682 | || pCsr->ePlan==FTS5_PLAN_SCAN((void) (0)) | |||
| 20683 | || pCsr->ePlan==FTS5_PLAN_ROWID((void) (0)) | |||
| 20684 | )((void) (0)); | |||
| 20685 | if( pCsr->pSorter ){ | |||
| 20686 | return pCsr->pSorter->iRowid; | |||
| 20687 | }else if( pCsr->ePlan>=FTS5_PLAN_SCAN5 ){ | |||
| 20688 | return sqlite3_column_int64sqlite3_api->column_int64(pCsr->pStmt, 0); | |||
| 20689 | }else{ | |||
| 20690 | return sqlite3Fts5ExprRowid(pCsr->pExpr); | |||
| 20691 | } | |||
| 20692 | } | |||
| 20693 | ||||
| 20694 | /* | |||
| 20695 | ** This is the xRowid method. The SQLite core calls this routine to | |||
| 20696 | ** retrieve the rowid for the current row of the result set. fts5 | |||
| 20697 | ** exposes %_content.rowid as the rowid for the virtual table. The | |||
| 20698 | ** rowid should be written to *pRowid. | |||
| 20699 | */ | |||
| 20700 | static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ | |||
| 20701 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
| 20702 | int ePlan = pCsr->ePlan; | |||
| 20703 | ||||
| 20704 | assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 )((void) (0)); | |||
| 20705 | if( ePlan==FTS5_PLAN_SPECIAL3 ){ | |||
| 20706 | *pRowid = 0; | |||
| 20707 | }else{ | |||
| 20708 | *pRowid = fts5CursorRowid(pCsr); | |||
| 20709 | } | |||
| 20710 | ||||
| 20711 | return SQLITE_OK0; | |||
| 20712 | } | |||
| 20713 | ||||
| 20714 | ||||
| 20715 | /* | |||
| 20716 | ** If the cursor requires seeking (bSeekRequired flag is set), seek it. | |||
| 20717 | ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. | |||
| 20718 | ** | |||
| 20719 | ** If argument bErrormsg is true and an error occurs, an error message may | |||
| 20720 | ** be left in sqlite3_vtab.zErrMsg. | |||
| 20721 | */ | |||
| 20722 | static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){ | |||
| 20723 | int rc = SQLITE_OK0; | |||
| 20724 | ||||
| 20725 | /* If the cursor does not yet have a statement handle, obtain one now. */ | |||
| 20726 | if( pCsr->pStmt==0 ){ | |||
| 20727 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
| 20728 | int eStmt = fts5StmtType(pCsr); | |||
| 20729 | rc = sqlite3Fts5StorageStmt( | |||
| 20730 | pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->p.base.zErrMsg:0) | |||
| 20731 | ); | |||
| 20732 | assert( rc!=SQLITE_OK || pTab->p.base.zErrMsg==0 )((void) (0)); | |||
| 20733 | assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) )((void) (0)); | |||
| 20734 | } | |||
| 20735 | ||||
| 20736 | if( rc==SQLITE_OK0 && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags & (0x02)) ){ | |||
| 20737 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | |||
| 20738 | assert( pCsr->pExpr )((void) (0)); | |||
| 20739 | sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | |||
| 20740 | sqlite3_bind_int64sqlite3_api->bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); | |||
| 20741 | pTab->pConfig->bLock++; | |||
| 20742 | rc = sqlite3_stepsqlite3_api->step(pCsr->pStmt); | |||
| 20743 | pTab->pConfig->bLock--; | |||
| 20744 | if( rc==SQLITE_ROW100 ){ | |||
| 20745 | rc = SQLITE_OK0; | |||
| 20746 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT)((pCsr)->csrflags &= ~(0x02)); | |||
| 20747 | }else{ | |||
| 20748 | rc = sqlite3_resetsqlite3_api->reset(pCsr->pStmt); | |||
| 20749 | if( rc==SQLITE_OK0 ){ | |||
| 20750 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 20751 | fts5SetVtabError((Fts5FullTable*)pTab, | |||
| 20752 | "fts5: missing row %lld from content table %s", | |||
| 20753 | fts5CursorRowid(pCsr), | |||
| 20754 | pTab->pConfig->zContent | |||
| 20755 | ); | |||
| 20756 | }else if( pTab->pConfig->pzErrmsg ){ | |||
| 20757 | fts5SetVtabError((Fts5FullTable*)pTab, | |||
| 20758 | "%s", sqlite3_errmsgsqlite3_api->errmsg(pTab->pConfig->db) | |||
| 20759 | ); | |||
| 20760 | } | |||
| 20761 | } | |||
| 20762 | } | |||
| 20763 | return rc; | |||
| 20764 | } | |||
| 20765 | ||||
| 20766 | /* | |||
| 20767 | ** This function is called to handle an FTS INSERT command. In other words, | |||
| 20768 | ** an INSERT statement of the form: | |||
| 20769 | ** | |||
| 20770 | ** INSERT INTO fts(fts) VALUES($pCmd) | |||
| 20771 | ** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal) | |||
| 20772 | ** | |||
| 20773 | ** Argument pVal is the value assigned to column "fts" by the INSERT | |||
| 20774 | ** statement. This function returns SQLITE_OK if successful, or an SQLite | |||
| 20775 | ** error code if an error occurs. | |||
| 20776 | ** | |||
| 20777 | ** The commands implemented by this function are documented in the "Special | |||
| 20778 | ** INSERT Directives" section of the documentation. It should be updated if | |||
| 20779 | ** more commands are added to this function. | |||
| 20780 | */ | |||
| 20781 | static int fts5SpecialInsert( | |||
| 20782 | Fts5FullTable *pTab, /* Fts5 table object */ | |||
| 20783 | const char *zCmd, /* Text inserted into table-name column */ | |||
| 20784 | sqlite3_value *pVal /* Value inserted into rank column */ | |||
| 20785 | ){ | |||
| 20786 | Fts5Config *pConfig = pTab->p.pConfig; | |||
| 20787 | int rc = SQLITE_OK0; | |||
| 20788 | int bError = 0; | |||
| 20789 | int bLoadConfig = 0; | |||
| 20790 | ||||
| 20791 | if( 0==sqlite3_stricmpsqlite3_api->stricmp("delete-all", zCmd) ){ | |||
| 20792 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
| 20793 | fts5SetVtabError(pTab, | |||
| 20794 | "'delete-all' may only be used with a " | |||
| 20795 | "contentless or external content fts5 table" | |||
| 20796 | ); | |||
| 20797 | rc = SQLITE_ERROR1; | |||
| 20798 | }else{ | |||
| 20799 | rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); | |||
| 20800 | } | |||
| 20801 | bLoadConfig = 1; | |||
| 20802 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("rebuild", zCmd) ){ | |||
| 20803 | if( fts5IsContentless(pTab, 1) ){ | |||
| 20804 | fts5SetVtabError(pTab, | |||
| 20805 | "'rebuild' may not be used with a contentless fts5 table" | |||
| 20806 | ); | |||
| 20807 | rc = SQLITE_ERROR1; | |||
| 20808 | }else{ | |||
| 20809 | rc = sqlite3Fts5StorageRebuild(pTab->pStorage); | |||
| 20810 | } | |||
| 20811 | bLoadConfig = 1; | |||
| 20812 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("optimize", zCmd) ){ | |||
| 20813 | rc = sqlite3Fts5StorageOptimize(pTab->pStorage); | |||
| 20814 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("merge", zCmd) ){ | |||
| 20815 | int nMerge = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 20816 | rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); | |||
| 20817 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("integrity-check", zCmd) ){ | |||
| 20818 | int iArg = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 20819 | rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, iArg); | |||
| 20820 | #ifdef SQLITE_DEBUG | |||
| 20821 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("prefix-index", zCmd) ){ | |||
| 20822 | pConfig->bPrefixIndex = sqlite3_value_intsqlite3_api->value_int(pVal); | |||
| 20823 | #endif | |||
| 20824 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp("flush", zCmd) ){ | |||
| 20825 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | |||
| 20826 | }else{ | |||
| 20827 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | |||
| 20828 | if( rc==SQLITE_OK0 ){ | |||
| 20829 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | |||
| 20830 | } | |||
| 20831 | if( rc==SQLITE_OK0 ){ | |||
| 20832 | rc = sqlite3Fts5ConfigSetValue(pTab->p.pConfig, zCmd, pVal, &bError); | |||
| 20833 | } | |||
| 20834 | if( rc==SQLITE_OK0 ){ | |||
| 20835 | if( bError ){ | |||
| 20836 | rc = SQLITE_ERROR1; | |||
| 20837 | }else{ | |||
| 20838 | rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0); | |||
| 20839 | } | |||
| 20840 | } | |||
| 20841 | } | |||
| 20842 | ||||
| 20843 | if( rc==SQLITE_OK0 && bLoadConfig ){ | |||
| 20844 | pTab->p.pConfig->iCookie--; | |||
| 20845 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); | |||
| 20846 | } | |||
| 20847 | ||||
| 20848 | return rc; | |||
| 20849 | } | |||
| 20850 | ||||
| 20851 | static int fts5SpecialDelete( | |||
| 20852 | Fts5FullTable *pTab, | |||
| 20853 | sqlite3_value **apVal | |||
| 20854 | ){ | |||
| 20855 | int rc = SQLITE_OK0; | |||
| 20856 | int eType1 = sqlite3_value_typesqlite3_api->value_type(apVal[1]); | |||
| 20857 | if( eType1==SQLITE_INTEGER1 ){ | |||
| 20858 | sqlite3_int64 iDel = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); | |||
| 20859 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2], 0); | |||
| 20860 | } | |||
| 20861 | return rc; | |||
| 20862 | } | |||
| 20863 | ||||
| 20864 | static void fts5StorageInsert( | |||
| 20865 | int *pRc, | |||
| 20866 | Fts5FullTable *pTab, | |||
| 20867 | sqlite3_value **apVal, | |||
| 20868 | i64 *piRowid | |||
| 20869 | ){ | |||
| 20870 | int rc = *pRc; | |||
| 20871 | if( rc==SQLITE_OK0 ){ | |||
| 20872 | rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, 0, apVal, piRowid); | |||
| 20873 | } | |||
| 20874 | if( rc==SQLITE_OK0 ){ | |||
| 20875 | rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid); | |||
| 20876 | } | |||
| 20877 | *pRc = rc; | |||
| 20878 | } | |||
| 20879 | ||||
| 20880 | /* | |||
| 20881 | ** | |||
| 20882 | ** This function is called when the user attempts an UPDATE on a contentless | |||
| 20883 | ** table. Parameter bRowidModified is true if the UPDATE statement modifies | |||
| 20884 | ** the rowid value. Parameter apVal[] contains the new values for each user | |||
| 20885 | ** defined column of the fts5 table. pConfig is the configuration object of the | |||
| 20886 | ** table being updated (guaranteed to be contentless). The contentless_delete=1 | |||
| 20887 | ** and contentless_unindexed=1 options may or may not be set. | |||
| 20888 | ** | |||
| 20889 | ** This function returns SQLITE_OK if the UPDATE can go ahead, or an SQLite | |||
| 20890 | ** error code if it cannot. In this case an error message is also loaded into | |||
| 20891 | ** pConfig. Output parameter (*pbContent) is set to true if the caller should | |||
| 20892 | ** update the %_content table only - not the FTS index or any other shadow | |||
| 20893 | ** table. This occurs when an UPDATE modifies only UNINDEXED columns of the | |||
| 20894 | ** table. | |||
| 20895 | ** | |||
| 20896 | ** An UPDATE may proceed if: | |||
| 20897 | ** | |||
| 20898 | ** * The only columns modified are UNINDEXED columns, or | |||
| 20899 | ** | |||
| 20900 | ** * The contentless_delete=1 option was specified and all of the indexed | |||
| 20901 | ** columns (not a subset) have been modified. | |||
| 20902 | */ | |||
| 20903 | static int fts5ContentlessUpdate( | |||
| 20904 | Fts5Config *pConfig, | |||
| 20905 | sqlite3_value **apVal, | |||
| 20906 | int bRowidModified, | |||
| 20907 | int *pbContent | |||
| 20908 | ){ | |||
| 20909 | int ii; | |||
| 20910 | int bSeenIndex = 0; /* Have seen modified indexed column */ | |||
| 20911 | int bSeenIndexNC = 0; /* Have seen unmodified indexed column */ | |||
| 20912 | int rc = SQLITE_OK0; | |||
| 20913 | ||||
| 20914 | for(ii=0; ii<pConfig->nCol; ii++){ | |||
| 20915 | if( pConfig->abUnindexed[ii]==0 ){ | |||
| 20916 | if( sqlite3_value_nochangesqlite3_api->value_nochange(apVal[ii]) ){ | |||
| 20917 | bSeenIndexNC++; | |||
| 20918 | }else{ | |||
| 20919 | bSeenIndex++; | |||
| 20920 | } | |||
| 20921 | } | |||
| 20922 | } | |||
| 20923 | ||||
| 20924 | if( bSeenIndex==0 && bRowidModified==0 ){ | |||
| 20925 | *pbContent = 1; | |||
| 20926 | }else{ | |||
| 20927 | if( bSeenIndexNC || pConfig->bContentlessDelete==0 ){ | |||
| 20928 | rc = SQLITE_ERROR1; | |||
| 20929 | sqlite3Fts5ConfigErrmsg(pConfig, | |||
| 20930 | (pConfig->bContentlessDelete ? | |||
| 20931 | "%s a subset of columns on fts5 contentless-delete table: %s" : | |||
| 20932 | "%s contentless fts5 table: %s") | |||
| 20933 | , "cannot UPDATE", pConfig->zName | |||
| 20934 | ); | |||
| 20935 | } | |||
| 20936 | } | |||
| 20937 | ||||
| 20938 | return rc; | |||
| 20939 | } | |||
| 20940 | ||||
| 20941 | /* | |||
| 20942 | ** This function is the implementation of the xUpdate callback used by | |||
| 20943 | ** FTS5 virtual tables. It is invoked by SQLite each time a row is to be | |||
| 20944 | ** inserted, updated or deleted. | |||
| 20945 | ** | |||
| 20946 | ** A delete specifies a single argument - the rowid of the row to remove. | |||
| 20947 | ** | |||
| 20948 | ** Update and insert operations pass: | |||
| 20949 | ** | |||
| 20950 | ** 1. The "old" rowid, or NULL. | |||
| 20951 | ** 2. The "new" rowid. | |||
| 20952 | ** 3. Values for each of the nCol matchable columns. | |||
| 20953 | ** 4. Values for the two hidden columns (<tablename> and "rank"). | |||
| 20954 | */ | |||
| 20955 | static int fts5UpdateMethod( | |||
| 20956 | sqlite3_vtab *pVtab, /* Virtual table handle */ | |||
| 20957 | int nArg, /* Size of argument array */ | |||
| 20958 | sqlite3_value **apVal, /* Array of arguments */ | |||
| 20959 | sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ | |||
| 20960 | ){ | |||
| 20961 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 20962 | Fts5Config *pConfig = pTab->p.pConfig; | |||
| 20963 | int eType0; /* value_type() of apVal[0] */ | |||
| 20964 | int rc = SQLITE_OK0; /* Return code */ | |||
| 20965 | ||||
| 20966 | /* A transaction must be open when this is called. */ | |||
| 20967 | assert( pTab->ts.eState==1 || pTab->ts.eState==2 )((void) (0)); | |||
| 20968 | ||||
| 20969 | assert( pVtab->zErrMsg==0 )((void) (0)); | |||
| 20970 | assert( nArg==1 || nArg==(2+pConfig->nCol+2) )((void) (0)); | |||
| 20971 | assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER((void) (0)) | |||
| 20972 | || sqlite3_value_type(apVal[0])==SQLITE_NULL((void) (0)) | |||
| 20973 | )((void) (0)); | |||
| 20974 | assert( pTab->p.pConfig->pzErrmsg==0 )((void) (0)); | |||
| 20975 | if( pConfig->pgsz==0 ){ | |||
| 20976 | rc = sqlite3Fts5ConfigLoad(pTab->p.pConfig, pTab->p.pConfig->iCookie); | |||
| 20977 | if( rc!=SQLITE_OK0 ) return rc; | |||
| 20978 | } | |||
| 20979 | ||||
| 20980 | pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | |||
| 20981 | ||||
| 20982 | /* Put any active cursors into REQUIRE_SEEK state. */ | |||
| 20983 | fts5TripCursors(pTab); | |||
| 20984 | ||||
| 20985 | eType0 = sqlite3_value_typesqlite3_api->value_type(apVal[0]); | |||
| 20986 | if( eType0==SQLITE_NULL5 | |||
| 20987 | && sqlite3_value_typesqlite3_api->value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL5 | |||
| 20988 | ){ | |||
| 20989 | /* A "special" INSERT op. These are handled separately. */ | |||
| 20990 | const char *z = (const char*)sqlite3_value_textsqlite3_api->value_text(apVal[2+pConfig->nCol]); | |||
| 20991 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 | |||
| 20992 | && 0==sqlite3_stricmpsqlite3_api->stricmp("delete", z) | |||
| 20993 | ){ | |||
| 20994 | if( pConfig->bContentlessDelete ){ | |||
| 20995 | fts5SetVtabError(pTab, | |||
| 20996 | "'delete' may not be used with a contentless_delete=1 table" | |||
| 20997 | ); | |||
| 20998 | rc = SQLITE_ERROR1; | |||
| 20999 | }else{ | |||
| 21000 | rc = fts5SpecialDelete(pTab, apVal); | |||
| 21001 | } | |||
| 21002 | }else{ | |||
| 21003 | rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]); | |||
| 21004 | } | |||
| 21005 | }else{ | |||
| 21006 | /* A regular INSERT, UPDATE or DELETE statement. The trick here is that | |||
| 21007 | ** any conflict on the rowid value must be detected before any | |||
| 21008 | ** modifications are made to the database file. There are 4 cases: | |||
| 21009 | ** | |||
| 21010 | ** 1) DELETE | |||
| 21011 | ** 2) UPDATE (rowid not modified) | |||
| 21012 | ** 3) UPDATE (rowid modified) | |||
| 21013 | ** 4) INSERT | |||
| 21014 | ** | |||
| 21015 | ** Cases 3 and 4 may violate the rowid constraint. | |||
| 21016 | */ | |||
| 21017 | int eConflict = SQLITE_ABORT4; | |||
| 21018 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 || pConfig->bContentlessDelete ){ | |||
| 21019 | eConflict = sqlite3_vtab_on_conflictsqlite3_api->vtab_on_conflict(pConfig->db); | |||
| 21020 | } | |||
| 21021 | ||||
| 21022 | assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL )((void) (0)); | |||
| 21023 | assert( nArg!=1 || eType0==SQLITE_INTEGER )((void) (0)); | |||
| 21024 | ||||
| 21025 | /* DELETE */ | |||
| 21026 | if( nArg==1 ){ | |||
| 21027 | /* It is only possible to DELETE from a contentless table if the | |||
| 21028 | ** contentless_delete=1 flag is set. */ | |||
| 21029 | if( fts5IsContentless(pTab, 1) && pConfig->bContentlessDelete==0 ){ | |||
| 21030 | fts5SetVtabError(pTab, | |||
| 21031 | "cannot DELETE from contentless fts5 table: %s", pConfig->zName | |||
| 21032 | ); | |||
| 21033 | rc = SQLITE_ERROR1; | |||
| 21034 | }else{ | |||
| 21035 | i64 iDel = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); /* Rowid to delete */ | |||
| 21036 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0); | |||
| 21037 | } | |||
| 21038 | } | |||
| 21039 | ||||
| 21040 | /* INSERT or UPDATE */ | |||
| 21041 | else{ | |||
| 21042 | int eType1 = sqlite3_value_numeric_typesqlite3_api->value_numeric_type(apVal[1]); | |||
| 21043 | ||||
| 21044 | /* It is an error to write an fts5_locale() value to a table without | |||
| 21045 | ** the locale=1 option. */ | |||
| 21046 | if( pConfig->bLocale==0 ){ | |||
| 21047 | int ii; | |||
| 21048 | for(ii=0; ii<pConfig->nCol; ii++){ | |||
| 21049 | sqlite3_value *pVal = apVal[ii+2]; | |||
| 21050 | if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
| 21051 | fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); | |||
| 21052 | rc = SQLITE_MISMATCH20; | |||
| 21053 | goto update_out; | |||
| 21054 | } | |||
| 21055 | } | |||
| 21056 | } | |||
| 21057 | ||||
| 21058 | if( eType0!=SQLITE_INTEGER1 ){ | |||
| 21059 | /* An INSERT statement. If the conflict-mode is REPLACE, first remove | |||
| 21060 | ** the current entry (if any). */ | |||
| 21061 | if( eConflict==SQLITE_REPLACE5 && eType1==SQLITE_INTEGER1 ){ | |||
| 21062 | i64 iNew = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); /* Rowid to delete */ | |||
| 21063 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0); | |||
| 21064 | } | |||
| 21065 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | |||
| 21066 | } | |||
| 21067 | ||||
| 21068 | /* UPDATE */ | |||
| 21069 | else{ | |||
| 21070 | Fts5Storage *pStorage = pTab->pStorage; | |||
| 21071 | i64 iOld = sqlite3_value_int64sqlite3_api->value_int64(apVal[0]); /* Old rowid */ | |||
| 21072 | i64 iNew = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); /* New rowid */ | |||
| 21073 | int bContent = 0; /* Content only update */ | |||
| 21074 | ||||
| 21075 | /* If this is a contentless table (including contentless_unindexed=1 | |||
| 21076 | ** tables), check if the UPDATE may proceed. */ | |||
| 21077 | if( fts5IsContentless(pTab, 1) ){ | |||
| 21078 | rc = fts5ContentlessUpdate(pConfig, &apVal[2], iOld!=iNew, &bContent); | |||
| 21079 | if( rc!=SQLITE_OK0 ) goto update_out; | |||
| 21080 | } | |||
| 21081 | ||||
| 21082 | if( eType1!=SQLITE_INTEGER1 ){ | |||
| 21083 | rc = SQLITE_MISMATCH20; | |||
| 21084 | }else if( iOld!=iNew ){ | |||
| 21085 | assert( bContent==0 )((void) (0)); | |||
| 21086 | if( eConflict==SQLITE_REPLACE5 ){ | |||
| 21087 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); | |||
| 21088 | if( rc==SQLITE_OK0 ){ | |||
| 21089 | rc = sqlite3Fts5StorageDelete(pStorage, iNew, 0, 0); | |||
| 21090 | } | |||
| 21091 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | |||
| 21092 | }else{ | |||
| 21093 | rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); | |||
| 21094 | if( rc==SQLITE_OK0 ){ | |||
| 21095 | rc = sqlite3Fts5StorageContentInsert(pStorage, 0, apVal, pRowid); | |||
| 21096 | } | |||
| 21097 | if( rc==SQLITE_OK0 ){ | |||
| 21098 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 0); | |||
| 21099 | } | |||
| 21100 | if( rc==SQLITE_OK0 ){ | |||
| 21101 | rc = sqlite3Fts5StorageIndexInsert(pStorage, apVal, *pRowid); | |||
| 21102 | } | |||
| 21103 | } | |||
| 21104 | }else if( bContent ){ | |||
| 21105 | /* This occurs when an UPDATE on a contentless table affects *only* | |||
| 21106 | ** UNINDEXED columns. This is a no-op for contentless_unindexed=0 | |||
| 21107 | ** tables, or a write to the %_content table only for =1 tables. */ | |||
| 21108 | assert( fts5IsContentless(pTab, 1) )((void) (0)); | |||
| 21109 | rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); | |||
| 21110 | if( rc==SQLITE_OK0 ){ | |||
| 21111 | rc = sqlite3Fts5StorageContentInsert(pStorage, 1, apVal, pRowid); | |||
| 21112 | } | |||
| 21113 | }else{ | |||
| 21114 | rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); | |||
| 21115 | fts5StorageInsert(&rc, pTab, apVal, pRowid); | |||
| 21116 | } | |||
| 21117 | sqlite3Fts5StorageReleaseDeleteRow(pStorage); | |||
| 21118 | } | |||
| 21119 | } | |||
| 21120 | } | |||
| 21121 | ||||
| 21122 | update_out: | |||
| 21123 | pTab->p.pConfig->pzErrmsg = 0; | |||
| 21124 | return rc; | |||
| 21125 | } | |||
| 21126 | ||||
| 21127 | /* | |||
| 21128 | ** Implementation of xSync() method. | |||
| 21129 | */ | |||
| 21130 | static int fts5SyncMethod(sqlite3_vtab *pVtab){ | |||
| 21131 | int rc; | |||
| 21132 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 21133 | fts5CheckTransactionState(pTab, FTS5_SYNC, 0); | |||
| 21134 | pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; | |||
| 21135 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | |||
| 21136 | pTab->p.pConfig->pzErrmsg = 0; | |||
| 21137 | return rc; | |||
| 21138 | } | |||
| 21139 | ||||
| 21140 | /* | |||
| 21141 | ** Implementation of xBegin() method. | |||
| 21142 | */ | |||
| 21143 | static int fts5BeginMethod(sqlite3_vtab *pVtab){ | |||
| 21144 | int rc = fts5NewTransaction((Fts5FullTable*)pVtab); | |||
| 21145 | if( rc==SQLITE_OK0 ){ | |||
| 21146 | fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_BEGIN, 0); | |||
| 21147 | } | |||
| 21148 | return rc; | |||
| 21149 | } | |||
| 21150 | ||||
| 21151 | /* | |||
| 21152 | ** Implementation of xCommit() method. This is a no-op. The contents of | |||
| 21153 | ** the pending-terms hash-table have already been flushed into the database | |||
| 21154 | ** by fts5SyncMethod(). | |||
| 21155 | */ | |||
| 21156 | static int fts5CommitMethod(sqlite3_vtab *pVtab){ | |||
| 21157 | UNUSED_PARAM(pVtab)(void)(pVtab); /* Call below is a no-op for NDEBUG builds */ | |||
| 21158 | fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_COMMIT, 0); | |||
| 21159 | return SQLITE_OK0; | |||
| 21160 | } | |||
| 21161 | ||||
| 21162 | /* | |||
| 21163 | ** Implementation of xRollback(). Discard the contents of the pending-terms | |||
| 21164 | ** hash-table. Any changes made to the database are reverted by SQLite. | |||
| 21165 | */ | |||
| 21166 | static int fts5RollbackMethod(sqlite3_vtab *pVtab){ | |||
| 21167 | int rc; | |||
| 21168 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 21169 | fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); | |||
| 21170 | rc = sqlite3Fts5StorageRollback(pTab->pStorage); | |||
| 21171 | pTab->p.pConfig->pgsz = 0; | |||
| 21172 | return rc; | |||
| 21173 | } | |||
| 21174 | ||||
| 21175 | static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*); | |||
| 21176 | ||||
| 21177 | static void *fts5ApiUserData(Fts5Context *pCtx){ | |||
| 21178 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21179 | return pCsr->pAux->pUserData; | |||
| 21180 | } | |||
| 21181 | ||||
| 21182 | static int fts5ApiColumnCount(Fts5Context *pCtx){ | |||
| 21183 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21184 | return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; | |||
| 21185 | } | |||
| 21186 | ||||
| 21187 | static int fts5ApiColumnTotalSize( | |||
| 21188 | Fts5Context *pCtx, | |||
| 21189 | int iCol, | |||
| 21190 | sqlite3_int64 *pnToken | |||
| 21191 | ){ | |||
| 21192 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21193 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
| 21194 | return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); | |||
| 21195 | } | |||
| 21196 | ||||
| 21197 | static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ | |||
| 21198 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21199 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
| 21200 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); | |||
| 21201 | } | |||
| 21202 | ||||
| 21203 | /* | |||
| 21204 | ** Implementation of xTokenize_v2() API. | |||
| 21205 | */ | |||
| 21206 | static int fts5ApiTokenize_v2( | |||
| 21207 | Fts5Context *pCtx, | |||
| 21208 | const char *pText, int nText, | |||
| 21209 | const char *pLoc, int nLoc, | |||
| 21210 | void *pUserData, | |||
| 21211 | int (*xToken)(void*, int, const char*, int, int, int) | |||
| 21212 | ){ | |||
| 21213 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21214 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | |||
| 21215 | int rc = SQLITE_OK0; | |||
| 21216 | ||||
| 21217 | sqlite3Fts5SetLocale(pTab->pConfig, pLoc, nLoc); | |||
| 21218 | rc = sqlite3Fts5Tokenize(pTab->pConfig, | |||
| 21219 | FTS5_TOKENIZE_AUX0x0008, pText, nText, pUserData, xToken | |||
| 21220 | ); | |||
| 21221 | sqlite3Fts5SetLocale(pTab->pConfig, 0, 0); | |||
| 21222 | ||||
| 21223 | return rc; | |||
| 21224 | } | |||
| 21225 | ||||
| 21226 | /* | |||
| 21227 | ** Implementation of xTokenize() API. This is just xTokenize_v2() with NULL/0 | |||
| 21228 | ** passed as the locale. | |||
| 21229 | */ | |||
| 21230 | static int fts5ApiTokenize( | |||
| 21231 | Fts5Context *pCtx, | |||
| 21232 | const char *pText, int nText, | |||
| 21233 | void *pUserData, | |||
| 21234 | int (*xToken)(void*, int, const char*, int, int, int) | |||
| 21235 | ){ | |||
| 21236 | return fts5ApiTokenize_v2(pCtx, pText, nText, 0, 0, pUserData, xToken); | |||
| 21237 | } | |||
| 21238 | ||||
| 21239 | static int fts5ApiPhraseCount(Fts5Context *pCtx){ | |||
| 21240 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21241 | return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | |||
| 21242 | } | |||
| 21243 | ||||
| 21244 | static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ | |||
| 21245 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21246 | return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); | |||
| 21247 | } | |||
| 21248 | ||||
| 21249 | /* | |||
| 21250 | ** Argument pStmt is an SQL statement of the type used by Fts5Cursor. This | |||
| 21251 | ** function extracts the text value of column iCol of the current row. | |||
| 21252 | ** Additionally, if there is an associated locale, it invokes | |||
| 21253 | ** sqlite3Fts5SetLocale() to configure the tokenizer. In all cases the caller | |||
| 21254 | ** should invoke sqlite3Fts5ClearLocale() to clear the locale at some point | |||
| 21255 | ** after this function returns. | |||
| 21256 | ** | |||
| 21257 | ** If successful, (*ppText) is set to point to a buffer containing the text | |||
| 21258 | ** value as utf-8 and SQLITE_OK returned. (*pnText) is set to the size of that | |||
| 21259 | ** buffer in bytes. It is not guaranteed to be nul-terminated. If an error | |||
| 21260 | ** occurs, an SQLite error code is returned. The final values of the two | |||
| 21261 | ** output parameters are undefined in this case. | |||
| 21262 | */ | |||
| 21263 | static int fts5TextFromStmt( | |||
| 21264 | Fts5Config *pConfig, | |||
| 21265 | sqlite3_stmt *pStmt, | |||
| 21266 | int iCol, | |||
| 21267 | const char **ppText, | |||
| 21268 | int *pnText | |||
| 21269 | ){ | |||
| 21270 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pStmt, iCol+1); | |||
| 21271 | const char *pLoc = 0; | |||
| 21272 | int nLoc = 0; | |||
| 21273 | int rc = SQLITE_OK0; | |||
| 21274 | ||||
| 21275 | if( pConfig->bLocale | |||
| 21276 | && pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | |||
| 21277 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | |||
| 21278 | ){ | |||
| 21279 | rc = sqlite3Fts5DecodeLocaleValue(pVal, ppText, pnText, &pLoc, &nLoc); | |||
| 21280 | }else{ | |||
| 21281 | *ppText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
| 21282 | *pnText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
| 21283 | if( pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
| 21284 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pStmt, iCol+1+pConfig->nCol); | |||
| 21285 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pStmt, iCol+1+pConfig->nCol); | |||
| 21286 | } | |||
| 21287 | } | |||
| 21288 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
| 21289 | return rc; | |||
| 21290 | } | |||
| 21291 | ||||
| 21292 | static int fts5ApiColumnText( | |||
| 21293 | Fts5Context *pCtx, | |||
| 21294 | int iCol, | |||
| 21295 | const char **pz, | |||
| 21296 | int *pn | |||
| 21297 | ){ | |||
| 21298 | int rc = SQLITE_OK0; | |||
| 21299 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21300 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); | |||
| 21301 | ||||
| 21302 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | |||
| 21303 | if( iCol<0 || iCol>=pTab->pConfig->nCol ){ | |||
| 21304 | rc = SQLITE_RANGE25; | |||
| 21305 | }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab), 0) ){ | |||
| 21306 | *pz = 0; | |||
| 21307 | *pn = 0; | |||
| 21308 | }else{ | |||
| 21309 | rc = fts5SeekCursor(pCsr, 0); | |||
| 21310 | if( rc==SQLITE_OK0 ){ | |||
| 21311 | rc = fts5TextFromStmt(pTab->pConfig, pCsr->pStmt, iCol, pz, pn); | |||
| 21312 | sqlite3Fts5ClearLocale(pTab->pConfig); | |||
| 21313 | } | |||
| 21314 | } | |||
| 21315 | return rc; | |||
| 21316 | } | |||
| 21317 | ||||
| 21318 | /* | |||
| 21319 | ** This is called by various API functions - xInst, xPhraseFirst, | |||
| 21320 | ** xPhraseFirstColumn etc. - to obtain the position list for phrase iPhrase | |||
| 21321 | ** of the current row. This function works both for detail=full tables (in | |||
| 21322 | ** which case the position-list was read from the fts index) and for other | |||
| 21323 | ** detail= modes, provided the row content is available. | |||
| 21324 | */ | |||
| 21325 | static int fts5CsrPoslist( | |||
| 21326 | Fts5Cursor *pCsr, /* Fts5 cursor object */ | |||
| 21327 | int iPhrase, /* Phrase to find position list for */ | |||
| 21328 | const u8 **pa, /* OUT: Pointer to position list buffer */ | |||
| 21329 | int *pn /* OUT: Size of (*pa) in bytes */ | |||
| 21330 | ){ | |||
| 21331 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | |||
| 21332 | int rc = SQLITE_OK0; | |||
| 21333 | int bLive = (pCsr->pSorter==0); | |||
| 21334 | ||||
| 21335 | if( iPhrase<0 || iPhrase>=sqlite3Fts5ExprPhraseCount(pCsr->pExpr) ){ | |||
| 21336 | rc = SQLITE_RANGE25; | |||
| 21337 | }else if( pConfig->eDetail!=FTS5_DETAIL_FULL0 | |||
| 21338 | && fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) | |||
| 21339 | ){ | |||
| 21340 | *pa = 0; | |||
| 21341 | *pn = 0; | |||
| 21342 | return SQLITE_OK0; | |||
| 21343 | }else if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST)((pCsr)->csrflags & (0x40)) ){ | |||
| 21344 | if( pConfig->eDetail!=FTS5_DETAIL_FULL0 ){ | |||
| 21345 | Fts5PoslistPopulator *aPopulator; | |||
| 21346 | int i; | |||
| 21347 | ||||
| 21348 | aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive); | |||
| 21349 | if( aPopulator==0 ) rc = SQLITE_NOMEM7; | |||
| 21350 | if( rc==SQLITE_OK0 ){ | |||
| 21351 | rc = fts5SeekCursor(pCsr, 0); | |||
| 21352 | } | |||
| 21353 | for(i=0; i<pConfig->nCol && rc==SQLITE_OK0; i++){ | |||
| 21354 | const char *z = 0; | |||
| 21355 | int n = 0; | |||
| 21356 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); | |||
| 21357 | if( rc==SQLITE_OK0 ){ | |||
| 21358 | rc = sqlite3Fts5ExprPopulatePoslists( | |||
| 21359 | pConfig, pCsr->pExpr, aPopulator, i, z, n | |||
| 21360 | ); | |||
| 21361 | } | |||
| 21362 | sqlite3Fts5ClearLocale(pConfig); | |||
| 21363 | } | |||
| 21364 | sqlite3_freesqlite3_api->free(aPopulator); | |||
| 21365 | ||||
| 21366 | if( pCsr->pSorter ){ | |||
| 21367 | sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid); | |||
| 21368 | } | |||
| 21369 | } | |||
| 21370 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST)((pCsr)->csrflags &= ~(0x40)); | |||
| 21371 | } | |||
| 21372 | ||||
| 21373 | if( rc==SQLITE_OK0 ){ | |||
| 21374 | if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 21375 | Fts5Sorter *pSorter = pCsr->pSorter; | |||
| 21376 | int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); | |||
| 21377 | *pn = pSorter->aIdx[iPhrase] - i1; | |||
| 21378 | *pa = &pSorter->aPoslist[i1]; | |||
| 21379 | }else{ | |||
| 21380 | *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); | |||
| 21381 | } | |||
| 21382 | }else{ | |||
| 21383 | *pa = 0; | |||
| 21384 | *pn = 0; | |||
| 21385 | } | |||
| 21386 | ||||
| 21387 | return rc; | |||
| 21388 | } | |||
| 21389 | ||||
| 21390 | /* | |||
| 21391 | ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated | |||
| 21392 | ** correctly for the current view. Return SQLITE_OK if successful, or an | |||
| 21393 | ** SQLite error code otherwise. | |||
| 21394 | */ | |||
| 21395 | static int fts5CacheInstArray(Fts5Cursor *pCsr){ | |||
| 21396 | int rc = SQLITE_OK0; | |||
| 21397 | Fts5PoslistReader *aIter; /* One iterator for each phrase */ | |||
| 21398 | int nIter; /* Number of iterators/phrases */ | |||
| 21399 | int nCol = ((Fts5Table*)pCsr->base.pVtab)->pConfig->nCol; | |||
| 21400 | ||||
| 21401 | nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | |||
| 21402 | if( pCsr->aInstIter==0 ){ | |||
| 21403 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nIter; | |||
| 21404 | pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); | |||
| 21405 | } | |||
| 21406 | aIter = pCsr->aInstIter; | |||
| 21407 | ||||
| 21408 | if( aIter ){ | |||
| 21409 | int nInst = 0; /* Number instances seen so far */ | |||
| 21410 | int i; | |||
| 21411 | ||||
| 21412 | /* Initialize all iterators */ | |||
| 21413 | for(i=0; i<nIter && rc==SQLITE_OK0; i++){ | |||
| 21414 | const u8 *a; | |||
| 21415 | int n; | |||
| 21416 | rc = fts5CsrPoslist(pCsr, i, &a, &n); | |||
| 21417 | if( rc==SQLITE_OK0 ){ | |||
| 21418 | sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); | |||
| 21419 | } | |||
| 21420 | } | |||
| 21421 | ||||
| 21422 | if( rc==SQLITE_OK0 ){ | |||
| 21423 | while( 1 ){ | |||
| 21424 | int *aInst; | |||
| 21425 | int iBest = -1; | |||
| 21426 | for(i=0; i<nIter; i++){ | |||
| 21427 | if( (aIter[i].bEof==0) | |||
| 21428 | && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) | |||
| 21429 | ){ | |||
| 21430 | iBest = i; | |||
| 21431 | } | |||
| 21432 | } | |||
| 21433 | if( iBest<0 ) break; | |||
| 21434 | ||||
| 21435 | nInst++; | |||
| 21436 | if( nInst>=pCsr->nInstAlloc ){ | |||
| 21437 | int nNewSize = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32; | |||
| 21438 | aInst = (int*)sqlite3_realloc64sqlite3_api->realloc64( | |||
| 21439 | pCsr->aInst, nNewSize*sizeof(int)*3 | |||
| 21440 | ); | |||
| 21441 | if( aInst ){ | |||
| 21442 | pCsr->aInst = aInst; | |||
| 21443 | pCsr->nInstAlloc = nNewSize; | |||
| 21444 | }else{ | |||
| 21445 | nInst--; | |||
| 21446 | rc = SQLITE_NOMEM7; | |||
| 21447 | break; | |||
| 21448 | } | |||
| 21449 | } | |||
| 21450 | ||||
| 21451 | aInst = &pCsr->aInst[3 * (nInst-1)]; | |||
| 21452 | aInst[0] = iBest; | |||
| 21453 | aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos)(int)((aIter[iBest].iPos >> 32) & 0x7FFFFFFF); | |||
| 21454 | aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos)(int)(aIter[iBest].iPos & 0x7FFFFFFF); | |||
| 21455 | assert( aInst[1]>=0 )((void) (0)); | |||
| 21456 | if( aInst[1]>=nCol ){ | |||
| 21457 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 21458 | break; | |||
| 21459 | } | |||
| 21460 | sqlite3Fts5PoslistReaderNext(&aIter[iBest]); | |||
| 21461 | } | |||
| 21462 | } | |||
| 21463 | ||||
| 21464 | pCsr->nInstCount = nInst; | |||
| 21465 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags &= ~(0x08)); | |||
| 21466 | } | |||
| 21467 | return rc; | |||
| 21468 | } | |||
| 21469 | ||||
| 21470 | static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){ | |||
| 21471 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21472 | int rc = SQLITE_OK0; | |||
| 21473 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | |||
| 21474 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) ){ | |||
| 21475 | *pnInst = pCsr->nInstCount; | |||
| 21476 | } | |||
| 21477 | return rc; | |||
| 21478 | } | |||
| 21479 | ||||
| 21480 | static int fts5ApiInst( | |||
| 21481 | Fts5Context *pCtx, | |||
| 21482 | int iIdx, | |||
| 21483 | int *piPhrase, | |||
| 21484 | int *piCol, | |||
| 21485 | int *piOff | |||
| 21486 | ){ | |||
| 21487 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21488 | int rc = SQLITE_OK0; | |||
| 21489 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | |||
| 21490 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) | |||
| 21491 | ){ | |||
| 21492 | if( iIdx<0 || iIdx>=pCsr->nInstCount ){ | |||
| 21493 | rc = SQLITE_RANGE25; | |||
| 21494 | }else{ | |||
| 21495 | *piPhrase = pCsr->aInst[iIdx*3]; | |||
| 21496 | *piCol = pCsr->aInst[iIdx*3 + 1]; | |||
| 21497 | *piOff = pCsr->aInst[iIdx*3 + 2]; | |||
| 21498 | } | |||
| 21499 | } | |||
| 21500 | return rc; | |||
| 21501 | } | |||
| 21502 | ||||
| 21503 | static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ | |||
| 21504 | return fts5CursorRowid((Fts5Cursor*)pCtx); | |||
| 21505 | } | |||
| 21506 | ||||
| 21507 | static int fts5ColumnSizeCb( | |||
| 21508 | void *pContext, /* Pointer to int */ | |||
| 21509 | int tflags, | |||
| 21510 | const char *pUnused, /* Buffer containing token */ | |||
| 21511 | int nUnused, /* Size of token in bytes */ | |||
| 21512 | int iUnused1, /* Start offset of token */ | |||
| 21513 | int iUnused2 /* End offset of token */ | |||
| 21514 | ){ | |||
| 21515 | int *pCnt = (int*)pContext; | |||
| 21516 | UNUSED_PARAM2(pUnused, nUnused)(void)(pUnused), (void)(nUnused); | |||
| 21517 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
| 21518 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 ){ | |||
| 21519 | (*pCnt)++; | |||
| 21520 | } | |||
| 21521 | return SQLITE_OK0; | |||
| 21522 | } | |||
| 21523 | ||||
| 21524 | static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ | |||
| 21525 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21526 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
| 21527 | Fts5Config *pConfig = pTab->p.pConfig; | |||
| 21528 | int rc = SQLITE_OK0; | |||
| 21529 | ||||
| 21530 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags & (0x04)) ){ | |||
| 21531 | if( pConfig->bColumnsize ){ | |||
| 21532 | i64 iRowid = fts5CursorRowid(pCsr); | |||
| 21533 | rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); | |||
| 21534 | }else if( !pConfig->zContent || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 ){ | |||
| 21535 | int i; | |||
| 21536 | for(i=0; i<pConfig->nCol; i++){ | |||
| 21537 | if( pConfig->abUnindexed[i]==0 ){ | |||
| 21538 | pCsr->aColumnSize[i] = -1; | |||
| 21539 | } | |||
| 21540 | } | |||
| 21541 | }else{ | |||
| 21542 | int i; | |||
| 21543 | rc = fts5SeekCursor(pCsr, 0); | |||
| 21544 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | |||
| 21545 | if( pConfig->abUnindexed[i]==0 ){ | |||
| 21546 | const char *z = 0; | |||
| 21547 | int n = 0; | |||
| 21548 | pCsr->aColumnSize[i] = 0; | |||
| 21549 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); | |||
| 21550 | if( rc==SQLITE_OK0 ){ | |||
| 21551 | rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX0x0008, | |||
| 21552 | z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb | |||
| 21553 | ); | |||
| 21554 | } | |||
| 21555 | sqlite3Fts5ClearLocale(pConfig); | |||
| 21556 | } | |||
| 21557 | } | |||
| 21558 | } | |||
| 21559 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE)((pCsr)->csrflags &= ~(0x04)); | |||
| 21560 | } | |||
| 21561 | if( iCol<0 ){ | |||
| 21562 | int i; | |||
| 21563 | *pnToken = 0; | |||
| 21564 | for(i=0; i<pConfig->nCol; i++){ | |||
| 21565 | *pnToken += pCsr->aColumnSize[i]; | |||
| 21566 | } | |||
| 21567 | }else if( iCol<pConfig->nCol ){ | |||
| 21568 | *pnToken = pCsr->aColumnSize[iCol]; | |||
| 21569 | }else{ | |||
| 21570 | *pnToken = 0; | |||
| 21571 | rc = SQLITE_RANGE25; | |||
| 21572 | } | |||
| 21573 | return rc; | |||
| 21574 | } | |||
| 21575 | ||||
| 21576 | /* | |||
| 21577 | ** Implementation of the xSetAuxdata() method. | |||
| 21578 | */ | |||
| 21579 | static int fts5ApiSetAuxdata( | |||
| 21580 | Fts5Context *pCtx, /* Fts5 context */ | |||
| 21581 | void *pPtr, /* Pointer to save as auxdata */ | |||
| 21582 | void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ | |||
| 21583 | ){ | |||
| 21584 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21585 | Fts5Auxdata *pData; | |||
| 21586 | ||||
| 21587 | /* Search through the cursor's list of Fts5Auxdata objects for one that | |||
| 21588 | ** corresponds to the currently executing auxiliary function. */ | |||
| 21589 | for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ | |||
| 21590 | if( pData->pAux==pCsr->pAux ) break; | |||
| 21591 | } | |||
| 21592 | ||||
| 21593 | if( pData ){ | |||
| 21594 | if( pData->xDelete ){ | |||
| 21595 | pData->xDelete(pData->pPtr); | |||
| 21596 | } | |||
| 21597 | }else{ | |||
| 21598 | int rc = SQLITE_OK0; | |||
| 21599 | pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata)); | |||
| 21600 | if( pData==0 ){ | |||
| 21601 | if( xDelete ) xDelete(pPtr); | |||
| 21602 | return rc; | |||
| 21603 | } | |||
| 21604 | pData->pAux = pCsr->pAux; | |||
| 21605 | pData->pNext = pCsr->pAuxdata; | |||
| 21606 | pCsr->pAuxdata = pData; | |||
| 21607 | } | |||
| 21608 | ||||
| 21609 | pData->xDelete = xDelete; | |||
| 21610 | pData->pPtr = pPtr; | |||
| 21611 | return SQLITE_OK0; | |||
| 21612 | } | |||
| 21613 | ||||
| 21614 | static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ | |||
| 21615 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21616 | Fts5Auxdata *pData; | |||
| 21617 | void *pRet = 0; | |||
| 21618 | ||||
| 21619 | for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ | |||
| 21620 | if( pData->pAux==pCsr->pAux ) break; | |||
| 21621 | } | |||
| 21622 | ||||
| 21623 | if( pData ){ | |||
| 21624 | pRet = pData->pPtr; | |||
| 21625 | if( bClear ){ | |||
| 21626 | pData->pPtr = 0; | |||
| 21627 | pData->xDelete = 0; | |||
| 21628 | } | |||
| 21629 | } | |||
| 21630 | ||||
| 21631 | return pRet; | |||
| 21632 | } | |||
| 21633 | ||||
| 21634 | static void fts5ApiPhraseNext( | |||
| 21635 | Fts5Context *pCtx, | |||
| 21636 | Fts5PhraseIter *pIter, | |||
| 21637 | int *piCol, int *piOff | |||
| 21638 | ){ | |||
| 21639 | if( pIter->a>=pIter->b ){ | |||
| 21640 | *piCol = -1; | |||
| 21641 | *piOff = -1; | |||
| 21642 | }else{ | |||
| 21643 | int iVal; | |||
| 21644 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | |||
| 21645 | if( iVal==1 ){ | |||
| 21646 | /* Avoid returning a (*piCol) value that is too large for the table, | |||
| 21647 | ** even if the position-list is corrupt. The caller might not be | |||
| 21648 | ** expecting it. */ | |||
| 21649 | int nCol = ((Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab))->pConfig->nCol; | |||
| 21650 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | |||
| 21651 | *piCol = (iVal>=nCol ? nCol-1 : iVal); | |||
| 21652 | *piOff = 0; | |||
| 21653 | pIter->a += fts5GetVarint32(pIter->a, iVal)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(iVal)); | |||
| 21654 | } | |||
| 21655 | *piOff += (iVal-2); | |||
| 21656 | } | |||
| 21657 | } | |||
| 21658 | ||||
| 21659 | static int fts5ApiPhraseFirst( | |||
| 21660 | Fts5Context *pCtx, | |||
| 21661 | int iPhrase, | |||
| 21662 | Fts5PhraseIter *pIter, | |||
| 21663 | int *piCol, int *piOff | |||
| 21664 | ){ | |||
| 21665 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21666 | int n; | |||
| 21667 | int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); | |||
| 21668 | if( rc==SQLITE_OK0 ){ | |||
| 21669 | assert( pIter->a || n==0 )((void) (0)); | |||
| 21670 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | |||
| 21671 | *piCol = 0; | |||
| 21672 | *piOff = 0; | |||
| 21673 | fts5ApiPhraseNext(pCtx, pIter, piCol, piOff); | |||
| 21674 | } | |||
| 21675 | return rc; | |||
| 21676 | } | |||
| 21677 | ||||
| 21678 | static void fts5ApiPhraseNextColumn( | |||
| 21679 | Fts5Context *pCtx, | |||
| 21680 | Fts5PhraseIter *pIter, | |||
| 21681 | int *piCol | |||
| 21682 | ){ | |||
| 21683 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21684 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | |||
| 21685 | ||||
| 21686 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
| 21687 | if( pIter->a>=pIter->b ){ | |||
| 21688 | *piCol = -1; | |||
| 21689 | }else{ | |||
| 21690 | int iIncr; | |||
| 21691 | pIter->a += fts5GetVarint32(&pIter->a[0], iIncr)sqlite3Fts5GetVarint32(&pIter->a[0],(u32*)&(iIncr) ); | |||
| 21692 | *piCol += (iIncr-2); | |||
| 21693 | } | |||
| 21694 | }else{ | |||
| 21695 | while( 1 ){ | |||
| 21696 | int dummy; | |||
| 21697 | if( pIter->a>=pIter->b ){ | |||
| 21698 | *piCol = -1; | |||
| 21699 | return; | |||
| 21700 | } | |||
| 21701 | if( pIter->a[0]==0x01 ) break; | |||
| 21702 | pIter->a += fts5GetVarint32(pIter->a, dummy)sqlite3Fts5GetVarint32(pIter->a,(u32*)&(dummy)); | |||
| 21703 | } | |||
| 21704 | pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol)sqlite3Fts5GetVarint32(&pIter->a[1],(u32*)&(*piCol )); | |||
| 21705 | } | |||
| 21706 | } | |||
| 21707 | ||||
| 21708 | static int fts5ApiPhraseFirstColumn( | |||
| 21709 | Fts5Context *pCtx, | |||
| 21710 | int iPhrase, | |||
| 21711 | Fts5PhraseIter *pIter, | |||
| 21712 | int *piCol | |||
| 21713 | ){ | |||
| 21714 | int rc = SQLITE_OK0; | |||
| 21715 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21716 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | |||
| 21717 | ||||
| 21718 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
| ||||
| 21719 | Fts5Sorter *pSorter = pCsr->pSorter; | |||
| 21720 | int n; | |||
| 21721 | if( pSorter ){ | |||
| 21722 | int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); | |||
| 21723 | n = pSorter->aIdx[iPhrase] - i1; | |||
| 21724 | pIter->a = &pSorter->aPoslist[i1]; | |||
| 21725 | }else{ | |||
| 21726 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n); | |||
| 21727 | } | |||
| 21728 | if( rc==SQLITE_OK0 ){ | |||
| 21729 | assert( pIter->a || n==0 )((void) (0)); | |||
| 21730 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | |||
| 21731 | *piCol = 0; | |||
| 21732 | fts5ApiPhraseNextColumn(pCtx, pIter, piCol); | |||
| 21733 | } | |||
| 21734 | }else{ | |||
| 21735 | int n; | |||
| 21736 | rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); | |||
| 21737 | if( rc==SQLITE_OK0 ){ | |||
| 21738 | assert( pIter->a || n==0 )((void) (0)); | |||
| 21739 | pIter->b = (pIter->a ? &pIter->a[n] : 0); | |||
| 21740 | if( n<=0 ){ | |||
| 21741 | *piCol = -1; | |||
| 21742 | }else if( pIter->a[0]==0x01 ){ | |||
| ||||
| 21743 | pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol)sqlite3Fts5GetVarint32(&pIter->a[1],(u32*)&(*piCol )); | |||
| 21744 | }else{ | |||
| 21745 | *piCol = 0; | |||
| 21746 | } | |||
| 21747 | } | |||
| 21748 | } | |||
| 21749 | ||||
| 21750 | return rc; | |||
| 21751 | } | |||
| 21752 | ||||
| 21753 | /* | |||
| 21754 | ** xQueryToken() API implementation. | |||
| 21755 | */ | |||
| 21756 | static int fts5ApiQueryToken( | |||
| 21757 | Fts5Context* pCtx, | |||
| 21758 | int iPhrase, | |||
| 21759 | int iToken, | |||
| 21760 | const char **ppOut, | |||
| 21761 | int *pnOut | |||
| 21762 | ){ | |||
| 21763 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21764 | return sqlite3Fts5ExprQueryToken(pCsr->pExpr, iPhrase, iToken, ppOut, pnOut); | |||
| 21765 | } | |||
| 21766 | ||||
| 21767 | /* | |||
| 21768 | ** xInstToken() API implementation. | |||
| 21769 | */ | |||
| 21770 | static int fts5ApiInstToken( | |||
| 21771 | Fts5Context *pCtx, | |||
| 21772 | int iIdx, | |||
| 21773 | int iToken, | |||
| 21774 | const char **ppOut, int *pnOut | |||
| 21775 | ){ | |||
| 21776 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21777 | int rc = SQLITE_OK0; | |||
| 21778 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)((pCsr)->csrflags & (0x08))==0 | |||
| 21779 | || SQLITE_OK0==(rc = fts5CacheInstArray(pCsr)) | |||
| 21780 | ){ | |||
| 21781 | if( iIdx<0 || iIdx>=pCsr->nInstCount ){ | |||
| 21782 | rc = SQLITE_RANGE25; | |||
| 21783 | }else{ | |||
| 21784 | int iPhrase = pCsr->aInst[iIdx*3]; | |||
| 21785 | int iCol = pCsr->aInst[iIdx*3 + 1]; | |||
| 21786 | int iOff = pCsr->aInst[iIdx*3 + 2]; | |||
| 21787 | i64 iRowid = fts5CursorRowid(pCsr); | |||
| 21788 | rc = sqlite3Fts5ExprInstToken( | |||
| 21789 | pCsr->pExpr, iRowid, iPhrase, iCol, iOff, iToken, ppOut, pnOut | |||
| 21790 | ); | |||
| 21791 | } | |||
| 21792 | } | |||
| 21793 | return rc; | |||
| 21794 | } | |||
| 21795 | ||||
| 21796 | ||||
/* Forward declaration. fts5ApiQueryPhrase() is listed in the sFts5Api table
** but is defined after it, because its body passes &sFts5Api to the callback. */
| 21797 | static int fts5ApiQueryPhrase(Fts5Context*, int, void*, | |||
| 21798 | int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) | |||
| 21799 | ); | |||
| 21800 | ||||
| 21801 | /* | |||
| 21802 | ** The xColumnLocale() API. | |||
| | ** | |||
| | ** Both outputs (*pzLocale and *pnLocale) are set to 0 before anything | |||
| | ** else is done. SQLITE_RANGE is returned if iCol is negative or not less | |||
| | ** than pConfig->nCol. A locale is looked up only when all three of the | |||
| | ** following hold: abUnindexed[iCol] is 0, fts5IsContentless(pVtab, 1) | |||
| | ** returns 0, and pConfig->bLocale is set. In that case the cursor is | |||
| | ** positioned with fts5SeekCursor(), the column is read with | |||
| | ** fts5TextFromStmt(), and on success pConfig->t.pLocale and | |||
| | ** pConfig->t.nLocale are copied to the outputs. In every other in-range | |||
| | ** case SQLITE_OK is returned with the outputs left at 0. | |||
| 21803 | */ | |||
| 21804 | static int fts5ApiColumnLocale( | |||
| 21805 | Fts5Context *pCtx, | |||
| 21806 | int iCol, | |||
| 21807 | const char **pzLocale, | |||
| 21808 | int *pnLocale | |||
| 21809 | ){ | |||
| 21810 | int rc = SQLITE_OK0; | |||
| 21811 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21812 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; | |||
| 21813 | ||||
| 21814 | *pzLocale = 0; | |||
| 21815 | *pnLocale = 0; | |||
| 21816 | ||||
| 21817 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | |||
| 21818 | if( iCol<0 || iCol>=pConfig->nCol ){ | |||
| 21819 | rc = SQLITE_RANGE25; | |||
| 21820 | }else if( | |||
| 21821 | pConfig->abUnindexed[iCol]==0 | |||
| 21822 | && 0==fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) | |||
| 21823 | && pConfig->bLocale | |||
| 21824 | ){ | |||
| 21825 | rc = fts5SeekCursor(pCsr, 0); | |||
| 21826 | if( rc==SQLITE_OK0 ){ | |||
| 21827 | const char *zDummy = 0; | |||
| 21828 | int nDummy = 0; | |||
| 21829 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &zDummy, &nDummy); /* text result itself is not used here */ | |||
| 21830 | if( rc==SQLITE_OK0 ){ | |||
| 21831 | *pzLocale = pConfig->t.pLocale; | |||
| 21832 | *pnLocale = pConfig->t.nLocale; | |||
| 21833 | } | |||
| 21834 | sqlite3Fts5ClearLocale(pConfig); /* called whether or not fts5TextFromStmt() succeeded */ | |||
| 21835 | } | |||
| 21836 | } | |||
| 21837 | ||||
| 21838 | return rc; | |||
| 21839 | } | |||
| 21840 | ||||
/* The table of extension API methods. A pointer to this object is what
** fts5ApiInvoke() passes to an auxiliary function's xFunc and what
** fts5ApiQueryPhrase() passes to its callback. The first field is the
** version number; the remaining fields are the fts5ApiXxx() implementations
** in the order given by the Fts5ExtensionApi structure. */
| 21841 | static const Fts5ExtensionApi sFts5Api = { | |||
| 21842 | 4, /* iVersion */ | |||
| 21843 | fts5ApiUserData, | |||
| 21844 | fts5ApiColumnCount, | |||
| 21845 | fts5ApiRowCount, | |||
| 21846 | fts5ApiColumnTotalSize, | |||
| 21847 | fts5ApiTokenize, | |||
| 21848 | fts5ApiPhraseCount, | |||
| 21849 | fts5ApiPhraseSize, | |||
| 21850 | fts5ApiInstCount, | |||
| 21851 | fts5ApiInst, | |||
| 21852 | fts5ApiRowid, | |||
| 21853 | fts5ApiColumnText, | |||
| 21854 | fts5ApiColumnSize, | |||
| 21855 | fts5ApiQueryPhrase, | |||
| 21856 | fts5ApiSetAuxdata, | |||
| 21857 | fts5ApiGetAuxdata, | |||
| 21858 | fts5ApiPhraseFirst, | |||
| 21859 | fts5ApiPhraseNext, | |||
| 21860 | fts5ApiPhraseFirstColumn, | |||
| 21861 | fts5ApiPhraseNextColumn, | |||
| 21862 | fts5ApiQueryToken, | |||
| 21863 | fts5ApiInstToken, | |||
| 21864 | fts5ApiColumnLocale, | |||
| 21865 | fts5ApiTokenize_v2 | |||
| 21866 | }; | |||
| 21867 | ||||
| 21868 | /* | |||
| 21869 | ** Implementation of API function xQueryPhrase(). | |||
| | ** | |||
| | ** A new cursor is opened on the same virtual table as pCtx, configured | |||
| | ** as a FTS5_PLAN_MATCH scan over the whole rowid range, and given a clone | |||
| | ** of phrase iPhrase of the original cursor's expression. The new cursor | |||
| | ** is then stepped from its first row until its FTS5CSR_EOF flag is set, | |||
| | ** and xCallback is invoked once per row with the new cursor as context | |||
| | ** and pUserData as the final argument. | |||
| | ** | |||
| | ** A callback return other than SQLITE_OK stops the iteration. In that | |||
| | ** case SQLITE_DONE is converted to SQLITE_OK and any other value is | |||
| | ** returned to the caller as is. The new cursor is closed before | |||
| | ** returning on every path. | |||
| 21870 | */ | |||
| 21871 | static int fts5ApiQueryPhrase( | |||
| 21872 | Fts5Context *pCtx, | |||
| 21873 | int iPhrase, | |||
| 21874 | void *pUserData, | |||
| 21875 | int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) | |||
| 21876 | ){ | |||
| 21877 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; | |||
| 21878 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); | |||
| 21879 | int rc; | |||
| 21880 | Fts5Cursor *pNew = 0; | |||
| 21881 | ||||
| 21882 | rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); | |||
| 21883 | if( rc==SQLITE_OK0 ){ | |||
| 21884 | pNew->ePlan = FTS5_PLAN_MATCH1; | |||
| 21885 | pNew->iFirstRowid = SMALLEST_INT64(((i64)-1) - (0xffffffff|(((i64)0x7fffffff)<<32))); | |||
| 21886 | pNew->iLastRowid = LARGEST_INT64(0xffffffff|(((i64)0x7fffffff)<<32)); | |||
| 21887 | pNew->base.pVtab = (sqlite3_vtab*)pTab; | |||
| 21888 | rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr); | |||
| 21889 | } | |||
| 21890 | ||||
| 21891 | if( rc==SQLITE_OK0 ){ | |||
| 21892 | for(rc = fts5CursorFirst(pTab, pNew, 0); | |||
| 21893 | rc==SQLITE_OK0 && CsrFlagTest(pNew, FTS5CSR_EOF)((pNew)->csrflags & (0x01))==0; | |||
| 21894 | rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) | |||
| 21895 | ){ | |||
| 21896 | rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); | |||
| 21897 | if( rc!=SQLITE_OK0 ){ | |||
| 21898 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; /* SQLITE_DONE means "stop early", not an error */ | |||
| 21899 | break; | |||
| 21900 | } | |||
| 21901 | } | |||
| 21902 | } | |||
| 21903 | ||||
| 21904 | fts5CloseMethod((sqlite3_vtab_cursor*)pNew); /* pNew is still 0 here if fts5OpenMethod() did not set it */ | |||
| 21905 | return rc; | |||
| 21906 | } | |||
| 21907 | ||||
/* Invoke auxiliary function pAux against cursor pCsr. The function's xFunc
** is called with &sFts5Api, the cursor cast to an Fts5Context, and the
** caller's context/argc/argv. pCsr->pAux is set to pAux for the duration of
** the call and reset to 0 afterwards. */
| 21908 | static void fts5ApiInvoke( | |||
| 21909 | Fts5Auxiliary *pAux, | |||
| 21910 | Fts5Cursor *pCsr, | |||
| 21911 | sqlite3_context *context, | |||
| 21912 | int argc, | |||
| 21913 | sqlite3_value **argv | |||
| 21914 | ){ | |||
| 21915 | assert( pCsr->pAux==0 )((void) (0)); | |||
| 21916 | assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL )((void) (0)); | |||
| 21917 | pCsr->pAux = pAux; | |||
| 21918 | pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); | |||
| 21919 | pCsr->pAux = 0; | |||
| 21920 | } | |||
| 21921 | ||||
/* Walk the list of cursors headed at pGlobal->pCsr and return the first one
** whose iCsrId equals iCsrId, or NULL if the list contains no such cursor. */
| 21922 | static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ | |||
| 21923 | Fts5Cursor *pCsr; | |||
| 21924 | for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ | |||
| 21925 | if( pCsr->iCsrId==iCsrId ) break; | |||
| 21926 | } | |||
| 21927 | return pCsr; | |||
| 21928 | } | |||
| 21929 | ||||
| 21930 | /* | |||
| 21931 | ** Parameter zFmt is a printf() style formatting string. This function | |||
| 21932 | ** formats it using the trailing arguments and returns the result as | |||
| 21933 | ** an error message to the context passed as the first argument. | |||
| | ** | |||
| | ** The formatted string is built with sqlite3_vmprintf(), handed to | |||
| | ** sqlite3_result_error() with a length of -1, and then released with | |||
| | ** sqlite3_free(). | |||
| 21934 | */ | |||
| 21935 | static void fts5ResultError(sqlite3_context *pCtx, const char *zFmt, ...){ | |||
| 21936 | char *zErr = 0; | |||
| 21937 | va_list ap; | |||
| 21938 | va_start(ap, zFmt)__builtin_va_start(ap, zFmt); | |||
| 21939 | zErr = sqlite3_vmprintfsqlite3_api->vmprintf(zFmt, ap); | |||
| 21940 | sqlite3_result_errorsqlite3_api->result_error(pCtx, zErr, -1); | |||
| 21941 | sqlite3_freesqlite3_api->free(zErr); | |||
| 21942 | va_end(ap)__builtin_va_end(ap); | |||
| 21943 | } | |||
| 21944 | ||||
/* SQL function implementation installed by fts5FindFunctionMethod() for
** auxiliary functions. The user-data of the SQL function is the
** Fts5Auxiliary object and argv[0] is read as a 64-bit cursor id. If no
** cursor with that id exists, or its ePlan is 0 or FTS5_PLAN_SPECIAL, the
** error "no such cursor: <id>" is returned. Otherwise the auxiliary function
** is invoked with the remaining (argc-1) arguments, after which any error
** message left on the virtual table object is freed and cleared. */
| 21945 | static void fts5ApiCallback( | |||
| 21946 | sqlite3_context *context, | |||
| 21947 | int argc, | |||
| 21948 | sqlite3_value **argv | |||
| 21949 | ){ | |||
| 21950 | ||||
| 21951 | Fts5Auxiliary *pAux; | |||
| 21952 | Fts5Cursor *pCsr; | |||
| 21953 | i64 iCsrId; | |||
| 21954 | ||||
| 21955 | assert( argc>=1 )((void) (0)); | |||
| 21956 | pAux = (Fts5Auxiliary*)sqlite3_user_datasqlite3_api->user_data(context); | |||
| 21957 | iCsrId = sqlite3_value_int64sqlite3_api->value_int64(argv[0]); | |||
| 21958 | ||||
| 21959 | pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); | |||
| 21960 | if( pCsr==0 || (pCsr->ePlan==0 || pCsr->ePlan==FTS5_PLAN_SPECIAL3) ){ | |||
| 21961 | fts5ResultError(context, "no such cursor: %lld", iCsrId); | |||
| 21962 | }else{ | |||
| 21963 | sqlite3_vtab *pTab = pCsr->base.pVtab; | |||
| 21964 | fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); | |||
| 21965 | sqlite3_freesqlite3_api->free(pTab->zErrMsg); | |||
| 21966 | pTab->zErrMsg = 0; | |||
| 21967 | } | |||
| 21968 | } | |||
| 21969 | ||||
| 21970 | ||||
| 21971 | /* | |||
| 21972 | ** Given cursor id iCsrId, return a pointer to the Fts5Table object that | |||
| 21973 | ** the corresponding cursor is open on, or NULL if no such cursor exists. | |||
| 21974 | */ | |||
| 21975 | static Fts5Table *sqlite3Fts5TableFromCsrid( | |||
| 21976 | Fts5Global *pGlobal, /* FTS5 global context for db handle */ | |||
| 21977 | i64 iCsrId /* Id of cursor to find */ | |||
| 21978 | ){ | |||
| 21979 | Fts5Cursor *pCsr; | |||
| 21980 | pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); | |||
| 21981 | if( pCsr ){ | |||
| 21982 | return (Fts5Table*)pCsr->base.pVtab; | |||
| 21983 | } | |||
| 21984 | return 0; | |||
| 21985 | } | |||
| 21986 | ||||
| 21987 | /* | |||
| 21988 | ** Return a "position-list blob" corresponding to the current position of | |||
| 21989 | ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains | |||
| 21990 | ** the current position-list for each phrase in the query associated with | |||
| 21991 | ** cursor pCsr. | |||
| 21992 | ** | |||
| 21993 | ** A position-list blob begins with (nPhrase-1) varints, where nPhrase is | |||
| 21994 | ** the number of phrases in the query. Following the varints are the | |||
| 21995 | ** concatenated position lists for each phrase, in order. | |||
| 21996 | ** | |||
| 21997 | ** The first varint (if it exists) contains the size of the position list | |||
| 21998 | ** for phrase 0. The second (same disclaimer) contains the size of position | |||
| 21999 | ** list 1. And so on. There is no size field for the final position list, | |||
| 22000 | ** as it can be derived from the total size of the blob. | |||
| | ** | |||
| | ** For FTS5_DETAIL_FULL tables the per-phrase data comes from | |||
| | ** sqlite3Fts5ExprPoslist(); for FTS5_DETAIL_COLUMNS tables it comes from | |||
| | ** sqlite3Fts5ExprPhraseCollist(). For any other detail setting the blob | |||
| | ** is left empty. | |||
| | ** | |||
| | ** The accumulated buffer is passed to sqlite3_result_blob(), with | |||
| | ** sqlite3_free as its destructor, whether or not an error occurred. The | |||
| | ** return value is SQLITE_OK or the first error code recorded in rc while | |||
| | ** building the buffer, so callers must check it to detect a partial blob. | |||
| 22001 | */ | |||
| 22002 | static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ | |||
| 22003 | int i; | |||
| 22004 | int rc = SQLITE_OK0; | |||
| 22005 | int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); | |||
| 22006 | Fts5Buffer val; | |||
| 22007 | ||||
| 22008 | memset(&val, 0, sizeof(Fts5Buffer)); | |||
| 22009 | switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){ | |||
| 22010 | case FTS5_DETAIL_FULL0: | |||
| 22011 | ||||
| 22012 | /* Append the varints */ | |||
| 22013 | for(i=0; i<(nPhrase-1); i++){ | |||
| 22014 | const u8 *dummy; | |||
| 22015 | int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); | |||
| 22016 | sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); | |||
| 22017 | } | |||
| 22018 | ||||
| 22019 | /* Append the position lists */ | |||
| 22020 | for(i=0; i<nPhrase; i++){ | |||
| 22021 | const u8 *pPoslist; | |||
| 22022 | int nPoslist; | |||
| 22023 | nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist); | |||
| 22024 | sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); | |||
| 22025 | } | |||
| 22026 | break; | |||
| 22027 | ||||
| 22028 | case FTS5_DETAIL_COLUMNS2: | |||
| 22029 | ||||
| 22030 | /* Append the varints */ | |||
| 22031 | for(i=0; rc==SQLITE_OK0 && i<(nPhrase-1); i++){ | |||
| 22032 | const u8 *dummy; | |||
| 22033 | int nByte; | |||
| 22034 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte); | |||
| 22035 | sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); | |||
| 22036 | } | |||
| 22037 | ||||
| 22038 | /* Append the position lists */ | |||
| 22039 | for(i=0; rc==SQLITE_OK0 && i<nPhrase; i++){ | |||
| 22040 | const u8 *pPoslist; | |||
| 22041 | int nPoslist; | |||
| 22042 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist); | |||
| 22043 | sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); | |||
| 22044 | } | |||
| 22045 | break; | |||
| 22046 | ||||
| 22047 | default: | |||
| 22048 | break; | |||
| 22049 | } | |||
| 22050 | ||||
| 22051 | sqlite3_result_blobsqlite3_api->result_blob(pCtx, val.p, val.n, sqlite3_freesqlite3_api->free); | |||
| 22052 | return rc; | |||
| 22053 | } | |||
| 22054 | ||||
| 22055 | /* | |||
| 22056 | ** This is the xColumn method, called by SQLite to request a value from | |||
| 22057 | ** the row that the supplied cursor currently points to. | |||
| | ** | |||
| | ** Column index iCol is interpreted as follows: | |||
| | ** | |||
| | **   * For a FTS5_PLAN_SPECIAL cursor, only iCol==nCol produces a value | |||
| | **     (pCsr->iSpecial); no result is set for any other column. | |||
| | **   * iCol==nCol is the column named after the table. Its value is the | |||
| | **     cursor id. | |||
| | **   * iCol==nCol+1 is the "rank" column. A FTS5_PLAN_SOURCE cursor returns | |||
| | **     the position-list blob; a MATCH or SORTED_MATCH cursor invokes the | |||
| | **     rank function, locating it first if pCsr->pRank is not yet set. | |||
| | **   * Any other index is a user column. Unless sqlite3_vtab_nochange() | |||
| | **     is true or the table has eContent==FTS5_CONTENT_NONE, the value is | |||
| | **     read from the content statement (column iCol+1). | |||
| | ** | |||
| | ** Returns SQLITE_OK or an SQLite error code. An error reported by | |||
| | ** fts5PoslistBlob() is propagated to the caller rather than discarded, so | |||
| | ** that a partially built blob is never reported as a successful result. | |||
| 22058 | */ | |||
| 22059 | static int fts5ColumnMethod( | |||
| 22060 | sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ | |||
| 22061 | sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ | |||
| 22062 | int iCol /* Index of column to read value from */ | |||
| 22063 | ){ | |||
| 22064 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); | |||
| 22065 | Fts5Config *pConfig = pTab->p.pConfig; | |||
| 22066 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; | |||
| 22067 | int rc = SQLITE_OK0; | |||
| 22068 | ||||
| 22069 | assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 )((void) (0)); | |||
| 22070 | ||||
| 22071 | if( pCsr->ePlan==FTS5_PLAN_SPECIAL3 ){ | |||
| 22072 | if( iCol==pConfig->nCol ){ | |||
| 22073 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->iSpecial); | |||
| 22074 | } | |||
| 22075 | }else | |||
| 22076 | ||||
| 22077 | if( iCol==pConfig->nCol ){ | |||
| 22078 | /* User is requesting the value of the special column with the same name | |||
| 22079 | ** as the table. Return the cursor integer id number. This value is only | |||
| 22080 | ** useful in that it may be passed as the first argument to an FTS5 | |||
| 22081 | ** auxiliary function. */ | |||
| 22082 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->iCsrId); | |||
| 22083 | }else if( iCol==pConfig->nCol+1 ){ | |||
| 22084 | /* The value of the "rank" column. */ | |||
| 22085 | ||||
| 22086 | if( pCsr->ePlan==FTS5_PLAN_SOURCE2 ){ | |||
| 22087 | rc = fts5PoslistBlob(pCtx, pCsr); /* report a failure to build the blob */ | |||
| 22088 | }else if( | |||
| 22089 | pCsr->ePlan==FTS5_PLAN_MATCH1 | |||
| 22090 | || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH4 | |||
| 22091 | ){ | |||
| 22092 | if( pCsr->pRank || SQLITE_OK0==(rc = fts5FindRankFunction(pCsr)) ){ | |||
| 22093 | fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); | |||
| 22094 | } | |||
| 22095 | } | |||
| 22096 | }else{ | |||
| 22097 | if( !sqlite3_vtab_nochangesqlite3_api->vtab_nochange(pCtx) && pConfig->eContent!=FTS5_CONTENT_NONE1 ){ | |||
| 22098 | pConfig->pzErrmsg = &pTab->p.base.zErrMsg; /* route error messages to the vtab while seeking */ | |||
| 22099 | rc = fts5SeekCursor(pCsr, 1); | |||
| 22100 | if( rc==SQLITE_OK0 ){ | |||
| 22101 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pCsr->pStmt, iCol+1); | |||
| 22102 | if( pConfig->bLocale | |||
| 22103 | && pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | |||
| 22104 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | |||
| 22105 | ){ | |||
| 22106 | const char *z = 0; | |||
| 22107 | int n = 0; | |||
| 22108 | rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &z, &n); | |||
| 22109 | if( rc==SQLITE_OK0 ){ | |||
| 22110 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 22111 | } | |||
| 22112 | sqlite3Fts5ClearLocale(pConfig); | |||
| 22113 | }else{ | |||
| 22114 | sqlite3_result_valuesqlite3_api->result_value(pCtx, pVal); | |||
| 22115 | } | |||
| 22116 | } | |||
| 22117 | ||||
| 22118 | pConfig->pzErrmsg = 0; | |||
| 22119 | } | |||
| 22120 | } | |||
| 22121 | ||||
| 22122 | return rc; | |||
| 22123 | } | |||
| 22124 | ||||
| 22125 | ||||
| 22126 | /* | |||
| 22127 | ** This routine implements the xFindFunction method for the FTS5 | |||
| 22128 | ** virtual table. | |||
| | ** | |||
| | ** If fts5FindAuxiliary() finds an auxiliary function named zName, then | |||
| | ** *pxFunc is set to fts5ApiCallback, *ppArg is set to the Fts5Auxiliary | |||
| | ** object, and 1 is returned. Otherwise 0 is returned and the output | |||
| | ** parameters are left untouched. The argument count nUnused is ignored. | |||
| 22129 | */ | |||
| 22130 | static int fts5FindFunctionMethod( | |||
| 22131 | sqlite3_vtab *pVtab, /* Virtual table handle */ | |||
| 22132 | int nUnused, /* Number of SQL function arguments */ | |||
| 22133 | const char *zName, /* Name of SQL function */ | |||
| 22134 | void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ | |||
| 22135 | void **ppArg /* OUT: User data for *pxFunc */ | |||
| 22136 | ){ | |||
| 22137 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 22138 | Fts5Auxiliary *pAux; | |||
| 22139 | ||||
| 22140 | UNUSED_PARAM(nUnused)(void)(nUnused); | |||
| 22141 | pAux = fts5FindAuxiliary(pTab, zName); | |||
| 22142 | if( pAux ){ | |||
| 22143 | *pxFunc = fts5ApiCallback; | |||
| 22144 | *ppArg = (void*)pAux; | |||
| 22145 | return 1; | |||
| 22146 | } | |||
| 22147 | ||||
| 22148 | /* No function of the specified name was found. Return 0. */ | |||
| 22149 | return 0; | |||
| 22150 | } | |||
| 22151 | ||||
| 22152 | /* | |||
| 22153 | ** Implementation of FTS5 xRename method. Rename an fts5 table. | |||
| | ** | |||
| | ** The work is delegated to sqlite3Fts5StorageRename() on the table's | |||
| | ** storage object, and that function's return code is returned. | |||
| 22154 | */ | |||
| 22155 | static int fts5RenameMethod( | |||
| 22156 | sqlite3_vtab *pVtab, /* Virtual table handle */ | |||
| 22157 | const char *zName /* New name of table */ | |||
| 22158 | ){ | |||
| 22159 | int rc; | |||
| 22160 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 22161 | rc = sqlite3Fts5StorageRename(pTab->pStorage, zName); | |||
| 22162 | return rc; | |||
| 22163 | } | |||
| 22164 | ||||
/* Call fts5TripCursors() on the table, then sqlite3Fts5StorageSync() on its
** storage object. Returns the result of the sync call. pTab must actually
** point to an Fts5FullTable, since it is cast to one here. */
| 22165 | static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){ | |||
| 22166 | fts5TripCursors((Fts5FullTable*)pTab); | |||
| 22167 | return sqlite3Fts5StorageSync(((Fts5FullTable*)pTab)->pStorage); | |||
| 22168 | } | |||
| 22169 | ||||
| 22170 | /* | |||
| 22171 | ** The xSavepoint() method. | |||
| 22172 | ** | |||
| 22173 | ** Flush the contents of the pending-terms table to disk. If the flush | |||
| | ** succeeds, record (iSavepoint+1) in pTab->iSavepoint. Returns the result | |||
| | ** of the flush. | |||
| 22174 | */ | |||
| 22175 | static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ | |||
| 22176 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 22177 | int rc = SQLITE_OK0; | |||
| 22178 | ||||
| 22179 | fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint); | |||
| 22180 | rc = sqlite3Fts5FlushToDisk((Fts5Table*)pVtab); | |||
| 22181 | if( rc==SQLITE_OK0 ){ | |||
| 22182 | pTab->iSavepoint = iSavepoint+1; | |||
| 22183 | } | |||
| 22184 | return rc; | |||
| 22185 | } | |||
| 22186 | ||||
| 22187 | /* | |||
| 22188 | ** The xRelease() method. | |||
| 22189 | ** | |||
| 22190 | ** If (iSavepoint+1) is less than the value recorded in pTab->iSavepoint, | |||
| | ** flush pending data to disk and, if that succeeds, set pTab->iSavepoint | |||
| | ** to iSavepoint. Otherwise this is a no-op that returns SQLITE_OK. | |||
| 22191 | */ | |||
| 22192 | static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ | |||
| 22193 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 22194 | int rc = SQLITE_OK0; | |||
| 22195 | fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint); | |||
| 22196 | if( (iSavepoint+1)<pTab->iSavepoint ){ | |||
| 22197 | rc = sqlite3Fts5FlushToDisk(&pTab->p); | |||
| 22198 | if( rc==SQLITE_OK0 ){ | |||
| 22199 | pTab->iSavepoint = iSavepoint; | |||
| 22200 | } | |||
| 22201 | } | |||
| 22202 | return rc; | |||
| 22203 | } | |||
| 22204 | ||||
| 22205 | /* | |||
| 22206 | ** The xRollbackTo() method. | |||
| 22207 | ** | |||
| 22208 | ** Discard the contents of the pending terms table. | |||
| | ** | |||
| | ** Cursors are always tripped with fts5TripCursors(). The rollback itself | |||
| | ** (resetting pConfig->pgsz to 0 and calling sqlite3Fts5StorageRollback()) | |||
| | ** happens only if (iSavepoint+1) is less than or equal to the value | |||
| | ** recorded in pTab->iSavepoint; otherwise SQLITE_OK is returned. | |||
| 22209 | */ | |||
| 22210 | static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ | |||
| 22211 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 22212 | int rc = SQLITE_OK0; | |||
| 22213 | fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); | |||
| 22214 | fts5TripCursors(pTab); | |||
| 22215 | if( (iSavepoint+1)<=pTab->iSavepoint ){ | |||
| 22216 | pTab->p.pConfig->pgsz = 0; | |||
| 22217 | rc = sqlite3Fts5StorageRollback(pTab->pStorage); | |||
| 22218 | } | |||
| 22219 | return rc; | |||
| 22220 | } | |||
| 22221 | ||||
| 22222 | /* | |||
| 22223 | ** Register a new auxiliary function with global context pGlobal. | |||
| | ** | |||
| | ** The name is first registered with sqlite3_overload_function(); if that | |||
| | ** fails its error code is returned and nothing is allocated. Otherwise a | |||
| | ** single allocation holds the Fts5Auxiliary object followed by a copy of | |||
| | ** zName, and the new object is linked at the head of pGlobal->pAux. | |||
| | ** Returns SQLITE_NOMEM if the allocation fails. | |||
| 22224 | */ | |||
| 22225 | static int fts5CreateAux( | |||
| 22226 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
| 22227 | const char *zName, /* Name of new function */ | |||
| 22228 | void *pUserData, /* User data for aux. function */ | |||
| 22229 | fts5_extension_function xFunc, /* Aux. function implementation */ | |||
| 22230 | void(*xDestroy)(void*) /* Destructor for pUserData */ | |||
| 22231 | ){ | |||
| 22232 | Fts5Global *pGlobal = (Fts5Global*)pApi; | |||
| 22233 | int rc = sqlite3_overload_functionsqlite3_api->overload_function(pGlobal->db, zName, -1); | |||
| 22234 | if( rc==SQLITE_OK0 ){ | |||
| 22235 | Fts5Auxiliary *pAux; | |||
| 22236 | sqlite3_int64 nName; /* Size of zName in bytes, including \0 */ | |||
| 22237 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
| 22238 | ||||
| 22239 | nName = strlen(zName) + 1; | |||
| 22240 | nByte = sizeof(Fts5Auxiliary) + nName; | |||
| 22241 | pAux = (Fts5Auxiliary*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 22242 | if( pAux ){ | |||
| 22243 | memset(pAux, 0, (size_t)nByte); | |||
| 22244 | pAux->zFunc = (char*)&pAux[1]; /* name is stored immediately after the struct */ | |||
| 22245 | memcpy(pAux->zFunc, zName, nName); | |||
| 22246 | pAux->pGlobal = pGlobal; | |||
| 22247 | pAux->pUserData = pUserData; | |||
| 22248 | pAux->xFunc = xFunc; | |||
| 22249 | pAux->xDestroy = xDestroy; | |||
| 22250 | pAux->pNext = pGlobal->pAux; | |||
| 22251 | pGlobal->pAux = pAux; | |||
| 22252 | }else{ | |||
| 22253 | rc = SQLITE_NOMEM7; | |||
| 22254 | } | |||
| 22255 | } | |||
| 22256 | ||||
| 22257 | return rc; | |||
| 22258 | } | |||
| 22259 | ||||
| 22260 | /* | |||
| 22261 | ** This function is used by xCreateTokenizer_v2() and xCreateTokenizer(). | |||
| 22262 | ** It allocates and partially populates a new Fts5TokenizerModule object. | |||
| 22263 | ** The new object is already linked into the Fts5Global context before | |||
| 22264 | ** returning. | |||
| 22265 | ** | |||
| 22266 | ** If successful, SQLITE_OK is returned and a pointer to the new | |||
| 22267 | ** Fts5TokenizerModule object returned via output parameter (*ppNew). All | |||
| 22268 | ** that is required is for the caller to fill in the methods in | |||
| 22269 | ** Fts5TokenizerModule.x1 and x2, and to set Fts5TokenizerModule.bV2Native | |||
| 22270 | ** as appropriate. | |||
| 22271 | ** | |||
| 22272 | ** If an error occurs, an SQLite error code is returned and the final value | |||
| 22273 | ** of (*ppNew) is undefined. | |||
| | ** | |||
| | ** The module name is copied into the same allocation, directly after the | |||
| | ** object. The new module is linked at the head of pGlobal->pTok. If it is | |||
| | ** the only module in the list (its pNext is NULL), it is also recorded | |||
| | ** as pGlobal->pDfltTok. | |||
| 22274 | */ | |||
| 22275 | static int fts5NewTokenizerModule( | |||
| 22276 | Fts5Global *pGlobal, /* Global context (one per db handle) */ | |||
| 22277 | const char *zName, /* Name of new tokenizer */ | |||
| 22278 | void *pUserData, /* User data for the tokenizer */ | |||
| 22279 | void(*xDestroy)(void*), /* Destructor for pUserData */ | |||
| 22280 | Fts5TokenizerModule **ppNew | |||
| 22281 | ){ | |||
| 22282 | int rc = SQLITE_OK0; | |||
| 22283 | Fts5TokenizerModule *pNew; | |||
| 22284 | sqlite3_int64 nName; /* Size of zName and its \0 terminator */ | |||
| 22285 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
| 22286 | ||||
| 22287 | nName = strlen(zName) + 1; | |||
| 22288 | nByte = sizeof(Fts5TokenizerModule) + nName; | |||
| 22289 | *ppNew = pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte); | |||
| 22290 | if( pNew ){ | |||
| 22291 | pNew->zName = (char*)&pNew[1]; | |||
| 22292 | memcpy(pNew->zName, zName, nName); | |||
| 22293 | pNew->pUserData = pUserData; | |||
| 22294 | pNew->xDestroy = xDestroy; | |||
| 22295 | pNew->pNext = pGlobal->pTok; | |||
| 22296 | pGlobal->pTok = pNew; | |||
| 22297 | if( pNew->pNext==0 ){ | |||
| 22298 | pGlobal->pDfltTok = pNew; | |||
| 22299 | } | |||
| 22300 | } | |||
| 22301 | ||||
| 22302 | return rc; | |||
| 22303 | } | |||
| 22304 | ||||
| 22305 | /* | |||
| 22306 | ** An instance of this type is used as the Fts5Tokenizer object for | |||
| 22307 | ** wrapper tokenizers - those that provide access to a v1 tokenizer via | |||
| 22308 | ** the fts5_tokenizer_v2 API, and those that provide access to a v2 tokenizer | |||
| 22309 | ** via the fts5_tokenizer API. | |||
| | ** | |||
| | ** The x1, x2 and bV2Native fields are copied from the Fts5TokenizerModule | |||
| | ** by fts5VtoVCreate(). bV2Native selects which of x1 or x2 is the real | |||
| | ** implementation that owns pReal. | |||
| 22310 | */ | |||
| 22311 | typedef struct Fts5VtoVTokenizer Fts5VtoVTokenizer; | |||
| 22312 | struct Fts5VtoVTokenizer { | |||
| 22313 | int bV2Native; /* True if v2 native tokenizer */ | |||
| 22314 | fts5_tokenizer x1; /* Tokenizer functions */ | |||
| 22315 | fts5_tokenizer_v2 x2; /* V2 tokenizer functions */ | |||
| 22316 | Fts5Tokenizer *pReal; /* Tokenizer created by the native xCreate */ | |||
| 22317 | }; | |||
| 22318 | ||||
| 22319 | /* | |||
| 22320 | ** Create a wrapper tokenizer. The context argument pCtx points to the | |||
| 22321 | ** Fts5TokenizerModule object. | |||
| | ** | |||
| | ** A zeroed Fts5VtoVTokenizer is allocated and the module's x1, x2 and | |||
| | ** bV2Native fields are copied into it. The real tokenizer is then created | |||
| | ** with the native xCreate (x2 if bV2Native is set, otherwise x1), using | |||
| | ** the module's pUserData and the azArg/nArg arguments. If that fails, the | |||
| | ** wrapper is freed. *ppOut receives the wrapper on success and NULL on | |||
| | ** failure. Returns SQLITE_OK or an error code. | |||
| 22322 | */ | |||
| 22323 | static int fts5VtoVCreate( | |||
| 22324 | void *pCtx, | |||
| 22325 | const char **azArg, | |||
| 22326 | int nArg, | |||
| 22327 | Fts5Tokenizer **ppOut | |||
| 22328 | ){ | |||
| 22329 | Fts5TokenizerModule *pMod = (Fts5TokenizerModule*)pCtx; | |||
| 22330 | Fts5VtoVTokenizer *pNew = 0; | |||
| 22331 | int rc = SQLITE_OK0; | |||
| 22332 | ||||
| 22333 | pNew = (Fts5VtoVTokenizer*)sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); | |||
| 22334 | if( rc==SQLITE_OK0 ){ | |||
| 22335 | pNew->x1 = pMod->x1; | |||
| 22336 | pNew->x2 = pMod->x2; | |||
| 22337 | pNew->bV2Native = pMod->bV2Native; | |||
| 22338 | if( pMod->bV2Native ){ | |||
| 22339 | rc = pMod->x2.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal); | |||
| 22340 | }else{ | |||
| 22341 | rc = pMod->x1.xCreate(pMod->pUserData, azArg, nArg, &pNew->pReal); | |||
| 22342 | } | |||
| 22343 | if( rc!=SQLITE_OK0 ){ | |||
| 22344 | sqlite3_freesqlite3_api->free(pNew); | |||
| 22345 | pNew = 0; | |||
| 22346 | } | |||
| 22347 | } | |||
| 22348 | ||||
| 22349 | *ppOut = (Fts5Tokenizer*)pNew; | |||
| 22350 | return rc; | |||
| 22351 | } | |||
| 22352 | ||||
| 22353 | /* | |||
| 22354 | ** Delete an Fts5VtoVTokenizer wrapper tokenizer. | |||
| | ** | |||
| | ** The wrapped tokenizer p->pReal is destroyed with the native xDelete | |||
| | ** (x2 if bV2Native is set, otherwise x1), after which the wrapper itself | |||
| | ** is freed. A NULL pTok is ignored. | |||
| 22355 | */ | |||
| 22356 | static void fts5VtoVDelete(Fts5Tokenizer *pTok){ | |||
| 22357 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | |||
| 22358 | if( p ){ | |||
| 22359 | if( p->bV2Native ){ | |||
| 22360 | p->x2.xDelete(p->pReal); | |||
| 22361 | }else{ | |||
| 22362 | p->x1.xDelete(p->pReal); | |||
| 22363 | } | |||
| 22364 | sqlite3_freesqlite3_api->free(p); | |||
| 22365 | } | |||
| 22366 | } | |||
| 22367 | ||||
| 22368 | ||||
| 22369 | /* | |||
| 22370 | ** xTokenizer method for a wrapper tokenizer that offers the v1 interface | |||
| 22371 | ** (no support for locales). | |||
| | ** | |||
| | ** The call is forwarded to the wrapped v2 tokenizer's xTokenize with a | |||
| | ** NULL locale pointer and a locale length of 0. The wrapper must have | |||
| | ** been created for a v2-native module (asserted). | |||
| 22372 | */ | |||
| 22373 | static int fts5V1toV2Tokenize( | |||
| 22374 | Fts5Tokenizer *pTok, | |||
| 22375 | void *pCtx, int flags, | |||
| 22376 | const char *pText, int nText, | |||
| 22377 | int (*xToken)(void*, int, const char*, int, int, int) | |||
| 22378 | ){ | |||
| 22379 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | |||
| 22380 | assert( p->bV2Native )((void) (0)); | |||
| 22381 | return p->x2.xTokenize(p->pReal, pCtx, flags, pText, nText, 0, 0, xToken); | |||
| 22382 | } | |||
| 22383 | ||||
| 22384 | /* | |||
| 22385 | ** xTokenizer method for a wrapper tokenizer that offers the v2 interface | |||
| 22386 | ** (with locale support). | |||
| | ** | |||
| | ** The call is forwarded to the wrapped v1 tokenizer's xTokenize. The | |||
| | ** pLocale and nLocale arguments are ignored, since the v1 xTokenize takes | |||
| | ** no locale. The wrapper must have been created for a v1 module | |||
| | ** (asserted). | |||
| 22387 | */ | |||
| 22388 | static int fts5V2toV1Tokenize( | |||
| 22389 | Fts5Tokenizer *pTok, | |||
| 22390 | void *pCtx, int flags, | |||
| 22391 | const char *pText, int nText, | |||
| 22392 | const char *pLocale, int nLocale, | |||
| 22393 | int (*xToken)(void*, int, const char*, int, int, int) | |||
| 22394 | ){ | |||
| 22395 | Fts5VtoVTokenizer *p = (Fts5VtoVTokenizer*)pTok; | |||
| 22396 | assert( p->bV2Native==0 )((void) (0)); | |||
| 22397 | UNUSED_PARAM2(pLocale,nLocale)(void)(pLocale), (void)(nLocale); | |||
| 22398 | return p->x1.xTokenize(p->pReal, pCtx, flags, pText, nText, xToken); | |||
| 22399 | } | |||
| 22400 | ||||
| 22401 | /* | |||
| 22402 | ** Register a new tokenizer. This is the implementation of the | |||
| 22403 | ** fts5_api.xCreateTokenizer_v2() method. | |||
| | ** | |||
| | ** SQLITE_ERROR is returned, and nothing is registered, if | |||
| | ** pTokenizer->iVersion is greater than 2. Otherwise a new module is | |||
| | ** created with fts5NewTokenizerModule(), *pTokenizer is copied into its | |||
| | ** x2 slot, it is marked v2-native, and its x1 slot is filled with the | |||
| | ** wrapper methods so that the tokenizer can also be used through the v1 | |||
| | ** interface. | |||
| 22404 | */ | |||
| 22405 | static int fts5CreateTokenizer_v2( | |||
| 22406 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
| 22407 | const char *zName, /* Name of new tokenizer */ | |||
| 22408 | void *pUserData, /* User data for the tokenizer */ | |||
| 22409 | fts5_tokenizer_v2 *pTokenizer, /* Tokenizer implementation */ | |||
| 22410 | void(*xDestroy)(void*) /* Destructor for pUserData */ | |||
| 22411 | ){ | |||
| 22412 | Fts5Global *pGlobal = (Fts5Global*)pApi; | |||
| 22413 | int rc = SQLITE_OK0; | |||
| 22414 | ||||
| 22415 | if( pTokenizer->iVersion>2 ){ | |||
| 22416 | rc = SQLITE_ERROR1; | |||
| 22417 | }else{ | |||
| 22418 | Fts5TokenizerModule *pNew = 0; | |||
| 22419 | rc = fts5NewTokenizerModule(pGlobal, zName, pUserData, xDestroy, &pNew); | |||
| 22420 | if( pNew ){ | |||
| 22421 | pNew->x2 = *pTokenizer; | |||
| 22422 | pNew->bV2Native = 1; | |||
| 22423 | pNew->x1.xCreate = fts5VtoVCreate; | |||
| 22424 | pNew->x1.xTokenize = fts5V1toV2Tokenize; | |||
| 22425 | pNew->x1.xDelete = fts5VtoVDelete; | |||
| 22426 | } | |||
| 22427 | } | |||
| 22428 | ||||
| 22429 | return rc; | |||
| 22430 | } | |||
| 22431 | ||||
| 22432 | /* | |||
| 22433 | ** The fts5_api.xCreateTokenizer() method. | |||
| | ** | |||
| | ** A new module is created with fts5NewTokenizerModule(), *pTokenizer is | |||
| | ** copied into its x1 slot, and its x2 slot is filled with the wrapper | |||
| | ** methods so that the tokenizer can also be used through the v2 | |||
| | ** interface. bV2Native is not set here. Returns the result of | |||
| | ** fts5NewTokenizerModule(). | |||
| 22434 | */ | |||
| 22435 | static int fts5CreateTokenizer( | |||
| 22436 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
| 22437 | const char *zName, /* Name of new tokenizer */ | |||
| 22438 | void *pUserData, /* User data for the tokenizer */ | |||
| 22439 | fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ | |||
| 22440 | void(*xDestroy)(void*) /* Destructor for pUserData */ | |||
| 22441 | ){ | |||
| 22442 | Fts5TokenizerModule *pNew = 0; | |||
| 22443 | int rc = SQLITE_OK0; | |||
| 22444 | ||||
| 22445 | rc = fts5NewTokenizerModule( | |||
| 22446 | (Fts5Global*)pApi, zName, pUserData, xDestroy, &pNew | |||
| 22447 | ); | |||
| 22448 | if( pNew ){ | |||
| 22449 | pNew->x1 = *pTokenizer; | |||
| 22450 | pNew->x2.xCreate = fts5VtoVCreate; | |||
| 22451 | pNew->x2.xTokenize = fts5V2toV1Tokenize; | |||
| 22452 | pNew->x2.xDelete = fts5VtoVDelete; | |||
| 22453 | } | |||
| 22454 | return rc; | |||
| 22455 | } | |||
| 22456 | ||||
| 22457 | /* | |||
| 22458 | ** Search the global context passed as the first argument for a tokenizer | |||
| 22459 | ** module named zName. If found, return a pointer to the Fts5TokenizerModule | |||
| 22460 | ** object. Otherwise, return NULL. | |||
| | ** | |||
| | ** If zName is NULL, the default module pGlobal->pDfltTok is returned | |||
| | ** instead. Names are compared with sqlite3_stricmp(). | |||
| 22461 | */ | |||
| 22462 | static Fts5TokenizerModule *fts5LocateTokenizer( | |||
| 22463 | Fts5Global *pGlobal, /* Global (one per db handle) object */ | |||
| 22464 | const char *zName /* Name of tokenizer module to find */ | |||
| 22465 | ){ | |||
| 22466 | Fts5TokenizerModule *pMod = 0; | |||
| 22467 | ||||
| 22468 | if( zName==0 ){ | |||
| 22469 | pMod = pGlobal->pDfltTok; | |||
| 22470 | }else{ | |||
| 22471 | for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ | |||
| 22472 | if( sqlite3_stricmpsqlite3_api->stricmp(zName, pMod->zName)==0 ) break; | |||
| 22473 | } | |||
| 22474 | } | |||
| 22475 | ||||
| 22476 | return pMod; | |||
| 22477 | } | |||
| 22478 | ||||
| 22479 | /* | |||
| 22480 | ** Find a tokenizer. This is the implementation of the | |||
| 22481 | ** fts5_api.xFindTokenizer_v2() method. | |||
| | ** | |||
| | ** On success *ppTokenizer points at the module's x2 method table. For a | |||
| | ** v2-native module *ppUserData is the user data it was registered with. | |||
| | ** For a v1 module it is the module object itself, which is what the | |||
| | ** wrapper xCreate (fts5VtoVCreate) expects as its context argument. | |||
| | ** | |||
| | ** If no module is found, both outputs are set to 0 and SQLITE_ERROR is | |||
| | ** returned. | |||
| 22482 | */ | |||
| 22483 | static int fts5FindTokenizer_v2( | |||
| 22484 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
| 22485 | const char *zName, /* Name of tokenizer */ | |||
| 22486 | void **ppUserData, | |||
| 22487 | fts5_tokenizer_v2 **ppTokenizer /* Populate this object */ | |||
| 22488 | ){ | |||
| 22489 | int rc = SQLITE_OK0; | |||
| 22490 | Fts5TokenizerModule *pMod; | |||
| 22491 | ||||
| 22492 | pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); | |||
| 22493 | if( pMod ){ | |||
| 22494 | if( pMod->bV2Native ){ | |||
| 22495 | *ppUserData = pMod->pUserData; | |||
| 22496 | }else{ | |||
| 22497 | *ppUserData = (void*)pMod; | |||
| 22498 | } | |||
| 22499 | *ppTokenizer = &pMod->x2; | |||
| 22500 | }else{ | |||
| 22501 | *ppTokenizer = 0; | |||
| 22502 | *ppUserData = 0; | |||
| 22503 | rc = SQLITE_ERROR1; | |||
| 22504 | } | |||
| 22505 | ||||
| 22506 | return rc; | |||
| 22507 | } | |||
| 22508 | ||||
| 22509 | /* | |||
| 22510 | ** Find a tokenizer. This is the implementation of the | |||
| 22511 | ** fts5_api.xFindTokenizer() method. | |||
| | ** | |||
| | ** On success the module's x1 method table is copied into *pTokenizer. | |||
| | ** For a v1 module *ppUserData is the user data it was registered with. | |||
| | ** For a v2-native module it is the module object itself, which is what | |||
| | ** the wrapper xCreate (fts5VtoVCreate) expects as its context argument. | |||
| | ** | |||
| | ** If no module is found, *pTokenizer is zeroed, *ppUserData is set to 0 | |||
| | ** and SQLITE_ERROR is returned. | |||
| 22512 | */ | |||
| 22513 | static int fts5FindTokenizer( | |||
| 22514 | fts5_api *pApi, /* Global context (one per db handle) */ | |||
| 22515 | const char *zName, /* Name of tokenizer to find */ | |||
| 22516 | void **ppUserData, | |||
| 22517 | fts5_tokenizer *pTokenizer /* Populate this object */ | |||
| 22518 | ){ | |||
| 22519 | int rc = SQLITE_OK0; | |||
| 22520 | Fts5TokenizerModule *pMod; | |||
| 22521 | ||||
| 22522 | pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); | |||
| 22523 | if( pMod ){ | |||
| 22524 | if( pMod->bV2Native==0 ){ | |||
| 22525 | *ppUserData = pMod->pUserData; | |||
| 22526 | }else{ | |||
| 22527 | *ppUserData = (void*)pMod; | |||
| 22528 | } | |||
| 22529 | *pTokenizer = pMod->x1; | |||
| 22530 | }else{ | |||
| 22531 | memset(pTokenizer, 0, sizeof(*pTokenizer)); | |||
| 22532 | *ppUserData = 0; | |||
| 22533 | rc = SQLITE_ERROR1; | |||
| 22534 | } | |||
| 22535 | ||||
| 22536 | return rc; | |||
| 22537 | } | |||
| 22538 | ||||
| 22539 | /* | |||
| 22540 | ** Attempt to instantiate the tokenizer. | |||
| | ** | |||
| | ** The tokenizer name is pConfig->t.azArg[0] (NULL is passed to | |||
| | ** fts5LocateTokenizer() when there are no arguments); the remaining | |||
| | ** arguments are handed to the module's xCreate method. On success the | |||
| | ** new tokenizer is stored in pConfig->t.pTok, and pApi2 or pApi1 is set | |||
| | ** to the module's method table. On any failure pApi1, pApi2 and pTok | |||
| | ** are all reset to NULL. Returns SQLITE_OK or an SQLite error code. | |||
| 22541 | */ | |||
| 22542 | static int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){ | |||
| 22543 | const char **azArg = pConfig->t.azArg; | |||
| 22544 | const int nArg = pConfig->t.nArg; | |||
| 22545 | Fts5TokenizerModule *pMod = 0; | |||
| 22546 | int rc = SQLITE_OK0; | |||
| 22547 | ||||
| 22548 | pMod = fts5LocateTokenizer(pConfig->pGlobal, nArg==0 ? 0 : azArg[0]); | |||
| 22549 | if( pMod==0 ){ | |||
| 22550 | assert( nArg>0 )((void) (0)); | |||
| 22551 | rc = SQLITE_ERROR1; | |||
| 22552 | sqlite3Fts5ConfigErrmsg(pConfig, "no such tokenizer: %s", azArg[0]); | |||
| 22553 | }else{ | |||
| 22554 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**) = 0; | |||
| 22555 | if( pMod->bV2Native ){ | |||
| 22556 | xCreate = pMod->x2.xCreate; | |||
| 22557 | pConfig->t.pApi2 = &pMod->x2; | |||
| 22558 | }else{ | |||
| 22559 | pConfig->t.pApi1 = &pMod->x1; | |||
| 22560 | xCreate = pMod->x1.xCreate; | |||
| 22561 | } | |||
| 22562 | ||||
| 22563 | rc = xCreate(pMod->pUserData, | |||
| 22564 | (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok /* skip azArg[0], the tokenizer name */ | |||
| 22565 | ); | |||
| 22566 | ||||
| 22567 | if( rc!=SQLITE_OK0 ){ | |||
| 22568 | if( rc!=SQLITE_NOMEM7 ){ /* no message is formatted for an out-of-memory failure */ | |||
| 22569 | sqlite3Fts5ConfigErrmsg(pConfig, "error in tokenizer constructor"); | |||
| 22570 | } | |||
| 22571 | }else if( pMod->bV2Native==0 ){ /* ePattern is only computed for v1 modules */ | |||
| 22572 | pConfig->t.ePattern = sqlite3Fts5TokenizerPattern( | |||
| 22573 | pMod->x1.xCreate, pConfig->t.pTok | |||
| 22574 | ); | |||
| 22575 | } | |||
| 22576 | } | |||
| 22577 | ||||
| 22578 | if( rc!=SQLITE_OK0 ){ /* leave no partially initialised tokenizer state behind */ | |||
| 22579 | pConfig->t.pApi1 = 0; | |||
| 22580 | pConfig->t.pApi2 = 0; | |||
| 22581 | pConfig->t.pTok = 0; | |||
| 22582 | } | |||
| 22583 | ||||
| 22584 | return rc; | |||
| 22585 | } | |||
| 22586 | ||||
| 22587 | ||||
| 22588 | /* | |||
| 22589 | ** xDestroy callback passed to sqlite3_create_module_v2(). This is invoked | |||
| 22590 | ** when the db handle is being closed. Free memory associated with | |||
| 22591 | ** tokenizers and aux functions registered with this db handle. | |||
| | ** | |||
| | ** pCtx is the Fts5Global object. Each entry on its pAux and pTok lists | |||
| | ** has its xDestroy callback (if any) invoked on its pUserData before | |||
| | ** the entry is freed; the Fts5Global itself is freed last. | |||
| 22592 | */ | |||
| 22593 | static void fts5ModuleDestroy(void *pCtx){ | |||
| 22594 | Fts5TokenizerModule *pTok, *pNextTok; | |||
| 22595 | Fts5Auxiliary *pAux, *pNextAux; | |||
| 22596 | Fts5Global *pGlobal = (Fts5Global*)pCtx; | |||
| 22597 | ||||
| 22598 | for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ | |||
| 22599 | pNextAux = pAux->pNext; /* read the link before the node is freed */ | |||
| 22600 | if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); | |||
| 22601 | sqlite3_freesqlite3_api->free(pAux); | |||
| 22602 | } | |||
| 22603 | ||||
| 22604 | for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ | |||
| 22605 | pNextTok = pTok->pNext; /* read the link before the node is freed */ | |||
| 22606 | if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); | |||
| 22607 | sqlite3_freesqlite3_api->free(pTok); | |||
| 22608 | } | |||
| 22609 | ||||
| 22610 | sqlite3_freesqlite3_api->free(pGlobal); | |||
| 22611 | } | |||
| 22612 | ||||
| 22613 | /* | |||
| 22614 | ** Implementation of the fts5() function used by clients to obtain the | |||
| 22615 | ** API pointer. | |||
| | ** | |||
| | ** The single argument is read with sqlite3_value_pointer() using the | |||
| | ** pointer type "fts5_api_ptr". If that yields a non-NULL fts5_api** the | |||
| | ** address of this connection's fts5_api is stored through it; otherwise | |||
| | ** nothing is written. The function does not set an SQL result itself. | |||
| 22616 | */ | |||
| 22617 | static void fts5Fts5Func( | |||
| 22618 | sqlite3_context *pCtx, /* Function call context */ | |||
| 22619 | int nArg, /* Number of args */ | |||
| 22620 | sqlite3_value **apArg /* Function arguments */ | |||
| 22621 | ){ | |||
| 22622 | Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | |||
| 22623 | fts5_api **ppApi; | |||
| 22624 | UNUSED_PARAM(nArg)(void)(nArg); | |||
| 22625 | assert( nArg==1 )((void) (0)); | |||
| 22626 | ppApi = (fts5_api**)sqlite3_value_pointersqlite3_api->value_pointer(apArg[0], "fts5_api_ptr"); | |||
| 22627 | if( ppApi ) *ppApi = &pGlobal->api; /* a NULL lookup result is silently ignored */ | |||
| 22628 | } | |||
| 22629 | ||||
| 22630 | /* | |||
| 22631 | ** Implementation of fts5_source_id() function. | |||
| | ** | |||
| | ** Takes no arguments and returns a fixed "fts5: <date> <hash>" string | |||
| | ** as text. SQLITE_TRANSIENT is passed as the destructor argument. | |||
| 22632 | */ | |||
| 22633 | static void fts5SourceIdFunc( | |||
| 22634 | sqlite3_context *pCtx, /* Function call context */ | |||
| 22635 | int nArg, /* Number of args */ | |||
| 22636 | sqlite3_value **apUnused /* Function arguments */ | |||
| 22637 | ){ | |||
| 22638 | assert( nArg==0 )((void) (0)); | |||
| 22639 | UNUSED_PARAM2(nArg, apUnused)(void)(nArg), (void)(apUnused); | |||
| 22640 | sqlite3_result_textsqlite3_api->result_text(pCtx, "fts5: 2025-06-06 14:52:32 b77dc5e0f596d2140d9ac682b2893ff65d3a4140aa86067a3efebe29dc914c95", -1, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 22641 | } | |||
| 22642 | ||||
| 22643 | /* | |||
| 22644 | ** Implementation of fts5_locale(LOCALE, TEXT) function. | |||
| 22645 | ** | |||
| 22646 | ** If parameter LOCALE is NULL, or a zero-length string, then a copy of | |||
| 22647 | ** TEXT is returned. Otherwise, both LOCALE and TEXT are interpreted as | |||
| 22648 | ** text, and the value returned is a blob consisting of: | |||
| 22649 | ** | |||
| 22650 | ** * The FTS5_LOCALE_HDR_SIZE bytes of Fts5Global.aLocaleHdr, followed by | |||
| 22651 | ** * The LOCALE, as utf-8 text, followed by | |||
| 22652 | ** * 0x00, followed by | |||
| 22653 | ** * The TEXT, as utf-8 text. | |||
| 22654 | ** | |||
| 22655 | ** There is no final nul-terminator following the TEXT value. | |||
| | ** | |||
| | ** If the blob cannot be allocated, an out-of-memory error is reported | |||
| | ** through sqlite3_result_error_nomem(). | |||
| 22656 | */ | |||
| 22657 | static void fts5LocaleFunc( | |||
| 22658 | sqlite3_context *pCtx, /* Function call context */ | |||
| 22659 | int nArg, /* Number of args */ | |||
| 22660 | sqlite3_value **apArg /* Function arguments */ | |||
| 22661 | ){ | |||
| 22662 | const char *zLocale = 0; | |||
| 22663 | int nLocale = 0; | |||
| 22664 | const char *zText = 0; | |||
| 22665 | int nText = 0; | |||
| 22666 | ||||
| 22667 | assert( nArg==2 )((void) (0)); | |||
| 22668 | UNUSED_PARAM(nArg)(void)(nArg); | |||
| 22669 | ||||
| 22670 | zLocale = (const char*)sqlite3_value_textsqlite3_api->value_text(apArg[0]); | |||
| 22671 | nLocale = sqlite3_value_bytessqlite3_api->value_bytes(apArg[0]); /* byte length, read after the text conversion */ | |||
| 22672 | ||||
| 22673 | zText = (const char*)sqlite3_value_textsqlite3_api->value_text(apArg[1]); | |||
| 22674 | nText = sqlite3_value_bytessqlite3_api->value_bytes(apArg[1]); | |||
| 22675 | ||||
| 22676 | if( zLocale==0 || zLocale[0]=='\0' ){ | |||
| 22677 | sqlite3_result_textsqlite3_api->result_text(pCtx, zText, nText, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 22678 | }else{ | |||
| 22679 | Fts5Global *p = (Fts5Global*)sqlite3_user_datasqlite3_api->user_data(pCtx); | |||
| 22680 | u8 *pBlob = 0; | |||
| 22681 | u8 *pCsr = 0; /* write cursor into pBlob */ | |||
| 22682 | int nBlob = 0; | |||
| 22683 | ||||
| 22684 | nBlob = FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) + nLocale + 1 + nText; /* NOTE(review): summed in int, no overflow check visible here */ | |||
| 22685 | pBlob = (u8*)sqlite3_mallocsqlite3_api->malloc(nBlob); | |||
| 22686 | if( pBlob==0 ){ | |||
| 22687 | sqlite3_result_error_nomemsqlite3_api->result_error_nomem(pCtx); | |||
| 22688 | return; | |||
| 22689 | } | |||
| 22690 | ||||
| 22691 | pCsr = pBlob; | |||
| 22692 | memcpy(pCsr, (const u8*)p->aLocaleHdr, FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr ))); | |||
| 22693 | pCsr += FTS5_LOCALE_HDR_SIZE((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )); | |||
| 22694 | memcpy(pCsr, zLocale, nLocale); | |||
| 22695 | pCsr += nLocale; | |||
| 22696 | (*pCsr++) = 0x00; /* separator between LOCALE and TEXT */ | |||
| 22697 | if( zText ) memcpy(pCsr, zText, nText); /* zText may be NULL; then nothing is copied */ | |||
| 22698 | assert( &pCsr[nText]==&pBlob[nBlob] )((void) (0)); | |||
| 22699 | ||||
| 22700 | sqlite3_result_blobsqlite3_api->result_blob(pCtx, pBlob, nBlob, sqlite3_freesqlite3_api->free); /* sqlite3_free is passed as the blob's destructor */ | |||
| 22701 | } | |||
| 22702 | } | |||
| 22703 | ||||
| 22704 | /* | |||
| 22705 | ** Implementation of fts5_insttoken() function. | |||
| | ** | |||
| | ** Returns its single argument unchanged, with the result subtype set to | |||
| | ** FTS5_INSTTOKEN_SUBTYPE. | |||
| 22706 | */ | |||
| 22707 | static void fts5InsttokenFunc( | |||
| 22708 | sqlite3_context *pCtx, /* Function call context */ | |||
| 22709 | int nArg, /* Number of args */ | |||
| 22710 | sqlite3_value **apArg /* Function arguments */ | |||
| 22711 | ){ | |||
| 22712 | assert( nArg==1 )((void) (0)); | |||
| 22713 | (void)nArg; | |||
| 22714 | sqlite3_result_valuesqlite3_api->result_value(pCtx, apArg[0]); | |||
| 22715 | sqlite3_result_subtypesqlite3_api->result_subtype(pCtx, FTS5_INSTTOKEN_SUBTYPE73); | |||
| 22716 | } | |||
| 22717 | ||||
| 22718 | /* | |||
| 22719 | ** Return true if zName is the extension on one of the shadow tables used | |||
| 22720 | ** by this module. | |||
| | ** | |||
| | ** The comparison uses sqlite3_stricmp() against the fixed list "config", | |||
| | ** "content", "data", "docsize" and "idx". Returns 1 on a match, else 0. | |||
| 22721 | */ | |||
| 22722 | static int fts5ShadowName(const char *zName){ | |||
| 22723 | static const char *azName[] = { | |||
| 22724 | "config", "content", "data", "docsize", "idx" | |||
| 22725 | }; | |||
| 22726 | unsigned int i; | |||
| 22727 | for(i=0; i<sizeof(azName)/sizeof(azName[0]); i++){ | |||
| 22728 | if( sqlite3_stricmpsqlite3_api->stricmp(zName, azName[i])==0 ) return 1; | |||
| 22729 | } | |||
| 22730 | return 0; | |||
| 22731 | } | |||
| 22732 | ||||
| 22733 | /* | |||
| 22734 | ** Run an integrity check on the FTS5 data structures by calling | |||
| 22735 | ** sqlite3Fts5StorageIntegrity(). Problems are reported as a message | |||
| 22736 | ** written to *pzErr; the return value is an SQLite result code. | |||
| | ** | |||
| | ** While the check runs, pConfig->pzErrmsg is pointed at pzErr. If the | |||
| | ** check fails without leaving a message in *pzErr, one is created here: | |||
| | ** | |||
| | ** * If the low byte of the error is SQLITE_CORRUPT, the message is | |||
| | ** "malformed inverted index ..." and the return code is changed to | |||
| | ** SQLITE_OK (or SQLITE_NOMEM if the message could not be allocated). | |||
| | ** * For any other error the message is "unable to validate ..." and | |||
| | ** the original error code is returned. | |||
| 22737 | */ | |||
| 22738 | static int fts5IntegrityMethod( | |||
| 22739 | sqlite3_vtab *pVtab, /* the FTS5 virtual table to check */ | |||
| 22740 | const char *zSchema, /* Name of schema in which this table lives */ | |||
| 22741 | const char *zTabname, /* Name of the table itself */ | |||
| 22742 | int isQuick, /* True if this is a quick-check */ | |||
| 22743 | char **pzErr /* Write error message here */ | |||
| 22744 | ){ | |||
| 22745 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; | |||
| 22746 | int rc; | |||
| 22747 | ||||
| 22748 | assert( pzErr!=0 && *pzErr==0 )((void) (0)); | |||
| 22749 | UNUSED_PARAM(isQuick)(void)(isQuick); /* the same check runs whether or not isQuick is set */ | |||
| 22750 | assert( pTab->p.pConfig->pzErrmsg==0 )((void) (0)); | |||
| 22751 | pTab->p.pConfig->pzErrmsg = pzErr; | |||
| 22752 | rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, 0); | |||
| 22753 | if( *pzErr==0 && rc!=SQLITE_OK0 ){ | |||
| 22754 | if( (rc&0xff)==SQLITE_CORRUPT11 ){ /* mask keeps only the primary result code */ | |||
| 22755 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("malformed inverted index for FTS5 table %s.%s", | |||
| 22756 | zSchema, zTabname); | |||
| 22757 | rc = (*pzErr) ? SQLITE_OK0 : SQLITE_NOMEM7; | |||
| 22758 | }else{ | |||
| 22759 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("unable to validate the inverted index for" | |||
| 22760 | " FTS5 table %s.%s: %s", | |||
| 22761 | zSchema, zTabname, sqlite3_errstrsqlite3_api->errstr(rc)); | |||
| 22762 | } | |||
| 22763 | } | |||
| 22764 | ||||
| 22765 | sqlite3Fts5IndexCloseReader(pTab->p.pIndex); | |||
| 22766 | pTab->p.pConfig->pzErrmsg = 0; /* restore the NULL state asserted on entry */ | |||
| 22767 | ||||
| 22768 | return rc; | |||
| 22769 | } | |||
| 22770 | ||||
| 22771 | static int fts5Init(sqlite3 *db){ /* Register the "fts5" module and its SQL functions with db; returns an SQLite result code */ | |||
| 22772 | static const sqlite3_module fts5Mod = { | |||
| 22773 | /* iVersion */ 4, | |||
| 22774 | /* xCreate */ fts5CreateMethod, | |||
| 22775 | /* xConnect */ fts5ConnectMethod, | |||
| 22776 | /* xBestIndex */ fts5BestIndexMethod, | |||
| 22777 | /* xDisconnect */ fts5DisconnectMethod, | |||
| 22778 | /* xDestroy */ fts5DestroyMethod, | |||
| 22779 | /* xOpen */ fts5OpenMethod, | |||
| 22780 | /* xClose */ fts5CloseMethod, | |||
| 22781 | /* xFilter */ fts5FilterMethod, | |||
| 22782 | /* xNext */ fts5NextMethod, | |||
| 22783 | /* xEof */ fts5EofMethod, | |||
| 22784 | /* xColumn */ fts5ColumnMethod, | |||
| 22785 | /* xRowid */ fts5RowidMethod, | |||
| 22786 | /* xUpdate */ fts5UpdateMethod, | |||
| 22787 | /* xBegin */ fts5BeginMethod, | |||
| 22788 | /* xSync */ fts5SyncMethod, | |||
| 22789 | /* xCommit */ fts5CommitMethod, | |||
| 22790 | /* xRollback */ fts5RollbackMethod, | |||
| 22791 | /* xFindFunction */ fts5FindFunctionMethod, | |||
| 22792 | /* xRename */ fts5RenameMethod, | |||
| 22793 | /* xSavepoint */ fts5SavepointMethod, | |||
| 22794 | /* xRelease */ fts5ReleaseMethod, | |||
| 22795 | /* xRollbackTo */ fts5RollbackToMethod, | |||
| 22796 | /* xShadowName */ fts5ShadowName, | |||
| 22797 | /* xIntegrity */ fts5IntegrityMethod | |||
| 22798 | }; | |||
| 22799 | ||||
| 22800 | int rc; | |||
| 22801 | Fts5Global *pGlobal = 0; | |||
| 22802 | ||||
| 22803 | pGlobal = (Fts5Global*)sqlite3_mallocsqlite3_api->malloc(sizeof(Fts5Global)); | |||
| 22804 | if( pGlobal==0 ){ | |||
| 22805 | rc = SQLITE_NOMEM7; | |||
| 22806 | }else{ | |||
| 22807 | void *p = (void*)pGlobal; /* same object, passed as the user-data pointer below */ | |||
| 22808 | memset(pGlobal, 0, sizeof(Fts5Global)); | |||
| 22809 | pGlobal->db = db; | |||
| 22810 | pGlobal->api.iVersion = 3; | |||
| 22811 | pGlobal->api.xCreateFunction = fts5CreateAux; | |||
| 22812 | pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; | |||
| 22813 | pGlobal->api.xFindTokenizer = fts5FindTokenizer; | |||
| 22814 | pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2; | |||
| 22815 | pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2; | |||
| 22816 | ||||
| 22817 | /* Initialize pGlobal->aLocaleHdr[] to a 128-bit pseudo-random vector. | |||
| 22818 | ** The constants below were generated randomly. */ | |||
| 22819 | sqlite3_randomnesssqlite3_api->randomness(sizeof(pGlobal->aLocaleHdr), pGlobal->aLocaleHdr); | |||
| 22820 | pGlobal->aLocaleHdr[0] ^= 0xF924976D; | |||
| 22821 | pGlobal->aLocaleHdr[1] ^= 0x16596E13; | |||
| 22822 | pGlobal->aLocaleHdr[2] ^= 0x7C80BEAA; | |||
| 22823 | pGlobal->aLocaleHdr[3] ^= 0x9B03A67F; | |||
| 22824 | assert( sizeof(pGlobal->aLocaleHdr)==16 )((void) (0)); | |||
| 22825 | ||||
| 22826 | rc = sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); /* fts5ModuleDestroy is the routine that frees pGlobal */ | |||
| 22827 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5IndexInit(db); /* each later step runs only if all earlier ones succeeded */ | |||
| 22828 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5ExprInit(pGlobal, db); | |||
| 22829 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5AuxInit(&pGlobal->api); | |||
| 22830 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); | |||
| 22831 | if( rc==SQLITE_OK0 ) rc = sqlite3Fts5VocabInit(pGlobal, db); | |||
| 22832 | if( rc==SQLITE_OK0 ){ | |||
| 22833 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
| 22834 | db, "fts5", 1, SQLITE_UTF81, p, fts5Fts5Func, 0, 0 | |||
| 22835 | ); | |||
| 22836 | } | |||
| 22837 | if( rc==SQLITE_OK0 ){ | |||
| 22838 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
| 22839 | db, "fts5_source_id", 0, | |||
| 22840 | SQLITE_UTF81|SQLITE_DETERMINISTIC0x000000800|SQLITE_INNOCUOUS0x000200000, | |||
| 22841 | p, fts5SourceIdFunc, 0, 0 | |||
| 22842 | ); | |||
| 22843 | } | |||
| 22844 | if( rc==SQLITE_OK0 ){ | |||
| 22845 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
| 22846 | db, "fts5_locale", 2, | |||
| 22847 | SQLITE_UTF81|SQLITE_INNOCUOUS0x000200000|SQLITE_RESULT_SUBTYPE0x001000000|SQLITE_SUBTYPE0x000100000, | |||
| 22848 | p, fts5LocaleFunc, 0, 0 | |||
| 22849 | ); | |||
| 22850 | } | |||
| 22851 | if( rc==SQLITE_OK0 ){ | |||
| 22852 | rc = sqlite3_create_functionsqlite3_api->create_function( | |||
| 22853 | db, "fts5_insttoken", 1, | |||
| 22854 | SQLITE_UTF81|SQLITE_INNOCUOUS0x000200000|SQLITE_RESULT_SUBTYPE0x001000000, | |||
| 22855 | p, fts5InsttokenFunc, 0, 0 | |||
| 22856 | ); | |||
| 22857 | } | |||
| 22858 | } | |||
| 22859 | ||||
| 22860 | /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file | |||
| 22861 | ** fts5_test_mi.c is compiled and linked into the executable. And call | |||
| 22862 | ** its entry point to enable the matchinfo() demo. */ | |||
| 22863 | #ifdef SQLITE_FTS5_ENABLE_TEST_MI | |||
| 22864 | if( rc==SQLITE_OK0 ){ | |||
| 22865 | extern int sqlite3Fts5TestRegisterMatchinfoAPI(fts5_api*); | |||
| 22866 | rc = sqlite3Fts5TestRegisterMatchinfoAPI(&pGlobal->api); | |||
| 22867 | } | |||
| 22868 | #endif | |||
| 22869 | ||||
| 22870 | return rc; | |||
| 22871 | } | |||
| 22872 | ||||
| 22873 | /* | |||
| 22874 | ** The following functions are used to register the module with SQLite. If | |||
| 22875 | ** this module is being built as part of the SQLite core (SQLITE_CORE is | |||
| 22876 | ** defined), then sqlite3_open() will call sqlite3Fts5Init() directly. | |||
| 22877 | ** | |||
| 22878 | ** Or, if this module is being built as a loadable extension, | |||
| 22879 | ** sqlite3Fts5Init() is omitted and the two standard entry points | |||
| 22880 | ** sqlite3_fts_init() and sqlite3_fts5_init() defined instead. | |||
| | ** | |||
| | ** All three are thin wrappers around fts5Init(). The two extension entry | |||
| | ** points first store pApi in sqlite3_api, and ignore pzErrMsg. | |||
| 22881 | */ | |||
| 22882 | #ifndef SQLITE_CORE | |||
| 22883 | #ifdef _WIN32 | |||
| 22884 | __declspec(dllexport) | |||
| 22885 | #endif | |||
| 22886 | int sqlite3_fts_init( | |||
| 22887 | sqlite3 *db, | |||
| 22888 | char **pzErrMsg, | |||
| 22889 | const sqlite3_api_routines *pApi | |||
| 22890 | ){ | |||
| 22891 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | |||
| 22892 | (void)pzErrMsg; /* Unused parameter */ | |||
| 22893 | return fts5Init(db); | |||
| 22894 | } | |||
| 22895 | ||||
| 22896 | #ifdef _WIN32 | |||
| 22897 | __declspec(dllexport) | |||
| 22898 | #endif | |||
| 22899 | int sqlite3_fts5_init( | |||
| 22900 | sqlite3 *db, | |||
| 22901 | char **pzErrMsg, | |||
| 22902 | const sqlite3_api_routines *pApi | |||
| 22903 | ){ | |||
| 22904 | SQLITE_EXTENSION_INIT2(pApi)sqlite3_api=pApi;; | |||
| 22905 | (void)pzErrMsg; /* Unused parameter */ | |||
| 22906 | return fts5Init(db); | |||
| 22907 | } | |||
| 22908 | #else | |||
| 22909 | int sqlite3Fts5Init(sqlite3 *db){ | |||
| 22910 | return fts5Init(db); | |||
| 22911 | } | |||
| 22912 | #endif | |||
| 22913 | ||||
| 22914 | #line 1 "fts5_storage.c" | |||
| 22915 | /* | |||
| 22916 | ** 2014 May 31 | |||
| 22917 | ** | |||
| 22918 | ** The author disclaims copyright to this source code. In place of | |||
| 22919 | ** a legal notice, here is a blessing: | |||
| 22920 | ** | |||
| 22921 | ** May you do good and not evil. | |||
| 22922 | ** May you find forgiveness for yourself and forgive others. | |||
| 22923 | ** May you share freely, never taking more than you give. | |||
| 22924 | ** | |||
| 22925 | ****************************************************************************** | |||
| 22926 | ** | |||
| 22927 | */ | |||
| 22928 | ||||
| 22929 | ||||
| 22930 | ||||
| 22931 | /* #include "fts5Int.h" */ | |||
| 22932 | ||||
| 22933 | /* | |||
| 22934 | ** pSavedRow: | |||
| 22935 | ** SQL statement FTS5_STMT_LOOKUP2 is a copy of FTS5_STMT_LOOKUP, it | |||
| 22936 | ** does a by-rowid lookup to retrieve a single row from the %_content | |||
| 22937 | ** table or equivalent external-content table/view. | |||
| 22938 | ** | |||
| 22939 | ** However, FTS5_STMT_LOOKUP2 is only used when retrieving the original | |||
| 22940 | ** values for a row being UPDATEd. In that case, the SQL statement is | |||
| 22941 | ** not reset and pSavedRow is set to point at it. This is so that the | |||
| 22942 | ** insert operation that follows the delete may access the original | |||
| 22943 | ** row values for any new values for which sqlite3_value_nochange() returns | |||
| 22944 | ** true. i.e. if the user executes: | |||
| 22945 | ** | |||
| 22946 | ** CREATE VIRTUAL TABLE ft USING fts5(a, b, c, locale=1); | |||
| 22947 | ** ... | |||
| 22948 | ** UPDATE ft SET a=?, b=? WHERE rowid=?; | |||
| 22949 | ** | |||
| 22950 | ** then the value passed to the xUpdate() method of this table as the | |||
| 22951 | ** new.c value is an sqlite3_value_nochange() value. So in this case it | |||
| 22952 | ** must be read from the saved row stored in Fts5Storage.pSavedRow. | |||
| 22953 | ** | |||
| 22954 | ** This is necessary - using sqlite3_value_nochange() instead of just having | |||
| 22955 | ** SQLite pass the original value back via xUpdate() - so as not to discard | |||
| 22956 | ** any locale information associated with such values. | |||
| 22957 | ** | |||
| 22958 | */ | |||
| 22959 | struct Fts5Storage { | |||
| 22960 | Fts5Config *pConfig; /* Configuration of the FTS5 table */ | |||
| 22961 | Fts5Index *pIndex; /* Index handle supplied when the storage object is opened */ | |||
| 22962 | int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ | |||
| 22963 | i64 nTotalRow; /* Total number of rows in FTS table */ | |||
| 22964 | i64 *aTotalSize; /* Total sizes of each column */ | |||
| 22965 | sqlite3_stmt *pSavedRow; /* See the comment above this struct */ | |||
| 22966 | sqlite3_stmt *aStmt[12]; /* Prepared statements, indexed by FTS5_STMT_XXX value */ | |||
| 22967 | }; | |||
| 22968 | ||||
| 22969 | ||||
| 22970 | #if FTS5_STMT_SCAN_ASC0!=0 /* compile-time checks: the three ids not defined in this section must be 0, 1 and 2 */ | |||
| 22971 | # error "FTS5_STMT_SCAN_ASC mismatch" | |||
| 22972 | #endif | |||
| 22973 | #if FTS5_STMT_SCAN_DESC1!=1 | |||
| 22974 | # error "FTS5_STMT_SCAN_DESC mismatch" | |||
| 22975 | #endif | |||
| 22976 | #if FTS5_STMT_LOOKUP2!=2 | |||
| 22977 | # error "FTS5_STMT_LOOKUP mismatch" | |||
| 22978 | #endif | |||
| 22979 | ||||
| 22980 | #define FTS5_STMT_LOOKUP23 3 /* ids 3..11 continue the sequence and index the aStmt[] statement array */ | |||
| 22981 | #define FTS5_STMT_INSERT_CONTENT4 4 | |||
| 22982 | #define FTS5_STMT_REPLACE_CONTENT5 5 | |||
| 22983 | #define FTS5_STMT_DELETE_CONTENT6 6 | |||
| 22984 | #define FTS5_STMT_REPLACE_DOCSIZE7 7 | |||
| 22985 | #define FTS5_STMT_DELETE_DOCSIZE8 8 | |||
| 22986 | #define FTS5_STMT_LOOKUP_DOCSIZE9 9 | |||
| 22987 | #define FTS5_STMT_REPLACE_CONFIG10 10 | |||
| 22988 | #define FTS5_STMT_SCAN11 11 | |||
| 22989 | ||||
| 22990 | /* | |||
| 22991 | ** Obtain the prepared statement identified by FTS5_STMT_XXX constant eStmt, | |||
| 22992 | ** preparing it and caching it in Fts5Storage.aStmt[] if that slot is empty. | |||
| 22993 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
| 22994 | ** occurs. | |||
| | ** | |||
| | ** *ppStmt is always set to the cached slot (NULL if preparation failed) | |||
| | ** and passed to sqlite3_reset() before returning. If preparation fails | |||
| | ** and pzErrMsg is not NULL, *pzErrMsg is set to an sqlite3_mprintf() copy | |||
| | ** of the database handle's error message. An SQLITE_ERROR result for a | |||
| | ** statement id strictly between FTS5_STMT_LOOKUP2 and FTS5_STMT_SCAN is | |||
| | ** reported as SQLITE_CORRUPT. | |||
| 22995 | */ | |||
| 22996 | static int fts5StorageGetStmt( | |||
| 22997 | Fts5Storage *p, /* Storage handle */ | |||
| 22998 | int eStmt, /* FTS5_STMT_XXX constant */ | |||
| 22999 | sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */ | |||
| 23000 | char **pzErrMsg /* OUT: Error message (if any) */ | |||
| 23001 | ){ | |||
| 23002 | int rc = SQLITE_OK0; | |||
| 23003 | ||||
| 23004 | /* If there is no %_docsize table, there should be no requests for | |||
| 23005 | ** statements to operate on it. */ | |||
| 23006 | assert( p->pConfig->bColumnsize || (((void) (0)) | |||
| 23007 | eStmt!=FTS5_STMT_REPLACE_DOCSIZE((void) (0)) | |||
| 23008 | && eStmt!=FTS5_STMT_DELETE_DOCSIZE((void) (0)) | |||
| 23009 | && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE((void) (0)) | |||
| 23010 | ))((void) (0)); | |||
| 23011 | ||||
| 23012 | assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) )((void) (0)); | |||
| 23013 | if( p->aStmt[eStmt]==0 ){ | |||
| 23014 | const char *azStmt[] = { | |||
| 23015 | "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC", | |||
| 23016 | "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC", | |||
| 23017 | "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ | |||
| 23018 | "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP2 */ | |||
| 23019 | ||||
| 23020 | "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ | |||
| 23021 | "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ | |||
| 23022 | "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ | |||
| 23023 | "REPLACE INTO %Q.'%q_docsize' VALUES(?,?%s)", /* REPLACE_DOCSIZE */ | |||
| 23024 | "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ | |||
| 23025 | ||||
| 23026 | "SELECT sz%s FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ | |||
| 23027 | ||||
| 23028 | "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ | |||
| 23029 | "SELECT %s FROM %s AS T", /* SCAN */ | |||
| 23030 | }; | |||
| 23031 | Fts5Config *pC = p->pConfig; | |||
| 23032 | char *zSql = 0; | |||
| 23033 | ||||
| 23034 | assert( ArraySize(azStmt)==ArraySize(p->aStmt) )((void) (0)); | |||
| 23035 | ||||
| 23036 | switch( eStmt ){ /* each case supplies the arguments its azStmt[] template expects */ | |||
| 23037 | case FTS5_STMT_SCAN11: | |||
| 23038 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | |||
| 23039 | pC->zContentExprlist, pC->zContent | |||
| 23040 | ); | |||
| 23041 | break; | |||
| 23042 | ||||
| 23043 | case FTS5_STMT_SCAN_ASC0: | |||
| 23044 | case FTS5_STMT_SCAN_DESC1: | |||
| 23045 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zContentExprlist, | |||
| 23046 | pC->zContent, pC->zContentRowid, pC->zContentRowid, | |||
| 23047 | pC->zContentRowid | |||
| 23048 | ); | |||
| 23049 | break; | |||
| 23050 | ||||
| 23051 | case FTS5_STMT_LOOKUP2: | |||
| 23052 | case FTS5_STMT_LOOKUP23: | |||
| 23053 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | |||
| 23054 | pC->zContentExprlist, pC->zContent, pC->zContentRowid | |||
| 23055 | ); | |||
| 23056 | break; | |||
| 23057 | ||||
| 23058 | case FTS5_STMT_INSERT_CONTENT4: | |||
| 23059 | case FTS5_STMT_REPLACE_CONTENT5: { | |||
| 23060 | char *zBind = 0; /* comma-separated "?N" list substituted into VALUES(...) */ | |||
| 23061 | int i; | |||
| 23062 | ||||
| 23063 | assert( pC->eContent==FTS5_CONTENT_NORMAL((void) (0)) | |||
| 23064 | || pC->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | |||
| 23065 | )((void) (0)); | |||
| 23066 | ||||
| 23067 | /* Add bindings for the "c*" columns - those that store the actual | |||
| 23068 | ** table content. If eContent==NORMAL, then there is one binding | |||
| 23069 | ** for each column. Or, if eContent==UNINDEXED, then there are only | |||
| 23070 | ** bindings for the UNINDEXED columns. */ | |||
| 23071 | for(i=0; rc==SQLITE_OK0 && i<(pC->nCol+1); i++){ | |||
| 23072 | if( !i || pC->eContent==FTS5_CONTENT_NORMAL0 || pC->abUnindexed[i-1] ){ /* i==0 always gets a binding; column i-1 otherwise */ | |||
| 23073 | zBind = sqlite3Fts5Mprintf(&rc, "%z%s?%d", zBind, zBind?",":"",i+1); | |||
| 23074 | } | |||
| 23075 | } | |||
| 23076 | ||||
| 23077 | /* Add bindings for any "l*" columns. Only non-UNINDEXED columns | |||
| 23078 | ** require these. */ | |||
| 23079 | if( pC->bLocale && pC->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
| 23080 | for(i=0; rc==SQLITE_OK0 && i<pC->nCol; i++){ | |||
| 23081 | if( pC->abUnindexed[i]==0 ){ | |||
| 23082 | zBind = sqlite3Fts5Mprintf(&rc, "%z,?%d", zBind, pC->nCol+i+2); | |||
| 23083 | } | |||
| 23084 | } | |||
| 23085 | } | |||
| 23086 | ||||
| 23087 | zSql = sqlite3Fts5Mprintf(&rc, azStmt[eStmt], pC->zDb, pC->zName,zBind); | |||
| 23088 | sqlite3_freesqlite3_api->free(zBind); | |||
| 23089 | break; | |||
| 23090 | } | |||
| 23091 | ||||
| 23092 | case FTS5_STMT_REPLACE_DOCSIZE7: | |||
| 23093 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zDb, pC->zName, | |||
| 23094 | (pC->bContentlessDelete ? ",?" : "") /* one extra bound value when bContentlessDelete is set */ | |||
| 23095 | ); | |||
| 23096 | break; | |||
| 23097 | ||||
| 23098 | case FTS5_STMT_LOOKUP_DOCSIZE9: | |||
| 23099 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], | |||
| 23100 | (pC->bContentlessDelete ? ",origin" : ""), | |||
| 23101 | pC->zDb, pC->zName | |||
| 23102 | ); | |||
| 23103 | break; | |||
| 23104 | ||||
| 23105 | default: /* remaining templates take only the database and table names */ | |||
| 23106 | zSql = sqlite3_mprintfsqlite3_api->mprintf(azStmt[eStmt], pC->zDb, pC->zName); | |||
| 23107 | break; | |||
| 23108 | } | |||
| 23109 | ||||
| 23110 | if( zSql==0 ){ | |||
| 23111 | rc = SQLITE_NOMEM7; | |||
| 23112 | }else{ | |||
| 23113 | int f = SQLITE_PREPARE_PERSISTENT0x01; | |||
| 23114 | if( eStmt>FTS5_STMT_LOOKUP23 ) f |= SQLITE_PREPARE_NO_VTAB0x04; /* statement ids above LOOKUP2 only */ | |||
| 23115 | p->pConfig->bLock++; /* bLock is raised only for the duration of the prepare call */ | |||
| 23116 | rc = sqlite3_prepare_v3sqlite3_api->prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0); | |||
| 23117 | p->pConfig->bLock--; | |||
| 23118 | sqlite3_freesqlite3_api->free(zSql); | |||
| 23119 | if( rc!=SQLITE_OK0 && pzErrMsg ){ | |||
| 23120 | *pzErrMsg = sqlite3_mprintfsqlite3_api->mprintf("%s", sqlite3_errmsgsqlite3_api->errmsg(pC->db)); | |||
| 23121 | } | |||
| 23122 | if( rc==SQLITE_ERROR1 && eStmt>FTS5_STMT_LOOKUP23 && eStmt<FTS5_STMT_SCAN11 ){ | |||
| 23123 | /* One of the internal tables - not the %_content table - is missing. | |||
| 23124 | ** This counts as a corrupted table. */ | |||
| 23125 | rc = SQLITE_CORRUPT11; | |||
| 23126 | } | |||
| 23127 | } | |||
| 23128 | } | |||
| 23129 | ||||
| 23130 | *ppStmt = p->aStmt[eStmt]; | |||
| 23131 | sqlite3_resetsqlite3_api->reset(*ppStmt); /* runs for cached and freshly prepared statements alike */ | |||
| 23132 | return rc; | |||
| 23133 | } | |||
| 23134 | ||||
| 23135 | ||||
| 23136 | static int fts5ExecPrintf( /* Format an SQL string printf-style and run it with sqlite3_exec() */ | |||
| 23137 | sqlite3 *db, /* Database handle to execute against */ | |||
| 23138 | char **pzErr, /* Passed through to sqlite3_exec() as its error-message output */ | |||
| 23139 | const char *zFormat, /* sqlite3_vmprintf() format string */ | |||
| 23140 | ... | |||
| 23141 | ){ | |||
| 23142 | int rc; | |||
| 23143 | va_list ap; /* ... printf arguments */ | |||
| 23144 | char *zSql; | |||
| 23145 | ||||
| 23146 | va_start(ap, zFormat)__builtin_va_start(ap, zFormat); | |||
| 23147 | zSql = sqlite3_vmprintfsqlite3_api->vmprintf(zFormat, ap); | |||
| 23148 | ||||
| 23149 | if( zSql==0 ){ | |||
| 23150 | rc = SQLITE_NOMEM7; /* formatting failed; nothing is executed */ | |||
| 23151 | }else{ | |||
| 23152 | rc = sqlite3_execsqlite3_api->exec(db, zSql, 0, 0, pzErr); | |||
| 23153 | sqlite3_freesqlite3_api->free(zSql); | |||
| 23154 | } | |||
| 23155 | ||||
| 23156 | va_end(ap)__builtin_va_end(ap); | |||
| 23157 | return rc; | |||
| 23158 | } | |||
| 23159 | ||||
| 23160 | /* | |||
| 23161 | ** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error | |||
| 23162 | ** code otherwise. | |||
| | ** | |||
| | ** The %_data, %_idx and %_config tables are always dropped. %_docsize is | |||
| | ** dropped only if pConfig->bColumnsize is set, and %_content only if | |||
| | ** pConfig->eContent is FTS5_CONTENT_NORMAL. Every statement uses | |||
| | ** DROP TABLE IF EXISTS, and later steps are skipped after a failure. | |||
| 23163 | */ | |||
| 23164 | static int sqlite3Fts5DropAll(Fts5Config *pConfig){ | |||
| 23165 | int rc = fts5ExecPrintf(pConfig->db, 0, | |||
| 23166 | "DROP TABLE IF EXISTS %Q.'%q_data';" | |||
| 23167 | "DROP TABLE IF EXISTS %Q.'%q_idx';" | |||
| 23168 | "DROP TABLE IF EXISTS %Q.'%q_config';", | |||
| 23169 | pConfig->zDb, pConfig->zName, | |||
| 23170 | pConfig->zDb, pConfig->zName, | |||
| 23171 | pConfig->zDb, pConfig->zName | |||
| 23172 | ); | |||
| 23173 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
| 23174 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
| 23175 | "DROP TABLE IF EXISTS %Q.'%q_docsize';", | |||
| 23176 | pConfig->zDb, pConfig->zName | |||
| 23177 | ); | |||
| 23178 | } | |||
| 23179 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
| 23180 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
| 23181 | "DROP TABLE IF EXISTS %Q.'%q_content';", | |||
| 23182 | pConfig->zDb, pConfig->zName | |||
| 23183 | ); | |||
| 23184 | } | |||
| 23185 | return rc; | |||
| 23186 | } | |||
| 23187 | ||||
| 23188 | static void fts5StorageRenameOne( /* Rename one shadow table; does nothing if *pRc already holds an error */ | |||
| 23189 | Fts5Config *pConfig, /* Current FTS5 configuration */ | |||
| 23190 | int *pRc, /* IN/OUT: Error code */ | |||
| 23191 | const char *zTail, /* Tail of table name e.g. "data", "config" */ | |||
| 23192 | const char *zName /* New name of FTS5 table */ | |||
| 23193 | ){ | |||
| 23194 | if( *pRc==SQLITE_OK0 ){ | |||
| 23195 | *pRc = fts5ExecPrintf(pConfig->db, 0, | |||
| 23196 | "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';", | |||
| 23197 | pConfig->zDb, pConfig->zName, zTail, zName, zTail | |||
| 23198 | ); | |||
| 23199 | } | |||
| 23200 | } | |||
| 23201 | ||||
| 23202 | static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){ /* Sync, then rename each shadow table to use the new table name zName */ | |||
| 23203 | Fts5Config *pConfig = pStorage->pConfig; | |||
| 23204 | int rc = sqlite3Fts5StorageSync(pStorage); /* a failure here makes every rename below a no-op */ | |||
| 23205 | ||||
| 23206 | fts5StorageRenameOne(pConfig, &rc, "data", zName); | |||
| 23207 | fts5StorageRenameOne(pConfig, &rc, "idx", zName); | |||
| 23208 | fts5StorageRenameOne(pConfig, &rc, "config", zName); | |||
| 23209 | if( pConfig->bColumnsize ){ /* docsize is renamed only when bColumnsize is set */ | |||
| 23210 | fts5StorageRenameOne(pConfig, &rc, "docsize", zName); | |||
| 23211 | } | |||
| 23212 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ /* content is renamed only for FTS5_CONTENT_NORMAL tables */ | |||
| 23213 | fts5StorageRenameOne(pConfig, &rc, "content", zName); | |||
| 23214 | } | |||
| 23215 | return rc; | |||
| 23216 | } | |||
| 23217 | ||||
| 23218 | /* | |||
| 23219 | ** Create the shadow table named zPost, with definition zDefn. Return | |||
| 23220 | ** SQLITE_OK if successful, or an SQLite error code otherwise. | |||
| | ** | |||
| | ** If executing the CREATE TABLE statement produces an error message, | |||
| | ** *pzErr is set to a new sqlite3_mprintf() string that wraps it and the | |||
| | ** original message is freed. " WITHOUT ROWID" is appended when bWithout | |||
| | ** is true, unless SQLITE_FTS5_NO_WITHOUT_ROWID is defined. | |||
| 23221 | */ | |||
| 23222 | static int sqlite3Fts5CreateTable( | |||
| 23223 | Fts5Config *pConfig, /* FTS5 configuration */ | |||
| 23224 | const char *zPost, /* Shadow table to create (e.g. "content") */ | |||
| 23225 | const char *zDefn, /* Columns etc. for shadow table */ | |||
| 23226 | int bWithout, /* True for without rowid */ | |||
| 23227 | char **pzErr /* OUT: Error message */ | |||
| 23228 | ){ | |||
| 23229 | int rc; | |||
| 23230 | char *zErr = 0; | |||
| 23231 | ||||
| 23232 | rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s", | |||
| 23233 | pConfig->zDb, pConfig->zName, zPost, zDefn, | |||
| 23234 | #ifndef SQLITE_FTS5_NO_WITHOUT_ROWID | |||
| 23235 | bWithout?" WITHOUT ROWID": | |||
| 23236 | #endif | |||
| 23237 | "" | |||
| 23238 | ); | |||
| 23239 | if( zErr ){ | |||
| 23240 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 23241 | "fts5: error creating shadow table %q_%s: %s", | |||
| 23242 | pConfig->zName, zPost, zErr | |||
| 23243 | ); | |||
| 23244 | sqlite3_freesqlite3_api->free(zErr); /* the raw message is released once wrapped */ | |||
| 23245 | } | |||
| 23246 | ||||
| 23247 | return rc; | |||
| 23248 | } | |||
| 23249 | ||||
| 23250 | /* | |||
| 23251 | ** Open a new Fts5Index handle. If the bCreate argument is true, create | |||
| 23252 | ** and initialize the underlying tables | |||
| 23253 | ** | |||
| 23254 | ** If successful, set *pp to point to the new object and return SQLITE_OK. | |||
| 23255 | ** Otherwise, set *pp to NULL and return an SQLite error code. | |||
| 23256 | */ | |||
| 23257 | static int sqlite3Fts5StorageOpen( | |||
| 23258 | Fts5Config *pConfig, | |||
| 23259 | Fts5Index *pIndex, | |||
| 23260 | int bCreate, | |||
| 23261 | Fts5Storage **pp, | |||
| 23262 | char **pzErr /* OUT: Error message */ | |||
| 23263 | ){ | |||
| 23264 | int rc = SQLITE_OK0; | |||
| 23265 | Fts5Storage *p; /* New object */ | |||
| 23266 | sqlite3_int64 nByte; /* Bytes of space to allocate */ | |||
| 23267 | ||||
| 23268 | nByte = sizeof(Fts5Storage) /* Fts5Storage object */ | |||
| 23269 | + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */ | |||
| 23270 | *pp = p = (Fts5Storage*)sqlite3_malloc64sqlite3_api->malloc64(nByte); | |||
| 23271 | if( !p ) return SQLITE_NOMEM7; | |||
| 23272 | ||||
| 23273 | memset(p, 0, (size_t)nByte); | |||
| 23274 | p->aTotalSize = (i64*)&p[1]; | |||
| 23275 | p->pConfig = pConfig; | |||
| 23276 | p->pIndex = pIndex; | |||
| 23277 | ||||
| 23278 | if( bCreate ){ | |||
| 23279 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | |||
| 23280 | || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | |||
| 23281 | ){ | |||
| 23282 | int nDefn = 32 + pConfig->nCol*10; | |||
| 23283 | char *zDefn = sqlite3_malloc64sqlite3_api->malloc64(32 + (sqlite3_int64)pConfig->nCol * 20); | |||
| 23284 | if( zDefn==0 ){ | |||
| 23285 | rc = SQLITE_NOMEM7; | |||
| 23286 | }else{ | |||
| 23287 | int i; | |||
| 23288 | int iOff; | |||
| 23289 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); | |||
| 23290 | iOff = (int)strlen(zDefn); | |||
| 23291 | for(i=0; i<pConfig->nCol; i++){ | |||
| 23292 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | |||
| 23293 | || pConfig->abUnindexed[i] | |||
| 23294 | ){ | |||
| 23295 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); | |||
| 23296 | iOff += (int)strlen(&zDefn[iOff]); | |||
| 23297 | } | |||
| 23298 | } | |||
| 23299 | if( pConfig->bLocale ){ | |||
| 23300 | for(i=0; i<pConfig->nCol; i++){ | |||
| 23301 | if( pConfig->abUnindexed[i]==0 ){ | |||
| 23302 | sqlite3_snprintfsqlite3_api->xsnprintf(nDefn-iOff, &zDefn[iOff], ", l%d", i); | |||
| 23303 | iOff += (int)strlen(&zDefn[iOff]); | |||
| 23304 | } | |||
| 23305 | } | |||
| 23306 | } | |||
| 23307 | rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); | |||
| 23308 | } | |||
| 23309 | sqlite3_freesqlite3_api->free(zDefn); | |||
| 23310 | } | |||
| 23311 | ||||
| 23312 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
| 23313 | const char *zCols = "id INTEGER PRIMARY KEY, sz BLOB"; | |||
| 23314 | if( pConfig->bContentlessDelete ){ | |||
| 23315 | zCols = "id INTEGER PRIMARY KEY, sz BLOB, origin INTEGER"; | |||
| 23316 | } | |||
| 23317 | rc = sqlite3Fts5CreateTable(pConfig, "docsize", zCols, 0, pzErr); | |||
| 23318 | } | |||
| 23319 | if( rc==SQLITE_OK0 ){ | |||
| 23320 | rc = sqlite3Fts5CreateTable( | |||
| 23321 | pConfig, "config", "k PRIMARY KEY, v", 1, pzErr | |||
| 23322 | ); | |||
| 23323 | } | |||
| 23324 | if( rc==SQLITE_OK0 ){ | |||
| 23325 | rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION4); | |||
| 23326 | } | |||
| 23327 | } | |||
| 23328 | ||||
| 23329 | if( rc ){ | |||
| 23330 | sqlite3Fts5StorageClose(p); | |||
| 23331 | *pp = 0; | |||
| 23332 | } | |||
| 23333 | return rc; | |||
| 23334 | } | |||
| 23335 | ||||
| 23336 | /* | |||
| 23337 | ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen(). | |||
| 23338 | */ | |||
| 23339 | static int sqlite3Fts5StorageClose(Fts5Storage *p){ | |||
| 23340 | int rc = SQLITE_OK0; | |||
| 23341 | if( p ){ | |||
| 23342 | int i; | |||
| 23343 | ||||
| 23344 | /* Finalize all SQL statements */ | |||
| 23345 | for(i=0; i<ArraySize(p->aStmt)((int)(sizeof(p->aStmt) / sizeof(p->aStmt[0]))); i++){ | |||
| 23346 | sqlite3_finalizesqlite3_api->finalize(p->aStmt[i]); | |||
| 23347 | } | |||
| 23348 | ||||
| 23349 | sqlite3_freesqlite3_api->free(p); | |||
| 23350 | } | |||
| 23351 | return rc; | |||
| 23352 | } | |||
| 23353 | ||||
| 23354 | typedef struct Fts5InsertCtx Fts5InsertCtx; | |||
| 23355 | struct Fts5InsertCtx { | |||
| 23356 | Fts5Storage *pStorage; | |||
| 23357 | int iCol; | |||
| 23358 | int szCol; /* Size of column value in tokens */ | |||
| 23359 | }; | |||
| 23360 | ||||
| 23361 | /* | |||
| 23362 | ** Tokenization callback used when inserting tokens into the FTS index. | |||
| 23363 | */ | |||
| 23364 | static int fts5StorageInsertCallback( | |||
| 23365 | void *pContext, /* Pointer to Fts5InsertCtx object */ | |||
| 23366 | int tflags, | |||
| 23367 | const char *pToken, /* Buffer containing token */ | |||
| 23368 | int nToken, /* Size of token in bytes */ | |||
| 23369 | int iUnused1, /* Start offset of token */ | |||
| 23370 | int iUnused2 /* End offset of token */ | |||
| 23371 | ){ | |||
| 23372 | Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; | |||
| 23373 | Fts5Index *pIdx = pCtx->pStorage->pIndex; | |||
| 23374 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
| 23375 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | |||
| 23376 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 || pCtx->szCol==0 ){ | |||
| 23377 | pCtx->szCol++; | |||
| 23378 | } | |||
| 23379 | return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); | |||
| 23380 | } | |||
| 23381 | ||||
| 23382 | /* | |||
| 23383 | ** This function is used as part of an UPDATE statement that modifies the | |||
| 23384 | ** rowid of a row. In that case, this function is called first to set | |||
| 23385 | ** Fts5Storage.pSavedRow to point to a statement that may be used to | |||
| 23386 | ** access the original values of the row being deleted - iDel. | |||
| 23387 | ** | |||
| 23388 | ** SQLITE_OK is returned if successful, or an SQLite error code otherwise. | |||
| 23389 | ** It is not considered an error if row iDel does not exist. In this case | |||
| 23390 | ** pSavedRow is not set and SQLITE_OK returned. | |||
| 23391 | */ | |||
| 23392 | static int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel){ | |||
| 23393 | int rc = SQLITE_OK0; | |||
| 23394 | sqlite3_stmt *pSeek = 0; | |||
| 23395 | ||||
| 23396 | assert( p->pSavedRow==0 )((void) (0)); | |||
| 23397 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2+1, &pSeek, 0); | |||
| 23398 | if( rc==SQLITE_OK0 ){ | |||
| 23399 | sqlite3_bind_int64sqlite3_api->bind_int64(pSeek, 1, iDel); | |||
| 23400 | if( sqlite3_stepsqlite3_api->step(pSeek)!=SQLITE_ROW100 ){ | |||
| 23401 | rc = sqlite3_resetsqlite3_api->reset(pSeek); | |||
| 23402 | }else{ | |||
| 23403 | p->pSavedRow = pSeek; | |||
| 23404 | } | |||
| 23405 | } | |||
| 23406 | ||||
| 23407 | return rc; | |||
| 23408 | } | |||
| 23409 | ||||
| 23410 | /* | |||
| 23411 | ** If a row with rowid iDel is present in the %_content table, add the | |||
| 23412 | ** delete-markers to the FTS index necessary to delete it. Do not actually | |||
| 23413 | ** remove the %_content row at this time though. | |||
| 23414 | ** | |||
| 23415 | ** If parameter bSaveRow is true, then Fts5Storage.pSavedRow is left | |||
| 23416 | ** pointing to a statement (FTS5_STMT_LOOKUP2) that may be used to access | |||
| 23417 | ** the original values of the row being deleted. This is used by UPDATE | |||
| 23418 | ** statements. | |||
| 23419 | */ | |||
| 23420 | static int fts5StorageDeleteFromIndex( | |||
| 23421 | Fts5Storage *p, | |||
| 23422 | i64 iDel, | |||
| 23423 | sqlite3_value **apVal, | |||
| 23424 | int bSaveRow /* True to set pSavedRow */ | |||
| 23425 | ){ | |||
| 23426 | Fts5Config *pConfig = p->pConfig; | |||
| 23427 | sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */ | |||
| 23428 | int rc = SQLITE_OK0; /* Return code */ | |||
| 23429 | int rc2; /* sqlite3_reset() return code */ | |||
| 23430 | int iCol; | |||
| 23431 | Fts5InsertCtx ctx; | |||
| 23432 | ||||
| 23433 | assert( bSaveRow==0 || apVal==0 )((void) (0)); | |||
| 23434 | assert( bSaveRow==0 || bSaveRow==1 )((void) (0)); | |||
| 23435 | assert( FTS5_STMT_LOOKUP2==FTS5_STMT_LOOKUP+1 )((void) (0)); | |||
| 23436 | ||||
| 23437 | if( apVal==0 ){ | |||
| 23438 | if( p->pSavedRow && bSaveRow ){ | |||
| 23439 | pSeek = p->pSavedRow; | |||
| 23440 | p->pSavedRow = 0; | |||
| 23441 | }else{ | |||
| 23442 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP2+bSaveRow, &pSeek, 0); | |||
| 23443 | if( rc!=SQLITE_OK0 ) return rc; | |||
| 23444 | sqlite3_bind_int64sqlite3_api->bind_int64(pSeek, 1, iDel); | |||
| 23445 | if( sqlite3_stepsqlite3_api->step(pSeek)!=SQLITE_ROW100 ){ | |||
| 23446 | return sqlite3_resetsqlite3_api->reset(pSeek); | |||
| 23447 | } | |||
| 23448 | } | |||
| 23449 | } | |||
| 23450 | ||||
| 23451 | ctx.pStorage = p; | |||
| 23452 | ctx.iCol = -1; | |||
| 23453 | for(iCol=1; rc==SQLITE_OK0 && iCol<=pConfig->nCol; iCol++){ | |||
| 23454 | if( pConfig->abUnindexed[iCol-1]==0 ){ | |||
| 23455 | sqlite3_value *pVal = 0; | |||
| 23456 | const char *pText = 0; | |||
| 23457 | int nText = 0; | |||
| 23458 | const char *pLoc = 0; | |||
| 23459 | int nLoc = 0; | |||
| 23460 | ||||
| 23461 | assert( pSeek==0 || apVal==0 )((void) (0)); | |||
| 23462 | assert( pSeek!=0 || apVal!=0 )((void) (0)); | |||
| 23463 | if( pSeek ){ | |||
| 23464 | pVal = sqlite3_column_valuesqlite3_api->column_value(pSeek, iCol); | |||
| 23465 | }else{ | |||
| 23466 | pVal = apVal[iCol-1]; | |||
| 23467 | } | |||
| 23468 | ||||
| 23469 | if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
| 23470 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
| 23471 | }else{ | |||
| 23472 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
| 23473 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
| 23474 | if( pConfig->bLocale && pSeek ){ | |||
| 23475 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pSeek, iCol + pConfig->nCol); | |||
| 23476 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pSeek, iCol + pConfig->nCol); | |||
| 23477 | } | |||
| 23478 | } | |||
| 23479 | ||||
| 23480 | if( rc==SQLITE_OK0 ){ | |||
| 23481 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
| 23482 | ctx.szCol = 0; | |||
| 23483 | rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT0x0004, | |||
| 23484 | pText, nText, (void*)&ctx, fts5StorageInsertCallback | |||
| 23485 | ); | |||
| 23486 | p->aTotalSize[iCol-1] -= (i64)ctx.szCol; | |||
| 23487 | if( rc==SQLITE_OK0 && p->aTotalSize[iCol-1]<0 ){ | |||
| 23488 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 23489 | } | |||
| 23490 | sqlite3Fts5ClearLocale(pConfig); | |||
| 23491 | } | |||
| 23492 | } | |||
| 23493 | } | |||
| 23494 | if( rc==SQLITE_OK0 && p->nTotalRow<1 ){ | |||
| 23495 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 23496 | }else{ | |||
| 23497 | p->nTotalRow--; | |||
| 23498 | } | |||
| 23499 | ||||
| 23500 | if( rc==SQLITE_OK0 && bSaveRow ){ | |||
| 23501 | assert( p->pSavedRow==0 )((void) (0)); | |||
| 23502 | p->pSavedRow = pSeek; | |||
| 23503 | }else{ | |||
| 23504 | rc2 = sqlite3_resetsqlite3_api->reset(pSeek); | |||
| 23505 | if( rc==SQLITE_OK0 ) rc = rc2; | |||
| 23506 | } | |||
| 23507 | return rc; | |||
| 23508 | } | |||
| 23509 | ||||
| 23510 | /* | |||
| 23511 | ** Reset any saved statement pSavedRow. Zero pSavedRow as well. This | |||
| 23512 | ** should be called by the xUpdate() method of the fts5 table before | |||
| 23513 | ** returning from any operation that may have set Fts5Storage.pSavedRow. | |||
| 23514 | */ | |||
| 23515 | static void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage *pStorage){ | |||
| 23516 | assert( pStorage->pSavedRow==0((void) (0)) | |||
| 23517 | || pStorage->pSavedRow==pStorage->aStmt[FTS5_STMT_LOOKUP2]((void) (0)) | |||
| 23518 | )((void) (0)); | |||
| 23519 | sqlite3_resetsqlite3_api->reset(pStorage->pSavedRow); | |||
| 23520 | pStorage->pSavedRow = 0; | |||
| 23521 | } | |||
| 23522 | ||||
| 23523 | /* | |||
| 23524 | ** This function is called to process a DELETE on a contentless_delete=1 | |||
| 23525 | ** table. It adds the tombstone required to delete the entry with rowid | |||
| 23526 | ** iDel. If successful, SQLITE_OK is returned. Or, if an error occurs, | |||
| 23527 | ** an SQLite error code. | |||
| 23528 | */ | |||
| 23529 | static int fts5StorageContentlessDelete(Fts5Storage *p, i64 iDel){ | |||
| 23530 | i64 iOrigin = 0; | |||
| 23531 | sqlite3_stmt *pLookup = 0; | |||
| 23532 | int rc = SQLITE_OK0; | |||
| 23533 | ||||
| 23534 | assert( p->pConfig->bContentlessDelete )((void) (0)); | |||
| 23535 | assert( p->pConfig->eContent==FTS5_CONTENT_NONE((void) (0)) | |||
| 23536 | || p->pConfig->eContent==FTS5_CONTENT_UNINDEXED((void) (0)) | |||
| 23537 | )((void) (0)); | |||
| 23538 | ||||
| 23539 | /* Look up the origin of the document in the %_docsize table. Store | |||
| 23540 | ** this in stack variable iOrigin. */ | |||
| 23541 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE9, &pLookup, 0); | |||
| 23542 | if( rc==SQLITE_OK0 ){ | |||
| 23543 | sqlite3_bind_int64sqlite3_api->bind_int64(pLookup, 1, iDel); | |||
| 23544 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pLookup) ){ | |||
| 23545 | iOrigin = sqlite3_column_int64sqlite3_api->column_int64(pLookup, 1); | |||
| 23546 | } | |||
| 23547 | rc = sqlite3_resetsqlite3_api->reset(pLookup); | |||
| 23548 | } | |||
| 23549 | ||||
| 23550 | if( rc==SQLITE_OK0 && iOrigin!=0 ){ | |||
| 23551 | rc = sqlite3Fts5IndexContentlessDelete(p->pIndex, iOrigin, iDel); | |||
| 23552 | } | |||
| 23553 | ||||
| 23554 | return rc; | |||
| 23555 | } | |||
| 23556 | ||||
| 23557 | /* | |||
| 23558 | ** Insert a record into the %_docsize table. Specifically, do: | |||
| 23559 | ** | |||
| 23560 | ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); | |||
| 23561 | ** | |||
| 23562 | ** If there is no %_docsize table (as happens if the columnsize=0 option | |||
| 23563 | ** is specified when the FTS5 table is created), this function is a no-op. | |||
| 23564 | */ | |||
| 23565 | static int fts5StorageInsertDocsize( | |||
| 23566 | Fts5Storage *p, /* Storage module to write to */ | |||
| 23567 | i64 iRowid, /* id value */ | |||
| 23568 | Fts5Buffer *pBuf /* sz value */ | |||
| 23569 | ){ | |||
| 23570 | int rc = SQLITE_OK0; | |||
| 23571 | if( p->pConfig->bColumnsize ){ | |||
| 23572 | sqlite3_stmt *pReplace = 0; | |||
| 23573 | rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE7, &pReplace, 0); | |||
| 23574 | if( rc==SQLITE_OK0 ){ | |||
| 23575 | sqlite3_bind_int64sqlite3_api->bind_int64(pReplace, 1, iRowid); | |||
| 23576 | if( p->pConfig->bContentlessDelete ){ | |||
| 23577 | i64 iOrigin = 0; | |||
| 23578 | rc = sqlite3Fts5IndexGetOrigin(p->pIndex, &iOrigin); | |||
| 23579 | sqlite3_bind_int64sqlite3_api->bind_int64(pReplace, 3, iOrigin); | |||
| 23580 | } | |||
| 23581 | } | |||
| 23582 | if( rc==SQLITE_OK0 ){ | |||
| 23583 | sqlite3_bind_blobsqlite3_api->bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 23584 | sqlite3_stepsqlite3_api->step(pReplace); | |||
| 23585 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | |||
| 23586 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 2); | |||
| 23587 | } | |||
| 23588 | } | |||
| 23589 | return rc; | |||
| 23590 | } | |||
| 23591 | ||||
| 23592 | /* | |||
| 23593 | ** Load the contents of the "averages" record from disk into the | |||
| 23594 | ** p->nTotalRow and p->aTotalSize[] variables. If successful, and if | |||
| 23595 | ** argument bCache is true, set the p->bTotalsValid flag to indicate | |||
| 23596 | ** that the contents of aTotalSize[] and nTotalRow are valid until | |||
| 23597 | ** further notice. | |||
| 23598 | ** | |||
| 23599 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
| 23600 | ** occurs. | |||
| 23601 | */ | |||
| 23602 | static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ | |||
| 23603 | int rc = SQLITE_OK0; | |||
| 23604 | if( p->bTotalsValid==0 ){ | |||
| 23605 | rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); | |||
| 23606 | p->bTotalsValid = bCache; | |||
| 23607 | } | |||
| 23608 | return rc; | |||
| 23609 | } | |||
| 23610 | ||||
| 23611 | /* | |||
| 23612 | ** Store the current contents of the p->nTotalRow and p->aTotalSize[] | |||
| 23613 | ** variables in the "averages" record on disk. | |||
| 23614 | ** | |||
| 23615 | ** Return SQLITE_OK if successful, or an SQLite error code if an error | |||
| 23616 | ** occurs. | |||
| 23617 | */ | |||
| 23618 | static int fts5StorageSaveTotals(Fts5Storage *p){ | |||
| 23619 | int nCol = p->pConfig->nCol; | |||
| 23620 | int i; | |||
| 23621 | Fts5Buffer buf; | |||
| 23622 | int rc = SQLITE_OK0; | |||
| 23623 | memset(&buf, 0, sizeof(buf)); | |||
| 23624 | ||||
| 23625 | sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow); | |||
| 23626 | for(i=0; i<nCol; i++){ | |||
| 23627 | sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]); | |||
| 23628 | } | |||
| 23629 | if( rc==SQLITE_OK0 ){ | |||
| 23630 | rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n); | |||
| 23631 | } | |||
| 23632 | sqlite3_freesqlite3_api->free(buf.p); | |||
| 23633 | ||||
| 23634 | return rc; | |||
| 23635 | } | |||
| 23636 | ||||
| 23637 | /* | |||
| 23638 | ** Remove a row from the FTS table. | |||
| 23639 | */ | |||
| 23640 | static int sqlite3Fts5StorageDelete( | |||
| 23641 | Fts5Storage *p, /* Storage object */ | |||
| 23642 | i64 iDel, /* Rowid to delete from table */ | |||
| 23643 | sqlite3_value **apVal, /* Optional - values to remove from index */ | |||
| 23644 | int bSaveRow /* If true, set pSavedRow for deleted row */ | |||
| 23645 | ){ | |||
| 23646 | Fts5Config *pConfig = p->pConfig; | |||
| 23647 | int rc; | |||
| 23648 | sqlite3_stmt *pDel = 0; | |||
| 23649 | ||||
| 23650 | assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 )((void) (0)); | |||
| 23651 | rc = fts5StorageLoadTotals(p, 1); | |||
| 23652 | ||||
| 23653 | /* Delete the index records */ | |||
| 23654 | if( rc==SQLITE_OK0 ){ | |||
| 23655 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); | |||
| 23656 | } | |||
| 23657 | ||||
| 23658 | if( rc==SQLITE_OK0 ){ | |||
| 23659 | if( p->pConfig->bContentlessDelete ){ | |||
| 23660 | rc = fts5StorageContentlessDelete(p, iDel); | |||
| 23661 | if( rc==SQLITE_OK0 | |||
| 23662 | && bSaveRow | |||
| 23663 | && p->pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | |||
| 23664 | ){ | |||
| 23665 | rc = sqlite3Fts5StorageFindDeleteRow(p, iDel); | |||
| 23666 | } | |||
| 23667 | }else{ | |||
| 23668 | rc = fts5StorageDeleteFromIndex(p, iDel, apVal, bSaveRow); | |||
| 23669 | } | |||
| 23670 | } | |||
| 23671 | ||||
| 23672 | /* Delete the %_docsize record */ | |||
| 23673 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
| 23674 | rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE8, &pDel, 0); | |||
| 23675 | if( rc==SQLITE_OK0 ){ | |||
| 23676 | sqlite3_bind_int64sqlite3_api->bind_int64(pDel, 1, iDel); | |||
| 23677 | sqlite3_stepsqlite3_api->step(pDel); | |||
| 23678 | rc = sqlite3_resetsqlite3_api->reset(pDel); | |||
| 23679 | } | |||
| 23680 | } | |||
| 23681 | ||||
| 23682 | /* Delete the %_content record */ | |||
| 23683 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 | |||
| 23684 | || pConfig->eContent==FTS5_CONTENT_UNINDEXED3 | |||
| 23685 | ){ | |||
| 23686 | if( rc==SQLITE_OK0 ){ | |||
| 23687 | rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT6, &pDel, 0); | |||
| 23688 | } | |||
| 23689 | if( rc==SQLITE_OK0 ){ | |||
| 23690 | sqlite3_bind_int64sqlite3_api->bind_int64(pDel, 1, iDel); | |||
| 23691 | sqlite3_stepsqlite3_api->step(pDel); | |||
| 23692 | rc = sqlite3_resetsqlite3_api->reset(pDel); | |||
| 23693 | } | |||
| 23694 | } | |||
| 23695 | ||||
| 23696 | return rc; | |||
| 23697 | } | |||
| 23698 | ||||
| 23699 | /* | |||
| 23700 | ** Delete all entries in the FTS5 index. | |||
| 23701 | */ | |||
| 23702 | static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ | |||
| 23703 | Fts5Config *pConfig = p->pConfig; | |||
| 23704 | int rc; | |||
| 23705 | ||||
| 23706 | p->bTotalsValid = 0; | |||
| 23707 | ||||
| 23708 | /* Delete the contents of the %_data and %_docsize tables. */ | |||
| 23709 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
| 23710 | "DELETE FROM %Q.'%q_data';" | |||
| 23711 | "DELETE FROM %Q.'%q_idx';", | |||
| 23712 | pConfig->zDb, pConfig->zName, | |||
| 23713 | pConfig->zDb, pConfig->zName | |||
| 23714 | ); | |||
| 23715 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
| 23716 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
| 23717 | "DELETE FROM %Q.'%q_docsize';", pConfig->zDb, pConfig->zName | |||
| 23718 | ); | |||
| 23719 | } | |||
| 23720 | ||||
| 23721 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_UNINDEXED3 ){ | |||
| 23722 | rc = fts5ExecPrintf(pConfig->db, 0, | |||
| 23723 | "DELETE FROM %Q.'%q_content';", pConfig->zDb, pConfig->zName | |||
| 23724 | ); | |||
| 23725 | } | |||
| 23726 | ||||
| 23727 | /* Reinitialize the %_data table. This call creates the initial structure | |||
| 23728 | ** and averages records. */ | |||
| 23729 | if( rc==SQLITE_OK0 ){ | |||
| 23730 | rc = sqlite3Fts5IndexReinit(p->pIndex); | |||
| 23731 | } | |||
| 23732 | if( rc==SQLITE_OK0 ){ | |||
| 23733 | rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION4); | |||
| 23734 | } | |||
| 23735 | return rc; | |||
| 23736 | } | |||
| 23737 | ||||
| 23738 | static int sqlite3Fts5StorageRebuild(Fts5Storage *p){ | |||
| 23739 | Fts5Buffer buf = {0,0,0}; | |||
| 23740 | Fts5Config *pConfig = p->pConfig; | |||
| 23741 | sqlite3_stmt *pScan = 0; | |||
| 23742 | Fts5InsertCtx ctx; | |||
| 23743 | int rc, rc2; | |||
| 23744 | ||||
| 23745 | memset(&ctx, 0, sizeof(Fts5InsertCtx)); | |||
| 23746 | ctx.pStorage = p; | |||
| 23747 | rc = sqlite3Fts5StorageDeleteAll(p); | |||
| 23748 | if( rc==SQLITE_OK0 ){ | |||
| 23749 | rc = fts5StorageLoadTotals(p, 1); | |||
| 23750 | } | |||
| 23751 | ||||
| 23752 | if( rc==SQLITE_OK0 ){ | |||
| 23753 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN11, &pScan, pConfig->pzErrmsg); | |||
| 23754 | } | |||
| 23755 | ||||
| 23756 | while( rc==SQLITE_OK0 && SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pScan) ){ | |||
| 23757 | i64 iRowid = sqlite3_column_int64sqlite3_api->column_int64(pScan, 0); | |||
| 23758 | ||||
| 23759 | sqlite3Fts5BufferZero(&buf); | |||
| 23760 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); | |||
| 23761 | for(ctx.iCol=0; rc==SQLITE_OK0 && ctx.iCol<pConfig->nCol; ctx.iCol++){ | |||
| 23762 | ctx.szCol = 0; | |||
| 23763 | if( pConfig->abUnindexed[ctx.iCol]==0 ){ | |||
| 23764 | int nText = 0; /* Size of pText in bytes */ | |||
| 23765 | const char *pText = 0; /* Pointer to buffer containing text value */ | |||
| 23766 | int nLoc = 0; /* Size of pLoc in bytes */ | |||
| 23767 | const char *pLoc = 0; /* Pointer to buffer containing text value */ | |||
| 23768 | ||||
| 23769 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pScan, ctx.iCol+1); | |||
| 23770 | if( pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | |||
| 23771 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | |||
| 23772 | ){ | |||
| 23773 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
| 23774 | }else{ | |||
| 23775 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
| 23776 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
| 23777 | if( pConfig->bLocale ){ | |||
| 23778 | int iCol = ctx.iCol + 1 + pConfig->nCol; | |||
| 23779 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pScan, iCol); | |||
| 23780 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pScan, iCol); | |||
| 23781 | } | |||
| 23782 | } | |||
| 23783 | ||||
| 23784 | if( rc==SQLITE_OK0 ){ | |||
| 23785 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
| 23786 | rc = sqlite3Fts5Tokenize(pConfig, | |||
| 23787 | FTS5_TOKENIZE_DOCUMENT0x0004, | |||
| 23788 | pText, nText, | |||
| 23789 | (void*)&ctx, | |||
| 23790 | fts5StorageInsertCallback | |||
| 23791 | ); | |||
| 23792 | sqlite3Fts5ClearLocale(pConfig); | |||
| 23793 | } | |||
| 23794 | } | |||
| 23795 | sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | |||
| 23796 | p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; | |||
| 23797 | } | |||
| 23798 | p->nTotalRow++; | |||
| 23799 | ||||
| 23800 | if( rc==SQLITE_OK0 ){ | |||
| 23801 | rc = fts5StorageInsertDocsize(p, iRowid, &buf); | |||
| 23802 | } | |||
| 23803 | } | |||
| 23804 | sqlite3_freesqlite3_api->free(buf.p); | |||
| 23805 | rc2 = sqlite3_resetsqlite3_api->reset(pScan); | |||
| 23806 | if( rc==SQLITE_OK0 ) rc = rc2; | |||
| 23807 | ||||
| 23808 | /* Write the averages record */ | |||
| 23809 | if( rc==SQLITE_OK0 ){ | |||
| 23810 | rc = fts5StorageSaveTotals(p); | |||
| 23811 | } | |||
| 23812 | return rc; | |||
| 23813 | } | |||
| 23814 | ||||
| 23815 | static int sqlite3Fts5StorageOptimize(Fts5Storage *p){ | |||
| 23816 | return sqlite3Fts5IndexOptimize(p->pIndex); | |||
| 23817 | } | |||
| 23818 | ||||
| 23819 | static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ | |||
| 23820 | return sqlite3Fts5IndexMerge(p->pIndex, nMerge); | |||
| 23821 | } | |||
| 23822 | ||||
| 23823 | static int sqlite3Fts5StorageReset(Fts5Storage *p){ | |||
| 23824 | return sqlite3Fts5IndexReset(p->pIndex); | |||
| 23825 | } | |||
| 23826 | ||||
| 23827 | /* | |||
| 23828 | ** Allocate a new rowid. This is used for "external content" tables when | |||
| 23829 | ** a NULL value is inserted into the rowid column. The new rowid is allocated | |||
| 23830 | ** by inserting a dummy row into the %_docsize table. The dummy will be | |||
| 23831 | ** overwritten later. | |||
| 23832 | ** | |||
| 23833 | ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In | |||
| 23834 | ** this case the user is required to provide a rowid explicitly. | |||
| 23835 | */ | |||
| 23836 | static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ | |||
| 23837 | int rc = SQLITE_MISMATCH20; | |||
| 23838 | if( p->pConfig->bColumnsize ){ | |||
| 23839 | sqlite3_stmt *pReplace = 0; | |||
| 23840 | rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE7, &pReplace, 0); | |||
| 23841 | if( rc==SQLITE_OK0 ){ | |||
| 23842 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 1); | |||
| 23843 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 2); | |||
| 23844 | sqlite3_stepsqlite3_api->step(pReplace); | |||
| 23845 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | |||
| 23846 | } | |||
| 23847 | if( rc==SQLITE_OK0 ){ | |||
| 23848 | *piRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->pConfig->db); | |||
| 23849 | } | |||
| 23850 | } | |||
| 23851 | return rc; | |||
| 23852 | } | |||
| 23853 | ||||
| 23854 | /* | |||
| 23855 | ** Insert a new row into the FTS content table. | |||
| 23856 | */ | |||
| 23857 | static int sqlite3Fts5StorageContentInsert( | |||
| 23858 | Fts5Storage *p, | |||
| 23859 | int bReplace, /* True to use REPLACE instead of INSERT */ | |||
| 23860 | sqlite3_value **apVal, | |||
| 23861 | i64 *piRowid | |||
| 23862 | ){ | |||
| 23863 | Fts5Config *pConfig = p->pConfig; | |||
| 23864 | int rc = SQLITE_OK0; | |||
| 23865 | ||||
| 23866 | /* Insert the new row into the %_content table. */ | |||
| 23867 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL0 | |||
| 23868 | && pConfig->eContent!=FTS5_CONTENT_UNINDEXED3 | |||
| 23869 | ){ | |||
| 23870 | if( sqlite3_value_typesqlite3_api->value_type(apVal[1])==SQLITE_INTEGER1 ){ | |||
| 23871 | *piRowid = sqlite3_value_int64sqlite3_api->value_int64(apVal[1]); | |||
| 23872 | }else{ | |||
| 23873 | rc = fts5StorageNewRowid(p, piRowid); | |||
| 23874 | } | |||
| 23875 | }else{ | |||
| 23876 | sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */ | |||
| 23877 | int i; /* Counter variable */ | |||
| 23878 | ||||
| 23879 | assert( FTS5_STMT_INSERT_CONTENT+1==FTS5_STMT_REPLACE_CONTENT )((void) (0)); | |||
| 23880 | assert( bReplace==0 || bReplace==1 )((void) (0)); | |||
| 23881 | rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT4+bReplace, &pInsert, 0); | |||
| 23882 | if( pInsert ) sqlite3_clear_bindingssqlite3_api->clear_bindings(pInsert); | |||
| 23883 | ||||
| 23884 | /* Bind the rowid value */ | |||
| 23885 | sqlite3_bind_valuesqlite3_api->bind_value(pInsert, 1, apVal[1]); | |||
| 23886 | ||||
| 23887 | /* Loop through values for user-defined columns. i=2 is the leftmost | |||
| 23888 | ** user-defined column. As is column 1 of pSavedRow. */ | |||
| 23889 | for(i=2; rc==SQLITE_OK0 && i<=pConfig->nCol+1; i++){ | |||
| 23890 | int bUnindexed = pConfig->abUnindexed[i-2]; | |||
| 23891 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 || bUnindexed ){ | |||
| 23892 | sqlite3_value *pVal = apVal[i]; | |||
| 23893 | ||||
| 23894 | if( sqlite3_value_nochangesqlite3_api->value_nochange(pVal) && p->pSavedRow ){ | |||
| 23895 | /* This is an UPDATE statement, and user-defined column (i-2) was not | |||
| 23896 | ** modified. Retrieve the value from Fts5Storage.pSavedRow. */ | |||
| 23897 | pVal = sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, i-1); | |||
| 23898 | if( pConfig->bLocale && bUnindexed==0 ){ | |||
| 23899 | sqlite3_bind_valuesqlite3_api->bind_value(pInsert, pConfig->nCol + i, | |||
| 23900 | sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, pConfig->nCol + i - 1) | |||
| 23901 | ); | |||
| 23902 | } | |||
| 23903 | }else if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
| 23904 | const char *pText = 0; | |||
| 23905 | const char *pLoc = 0; | |||
| 23906 | int nText = 0; | |||
| 23907 | int nLoc = 0; | |||
| 23908 | assert( pConfig->bLocale )((void) (0)); | |||
| 23909 | ||||
| 23910 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
| 23911 | if( rc==SQLITE_OK0 ){ | |||
| 23912 | sqlite3_bind_textsqlite3_api->bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 23913 | if( bUnindexed==0 ){ | |||
| 23914 | int iLoc = pConfig->nCol + i; | |||
| 23915 | sqlite3_bind_textsqlite3_api->bind_text(pInsert, iLoc, pLoc, nLoc, SQLITE_TRANSIENT((sqlite3_destructor_type)-1)); | |||
| 23916 | } | |||
| 23917 | } | |||
| 23918 | ||||
| 23919 | continue; | |||
| 23920 | } | |||
| 23921 | ||||
| 23922 | rc = sqlite3_bind_valuesqlite3_api->bind_value(pInsert, i, pVal); | |||
| 23923 | } | |||
| 23924 | } | |||
| 23925 | if( rc==SQLITE_OK0 ){ | |||
| 23926 | sqlite3_stepsqlite3_api->step(pInsert); | |||
| 23927 | rc = sqlite3_resetsqlite3_api->reset(pInsert); | |||
| 23928 | } | |||
| 23929 | *piRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(pConfig->db); | |||
| 23930 | } | |||
| 23931 | ||||
| 23932 | return rc; | |||
| 23933 | } | |||
| 23934 | ||||
| 23935 | /* | |||
| 23936 | ** Insert new entries into the FTS index and %_docsize table. | |||
| 23937 | */ | |||
| 23938 | static int sqlite3Fts5StorageIndexInsert( | |||
| 23939 | Fts5Storage *p, | |||
| 23940 | sqlite3_value **apVal, | |||
| 23941 | i64 iRowid | |||
| 23942 | ){ | |||
| 23943 | Fts5Config *pConfig = p->pConfig; | |||
| 23944 | int rc = SQLITE_OK0; /* Return code */ | |||
| 23945 | Fts5InsertCtx ctx; /* Tokenization callback context object */ | |||
| 23946 | Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ | |||
| 23947 | ||||
| 23948 | memset(&buf, 0, sizeof(Fts5Buffer)); | |||
| 23949 | ctx.pStorage = p; | |||
| 23950 | rc = fts5StorageLoadTotals(p, 1); | |||
| 23951 | ||||
| 23952 | if( rc==SQLITE_OK0 ){ | |||
| 23953 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); | |||
| 23954 | } | |||
| 23955 | for(ctx.iCol=0; rc==SQLITE_OK0 && ctx.iCol<pConfig->nCol; ctx.iCol++){ | |||
| 23956 | ctx.szCol = 0; | |||
| 23957 | if( pConfig->abUnindexed[ctx.iCol]==0 ){ | |||
| 23958 | int nText = 0; /* Size of pText in bytes */ | |||
| 23959 | const char *pText = 0; /* Pointer to buffer containing text value */ | |||
| 23960 | int nLoc = 0; /* Size of pText in bytes */ | |||
| 23961 | const char *pLoc = 0; /* Pointer to buffer containing text value */ | |||
| 23962 | ||||
| 23963 | sqlite3_value *pVal = apVal[ctx.iCol+2]; | |||
| 23964 | if( p->pSavedRow && sqlite3_value_nochangesqlite3_api->value_nochange(pVal) ){ | |||
| 23965 | pVal = sqlite3_column_valuesqlite3_api->column_value(p->pSavedRow, ctx.iCol+1); | |||
| 23966 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 && pConfig->bLocale ){ | |||
| 23967 | int iCol = ctx.iCol + 1 + pConfig->nCol; | |||
| 23968 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(p->pSavedRow, iCol); | |||
| 23969 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(p->pSavedRow, iCol); | |||
| 23970 | } | |||
| 23971 | }else{ | |||
| 23972 | pVal = apVal[ctx.iCol+2]; | |||
| 23973 | } | |||
| 23974 | ||||
| 23975 | if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ | |||
| 23976 | rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); | |||
| 23977 | }else{ | |||
| 23978 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
| 23979 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
| 23980 | } | |||
| 23981 | ||||
| 23982 | if( rc==SQLITE_OK0 ){ | |||
| 23983 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
| 23984 | rc = sqlite3Fts5Tokenize(pConfig, | |||
| 23985 | FTS5_TOKENIZE_DOCUMENT0x0004, pText, nText, (void*)&ctx, | |||
| 23986 | fts5StorageInsertCallback | |||
| 23987 | ); | |||
| 23988 | sqlite3Fts5ClearLocale(pConfig); | |||
| 23989 | } | |||
| 23990 | } | |||
| 23991 | sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); | |||
| 23992 | p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; | |||
| 23993 | } | |||
| 23994 | p->nTotalRow++; | |||
| 23995 | ||||
| 23996 | /* Write the %_docsize record */ | |||
| 23997 | if( rc==SQLITE_OK0 ){ | |||
| 23998 | rc = fts5StorageInsertDocsize(p, iRowid, &buf); | |||
| 23999 | } | |||
| 24000 | sqlite3_freesqlite3_api->free(buf.p); | |||
| 24001 | ||||
| 24002 | return rc; | |||
| 24003 | } | |||
| 24004 | ||||
| 24005 | static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){ | |||
| 24006 | Fts5Config *pConfig = p->pConfig; | |||
| 24007 | char *zSql; | |||
| 24008 | int rc; | |||
| 24009 | ||||
| 24010 | zSql = sqlite3_mprintfsqlite3_api->mprintf("SELECT count(*) FROM %Q.'%q_%s'", | |||
| 24011 | pConfig->zDb, pConfig->zName, zSuffix | |||
| 24012 | ); | |||
| 24013 | if( zSql==0 ){ | |||
| 24014 | rc = SQLITE_NOMEM7; | |||
| 24015 | }else{ | |||
| 24016 | sqlite3_stmt *pCnt = 0; | |||
| 24017 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pConfig->db, zSql, -1, &pCnt, 0); | |||
| 24018 | if( rc==SQLITE_OK0 ){ | |||
| 24019 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pCnt) ){ | |||
| 24020 | *pnRow = sqlite3_column_int64sqlite3_api->column_int64(pCnt, 0); | |||
| 24021 | } | |||
| 24022 | rc = sqlite3_finalizesqlite3_api->finalize(pCnt); | |||
| 24023 | } | |||
| 24024 | } | |||
| 24025 | ||||
| 24026 | sqlite3_freesqlite3_api->free(zSql); | |||
| 24027 | return rc; | |||
| 24028 | } | |||
| 24029 | ||||
| 24030 | /* | |||
| 24031 | ** Context object used by sqlite3Fts5StorageIntegrity(). | |||
| 24032 | */ | |||
| 24033 | typedef struct Fts5IntegrityCtx Fts5IntegrityCtx; | |||
| 24034 | struct Fts5IntegrityCtx { | |||
| 24035 | i64 iRowid; | |||
| 24036 | int iCol; | |||
| 24037 | int szCol; | |||
| 24038 | u64 cksum; | |||
| 24039 | Fts5Termset *pTermset; | |||
| 24040 | Fts5Config *pConfig; | |||
| 24041 | }; | |||
| 24042 | ||||
| 24043 | ||||
| 24044 | /* | |||
| 24045 | ** Tokenization callback used by integrity check. | |||
| 24046 | */ | |||
| 24047 | static int fts5StorageIntegrityCallback( | |||
| 24048 | void *pContext, /* Pointer to Fts5IntegrityCtx object */ | |||
| 24049 | int tflags, | |||
| 24050 | const char *pToken, /* Buffer containing token */ | |||
| 24051 | int nToken, /* Size of token in bytes */ | |||
| 24052 | int iUnused1, /* Start offset of token */ | |||
| 24053 | int iUnused2 /* End offset of token */ | |||
| 24054 | ){ | |||
| 24055 | Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; | |||
| 24056 | Fts5Termset *pTermset = pCtx->pTermset; | |||
| 24057 | int bPresent; | |||
| 24058 | int ii; | |||
| 24059 | int rc = SQLITE_OK0; | |||
| 24060 | int iPos; | |||
| 24061 | int iCol; | |||
| 24062 | ||||
| 24063 | UNUSED_PARAM2(iUnused1, iUnused2)(void)(iUnused1), (void)(iUnused2); | |||
| 24064 | if( nToken>FTS5_MAX_TOKEN_SIZE32768 ) nToken = FTS5_MAX_TOKEN_SIZE32768; | |||
| 24065 | ||||
| 24066 | if( (tflags & FTS5_TOKEN_COLOCATED0x0001)==0 || pCtx->szCol==0 ){ | |||
| 24067 | pCtx->szCol++; | |||
| 24068 | } | |||
| 24069 | ||||
| 24070 | switch( pCtx->pConfig->eDetail ){ | |||
| 24071 | case FTS5_DETAIL_FULL0: | |||
| 24072 | iPos = pCtx->szCol-1; | |||
| 24073 | iCol = pCtx->iCol; | |||
| 24074 | break; | |||
| 24075 | ||||
| 24076 | case FTS5_DETAIL_COLUMNS2: | |||
| 24077 | iPos = pCtx->iCol; | |||
| 24078 | iCol = 0; | |||
| 24079 | break; | |||
| 24080 | ||||
| 24081 | default: | |||
| 24082 | assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE )((void) (0)); | |||
| 24083 | iPos = 0; | |||
| 24084 | iCol = 0; | |||
| 24085 | break; | |||
| 24086 | } | |||
| 24087 | ||||
| 24088 | rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); | |||
| 24089 | if( rc==SQLITE_OK0 && bPresent==0 ){ | |||
| 24090 | pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( | |||
| 24091 | pCtx->iRowid, iCol, iPos, 0, pToken, nToken | |||
| 24092 | ); | |||
| 24093 | } | |||
| 24094 | ||||
| 24095 | for(ii=0; rc==SQLITE_OK0 && ii<pCtx->pConfig->nPrefix; ii++){ | |||
| 24096 | const int nChar = pCtx->pConfig->aPrefix[ii]; | |||
| 24097 | int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); | |||
| 24098 | if( nByte ){ | |||
| 24099 | rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); | |||
| 24100 | if( bPresent==0 ){ | |||
| 24101 | pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( | |||
| 24102 | pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte | |||
| 24103 | ); | |||
| 24104 | } | |||
| 24105 | } | |||
| 24106 | } | |||
| 24107 | ||||
| 24108 | return rc; | |||
| 24109 | } | |||
| 24110 | ||||
| 24111 | /* | |||
| 24112 | ** Check that the contents of the FTS index match that of the %_content | |||
| 24113 | ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return | |||
| 24114 | ** some other SQLite error code if an error occurs while attempting to | |||
| 24115 | ** determine this. | |||
| 24116 | */ | |||
| 24117 | static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){ | |||
| 24118 | Fts5Config *pConfig = p->pConfig; | |||
| 24119 | int rc = SQLITE_OK0; /* Return code */ | |||
| 24120 | int *aColSize; /* Array of size pConfig->nCol */ | |||
| 24121 | i64 *aTotalSize; /* Array of size pConfig->nCol */ | |||
| 24122 | Fts5IntegrityCtx ctx; | |||
| 24123 | sqlite3_stmt *pScan; | |||
| 24124 | int bUseCksum; | |||
| 24125 | ||||
| 24126 | memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); | |||
| 24127 | ctx.pConfig = p->pConfig; | |||
| 24128 | aTotalSize = (i64*)sqlite3_malloc64sqlite3_api->malloc64(pConfig->nCol*(sizeof(int)+sizeof(i64))); | |||
| 24129 | if( !aTotalSize ) return SQLITE_NOMEM7; | |||
| 24130 | aColSize = (int*)&aTotalSize[pConfig->nCol]; | |||
| 24131 | memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol); | |||
| 24132 | ||||
| 24133 | bUseCksum = (pConfig->eContent==FTS5_CONTENT_NORMAL0 | |||
| 24134 | || (pConfig->eContent==FTS5_CONTENT_EXTERNAL2 && iArg) | |||
| 24135 | ); | |||
| 24136 | if( bUseCksum ){ | |||
| 24137 | /* Generate the expected index checksum based on the contents of the | |||
| 24138 | ** %_content table. This block stores the checksum in ctx.cksum. */ | |||
| 24139 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN11, &pScan, 0); | |||
| 24140 | if( rc==SQLITE_OK0 ){ | |||
| 24141 | int rc2; | |||
| 24142 | while( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pScan) ){ | |||
| 24143 | int i; | |||
| 24144 | ctx.iRowid = sqlite3_column_int64sqlite3_api->column_int64(pScan, 0); | |||
| 24145 | ctx.szCol = 0; | |||
| 24146 | if( pConfig->bColumnsize ){ | |||
| 24147 | rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); | |||
| 24148 | } | |||
| 24149 | if( rc==SQLITE_OK0 && pConfig->eDetail==FTS5_DETAIL_NONE1 ){ | |||
| 24150 | rc = sqlite3Fts5TermsetNew(&ctx.pTermset); | |||
| 24151 | } | |||
| 24152 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | |||
| 24153 | if( pConfig->abUnindexed[i]==0 ){ | |||
| 24154 | const char *pText = 0; | |||
| 24155 | int nText = 0; | |||
| 24156 | const char *pLoc = 0; | |||
| 24157 | int nLoc = 0; | |||
| 24158 | sqlite3_value *pVal = sqlite3_column_valuesqlite3_api->column_value(pScan, i+1); | |||
| 24159 | ||||
| 24160 | if( pConfig->eContent==FTS5_CONTENT_EXTERNAL2 | |||
| 24161 | && sqlite3Fts5IsLocaleValue(pConfig, pVal) | |||
| 24162 | ){ | |||
| 24163 | rc = sqlite3Fts5DecodeLocaleValue( | |||
| 24164 | pVal, &pText, &nText, &pLoc, &nLoc | |||
| 24165 | ); | |||
| 24166 | }else{ | |||
| 24167 | if( pConfig->eContent==FTS5_CONTENT_NORMAL0 && pConfig->bLocale ){ | |||
| 24168 | int iCol = i + 1 + pConfig->nCol; | |||
| 24169 | pLoc = (const char*)sqlite3_column_textsqlite3_api->column_text(pScan, iCol); | |||
| 24170 | nLoc = sqlite3_column_bytessqlite3_api->column_bytes(pScan, iCol); | |||
| 24171 | } | |||
| 24172 | pText = (const char*)sqlite3_value_textsqlite3_api->value_text(pVal); | |||
| 24173 | nText = sqlite3_value_bytessqlite3_api->value_bytes(pVal); | |||
| 24174 | } | |||
| 24175 | ||||
| 24176 | ctx.iCol = i; | |||
| 24177 | ctx.szCol = 0; | |||
| 24178 | ||||
| 24179 | if( rc==SQLITE_OK0 && pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
| 24180 | rc = sqlite3Fts5TermsetNew(&ctx.pTermset); | |||
| 24181 | } | |||
| 24182 | ||||
| 24183 | if( rc==SQLITE_OK0 ){ | |||
| 24184 | sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); | |||
| 24185 | rc = sqlite3Fts5Tokenize(pConfig, | |||
| 24186 | FTS5_TOKENIZE_DOCUMENT0x0004, | |||
| 24187 | pText, nText, | |||
| 24188 | (void*)&ctx, | |||
| 24189 | fts5StorageIntegrityCallback | |||
| 24190 | ); | |||
| 24191 | sqlite3Fts5ClearLocale(pConfig); | |||
| 24192 | } | |||
| 24193 | ||||
| 24194 | /* If this is not a columnsize=0 database, check that the number | |||
| 24195 | ** of tokens in the value matches the aColSize[] value read from | |||
| 24196 | ** the %_docsize table. */ | |||
| 24197 | if( rc==SQLITE_OK0 | |||
| 24198 | && pConfig->bColumnsize | |||
| 24199 | && ctx.szCol!=aColSize[i] | |||
| 24200 | ){ | |||
| 24201 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 24202 | } | |||
| 24203 | aTotalSize[i] += ctx.szCol; | |||
| 24204 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
| 24205 | sqlite3Fts5TermsetFree(ctx.pTermset); | |||
| 24206 | ctx.pTermset = 0; | |||
| 24207 | } | |||
| 24208 | } | |||
| 24209 | } | |||
| 24210 | sqlite3Fts5TermsetFree(ctx.pTermset); | |||
| 24211 | ctx.pTermset = 0; | |||
| 24212 | ||||
| 24213 | if( rc!=SQLITE_OK0 ) break; | |||
| 24214 | } | |||
| 24215 | rc2 = sqlite3_resetsqlite3_api->reset(pScan); | |||
| 24216 | if( rc==SQLITE_OK0 ) rc = rc2; | |||
| 24217 | } | |||
| 24218 | ||||
| 24219 | /* Test that the "totals" (sometimes called "averages") record looks Ok */ | |||
| 24220 | if( rc==SQLITE_OK0 ){ | |||
| 24221 | int i; | |||
| 24222 | rc = fts5StorageLoadTotals(p, 0); | |||
| 24223 | for(i=0; rc==SQLITE_OK0 && i<pConfig->nCol; i++){ | |||
| 24224 | if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 24225 | } | |||
| 24226 | } | |||
| 24227 | ||||
| 24228 | /* Check that the %_docsize and %_content tables contain the expected | |||
| 24229 | ** number of rows. */ | |||
| 24230 | if( rc==SQLITE_OK0 && pConfig->eContent==FTS5_CONTENT_NORMAL0 ){ | |||
| 24231 | i64 nRow = 0; | |||
| 24232 | rc = fts5StorageCount(p, "content", &nRow); | |||
| 24233 | if( rc==SQLITE_OK0 && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 24234 | } | |||
| 24235 | if( rc==SQLITE_OK0 && pConfig->bColumnsize ){ | |||
| 24236 | i64 nRow = 0; | |||
| 24237 | rc = fts5StorageCount(p, "docsize", &nRow); | |||
| 24238 | if( rc==SQLITE_OK0 && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 24239 | } | |||
| 24240 | } | |||
| 24241 | ||||
| 24242 | /* Pass the expected checksum down to the FTS index module. It will | |||
| 24243 | ** verify, amongst other things, that it matches the checksum generated by | |||
| 24244 | ** inspecting the index itself. */ | |||
| 24245 | if( rc==SQLITE_OK0 ){ | |||
| 24246 | rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum, bUseCksum); | |||
| 24247 | } | |||
| 24248 | ||||
| 24249 | sqlite3_freesqlite3_api->free(aTotalSize); | |||
| 24250 | return rc; | |||
| 24251 | } | |||
| 24252 | ||||
| 24253 | /* | |||
| 24254 | ** Obtain an SQLite statement handle that may be used to read data from the | |||
| 24255 | ** %_content table. | |||
| 24256 | */ | |||
| 24257 | static int sqlite3Fts5StorageStmt( | |||
| 24258 | Fts5Storage *p, | |||
| 24259 | int eStmt, | |||
| 24260 | sqlite3_stmt **pp, | |||
| 24261 | char **pzErrMsg | |||
| 24262 | ){ | |||
| 24263 | int rc; | |||
| 24264 | assert( eStmt==FTS5_STMT_SCAN_ASC((void) (0)) | |||
| 24265 | || eStmt==FTS5_STMT_SCAN_DESC((void) (0)) | |||
| 24266 | || eStmt==FTS5_STMT_LOOKUP((void) (0)) | |||
| 24267 | )((void) (0)); | |||
| 24268 | rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg); | |||
| 24269 | if( rc==SQLITE_OK0 ){ | |||
| 24270 | assert( p->aStmt[eStmt]==*pp )((void) (0)); | |||
| 24271 | p->aStmt[eStmt] = 0; | |||
| 24272 | } | |||
| 24273 | return rc; | |||
| 24274 | } | |||
| 24275 | ||||
| 24276 | /* | |||
| 24277 | ** Release an SQLite statement handle obtained via an earlier call to | |||
| 24278 | ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function | |||
| 24279 | ** must match that passed to the sqlite3Fts5StorageStmt() call. | |||
| 24280 | */ | |||
| 24281 | static void sqlite3Fts5StorageStmtRelease( | |||
| 24282 | Fts5Storage *p, | |||
| 24283 | int eStmt, | |||
| 24284 | sqlite3_stmt *pStmt | |||
| 24285 | ){ | |||
| 24286 | assert( eStmt==FTS5_STMT_SCAN_ASC((void) (0)) | |||
| 24287 | || eStmt==FTS5_STMT_SCAN_DESC((void) (0)) | |||
| 24288 | || eStmt==FTS5_STMT_LOOKUP((void) (0)) | |||
| 24289 | )((void) (0)); | |||
| 24290 | if( p->aStmt[eStmt]==0 ){ | |||
| 24291 | sqlite3_resetsqlite3_api->reset(pStmt); | |||
| 24292 | p->aStmt[eStmt] = pStmt; | |||
| 24293 | }else{ | |||
| 24294 | sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
| 24295 | } | |||
| 24296 | } | |||
| 24297 | ||||
| 24298 | static int fts5StorageDecodeSizeArray( | |||
| 24299 | int *aCol, int nCol, /* Array to populate */ | |||
| 24300 | const u8 *aBlob, int nBlob /* Record to read varints from */ | |||
| 24301 | ){ | |||
| 24302 | int i; | |||
| 24303 | int iOff = 0; | |||
| 24304 | for(i=0; i<nCol; i++){ | |||
| 24305 | if( iOff>=nBlob ) return 1; | |||
| 24306 | iOff += fts5GetVarint32(&aBlob[iOff], aCol[i])sqlite3Fts5GetVarint32(&aBlob[iOff],(u32*)&(aCol[i])); | |||
| 24307 | } | |||
| 24308 | return (iOff!=nBlob); | |||
| 24309 | } | |||
| 24310 | ||||
| 24311 | /* | |||
| 24312 | ** Argument aCol points to an array of integers containing one entry for | |||
| 24313 | ** each table column. This function reads the %_docsize record for the | |||
| 24314 | ** specified rowid and populates aCol[] with the results. | |||
| 24315 | ** | |||
| 24316 | ** An SQLite error code is returned if an error occurs, or SQLITE_OK | |||
| 24317 | ** otherwise. | |||
| 24318 | */ | |||
| 24319 | static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ | |||
| 24320 | int nCol = p->pConfig->nCol; /* Number of user columns in table */ | |||
| 24321 | sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ | |||
| 24322 | int rc; /* Return Code */ | |||
| 24323 | ||||
| 24324 | assert( p->pConfig->bColumnsize )((void) (0)); | |||
| 24325 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE9, &pLookup, 0); | |||
| 24326 | if( pLookup ){ | |||
| 24327 | int bCorrupt = 1; | |||
| 24328 | assert( rc==SQLITE_OK )((void) (0)); | |||
| 24329 | sqlite3_bind_int64sqlite3_api->bind_int64(pLookup, 1, iRowid); | |||
| 24330 | if( SQLITE_ROW100==sqlite3_stepsqlite3_api->step(pLookup) ){ | |||
| 24331 | const u8 *aBlob = sqlite3_column_blobsqlite3_api->column_blob(pLookup, 0); | |||
| 24332 | int nBlob = sqlite3_column_bytessqlite3_api->column_bytes(pLookup, 0); | |||
| 24333 | if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ | |||
| 24334 | bCorrupt = 0; | |||
| 24335 | } | |||
| 24336 | } | |||
| 24337 | rc = sqlite3_resetsqlite3_api->reset(pLookup); | |||
| 24338 | if( bCorrupt && rc==SQLITE_OK0 ){ | |||
| 24339 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 24340 | } | |||
| 24341 | }else{ | |||
| 24342 | assert( rc!=SQLITE_OK )((void) (0)); | |||
| 24343 | } | |||
| 24344 | ||||
| 24345 | return rc; | |||
| 24346 | } | |||
| 24347 | ||||
| 24348 | static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ | |||
| 24349 | int rc = fts5StorageLoadTotals(p, 0); | |||
| 24350 | if( rc==SQLITE_OK0 ){ | |||
| 24351 | *pnToken = 0; | |||
| 24352 | if( iCol<0 ){ | |||
| 24353 | int i; | |||
| 24354 | for(i=0; i<p->pConfig->nCol; i++){ | |||
| 24355 | *pnToken += p->aTotalSize[i]; | |||
| 24356 | } | |||
| 24357 | }else if( iCol<p->pConfig->nCol ){ | |||
| 24358 | *pnToken = p->aTotalSize[iCol]; | |||
| 24359 | }else{ | |||
| 24360 | rc = SQLITE_RANGE25; | |||
| 24361 | } | |||
| 24362 | } | |||
| 24363 | return rc; | |||
| 24364 | } | |||
| 24365 | ||||
| 24366 | static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ | |||
| 24367 | int rc = fts5StorageLoadTotals(p, 0); | |||
| 24368 | if( rc==SQLITE_OK0 ){ | |||
| 24369 | /* nTotalRow being zero does not necessarily indicate a corrupt | |||
| 24370 | ** database - it might be that the FTS5 table really does contain zero | |||
| 24371 | ** rows. However this function is only called from the xRowCount() API, | |||
| 24372 | ** and there is no way for that API to be invoked if the table contains | |||
| 24373 | ** no rows. Hence the FTS5_CORRUPT return. */ | |||
| 24374 | *pnRow = p->nTotalRow; | |||
| 24375 | if( p->nTotalRow<=0 ) rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 24376 | } | |||
| 24377 | return rc; | |||
| 24378 | } | |||
| 24379 | ||||
| 24380 | /* | |||
| 24381 | ** Flush any data currently held in-memory to disk. | |||
| 24382 | */ | |||
| 24383 | static int sqlite3Fts5StorageSync(Fts5Storage *p){ | |||
| 24384 | int rc = SQLITE_OK0; | |||
| 24385 | i64 iLastRowid = sqlite3_last_insert_rowidsqlite3_api->last_insert_rowid(p->pConfig->db); | |||
| 24386 | if( p->bTotalsValid ){ | |||
| 24387 | rc = fts5StorageSaveTotals(p); | |||
| 24388 | if( rc==SQLITE_OK0 ){ | |||
| 24389 | p->bTotalsValid = 0; | |||
| 24390 | } | |||
| 24391 | } | |||
| 24392 | if( rc==SQLITE_OK0 ){ | |||
| 24393 | rc = sqlite3Fts5IndexSync(p->pIndex); | |||
| 24394 | } | |||
| 24395 | sqlite3_set_last_insert_rowidsqlite3_api->set_last_insert_rowid(p->pConfig->db, iLastRowid); | |||
| 24396 | return rc; | |||
| 24397 | } | |||
| 24398 | ||||
| 24399 | static int sqlite3Fts5StorageRollback(Fts5Storage *p){ | |||
| 24400 | p->bTotalsValid = 0; | |||
| 24401 | return sqlite3Fts5IndexRollback(p->pIndex); | |||
| 24402 | } | |||
| 24403 | ||||
| 24404 | static int sqlite3Fts5StorageConfigValue( | |||
| 24405 | Fts5Storage *p, | |||
| 24406 | const char *z, | |||
| 24407 | sqlite3_value *pVal, | |||
| 24408 | int iVal | |||
| 24409 | ){ | |||
| 24410 | sqlite3_stmt *pReplace = 0; | |||
| 24411 | int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG10, &pReplace, 0); | |||
| 24412 | if( rc==SQLITE_OK0 ){ | |||
| 24413 | sqlite3_bind_textsqlite3_api->bind_text(pReplace, 1, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 24414 | if( pVal ){ | |||
| 24415 | sqlite3_bind_valuesqlite3_api->bind_value(pReplace, 2, pVal); | |||
| 24416 | }else{ | |||
| 24417 | sqlite3_bind_intsqlite3_api->bind_int(pReplace, 2, iVal); | |||
| 24418 | } | |||
| 24419 | sqlite3_stepsqlite3_api->step(pReplace); | |||
| 24420 | rc = sqlite3_resetsqlite3_api->reset(pReplace); | |||
| 24421 | sqlite3_bind_nullsqlite3_api->bind_null(pReplace, 1); | |||
| 24422 | } | |||
| 24423 | if( rc==SQLITE_OK0 && pVal ){ | |||
| 24424 | int iNew = p->pConfig->iCookie + 1; | |||
| 24425 | rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); | |||
| 24426 | if( rc==SQLITE_OK0 ){ | |||
| 24427 | p->pConfig->iCookie = iNew; | |||
| 24428 | } | |||
| 24429 | } | |||
| 24430 | return rc; | |||
| 24431 | } | |||
| 24432 | ||||
| 24433 | #line 1 "fts5_tokenize.c" | |||
| 24434 | /* | |||
| 24435 | ** 2014 May 31 | |||
| 24436 | ** | |||
| 24437 | ** The author disclaims copyright to this source code. In place of | |||
| 24438 | ** a legal notice, here is a blessing: | |||
| 24439 | ** | |||
| 24440 | ** May you do good and not evil. | |||
| 24441 | ** May you find forgiveness for yourself and forgive others. | |||
| 24442 | ** May you share freely, never taking more than you give. | |||
| 24443 | ** | |||
| 24444 | ****************************************************************************** | |||
| 24445 | */ | |||
| 24446 | ||||
| 24447 | ||||
| 24448 | /* #include "fts5Int.h" */ | |||
| 24449 | ||||
| 24450 | /************************************************************************** | |||
| 24451 | ** Start of ascii tokenizer implementation. | |||
| 24452 | */ | |||
| 24453 | ||||
/*
** For tokenizers with no "unicode" modifier, the set of token characters
** is the same as the set of ASCII range alphanumeric characters.
**
** Entry i is 1 if byte value i is a token character ('0'..'9', 'A'..'Z'
** and 'a'..'z'), or 0 if it is a separator.
*/
static unsigned char aAsciiTokenChar[128] = {
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00..0x0F */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10..0x1F */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20..0x2F */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30..0x3F */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40..0x4F */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x50..0x5F */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60..0x6F */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x70..0x7F */
};
| 24468 | ||||
/*
** An "ascii" tokenizer object. aTokenChar[i] is non-zero if ASCII byte
** value i is treated as a token character.
*/
typedef struct AsciiTokenizer AsciiTokenizer;
struct AsciiTokenizer {
  unsigned char aTokenChar[128];
};

/*
** For each byte of nul-terminated string zArg that has its high bit
** clear, set the matching entry of p->aTokenChar[] to bTokenChars (1 for
** "tokenchars", 0 for "separators"). Bytes with the high bit set are
** ignored.
*/
static void fts5AsciiAddExceptions(
  AsciiTokenizer *p,
  const char *zArg,
  int bTokenChars
){
  int i;
  for(i=0; zArg[i]; i++){
    if( (zArg[i] & 0x80)==0 ){
      p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars;
    }
  }
}
| 24486 | ||||
| 24487 | /* | |||
| 24488 | ** Delete a "ascii" tokenizer. | |||
| 24489 | */ | |||
| 24490 | static void fts5AsciiDelete(Fts5Tokenizer *p){ | |||
| 24491 | sqlite3_freesqlite3_api->free(p); | |||
| 24492 | } | |||
| 24493 | ||||
| 24494 | /* | |||
| 24495 | ** Create an "ascii" tokenizer. | |||
| 24496 | */ | |||
| 24497 | static int fts5AsciiCreate( | |||
| 24498 | void *pUnused, | |||
| 24499 | const char **azArg, int nArg, | |||
| 24500 | Fts5Tokenizer **ppOut | |||
| 24501 | ){ | |||
| 24502 | int rc = SQLITE_OK0; | |||
| 24503 | AsciiTokenizer *p = 0; | |||
| 24504 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
| 24505 | if( nArg%2 ){ | |||
| 24506 | rc = SQLITE_ERROR1; | |||
| 24507 | }else{ | |||
| 24508 | p = sqlite3_mallocsqlite3_api->malloc(sizeof(AsciiTokenizer)); | |||
| 24509 | if( p==0 ){ | |||
| 24510 | rc = SQLITE_NOMEM7; | |||
| 24511 | }else{ | |||
| 24512 | int i; | |||
| 24513 | memset(p, 0, sizeof(AsciiTokenizer)); | |||
| 24514 | memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); | |||
| 24515 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | |||
| 24516 | const char *zArg = azArg[i+1]; | |||
| 24517 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "tokenchars") ){ | |||
| 24518 | fts5AsciiAddExceptions(p, zArg, 1); | |||
| 24519 | }else | |||
| 24520 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "separators") ){ | |||
| 24521 | fts5AsciiAddExceptions(p, zArg, 0); | |||
| 24522 | }else{ | |||
| 24523 | rc = SQLITE_ERROR1; | |||
| 24524 | } | |||
| 24525 | } | |||
| 24526 | if( rc!=SQLITE_OK0 ){ | |||
| 24527 | fts5AsciiDelete((Fts5Tokenizer*)p); | |||
| 24528 | p = 0; | |||
| 24529 | } | |||
| 24530 | } | |||
| 24531 | } | |||
| 24532 | ||||
| 24533 | *ppOut = (Fts5Tokenizer*)p; | |||
| 24534 | return rc; | |||
| 24535 | } | |||
| 24536 | ||||
| 24537 | ||||
/*
** Copy nByte bytes from aIn[] to aOut[], converting each upper-case ASCII
** letter ('A'..'Z') to lower case. All other bytes are copied unchanged.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int i;
  for(i=0; i<nByte; i++){
    char c = aIn[i];
    if( c>='A' && c<='Z' ) c += 32;
    aOut[i] = c;
  }
}
| 24546 | ||||
| 24547 | /* | |||
| 24548 | ** Tokenize some text using the ascii tokenizer. | |||
| 24549 | */ | |||
| 24550 | static int fts5AsciiTokenize( | |||
| 24551 | Fts5Tokenizer *pTokenizer, | |||
| 24552 | void *pCtx, | |||
| 24553 | int iUnused, | |||
| 24554 | const char *pText, int nText, | |||
| 24555 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | |||
| 24556 | ){ | |||
| 24557 | AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; | |||
| 24558 | int rc = SQLITE_OK0; | |||
| 24559 | int ie; | |||
| 24560 | int is = 0; | |||
| 24561 | ||||
| 24562 | char aFold[64]; | |||
| 24563 | int nFold = sizeof(aFold); | |||
| 24564 | char *pFold = aFold; | |||
| 24565 | unsigned char *a = p->aTokenChar; | |||
| 24566 | ||||
| 24567 | UNUSED_PARAM(iUnused)(void)(iUnused); | |||
| 24568 | ||||
| 24569 | while( is<nText && rc==SQLITE_OK0 ){ | |||
| 24570 | int nByte; | |||
| 24571 | ||||
| 24572 | /* Skip any leading divider characters. */ | |||
| 24573 | while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){ | |||
| 24574 | is++; | |||
| 24575 | } | |||
| 24576 | if( is==nText ) break; | |||
| 24577 | ||||
| 24578 | /* Count the token characters */ | |||
| 24579 | ie = is+1; | |||
| 24580 | while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){ | |||
| 24581 | ie++; | |||
| 24582 | } | |||
| 24583 | ||||
| 24584 | /* Fold to lower case */ | |||
| 24585 | nByte = ie-is; | |||
| 24586 | if( nByte>nFold ){ | |||
| 24587 | if( pFold!=aFold ) sqlite3_freesqlite3_api->free(pFold); | |||
| 24588 | pFold = sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)nByte*2); | |||
| 24589 | if( pFold==0 ){ | |||
| 24590 | rc = SQLITE_NOMEM7; | |||
| 24591 | break; | |||
| 24592 | } | |||
| 24593 | nFold = nByte*2; | |||
| 24594 | } | |||
| 24595 | asciiFold(pFold, &pText[is], nByte); | |||
| 24596 | ||||
| 24597 | /* Invoke the token callback */ | |||
| 24598 | rc = xToken(pCtx, 0, pFold, nByte, is, ie); | |||
| 24599 | is = ie+1; | |||
| 24600 | } | |||
| 24601 | ||||
| 24602 | if( pFold!=aFold ) sqlite3_freesqlite3_api->free(pFold); | |||
| 24603 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; | |||
| 24604 | return rc; | |||
| 24605 | } | |||
| 24606 | ||||
| 24607 | /************************************************************************** | |||
| 24608 | ** Start of unicode61 tokenizer implementation. | |||
| 24609 | */ | |||
| 24610 | ||||
| 24611 | ||||
/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
**
** READ_UTF8(zIn, zTerm, c) decodes one character starting at zIn into c
** and advances zIn past it. Continuation bytes are not read at or beyond
** zTerm. Results below 0x80, in the range 0xD800..0xDFFF, or equal to
** 0xFFFE or 0xFFFF are replaced by 0xFFFD.
**
** WRITE_UTF8(zOut, c) encodes c as 1 to 4 bytes at zOut and advances zOut
** past the bytes written.
**
** Both macros evaluate their arguments more than once.
*/
#ifndef SQLITE_AMALGAMATION

/* Payload bits of a multi-byte lead byte, indexed by (lead byte - 0xC0) */
static const unsigned char sqlite3Utf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3Utf8Trans1[c-0xc0];                         \
    while( zIn<zTerm && (*zIn & 0xc0)==0x80 ){             \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }


#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (unsigned char)(c&0xFF);                 \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }else{                                               \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
}

#endif /* ifndef SQLITE_AMALGAMATION */
| 24664 | ||||
/*
** Advance pointer zIn past one utf-8 character: one byte and, if that
** byte is 0xC0 or greater, all continuation bytes (10xxxxxx) that follow
** it. The scan is not bounded by a length, so it stops at the first byte
** that is not a continuation byte, e.g. a nul-terminator. zIn is
** evaluated more than once.
*/
#define FTS5_SKIP_UTF8(zIn) {                               \
  if( ((unsigned char)(*(zIn++)))>=0xc0 ){                  \
    while( (((unsigned char)*zIn) & 0xc0)==0x80 ){ zIn++; } \
  }                                                         \
}
| 24670 | ||||
/*
** State of one "unicode61" tokenizer instance.
**
** aTokenChar[]     Indexed by ASCII codepoint (0..127). Non-zero if that
**                  codepoint is a token character, zero if a separator.
** aFold/nFold      Scratch buffer, and its size in bytes, that folded
**                  token text is written into. Grown on demand.
** eRemoveDiacritic One of the FTS5_REMOVE_DIACRITICS_* values below.
** aiException[]    nException codepoints, kept in ascending order, whose
**                  token/separator status is the opposite of the status
**                  implied by their category in aCategory[].
** aCategory[]      Non-zero for each character category that is treated
**                  as a token category.
*/
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];
  char *aFold;
  int nFold;
  int eRemoveDiacritic;
  int nException;
  int *aiException;

  unsigned char aCategory[32];
};

/*
** Permitted values of Unicode61Tokenizer.eRemoveDiacritic. These must
** match the values used internally by fts5_unicode2.c.
*/
#define FTS5_REMOVE_DIACRITICS_NONE    0
#define FTS5_REMOVE_DIACRITICS_SIMPLE  1
#define FTS5_REMOVE_DIACRITICS_COMPLEX 2
| 24687 | ||||
| 24688 | static int fts5UnicodeAddExceptions( | |||
| 24689 | Unicode61Tokenizer *p, /* Tokenizer object */ | |||
| 24690 | const char *z, /* Characters to treat as exceptions */ | |||
| 24691 | int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */ | |||
| 24692 | ){ | |||
| 24693 | int rc = SQLITE_OK0; | |||
| 24694 | int n = (int)strlen(z); | |||
| 24695 | int *aNew; | |||
| 24696 | ||||
| 24697 | if( n>0 ){ | |||
| 24698 | aNew = (int*)sqlite3_realloc64sqlite3_api->realloc64(p->aiException, | |||
| 24699 | (n+p->nException)*sizeof(int)); | |||
| 24700 | if( aNew ){ | |||
| 24701 | int nNew = p->nException; | |||
| 24702 | const unsigned char *zCsr = (const unsigned char*)z; | |||
| 24703 | const unsigned char *zTerm = (const unsigned char*)&z[n]; | |||
| 24704 | while( zCsr<zTerm ){ | |||
| 24705 | u32 iCode; | |||
| 24706 | int bToken; | |||
| 24707 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
| 24708 | if( iCode<128 ){ | |||
| 24709 | p->aTokenChar[iCode] = (unsigned char)bTokenChars; | |||
| 24710 | }else{ | |||
| 24711 | bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)]; | |||
| 24712 | assert( (bToken==0 || bToken==1) )((void) (0)); | |||
| 24713 | assert( (bTokenChars==0 || bTokenChars==1) )((void) (0)); | |||
| 24714 | if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){ | |||
| 24715 | int i; | |||
| 24716 | for(i=0; i<nNew; i++){ | |||
| 24717 | if( (u32)aNew[i]>iCode ) break; | |||
| 24718 | } | |||
| 24719 | memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); | |||
| 24720 | aNew[i] = iCode; | |||
| 24721 | nNew++; | |||
| 24722 | } | |||
| 24723 | } | |||
| 24724 | } | |||
| 24725 | p->aiException = aNew; | |||
| 24726 | p->nException = nNew; | |||
| 24727 | }else{ | |||
| 24728 | rc = SQLITE_NOMEM7; | |||
| 24729 | } | |||
| 24730 | } | |||
| 24731 | ||||
| 24732 | return rc; | |||
| 24733 | } | |||
| 24734 | ||||
| 24735 | /* | |||
| 24736 | ** Return true if the p->aiException[] array contains the value iCode. | |||
| 24737 | */ | |||
| 24738 | static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ | |||
| 24739 | if( p->nException>0 ){ | |||
| 24740 | int *a = p->aiException; | |||
| 24741 | int iLo = 0; | |||
| 24742 | int iHi = p->nException-1; | |||
| 24743 | ||||
| 24744 | while( iHi>=iLo ){ | |||
| 24745 | int iTest = (iHi + iLo) / 2; | |||
| 24746 | if( iCode==a[iTest] ){ | |||
| 24747 | return 1; | |||
| 24748 | }else if( iCode>a[iTest] ){ | |||
| 24749 | iLo = iTest+1; | |||
| 24750 | }else{ | |||
| 24751 | iHi = iTest-1; | |||
| 24752 | } | |||
| 24753 | } | |||
| 24754 | } | |||
| 24755 | ||||
| 24756 | return 0; | |||
| 24757 | } | |||
| 24758 | ||||
| 24759 | /* | |||
| 24760 | ** Delete a "unicode61" tokenizer. | |||
| 24761 | */ | |||
| 24762 | static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ | |||
| 24763 | if( pTok ){ | |||
| 24764 | Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; | |||
| 24765 | sqlite3_freesqlite3_api->free(p->aiException); | |||
| 24766 | sqlite3_freesqlite3_api->free(p->aFold); | |||
| 24767 | sqlite3_freesqlite3_api->free(p); | |||
| 24768 | } | |||
| 24769 | return; | |||
| 24770 | } | |||
| 24771 | ||||
| 24772 | static int unicodeSetCategories(Unicode61Tokenizer *p, const char *zCat){ | |||
| 24773 | const char *z = zCat; | |||
| 24774 | ||||
| 24775 | while( *z ){ | |||
| 24776 | while( *z==' ' || *z=='\t' ) z++; | |||
| 24777 | if( *z && sqlite3Fts5UnicodeCatParse(z, p->aCategory) ){ | |||
| 24778 | return SQLITE_ERROR1; | |||
| 24779 | } | |||
| 24780 | while( *z!=' ' && *z!='\t' && *z!='\0' ) z++; | |||
| 24781 | } | |||
| 24782 | ||||
| 24783 | sqlite3Fts5UnicodeAscii(p->aCategory, p->aTokenChar); | |||
| 24784 | return SQLITE_OK0; | |||
| 24785 | } | |||
| 24786 | ||||
| 24787 | /* | |||
| 24788 | ** Create a "unicode61" tokenizer. | |||
| 24789 | */ | |||
| 24790 | static int fts5UnicodeCreate( | |||
| 24791 | void *pUnused, | |||
| 24792 | const char **azArg, int nArg, | |||
| 24793 | Fts5Tokenizer **ppOut | |||
| 24794 | ){ | |||
| 24795 | int rc = SQLITE_OK0; /* Return code */ | |||
| 24796 | Unicode61Tokenizer *p = 0; /* New tokenizer object */ | |||
| 24797 | ||||
| 24798 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
| 24799 | ||||
| 24800 | if( nArg%2 ){ | |||
| 24801 | rc = SQLITE_ERROR1; | |||
| 24802 | }else{ | |||
| 24803 | p = (Unicode61Tokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(Unicode61Tokenizer)); | |||
| 24804 | if( p ){ | |||
| 24805 | const char *zCat = "L* N* Co"; | |||
| 24806 | int i; | |||
| 24807 | memset(p, 0, sizeof(Unicode61Tokenizer)); | |||
| 24808 | ||||
| 24809 | p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE1; | |||
| 24810 | p->nFold = 64; | |||
| 24811 | p->aFold = sqlite3_malloc64sqlite3_api->malloc64(p->nFold * sizeof(char)); | |||
| 24812 | if( p->aFold==0 ){ | |||
| 24813 | rc = SQLITE_NOMEM7; | |||
| 24814 | } | |||
| 24815 | ||||
| 24816 | /* Search for a "categories" argument */ | |||
| 24817 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | |||
| 24818 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "categories") ){ | |||
| 24819 | zCat = azArg[i+1]; | |||
| 24820 | } | |||
| 24821 | } | |||
| 24822 | if( rc==SQLITE_OK0 ){ | |||
| 24823 | rc = unicodeSetCategories(p, zCat); | |||
| 24824 | } | |||
| 24825 | ||||
| 24826 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | |||
| 24827 | const char *zArg = azArg[i+1]; | |||
| 24828 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "remove_diacritics") ){ | |||
| 24829 | if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ | |||
| 24830 | rc = SQLITE_ERROR1; | |||
| 24831 | }else{ | |||
| 24832 | p->eRemoveDiacritic = (zArg[0] - '0'); | |||
| 24833 | assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE((void) (0)) | |||
| 24834 | || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE((void) (0)) | |||
| 24835 | || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX((void) (0)) | |||
| 24836 | )((void) (0)); | |||
| 24837 | } | |||
| 24838 | }else | |||
| 24839 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "tokenchars") ){ | |||
| 24840 | rc = fts5UnicodeAddExceptions(p, zArg, 1); | |||
| 24841 | }else | |||
| 24842 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "separators") ){ | |||
| 24843 | rc = fts5UnicodeAddExceptions(p, zArg, 0); | |||
| 24844 | }else | |||
| 24845 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "categories") ){ | |||
| 24846 | /* no-op */ | |||
| 24847 | }else{ | |||
| 24848 | rc = SQLITE_ERROR1; | |||
| 24849 | } | |||
| 24850 | } | |||
| 24851 | }else{ | |||
| 24852 | rc = SQLITE_NOMEM7; | |||
| 24853 | } | |||
| 24854 | if( rc!=SQLITE_OK0 ){ | |||
| 24855 | fts5UnicodeDelete((Fts5Tokenizer*)p); | |||
| 24856 | p = 0; | |||
| 24857 | } | |||
| 24858 | *ppOut = (Fts5Tokenizer*)p; | |||
| 24859 | } | |||
| 24860 | return rc; | |||
| 24861 | } | |||
| 24862 | ||||
| 24863 | /* | |||
| 24864 | ** Return true if, for the purposes of tokenizing with the tokenizer | |||
| 24865 | ** passed as the first argument, codepoint iCode is considered a token | |||
| 24866 | ** character (not a separator). | |||
| 24867 | */ | |||
| 24868 | static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ | |||
| 24869 | return ( | |||
| 24870 | p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)] | |||
| 24871 | ^ fts5UnicodeIsException(p, iCode) | |||
| 24872 | ); | |||
| 24873 | } | |||
| 24874 | ||||
| 24875 | static int fts5UnicodeTokenize( | |||
| 24876 | Fts5Tokenizer *pTokenizer, | |||
| 24877 | void *pCtx, | |||
| 24878 | int iUnused, | |||
| 24879 | const char *pText, int nText, | |||
| 24880 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | |||
| 24881 | ){ | |||
| 24882 | Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; | |||
| 24883 | int rc = SQLITE_OK0; | |||
| 24884 | unsigned char *a = p->aTokenChar; | |||
| 24885 | ||||
| 24886 | unsigned char *zTerm = (unsigned char*)&pText[nText]; | |||
| 24887 | unsigned char *zCsr = (unsigned char *)pText; | |||
| 24888 | ||||
| 24889 | /* Output buffer */ | |||
| 24890 | char *aFold = p->aFold; | |||
| 24891 | int nFold = p->nFold; | |||
| 24892 | const char *pEnd = &aFold[nFold-6]; | |||
| 24893 | ||||
| 24894 | UNUSED_PARAM(iUnused)(void)(iUnused); | |||
| 24895 | ||||
| 24896 | /* Each iteration of this loop gobbles up a contiguous run of separators, | |||
| 24897 | ** then the next token. */ | |||
| 24898 | while( rc==SQLITE_OK0 ){ | |||
| 24899 | u32 iCode; /* non-ASCII codepoint read from input */ | |||
| 24900 | char *zOut = aFold; | |||
| 24901 | int is; | |||
| 24902 | int ie; | |||
| 24903 | ||||
| 24904 | /* Skip any separator characters. */ | |||
| 24905 | while( 1 ){ | |||
| 24906 | if( zCsr>=zTerm ) goto tokenize_done; | |||
| 24907 | if( *zCsr & 0x80 ) { | |||
| 24908 | /* A character outside of the ascii range. Skip past it if it is | |||
| 24909 | ** a separator character. Or break out of the loop if it is not. */ | |||
| 24910 | is = zCsr - (unsigned char*)pText; | |||
| 24911 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
| 24912 | if( fts5UnicodeIsAlnum(p, iCode) ){ | |||
| 24913 | goto non_ascii_tokenchar; | |||
| 24914 | } | |||
| 24915 | }else{ | |||
| 24916 | if( a[*zCsr] ){ | |||
| 24917 | is = zCsr - (unsigned char*)pText; | |||
| 24918 | goto ascii_tokenchar; | |||
| 24919 | } | |||
| 24920 | zCsr++; | |||
| 24921 | } | |||
| 24922 | } | |||
| 24923 | ||||
| 24924 | /* Run through the tokenchars. Fold them into the output buffer along | |||
| 24925 | ** the way. */ | |||
| 24926 | while( zCsr<zTerm ){ | |||
| 24927 | ||||
| 24928 | /* Grow the output buffer so that there is sufficient space to fit the | |||
| 24929 | ** largest possible utf-8 character. */ | |||
| 24930 | if( zOut>pEnd ){ | |||
| 24931 | aFold = sqlite3_malloc64sqlite3_api->malloc64((sqlite3_int64)nFold*2); | |||
| 24932 | if( aFold==0 ){ | |||
| 24933 | rc = SQLITE_NOMEM7; | |||
| 24934 | goto tokenize_done; | |||
| 24935 | } | |||
| 24936 | zOut = &aFold[zOut - p->aFold]; | |||
| 24937 | memcpy(aFold, p->aFold, nFold); | |||
| 24938 | sqlite3_freesqlite3_api->free(p->aFold); | |||
| 24939 | p->aFold = aFold; | |||
| 24940 | p->nFold = nFold = nFold*2; | |||
| 24941 | pEnd = &aFold[nFold-6]; | |||
| 24942 | } | |||
| 24943 | ||||
| 24944 | if( *zCsr & 0x80 ){ | |||
| 24945 | /* An non-ascii-range character. Fold it into the output buffer if | |||
| 24946 | ** it is a token character, or break out of the loop if it is not. */ | |||
| 24947 | READ_UTF8(zCsr, zTerm, iCode)iCode = *(zCsr++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zCsr<zTerm && (*zCsr & 0xc0 )==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zCsr++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
| 24948 | if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){ | |||
| 24949 | non_ascii_tokenchar: | |||
| 24950 | iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic); | |||
| 24951 | if( iCode ) WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | |||
| 24952 | }else{ | |||
| 24953 | break; | |||
| 24954 | } | |||
| 24955 | }else if( a[*zCsr]==0 ){ | |||
| 24956 | /* An ascii-range separator character. End of token. */ | |||
| 24957 | break; | |||
| 24958 | }else{ | |||
| 24959 | ascii_tokenchar: | |||
| 24960 | if( *zCsr>='A' && *zCsr<='Z' ){ | |||
| 24961 | *zOut++ = *zCsr + 32; | |||
| 24962 | }else{ | |||
| 24963 | *zOut++ = *zCsr; | |||
| 24964 | } | |||
| 24965 | zCsr++; | |||
| 24966 | } | |||
| 24967 | ie = zCsr - (unsigned char*)pText; | |||
| 24968 | } | |||
| 24969 | ||||
| 24970 | /* Invoke the token callback */ | |||
| 24971 | rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); | |||
| 24972 | } | |||
| 24973 | ||||
| 24974 | tokenize_done: | |||
| 24975 | if( rc==SQLITE_DONE101 ) rc = SQLITE_OK0; | |||
| 24976 | return rc; | |||
| 24977 | } | |||
| 24978 | ||||
| 24979 | /************************************************************************** | |||
| 24980 | ** Start of porter stemmer implementation. | |||
| 24981 | */ | |||
| 24982 | ||||
| 24983 | /* Any tokens larger than this (in bytes) are passed through without | |||
| 24984 | ** stemming. */ | |||
| 24985 | #define FTS5_PORTER_MAX_TOKEN64 64 | |||
| 24986 | ||||
| 24987 | typedef struct PorterTokenizer PorterTokenizer; | |||
| 24988 | struct PorterTokenizer { | |||
| 24989 | fts5_tokenizer_v2 tokenizer_v2; /* Parent tokenizer module */ | |||
| 24990 | Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */ | |||
| 24991 | char aBuf[FTS5_PORTER_MAX_TOKEN64 + 64]; | |||
| 24992 | }; | |||
| 24993 | ||||
| 24994 | /* | |||
| 24995 | ** Delete a "porter" tokenizer. | |||
| 24996 | */ | |||
| 24997 | static void fts5PorterDelete(Fts5Tokenizer *pTok){ | |||
| 24998 | if( pTok ){ | |||
| 24999 | PorterTokenizer *p = (PorterTokenizer*)pTok; | |||
| 25000 | if( p->pTokenizer ){ | |||
| 25001 | p->tokenizer_v2.xDelete(p->pTokenizer); | |||
| 25002 | } | |||
| 25003 | sqlite3_freesqlite3_api->free(p); | |||
| 25004 | } | |||
| 25005 | } | |||
| 25006 | ||||
| 25007 | /* | |||
| 25008 | ** Create a "porter" tokenizer. | |||
| 25009 | */ | |||
| 25010 | static int fts5PorterCreate( | |||
| 25011 | void *pCtx, | |||
| 25012 | const char **azArg, int nArg, | |||
| 25013 | Fts5Tokenizer **ppOut | |||
| 25014 | ){ | |||
| 25015 | fts5_api *pApi = (fts5_api*)pCtx; | |||
| 25016 | int rc = SQLITE_OK0; | |||
| 25017 | PorterTokenizer *pRet; | |||
| 25018 | void *pUserdata = 0; | |||
| 25019 | const char *zBase = "unicode61"; | |||
| 25020 | fts5_tokenizer_v2 *pV2 = 0; | |||
| 25021 | ||||
| 25022 | if( nArg>0 ){ | |||
| 25023 | zBase = azArg[0]; | |||
| 25024 | } | |||
| 25025 | ||||
| 25026 | pRet = (PorterTokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(PorterTokenizer)); | |||
| 25027 | if( pRet ){ | |||
| 25028 | memset(pRet, 0, sizeof(PorterTokenizer)); | |||
| 25029 | rc = pApi->xFindTokenizer_v2(pApi, zBase, &pUserdata, &pV2); | |||
| 25030 | }else{ | |||
| 25031 | rc = SQLITE_NOMEM7; | |||
| 25032 | } | |||
| 25033 | if( rc==SQLITE_OK0 ){ | |||
| 25034 | int nArg2 = (nArg>0 ? nArg-1 : 0); | |||
| 25035 | const char **az2 = (nArg2 ? &azArg[1] : 0); | |||
| 25036 | memcpy(&pRet->tokenizer_v2, pV2, sizeof(fts5_tokenizer_v2)); | |||
| 25037 | rc = pRet->tokenizer_v2.xCreate(pUserdata, az2, nArg2, &pRet->pTokenizer); | |||
| 25038 | } | |||
| 25039 | ||||
| 25040 | if( rc!=SQLITE_OK0 ){ | |||
| 25041 | fts5PorterDelete((Fts5Tokenizer*)pRet); | |||
| 25042 | pRet = 0; | |||
| 25043 | } | |||
| 25044 | *ppOut = (Fts5Tokenizer*)pRet; | |||
| 25045 | return rc; | |||
| 25046 | } | |||
| 25047 | ||||
/*
** Bundle of a token callback, its context pointer and a buffer.
** NOTE(review): the code that fills in and consumes this structure is
** not adjacent to it; presumably it carries the caller's xToken/pCtx
** through the parent tokenizer while aBuf points at the stemming scratch
** buffer - confirm against the porter tokenize callback.
*/
typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;                     /* First argument for xToken */
  int (*xToken)(void*, int, const char*, int, int, int);
  char *aBuf;                     /* Buffer */
};
| 25054 | ||||
/*
** One suffix rewriting rule of the porter stemmer:
**
** zSuffix/nSuffix  Suffix to look for, and its length in bytes.
** xCond            Optional condition evaluated against the stem (the
**                  text preceding the suffix). NULL means unconditional.
** zOutput/nOutput  Replacement text for the suffix, and its length.
*/
typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;
  int nSuffix;
  int (*xCond)(char *zStem, int nStem);
  const char *zOutput;
  int nOutput;
};
| 25063 | ||||
#if 0
/*
** Currently compiled out.
**
** Apply the first rule in aRule[] (terminated by an entry with a NULL
** zSuffix) whose suffix matches the end of the *pnBuf byte token in aBuf.
** If that rule has no condition, or its condition holds for the stem, the
** suffix is replaced by the rule's output and *pnBuf is updated.
**
** Returns the index of the rule applied, or -1 if no rule was applied.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int nBuf = *pnBuf;
  PorterRule *pRule = aRule;
  int nStem;

  /* Locate the first rule whose suffix matches the end of the token. */
  while( pRule->zSuffix ){
    assert( strlen(pRule->zSuffix)==pRule->nSuffix );
    assert( strlen(pRule->zOutput)==pRule->nOutput );
    if( nBuf>=pRule->nSuffix
     && 0==memcmp(&aBuf[nBuf - pRule->nSuffix], pRule->zSuffix, pRule->nSuffix)
    ){
      break;
    }
    pRule++;
  }
  if( pRule->zSuffix==0 ) return -1;

  nStem = nBuf - pRule->nSuffix;
  if( pRule->xCond && !pRule->xCond(aBuf, nStem) ) return -1;

  memcpy(&aBuf[nStem], pRule->zOutput, pRule->nOutput);
  *pnBuf = nStem + pRule->nOutput;
  return pRule - aRule;
}
#endif
| 25089 | ||||
/*
** Return 1 if c is one of the lower-case vowels a, e, i, o or u, or if c
** is 'y' and bYIsVowel is non-zero. Return 0 otherwise.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return bYIsVowel!=0;
    default:
      return 0;
  }
}
| 25095 | ||||
/*
** Scan the nStem byte stem zStem for the first vowel and then for the
** first consonant following that vowel. Return the offset of the byte
** just after that consonant, or 0 if no such vowel-consonant sequence
** exists.
**
** 'y' is classed as a vowel only when the preceding character was a
** consonant; bPrevCons supplies that flag for the first byte of zStem.
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int iPos = 0;
  int bWasCons = bPrevCons;

  /* Advance to the first vowel */
  while( iPos<nStem ){
    bWasCons = !fts5PorterIsVowel(zStem[iPos], bWasCons);
    if( !bWasCons ) break;
    iPos++;
  }

  /* Step over the vowel, then look for the next consonant */
  iPos++;
  while( iPos<nStem ){
    bWasCons = !fts5PorterIsVowel(zStem[iPos], bWasCons);
    if( bWasCons ) return iPos+1;
    iPos++;
  }
  return 0;
}
| 25111 | ||||
/*
** porter rule condition: (m > 0)
**
** True if the stem contains at least one vowel-consonant sequence.
*/
static int fts5Porter_MGt0(char *zStem, int nStem){
  return fts5PorterGobbleVC(zStem, nStem, 0)!=0;
}
| 25116 | ||||
/*
** porter rule condition: (m > 1)
**
** True if the stem contains at least two vowel-consonant sequences.
*/
static int fts5Porter_MGt1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0;
}
| 25126 | ||||
/*
** porter rule condition: (m = 1)
**
** True if the stem contains exactly one vowel-consonant sequence.
*/
static int fts5Porter_MEq1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)==0;
}
| 25136 | ||||
/*
** porter rule condition: (*o)
**
** True if the stem ends consonant-vowel-consonant and the final consonant
** is not 'w', 'x' or 'y'. An empty stem never satisfies the condition.
**
** Each character is classified in turn, because whether a 'y' counts as
** a vowel depends on the character before it. Only the classifications
** of the last three characters matter, so just three bits of history are
** kept. The history is unsigned and masked on every step: stems may be
** longer than the number of bits in an int, and left-shifting a signed
** value until it overflows is undefined behaviour.
*/
static int fts5Porter_Ostar(char *zStem, int nStem){
  unsigned int mask = 0;          /* Consonant flags of the last 3 chars */
  int bCons = 0;
  int i;
  char cLast;

  if( nStem<1 ) return 0;
  cLast = zStem[nStem-1];
  if( cLast=='w' || cLast=='x' || cLast=='y' ) return 0;

  for(i=0; i<nStem; i++){
    bCons = !fts5PorterIsVowel(zStem[i], bCons);
    assert( bCons==0 || bCons==1 );
    mask = ((mask << 1) | (unsigned int)bCons) & 0x0007;
  }
  return (mask==0x0005);
}
| 25153 | ||||
/*
** porter rule condition: (m > 1 and (*S or *T))
**
** True if the stem ends in 's' or 't' and contains at least two
** vowel-consonant sequences. The stem must not be empty.
*/
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ) return 0;
  return fts5Porter_MGt1(zStem, nStem);
}
| 25160 | ||||
/*
** porter rule condition: (*v*)
**
** True if the stem contains a vowel. A 'y' counts as a vowel anywhere
** except in the first position.
*/
static int fts5Porter_Vowel(char *zStem, int nStem){
  int i;
  if( nStem>0 && fts5PorterIsVowel(zStem[0], 0) ) return 1;
  for(i=1; i<nStem; i++){
    if( fts5PorterIsVowel(zStem[i], 1) ) return 1;
  }
  return 0;
}
| 25171 | ||||
| 25172 | ||||
| 25173 | /************************************************************************** | |||
| 25174 | *************************************************************************** | |||
| 25175 | ** GENERATED CODE STARTS HERE (mkportersteps.tcl) | |||
| 25176 | */ | |||
| 25177 | ||||
| 25178 | static int fts5PorterStep4(char *aBuf, int *pnBuf){ | |||
| 25179 | int ret = 0; | |||
| 25180 | int nBuf = *pnBuf; | |||
| 25181 | switch( aBuf[nBuf-2] ){ | |||
| 25182 | ||||
| 25183 | case 'a': | |||
| 25184 | if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){ | |||
| 25185 | if( fts5Porter_MGt1(aBuf, nBuf-2) ){ | |||
| 25186 | *pnBuf = nBuf - 2; | |||
| 25187 | } | |||
| 25188 | } | |||
| 25189 | break; | |||
| 25190 | ||||
| 25191 | case 'c': | |||
| 25192 | if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){ | |||
| 25193 | if( fts5Porter_MGt1(aBuf, nBuf-4) ){ | |||
| 25194 | *pnBuf = nBuf - 4; | |||
| 25195 | } | |||
| 25196 | }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){ | |||
| 25197 | if( fts5Porter_MGt1(aBuf, nBuf-4) ){ | |||
| 25198 | *pnBuf = nBuf - 4; | |||
| 25199 | } | |||
| 25200 | } | |||
| 25201 | break; | |||
| 25202 | ||||
| 25203 | case 'e': | |||
| 25204 | if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){ | |||
| 25205 | if( fts5Porter_MGt1(aBuf, nBuf-2) ){ | |||
| 25206 | *pnBuf = nBuf - 2; | |||
| 25207 | } | |||
| 25208 | } | |||
| 25209 | break; | |||
| 25210 | ||||
| 25211 | case 'i': | |||
| 25212 | if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){ | |||
| 25213 | if( fts5Porter_MGt1(aBuf, nBuf-2) ){ | |||
| 25214 | *pnBuf = nBuf - 2; | |||
| 25215 | } | |||
| 25216 | } | |||
| 25217 | break; | |||
| 25218 | ||||
| 25219 | case 'l': | |||
| 25220 | if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){ | |||
| 25221 | if( fts5Porter_MGt1(aBuf, nBuf-4) ){ | |||
| 25222 | *pnBuf = nBuf - 4; | |||
| 25223 | } | |||
| 25224 | }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){ | |||
| 25225 | if( fts5Porter_MGt1(aBuf, nBuf-4) ){ | |||
| 25226 | *pnBuf = nBuf - 4; | |||
| 25227 | } | |||
| 25228 | } | |||
| 25229 | break; | |||
| 25230 | ||||
| 25231 | case 'n': | |||
| 25232 | if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){ | |||
| 25233 | if( fts5Porter_MGt1(aBuf, nBuf-3) ){ | |||
| 25234 | *pnBuf = nBuf - 3; | |||
| 25235 | } | |||
| 25236 | }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){ | |||
| 25237 | if( fts5Porter_MGt1(aBuf, nBuf-5) ){ | |||
| 25238 | *pnBuf = nBuf - 5; | |||
| 25239 | } | |||
| 25240 | }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){ | |||
| 25241 | if( fts5Porter_MGt1(aBuf, nBuf-4) ){ | |||
| 25242 | *pnBuf = nBuf - 4; | |||
| 25243 | } | |||
| 25244 | }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){ | |||
| 25245 | if( fts5Porter_MGt1(aBuf, nBuf-3) ){ | |||
| 25246 | *pnBuf = nBuf - 3; | |||
| 25247 | } | |||
| 25248 | } | |||
| 25249 | break; | |||
| 25250 | ||||
| 25251 | case 'o': | |||
| 25252 | if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){ | |||
| 25253 | if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){ | |||
| 25254 | *pnBuf = nBuf - 3; | |||
| 25255 | } | |||
| 25256 | }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){ | |||
| 25257 | if( fts5Porter_MGt1(aBuf, nBuf-2) ){ | |||
| 25258 | *pnBuf = nBuf - 2; | |||
| 25259 | } | |||
| 25260 | } | |||
| 25261 | break; | |||
| 25262 | ||||
| 25263 | case 's': | |||
| 25264 | if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){ | |||
| 25265 | if( fts5Porter_MGt1(aBuf, nBuf-3) ){ | |||
| 25266 | *pnBuf = nBuf - 3; | |||
| 25267 | } | |||
| 25268 | } | |||
| 25269 | break; | |||
| 25270 | ||||
| 25271 | case 't': | |||
| 25272 | if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){ | |||
| 25273 | if( fts5Porter_MGt1(aBuf, nBuf-3) ){ | |||
| 25274 | *pnBuf = nBuf - 3; | |||
| 25275 | } | |||
| 25276 | }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){ | |||
| 25277 | if( fts5Porter_MGt1(aBuf, nBuf-3) ){ | |||
| 25278 | *pnBuf = nBuf - 3; | |||
| 25279 | } | |||
| 25280 | } | |||
| 25281 | break; | |||
| 25282 | ||||
| 25283 | case 'u': | |||
| 25284 | if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){ | |||
| 25285 | if( fts5Porter_MGt1(aBuf, nBuf-3) ){ | |||
| 25286 | *pnBuf = nBuf - 3; | |||
| 25287 | } | |||
| 25288 | } | |||
| 25289 | break; | |||
| 25290 | ||||
| 25291 | case 'v': | |||
| 25292 | if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){ | |||
| 25293 | if( fts5Porter_MGt1(aBuf, nBuf-3) ){ | |||
| 25294 | *pnBuf = nBuf - 3; | |||
| 25295 | } | |||
| 25296 | } | |||
| 25297 | break; | |||
| 25298 | ||||
| 25299 | case 'z': | |||
| 25300 | if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){ | |||
| 25301 | if( fts5Porter_MGt1(aBuf, nBuf-3) ){ | |||
| 25302 | *pnBuf = nBuf - 3; | |||
| 25303 | } | |||
| 25304 | } | |||
| 25305 | break; | |||
| 25306 | ||||
| 25307 | } | |||
| 25308 | return ret; | |||
| 25309 | } | |||
| 25310 | ||||
| 25311 | ||||
/*
** Porter stemmer step 1B, second part: if the *pnBuf byte token in aBuf
** is longer than two bytes and ends in "at", "bl" or "iz", append an 'e'
** (giving "ate", "ble" or "ize"), increase *pnBuf by one and return 1.
** Otherwise leave everything untouched and return 0.
**
** The caller must ensure aBuf has room for the extra byte. This is a
** table-driven equivalent of the switch statement produced by
** mkportersteps.tcl.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  static const struct {
    const char *zSuffix;          /* Two byte suffix to look for */
    const char *zOutput;          /* Three byte replacement */
  } aRule[] = {
    { "at", "ate" },
    { "bl", "ble" },
    { "iz", "ize" },
  };
  int nBuf = *pnBuf;
  int i;

  if( nBuf>2 ){
    for(i=0; i<(int)(sizeof(aRule)/sizeof(aRule[0])); i++){
      if( 0==memcmp(aRule[i].zSuffix, &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], aRule[i].zOutput, 3);
        *pnBuf = nBuf - 2 + 3;
        return 1;
      }
    }
  }
  return 0;
}
| 25344 | ||||
| 25345 | ||||
/*
** Porter stemmer step 2: replace one of a fixed set of suffixes with a
** shorter form.
**
** The rules are tried in the order listed. The first rule whose suffix
** matches the end of the *pnBuf byte token in aBuf (which must be longer
** than the suffix) is selected; no later rule is considered. If the
** remaining stem satisfies (m > 0), the suffix is overwritten with the
** rule's output and *pnBuf is updated to the new length.
**
** The table order matters: "ational" precedes "tional" and "ization"
** precedes "ation". This is a table-driven equivalent of the switch
** statement produced by mkportersteps.tcl. Always returns 0.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  static const struct {
    const char *zSuffix;          /* Suffix to replace */
    int nSuffix;                  /* Size of zSuffix in bytes */
    const char *zOutput;          /* Replacement text */
    int nOutput;                  /* Size of zOutput in bytes */
  } aRule[] = {
    { "ational", 7, "ate",  3 },
    { "tional",  6, "tion", 4 },
    { "enci",    4, "ence", 4 },
    { "anci",    4, "ance", 4 },
    { "izer",    4, "ize",  3 },
    { "logi",    4, "log",  3 },
    { "bli",     3, "ble",  3 },
    { "alli",    4, "al",   2 },
    { "entli",   5, "ent",  3 },
    { "eli",     3, "e",    1 },
    { "ousli",   5, "ous",  3 },
    { "ization", 7, "ize",  3 },
    { "ation",   5, "ate",  3 },
    { "ator",    4, "ate",  3 },
    { "alism",   5, "al",   2 },
    { "iveness", 7, "ive",  3 },
    { "fulness", 7, "ful",  3 },
    { "ousness", 7, "ous",  3 },
    { "aliti",   5, "al",   2 },
    { "iviti",   5, "ive",  3 },
    { "biliti",  6, "ble",  3 },
  };
  int nBuf = *pnBuf;
  int i;

  for(i=0; i<(int)(sizeof(aRule)/sizeof(aRule[0])); i++){
    int n = aRule[i].nSuffix;
    if( nBuf>n && 0==memcmp(aRule[i].zSuffix, &aBuf[nBuf-n], n) ){
      if( fts5Porter_MGt0(aBuf, nBuf-n) ){
        memcpy(&aBuf[nBuf-n], aRule[i].zOutput, aRule[i].nOutput);
        *pnBuf = nBuf - n + aRule[i].nOutput;
      }
      break;
    }
  }
  return 0;
}
| 25491 | ||||
| 25492 | ||||
/*
** Porter stemmer step 3.
**
** aBuf[] holds the *pnBuf bytes of the word being stemmed. If the word
** ends with one of the suffixes below, and fts5Porter_MGt0() accepts the
** stem that precedes the suffix, the suffix is rewritten in place and
** *pnBuf is updated to the new length:
**
**     -ical  -> -ic       -icate -> -ic       -iciti -> -ic
**     -alize -> -al       -ness, -ful, -ative -> (removed)
**
** At most one rule is applied. Always returns 0.
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* The rules are dispatched on the second-to-last byte. No rule can match
  ** a word shorter than 4 bytes, so returning early for nBuf<2 changes no
  ** result; it only avoids reading aBuf[-1] (earlier steps can presumably
  ** leave a single-byte word here). */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ic", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }
      break;

    case 's':
      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }
      break;

    case 'z':
      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

  }
  return ret;
}
| 25557 | ||||
| 25558 | ||||
/*
** Porter stemmer step 1B.
**
** aBuf[] holds the *pnBuf bytes of the word being stemmed. The rules,
** tried in this order, are:
**
**     -eed -> -ee     if fts5Porter_MGt0() accepts the preceding stem
**     -ed  -> (removed)  if fts5Porter_Vowel() accepts the preceding stem
**     -ing -> (removed)  if fts5Porter_Vowel() accepts the preceding stem
**
** A word that ends in "eed" is never considered for the "-ed" rule, even
** if the "-eed" rule itself is rejected.
**
** Returns 1 if an "-ed" or "-ing" suffix was removed (the caller uses this
** to decide whether to run the follow-up step), otherwise 0.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* The dispatch byte is aBuf[nBuf-2]. No rule can match a word shorter
  ** than 3 bytes, so this early return changes no result; it only keeps
  ** the read inside the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'e':
      if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ee", 2);
          *pnBuf = nBuf - 3 + 2;
        }
      }else if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_Vowel(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
          ret = 1;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_Vowel(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
          ret = 1;
        }
      }
      break;

  }
  return ret;
}
| 25590 | ||||
| 25591 | /* | |||
| 25592 | ** GENERATED CODE ENDS HERE (mkportersteps.tcl) | |||
| 25593 | *************************************************************************** | |||
| 25594 | **************************************************************************/ | |||
| 25595 | ||||
/*
** Porter stemmer step 1A: plural handling.
**
** aBuf[] holds the *pnBuf bytes of the word being stemmed. Only the length
** is adjusted; the buffer contents are not modified:
**
**     -sses -> -ss     (word longer than 4 bytes)
**     -ies  -> -i      (word longer than 3 bytes)
**     -ss   -> -ss     (unchanged)
**     -s    -> (removed)
**
** A word ending in "es" that matches neither of the first two rules just
** loses its final "s".
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  int nBuf = *pnBuf;

  /* Each rule inspects aBuf[nBuf-2]; a word shorter than 2 bytes is left
  ** untouched rather than read before the start of the buffer. */
  if( nBuf<2 ) return;

  if( aBuf[nBuf-1]=='s' ){
    if( aBuf[nBuf-2]=='e' ){
      if( (nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s')
       || (nBuf>3 && aBuf[nBuf-3]=='i' )
      ){
        *pnBuf = nBuf-2;
      }else{
        *pnBuf = nBuf-1;
      }
    }
    else if( aBuf[nBuf-2]!='s' ){
      *pnBuf = nBuf-1;
    }
  }
}
| 25613 | ||||
| 25614 | static int fts5PorterCb( | |||
| 25615 | void *pCtx, | |||
| 25616 | int tflags, | |||
| 25617 | const char *pToken, | |||
| 25618 | int nToken, | |||
| 25619 | int iStart, | |||
| 25620 | int iEnd | |||
| 25621 | ){ | |||
| 25622 | PorterContext *p = (PorterContext*)pCtx; | |||
| 25623 | ||||
| 25624 | char *aBuf; | |||
| 25625 | int nBuf; | |||
| 25626 | ||||
| 25627 | if( nToken>FTS5_PORTER_MAX_TOKEN64 || nToken<3 ) goto pass_through; | |||
| 25628 | aBuf = p->aBuf; | |||
| 25629 | nBuf = nToken; | |||
| 25630 | memcpy(aBuf, pToken, nBuf); | |||
| 25631 | ||||
| 25632 | /* Step 1. */ | |||
| 25633 | fts5PorterStep1A(aBuf, &nBuf); | |||
| 25634 | if( fts5PorterStep1B(aBuf, &nBuf) ){ | |||
| 25635 | if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){ | |||
| 25636 | char c = aBuf[nBuf-1]; | |||
| 25637 | if( fts5PorterIsVowel(c, 0)==0 | |||
| 25638 | && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] | |||
| 25639 | ){ | |||
| 25640 | nBuf--; | |||
| 25641 | }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){ | |||
| 25642 | aBuf[nBuf++] = 'e'; | |||
| 25643 | } | |||
| 25644 | } | |||
| 25645 | } | |||
| 25646 | ||||
| 25647 | /* Step 1C. */ | |||
| 25648 | if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){ | |||
| 25649 | aBuf[nBuf-1] = 'i'; | |||
| 25650 | } | |||
| 25651 | ||||
| 25652 | /* Steps 2 through 4. */ | |||
| 25653 | fts5PorterStep2(aBuf, &nBuf); | |||
| 25654 | fts5PorterStep3(aBuf, &nBuf); | |||
| 25655 | fts5PorterStep4(aBuf, &nBuf); | |||
| 25656 | ||||
| 25657 | /* Step 5a. */ | |||
| 25658 | assert( nBuf>0 )((void) (0)); | |||
| 25659 | if( aBuf[nBuf-1]=='e' ){ | |||
| 25660 | if( fts5Porter_MGt1(aBuf, nBuf-1) | |||
| 25661 | || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) | |||
| 25662 | ){ | |||
| 25663 | nBuf--; | |||
| 25664 | } | |||
| 25665 | } | |||
| 25666 | ||||
| 25667 | /* Step 5b. */ | |||
| 25668 | if( nBuf>1 && aBuf[nBuf-1]=='l' | |||
| 25669 | && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) | |||
| 25670 | ){ | |||
| 25671 | nBuf--; | |||
| 25672 | } | |||
| 25673 | ||||
| 25674 | return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); | |||
| 25675 | ||||
| 25676 | pass_through: | |||
| 25677 | return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); | |||
| 25678 | } | |||
| 25679 | ||||
| 25680 | /* | |||
| 25681 | ** Tokenize using the porter tokenizer. | |||
| 25682 | */ | |||
| 25683 | static int fts5PorterTokenize( | |||
| 25684 | Fts5Tokenizer *pTokenizer, | |||
| 25685 | void *pCtx, | |||
| 25686 | int flags, | |||
| 25687 | const char *pText, int nText, | |||
| 25688 | const char *pLoc, int nLoc, | |||
| 25689 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) | |||
| 25690 | ){ | |||
| 25691 | PorterTokenizer *p = (PorterTokenizer*)pTokenizer; | |||
| 25692 | PorterContext sCtx; | |||
| 25693 | sCtx.xToken = xToken; | |||
| 25694 | sCtx.pCtx = pCtx; | |||
| 25695 | sCtx.aBuf = p->aBuf; | |||
| 25696 | return p->tokenizer_v2.xTokenize( | |||
| 25697 | p->pTokenizer, (void*)&sCtx, flags, pText, nText, pLoc, nLoc, fts5PorterCb | |||
| 25698 | ); | |||
| 25699 | } | |||
| 25700 | ||||
| 25701 | /************************************************************************** | |||
| 25702 | ** Start of trigram implementation. | |||
| 25703 | */ | |||
/*
** State of a trigram tokenizer instance, as configured by fts5TriCreate().
*/
typedef struct TrigramTokenizer TrigramTokenizer;
struct TrigramTokenizer {
  int bFold;                      /* True to fold to lower-case */
  int iFoldParam;                 /* Parameter to pass to Fts5UnicodeFold() */
};
| 25709 | ||||
| 25710 | /* | |||
| 25711 | ** Free a trigram tokenizer. | |||
| 25712 | */ | |||
| 25713 | static void fts5TriDelete(Fts5Tokenizer *p){ | |||
| 25714 | sqlite3_freesqlite3_api->free(p); | |||
| 25715 | } | |||
| 25716 | ||||
| 25717 | /* | |||
| 25718 | ** Allocate a trigram tokenizer. | |||
| 25719 | */ | |||
| 25720 | static int fts5TriCreate( | |||
| 25721 | void *pUnused, | |||
| 25722 | const char **azArg, | |||
| 25723 | int nArg, | |||
| 25724 | Fts5Tokenizer **ppOut | |||
| 25725 | ){ | |||
| 25726 | int rc = SQLITE_OK0; | |||
| 25727 | TrigramTokenizer *pNew = 0; | |||
| 25728 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
| 25729 | if( nArg%2 ){ | |||
| 25730 | rc = SQLITE_ERROR1; | |||
| 25731 | }else{ | |||
| 25732 | int i; | |||
| 25733 | pNew = (TrigramTokenizer*)sqlite3_mallocsqlite3_api->malloc(sizeof(*pNew)); | |||
| 25734 | if( pNew==0 ){ | |||
| 25735 | rc = SQLITE_NOMEM7; | |||
| 25736 | }else{ | |||
| 25737 | pNew->bFold = 1; | |||
| 25738 | pNew->iFoldParam = 0; | |||
| 25739 | ||||
| 25740 | for(i=0; rc==SQLITE_OK0 && i<nArg; i+=2){ | |||
| 25741 | const char *zArg = azArg[i+1]; | |||
| 25742 | if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "case_sensitive") ){ | |||
| 25743 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){ | |||
| 25744 | rc = SQLITE_ERROR1; | |||
| 25745 | }else{ | |||
| 25746 | pNew->bFold = (zArg[0]=='0'); | |||
| 25747 | } | |||
| 25748 | }else if( 0==sqlite3_stricmpsqlite3_api->stricmp(azArg[i], "remove_diacritics") ){ | |||
| 25749 | if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ | |||
| 25750 | rc = SQLITE_ERROR1; | |||
| 25751 | }else{ | |||
| 25752 | pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; | |||
| 25753 | } | |||
| 25754 | }else{ | |||
| 25755 | rc = SQLITE_ERROR1; | |||
| 25756 | } | |||
| 25757 | } | |||
| 25758 | ||||
| 25759 | if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ | |||
| 25760 | rc = SQLITE_ERROR1; | |||
| 25761 | } | |||
| 25762 | ||||
| 25763 | if( rc!=SQLITE_OK0 ){ | |||
| 25764 | fts5TriDelete((Fts5Tokenizer*)pNew); | |||
| 25765 | pNew = 0; | |||
| 25766 | } | |||
| 25767 | } | |||
| 25768 | } | |||
| 25769 | *ppOut = (Fts5Tokenizer*)pNew; | |||
| 25770 | return rc; | |||
| 25771 | } | |||
| 25772 | ||||
| 25773 | /* | |||
| 25774 | ** Trigram tokenizer tokenize routine. | |||
| 25775 | */ | |||
| 25776 | static int fts5TriTokenize( | |||
| 25777 | Fts5Tokenizer *pTok, | |||
| 25778 | void *pCtx, | |||
| 25779 | int unusedFlags, | |||
| 25780 | const char *pText, int nText, | |||
| 25781 | int (*xToken)(void*, int, const char*, int, int, int) | |||
| 25782 | ){ | |||
| 25783 | TrigramTokenizer *p = (TrigramTokenizer*)pTok; | |||
| 25784 | int rc = SQLITE_OK0; | |||
| 25785 | char aBuf[32]; | |||
| 25786 | char *zOut = aBuf; | |||
| 25787 | int ii; | |||
| 25788 | const unsigned char *zIn = (const unsigned char*)pText; | |||
| 25789 | const unsigned char *zEof = (zIn ? &zIn[nText] : 0); | |||
| 25790 | u32 iCode = 0; | |||
| 25791 | int aStart[3]; /* Input offset of each character in aBuf[] */ | |||
| 25792 | ||||
| 25793 | UNUSED_PARAM(unusedFlags)(void)(unusedFlags); | |||
| 25794 | ||||
| 25795 | /* Populate aBuf[] with the characters for the first trigram. */ | |||
| 25796 | for(ii=0; ii<3; ii++){ | |||
| 25797 | do { | |||
| 25798 | aStart[ii] = zIn - (const unsigned char*)pText; | |||
| 25799 | if( zIn>=zEof ) return SQLITE_OK0; | |||
| 25800 | READ_UTF8(zIn, zEof, iCode)iCode = *(zIn++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zIn<zEof && (*zIn & 0xc0) ==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zIn++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
| 25801 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); | |||
| 25802 | }while( iCode==0 ); | |||
| 25803 | WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | |||
| 25804 | } | |||
| 25805 | ||||
| 25806 | /* At the start of each iteration of this loop: | |||
| 25807 | ** | |||
| 25808 | ** aBuf: Contains 3 characters. The 3 characters of the next trigram. | |||
| 25809 | ** zOut: Points to the byte following the last character in aBuf. | |||
| 25810 | ** aStart[3]: Contains the byte offset in the input text corresponding | |||
| 25811 | ** to the start of each of the three characters in the buffer. | |||
| 25812 | */ | |||
| 25813 | assert( zIn<=zEof )((void) (0)); | |||
| 25814 | while( 1 ){ | |||
| 25815 | int iNext; /* Start of character following current tri */ | |||
| 25816 | const char *z1; | |||
| 25817 | ||||
| 25818 | /* Read characters from the input up until the first non-diacritic */ | |||
| 25819 | do { | |||
| 25820 | iNext = zIn - (const unsigned char*)pText; | |||
| 25821 | if( zIn>=zEof ){ | |||
| 25822 | iCode = 0; | |||
| 25823 | break; | |||
| 25824 | } | |||
| 25825 | READ_UTF8(zIn, zEof, iCode)iCode = *(zIn++); if( iCode>=0xc0 ){ iCode = sqlite3Utf8Trans1 [iCode-0xc0]; while( zIn<zEof && (*zIn & 0xc0) ==0x80 ){ iCode = (iCode<<6) + (0x3f & *(zIn++)); } if( iCode<0x80 || (iCode&0xFFFFF800)==0xD800 || (iCode &0xFFFFFFFE)==0xFFFE ){ iCode = 0xFFFD; } }; | |||
| 25826 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); | |||
| 25827 | }while( iCode==0 ); | |||
| 25828 | ||||
| 25829 | /* Pass the current trigram back to fts5 */ | |||
| 25830 | rc = xToken(pCtx, 0, aBuf, zOut-aBuf, aStart[0], iNext); | |||
| 25831 | if( iCode==0 || rc!=SQLITE_OK0 ) break; | |||
| 25832 | ||||
| 25833 | /* Remove the first character from buffer aBuf[]. Append the character | |||
| 25834 | ** with codepoint iCode. */ | |||
| 25835 | z1 = aBuf; | |||
| 25836 | FTS5_SKIP_UTF8(z1){ if( ((unsigned char)(*(z1++)))>=0xc0 ){ while( (((unsigned char)*z1) & 0xc0)==0x80 ){ z1++; } } }; | |||
| 25837 | memmove(aBuf, z1, zOut - z1); | |||
| 25838 | zOut -= (z1 - aBuf); | |||
| 25839 | WRITE_UTF8(zOut, iCode){ if( iCode<0x00080 ){ *zOut++ = (unsigned char)(iCode& 0xFF); } else if( iCode<0x00800 ){ *zOut++ = 0xC0 + (unsigned char)((iCode>>6)&0x1F); *zOut++ = 0x80 + (unsigned char)(iCode & 0x3F); } else if( iCode<0x10000 ){ *zOut ++ = 0xE0 + (unsigned char)((iCode>>12)&0x0F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); }else{ *zOut++ = 0xF0 + (unsigned char)((iCode>>18) & 0x07); *zOut ++ = 0x80 + (unsigned char)((iCode>>12) & 0x3F); *zOut ++ = 0x80 + (unsigned char)((iCode>>6) & 0x3F); *zOut ++ = 0x80 + (unsigned char)(iCode & 0x3F); } }; | |||
| 25840 | ||||
| 25841 | /* Update the aStart[] array */ | |||
| 25842 | aStart[0] = aStart[1]; | |||
| 25843 | aStart[1] = aStart[2]; | |||
| 25844 | aStart[2] = iNext; | |||
| 25845 | } | |||
| 25846 | ||||
| 25847 | return rc; | |||
| 25848 | } | |||
| 25849 | ||||
| 25850 | /* | |||
| 25851 | ** Argument xCreate is a pointer to a constructor function for a tokenizer. | |||
| 25852 | ** pTok is a tokenizer previously created using the same method. This function | |||
| 25853 | ** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB | |||
| 25854 | ** indicating the style of pattern matching that the tokenizer can support. | |||
| 25855 | ** In practice, this is: | |||
| 25856 | ** | |||
| 25857 | ** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB | |||
| 25858 | ** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE | |||
| 25859 | ** all other tokenizers - FTS5_PATTERN_NONE | |||
| 25860 | */ | |||
| 25861 | static int sqlite3Fts5TokenizerPattern( | |||
| 25862 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), | |||
| 25863 | Fts5Tokenizer *pTok | |||
| 25864 | ){ | |||
| 25865 | if( xCreate==fts5TriCreate ){ | |||
| 25866 | TrigramTokenizer *p = (TrigramTokenizer*)pTok; | |||
| 25867 | if( p->iFoldParam==0 ){ | |||
| 25868 | return p->bFold ? FTS5_PATTERN_LIKE65 : FTS5_PATTERN_GLOB66; | |||
| 25869 | } | |||
| 25870 | } | |||
| 25871 | return FTS5_PATTERN_NONE0; | |||
| 25872 | } | |||
| 25873 | ||||
| 25874 | /* | |||
| 25875 | ** Return true if the tokenizer described by p->azArg[] is the trigram | |||
| 25876 | ** tokenizer. This tokenizer needs to be loaded before xBestIndex is | |||
| 25877 | ** called for the first time in order to correctly handle LIKE/GLOB. | |||
| 25878 | */ | |||
| 25879 | static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig *p){ | |||
| 25880 | return (p->nArg>=1 && 0==sqlite3_stricmpsqlite3_api->stricmp(p->azArg[0], "trigram")); | |||
| 25881 | } | |||
| 25882 | ||||
| 25883 | ||||
| 25884 | /* | |||
| 25885 | ** Register all built-in tokenizers with FTS5. | |||
| 25886 | */ | |||
| 25887 | static int sqlite3Fts5TokenizerInit(fts5_api *pApi){ | |||
| 25888 | struct BuiltinTokenizer { | |||
| 25889 | const char *zName; | |||
| 25890 | fts5_tokenizer x; | |||
| 25891 | } aBuiltin[] = { | |||
| 25892 | { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, | |||
| 25893 | { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, | |||
| 25894 | { "trigram", {fts5TriCreate, fts5TriDelete, fts5TriTokenize}}, | |||
| 25895 | }; | |||
| 25896 | ||||
| 25897 | int rc = SQLITE_OK0; /* Return code */ | |||
| 25898 | int i; /* To iterate through builtin functions */ | |||
| 25899 | ||||
| 25900 | for(i=0; rc==SQLITE_OK0 && i<ArraySize(aBuiltin)((int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]))); i++){ | |||
| 25901 | rc = pApi->xCreateTokenizer(pApi, | |||
| 25902 | aBuiltin[i].zName, | |||
| 25903 | (void*)pApi, | |||
| 25904 | &aBuiltin[i].x, | |||
| 25905 | 0 | |||
| 25906 | ); | |||
| 25907 | } | |||
| 25908 | if( rc==SQLITE_OK0 ){ | |||
| 25909 | fts5_tokenizer_v2 sPorter = { | |||
| 25910 | 2, | |||
| 25911 | fts5PorterCreate, | |||
| 25912 | fts5PorterDelete, | |||
| 25913 | fts5PorterTokenize | |||
| 25914 | }; | |||
| 25915 | rc = pApi->xCreateTokenizer_v2(pApi, | |||
| 25916 | "porter", | |||
| 25917 | (void*)pApi, | |||
| 25918 | &sPorter, | |||
| 25919 | 0 | |||
| 25920 | ); | |||
| 25921 | } | |||
| 25922 | return rc; | |||
| 25923 | } | |||
| 25924 | ||||
| 25925 | #line 1 "fts5_unicode2.c" | |||
| 25926 | /* | |||
| 25927 | ** 2012-05-25 | |||
| 25928 | ** | |||
| 25929 | ** The author disclaims copyright to this source code. In place of | |||
| 25930 | ** a legal notice, here is a blessing: | |||
| 25931 | ** | |||
| 25932 | ** May you do good and not evil. | |||
| 25933 | ** May you find forgiveness for yourself and forgive others. | |||
| 25934 | ** May you share freely, never taking more than you give. | |||
| 25935 | ** | |||
| 25936 | ****************************************************************************** | |||
| 25937 | */ | |||
| 25938 | ||||
| 25939 | /* | |||
| 25940 | ** DO NOT EDIT THIS MACHINE GENERATED FILE. | |||
| 25941 | */ | |||
| 25942 | ||||
| 25943 | ||||
| 25944 | #include <assert.h> | |||
| 25945 | ||||
| 25946 | ||||
| 25947 | ||||
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** Each aDia[] entry describes a run of codepoints: the upper 13 bits are
** the first codepoint of the run and the low 3 bits are the number of
** further codepoints in it. aChar[] holds the replacement letter for the
** run at the same index. A replacement with the HIBIT flag set is only
** applied when bComplex is non-zero; when bComplex is zero such
** codepoints are returned unchanged. A codepoint that falls outside
** every run is returned unchanged.
*/
static int fts5_remove_diacritic(int c, int bComplex){
  /* Read-only lookup tables. Declared static const so that they are not
  ** rebuilt on the stack each time the function is called. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
#define HIBIT ((unsigned char)0x80)
  static const unsigned char aChar[] = {
    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  /* Setting the low 3 bits of the key makes it compare >= every entry
  ** whose first codepoint is <= c, whatever that entry's count field. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Binary search for the last entry that is <= key */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}
| 26017 | ||||
| 26018 | ||||
/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
**
** Only codepoints 768 through 817 are considered. mask0 has one bit per
** codepoint for 768..799 and mask1 for 800..817; the return value is the
** selected bit of the relevant mask (non-zero, but not necessarily 1, for
** a diacritic) or 0.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}
| 26031 | ||||
| 26032 | ||||
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** For codepoints in the range 128..65535, if eRemoveDiacritic is non-zero
** the folded result is additionally passed through
** fts5_remove_diacritic(), with "complex" replacements enabled only when
** eRemoveDiacritic==2. Codepoints below 128 and above 65535 are never
** passed to fts5_remove_diacritic().
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
static int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
  ** every second codepoint in the range, starting with codepoint iCode.
  **
  ** The 7 most significant bits in flags are an index into the aiOff[]
  ** array. If a specific codepoint C does require folding, then its lower
  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   37,    38,    40,    48,    63,    64,    69,    71,
   79,    80,    116,   202,   203,   205,   206,   207,
   209,   210,   211,   213,   214,   217,   218,   219,
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
   65514, 65521, 65527, 65528, 65529,
  };

  int ret = c;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    /* ASCII: only 'A'..'Z' are folded */
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    int iRes = -1;

    /* Binary search for the last entry with iCode<=c */
    assert( c>aEntry[0].iCode );
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      int cmp = (c - aEntry[iTest].iCode);
      if( cmp>=0 ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }

    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    /* Fold if c lies inside the range and, for "every second codepoint"
    ** rules (low bit of flags set), has the same parity as iCode. */
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }

    if( eRemoveDiacritic ){
      ret = fts5_remove_diacritic(ret, eRemoveDiacritic==2);
    }
  }

  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}
| 26176 | ||||
| 26177 | ||||
| 26178 | static int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ | |||
| 26179 | aArray[0] = 1; | |||
| 26180 | switch( zCat[0] ){ | |||
| 26181 | case 'C': | |||
| 26182 | switch( zCat[1] ){ | |||
| 26183 | case 'c': aArray[1] = 1; break; | |||
| 26184 | case 'f': aArray[2] = 1; break; | |||
| 26185 | case 'n': aArray[3] = 1; break; | |||
| 26186 | case 's': aArray[4] = 1; break; | |||
| 26187 | case 'o': aArray[31] = 1; break; | |||
| 26188 | case '*': | |||
| 26189 | aArray[1] = 1; | |||
| 26190 | aArray[2] = 1; | |||
| 26191 | aArray[3] = 1; | |||
| 26192 | aArray[4] = 1; | |||
| 26193 | aArray[31] = 1; | |||
| 26194 | break; | |||
| 26195 | default: return 1; } | |||
| 26196 | break; | |||
| 26197 | ||||
| 26198 | case 'L': | |||
| 26199 | switch( zCat[1] ){ | |||
| 26200 | case 'l': aArray[5] = 1; break; | |||
| 26201 | case 'm': aArray[6] = 1; break; | |||
| 26202 | case 'o': aArray[7] = 1; break; | |||
| 26203 | case 't': aArray[8] = 1; break; | |||
| 26204 | case 'u': aArray[9] = 1; break; | |||
| 26205 | case 'C': aArray[30] = 1; break; | |||
| 26206 | case '*': | |||
| 26207 | aArray[5] = 1; | |||
| 26208 | aArray[6] = 1; | |||
| 26209 | aArray[7] = 1; | |||
| 26210 | aArray[8] = 1; | |||
| 26211 | aArray[9] = 1; | |||
| 26212 | aArray[30] = 1; | |||
| 26213 | break; | |||
| 26214 | default: return 1; } | |||
| 26215 | break; | |||
| 26216 | ||||
| 26217 | case 'M': | |||
| 26218 | switch( zCat[1] ){ | |||
| 26219 | case 'c': aArray[10] = 1; break; | |||
| 26220 | case 'e': aArray[11] = 1; break; | |||
| 26221 | case 'n': aArray[12] = 1; break; | |||
| 26222 | case '*': | |||
| 26223 | aArray[10] = 1; | |||
| 26224 | aArray[11] = 1; | |||
| 26225 | aArray[12] = 1; | |||
| 26226 | break; | |||
| 26227 | default: return 1; } | |||
| 26228 | break; | |||
| 26229 | ||||
| 26230 | case 'N': | |||
| 26231 | switch( zCat[1] ){ | |||
| 26232 | case 'd': aArray[13] = 1; break; | |||
| 26233 | case 'l': aArray[14] = 1; break; | |||
| 26234 | case 'o': aArray[15] = 1; break; | |||
| 26235 | case '*': | |||
| 26236 | aArray[13] = 1; | |||
| 26237 | aArray[14] = 1; | |||
| 26238 | aArray[15] = 1; | |||
| 26239 | break; | |||
| 26240 | default: return 1; } | |||
| 26241 | break; | |||
| 26242 | ||||
| 26243 | case 'P': | |||
| 26244 | switch( zCat[1] ){ | |||
| 26245 | case 'c': aArray[16] = 1; break; | |||
| 26246 | case 'd': aArray[17] = 1; break; | |||
| 26247 | case 'e': aArray[18] = 1; break; | |||
| 26248 | case 'f': aArray[19] = 1; break; | |||
| 26249 | case 'i': aArray[20] = 1; break; | |||
| 26250 | case 'o': aArray[21] = 1; break; | |||
| 26251 | case 's': aArray[22] = 1; break; | |||
| 26252 | case '*': | |||
| 26253 | aArray[16] = 1; | |||
| 26254 | aArray[17] = 1; | |||
| 26255 | aArray[18] = 1; | |||
| 26256 | aArray[19] = 1; | |||
| 26257 | aArray[20] = 1; | |||
| 26258 | aArray[21] = 1; | |||
| 26259 | aArray[22] = 1; | |||
| 26260 | break; | |||
| 26261 | default: return 1; } | |||
| 26262 | break; | |||
| 26263 | ||||
| 26264 | case 'S': | |||
| 26265 | switch( zCat[1] ){ | |||
| 26266 | case 'c': aArray[23] = 1; break; | |||
| 26267 | case 'k': aArray[24] = 1; break; | |||
| 26268 | case 'm': aArray[25] = 1; break; | |||
| 26269 | case 'o': aArray[26] = 1; break; | |||
| 26270 | case '*': | |||
| 26271 | aArray[23] = 1; | |||
| 26272 | aArray[24] = 1; | |||
| 26273 | aArray[25] = 1; | |||
| 26274 | aArray[26] = 1; | |||
| 26275 | break; | |||
| 26276 | default: return 1; } | |||
| 26277 | break; | |||
| 26278 | ||||
| 26279 | case 'Z': | |||
| 26280 | switch( zCat[1] ){ | |||
| 26281 | case 'l': aArray[27] = 1; break; | |||
| 26282 | case 'p': aArray[28] = 1; break; | |||
| 26283 | case 's': aArray[29] = 1; break; | |||
| 26284 | case '*': | |||
| 26285 | aArray[27] = 1; | |||
| 26286 | aArray[28] = 1; | |||
| 26287 | aArray[29] = 1; | |||
| 26288 | break; | |||
| 26289 | default: return 1; } | |||
| 26290 | break; | |||
| 26291 | ||||
| 26292 | ||||
| 26293 | default: | |||
| 26294 | return 1; | |||
| 26295 | } | |||
| 26296 | return 0; | |||
| 26297 | } | |||
| 26298 | ||||
| 26299 | static u16 aFts5UnicodeBlock[] = { | |||
| 26300 | 0, 1471, 1753, 1760, 1760, 1760, 1760, 1760, 1760, 1760, | |||
| 26301 | 1760, 1760, 1760, 1760, 1760, 1763, 1765, | |||
| 26302 | }; | |||
| 26303 | static u16 aFts5UnicodeMap[] = { | |||
| 26304 | 0, 32, 33, 36, 37, 40, 41, 42, 43, 44, | |||
| 26305 | 45, 46, 48, 58, 60, 63, 65, 91, 92, 93, | |||
| 26306 | 94, 95, 96, 97, 123, 124, 125, 126, 127, 160, | |||
| 26307 | 161, 162, 166, 167, 168, 169, 170, 171, 172, 173, | |||
| 26308 | 174, 175, 176, 177, 178, 180, 181, 182, 184, 185, | |||
| 26309 | 186, 187, 188, 191, 192, 215, 216, 223, 247, 248, | |||
| 26310 | 256, 312, 313, 329, 330, 377, 383, 385, 387, 388, | |||
| 26311 | 391, 394, 396, 398, 402, 403, 405, 406, 409, 412, | |||
| 26312 | 414, 415, 417, 418, 423, 427, 428, 431, 434, 436, | |||
| 26313 | 437, 440, 442, 443, 444, 446, 448, 452, 453, 454, | |||
| 26314 | 455, 456, 457, 458, 459, 460, 461, 477, 478, 496, | |||
| 26315 | 497, 498, 499, 500, 503, 505, 506, 564, 570, 572, | |||
| 26316 | 573, 575, 577, 580, 583, 584, 592, 660, 661, 688, | |||
| 26317 | 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, | |||
| 26318 | 880, 884, 885, 886, 890, 891, 894, 900, 902, 903, | |||
| 26319 | 904, 908, 910, 912, 913, 931, 940, 975, 977, 978, | |||
| 26320 | 981, 984, 1008, 1012, 1014, 1015, 1018, 1020, 1021, 1072, | |||
| 26321 | 1120, 1154, 1155, 1160, 1162, 1217, 1231, 1232, 1329, 1369, | |||
| 26322 | 1370, 1377, 1417, 1418, 1423, 1425, 1470, 1471, 1472, 1473, | |||
| 26323 | 1475, 1476, 1478, 1479, 1488, 1520, 1523, 1536, 1542, 1545, | |||
| 26324 | 1547, 1548, 1550, 1552, 1563, 1566, 1568, 1600, 1601, 1611, | |||
| 26325 | 1632, 1642, 1646, 1648, 1649, 1748, 1749, 1750, 1757, 1758, | |||
| 26326 | 1759, 1765, 1767, 1769, 1770, 1774, 1776, 1786, 1789, 1791, | |||
| 26327 | 1792, 1807, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1984, | |||
| 26328 | 1994, 2027, 2036, 2038, 2039, 2042, 2048, 2070, 2074, 2075, | |||
| 26329 | 2084, 2085, 2088, 2089, 2096, 2112, 2137, 2142, 2208, 2210, | |||
| 26330 | 2276, 2304, 2307, 2308, 2362, 2363, 2364, 2365, 2366, 2369, | |||
| 26331 | 2377, 2381, 2382, 2384, 2385, 2392, 2402, 2404, 2406, 2416, | |||
| 26332 | 2417, 2418, 2425, 2433, 2434, 2437, 2447, 2451, 2474, 2482, | |||
| 26333 | 2486, 2492, 2493, 2494, 2497, 2503, 2507, 2509, 2510, 2519, | |||
| 26334 | 2524, 2527, 2530, 2534, 2544, 2546, 2548, 2554, 2555, 2561, | |||
| 26335 | 2563, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622, | |||
| 26336 | 2625, 2631, 2635, 2641, 2649, 2654, 2662, 2672, 2674, 2677, | |||
| 26337 | 2689, 2691, 2693, 2703, 2707, 2730, 2738, 2741, 2748, 2749, | |||
| 26338 | 2750, 2753, 2759, 2761, 2763, 2765, 2768, 2784, 2786, 2790, | |||
| 26339 | 2800, 2801, 2817, 2818, 2821, 2831, 2835, 2858, 2866, 2869, | |||
| 26340 | 2876, 2877, 2878, 2879, 2880, 2881, 2887, 2891, 2893, 2902, | |||
| 26341 | 2903, 2908, 2911, 2914, 2918, 2928, 2929, 2930, 2946, 2947, | |||
| 26342 | 2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006, | |||
| 26343 | 3008, 3009, 3014, 3018, 3021, 3024, 3031, 3046, 3056, 3059, | |||
| 26344 | 3065, 3066, 3073, 3077, 3086, 3090, 3114, 3125, 3133, 3134, | |||
| 26345 | 3137, 3142, 3146, 3157, 3160, 3168, 3170, 3174, 3192, 3199, | |||
| 26346 | 3202, 3205, 3214, 3218, 3242, 3253, 3260, 3261, 3262, 3263, | |||
| 26347 | 3264, 3270, 3271, 3274, 3276, 3285, 3294, 3296, 3298, 3302, | |||
| 26348 | 3313, 3330, 3333, 3342, 3346, 3389, 3390, 3393, 3398, 3402, | |||
| 26349 | 3405, 3406, 3415, 3424, 3426, 3430, 3440, 3449, 3450, 3458, | |||
| 26350 | 3461, 3482, 3507, 3517, 3520, 3530, 3535, 3538, 3542, 3544, | |||
| 26351 | 3570, 3572, 3585, 3633, 3634, 3636, 3647, 3648, 3654, 3655, | |||
| 26352 | 3663, 3664, 3674, 3713, 3716, 3719, 3722, 3725, 3732, 3737, | |||
| 26353 | 3745, 3749, 3751, 3754, 3757, 3761, 3762, 3764, 3771, 3773, | |||
| 26354 | 3776, 3782, 3784, 3792, 3804, 3840, 3841, 3844, 3859, 3860, | |||
| 26355 | 3861, 3864, 3866, 3872, 3882, 3892, 3893, 3894, 3895, 3896, | |||
| 26356 | 3897, 3898, 3899, 3900, 3901, 3902, 3904, 3913, 3953, 3967, | |||
| 26357 | 3968, 3973, 3974, 3976, 3981, 3993, 4030, 4038, 4039, 4046, | |||
| 26358 | 4048, 4053, 4057, 4096, 4139, 4141, 4145, 4146, 4152, 4153, | |||
| 26359 | 4155, 4157, 4159, 4160, 4170, 4176, 4182, 4184, 4186, 4190, | |||
| 26360 | 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4227, 4229, | |||
| 26361 | 4231, 4237, 4238, 4239, 4240, 4250, 4253, 4254, 4256, 4295, | |||
| 26362 | 4301, 4304, 4347, 4348, 4349, 4682, 4688, 4696, 4698, 4704, | |||
| 26363 | 4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888, | |||
| 26364 | 4957, 4960, 4969, 4992, 5008, 5024, 5120, 5121, 5741, 5743, | |||
| 26365 | 5760, 5761, 5787, 5788, 5792, 5867, 5870, 5888, 5902, 5906, | |||
| 26366 | 5920, 5938, 5941, 5952, 5970, 5984, 5998, 6002, 6016, 6068, | |||
| 26367 | 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6103, 6104, 6107, | |||
| 26368 | 6108, 6109, 6112, 6128, 6144, 6150, 6151, 6155, 6158, 6160, | |||
| 26369 | 6176, 6211, 6212, 6272, 6313, 6314, 6320, 6400, 6432, 6435, | |||
| 26370 | 6439, 6441, 6448, 6450, 6451, 6457, 6464, 6468, 6470, 6480, | |||
| 26371 | 6512, 6528, 6576, 6593, 6600, 6608, 6618, 6622, 6656, 6679, | |||
| 26372 | 6681, 6686, 6688, 6741, 6742, 6743, 6744, 6752, 6753, 6754, | |||
| 26373 | 6755, 6757, 6765, 6771, 6783, 6784, 6800, 6816, 6823, 6824, | |||
| 26374 | 6912, 6916, 6917, 6964, 6965, 6966, 6971, 6972, 6973, 6978, | |||
| 26375 | 6979, 6981, 6992, 7002, 7009, 7019, 7028, 7040, 7042, 7043, | |||
| 26376 | 7073, 7074, 7078, 7080, 7082, 7083, 7084, 7086, 7088, 7098, | |||
| 26377 | 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7164, 7168, | |||
| 26378 | 7204, 7212, 7220, 7222, 7227, 7232, 7245, 7248, 7258, 7288, | |||
| 26379 | 7294, 7360, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406, | |||
| 26380 | 7410, 7412, 7413, 7424, 7468, 7531, 7544, 7545, 7579, 7616, | |||
| 26381 | 7676, 7680, 7830, 7838, 7936, 7944, 7952, 7960, 7968, 7976, | |||
| 26382 | 7984, 7992, 8000, 8008, 8016, 8025, 8027, 8029, 8031, 8033, | |||
| 26383 | 8040, 8048, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8118, | |||
| 26384 | 8120, 8124, 8125, 8126, 8127, 8130, 8134, 8136, 8140, 8141, | |||
| 26385 | 8144, 8150, 8152, 8157, 8160, 8168, 8173, 8178, 8182, 8184, | |||
| 26386 | 8188, 8189, 8192, 8203, 8208, 8214, 8216, 8217, 8218, 8219, | |||
| 26387 | 8221, 8222, 8223, 8224, 8232, 8233, 8234, 8239, 8240, 8249, | |||
| 26388 | 8250, 8251, 8255, 8257, 8260, 8261, 8262, 8263, 8274, 8275, | |||
| 26389 | 8276, 8277, 8287, 8288, 8298, 8304, 8305, 8308, 8314, 8317, | |||
| 26390 | 8318, 8319, 8320, 8330, 8333, 8334, 8336, 8352, 8400, 8413, | |||
| 26391 | 8417, 8418, 8421, 8448, 8450, 8451, 8455, 8456, 8458, 8459, | |||
| 26392 | 8462, 8464, 8467, 8468, 8469, 8470, 8472, 8473, 8478, 8484, | |||
| 26393 | 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8496, 8500, | |||
| 26394 | 8501, 8505, 8506, 8508, 8510, 8512, 8517, 8519, 8522, 8523, | |||
| 26395 | 8524, 8526, 8527, 8528, 8544, 8579, 8581, 8585, 8592, 8597, | |||
| 26396 | 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, | |||
| 26397 | 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972, | |||
| 26398 | 8992, 8994, 9001, 9002, 9003, 9084, 9085, 9115, 9140, 9180, | |||
| 26399 | 9186, 9216, 9280, 9312, 9372, 9450, 9472, 9655, 9656, 9665, | |||
| 26400 | 9666, 9720, 9728, 9839, 9840, 9985, 10088, 10089, 10090, 10091, | |||
| 26401 | 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101, | |||
| 26402 | 10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217, | |||
| 26403 | 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10240, 10496, 10627, | |||
| 26404 | 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, | |||
| 26405 | 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, | |||
| 26406 | 10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750, | |||
| 26407 | 11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365, | |||
| 26408 | 11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393, | |||
| 26409 | 11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520, | |||
| 26410 | 11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696, | |||
| 26411 | 11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780, | |||
| 26412 | 11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800, | |||
| 26413 | 11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812, | |||
| 26414 | 11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904, | |||
| 26415 | 11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296, | |||
| 26416 | 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306, | |||
| 26417 | 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317, | |||
| 26418 | 12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347, | |||
| 26419 | 12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449, | |||
| 26420 | 12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736, | |||
| 26421 | 12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, | |||
| 26422 | 12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981, | |||
| 26423 | 40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528, | |||
| 26424 | 42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624, | |||
| 26425 | 42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800, | |||
| 26426 | 42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912, | |||
| 26427 | 43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043, | |||
| 26428 | 43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136, | |||
| 26429 | 43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 43259, 43264, | |||
| 26430 | 43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395, | |||
| 26431 | 43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472, | |||
| 26432 | 43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588, | |||
| 26433 | 43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643, | |||
| 26434 | 43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, | |||
| 26435 | 43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762, | |||
| 26436 | 43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003, | |||
| 26437 | 44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203, | |||
| 26438 | 55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112, | |||
| 26439 | 64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320, | |||
| 26440 | 64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020, | |||
| 26441 | 65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075, | |||
| 26442 | 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, | |||
| 26443 | 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097, | |||
| 26444 | 65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118, | |||
| 26445 | 65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279, | |||
| 26446 | 65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294, | |||
| 26447 | 65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343, | |||
| 26448 | 65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378, | |||
| 26449 | 65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490, | |||
| 26450 | 65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529, | |||
| 26451 | 65532, 0, 13, 40, 60, 63, 80, 128, 256, 263, | |||
| 26452 | 311, 320, 373, 377, 394, 400, 464, 509, 640, 672, | |||
| 26453 | 768, 800, 816, 833, 834, 842, 896, 927, 928, 968, | |||
| 26454 | 976, 977, 1024, 1064, 1104, 1184, 2048, 2056, 2058, 2103, | |||
| 26455 | 2108, 2111, 2135, 2136, 2304, 2326, 2335, 2336, 2367, 2432, | |||
| 26456 | 2494, 2560, 2561, 2565, 2572, 2576, 2581, 2585, 2616, 2623, | |||
| 26457 | 2624, 2640, 2656, 2685, 2687, 2816, 2873, 2880, 2904, 2912, | |||
| 26458 | 2936, 3072, 3680, 4096, 4097, 4098, 4099, 4152, 4167, 4178, | |||
| 26459 | 4198, 4224, 4226, 4227, 4272, 4275, 4279, 4281, 4283, 4285, | |||
| 26460 | 4286, 4304, 4336, 4352, 4355, 4391, 4396, 4397, 4406, 4416, | |||
| 26461 | 4480, 4482, 4483, 4531, 4534, 4543, 4545, 4549, 4560, 5760, | |||
| 26462 | 5803, 5804, 5805, 5806, 5808, 5814, 5815, 5824, 8192, 9216, | |||
| 26463 | 9328, 12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248, | |||
| 26464 | 53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637, | |||
| 26465 | 53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298, | |||
| 26466 | 54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441, | |||
| 26467 | 54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541, | |||
| 26468 | 54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662, | |||
| 26469 | 54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922, | |||
| 26470 | 54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062, | |||
| 26471 | 55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178, | |||
| 26472 | 55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961, | |||
| 26473 | 60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003, | |||
| 26474 | 61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028, | |||
| 26475 | 61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099, | |||
| 26476 | 61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744, | |||
| 26477 | 61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368, | |||
| 26478 | 62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971, | |||
| 26479 | 63045, 63104, 63232, 0, 42710, 42752, 46900, 46912, 47133, 63488, | |||
| 26480 | 1, 32, 256, 0, 65533, | |||
| 26481 | }; | |||
| 26482 | static u16 aFts5UnicodeData[] = { | |||
| 26483 | 1025, 61, 117, 55, 117, 54, 50, 53, 57, 53, | |||
| 26484 | 49, 85, 333, 85, 121, 85, 841, 54, 53, 50, | |||
| 26485 | 56, 48, 56, 837, 54, 57, 50, 57, 1057, 61, | |||
| 26486 | 53, 151, 58, 53, 56, 58, 39, 52, 57, 34, | |||
| 26487 | 58, 56, 58, 57, 79, 56, 37, 85, 56, 47, | |||
| 26488 | 39, 51, 111, 53, 745, 57, 233, 773, 57, 261, | |||
| 26489 | 1822, 37, 542, 37, 1534, 222, 69, 73, 37, 126, | |||
| 26490 | 126, 73, 69, 137, 37, 73, 37, 105, 101, 73, | |||
| 26491 | 37, 73, 37, 190, 158, 37, 126, 126, 73, 37, | |||
| 26492 | 126, 94, 37, 39, 94, 69, 135, 41, 40, 37, | |||
| 26493 | 41, 40, 37, 41, 40, 37, 542, 37, 606, 37, | |||
| 26494 | 41, 40, 37, 126, 73, 37, 1886, 197, 73, 37, | |||
| 26495 | 73, 69, 126, 105, 37, 286, 2181, 39, 869, 582, | |||
| 26496 | 152, 390, 472, 166, 248, 38, 56, 38, 568, 3596, | |||
| 26497 | 158, 38, 56, 94, 38, 101, 53, 88, 41, 53, | |||
| 26498 | 105, 41, 73, 37, 553, 297, 1125, 94, 37, 105, | |||
| 26499 | 101, 798, 133, 94, 57, 126, 94, 37, 1641, 1541, | |||
| 26500 | 1118, 58, 172, 75, 1790, 478, 37, 2846, 1225, 38, | |||
| 26501 | 213, 1253, 53, 49, 55, 1452, 49, 44, 53, 76, | |||
| 26502 | 53, 76, 53, 44, 871, 103, 85, 162, 121, 85, | |||
| 26503 | 55, 85, 90, 364, 53, 85, 1031, 38, 327, 684, | |||
| 26504 | 333, 149, 71, 44, 3175, 53, 39, 236, 34, 58, | |||
| 26505 | 204, 70, 76, 58, 140, 71, 333, 103, 90, 39, | |||
| 26506 | 469, 34, 39, 44, 967, 876, 2855, 364, 39, 333, | |||
| 26507 | 1063, 300, 70, 58, 117, 38, 711, 140, 38, 300, | |||
| 26508 | 38, 108, 38, 172, 501, 807, 108, 53, 39, 359, | |||
| 26509 | 876, 108, 42, 1735, 44, 42, 44, 39, 106, 268, | |||
| 26510 | 138, 44, 74, 39, 236, 327, 76, 85, 333, 53, | |||
| 26511 | 38, 199, 231, 44, 74, 263, 71, 711, 231, 39, | |||
| 26512 | 135, 44, 39, 106, 140, 74, 74, 44, 39, 42, | |||
| 26513 | 71, 103, 76, 333, 71, 87, 207, 58, 55, 76, | |||
| 26514 | 42, 199, 71, 711, 231, 71, 71, 71, 44, 106, | |||
| 26515 | 76, 76, 108, 44, 135, 39, 333, 76, 103, 44, | |||
| 26516 | 76, 42, 295, 103, 711, 231, 71, 167, 44, 39, | |||
| 26517 | 106, 172, 76, 42, 74, 44, 39, 71, 76, 333, | |||
| 26518 | 53, 55, 44, 74, 263, 71, 711, 231, 71, 167, | |||
| 26519 | 44, 39, 42, 44, 42, 140, 74, 74, 44, 44, | |||
| 26520 | 42, 71, 103, 76, 333, 58, 39, 207, 44, 39, | |||
| 26521 | 199, 103, 135, 71, 39, 71, 71, 103, 391, 74, | |||
| 26522 | 44, 74, 106, 106, 44, 39, 42, 333, 111, 218, | |||
| 26523 | 55, 58, 106, 263, 103, 743, 327, 167, 39, 108, | |||
| 26524 | 138, 108, 140, 76, 71, 71, 76, 333, 239, 58, | |||
| 26525 | 74, 263, 103, 743, 327, 167, 44, 39, 42, 44, | |||
| 26526 | 170, 44, 74, 74, 76, 74, 39, 71, 76, 333, | |||
| 26527 | 71, 74, 263, 103, 1319, 39, 106, 140, 106, 106, | |||
| 26528 | 44, 39, 42, 71, 76, 333, 207, 58, 199, 74, | |||
| 26529 | 583, 775, 295, 39, 231, 44, 106, 108, 44, 266, | |||
| 26530 | 74, 53, 1543, 44, 71, 236, 55, 199, 38, 268, | |||
| 26531 | 53, 333, 85, 71, 39, 71, 39, 39, 135, 231, | |||
| 26532 | 103, 39, 39, 71, 135, 44, 71, 204, 76, 39, | |||
| 26533 | 167, 38, 204, 333, 135, 39, 122, 501, 58, 53, | |||
| 26534 | 122, 76, 218, 333, 335, 58, 44, 58, 44, 58, | |||
| 26535 | 44, 54, 50, 54, 50, 74, 263, 1159, 460, 42, | |||
| 26536 | 172, 53, 76, 167, 364, 1164, 282, 44, 218, 90, | |||
| 26537 | 181, 154, 85, 1383, 74, 140, 42, 204, 42, 76, | |||
| 26538 | 74, 76, 39, 333, 213, 199, 74, 76, 135, 108, | |||
| 26539 | 39, 106, 71, 234, 103, 140, 423, 44, 74, 76, | |||
| 26540 | 202, 44, 39, 42, 333, 106, 44, 90, 1225, 41, | |||
| 26541 | 41, 1383, 53, 38, 10631, 135, 231, 39, 135, 1319, | |||
| 26542 | 135, 1063, 135, 231, 39, 135, 487, 1831, 135, 2151, | |||
| 26543 | 108, 309, 655, 519, 346, 2727, 49, 19847, 85, 551, | |||
| 26544 | 61, 839, 54, 50, 2407, 117, 110, 423, 135, 108, | |||
| 26545 | 583, 108, 85, 583, 76, 423, 103, 76, 1671, 76, | |||
| 26546 | 42, 236, 266, 44, 74, 364, 117, 38, 117, 55, | |||
| 26547 | 39, 44, 333, 335, 213, 49, 149, 108, 61, 333, | |||
| 26548 | 1127, 38, 1671, 1319, 44, 39, 2247, 935, 108, 138, | |||
| 26549 | 76, 106, 74, 44, 202, 108, 58, 85, 333, 967, | |||
| 26550 | 167, 1415, 554, 231, 74, 333, 47, 1114, 743, 76, | |||
| 26551 | 106, 85, 1703, 42, 44, 42, 236, 44, 42, 44, | |||
| 26552 | 74, 268, 202, 332, 44, 333, 333, 245, 38, 213, | |||
| 26553 | 140, 42, 1511, 44, 42, 172, 42, 44, 170, 44, | |||
| 26554 | 74, 231, 333, 245, 346, 300, 314, 76, 42, 967, | |||
| 26555 | 42, 140, 74, 76, 42, 44, 74, 71, 333, 1415, | |||
| 26556 | 44, 42, 76, 106, 44, 42, 108, 74, 149, 1159, | |||
| 26557 | 266, 268, 74, 76, 181, 333, 103, 333, 967, 198, | |||
| 26558 | 85, 277, 108, 53, 428, 42, 236, 135, 44, 135, | |||
| 26559 | 74, 44, 71, 1413, 2022, 421, 38, 1093, 1190, 1260, | |||
| 26560 | 140, 4830, 261, 3166, 261, 265, 197, 201, 261, 265, | |||
| 26561 | 261, 265, 197, 201, 261, 41, 41, 41, 94, 229, | |||
| 26562 | 265, 453, 261, 264, 261, 264, 261, 264, 165, 69, | |||
| 26563 | 137, 40, 56, 37, 120, 101, 69, 137, 40, 120, | |||
| 26564 | 133, 69, 137, 120, 261, 169, 120, 101, 69, 137, | |||
| 26565 | 40, 88, 381, 162, 209, 85, 52, 51, 54, 84, | |||
| 26566 | 51, 54, 52, 277, 59, 60, 162, 61, 309, 52, | |||
| 26567 | 51, 149, 80, 117, 57, 54, 50, 373, 57, 53, | |||
| 26568 | 48, 341, 61, 162, 194, 47, 38, 207, 121, 54, | |||
| 26569 | 50, 38, 335, 121, 54, 50, 422, 855, 428, 139, | |||
| 26570 | 44, 107, 396, 90, 41, 154, 41, 90, 37, 105, | |||
| 26571 | 69, 105, 37, 58, 41, 90, 57, 169, 218, 41, | |||
| 26572 | 58, 41, 58, 41, 58, 137, 58, 37, 137, 37, | |||
| 26573 | 135, 37, 90, 69, 73, 185, 94, 101, 58, 57, | |||
| 26574 | 90, 37, 58, 527, 1134, 94, 142, 47, 185, 186, | |||
| 26575 | 89, 154, 57, 90, 57, 90, 57, 250, 57, 1018, | |||
| 26576 | 89, 90, 57, 58, 57, 1018, 8601, 282, 153, 666, | |||
| 26577 | 89, 250, 54, 50, 2618, 57, 986, 825, 1306, 217, | |||
| 26578 | 602, 1274, 378, 1935, 2522, 719, 5882, 57, 314, 57, | |||
| 26579 | 1754, 281, 3578, 57, 4634, 3322, 54, 50, 54, 50, | |||
| 26580 | 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, | |||
| 26581 | 975, 1434, 185, 54, 50, 1017, 54, 50, 54, 50, | |||
| 26582 | 54, 50, 54, 50, 54, 50, 537, 8218, 4217, 54, | |||
| 26583 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, | |||
| 26584 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, | |||
| 26585 | 50, 2041, 54, 50, 54, 50, 1049, 54, 50, 8281, | |||
| 26586 | 1562, 697, 90, 217, 346, 1513, 1509, 126, 73, 69, | |||
| 26587 | 254, 105, 37, 94, 37, 94, 165, 70, 105, 37, | |||
| 26588 | 3166, 37, 218, 158, 108, 94, 149, 47, 85, 1221, | |||
| 26589 | 37, 37, 1799, 38, 53, 44, 743, 231, 231, 231, | |||
| 26590 | 231, 231, 231, 231, 231, 1036, 85, 52, 51, 52, | |||
| 26591 | 51, 117, 52, 51, 53, 52, 51, 309, 49, 85, | |||
| 26592 | 49, 53, 52, 51, 85, 52, 51, 54, 50, 54, | |||
| 26593 | 50, 54, 50, 54, 50, 181, 38, 341, 81, 858, | |||
| 26594 | 2874, 6874, 410, 61, 117, 58, 38, 39, 46, 54, | |||
| 26595 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 90, | |||
| 26596 | 54, 50, 54, 50, 54, 50, 54, 50, 49, 54, | |||
| 26597 | 82, 58, 302, 140, 74, 49, 166, 90, 110, 38, | |||
| 26598 | 39, 53, 90, 2759, 76, 88, 70, 39, 49, 2887, | |||
| 26599 | 53, 102, 39, 1319, 3015, 90, 143, 346, 871, 1178, | |||
| 26600 | 519, 1018, 335, 986, 271, 58, 495, 1050, 335, 1274, | |||
| 26601 | 495, 2042, 8218, 39, 39, 2074, 39, 39, 679, 38, | |||
| 26602 | 36583, 1786, 1287, 198, 85, 8583, 38, 117, 519, 333, | |||
| 26603 | 71, 1502, 39, 44, 107, 53, 332, 53, 38, 798, | |||
| 26604 | 44, 2247, 334, 76, 213, 760, 294, 88, 478, 69, | |||
| 26605 | 2014, 38, 261, 190, 350, 38, 88, 158, 158, 382, | |||
| 26606 | 70, 37, 231, 44, 103, 44, 135, 44, 743, 74, | |||
| 26607 | 76, 42, 154, 207, 90, 55, 58, 1671, 149, 74, | |||
| 26608 | 1607, 522, 44, 85, 333, 588, 199, 117, 39, 333, | |||
| 26609 | 903, 268, 85, 743, 364, 74, 53, 935, 108, 42, | |||
| 26610 | 1511, 44, 74, 140, 74, 44, 138, 437, 38, 333, | |||
| 26611 | 85, 1319, 204, 74, 76, 74, 76, 103, 44, 263, | |||
| 26612 | 44, 42, 333, 149, 519, 38, 199, 122, 39, 42, | |||
| 26613 | 1543, 44, 39, 108, 71, 76, 167, 76, 39, 44, | |||
| 26614 | 39, 71, 38, 85, 359, 42, 76, 74, 85, 39, | |||
| 26615 | 70, 42, 44, 199, 199, 199, 231, 231, 1127, 74, | |||
| 26616 | 44, 74, 44, 74, 53, 42, 44, 333, 39, 39, | |||
| 26617 | 743, 1575, 36, 68, 68, 36, 63, 63, 11719, 3399, | |||
| 26618 | 229, 165, 39, 44, 327, 57, 423, 167, 39, 71, | |||
| 26619 | 71, 3463, 536, 11623, 54, 50, 2055, 1735, 391, 55, | |||
| 26620 | 58, 524, 245, 54, 50, 53, 236, 53, 81, 80, | |||
| 26621 | 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, | |||
| 26622 | 54, 50, 54, 50, 54, 50, 85, 54, 50, 149, | |||
| 26623 | 112, 117, 149, 49, 54, 50, 54, 50, 54, 50, | |||
| 26624 | 117, 57, 49, 121, 53, 55, 85, 167, 4327, 34, | |||
| 26625 | 117, 55, 117, 54, 50, 53, 57, 53, 49, 85, | |||
| 26626 | 333, 85, 121, 85, 841, 54, 53, 50, 56, 48, | |||
| 26627 | 56, 837, 54, 57, 50, 57, 54, 50, 53, 54, | |||
| 26628 | 50, 85, 327, 38, 1447, 70, 999, 199, 199, 199, | |||
| 26629 | 103, 87, 57, 56, 58, 87, 58, 153, 90, 98, | |||
| 26630 | 90, 391, 839, 615, 71, 487, 455, 3943, 117, 1455, | |||
| 26631 | 314, 1710, 143, 570, 47, 410, 1466, 44, 935, 1575, | |||
| 26632 | 999, 143, 551, 46, 263, 46, 967, 53, 1159, 263, | |||
| 26633 | 53, 174, 1289, 1285, 2503, 333, 199, 39, 1415, 71, | |||
| 26634 | 39, 743, 53, 271, 711, 207, 53, 839, 53, 1799, | |||
| 26635 | 71, 39, 108, 76, 140, 135, 103, 871, 108, 44, | |||
| 26636 | 271, 309, 935, 79, 53, 1735, 245, 711, 271, 615, | |||
| 26637 | 271, 2343, 1007, 42, 44, 42, 1703, 492, 245, 655, | |||
| 26638 | 333, 76, 42, 1447, 106, 140, 74, 76, 85, 34, | |||
| 26639 | 149, 807, 333, 108, 1159, 172, 42, 268, 333, 149, | |||
| 26640 | 76, 42, 1543, 106, 300, 74, 135, 149, 333, 1383, | |||
| 26641 | 44, 42, 44, 74, 204, 42, 44, 333, 28135, 3182, | |||
| 26642 | 149, 34279, 18215, 2215, 39, 1482, 140, 422, 71, 7898, | |||
| 26643 | 1274, 1946, 74, 108, 122, 202, 258, 268, 90, 236, | |||
| 26644 | 986, 140, 1562, 2138, 108, 58, 2810, 591, 841, 837, | |||
| 26645 | 841, 229, 581, 841, 837, 41, 73, 41, 73, 137, | |||
| 26646 | 265, 133, 37, 229, 357, 841, 837, 73, 137, 265, | |||
| 26647 | 233, 837, 73, 137, 169, 41, 233, 837, 841, 837, | |||
| 26648 | 841, 837, 841, 837, 841, 837, 841, 837, 841, 901, | |||
| 26649 | 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, | |||
| 26650 | 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, | |||
| 26651 | 809, 57, 805, 57, 197, 94, 1613, 135, 871, 71, | |||
| 26652 | 39, 39, 327, 135, 39, 39, 39, 39, 39, 39, | |||
| 26653 | 103, 71, 39, 39, 39, 39, 39, 39, 71, 39, | |||
| 26654 | 135, 231, 135, 135, 39, 327, 551, 103, 167, 551, | |||
| 26655 | 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946, | |||
| 26656 | 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210, | |||
| 26657 | 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266, | |||
| 26658 | 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351, | |||
| 26659 | 34, 3074, 7692, 63, 63, | |||
| 26660 | }; | |||
| 26661 | ||||
| 26662 | static int sqlite3Fts5UnicodeCategory(u32 iCode) { | |||
| 26663 | int iRes = -1; | |||
| 26664 | int iHi; | |||
| 26665 | int iLo; | |||
| 26666 | int ret; | |||
| 26667 | u16 iKey; | |||
| 26668 | ||||
| 26669 | if( iCode>=(1<<20) ){ | |||
| 26670 | return 0; | |||
| 26671 | } | |||
| 26672 | iLo = aFts5UnicodeBlock[(iCode>>16)]; | |||
| 26673 | iHi = aFts5UnicodeBlock[1+(iCode>>16)]; | |||
| 26674 | iKey = (iCode & 0xFFFF); | |||
| 26675 | while( iHi>iLo ){ | |||
| 26676 | int iTest = (iHi + iLo) / 2; | |||
| 26677 | assert( iTest>=iLo && iTest<iHi )((void) (0)); | |||
| 26678 | if( iKey>=aFts5UnicodeMap[iTest] ){ | |||
| 26679 | iRes = iTest; | |||
| 26680 | iLo = iTest+1; | |||
| 26681 | }else{ | |||
| 26682 | iHi = iTest; | |||
| 26683 | } | |||
| 26684 | } | |||
| 26685 | ||||
| 26686 | if( iRes<0 ) return 0; | |||
| 26687 | if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0; | |||
| 26688 | ret = aFts5UnicodeData[iRes] & 0x1F; | |||
| 26689 | if( ret!=30 ) return ret; | |||
| 26690 | return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 5 : 9; | |||
| 26691 | } | |||
| 26692 | ||||
| 26693 | static void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){ | |||
| 26694 | int i = 0; | |||
| 26695 | int iTbl = 0; | |||
| 26696 | while( i<128 ){ | |||
| 26697 | int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ]; | |||
| 26698 | int n = (aFts5UnicodeData[iTbl] >> 5) + i; | |||
| 26699 | for(; i<128 && i<n; i++){ | |||
| 26700 | aAscii[i] = (u8)bToken; | |||
| 26701 | } | |||
| 26702 | iTbl++; | |||
| 26703 | } | |||
| 26704 | aAscii[0] = 0; /* 0x00 is never a token character */ | |||
| 26705 | } | |||
| 26706 | ||||
| 26707 | #line 1 "fts5_varint.c" | |||
| 26708 | /* | |||
| 26709 | ** 2015 May 30 | |||
| 26710 | ** | |||
| 26711 | ** The author disclaims copyright to this source code. In place of | |||
| 26712 | ** a legal notice, here is a blessing: | |||
| 26713 | ** | |||
| 26714 | ** May you do good and not evil. | |||
| 26715 | ** May you find forgiveness for yourself and forgive others. | |||
| 26716 | ** May you share freely, never taking more than you give. | |||
| 26717 | ** | |||
| 26718 | ****************************************************************************** | |||
| 26719 | ** | |||
| 26720 | ** Routines for varint serialization and deserialization. | |||
| 26721 | */ | |||
| 26722 | ||||
| 26723 | ||||
| 26724 | /* #include "fts5Int.h" */ | |||
| 26725 | ||||
| 26726 | /* | |||
| 26727 | ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. | |||
| 26728 | ** Except, this version does handle the single byte case that the core | |||
| 26729 | ** version depends on being handled before its function is called. | |||
| 26730 | */ | |||
| 26731 | static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ | |||
| 26732 | u32 a,b; | |||
| 26733 | ||||
| 26734 | /* The 1-byte case. Overwhelmingly the most common. */ | |||
| 26735 | a = *p; | |||
| 26736 | /* a: p0 (unmasked) */ | |||
| 26737 | if (!(a&0x80)) | |||
| 26738 | { | |||
| 26739 | /* Values between 0 and 127 */ | |||
| 26740 | *v = a; | |||
| 26741 | return 1; | |||
| 26742 | } | |||
| 26743 | ||||
| 26744 | /* The 2-byte case */ | |||
| 26745 | p++; | |||
| 26746 | b = *p; | |||
| 26747 | /* b: p1 (unmasked) */ | |||
| 26748 | if (!(b&0x80)) | |||
| 26749 | { | |||
| 26750 | /* Values between 128 and 16383 */ | |||
| 26751 | a &= 0x7f; | |||
| 26752 | a = a<<7; | |||
| 26753 | *v = a | b; | |||
| 26754 | return 2; | |||
| 26755 | } | |||
| 26756 | ||||
| 26757 | /* The 3-byte case */ | |||
| 26758 | p++; | |||
| 26759 | a = a<<14; | |||
| 26760 | a |= *p; | |||
| 26761 | /* a: p0<<14 | p2 (unmasked) */ | |||
| 26762 | if (!(a&0x80)) | |||
| 26763 | { | |||
| 26764 | /* Values between 16384 and 2097151 */ | |||
| 26765 | a &= (0x7f<<14)|(0x7f); | |||
| 26766 | b &= 0x7f; | |||
| 26767 | b = b<<7; | |||
| 26768 | *v = a | b; | |||
| 26769 | return 3; | |||
| 26770 | } | |||
| 26771 | ||||
| 26772 | /* A 32-bit varint is used to store size information in btrees. | |||
| 26773 | ** Objects are rarely larger than 2MiB limit of a 3-byte varint. | |||
| 26774 | ** A 3-byte varint is sufficient, for example, to record the size | |||
| 26775 | ** of a 1048569-byte BLOB or string. | |||
| 26776 | ** | |||
| 26777 | ** We only unroll the first 1-, 2-, and 3- byte cases. The very | |||
| 26778 | ** rare larger cases can be handled by the slower 64-bit varint | |||
| 26779 | ** routine. | |||
| 26780 | */ | |||
| 26781 | { | |||
| 26782 | u64 v64; | |||
| 26783 | u8 n; | |||
| 26784 | p -= 2; | |||
| 26785 | n = sqlite3Fts5GetVarint(p, &v64); | |||
| 26786 | *v = ((u32)v64) & 0x7FFFFFFF; | |||
| 26787 | assert( n>3 && n<=9 )((void) (0)); | |||
| 26788 | return n; | |||
| 26789 | } | |||
| 26790 | } | |||
| 26791 | ||||
| 26792 | ||||
| 26793 | /* | |||
| 26794 | ** Bitmasks used by sqlite3GetVarint(). These precomputed constants | |||
| 26795 | ** are defined here rather than simply putting the constant expressions | |||
| 26796 | ** inline in order to work around bugs in the RVT compiler. | |||
| 26797 | ** | |||
| 26798 | ** SLOT_2_0 A mask for (0x7f<<14) | 0x7f | |||
| 26799 | ** | |||
| 26800 | ** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0 | |||
| 26801 | */ | |||
| 26802 | #define SLOT_2_00x001fc07f 0x001fc07f | |||
| 26803 | #define SLOT_4_2_00xf01fc07f 0xf01fc07f | |||
| 26804 | ||||
| 26805 | /* | |||
| 26806 | ** Read a 64-bit variable-length integer from memory starting at p[0]. | |||
| 26807 | ** Return the number of bytes read. The value is stored in *v. | |||
| 26808 | */ | |||
| 26809 | static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){ | |||
| 26810 | u32 a,b,s; | |||
| 26811 | ||||
| 26812 | a = *p; | |||
| 26813 | /* a: p0 (unmasked) */ | |||
| 26814 | if (!(a&0x80)) | |||
| 26815 | { | |||
| 26816 | *v = a; | |||
| 26817 | return 1; | |||
| 26818 | } | |||
| 26819 | ||||
| 26820 | p++; | |||
| 26821 | b = *p; | |||
| 26822 | /* b: p1 (unmasked) */ | |||
| 26823 | if (!(b&0x80)) | |||
| 26824 | { | |||
| 26825 | a &= 0x7f; | |||
| 26826 | a = a<<7; | |||
| 26827 | a |= b; | |||
| 26828 | *v = a; | |||
| 26829 | return 2; | |||
| 26830 | } | |||
| 26831 | ||||
| 26832 | /* Verify that constants are precomputed correctly */ | |||
| 26833 | assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) )((void) (0)); | |||
| 26834 | assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) )((void) (0)); | |||
| 26835 | ||||
| 26836 | p++; | |||
| 26837 | a = a<<14; | |||
| 26838 | a |= *p; | |||
| 26839 | /* a: p0<<14 | p2 (unmasked) */ | |||
| 26840 | if (!(a&0x80)) | |||
| 26841 | { | |||
| 26842 | a &= SLOT_2_00x001fc07f; | |||
| 26843 | b &= 0x7f; | |||
| 26844 | b = b<<7; | |||
| 26845 | a |= b; | |||
| 26846 | *v = a; | |||
| 26847 | return 3; | |||
| 26848 | } | |||
| 26849 | ||||
| 26850 | /* CSE1 from below */ | |||
| 26851 | a &= SLOT_2_00x001fc07f; | |||
| 26852 | p++; | |||
| 26853 | b = b<<14; | |||
| 26854 | b |= *p; | |||
| 26855 | /* b: p1<<14 | p3 (unmasked) */ | |||
| 26856 | if (!(b&0x80)) | |||
| 26857 | { | |||
| 26858 | b &= SLOT_2_00x001fc07f; | |||
| 26859 | /* moved CSE1 up */ | |||
| 26860 | /* a &= (0x7f<<14)|(0x7f); */ | |||
| 26861 | a = a<<7; | |||
| 26862 | a |= b; | |||
| 26863 | *v = a; | |||
| 26864 | return 4; | |||
| 26865 | } | |||
| 26866 | ||||
| 26867 | /* a: p0<<14 | p2 (masked) */ | |||
| 26868 | /* b: p1<<14 | p3 (unmasked) */ | |||
| 26869 | /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | |||
| 26870 | /* moved CSE1 up */ | |||
| 26871 | /* a &= (0x7f<<14)|(0x7f); */ | |||
| 26872 | b &= SLOT_2_00x001fc07f; | |||
| 26873 | s = a; | |||
| 26874 | /* s: p0<<14 | p2 (masked) */ | |||
| 26875 | ||||
| 26876 | p++; | |||
| 26877 | a = a<<14; | |||
| 26878 | a |= *p; | |||
| 26879 | /* a: p0<<28 | p2<<14 | p4 (unmasked) */ | |||
| 26880 | if (!(a&0x80)) | |||
| 26881 | { | |||
| 26882 | /* we can skip these cause they were (effectively) done above in calc'ing s */ | |||
| 26883 | /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ | |||
| 26884 | /* b &= (0x7f<<14)|(0x7f); */ | |||
| 26885 | b = b<<7; | |||
| 26886 | a |= b; | |||
| 26887 | s = s>>18; | |||
| 26888 | *v = ((u64)s)<<32 | a; | |||
| 26889 | return 5; | |||
| 26890 | } | |||
| 26891 | ||||
| 26892 | /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | |||
| 26893 | s = s<<7; | |||
| 26894 | s |= b; | |||
| 26895 | /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ | |||
| 26896 | ||||
| 26897 | p++; | |||
| 26898 | b = b<<14; | |||
| 26899 | b |= *p; | |||
| 26900 | /* b: p1<<28 | p3<<14 | p5 (unmasked) */ | |||
| 26901 | if (!(b&0x80)) | |||
| 26902 | { | |||
| 26903 | /* we can skip this cause it was (effectively) done above in calc'ing s */ | |||
| 26904 | /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ | |||
| 26905 | a &= SLOT_2_00x001fc07f; | |||
| 26906 | a = a<<7; | |||
| 26907 | a |= b; | |||
| 26908 | s = s>>18; | |||
| 26909 | *v = ((u64)s)<<32 | a; | |||
| 26910 | return 6; | |||
| 26911 | } | |||
| 26912 | ||||
| 26913 | p++; | |||
| 26914 | a = a<<14; | |||
| 26915 | a |= *p; | |||
| 26916 | /* a: p2<<28 | p4<<14 | p6 (unmasked) */ | |||
| 26917 | if (!(a&0x80)) | |||
| 26918 | { | |||
| 26919 | a &= SLOT_4_2_00xf01fc07f; | |||
| 26920 | b &= SLOT_2_00x001fc07f; | |||
| 26921 | b = b<<7; | |||
| 26922 | a |= b; | |||
| 26923 | s = s>>11; | |||
| 26924 | *v = ((u64)s)<<32 | a; | |||
| 26925 | return 7; | |||
| 26926 | } | |||
| 26927 | ||||
| 26928 | /* CSE2 from below */ | |||
| 26929 | a &= SLOT_2_00x001fc07f; | |||
| 26930 | p++; | |||
| 26931 | b = b<<14; | |||
| 26932 | b |= *p; | |||
| 26933 | /* b: p3<<28 | p5<<14 | p7 (unmasked) */ | |||
| 26934 | if (!(b&0x80)) | |||
| 26935 | { | |||
| 26936 | b &= SLOT_4_2_00xf01fc07f; | |||
| 26937 | /* moved CSE2 up */ | |||
| 26938 | /* a &= (0x7f<<14)|(0x7f); */ | |||
| 26939 | a = a<<7; | |||
| 26940 | a |= b; | |||
| 26941 | s = s>>4; | |||
| 26942 | *v = ((u64)s)<<32 | a; | |||
| 26943 | return 8; | |||
| 26944 | } | |||
| 26945 | ||||
| 26946 | p++; | |||
| 26947 | a = a<<15; | |||
| 26948 | a |= *p; | |||
| 26949 | /* a: p4<<29 | p6<<15 | p8 (unmasked) */ | |||
| 26950 | ||||
| 26951 | /* moved CSE2 up */ | |||
| 26952 | /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */ | |||
| 26953 | b &= SLOT_2_00x001fc07f; | |||
| 26954 | b = b<<8; | |||
| 26955 | a |= b; | |||
| 26956 | ||||
| 26957 | s = s<<4; | |||
| 26958 | b = p[-4]; | |||
| 26959 | b &= 0x7f; | |||
| 26960 | b = b>>3; | |||
| 26961 | s |= b; | |||
| 26962 | ||||
| 26963 | *v = ((u64)s)<<32 | a; | |||
| 26964 | ||||
| 26965 | return 9; | |||
| 26966 | } | |||
| 26967 | ||||
| 26968 | /* | |||
| 26969 | ** The variable-length integer encoding is as follows: | |||
| 26970 | ** | |||
| 26971 | ** KEY: | |||
| 26972 | ** A = 0xxxxxxx 7 bits of data and one flag bit | |||
| 26973 | ** B = 1xxxxxxx 7 bits of data and one flag bit | |||
| 26974 | ** C = xxxxxxxx 8 bits of data | |||
| 26975 | ** | |||
| 26976 | ** 7 bits - A | |||
| 26977 | ** 14 bits - BA | |||
| 26978 | ** 21 bits - BBA | |||
| 26979 | ** 28 bits - BBBA | |||
| 26980 | ** 35 bits - BBBBA | |||
| 26981 | ** 42 bits - BBBBBA | |||
| 26982 | ** 49 bits - BBBBBBA | |||
| 26983 | ** 56 bits - BBBBBBBA | |||
| 26984 | ** 64 bits - BBBBBBBBC | |||
| 26985 | */ | |||
| 26986 | ||||
| 26987 | #ifdef SQLITE_NOINLINE | |||
| 26988 | # define FTS5_NOINLINE SQLITE_NOINLINE | |||
| 26989 | #else | |||
| 26990 | # define FTS5_NOINLINE | |||
| 26991 | #endif | |||
| 26992 | ||||
| 26993 | /* | |||
| 26994 | ** Write a 64-bit variable-length integer to memory starting at p[0]. | |||
| 26995 | ** The length of data write will be between 1 and 9 bytes. The number | |||
| 26996 | ** of bytes written is returned. | |||
| 26997 | ** | |||
| 26998 | ** A variable-length integer consists of the lower 7 bits of each byte | |||
| 26999 | ** for all bytes that have the 8th bit set and one byte with the 8th | |||
| 27000 | ** bit clear. Except, if we get to the 9th byte, it stores the full | |||
| 27001 | ** 8 bits and is the last byte. | |||
| 27002 | */ | |||
| 27003 | static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){ | |||
| 27004 | int i, j, n; | |||
| 27005 | u8 buf[10]; | |||
| 27006 | if( v & (((u64)0xff000000)<<32) ){ | |||
| 27007 | p[8] = (u8)v; | |||
| 27008 | v >>= 8; | |||
| 27009 | for(i=7; i>=0; i--){ | |||
| 27010 | p[i] = (u8)((v & 0x7f) | 0x80); | |||
| 27011 | v >>= 7; | |||
| 27012 | } | |||
| 27013 | return 9; | |||
| 27014 | } | |||
| 27015 | n = 0; | |||
| 27016 | do{ | |||
| 27017 | buf[n++] = (u8)((v & 0x7f) | 0x80); | |||
| 27018 | v >>= 7; | |||
| 27019 | }while( v!=0 ); | |||
| 27020 | buf[0] &= 0x7f; | |||
| 27021 | assert( n<=9 )((void) (0)); | |||
| 27022 | for(i=0, j=n-1; j>=0; j--, i++){ | |||
| 27023 | p[i] = buf[j]; | |||
| 27024 | } | |||
| 27025 | return n; | |||
| 27026 | } | |||
| 27027 | ||||
| 27028 | static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){ | |||
| 27029 | if( v<=0x7f ){ | |||
| 27030 | p[0] = v&0x7f; | |||
| 27031 | return 1; | |||
| 27032 | } | |||
| 27033 | if( v<=0x3fff ){ | |||
| 27034 | p[0] = ((v>>7)&0x7f)|0x80; | |||
| 27035 | p[1] = v&0x7f; | |||
| 27036 | return 2; | |||
| 27037 | } | |||
| 27038 | return fts5PutVarint64(p,v); | |||
| 27039 | } | |||
| 27040 | ||||
| 27041 | ||||
| 27042 | static int sqlite3Fts5GetVarintLen(u32 iVal){ | |||
| 27043 | #if 0 | |||
| 27044 | if( iVal<(1 << 7 ) ) return 1; | |||
| 27045 | #endif | |||
| 27046 | assert( iVal>=(1 << 7) )((void) (0)); | |||
| 27047 | if( iVal<(1 << 14) ) return 2; | |||
| 27048 | if( iVal<(1 << 21) ) return 3; | |||
| 27049 | if( iVal<(1 << 28) ) return 4; | |||
| 27050 | return 5; | |||
| 27051 | } | |||
| 27052 | ||||
| 27053 | #line 1 "fts5_vocab.c" | |||
| 27054 | /* | |||
| 27055 | ** 2015 May 08 | |||
| 27056 | ** | |||
| 27057 | ** The author disclaims copyright to this source code. In place of | |||
| 27058 | ** a legal notice, here is a blessing: | |||
| 27059 | ** | |||
| 27060 | ** May you do good and not evil. | |||
| 27061 | ** May you find forgiveness for yourself and forgive others. | |||
| 27062 | ** May you share freely, never taking more than you give. | |||
| 27063 | ** | |||
| 27064 | ****************************************************************************** | |||
| 27065 | ** | |||
| 27066 | ** This is an SQLite virtual table module implementing direct access to an | |||
| 27067 | ** existing FTS5 index. The module may create several different types of | |||
| 27068 | ** tables: | |||
| 27069 | ** | |||
| 27070 | ** col: | |||
| 27071 | ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col)); | |||
| 27072 | ** | |||
| 27073 | ** One row for each term/column combination. The value of $doc is set to | |||
| 27074 | ** the number of fts5 rows that contain at least one instance of term | |||
| 27075 | ** $term within column $col. Field $cnt is set to the total number of | |||
| 27076 | ** instances of term $term in column $col (in any row of the fts5 table). | |||
| 27077 | ** | |||
| 27078 | ** row: | |||
| 27079 | ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term)); | |||
| 27080 | ** | |||
| 27081 | ** One row for each term in the database. The value of $doc is set to | |||
| 27082 | ** the number of fts5 rows that contain at least one instance of term | |||
| 27083 | ** $term. Field $cnt is set to the total number of instances of term | |||
| 27084 | ** $term in the database. | |||
| 27085 | ** | |||
| 27086 | ** instance: | |||
| 27087 | ** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>)); | |||
| 27088 | ** | |||
| 27089 | ** One row for each term instance in the database. | |||
| 27090 | */ | |||
| 27091 | ||||
| 27092 | ||||
| 27093 | /* #include "fts5Int.h" */ | |||
| 27094 | ||||
| 27095 | ||||
| 27096 | typedef struct Fts5VocabTable Fts5VocabTable; | |||
| 27097 | typedef struct Fts5VocabCursor Fts5VocabCursor; | |||
| 27098 | ||||
| 27099 | struct Fts5VocabTable { | |||
| 27100 | sqlite3_vtab base; | |||
| 27101 | char *zFts5Tbl; /* Name of fts5 table */ | |||
| 27102 | char *zFts5Db; /* Db containing fts5 table */ | |||
| 27103 | sqlite3 *db; /* Database handle */ | |||
| 27104 | Fts5Global *pGlobal; /* FTS5 global object for this database */ | |||
| 27105 | int eType; /* FTS5_VOCAB_COL, ROW or INSTANCE */ | |||
| 27106 | unsigned bBusy; /* True if busy */ | |||
| 27107 | }; | |||
| 27108 | ||||
| 27109 | struct Fts5VocabCursor { | |||
| 27110 | sqlite3_vtab_cursor base; | |||
| 27111 | sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */ | |||
| 27112 | Fts5Table *pFts5; /* Associated FTS5 table */ | |||
| 27113 | ||||
| 27114 | int bEof; /* True if this cursor is at EOF */ | |||
| 27115 | Fts5IndexIter *pIter; /* Term/rowid iterator object */ | |||
| 27116 | void *pStruct; /* From sqlite3Fts5StructureRef() */ | |||
| 27117 | ||||
| 27118 | int nLeTerm; /* Size of zLeTerm in bytes */ | |||
| 27119 | char *zLeTerm; /* (term <= $zLeTerm) paramater, or NULL */ | |||
| 27120 | int colUsed; /* Copy of sqlite3_index_info.colUsed */ | |||
| 27121 | ||||
| 27122 | /* These are used by 'col' tables only */ | |||
| 27123 | int iCol; | |||
| 27124 | i64 *aCnt; | |||
| 27125 | i64 *aDoc; | |||
| 27126 | ||||
| 27127 | /* Output values used by all tables. */ | |||
| 27128 | i64 rowid; /* This table's current rowid value */ | |||
| 27129 | Fts5Buffer term; /* Current value of 'term' column */ | |||
| 27130 | ||||
| 27131 | /* Output values Used by 'instance' tables only */ | |||
| 27132 | i64 iInstPos; | |||
| 27133 | int iInstOff; | |||
| 27134 | }; | |||
| 27135 | ||||
| 27136 | #define FTS5_VOCAB_COL0 0 | |||
| 27137 | #define FTS5_VOCAB_ROW1 1 | |||
| 27138 | #define FTS5_VOCAB_INSTANCE2 2 | |||
| 27139 | ||||
| 27140 | #define FTS5_VOCAB_COL_SCHEMA"term, col, doc, cnt" "term, col, doc, cnt" | |||
| 27141 | #define FTS5_VOCAB_ROW_SCHEMA"term, doc, cnt" "term, doc, cnt" | |||
| 27142 | #define FTS5_VOCAB_INST_SCHEMA"term, doc, col, offset" "term, doc, col, offset" | |||
| 27143 | ||||
| 27144 | /* | |||
| 27145 | ** Bits for the mask used as the idxNum value by xBestIndex/xFilter. | |||
| 27146 | */ | |||
| 27147 | #define FTS5_VOCAB_TERM_EQ0x0100 0x0100 | |||
| 27148 | #define FTS5_VOCAB_TERM_GE0x0200 0x0200 | |||
| 27149 | #define FTS5_VOCAB_TERM_LE0x0400 0x0400 | |||
| 27150 | ||||
| 27151 | #define FTS5_VOCAB_COLUSED_MASK0xFF 0xFF | |||
| 27152 | ||||
| 27153 | ||||
| 27154 | /* | |||
| 27155 | ** Translate a string containing an fts5vocab table type to an | |||
| 27156 | ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output | |||
| 27157 | ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message | |||
| 27158 | ** and return SQLITE_ERROR. | |||
| 27159 | */ | |||
| 27160 | static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ | |||
| 27161 | int rc = SQLITE_OK0; | |||
| 27162 | char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1); | |||
| 27163 | if( rc==SQLITE_OK0 ){ | |||
| 27164 | sqlite3Fts5Dequote(zCopy); | |||
| 27165 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "col")==0 ){ | |||
| 27166 | *peType = FTS5_VOCAB_COL0; | |||
| 27167 | }else | |||
| 27168 | ||||
| 27169 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "row")==0 ){ | |||
| 27170 | *peType = FTS5_VOCAB_ROW1; | |||
| 27171 | }else | |||
| 27172 | if( sqlite3_stricmpsqlite3_api->stricmp(zCopy, "instance")==0 ){ | |||
| 27173 | *peType = FTS5_VOCAB_INSTANCE2; | |||
| 27174 | }else | |||
| 27175 | { | |||
| 27176 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("fts5vocab: unknown table type: %Q", zCopy); | |||
| 27177 | rc = SQLITE_ERROR1; | |||
| 27178 | } | |||
| 27179 | sqlite3_freesqlite3_api->free(zCopy); | |||
| 27180 | } | |||
| 27181 | ||||
| 27182 | return rc; | |||
| 27183 | } | |||
| 27184 | ||||
| 27185 | ||||
| 27186 | /* | |||
| 27187 | ** The xDisconnect() virtual table method. | |||
| 27188 | */ | |||
| 27189 | static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){ | |||
| 27190 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; | |||
| 27191 | sqlite3_freesqlite3_api->free(pTab); | |||
| 27192 | return SQLITE_OK0; | |||
| 27193 | } | |||
| 27194 | ||||
| 27195 | /* | |||
| 27196 | ** The xDestroy() virtual table method. | |||
| 27197 | */ | |||
| 27198 | static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ | |||
| 27199 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; | |||
| 27200 | sqlite3_freesqlite3_api->free(pTab); | |||
| 27201 | return SQLITE_OK0; | |||
| 27202 | } | |||
| 27203 | ||||
| 27204 | /* | |||
| 27205 | ** This function is the implementation of both the xConnect and xCreate | |||
| 27206 | ** methods of the FTS3 virtual table. | |||
| 27207 | ** | |||
| 27208 | ** The argv[] array contains the following: | |||
| 27209 | ** | |||
| 27210 | ** argv[0] -> module name ("fts5vocab") | |||
| 27211 | ** argv[1] -> database name | |||
| 27212 | ** argv[2] -> table name | |||
| 27213 | ** | |||
| 27214 | ** then: | |||
| 27215 | ** | |||
| 27216 | ** argv[3] -> name of fts5 table | |||
| 27217 | ** argv[4] -> type of fts5vocab table | |||
| 27218 | ** | |||
| 27219 | ** or, for tables in the TEMP schema only. | |||
| 27220 | ** | |||
| 27221 | ** argv[3] -> name of fts5 tables database | |||
| 27222 | ** argv[4] -> name of fts5 table | |||
| 27223 | ** argv[5] -> type of fts5vocab table | |||
| 27224 | */ | |||
| 27225 | static int fts5VocabInitVtab( | |||
| 27226 | sqlite3 *db, /* The SQLite database connection */ | |||
| 27227 | void *pAux, /* Pointer to Fts5Global object */ | |||
| 27228 | int argc, /* Number of elements in argv array */ | |||
| 27229 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
| 27230 | sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ | |||
| 27231 | char **pzErr /* Write any error message here */ | |||
| 27232 | ){ | |||
| 27233 | const char *azSchema[] = { | |||
| 27234 | "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA"term, col, doc, cnt" ")", | |||
| 27235 | "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA"term, doc, cnt" ")", | |||
| 27236 | "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA"term, doc, col, offset" ")" | |||
| 27237 | }; | |||
| 27238 | ||||
| 27239 | Fts5VocabTable *pRet = 0; | |||
| 27240 | int rc = SQLITE_OK0; /* Return code */ | |||
| 27241 | int bDb; | |||
| 27242 | ||||
| 27243 | bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0); | |||
| 27244 | ||||
| 27245 | if( argc!=5 && bDb==0 ){ | |||
| 27246 | *pzErr = sqlite3_mprintfsqlite3_api->mprintf("wrong number of vtable arguments"); | |||
| 27247 | rc = SQLITE_ERROR1; | |||
| 27248 | }else{ | |||
| 27249 | i64 nByte; /* Bytes of space to allocate */ | |||
| 27250 | const char *zDb = bDb ? argv[3] : argv[1]; | |||
| 27251 | const char *zTab = bDb ? argv[4] : argv[3]; | |||
| 27252 | const char *zType = bDb ? argv[5] : argv[4]; | |||
| 27253 | i64 nDb = strlen(zDb)+1; | |||
| 27254 | i64 nTab = strlen(zTab)+1; | |||
| 27255 | int eType = 0; | |||
| 27256 | ||||
| 27257 | rc = fts5VocabTableType(zType, pzErr, &eType); | |||
| 27258 | if( rc==SQLITE_OK0 ){ | |||
| 27259 | assert( eType>=0 && eType<ArraySize(azSchema) )((void) (0)); | |||
| 27260 | rc = sqlite3_declare_vtabsqlite3_api->declare_vtab(db, azSchema[eType]); | |||
| 27261 | } | |||
| 27262 | ||||
| 27263 | nByte = sizeof(Fts5VocabTable) + nDb + nTab; | |||
| 27264 | pRet = sqlite3Fts5MallocZero(&rc, nByte); | |||
| 27265 | if( pRet ){ | |||
| 27266 | pRet->pGlobal = (Fts5Global*)pAux; | |||
| 27267 | pRet->eType = eType; | |||
| 27268 | pRet->db = db; | |||
| 27269 | pRet->zFts5Tbl = (char*)&pRet[1]; | |||
| 27270 | pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; | |||
| 27271 | memcpy(pRet->zFts5Tbl, zTab, nTab); | |||
| 27272 | memcpy(pRet->zFts5Db, zDb, nDb); | |||
| 27273 | sqlite3Fts5Dequote(pRet->zFts5Tbl); | |||
| 27274 | sqlite3Fts5Dequote(pRet->zFts5Db); | |||
| 27275 | } | |||
| 27276 | } | |||
| 27277 | ||||
| 27278 | *ppVTab = (sqlite3_vtab*)pRet; | |||
| 27279 | return rc; | |||
| 27280 | } | |||
| 27281 | ||||
| 27282 | ||||
| 27283 | /* | |||
| 27284 | ** The xConnect() and xCreate() methods for the virtual table. All the | |||
| 27285 | ** work is done in function fts5VocabInitVtab(). | |||
| 27286 | */ | |||
| 27287 | static int fts5VocabConnectMethod( | |||
| 27288 | sqlite3 *db, /* Database connection */ | |||
| 27289 | void *pAux, /* Pointer to tokenizer hash table */ | |||
| 27290 | int argc, /* Number of elements in argv array */ | |||
| 27291 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
| 27292 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | |||
| 27293 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | |||
| 27294 | ){ | |||
| 27295 | return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); | |||
| 27296 | } | |||
| 27297 | static int fts5VocabCreateMethod( | |||
| 27298 | sqlite3 *db, /* Database connection */ | |||
| 27299 | void *pAux, /* Pointer to tokenizer hash table */ | |||
| 27300 | int argc, /* Number of elements in argv array */ | |||
| 27301 | const char * const *argv, /* xCreate/xConnect argument array */ | |||
| 27302 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ | |||
| 27303 | char **pzErr /* OUT: sqlite3_malloc'd error message */ | |||
| 27304 | ){ | |||
| 27305 | return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); | |||
| 27306 | } | |||
| 27307 | ||||
| 27308 | /* | |||
| 27309 | ** Implementation of the xBestIndex method. | |||
| 27310 | ** | |||
| 27311 | ** Only constraints of the form: | |||
| 27312 | ** | |||
| 27313 | ** term <= ? | |||
| 27314 | ** term == ? | |||
| 27315 | ** term >= ? | |||
| 27316 | ** | |||
| 27317 | ** are interpreted. Less-than and less-than-or-equal are treated | |||
| 27318 | ** identically, as are greater-than and greater-than-or-equal. | |||
| 27319 | */ | |||
| 27320 | static int fts5VocabBestIndexMethod( | |||
| 27321 | sqlite3_vtab *pUnused, | |||
| 27322 | sqlite3_index_info *pInfo | |||
| 27323 | ){ | |||
| 27324 | int i; | |||
| 27325 | int iTermEq = -1; | |||
| 27326 | int iTermGe = -1; | |||
| 27327 | int iTermLe = -1; | |||
| 27328 | int idxNum = (int)pInfo->colUsed; | |||
| 27329 | int nArg = 0; | |||
| 27330 | ||||
| 27331 | UNUSED_PARAM(pUnused)(void)(pUnused); | |||
| 27332 | ||||
| 27333 | assert( (pInfo->colUsed & FTS5_VOCAB_COLUSED_MASK)==pInfo->colUsed )((void) (0)); | |||
| 27334 | ||||
| 27335 | for(i=0; i<pInfo->nConstraint; i++){ | |||
| 27336 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; | |||
| 27337 | if( p->usable==0 ) continue; | |||
| 27338 | if( p->iColumn==0 ){ /* term column */ | |||
| 27339 | if( p->op==SQLITE_INDEX_CONSTRAINT_EQ2 ) iTermEq = i; | |||
| 27340 | if( p->op==SQLITE_INDEX_CONSTRAINT_LE8 ) iTermLe = i; | |||
| 27341 | if( p->op==SQLITE_INDEX_CONSTRAINT_LT16 ) iTermLe = i; | |||
| 27342 | if( p->op==SQLITE_INDEX_CONSTRAINT_GE32 ) iTermGe = i; | |||
| 27343 | if( p->op==SQLITE_INDEX_CONSTRAINT_GT4 ) iTermGe = i; | |||
| 27344 | } | |||
| 27345 | } | |||
| 27346 | ||||
| 27347 | if( iTermEq>=0 ){ | |||
| 27348 | idxNum |= FTS5_VOCAB_TERM_EQ0x0100; | |||
| 27349 | pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg; | |||
| 27350 | pInfo->estimatedCost = 100; | |||
| 27351 | }else{ | |||
| 27352 | pInfo->estimatedCost = 1000000; | |||
| 27353 | if( iTermGe>=0 ){ | |||
| 27354 | idxNum |= FTS5_VOCAB_TERM_GE0x0200; | |||
| 27355 | pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg; | |||
| 27356 | pInfo->estimatedCost = pInfo->estimatedCost / 2; | |||
| 27357 | } | |||
| 27358 | if( iTermLe>=0 ){ | |||
| 27359 | idxNum |= FTS5_VOCAB_TERM_LE0x0400; | |||
| 27360 | pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg; | |||
| 27361 | pInfo->estimatedCost = pInfo->estimatedCost / 2; | |||
| 27362 | } | |||
| 27363 | } | |||
| 27364 | ||||
| 27365 | /* This virtual table always delivers results in ascending order of | |||
| 27366 | ** the "term" column (column 0). So if the user has requested this | |||
| 27367 | ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the | |||
| 27368 | ** sqlite3_index_info.orderByConsumed flag to tell the core the results | |||
| 27369 | ** are already in sorted order. */ | |||
| 27370 | if( pInfo->nOrderBy==1 | |||
| 27371 | && pInfo->aOrderBy[0].iColumn==0 | |||
| 27372 | && pInfo->aOrderBy[0].desc==0 | |||
| 27373 | ){ | |||
| 27374 | pInfo->orderByConsumed = 1; | |||
| 27375 | } | |||
| 27376 | ||||
| 27377 | pInfo->idxNum = idxNum; | |||
| 27378 | return SQLITE_OK0; | |||
| 27379 | } | |||
| 27380 | ||||
| 27381 | /* | |||
| 27382 | ** Implementation of xOpen method. | |||
| 27383 | */ | |||
| 27384 | static int fts5VocabOpenMethod( | |||
| 27385 | sqlite3_vtab *pVTab, | |||
| 27386 | sqlite3_vtab_cursor **ppCsr | |||
| 27387 | ){ | |||
| 27388 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; | |||
| 27389 | Fts5Table *pFts5 = 0; | |||
| 27390 | Fts5VocabCursor *pCsr = 0; | |||
| 27391 | int rc = SQLITE_OK0; | |||
| 27392 | sqlite3_stmt *pStmt = 0; | |||
| 27393 | char *zSql = 0; | |||
| 27394 | ||||
| 27395 | if( pTab->bBusy ){ | |||
| 27396 | pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 27397 | "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl | |||
| 27398 | ); | |||
| 27399 | return SQLITE_ERROR1; | |||
| 27400 | } | |||
| 27401 | zSql = sqlite3Fts5Mprintf(&rc, | |||
| 27402 | "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", | |||
| 27403 | pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl | |||
| 27404 | ); | |||
| 27405 | if( zSql ){ | |||
| 27406 | rc = sqlite3_prepare_v2sqlite3_api->prepare_v2(pTab->db, zSql, -1, &pStmt, 0); | |||
| 27407 | } | |||
| 27408 | sqlite3_freesqlite3_api->free(zSql); | |||
| 27409 | assert( rc==SQLITE_OK || pStmt==0 )((void) (0)); | |||
| 27410 | if( rc==SQLITE_ERROR1 ) rc = SQLITE_OK0; | |||
| 27411 | ||||
| 27412 | pTab->bBusy = 1; | |||
| 27413 | if( pStmt && sqlite3_stepsqlite3_api->step(pStmt)==SQLITE_ROW100 ){ | |||
| 27414 | i64 iId = sqlite3_column_int64sqlite3_api->column_int64(pStmt, 0); | |||
| 27415 | pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId); | |||
| 27416 | } | |||
| 27417 | pTab->bBusy = 0; | |||
| 27418 | ||||
| 27419 | if( rc==SQLITE_OK0 ){ | |||
| 27420 | if( pFts5==0 ){ | |||
| 27421 | rc = sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
| 27422 | pStmt = 0; | |||
| 27423 | if( rc==SQLITE_OK0 ){ | |||
| 27424 | pVTab->zErrMsg = sqlite3_mprintfsqlite3_api->mprintf( | |||
| 27425 | "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl | |||
| 27426 | ); | |||
| 27427 | rc = SQLITE_ERROR1; | |||
| 27428 | } | |||
| 27429 | }else{ | |||
| 27430 | rc = sqlite3Fts5FlushToDisk(pFts5); | |||
| 27431 | } | |||
| 27432 | } | |||
| 27433 | ||||
| 27434 | if( rc==SQLITE_OK0 ){ | |||
| 27435 | i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor); | |||
| 27436 | pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); | |||
| 27437 | } | |||
| 27438 | ||||
| 27439 | if( pCsr ){ | |||
| 27440 | pCsr->pFts5 = pFts5; | |||
| 27441 | pCsr->pStmt = pStmt; | |||
| 27442 | pCsr->aCnt = (i64*)&pCsr[1]; | |||
| 27443 | pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol]; | |||
| 27444 | }else{ | |||
| 27445 | sqlite3_finalizesqlite3_api->finalize(pStmt); | |||
| 27446 | } | |||
| 27447 | ||||
| 27448 | *ppCsr = (sqlite3_vtab_cursor*)pCsr; | |||
| 27449 | return rc; | |||
| 27450 | } | |||
| 27451 | ||||
| 27452 | static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ | |||
| 27453 | pCsr->rowid = 0; | |||
| 27454 | sqlite3Fts5IterClose(pCsr->pIter); | |||
| 27455 | sqlite3Fts5StructureRelease(pCsr->pStruct); | |||
| 27456 | pCsr->pStruct = 0; | |||
| 27457 | pCsr->pIter = 0; | |||
| 27458 | sqlite3_freesqlite3_api->free(pCsr->zLeTerm); | |||
| 27459 | pCsr->nLeTerm = -1; | |||
| 27460 | pCsr->zLeTerm = 0; | |||
| 27461 | pCsr->bEof = 0; | |||
| 27462 | } | |||
| 27463 | ||||
| 27464 | /* | |||
| 27465 | ** Close the cursor. For additional information see the documentation | |||
| 27466 | ** on the xClose method of the virtual table interface. | |||
| 27467 | */ | |||
| 27468 | static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ | |||
| 27469 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
| 27470 | fts5VocabResetCursor(pCsr); | |||
| 27471 | sqlite3Fts5BufferFree(&pCsr->term); | |||
| 27472 | sqlite3_finalizesqlite3_api->finalize(pCsr->pStmt); | |||
| 27473 | sqlite3_freesqlite3_api->free(pCsr); | |||
| 27474 | return SQLITE_OK0; | |||
| 27475 | } | |||
| 27476 | ||||
| 27477 | static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){ | |||
| 27478 | int rc = SQLITE_OK0; | |||
| 27479 | ||||
| 27480 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ){ | |||
| 27481 | pCsr->bEof = 1; | |||
| 27482 | }else{ | |||
| 27483 | const char *zTerm; | |||
| 27484 | int nTerm; | |||
| 27485 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | |||
| 27486 | if( pCsr->nLeTerm>=0 ){ | |||
| 27487 | int nCmp = MIN(nTerm, pCsr->nLeTerm)(((nTerm) < (pCsr->nLeTerm)) ? (nTerm) : (pCsr->nLeTerm )); | |||
| 27488 | int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); | |||
| 27489 | if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ | |||
| 27490 | pCsr->bEof = 1; | |||
| 27491 | } | |||
| 27492 | } | |||
| 27493 | ||||
| 27494 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); | |||
| 27495 | } | |||
| 27496 | return rc; | |||
| 27497 | } | |||
| 27498 | ||||
| 27499 | static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){ /* | |||
| | ** Advance a cursor on an FTS5_VOCAB_INSTANCE table (this is what | |||
| | ** fts5VocabNextMethod() calls for that table type). | |||
| | ** | |||
| | ** Unless the detail mode is FTS5_DETAIL_NONE, the function first asks | |||
| | ** sqlite3Fts5PoslistNext64() for the next entry of the current position | |||
| | ** list (pIter->pData/nData), with progress kept in pCsr->iInstOff and | |||
| | ** pCsr->iInstPos. The loop body runs only when detail is NONE or that | |||
| | ** call returns non-zero: it resets the position state, steps the | |||
| | ** iterator with sqlite3Fts5IterNextScan() and reloads the term through | |||
| | ** fts5VocabInstanceNewTerm(). | |||
| | ** | |||
| | ** Any non-zero rc sets pCsr->bEof and is returned to the caller. | |||
| | */ | |||
| 27500 | int eDetail = pCsr->pFts5->pConfig->eDetail; | |||
| 27501 | int rc = SQLITE_OK0; | |||
| 27502 | Fts5IndexIter *pIter = pCsr->pIter; | |||
| 27503 | i64 *pp = &pCsr->iInstPos; /* position value, updated in place */ | |||
| 27504 | int *po = &pCsr->iInstOff; /* offset into the position list, updated in place */ | |||
| 27505 | ||||
| 27506 | assert( sqlite3Fts5IterEof(pIter)==0 )((void) (0)); | |||
| 27507 | assert( pCsr->bEof==0 )((void) (0)); | |||
| 27508 | while( eDetail==FTS5_DETAIL_NONE1 | |||
| 27509 | || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp) | |||
| 27510 | ){ | |||
| 27511 | pCsr->iInstPos = 0; /* restart position tracking for the next iterator entry */ | |||
| 27512 | pCsr->iInstOff = 0; | |||
| 27513 | ||||
| 27514 | rc = sqlite3Fts5IterNextScan(pCsr->pIter); | |||
| 27515 | if( rc==SQLITE_OK0 ){ | |||
| 27516 | rc = fts5VocabInstanceNewTerm(pCsr); | |||
| 27517 | if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE1 ) break; /* stop once EOF is flagged, and always when detail is NONE */ | |||
| 27518 | } | |||
| 27519 | if( rc ){ | |||
| 27520 | pCsr->bEof = 1; /* an error also ends the scan */ | |||
| 27521 | break; | |||
| 27522 | } | |||
| 27523 | } /* otherwise the loop condition reads the first position of the new entry */ | |||
| 27524 | ||||
| 27525 | return rc; | |||
| 27526 | } | |||
| 27527 | ||||
| 27528 | /* | |||
| 27529 | ** Advance the cursor to the next row in the table. | |||
| | ** | |||
| | ** Returns early with the code from sqlite3Fts5StructureTest() when that | |||
| | ** call does not yield SQLITE_OK; otherwise pCsr->rowid is incremented. | |||
| | ** FTS5_VOCAB_INSTANCE tables are handed off to fts5VocabInstanceNext(). | |||
| | ** | |||
| | ** For FTS5_VOCAB_COL tables the cursor first moves to the next column | |||
| | ** whose aDoc[] entry is non-zero for the current term. Only when no such | |||
| | ** column is left (or for the other table type handled here) is the next | |||
| | ** term loaded: aCnt[]/aDoc[] are zeroed and then accumulated from the | |||
| | ** iterator entries until the term changes, the iterator reaches EOF or | |||
| | ** an error occurs. A term beyond the zLeTerm bound sets bEof instead. | |||
| | ** | |||
| | ** Returns an SQLite result code. FTS5_CORRUPT is returned when a | |||
| | ** position names a column >= nCol, or when a COL-table term ends up | |||
| | ** with no column that has a non-zero aDoc[] entry. | |||
| 27530 | */ | |||
| 27531 | static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ | |||
| 27532 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
| 27533 | Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; | |||
| 27534 | int nCol = pCsr->pFts5->pConfig->nCol; | |||
| 27535 | int rc; | |||
| 27536 | ||||
| 27537 | rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct); /* NOTE(review): presumably detects an index change since pStruct was taken - confirm */ | |||
| 27538 | if( rc!=SQLITE_OK0 ) return rc; | |||
| 27539 | pCsr->rowid++; | |||
| 27540 | ||||
| 27541 | if( pTab->eType==FTS5_VOCAB_INSTANCE2 ){ | |||
| 27542 | return fts5VocabInstanceNext(pCsr); | |||
| 27543 | } | |||
| 27544 | ||||
| 27545 | if( pTab->eType==FTS5_VOCAB_COL0 ){ | |||
| 27546 | for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){ /* next column with a non-zero aDoc[] entry for the current term */ | |||
| 27547 | if( pCsr->aDoc[pCsr->iCol] ) break; | |||
| 27548 | } | |||
| 27549 | } | |||
| 27550 | ||||
| 27551 | if( pTab->eType!=FTS5_VOCAB_COL0 || pCsr->iCol>=nCol ){ /* a new term is needed: not a COL table, or its columns are used up */ | |||
| 27552 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ){ | |||
| 27553 | pCsr->bEof = 1; | |||
| 27554 | }else{ | |||
| 27555 | const char *zTerm; | |||
| 27556 | int nTerm; | |||
| 27557 | ||||
| 27558 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | |||
| 27559 | assert( nTerm>=0 )((void) (0)); | |||
| 27560 | if( pCsr->nLeTerm>=0 ){ | |||
| 27561 | int nCmp = MIN(nTerm, pCsr->nLeTerm)(((nTerm) < (pCsr->nLeTerm)) ? (nTerm) : (pCsr->nLeTerm )); | |||
| 27562 | int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); | |||
| 27563 | if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ /* term sorts after the upper bound */ | |||
| 27564 | pCsr->bEof = 1; | |||
| 27565 | return SQLITE_OK0; /* pCsr->term is left untouched on this path */ | |||
| 27566 | } | |||
| 27567 | } | |||
| 27568 | ||||
| 27569 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); | |||
| 27570 | memset(pCsr->aCnt, 0, nCol * sizeof(i64)); /* zero the per-column totals for the new term */ | |||
| 27571 | memset(pCsr->aDoc, 0, nCol * sizeof(i64)); | |||
| 27572 | pCsr->iCol = 0; | |||
| 27573 | ||||
| 27574 | assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW )((void) (0)); | |||
| 27575 | while( rc==SQLITE_OK0 ){ /* one pass per iterator entry that carries the current term */ | |||
| 27576 | int eDetail = pCsr->pFts5->pConfig->eDetail; | |||
| 27577 | const u8 *pPos; int nPos; /* Position list */ | |||
| 27578 | i64 iPos = 0; /* 64-bit position read from poslist */ | |||
| 27579 | int iOff = 0; /* Current offset within position list */ | |||
| 27580 | ||||
| 27581 | pPos = pCsr->pIter->pData; | |||
| 27582 | nPos = pCsr->pIter->nData; | |||
| 27583 | ||||
| 27584 | switch( pTab->eType ){ | |||
| 27585 | case FTS5_VOCAB_ROW1: | |||
| 27586 | /* Do not bother counting the number of instances if the "cnt" | |||
| 27587 | ** column is not being read (according to colUsed). */ | |||
| 27588 | if( eDetail==FTS5_DETAIL_FULL0 && (pCsr->colUsed & 0x04) ){ | |||
| 27589 | while( iPos<nPos ){ /* in this branch iPos serves as a byte index into pPos */ | |||
| 27590 | u32 ii; | |||
| 27591 | fts5FastGetVarint32(pPos, iPos, ii){ ii = (pPos)[iPos++]; if( ii & 0x80 ){ iPos--; iPos += sqlite3Fts5GetVarint32 (&(pPos)[iPos],(u32*)&(ii)); } }; | |||
| 27592 | if( ii==1 ){ | |||
| 27593 | /* New column in the position list */ | |||
| 27594 | fts5FastGetVarint32(pPos, iPos, ii){ ii = (pPos)[iPos++]; if( ii & 0x80 ){ iPos--; iPos += sqlite3Fts5GetVarint32 (&(pPos)[iPos],(u32*)&(ii)); } }; | |||
| 27595 | }else{ | |||
| 27596 | /* An instance - increment pCsr->aCnt[] */ | |||
| 27597 | pCsr->aCnt[0]++; | |||
| 27598 | } | |||
| 27599 | } | |||
| 27600 | } | |||
| 27601 | pCsr->aDoc[0]++; /* counted once per iterator entry */ | |||
| 27602 | break; | |||
| 27603 | ||||
| 27604 | case FTS5_VOCAB_COL0: | |||
| 27605 | if( eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 27606 | int iCol = -1; /* column of the previous position, -1 before the first */ | |||
| 27607 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ | |||
| 27608 | int ii = FTS5_POS2COLUMN(iPos)(int)((iPos >> 32) & 0x7FFFFFFF); | |||
| 27609 | if( iCol!=ii ){ /* column changed: count this entry once for column ii */ | |||
| 27610 | if( ii>=nCol ){ | |||
| 27611 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 27612 | break; | |||
| 27613 | } | |||
| 27614 | pCsr->aDoc[ii]++; | |||
| 27615 | iCol = ii; | |||
| 27616 | } | |||
| 27617 | pCsr->aCnt[ii]++; | |||
| 27618 | } | |||
| 27619 | }else if( eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
| 27620 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ /* here iPos is used directly as a column index */ | |||
| 27621 | assert_nc( iPos>=0 && iPos<nCol )((void) (0)); | |||
| 27622 | if( iPos>=nCol ){ | |||
| 27623 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 27624 | break; | |||
| 27625 | } | |||
| 27626 | pCsr->aDoc[iPos]++; | |||
| 27627 | } | |||
| 27628 | }else{ | |||
| 27629 | assert( eDetail==FTS5_DETAIL_NONE )((void) (0)); | |||
| 27630 | pCsr->aDoc[0]++; | |||
| 27631 | } | |||
| 27632 | break; | |||
| 27633 | ||||
| 27634 | default: | |||
| 27635 | assert( pTab->eType==FTS5_VOCAB_INSTANCE )((void) (0)); | |||
| 27636 | break; | |||
| 27637 | } | |||
| 27638 | ||||
| 27639 | if( rc==SQLITE_OK0 ){ | |||
| 27640 | rc = sqlite3Fts5IterNextScan(pCsr->pIter); | |||
| 27641 | } | |||
| 27642 | if( pTab->eType==FTS5_VOCAB_INSTANCE2 ) break; /* NOTE(review): INSTANCE tables return near the top of this function, so this looks unreachable - confirm */ | |||
| 27643 | ||||
| 27644 | if( rc==SQLITE_OK0 ){ | |||
| 27645 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); | |||
| 27646 | if( nTerm!=pCsr->term.n | |||
| 27647 | || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm)) | |||
| 27648 | ){ | |||
| 27649 | break; /* the iterator has moved on to a different term */ | |||
| 27650 | } | |||
| 27651 | if( sqlite3Fts5IterEof(pCsr->pIter)((pCsr->pIter)->bEof) ) break; | |||
| 27652 | } | |||
| 27653 | } | |||
| 27654 | } | |||
| 27655 | } | |||
| 27656 | ||||
| 27657 | if( rc==SQLITE_OK0 && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL0 ){ /* settle iCol on the first column with a non-zero aDoc[] entry */ | |||
| 27658 | for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++); | |||
| 27659 | if( pCsr->iCol==nCol ){ /* no column has a non-zero aDoc[] entry for this term */ | |||
| 27660 | rc = FTS5_CORRUPT(11 | (1<<8)); | |||
| 27661 | } | |||
| 27662 | } | |||
| 27663 | return rc; | |||
| 27664 | } | |||
| 27665 | ||||
| 27666 | /* | |||
| 27667 | ** This is the xFilter implementation for the virtual table. | |||
| | ** | |||
| | ** The cursor is reset first. idxNum carries the FTS5_VOCAB_TERM_EQ, | |||
| | ** FTS5_VOCAB_TERM_GE and FTS5_VOCAB_TERM_LE flags; for every flag that | |||
| | ** is set one value is taken from apVal[], in that order. The bits | |||
| | ** selected by FTS5_VOCAB_COLUSED_MASK are stored in pCsr->colUsed. | |||
| | ** | |||
| | ** With an EQ value, that text is the query term and the flags become | |||
| | ** FTS5INDEX_QUERY_NOTOKENDATA. Otherwise the flags stay | |||
| | ** FTS5INDEX_QUERY_SCAN, a GE value (if any) is passed as the query term, | |||
| | ** and an LE value (if any) is copied into pCsr->zLeTerm/nLeTerm for the | |||
| | ** upper-bound checks made while stepping. | |||
| | ** | |||
| | ** Returns SQLITE_NOMEM if the LE copy cannot be allocated, otherwise the | |||
| | ** result of the index query and of loading the first row. | |||
| 27668 | */ | |||
| 27669 | static int fts5VocabFilterMethod( | |||
| 27670 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ | |||
| 27671 | int idxNum, /* Strategy index */ | |||
| 27672 | const char *zUnused, /* Unused */ | |||
| 27673 | int nUnused, /* Number of elements in apVal */ | |||
| 27674 | sqlite3_value **apVal /* Arguments for the indexing scheme */ | |||
| 27675 | ){ | |||
| 27676 | Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; | |||
| 27677 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
| 27678 | int eType = pTab->eType; | |||
| 27679 | int rc = SQLITE_OK0; | |||
| 27680 | ||||
| 27681 | int iVal = 0; /* next unread index in apVal[] */ | |||
| 27682 | int f = FTS5INDEX_QUERY_SCAN0x0008; /* flags for sqlite3Fts5IndexQuery() */ | |||
| 27683 | const char *zTerm = 0; | |||
| 27684 | int nTerm = 0; | |||
| 27685 | ||||
| 27686 | sqlite3_value *pEq = 0; | |||
| 27687 | sqlite3_value *pGe = 0; | |||
| 27688 | sqlite3_value *pLe = 0; | |||
| 27689 | ||||
| 27690 | UNUSED_PARAM2(zUnused, nUnused)(void)(zUnused), (void)(nUnused); | |||
| 27691 | ||||
| 27692 | fts5VocabResetCursor(pCsr); | |||
| 27693 | if( idxNum & FTS5_VOCAB_TERM_EQ0x0100 ) pEq = apVal[iVal++]; | |||
| 27694 | if( idxNum & FTS5_VOCAB_TERM_GE0x0200 ) pGe = apVal[iVal++]; | |||
| 27695 | if( idxNum & FTS5_VOCAB_TERM_LE0x0400 ) pLe = apVal[iVal++]; | |||
| 27696 | pCsr->colUsed = (idxNum & FTS5_VOCAB_COLUSED_MASK0xFF); | |||
| 27697 | ||||
| 27698 | if( pEq ){ | |||
| 27699 | zTerm = (const char *)sqlite3_value_textsqlite3_api->value_text(pEq); | |||
| 27700 | nTerm = sqlite3_value_bytessqlite3_api->value_bytes(pEq); | |||
| 27701 | f = FTS5INDEX_QUERY_NOTOKENDATA0x0080; /* replaces the SCAN flag; pGe and pLe are ignored on this path */ | |||
| 27702 | }else{ | |||
| 27703 | if( pGe ){ | |||
| 27704 | zTerm = (const char *)sqlite3_value_textsqlite3_api->value_text(pGe); | |||
| 27705 | nTerm = sqlite3_value_bytessqlite3_api->value_bytes(pGe); | |||
| 27706 | } | |||
| 27707 | if( pLe ){ | |||
| 27708 | const char *zCopy = (const char *)sqlite3_value_textsqlite3_api->value_text(pLe); | |||
| 27709 | if( zCopy==0 ) zCopy = ""; /* no text available: copy from an empty string instead */ | |||
| 27710 | pCsr->nLeTerm = sqlite3_value_bytessqlite3_api->value_bytes(pLe); | |||
| 27711 | pCsr->zLeTerm = sqlite3_mallocsqlite3_api->malloc(pCsr->nLeTerm+1); | |||
| 27712 | if( pCsr->zLeTerm==0 ){ | |||
| 27713 | rc = SQLITE_NOMEM7; | |||
| 27714 | }else{ | |||
| 27715 | memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1); /* nLeTerm+1 bytes: presumably includes the NUL terminator - confirm */ | |||
| 27716 | } | |||
| 27717 | } | |||
| 27718 | } | |||
| 27719 | ||||
| 27720 | if( rc==SQLITE_OK0 ){ | |||
| 27721 | Fts5Index *pIndex = pCsr->pFts5->pIndex; | |||
| 27722 | rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); | |||
| 27723 | if( rc==SQLITE_OK0 ){ | |||
| 27724 | pCsr->pStruct = sqlite3Fts5StructureRef(pIndex); /* later passed to sqlite3Fts5StructureTest() by xNext */ | |||
| 27725 | } | |||
| 27726 | } | |||
| 27727 | if( rc==SQLITE_OK0 && eType==FTS5_VOCAB_INSTANCE2 ){ | |||
| 27728 | rc = fts5VocabInstanceNewTerm(pCsr); | |||
| 27729 | } | |||
| 27730 | if( rc==SQLITE_OK0 && !pCsr->bEof /* load the first row, except for INSTANCE tables with detail NONE */ | |||
| 27731 | && (eType!=FTS5_VOCAB_INSTANCE2 | |||
| 27732 | || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE1) | |||
| 27733 | ){ | |||
| 27734 | rc = fts5VocabNextMethod(pCursor); | |||
| 27735 | } | |||
| 27736 | ||||
| 27737 | return rc; | |||
| 27738 | } | |||
| 27739 | ||||
| 27740 | /* | |||
| 27741 | ** This is the xEof method of the virtual table. SQLite calls this | |||
| 27742 | ** routine to find out if it has reached the end of a result set. | |||
| | ** | |||
| | ** The value returned is pCsr->bEof, the flag that the filter and | |||
| | ** stepping code in this file set to 1 when no further row is available. | |||
| 27743 | */ | |||
| 27744 | static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){ | |||
| 27745 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
| 27746 | return pCsr->bEof; | |||
| 27747 | } | |||
| 27748 | ||||
| 27749 | static int fts5VocabColumnMethod( | |||
| 27750 | sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ | |||
| 27751 | sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ | |||
| 27752 | int iCol /* Index of column to read value from */ | |||
| 27753 | ){ /* | |||
| | ** xColumn implementation: report column iCol of the current row via pCtx. | |||
| | ** | |||
| | ** Column 0 is the term text (pCsr->term) for every table type. The other | |||
| | ** columns depend on the table type: | |||
| | ** | |||
| | ** FTS5_VOCAB_COL: 1 = column name azCol[pCsr->iCol] (nothing is | |||
| | ** reported when detail is NONE), | |||
| | ** 2 = aDoc[pCsr->iCol], 3 = aCnt[pCsr->iCol] | |||
| | ** FTS5_VOCAB_ROW: 1 = aDoc[0], 2 = aCnt[0] | |||
| | ** FTS5_VOCAB_INSTANCE: 1 = pIter->iRowid, | |||
| | ** 2 = column name derived from iInstPos (detail | |||
| | ** FULL or COLUMNS, and only if in range), | |||
| | ** 3 = offset derived from iInstPos (detail FULL) | |||
| | ** | |||
| | ** Integer totals are reported only when greater than zero. When no | |||
| | ** sqlite3_result_xxx() call is made the result is whatever SQLite uses | |||
| | ** by default (presumably NULL - confirm). Always returns SQLITE_OK. | |||
| | */ | |||
| 27754 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
| 27755 | int eDetail = pCsr->pFts5->pConfig->eDetail; | |||
| 27756 | int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType; | |||
| 27757 | i64 iVal = 0; /* integer result; emitted at the end only if > 0 */ | |||
| 27758 | ||||
| 27759 | if( iCol==0 ){ | |||
| 27760 | sqlite3_result_textsqlite3_api->result_text( | |||
| 27761 | pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT((sqlite3_destructor_type)-1) | |||
| 27762 | ); | |||
| 27763 | }else if( eType==FTS5_VOCAB_COL0 ){ | |||
| 27764 | assert( iCol==1 || iCol==2 || iCol==3 )((void) (0)); | |||
| 27765 | if( iCol==1 ){ | |||
| 27766 | if( eDetail!=FTS5_DETAIL_NONE1 ){ | |||
| 27767 | const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol]; | |||
| 27768 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 27769 | } | |||
| 27770 | }else if( iCol==2 ){ | |||
| 27771 | iVal = pCsr->aDoc[pCsr->iCol]; | |||
| 27772 | }else{ | |||
| 27773 | iVal = pCsr->aCnt[pCsr->iCol]; | |||
| 27774 | } | |||
| 27775 | }else if( eType==FTS5_VOCAB_ROW1 ){ | |||
| 27776 | assert( iCol==1 || iCol==2 )((void) (0)); | |||
| 27777 | if( iCol==1 ){ | |||
| 27778 | iVal = pCsr->aDoc[0]; | |||
| 27779 | }else{ | |||
| 27780 | iVal = pCsr->aCnt[0]; | |||
| 27781 | } | |||
| 27782 | }else{ | |||
| 27783 | assert( eType==FTS5_VOCAB_INSTANCE )((void) (0)); | |||
| 27784 | switch( iCol ){ | |||
| 27785 | case 1: | |||
| 27786 | sqlite3_result_int64sqlite3_api->result_int64(pCtx, pCsr->pIter->iRowid); | |||
| 27787 | break; | |||
| 27788 | case 2: { | |||
| 27789 | int ii = -1; /* stays -1 (no result) for any other detail mode */ | |||
| 27790 | if( eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 27791 | ii = FTS5_POS2COLUMN(pCsr->iInstPos)(int)((pCsr->iInstPos >> 32) & 0x7FFFFFFF); | |||
| 27792 | }else if( eDetail==FTS5_DETAIL_COLUMNS2 ){ | |||
| 27793 | ii = (int)pCsr->iInstPos; | |||
| 27794 | } | |||
| 27795 | if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){ /* index azCol[] only with an in-range column number */ | |||
| 27796 | const char *z = pCsr->pFts5->pConfig->azCol[ii]; | |||
| 27797 | sqlite3_result_textsqlite3_api->result_text(pCtx, z, -1, SQLITE_STATIC((sqlite3_destructor_type)0)); | |||
| 27798 | } | |||
| 27799 | break; | |||
| 27800 | } | |||
| 27801 | default: { | |||
| 27802 | assert( iCol==3 )((void) (0)); | |||
| 27803 | if( eDetail==FTS5_DETAIL_FULL0 ){ | |||
| 27804 | int ii = FTS5_POS2OFFSET(pCsr->iInstPos)(int)(pCsr->iInstPos & 0x7FFFFFFF); | |||
| 27805 | sqlite3_result_intsqlite3_api->result_int(pCtx, ii); | |||
| 27806 | } | |||
| 27807 | break; | |||
| 27808 | } | |||
| 27809 | } | |||
| 27810 | } | |||
| 27811 | ||||
| 27812 | if( iVal>0 ) sqlite3_result_int64sqlite3_api->result_int64(pCtx, iVal); | |||
| 27813 | return SQLITE_OK0; | |||
| 27814 | } | |||
| 27815 | ||||
| 27816 | /* | |||
| 27817 | ** This is the xRowid method. The SQLite core calls this routine to | |||
| 27818 | ** retrieve the rowid for the current row of the result set. The | |||
| 27819 | ** rowid should be written to *pRowid. | |||
| | ** | |||
| | ** The value written is pCsr->rowid, the counter that | |||
| | ** fts5VocabNextMethod() increments on each successful step. This method | |||
| | ** always returns SQLITE_OK. | |||
| 27820 | */ | |||
| 27821 | static int fts5VocabRowidMethod( | |||
| 27822 | sqlite3_vtab_cursor *pCursor, | |||
| 27823 | sqlite_int64 *pRowid | |||
| 27824 | ){ | |||
| 27825 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; | |||
| 27826 | *pRowid = pCsr->rowid; | |||
| 27827 | return SQLITE_OK0; | |||
| 27828 | } | |||
| 27829 | ||||
| 27830 | static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ /* | |||
| | ** Register the "fts5vocab" virtual table module on database handle db. | |||
| | ** | |||
| | ** pGlobal is passed (as void*) as the module's client data and no | |||
| | ** destructor is supplied for it (final argument 0). The module table | |||
| | ** below wires up the create/connect, best-index, cursor and read | |||
| | ** methods; xUpdate and every later slot are left as 0. | |||
| | ** | |||
| | ** Returns the result code of sqlite3_create_module_v2(). | |||
| | */ | |||
| 27831 | static const sqlite3_module fts5Vocab = { | |||
| 27832 | /* iVersion */ 2, | |||
| 27833 | /* xCreate */ fts5VocabCreateMethod, | |||
| 27834 | /* xConnect */ fts5VocabConnectMethod, | |||
| 27835 | /* xBestIndex */ fts5VocabBestIndexMethod, | |||
| 27836 | /* xDisconnect */ fts5VocabDisconnectMethod, | |||
| 27837 | /* xDestroy */ fts5VocabDestroyMethod, | |||
| 27838 | /* xOpen */ fts5VocabOpenMethod, | |||
| 27839 | /* xClose */ fts5VocabCloseMethod, | |||
| 27840 | /* xFilter */ fts5VocabFilterMethod, | |||
| 27841 | /* xNext */ fts5VocabNextMethod, | |||
| 27842 | /* xEof */ fts5VocabEofMethod, | |||
| 27843 | /* xColumn */ fts5VocabColumnMethod, | |||
| 27844 | /* xRowid */ fts5VocabRowidMethod, | |||
| 27845 | /* xUpdate */ 0, | |||
| 27846 | /* xBegin */ 0, | |||
| 27847 | /* xSync */ 0, | |||
| 27848 | /* xCommit */ 0, | |||
| 27849 | /* xRollback */ 0, | |||
| 27850 | /* xFindFunction */ 0, | |||
| 27851 | /* xRename */ 0, | |||
| 27852 | /* xSavepoint */ 0, | |||
| 27853 | /* xRelease */ 0, | |||
| 27854 | /* xRollbackTo */ 0, | |||
| 27855 | /* xShadowName */ 0, | |||
| 27856 | /* xIntegrity */ 0 | |||
| 27857 | }; | |||
| 27858 | void *p = (void*)pGlobal; /* client data handed to the module */ | |||
| 27859 | ||||
| 27860 | return sqlite3_create_module_v2sqlite3_api->create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0); | |||
| 27861 | } | |||
| 27862 | ||||
| 27863 | ||||
| 27864 | /* Here ends the fts5.c composite file. */ | |||
| 27865 | #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ |