phpBB
Statistics
| Revision:

root / trunk / phpBB / includes / search / fulltext_native.php

History | View | Annotate | Download (48.1 kB)

1
<?php
2
/**
3
*
4
* @package search
5
* @copyright (c) 2005 phpBB Group
6
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2
7
*
8
*/
9
10
/**
11
* @ignore
12
*/
13
if (!defined('IN_PHPBB'))
14
{
15
        exit;
16
}
17
18
/**
19
* fulltext_native
20
* phpBB's own db driven fulltext search, version 2
21
* @package search
22
*/
23
class phpbb_search_fulltext_native extends phpbb_search_base
24
{
25
        var $stats = array();
26
        var $word_length = array();
27
        var $search_query;
28
        var $common_words = array();
29
30
        var $must_contain_ids = array();
31
        var $must_not_contain_ids = array();
32
        var $must_exclude_one_ids = array();
33
34
        /**
35
        * Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded.
36
        *
37
        * @param        boolean|string        &$error        is passed by reference and should either be set to false on success or an error message on failure.
38
        */
39
        public function __construct(&$error)
40
        {
41
                global $phpbb_root_path, $phpEx, $config;
42
43
                $this->word_length = array('min' => $config['fulltext_native_min_chars'], 'max' => $config['fulltext_native_max_chars']);
44
45
                /**
46
                * Load the UTF tools
47
                */
48
                if (!class_exists('utf_normalizer'))
49
                {
50
                        include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
51
                }
52
53
                $error = false;
54
        }
55
56
        /**
57
        * Returns the name of this search backend to be displayed to administrators
58
        *
59
        * @return string Name
60
        */
61
        public function get_name()
62
        {
63
                return 'phpBB Native Fulltext';
64
        }
65
66
        /**
67
        * This function fills $this->search_query with the cleaned user search query.
68
        *
69
        * If $terms is 'any' then the words will be extracted from the search query
70
        * and combined with | inside brackets. They will afterwards be treated like
71
        * an standard search query.
72
        *
73
        * Then it analyses the query and fills the internal arrays $must_not_contain_ids,
74
        * $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
75
        *
76
        * @param        string        $keywords        contains the search query string as entered by the user
77
        * @param        string        $terms                is either 'all' (use search query as entered, default words to 'must be contained in post')
78
        *         or 'any' (find all posts containing at least one of the given words)
79
        * @return        boolean                                false if no valid keywords were found and otherwise true
80
        *
81
        * @access        public
82
        */
83
        function split_keywords($keywords, $terms)
84
        {
85
                global $db, $user, $config;
86
87
                $tokens = '+-|()*';
88
89
                $keywords = trim($this->cleanup($keywords, $tokens));
90
91
                // allow word|word|word without brackets
92
                if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
93
                {
94
                        $keywords = '(' . $keywords . ')';
95
                }
96
97
                $open_bracket = $space = false;
98
                for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
99
                {
100
                        if ($open_bracket !== false)
101
                        {
102
                                switch ($keywords[$i])
103
                                {
104
                                        case ')':
105
                                                if ($open_bracket + 1 == $i)
106
                                                {
107
                                                        $keywords[$i - 1] = '|';
108
                                                        $keywords[$i] = '|';
109
                                                }
110
                                                $open_bracket = false;
111
                                        break;
112
                                        case '(':
113
                                                $keywords[$i] = '|';
114
                                        break;
115
                                        case '+':
116
                                        case '-':
117
                                        case ' ':
118
                                                $keywords[$i] = '|';
119
                                        break;
120
                                        case '*':
121
                                                if ($i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0))
122
                                                {
123
                                                        if ($i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0))
124
                                                        {
125
                                                                $keywords = substr($keywords, 0, $i) . substr($keywords, $i + 1);
126
                                                        }
127
                                                }
128
                                        break;
129
                                }
130
                        }
131
                        else
132
                        {
133
                                switch ($keywords[$i])
134
                                {
135
                                        case ')':
136
                                                $keywords[$i] = ' ';
137
                                        break;
138
                                        case '(':
139
                                                $open_bracket = $i;
140
                                                $space = false;
141
                                        break;
142
                                        case '|':
143
                                                $keywords[$i] = ' ';
144
                                        break;
145
                                        case '-':
146
                                        case '+':
147
                                                $space = $keywords[$i];
148
                                        break;
149
                                        case ' ':
150
                                                if ($space !== false)
151
                                                {
152
                                                        $keywords[$i] = $space;
153
                                                }
154
                                        break;
155
                                        default:
156
                                                $space = false;
157
                                }
158
                        }
159
                }
160
161
                if ($open_bracket)
162
                {
163
                        $keywords .= ')';
164
                }
165
166
                $match = array(
167
                        '#  +#',
168
                        '#\|\|+#',
169
                        '#(\+|\-)(?:\+|\-)+#',
170
                        '#\(\|#',
171
                        '#\|\)#',
172
                );
173
                $replace = array(
174
                        ' ',
175
                        '|',
176
                        '$1',
177
                        '(',
178
                        ')',
179
                );
180
181
                $keywords = preg_replace($match, $replace, $keywords);
182
                $num_keywords = sizeof(explode(' ', $keywords));
183
184
                // We limit the number of allowed keywords to minimize load on the database
185
                if ($config['max_num_search_keywords'] && $num_keywords > $config['max_num_search_keywords'])
186
                {
187
                        trigger_error($user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', $config['max_num_search_keywords'], $num_keywords));
188
                }
189
190
                // $keywords input format: each word separated by a space, words in a bracket are not separated
191
192
                // the user wants to search for any word, convert the search query
193
                if ($terms == 'any')
194
                {
195
                        $words = array();
196
197
                        preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
198
                        if (sizeof($words[1]))
199
                        {
200
                                $keywords = '(' . implode('|', $words[1]) . ')';
201
                        }
202
                }
203
204
                // set the search_query which is shown to the user
205
                $this->search_query = $keywords;
206
207
                $exact_words = array();
208
                preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
209
                $exact_words = $exact_words[1];
210
211
                $common_ids = $words = array();
212
213
                if (sizeof($exact_words))
214
                {
215
                        $sql = 'SELECT word_id, word_text, word_common
216
                                FROM ' . SEARCH_WORDLIST_TABLE . '
217
                                WHERE ' . $db->sql_in_set('word_text', $exact_words) . '
218
                                ORDER BY word_count ASC';
219
                        $result = $db->sql_query($sql);
220
221
                        // store an array of words and ids, remove common words
222
                        while ($row = $db->sql_fetchrow($result))
223
                        {
224
                                if ($row['word_common'])
225
                                {
226
                                        $this->common_words[] = $row['word_text'];
227
                                        $common_ids[$row['word_text']] = (int) $row['word_id'];
228
                                        continue;
229
                                }
230
231
                                $words[$row['word_text']] = (int) $row['word_id'];
232
                        }
233
                        $db->sql_freeresult($result);
234
                }
235
                unset($exact_words);
236
237
                // now analyse the search query, first split it using the spaces
238
                $query = explode(' ', $keywords);
239
240
                $this->must_contain_ids = array();
241
                $this->must_not_contain_ids = array();
242
                $this->must_exclude_one_ids = array();
243
244
                $mode = '';
245
                $ignore_no_id = true;
246
247
                foreach ($query as $word)
248
                {
249
                        if (empty($word))
250
                        {
251
                                continue;
252
                        }
253
254
                        // words which should not be included
255
                        if ($word[0] == '-')
256
                        {
257
                                $word = substr($word, 1);
258
259
                                // a group of which at least one may not be in the resulting posts
260
                                if ($word[0] == '(')
261
                                {
262
                                        $word = array_unique(explode('|', substr($word, 1, -1)));
263
                                        $mode = 'must_exclude_one';
264
                                }
265
                                // one word which should not be in the resulting posts
266
                                else
267
                                {
268
                                        $mode = 'must_not_contain';
269
                                }
270
                                $ignore_no_id = true;
271
                        }
272
                        // words which have to be included
273
                        else
274
                        {
275
                                // no prefix is the same as a +prefix
276
                                if ($word[0] == '+')
277
                                {
278
                                        $word = substr($word, 1);
279
                                }
280
281
                                // a group of words of which at least one word should be in every resulting post
282
                                if ($word[0] == '(')
283
                                {
284
                                        $word = array_unique(explode('|', substr($word, 1, -1)));
285
                                }
286
                                $ignore_no_id = false;
287
                                $mode = 'must_contain';
288
                        }
289
290
                        if (empty($word))
291
                        {
292
                                continue;
293
                        }
294
295
                        // if this is an array of words then retrieve an id for each
296
                        if (is_array($word))
297
                        {
298
                                $non_common_words = array();
299
                                $id_words = array();
300
                                foreach ($word as $i => $word_part)
301
                                {
302
                                        if (strpos($word_part, '*') !== false)
303
                                        {
304
                                                $id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
305
                                                $non_common_words[] = $word_part;
306
                                        }
307
                                        else if (isset($words[$word_part]))
308
                                        {
309
                                                $id_words[] = $words[$word_part];
310
                                                $non_common_words[] = $word_part;
311
                                        }
312
                                        else
313
                                        {
314
                                                $len = utf8_strlen($word_part);
315
                                                if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
316
                                                {
317
                                                        $this->common_words[] = $word_part;
318
                                                }
319
                                        }
320
                                }
321
                                if (sizeof($id_words))
322
                                {
323
                                        sort($id_words);
324
                                        if (sizeof($id_words) > 1)
325
                                        {
326
                                                $this->{$mode . '_ids'}[] = $id_words;
327
                                        }
328
                                        else
329
                                        {
330
                                                $mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
331
                                                $this->{$mode . '_ids'}[] = $id_words[0];
332
                                        }
333
                                }
334
                                // throw an error if we shall not ignore unexistant words
335
                                else if (!$ignore_no_id && sizeof($non_common_words))
336
                                {
337
                                        trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $non_common_words)));
338
                                }
339
                                unset($non_common_words);
340
                        }
341
                        // else we only need one id
342
                        else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
343
                        {
344
                                if ($wildcard)
345
                                {
346
                                        $len = utf8_strlen(str_replace('*', '', $word));
347
                                        if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
348
                                        {
349
                                                $this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
350
                                        }
351
                                        else
352
                                        {
353
                                                $this->common_words[] = $word;
354
                                        }
355
                                }
356
                                else
357
                                {
358
                                        $this->{$mode . '_ids'}[] = $words[$word];
359
                                }
360
                        }
361
                        // throw an error if we shall not ignore unexistant words
362
                        else if (!$ignore_no_id)
363
                        {
364
                                if (!isset($common_ids[$word]))
365
                                {
366
                                        $len = utf8_strlen($word);
367
                                        if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
368
                                        {
369
                                                trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
370
                                        }
371
                                        else
372
                                        {
373
                                                $this->common_words[] = $word;
374
                                        }
375
                                }
376
                        }
377
                        else
378
                        {
379
                                $len = utf8_strlen($word);
380
                                if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
381
                                {
382
                                        $this->common_words[] = $word;
383
                                }
384
                        }
385
                }
386
387
                // we can't search for negatives only
388
                if (!sizeof($this->must_contain_ids))
389
                {
390
                        return false;
391
                }
392
393
                if (!empty($this->search_query))
394
                {
395
                        return true;
396
                }
397
                return false;
398
        }
399
400
        /**
401
        * Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
402
        *
403
        * @param        string                $type                                contains either posts or topics depending on what should be searched for
404
        * @param        string                $fields                                contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
405
        * @param        string                $terms                                is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
406
        * @param        array                $sort_by_sql                contains SQL code for the ORDER BY part of a query
407
        * @param        string                $sort_key                        is the key of $sort_by_sql for the selected sorting
408
        * @param        string                $sort_dir                        is either a or d representing ASC and DESC
409
        * @param        string                $sort_days                        specifies the maximum amount of days a post may be old
410
        * @param        array                $ex_fid_ary                        specifies an array of forum ids which should not be searched
411
        * @param        array                $m_approve_fid_ary        specifies an array of forum ids in which the searcher is allowed to view unapproved posts
412
        * @param        int                        $topic_id                        is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
413
        * @param        array                $author_ary                        an array of author ids if the author should be ignored during the search the array is empty
414
        * @param        string                $author_name                specifies the author match, when ANONYMOUS is also a search-match
415
        * @param        array                &$id_ary                        passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
416
        * @param        int                        $start                                indicates the first index of the page
417
        * @param        int                        $per_page                        number of ids each page is supposed to contain
418
        * @return        boolean|int                                                total number of results
419
        *
420
        * @access        public
421
        */
422
        function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
423
        {
424
                global $config, $db;
425
426
                // No keywords? No posts.
427
                if (empty($this->search_query))
428
                {
429
                        return false;
430
                }
431
432
                $must_contain_ids = $this->must_contain_ids;
433
                $must_not_contain_ids = $this->must_not_contain_ids;
434
                $must_exclude_one_ids = $this->must_exclude_one_ids;
435
436
                sort($must_contain_ids);
437
                sort($must_not_contain_ids);
438
                sort($must_exclude_one_ids);
439
440
                // generate a search_key from all the options to identify the results
441
                $search_key = md5(implode('#', array(
442
                        serialize($must_contain_ids),
443
                        serialize($must_not_contain_ids),
444
                        serialize($must_exclude_one_ids),
445
                        $type,
446
                        $fields,
447
                        $terms,
448
                        $sort_days,
449
                        $sort_key,
450
                        $topic_id,
451
                        implode(',', $ex_fid_ary),
452
                        implode(',', $m_approve_fid_ary),
453
                        implode(',', $author_ary),
454
                        $author_name,
455
                )));
456
457
                // try reading the results from cache
458
                $total_results = 0;
459
                if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
460
                {
461
                        return $total_results;
462
                }
463
464
                $id_ary = array();
465
466
                $sql_where = array();
467
                $group_by = false;
468
                $m_num = 0;
469
                $w_num = 0;
470
471
                $sql_array = array(
472
                        'SELECT'        => ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
473
                        'FROM'                => array(
474
                                SEARCH_WORDMATCH_TABLE        => array(),
475
                                SEARCH_WORDLIST_TABLE        => array(),
476
                        ),
477
                        'LEFT_JOIN' => array(array(
478
                                'FROM'        => array(POSTS_TABLE => 'p'),
479
                                'ON'        => 'm0.post_id = p.post_id',
480
                        )),
481
                );
482
483
                $title_match = '';
484
                $left_join_topics = false;
485
                $group_by = true;
486
                // Build some display specific sql strings
487
                switch ($fields)
488
                {
489
                        case 'titleonly':
490
                                $title_match = 'title_match = 1';
491
                                $group_by = false;
492
                        // no break
493
                        case 'firstpost':
494
                                $left_join_topics = true;
495
                                $sql_where[] = 'p.post_id = t.topic_first_post_id';
496
                        break;
497
498
                        case 'msgonly':
499
                                $title_match = 'title_match = 0';
500
                                $group_by = false;
501
                        break;
502
                }
503
504
                if ($type == 'topics')
505
                {
506
                        $left_join_topics = true;
507
                        $group_by = true;
508
                }
509
510
                /**
511
                * @todo Add a query optimizer (handle stuff like "+(4|3) +4")
512
                */
513
514
                foreach ($this->must_contain_ids as $subquery)
515
                {
516
                        if (is_array($subquery))
517
                        {
518
                                $group_by = true;
519
520
                                $word_id_sql = array();
521
                                $word_ids = array();
522
                                foreach ($subquery as $id)
523
                                {
524
                                        if (is_string($id))
525
                                        {
526
                                                $sql_array['LEFT_JOIN'][] = array(
527
                                                        'FROM'        => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
528
                                                        'ON'        => "w$w_num.word_text LIKE $id"
529
                                                );
530
                                                $word_ids[] = "w$w_num.word_id";
531
532
                                                $w_num++;
533
                                        }
534
                                        else
535
                                        {
536
                                                $word_ids[] = $id;
537
                                        }
538
                                }
539
540
                                $sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);
541
542
                                unset($word_id_sql);
543
                                unset($word_ids);
544
                        }
545
                        else if (is_string($subquery))
546
                        {
547
                                $sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;
548
549
                                $sql_where[] = "w$w_num.word_text LIKE $subquery";
550
                                $sql_where[] = "m$m_num.word_id = w$w_num.word_id";
551
552
                                $group_by = true;
553
                                $w_num++;
554
                        }
555
                        else
556
                        {
557
                                $sql_where[] = "m$m_num.word_id = $subquery";
558
                        }
559
560
                        $sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;
561
562
                        if ($title_match)
563
                        {
564
                                $sql_where[] = "m$m_num.$title_match";
565
                        }
566
567
                        if ($m_num != 0)
568
                        {
569
                                $sql_where[] = "m$m_num.post_id = m0.post_id";
570
                        }
571
                        $m_num++;
572
                }
573
574
                foreach ($this->must_not_contain_ids as $key => $subquery)
575
                {
576
                        if (is_string($subquery))
577
                        {
578
                                $sql_array['LEFT_JOIN'][] = array(
579
                                        'FROM'        => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
580
                                        'ON'        => "w$w_num.word_text LIKE $subquery"
581
                                );
582
583
                                $this->must_not_contain_ids[$key] = "w$w_num.word_id";
584
585
                                $group_by = true;
586
                                $w_num++;
587
                        }
588
                }
589
590
                if (sizeof($this->must_not_contain_ids))
591
                {
592
                        $sql_array['LEFT_JOIN'][] = array(
593
                                'FROM'        => array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
594
                                'ON'        => $db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
595
                        );
596
597
                        $sql_where[] = "m$m_num.word_id IS NULL";
598
                        $m_num++;
599
                }
600
601
                foreach ($this->must_exclude_one_ids as $ids)
602
                {
603
                        $is_null_joins = array();
604
                        foreach ($ids as $id)
605
                        {
606
                                if (is_string($id))
607
                                {
608
                                        $sql_array['LEFT_JOIN'][] = array(
609
                                                'FROM'        => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
610
                                                'ON'        => "w$w_num.word_text LIKE $id"
611
                                        );
612
                                        $id = "w$w_num.word_id";
613
614
                                        $group_by = true;
615
                                        $w_num++;
616
                                }
617
618
                                $sql_array['LEFT_JOIN'][] = array(
619
                                        'FROM'        => array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
620
                                        'ON'        => "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
621
                                );
622
                                $is_null_joins[] = "m$m_num.word_id IS NULL";
623
624
                                $m_num++;
625
                        }
626
                        $sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
627
                }
628
629
                if (!sizeof($m_approve_fid_ary))
630
                {
631
                        $sql_where[] = 'p.post_approved = 1';
632
                }
633
                else if ($m_approve_fid_ary !== array(-1))
634
                {
635
                        $sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
636
                }
637
638
                if ($topic_id)
639
                {
640
                        $sql_where[] = 'p.topic_id = ' . $topic_id;
641
                }
642
643
                if (sizeof($author_ary))
644
                {
645
                        if ($author_name)
646
                        {
647
                                // first one matches post of registered users, second one guests and deleted users
648
                                $sql_author = '(' . $db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
649
                        }
650
                        else
651
                        {
652
                                $sql_author = $db->sql_in_set('p.poster_id', $author_ary);
653
                        }
654
                        $sql_where[] = $sql_author;
655
                }
656
657
                if (sizeof($ex_fid_ary))
658
                {
659
                        $sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
660
                }
661
662
                if ($sort_days)
663
                {
664
                        $sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
665
                }
666
667
                $sql_array['WHERE'] = implode(' AND ', $sql_where);
668
669
                $is_mysql = false;
670
                // if the total result count is not cached yet, retrieve it from the db
671
                if (!$total_results)
672
                {
673
                        $sql = '';
674
                        $sql_array_count = $sql_array;
675
676
                        if ($left_join_topics)
677
                        {
678
                                $sql_array_count['LEFT_JOIN'][] = array(
679
                                        'FROM'        => array(TOPICS_TABLE => 't'),
680
                                        'ON'        => 'p.topic_id = t.topic_id'
681
                                );
682
                        }
683
684
                        switch ($db->sql_layer)
685
                        {
686
                                case 'mysql4':
687
                                case 'mysqli':
688
689
                                        // 3.x does not support SQL_CALC_FOUND_ROWS
690
                                        // $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
691
                                        $is_mysql = true;
692
693
                                break;
694
695
                                case 'sqlite':
696
                                        $sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
697
                                        $sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
698
                                                        FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';
699
700
                                // no break
701
702
                                default:
703
                                        $sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
704
                                        $sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;
705
706
                                        $result = $db->sql_query($sql);
707
                                        $total_results = (int) $db->sql_fetchfield('total_results');
708
                                        $db->sql_freeresult($result);
709
710
                                        if (!$total_results)
711
                                        {
712
                                                return false;
713
                                        }
714
                                break;
715
                        }
716
717
                        unset($sql_array_count, $sql);
718
                }
719
720
                // Build sql strings for sorting
721
                $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
722
723
                switch ($sql_sort[0])
724
                {
725
                        case 'u':
726
                                $sql_array['FROM'][USERS_TABLE] = 'u';
727
                                $sql_where[] = 'u.user_id = p.poster_id ';
728
                        break;
729
730
                        case 't':
731
                                $left_join_topics = true;
732
                        break;
733
734
                        case 'f':
735
                                $sql_array['FROM'][FORUMS_TABLE] = 'f';
736
                                $sql_where[] = 'f.forum_id = p.forum_id';
737
                        break;
738
                }
739
740
                if ($left_join_topics)
741
                {
742
                        $sql_array['LEFT_JOIN'][] = array(
743
                                'FROM'        => array(TOPICS_TABLE => 't'),
744
                                'ON'        => 'p.topic_id = t.topic_id'
745
                        );
746
                }
747
748
                $sql_array['WHERE'] = implode(' AND ', $sql_where);
749
                $sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
750
                $sql_array['ORDER_BY'] = $sql_sort;
751
752
                unset($sql_where, $sql_sort, $group_by);
753
754
                $sql = $db->sql_build_query('SELECT', $sql_array);
755
                $result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
756
757
                while ($row = $db->sql_fetchrow($result))
758
                {
759
                        $id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
760
                }
761
                $db->sql_freeresult($result);
762
763
                if (!sizeof($id_ary))
764
                {
765
                        return false;
766
                }
767
768
                // if we use mysql and the total result count is not cached yet, retrieve it from the db
769
                if (!$total_results && $is_mysql)
770
                {
771
                        // Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
772
                        $sql_array_copy = $sql_array;
773
                        $sql_array_copy['SELECT'] = 'SQL_CALC_FOUND_ROWS p.post_id ';
774
775
                        $sql = $db->sql_build_query('SELECT', $sql_array_copy);
776
                        unset($sql_array_copy);
777
778
                        $db->sql_query($sql);
779
                        $db->sql_freeresult($result);
780
781
                        $sql = 'SELECT FOUND_ROWS() as total_results';
782
                        $result = $db->sql_query($sql);
783
                        $total_results = (int) $db->sql_fetchfield('total_results');
784
                        $db->sql_freeresult($result);
785
786
                        if (!$total_results)
787
                        {
788
                                return false;
789
                        }
790
                }
791
792
                // store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
793
                $this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
794
                $id_ary = array_slice($id_ary, 0, (int) $per_page);
795
796
                return $total_results;
797
        }
798
799
        /**
800
        * Performs a search on an author's posts without caring about message contents. Depends on display specific params
801
        *
802
        * @param        string                $type                                contains either posts or topics depending on what should be searched for
803
        * @param        boolean                $firstpost_only                if true, only topic starting posts will be considered
804
        * @param        array                $sort_by_sql                contains SQL code for the ORDER BY part of a query
805
        * @param        string                $sort_key                        is the key of $sort_by_sql for the selected sorting
806
        * @param        string                $sort_dir                        is either a or d representing ASC and DESC
807
        * @param        string                $sort_days                        specifies the maximum amount of days a post may be old
808
        * @param        array                $ex_fid_ary                        specifies an array of forum ids which should not be searched
809
        * @param        array                $m_approve_fid_ary        specifies an array of forum ids in which the searcher is allowed to view unapproved posts
810
        * @param        int                        $topic_id                        is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
811
        * @param        array                $author_ary                        an array of author ids
812
        * @param        string                $author_name                specifies the author match, when ANONYMOUS is also a search-match
813
        * @param        array                &$id_ary                        passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
814
        * @param        int                        $start                                indicates the first index of the page
815
        * @param        int                        $per_page                        number of ids each page is supposed to contain
816
        * @return        boolean|int                                                total number of results
817
        *
818
        * @access        public
819
        */
820
        function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
821
        {
822
                global $config, $db;
823
824
                // No author? No posts.
825
                if (!sizeof($author_ary))
826
                {
827
                        return 0;
828
                }
829
830
                // generate a search_key from all the options to identify the results
831
                $search_key = md5(implode('#', array(
832
                        '',
833
                        $type,
834
                        ($firstpost_only) ? 'firstpost' : '',
835
                        '',
836
                        '',
837
                        $sort_days,
838
                        $sort_key,
839
                        $topic_id,
840
                        implode(',', $ex_fid_ary),
841
                        implode(',', $m_approve_fid_ary),
842
                        implode(',', $author_ary),
843
                        $author_name,
844
                )));
845
846
                // try reading the results from cache
847
                $total_results = 0;
848
                if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
849
                {
850
                        return $total_results;
851
                }
852
853
                $id_ary = array();
854
855
                // Create some display specific sql strings
856
                if ($author_name)
857
                {
858
                        // first one matches post of registered users, second one guests and deleted users
859
                        $sql_author = '(' . $db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
860
                }
861
                else
862
                {
863
                        $sql_author = $db->sql_in_set('p.poster_id', $author_ary);
864
                }
865
                $sql_fora                = (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
866
                $sql_time                = ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
867
                $sql_topic_id        = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
868
                $sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';
869
870
                // Build sql strings for sorting
871
                $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
872
                $sql_sort_table = $sql_sort_join = '';
873
                switch ($sql_sort[0])
874
                {
875
                        case 'u':
876
                                $sql_sort_table        = USERS_TABLE . ' u, ';
877
                                $sql_sort_join        = ' AND u.user_id = p.poster_id ';
878
                        break;
879
880
                        case 't':
881
                                $sql_sort_table        = ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
882
                                $sql_sort_join        = ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
883
                        break;
884
885
                        case 'f':
886
                                $sql_sort_table        = FORUMS_TABLE . ' f, ';
887
                                $sql_sort_join        = ' AND f.forum_id = p.forum_id ';
888
                        break;
889
                }
890
891
                if (!sizeof($m_approve_fid_ary))
892
                {
893
                        $m_approve_fid_sql = ' AND p.post_approved = 1';
894
                }
895
                else if ($m_approve_fid_ary == array(-1))
896
                {
897
                        $m_approve_fid_sql = '';
898
                }
899
                else
900
                {
901
                        $m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
902
                }
903
904
                $select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
905
                $is_mysql = false;
906
907
                // If the cache was completely empty count the results
908
                if (!$total_results)
909
                {
910
                        switch ($db->sql_layer)
911
                        {
912
                                case 'mysql4':
913
                                case 'mysqli':
914
//                                        $select = 'SQL_CALC_FOUND_ROWS ' . $select;
915
                                        $is_mysql = true;
916
                                break;
917
918
                                default:
919
                                        if ($type == 'posts')
920
                                        {
921
                                                $sql = 'SELECT COUNT(p.post_id) as total_results
922
                                                        FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
923
                                                        WHERE $sql_author
924
                                                                $sql_topic_id
925
                                                                $sql_firstpost
926
                                                                $m_approve_fid_sql
927
                                                                $sql_fora
928
                                                                $sql_time";
929
                                        }
930
                                        else
931
                                        {
932
                                                if ($db->sql_layer == 'sqlite')
933
                                                {
934
                                                        $sql = 'SELECT COUNT(topic_id) as total_results
935
                                                                FROM (SELECT DISTINCT t.topic_id';
936
                                                }
937
                                                else
938
                                                {
939
                                                        $sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
940
                                                }
941
942
                                                $sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
943
                                                        WHERE $sql_author
944
                                                                $sql_topic_id
945
                                                                $sql_firstpost
946
                                                                $m_approve_fid_sql
947
                                                                $sql_fora
948
                                                                AND t.topic_id = p.topic_id
949
                                                                $sql_time" . (($db->sql_layer == 'sqlite') ? ')' : '');
950
                                        }
951
                                        $result = $db->sql_query($sql);
952
953
                                        $total_results = (int) $db->sql_fetchfield('total_results');
954
                                        $db->sql_freeresult($result);
955
956
                                        if (!$total_results)
957
                                        {
958
                                                return false;
959
                                        }
960
                                break;
961
                        }
962
                }
963
964
                // Build the query for really selecting the post_ids
965
                if ($type == 'posts')
966
                {
967
                        $sql = "SELECT $select
968
                                FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
969
                                WHERE $sql_author
970
                                        $sql_topic_id
971
                                        $sql_firstpost
972
                                        $m_approve_fid_sql
973
                                        $sql_fora
974
                                        $sql_sort_join
975
                                        $sql_time
976
                                ORDER BY $sql_sort";
977
                        $field = 'post_id';
978
                }
979
                else
980
                {
981
                        $sql = "SELECT $select
982
                                FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
983
                                WHERE $sql_author
984
                                        $sql_topic_id
985
                                        $sql_firstpost
986
                                        $m_approve_fid_sql
987
                                        $sql_fora
988
                                        AND t.topic_id = p.topic_id
989
                                        $sql_sort_join
990
                                        $sql_time
991
                                GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
992
                                ORDER BY ' . $sql_sort;
993
                        $field = 'topic_id';
994
                }
995
996
                // Only read one block of posts from the db and then cache it
997
                $result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
998
999
                while ($row = $db->sql_fetchrow($result))
1000
                {
1001
                        $id_ary[] = (int) $row[$field];
1002
                }
1003
                $db->sql_freeresult($result);
1004
1005
                if (!$total_results && $is_mysql)
1006
                {
1007
                        // Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
1008
                        $sql = str_replace('SELECT ' . $select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql);
1009
1010
                        $db->sql_query($sql);
1011
                        $db->sql_freeresult($result);
1012
1013
                        $sql = 'SELECT FOUND_ROWS() as total_results';
1014
                        $result = $db->sql_query($sql);
1015
                        $total_results = (int) $db->sql_fetchfield('total_results');
1016
                        $db->sql_freeresult($result);
1017
1018
                        if (!$total_results)
1019
                        {
1020
                                return false;
1021
                        }
1022
                }
1023
1024
                if (sizeof($id_ary))
1025
                {
1026
                        $this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
1027
                        $id_ary = array_slice($id_ary, 0, $per_page);
1028
1029
                        return $total_results;
1030
                }
1031
                return false;
1032
        }
1033
1034
        /**
1035
        * Split a text into words of a given length
1036
        *
1037
        * The text is converted to UTF-8, cleaned up, and split. Then, words that
1038
        * conform to the defined length range are returned in an array.
1039
        *
1040
        * NOTE: duplicates are NOT removed from the return array
1041
        *
1042
        * @param        string        $text        Text to split, encoded in UTF-8
1043
        * @return        array                        Array of UTF-8 words
1044
        *
1045
        * @access        private
1046
        */
1047
        function split_message($text)
1048
        {
1049
                global $phpbb_root_path, $phpEx, $user;
1050
1051
                $match = $words = array();
1052
1053
                /**
1054
                * Taken from the original code
1055
                */
1056
                // Do not index code
1057
                $match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
1058
                // BBcode
1059
                $match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';
1060
1061
                $min = $this->word_length['min'];
1062
                $max = $this->word_length['max'];
1063
1064
                $isset_min = $min - 1;
1065
1066
                /**
1067
                * Clean up the string, remove HTML tags, remove BBCodes
1068
                */
1069
                $word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1), ' ');
1070
1071
                while (strlen($word))
1072
                {
1073
                        if (strlen($word) > 255 || strlen($word) <= $isset_min)
1074
                        {
1075
                                /**
1076
                                * Words longer than 255 bytes are ignored. This will have to be
1077
                                * changed whenever we change the length of search_wordlist.word_text
1078
                                *
1079
                                * Words shorter than $isset_min bytes are ignored, too
1080
                                */
1081
                                $word = strtok(' ');
1082
                                continue;
1083
                        }
1084
1085
                        $len = utf8_strlen($word);
1086
1087
                        /**
1088
                        * Test whether the word is too short to be indexed.
1089
                        *
1090
                        * Note that this limit does NOT apply to CJK and Hangul
1091
                        */
1092
                        if ($len < $min)
1093
                        {
1094
                                /**
1095
                                * Note: this could be optimized. If the codepoint is lower than Hangul's range
1096
                                * we know that it will also be lower than CJK ranges
1097
                                */
1098
                                if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
1099
                                 && (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
1100
                                 && (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
1101
                                {
1102
                                        $word = strtok(' ');
1103
                                        continue;
1104
                                }
1105
                        }
1106
1107
                        $words[] = $word;
1108
                        $word = strtok(' ');
1109
                }
1110
1111
                return $words;
1112
        }
1113
1114
        /**
1115
        * Updates wordlist and wordmatch tables when a message is posted or changed
1116
        *
1117
        * @param        string        $mode                Contains the post mode: edit, post, reply, quote
1118
        * @param        int                $post_id        The id of the post which is modified/created
1119
        * @param        string        &$message        New or updated post content
1120
        * @param        string        &$subject        New or updated post subject
1121
        * @param        int                $poster_id        Post author's user id
1122
        * @param        int                $forum_id        The id of the forum in which the post is located
1123
        *
1124
        * @access        public
1125
        */
1126
        function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
1127
        {
1128
                global $config, $db, $user;
1129
1130
                if (!$config['fulltext_native_load_upd'])
1131
                {
1132
                        /**
1133
                        * The search indexer is disabled, return
1134
                        */
1135
                        return;
1136
                }
1137
1138
                // Split old and new post/subject to obtain array of 'words'
1139
                $split_text = $this->split_message($message);
1140
                $split_title = $this->split_message($subject);
1141
1142
                $cur_words = array('post' => array(), 'title' => array());
1143
1144
                $words = array();
1145
                if ($mode == 'edit')
1146
                {
1147
                        $words['add']['post'] = array();
1148
                        $words['add']['title'] = array();
1149
                        $words['del']['post'] = array();
1150
                        $words['del']['title'] = array();
1151
1152
                        $sql = 'SELECT w.word_id, w.word_text, m.title_match
1153
                                FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . " m
1154
                                WHERE m.post_id = $post_id
1155
                                        AND w.word_id = m.word_id";
1156
                        $result = $db->sql_query($sql);
1157
1158
                        while ($row = $db->sql_fetchrow($result))
1159
                        {
1160
                                $which = ($row['title_match']) ? 'title' : 'post';
1161
                                $cur_words[$which][$row['word_text']] = $row['word_id'];
1162
                        }
1163
                        $db->sql_freeresult($result);
1164
1165
                        $words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
1166
                        $words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
1167
                        $words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
1168
                        $words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
1169
                }
1170
                else
1171
                {
1172
                        $words['add']['post'] = $split_text;
1173
                        $words['add']['title'] = $split_title;
1174
                        $words['del']['post'] = array();
1175
                        $words['del']['title'] = array();
1176
                }
1177
                unset($split_text);
1178
                unset($split_title);
1179
1180
                // Get unique words from the above arrays
1181
                $unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));
1182
1183
                // We now have unique arrays of all words to be added and removed and
1184
                // individual arrays of added and removed words for text and title. What
1185
                // we need to do now is add the new words (if they don't already exist)
1186
                // and then add (or remove) matches between the words and this post
1187
                if (sizeof($unique_add_words))
1188
                {
1189
                        $sql = 'SELECT word_id, word_text
1190
                                FROM ' . SEARCH_WORDLIST_TABLE . '
1191
                                WHERE ' . $db->sql_in_set('word_text', $unique_add_words);
1192
                        $result = $db->sql_query($sql);
1193
1194
                        $word_ids = array();
1195
                        while ($row = $db->sql_fetchrow($result))
1196
                        {
1197
                                $word_ids[$row['word_text']] = $row['word_id'];
1198
                        }
1199
                        $db->sql_freeresult($result);
1200
                        $new_words = array_diff($unique_add_words, array_keys($word_ids));
1201
1202
                        $db->sql_transaction('begin');
1203
                        if (sizeof($new_words))
1204
                        {
1205
                                $sql_ary = array();
1206
1207
                                foreach ($new_words as $word)
1208
                                {
1209
                                        $sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
1210
                                }
1211
                                $db->sql_return_on_error(true);
1212
                                $db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
1213
                                $db->sql_return_on_error(false);
1214
                        }
1215
                        unset($new_words, $sql_ary);
1216
                }
1217
                else
1218
                {
1219
                        $db->sql_transaction('begin');
1220
                }
1221
1222
                // now update the search match table, remove links to removed words and add links to new words
1223
                foreach ($words['del'] as $word_in => $word_ary)
1224
                {
1225
                        $title_match = ($word_in == 'title') ? 1 : 0;
1226
1227
                        if (sizeof($word_ary))
1228
                        {
1229
                                $sql_in = array();
1230
                                foreach ($word_ary as $word)
1231
                                {
1232
                                        $sql_in[] = $cur_words[$word_in][$word];
1233
                                }
1234
1235
                                $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1236
                                        WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1237
                                                AND post_id = ' . intval($post_id) . "
1238
                                                AND title_match = $title_match";
1239
                                $db->sql_query($sql);
1240
1241
                                $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1242
                                        SET word_count = word_count - 1
1243
                                        WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1244
                                                AND word_count > 0';
1245
                                $db->sql_query($sql);
1246
1247
                                unset($sql_in);
1248
                        }
1249
                }
1250
1251
                $db->sql_return_on_error(true);
1252
                foreach ($words['add'] as $word_in => $word_ary)
1253
                {
1254
                        $title_match = ($word_in == 'title') ? 1 : 0;
1255
1256
                        if (sizeof($word_ary))
1257
                        {
1258
                                $sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
1259
                                        SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
1260
                                        FROM ' . SEARCH_WORDLIST_TABLE . '
1261
                                        WHERE ' . $db->sql_in_set('word_text', $word_ary);
1262
                                $db->sql_query($sql);
1263
1264
                                $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1265
                                        SET word_count = word_count + 1
1266
                                        WHERE ' . $db->sql_in_set('word_text', $word_ary);
1267
                                $db->sql_query($sql);
1268
                        }
1269
                }
1270
                $db->sql_return_on_error(false);
1271
1272
                $db->sql_transaction('commit');
1273
1274
                // destroy cached search results containing any of the words removed or added
1275
                $this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));
1276
1277
                unset($unique_add_words);
1278
                unset($words);
1279
                unset($cur_words);
1280
        }
1281
1282
        /**
1283
        * Removes entries from the wordmatch table for the specified post_ids
1284
        */
1285
        function index_remove($post_ids, $author_ids, $forum_ids)
1286
        {
1287
                global $db;
1288
1289
                if (sizeof($post_ids))
1290
                {
1291
                        $sql = 'SELECT w.word_id, w.word_text, m.title_match
1292
                                FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
1293
                                WHERE ' . $db->sql_in_set('m.post_id', $post_ids) . '
1294
                                        AND w.word_id = m.word_id';
1295
                        $result = $db->sql_query($sql);
1296
1297
                        $message_word_ids = $title_word_ids = $word_texts = array();
1298
                        while ($row = $db->sql_fetchrow($result))
1299
                        {
1300
                                if ($row['title_match'])
1301
                                {
1302
                                        $title_word_ids[] = $row['word_id'];
1303
                                }
1304
                                else
1305
                                {
1306
                                        $message_word_ids[] = $row['word_id'];
1307
                                }
1308
                                $word_texts[] = $row['word_text'];
1309
                        }
1310
                        $db->sql_freeresult($result);
1311
1312
                        if (sizeof($title_word_ids))
1313
                        {
1314
                                $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1315
                                        SET word_count = word_count - 1
1316
                                        WHERE ' . $db->sql_in_set('word_id', $title_word_ids) . '
1317
                                                AND word_count > 0';
1318
                                $db->sql_query($sql);
1319
                        }
1320
1321
                        if (sizeof($message_word_ids))
1322
                        {
1323
                                $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1324
                                        SET word_count = word_count - 1
1325
                                        WHERE ' . $db->sql_in_set('word_id', $message_word_ids) . '
1326
                                                AND word_count > 0';
1327
                                $db->sql_query($sql);
1328
                        }
1329
1330
                        unset($title_word_ids);
1331
                        unset($message_word_ids);
1332
1333
                        $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1334
                                WHERE ' . $db->sql_in_set('post_id', $post_ids);
1335
                        $db->sql_query($sql);
1336
                }
1337
1338
                $this->destroy_cache(array_unique($word_texts), $author_ids);
1339
        }
1340
1341
        /**
1342
        * Tidy up indexes: Tag 'common words' and remove
1343
        * words no longer referenced in the match table
1344
        */
1345
        function tidy()
1346
        {
1347
                global $db, $config;
1348
1349
                // Is the fulltext indexer disabled? If yes then we need not
1350
                // carry on ... it's okay ... I know when I'm not wanted boo hoo
1351
                if (!$config['fulltext_native_load_upd'])
1352
                {
1353
                        set_config('search_last_gc', time(), true);
1354
                        return;
1355
                }
1356
1357
                $destroy_cache_words = array();
1358
1359
                // Remove common words
1360
                if ($config['num_posts'] >= 100 && $config['fulltext_native_common_thres'])
1361
                {
1362
                        $common_threshold = ((double) $config['fulltext_native_common_thres']) / 100.0;
1363
                        // First, get the IDs of common words
1364
                        $sql = 'SELECT word_id, word_text
1365
                                FROM ' . SEARCH_WORDLIST_TABLE . '
1366
                                WHERE word_count > ' . floor($config['num_posts'] * $common_threshold) . '
1367
                                        OR word_common = 1';
1368
                        $result = $db->sql_query($sql);
1369
1370
                        $sql_in = array();
1371
                        while ($row = $db->sql_fetchrow($result))
1372
                        {
1373
                                $sql_in[] = $row['word_id'];
1374
                                $destroy_cache_words[] = $row['word_text'];
1375
                        }
1376
                        $db->sql_freeresult($result);
1377
1378
                        if (sizeof($sql_in))
1379
                        {
1380
                                // Flag the words
1381
                                $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1382
                                        SET word_common = 1
1383
                                        WHERE ' . $db->sql_in_set('word_id', $sql_in);
1384
                                $db->sql_query($sql);
1385
1386
                                // by setting search_last_gc to the new time here we make sure that if a user reloads because the
1387
                                // following query takes too long, he won't run into it again
1388
                                set_config('search_last_gc', time(), true);
1389
1390
                                // Delete the matches
1391
                                $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1392
                                        WHERE ' . $db->sql_in_set('word_id', $sql_in);
1393
                                $db->sql_query($sql);
1394
                        }
1395
                        unset($sql_in);
1396
                }
1397
1398
                if (sizeof($destroy_cache_words))
1399
                {
1400
                        // destroy cached search results containing any of the words that are now common or were removed
1401
                        $this->destroy_cache(array_unique($destroy_cache_words));
1402
                }
1403
1404
                set_config('search_last_gc', time(), true);
1405
        }
1406
1407
        /**
1408
        * Deletes all words from the index
1409
        */
1410
        function delete_index($acp_module, $u_action)
1411
        {
1412
                global $db;
1413
1414
                switch ($db->sql_layer)
1415
                {
1416
                        case 'sqlite':
1417
                        case 'firebird':
1418
                                $db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
1419
                                $db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);
1420
                                $db->sql_query('DELETE FROM ' . SEARCH_RESULTS_TABLE);
1421
                        break;
1422
1423
                        default:
1424
                                $db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDLIST_TABLE);
1425
                                $db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDMATCH_TABLE);
1426
                                $db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
1427
                        break;
1428
                }
1429
        }
1430
1431
        /**
1432
        * Returns true if both FULLTEXT indexes exist
1433
        */
1434
        function index_created()
1435
        {
1436
                if (!sizeof($this->stats))
1437
                {
1438
                        $this->get_stats();
1439
                }
1440
1441
                return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
1442
        }
1443
1444
        /**
1445
        * Returns an associative array containing information about the indexes
1446
        */
1447
        function index_stats()
1448
        {
1449
                global $user;
1450
1451
                if (!sizeof($this->stats))
1452
                {
1453
                        $this->get_stats();
1454
                }
1455
1456
                return array(
1457
                        $user->lang['TOTAL_WORDS']                => $this->stats['total_words'],
1458
                        $user->lang['TOTAL_MATCHES']        => $this->stats['total_matches']);
1459
        }
1460
1461
        function get_stats()
1462
        {
1463
                global $db;
1464
1465
                $sql = 'SELECT COUNT(*) as total_words
1466
                        FROM ' . SEARCH_WORDLIST_TABLE;
1467
                $result = $db->sql_query($sql);
1468
                $this->stats['total_words'] = (int) $db->sql_fetchfield('total_words');
1469
                $db->sql_freeresult($result);
1470
1471
                $sql = 'SELECT COUNT(*) as total_matches
1472
                        FROM ' . SEARCH_WORDMATCH_TABLE;
1473
                $result = $db->sql_query($sql);
1474
                $this->stats['total_matches'] = (int) $db->sql_fetchfield('total_matches');
1475
                $db->sql_freeresult($result);
1476
        }
1477
1478
        /**
1479
        * Clean up a text to remove non-alphanumeric characters
1480
        *
1481
        * This method receives a UTF-8 string, normalizes and validates it, replaces all
1482
        * non-alphanumeric characters with strings then returns the result.
1483
        *
1484
        * Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
1485
        *
1486
        * @param        string        $text                        Text to split, in UTF-8 (not normalized or sanitized)
1487
        * @param        string        $allowed_chars        String of special chars to allow
1488
        * @param        string        $encoding                Text encoding
1489
        * @return        string                                        Cleaned up text, only alphanumeric chars are left
1490
        *
1491
        * @todo normalizer::cleanup being able to be used?
1492
        */
1493
        function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
1494
        {
1495
                global $phpbb_root_path, $phpEx;
1496
                static $conv = array(), $conv_loaded = array();
1497
                $words = $allow = array();
1498
1499
                // Convert the text to UTF-8
1500
                $encoding = strtolower($encoding);
1501
                if ($encoding != 'utf-8')
1502
                {
1503
                        $text = utf8_recode($text, $encoding);
1504
                }
1505
1506
                $utf_len_mask = array(
1507
                        "\xC0"        =>        2,
1508
                        "\xD0"        =>        2,
1509
                        "\xE0"        =>        3,
1510
                        "\xF0"        =>        4
1511
                );
1512
1513
                /**
1514
                * Replace HTML entities and NCRs
1515
                */
1516
                $text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);
1517
1518
                /**
1519
                * Load the UTF-8 normalizer
1520
                *
1521
                * If we use it more widely, an instance of that class should be held in a
1522
                * a global variable instead
1523
                */
1524
                utf_normalizer::nfc($text);
1525
1526
                /**
1527
                * The first thing we do is:
1528
                *
1529
                * - convert ASCII-7 letters to lowercase
1530
                * - remove the ASCII-7 non-alpha characters
1531
                * - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
1532
                *   0xC1 and 0xF5-0xFF
1533
                *
1534
                * @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
1535
                */
1536
                $sb_match        = "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
1537
                $sb_replace        = 'istcpamelrdojbnhfgvwuqkyxz                                                                              ';
1538
1539
                /**
1540
                * This is the list of legal ASCII chars, it is automatically extended
1541
                * with ASCII chars from $allowed_chars
1542
                */
1543
                $legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';
1544
1545
                /**
1546
                * Prepare an array containing the extra chars to allow
1547
                */
1548
                if (isset($allowed_chars[0]))
1549
                {
1550
                        $pos = 0;
1551
                        $len = strlen($allowed_chars);
1552
                        do
1553
                        {
1554
                                $c = $allowed_chars[$pos];
1555
1556
                                if ($c < "\x80")
1557
                                {
1558
                                        /**
1559
                                        * ASCII char
1560
                                        */
1561
                                        $sb_pos = strpos($sb_match, $c);
1562
                                        if (is_int($sb_pos))
1563
                                        {
1564
                                                /**
1565
                                                * Remove the char from $sb_match and its corresponding
1566
                                                * replacement in $sb_replace
1567
                                                */
1568
                                                $sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
1569
                                                $sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
1570
                                                $legal_ascii .= $c;
1571
                                        }
1572
1573
                                        ++$pos;
1574
                                }
1575
                                else
1576
                                {
1577
                                        /**
1578
                                        * UTF-8 char
1579
                                        */
1580
                                        $utf_len = $utf_len_mask[$c & "\xF0"];
1581
                                        $allow[substr($allowed_chars, $pos, $utf_len)] = 1;
1582
                                        $pos += $utf_len;
1583
                                }
1584
                        }
1585
                        while ($pos < $len);
1586
                }
1587
1588
                $text = strtr($text, $sb_match, $sb_replace);
1589
                $ret = '';
1590
1591
                $pos = 0;
1592
                $len = strlen($text);
1593
1594
                do
1595
                {
1596
                        /**
1597
                        * Do all consecutive ASCII chars at once
1598
                        */
1599
                        if ($spn = strspn($text, $legal_ascii, $pos))
1600
                        {
1601
                                $ret .= substr($text, $pos, $spn);
1602
                                $pos += $spn;
1603
                        }
1604
1605
                        if ($pos >= $len)
1606
                        {
1607
                                return $ret;
1608
                        }
1609
1610
                        /**
1611
                        * Capture the UTF char
1612
                        */
1613
                        $utf_len = $utf_len_mask[$text[$pos] & "\xF0"];
1614
                        $utf_char = substr($text, $pos, $utf_len);
1615
                        $pos += $utf_len;
1616
1617
                        if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
1618
                         || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
1619
                         || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
1620
                        {
1621
                                /**
1622
                                * All characters within these ranges are valid
1623
                                *
1624
                                * We separate them with a space in order to index each character
1625
                                * individually
1626
                                */
1627
                                $ret .= ' ' . $utf_char . ' ';
1628
                                continue;
1629
                        }
1630
1631
                        if (isset($allow[$utf_char]))
1632
                        {
1633
                                /**
1634
                                * The char is explicitly allowed
1635
                                */
1636
                                $ret .= $utf_char;
1637
                                continue;
1638
                        }
1639
1640
                        if (isset($conv[$utf_char]))
1641
                        {
1642
                                /**
1643
                                * The char is mapped to something, maybe to itself actually
1644
                                */
1645
                                $ret .= $conv[$utf_char];
1646
                                continue;
1647
                        }
1648
1649
                        /**
1650
                        * The char isn't mapped, but did we load its conversion table?
1651
                        *
1652
                        * The search indexer table is split into blocks. The block number of
1653
                        * each char is equal to its codepoint right-shifted for 11 bits. It
1654
                        * means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
1655
                        * 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
1656
                        * all UTF chars encoded in 2 bytes are in the same first block.
1657
                        */
1658
                        if (isset($utf_char[2]))
1659
                        {
1660
                                if (isset($utf_char[3]))
1661
                                {
1662
                                        /**
1663
                                        * 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
1664
                                        * 0000 0111 0011 1111 0010 0000
1665
                                        */
1666
                                        $idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
1667
                                }
1668
                                else
1669
                                {
1670
                                        /**
1671
                                        * 1110 nnnn 10nx xxxx 10xx xxxx
1672
                                        * 0000 0111 0010 0000
1673
                                        */
1674
                                        $idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
1675
                                }
1676
                        }
1677
                        else
1678
                        {
1679
                                /**
1680
                                * 110x xxxx 10xx xxxx
1681
                                * 0000 0000 0000 0000
1682
                                */
1683
                                $idx = 0;
1684
                        }
1685
1686
                        /**
1687
                        * Check if the required conv table has been loaded already
1688
                        */
1689
                        if (!isset($conv_loaded[$idx]))
1690
                        {
1691
                                $conv_loaded[$idx] = 1;
1692
                                $file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;
1693
1694
                                if (file_exists($file))
1695
                                {
1696
                                        $conv += include($file);
1697
                                }
1698
                        }
1699
1700
                        if (isset($conv[$utf_char]))
1701
                        {
1702
                                $ret .= $conv[$utf_char];
1703
                        }
1704
                        else
1705
                        {
1706
                                /**
1707
                                * We add an entry to the conversion table so that we
1708
                                * don't have to convert to codepoint and perform the checks
1709
                                * that are above this block
1710
                                */
1711
                                $conv[$utf_char] = ' ';
1712
                                $ret .= ' ';
1713
                        }
1714
                }
1715
                while (1);
1716
1717
                return $ret;
1718
        }
1719
1720
        /**
1721
        * Returns a list of options for the ACP to display
1722
        */
1723
        function acp()
1724
        {
1725
                global $user, $config;
1726
1727
1728
                /**
1729
                * if we need any options, copied from fulltext_native for now, will have to be adjusted or removed
1730
                */
1731
1732
                $tpl = '
1733
                <dl>
1734
                        <dt><label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
1735
                        <dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['NO'] . '</label></dd>
1736
                </dl>
1737
                <dl>
1738
                        <dt><label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1739
                        <dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . (int) $config['fulltext_native_min_chars'] . '" /></dd>
1740
                </dl>
1741
                <dl>
1742
                        <dt><label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1743
                        <dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . (int) $config['fulltext_native_max_chars'] . '" /></dd>
1744
                </dl>
1745
                <dl>
1746
                        <dt><label for="fulltext_native_common_thres">' . $user->lang['COMMON_WORD_THRESHOLD'] . ':</label><br /><span>' . $user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>
1747
                        <dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $config['fulltext_native_common_thres'] . '" /> %</dd>
1748
                </dl>
1749
                ';
1750
1751
                // These are fields required in the config table
1752
                return array(
1753
                        'tpl'                => $tpl,
1754
                        'config'        => array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100')
1755
                );
1756
        }
1757
}