[Customisation Database Commits] r1783 [25/63] - in /branches/stable: language/en/acp/ language/en/mods/ language/pt_br/ language/pt_br/acp/ language/pt_br/mods/ titania/ titania/authors/ titania/contributions/ titania/docs/ titania/download/ titania/download/modx/ titania/files/contrib_temp/ titania/files/modx_files/ titania/images/ titania/includes/ titania/includes/core/ titania/includes/hooks/ titania/includes/library/Zend/ titania/includes/library/Zend/Search/ titania/includes/library/Zend/Search/Lucene/ titania/includes/library/Zend/Search/Lucene/Analysis/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/ titania/includes/library/Zend/Search/Lucene/Analysis/TokenFilter/ titania/includes/library/Zend/Search/Lucene/Document/ titania/includes/library/Zend/Search/Lucene/Index/ titania/includes/library/Zend/Search/Lucene/Index/SegmentWriter/ titania/includes/library/Zend/Search/Lucene/Index/TermsStream/ titania/includes/library/Zend/Search/Lucene/Search/ titania/includes/library/Zend/Search/Lucene/Search/Highlighter/ titania/includes/library/Zend/Search/Lucene/Search/Query/ titania/includes/library/Zend/Search/Lucene/Search/Query/Preprocessing/ titania/includes/library/Zend/Search/Lucene/Search/QueryEntry/ titania/includes/library/Zend/Search/Lucene/Search/Similarity/ titania/includes/library/Zend/Search/Lucene/Search/Weight/ titania/includes/library/Zend/Search/Lucene/Storage/ titania/includes/library/Zend/Search/Lucene/Storage/Directory/ titania/includes/library/Zend/Search/Lucene/Storage/File/ titania/includes/library/automod/ titania/includes/library/ezcomponents/ 
titania/includes/library/ezcomponents/Base/ titania/includes/library/ezcomponents/Base/interfaces/ titania/includes/library/translations/ titania/includes/manage_tools/ titania/includes/objects/ titania/includes/overlords/ titania/includes/tools/ titania/includes/types/ titania/js/ titania/language/ titania/language/en/ titania/language/en/email/ titania/language/en/manage_tools/ titania/language/en/types/ titania/language/pt_br/ titania/language/pt_br/email/ titania/language/pt_br/manage_tools/ titania/language/pt_br/types/ titania/manage/ titania/store/ titania/store/phpbb_packages/ titania/store/phpbb_packages/extracted/ titania/store/search/ titania/styles/default/template/ titania/styles/default/template/authors/ titania/styles/default/template/common/ titania/styles/default/template/contributions/ titania/styles/default/template/manage/ titania/styles/default/template/posting/ titania/styles/default/template/posting/attachments/ titania/styles/default/template/posting/panels/ titania/styles/default/theme/ titania/styles/default/theme/en-gb/ titania/styles/prosilver/template/ titania/styles/prosilver/template/authors/ titania/styles/prosilver/template/common/ titania/styles/prosilver/theme/ titania/styles/prosilver/theme/en-gb/ umil/ umil/error_files/ umil/language/en/ umil/style/

Nathan Guse exreaction at phpbb.com
Sat Nov 20 18:23:23 GMT 2010


Modified: branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Fuzzy.php
==============================================================================
*** branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Fuzzy.php (original)
--- branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Fuzzy.php Sat Nov 20 18:23:22 2010
***************
*** 1,493 ****
! <?php
! /**
!  * Zend Framework
!  *
!  * LICENSE
!  *
!  * This source file is subject to the new BSD license that is bundled
!  * with this package in the file LICENSE.txt.
!  * It is also available through the world-wide-web at this URL:
!  * http://framework.zend.com/license/new-bsd
!  * If you did not receive a copy of the license and are unable to
!  * obtain it through the world-wide-web, please send an email
!  * to license at zend.com so we can send you a copy immediately.
!  *
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  * @version    $Id$
!  */
! 
! 
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
! 
! 
! /**
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  */
! class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Query
! {
!     /** Minimum similarity used when the constructor is called without one */
!     const DEFAULT_MIN_SIMILARITY = 0.5;
! 
!     /**
!      * Maximum number of matched terms.
!      * Apache Lucene defines this limitation as boolean query maximum number of clauses:
!      * org.apache.lucene.search.BooleanQuery.getMaxClauseCount()
!      *
!      * rewrite() stops adding subqueries to the resulting boolean query once
!      * this many matched terms have been added.
!      */
!     const MAX_CLAUSE_COUNT = 1024;
! 
!     /**
!      * Array of precalculated max distances
!      *
!      * Keys are integers representing a word size (the callers in this class
!      * pass the byte length of the compared term remainder); values are the
!      * integers computed by _calculateMaxDistance().
!      *
!      * @var array
!      */
!     private $_maxDistances = array();
! 
!     /**
!      * Base searching term.
!      *
!      * @var Zend_Search_Lucene_Index_Term
!      */
!     private $_term;
! 
!     /**
!      * A value between 0 and 1 to set the required similarity
!      *  between the query term and the matching terms. For example, for a
!      *  _minimumSimilarity of 0.5 a term of the same length
!      *  as the query term is considered similar to the query term if the edit distance
!      *  between both terms is less than length(term)*0.5
!      *
!      * The constructor rejects values below 0 and values of 1 or more.
!      *
!      * @var float
!      */
!     private $_minimumSimilarity;
! 
!     /**
!      * The length of common (non-fuzzy) prefix
!      *
!      * Set by the constructor, which falls back to the class-wide default
!      * when no explicit length is given.
!      *
!      * @var integer
!      */
!     private $_prefixLength;
! 
!     /**
!      * Matched terms.
!      *
!      * Matched terms list.
!      * It's filled during the search (rewrite operation) and may be used for search result
!      * post-processing
!      *
!      * Array of Zend_Search_Lucene_Index_Term objects; null until rewrite() has run.
!      *
!      * @var array
!      */
!     private $_matches = null;
! 
!     /**
!      * Matched terms scores
!      *
!      * One entry per matched term, filled by rewrite() and used there as the
!      * boost of the corresponding term subquery.
!      *
!      * @var array
!      */
!     private $_scores = null;
! 
!     /**
!      * Array of the term keys.
!      * Used to sort terms in alphabetical order if terms have the same scores
!      *
!      * @var array
!      */
!     private $_termKeys = null;
! 
!     /**
!      * Default non-fuzzy prefix length
!      *
!      * Shared by all instances; read by the constructor when it receives a
!      * null prefix length.
!      *
!      * @var integer
!      */
!     private static $_defaultPrefixLength = 3;
! 
!     /**
!      * Zend_Search_Lucene_Search_Query_Fuzzy constructor.
!      *
!      * Validates the similarity threshold and the prefix length and stores the
!      * query parameters. When $prefixLength is null the class-wide default
!      * prefix length is used instead.
!      *
!      * @param Zend_Search_Lucene_Index_Term $term  Base term to search for
!      * @param float   $minimumSimilarity  Required similarity, 0 <= value < 1
!      * @param integer $prefixLength       Non-fuzzy prefix length, null for the default
!      * @throws Zend_Search_Lucene_Exception  If an argument is out of range
!      */
!     public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = null)
!     {
!         // Only the first failing check is reported
!         $problem = null;
!         if ($minimumSimilarity < 0) {
!             $problem = 'minimumSimilarity cannot be less than 0';
!         } else if ($minimumSimilarity >= 1) {
!             $problem = 'minimumSimilarity cannot be greater than or equal to 1';
!         } else if ($prefixLength < 0) {
!             $problem = 'prefixLength cannot be less than 0';
!         }
! 
!         if ($problem !== null) {
!             require_once 'Zend/Search/Lucene/Exception.php';
!             throw new Zend_Search_Lucene_Exception($problem);
!         }
! 
!         if ($prefixLength === null) {
!             $prefixLength = self::$_defaultPrefixLength;
!         }
! 
!         $this->_term              = $term;
!         $this->_minimumSimilarity = $minimumSimilarity;
!         $this->_prefixLength      = $prefixLength;
!     }
! 
!     /**
!      * Get default non-fuzzy prefix length
!      *
!      * This is the length the constructor applies when it is called with a
!      * null prefix length.
!      *
!      * @return integer
!      */
!     public static function getDefaultPrefixLength()
!     {
!         $defaultLength = self::$_defaultPrefixLength;
! 
!         return $defaultLength;
!     }
! 
!     /**
!      * Set default non-fuzzy prefix length
!      *
!      * The value is stored as given and is picked up by the constructor
!      * whenever it is called with a null prefix length.
!      *
!      * NOTE(review): no range check is done here, whereas the constructor
!      * rejects a negative explicit prefix length before it substitutes the
!      * default - confirm callers never pass a negative value.
!      *
!      * @param integer $defaultPrefixLength
!      */
!     public static function setDefaultPrefixLength($defaultPrefixLength)
!     {
!         self::$_defaultPrefixLength = $defaultPrefixLength;
!     }
! 
!     /**
!      * Calculate maximum distance for specified word length
!      *
!      * The result is (1 - minimum similarity) multiplied by the prefix length
!      * plus the shorter of the two compared lengths, truncated to an integer.
!      * It is stored in $this->_maxDistances under the key $length, so the
!      * cached value is only reusable while the other two arguments stay the same.
!      *
!      * @param integer $prefixLength  Length of the non-fuzzy prefix
!      * @param integer $termLength    Length of the query term part being compared
!      * @param integer $length        Length of the candidate term part being compared
!      * @return integer
!      */
!     private function _calculateMaxDistance($prefixLength, $termLength, $length)
!     {
!         $comparedLength = min($termLength, $length) + $prefixLength;
!         $maxDistance    = (int) ((1 - $this->_minimumSimilarity)*$comparedLength);
! 
!         $this->_maxDistances[$length] = $maxDistance;
! 
!         return $maxDistance;
!     }
! 
!     /**
!      * Re-write query into primitive queries in the context of specified index
!      *
!      * For every searched field (all indexed fields when the term has no field)
!      * the index terms stream is scanned and each candidate term is compared
!      * with the query term. Candidates whose similarity is strictly greater than
!      * the minimum similarity are collected in $this->_matches together with
!      * their score and key.
!      *
!      * The result is an Empty query when nothing matched, a Term query for a
!      * single match, and otherwise a Boolean query holding at most
!      * MAX_CLAUSE_COUNT boosted Term subqueries, best score first.
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      * @throws Zend_Search_Lucene_Exception  If more terms match than the terms per query limit allows
!      */
!     public function rewrite(Zend_Search_Lucene_Interface $index)
!     {
!         $this->_matches  = array();
!         $this->_scores   = array();
!         $this->_termKeys = array();
! 
!         if ($this->_term->field === null) {
!             // Search through all fields
!             $fields = $index->getFieldNames(true /* indexed fields list */);
!         } else {
!             $fields = array($this->_term->field);
!         }
! 
!         require_once 'Zend/Search/Lucene/Index/Term.php';
!         $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
!         $prefixByteLength = strlen($prefix);
!         $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
! 
!         $termRest         = substr($this->_term->text, $prefixByteLength);
!         // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
!         $termRestLength   = strlen($termRest);
! 
!         // Maps a similarity in (minimum similarity, 1] onto a score in (0, 1]
!         $scaleFactor = 1/(1 - $this->_minimumSimilarity);
! 
!         require_once 'Zend/Search/Lucene.php';
!         $maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
!         foreach ($fields as $field) {
!             $index->resetTermsStream();
! 
!             if ($prefix != '') {
!                 $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));
! 
!                 while ($index->currentTerm() !== null          &&
!                        $index->currentTerm()->field == $field  &&
!                        substr($index->currentTerm()->text, 0, $prefixByteLength) == $prefix) {
!                     // Calculate similarity
!                     $target       = substr($index->currentTerm()->text, $prefixByteLength);
!                     $targetLength = strlen($target);
! 
!                     $maxDistance = isset($this->_maxDistances[$targetLength])?
!                                        $this->_maxDistances[$targetLength] :
!                                        $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);
! 
!                     if ($termRestLength == 0) {
!                         // we don't have anything to compare.  That means if we just add
!                         // the letters for current term we get the new word
!                         $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $targetLength/$prefixUtf8Length);
!                     } else if ($targetLength == 0) {
!                         $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
!                     } else if ($maxDistance < abs($termRestLength - $targetLength)){
!                         //just adding the characters of term to target or vice-versa results in too many edits
!                         //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
!                         //given this optimal circumstance, the edit distance cannot be less than 5.
!                         //which is 8-3 or more precisely abs(3-8).
!                         //if our maximum edit distance is 4, then we can discard this word
!                         //without looking at it.
!                         $similarity = 0;
!                     } else {
!                         $similarity = $this->_distanceToSimilarity(levenshtein($termRest, $target),
!                                                                    $prefixUtf8Length + min($termRestLength, $targetLength));
!                     }
! 
!                     if ($similarity > $this->_minimumSimilarity) {
!                         $this->_matches[]  = $index->currentTerm();
!                         $this->_termKeys[] = $index->currentTerm()->key();
!                         $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
! 
!                         if ($maxTerms != 0  &&  count($this->_matches) > $maxTerms) {
!                             require_once 'Zend/Search/Lucene/Exception.php';
!                             throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!                         }
!                     }
! 
!                     $index->nextTerm();
!                 }
!             } else {
!                 // No common prefix: every term of the field is a candidate
!                 $index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));
! 
!                 while ($index->currentTerm() !== null  &&  $index->currentTerm()->field == $field) {
!                     // Calculate similarity
!                     $target       = $index->currentTerm()->text;
!                     $targetLength = strlen($target);
! 
!                     $maxDistance = isset($this->_maxDistances[$targetLength])?
!                                        $this->_maxDistances[$targetLength] :
!                                        $this->_calculateMaxDistance(0, $termRestLength, $targetLength);
! 
!                     if ($maxDistance < abs($termRestLength - $targetLength)){
!                         //just adding the characters of term to target or vice-versa results in too many edits
!                         //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
!                         //given this optimal circumstance, the edit distance cannot be less than 5.
!                         //which is 8-3 or more precisely abs(3-8).
!                         //if our maximum edit distance is 4, then we can discard this word
!                         //without looking at it.
!                         $similarity = 0;
!                     } else {
!                         // The normalising length is 0 when both strings are empty;
!                         // the helper reports that as "no similarity" instead of dividing by zero
!                         $similarity = $this->_distanceToSimilarity(levenshtein($termRest, $target),
!                                                                    min($termRestLength, $targetLength));
!                     }
! 
!                     if ($similarity > $this->_minimumSimilarity) {
!                         $this->_matches[]  = $index->currentTerm();
!                         $this->_termKeys[] = $index->currentTerm()->key();
!                         $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
! 
!                         if ($maxTerms != 0  &&  count($this->_matches) > $maxTerms) {
!                             require_once 'Zend/Search/Lucene/Exception.php';
!                             throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!                         }
!                     }
! 
!                     $index->nextTerm();
!                 }
!             }
! 
!             $index->closeTermsStream();
!         }
! 
!         if (count($this->_matches) == 0) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         } else if (count($this->_matches) == 1) {
!             require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!             return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
!         } else {
!             require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
!             $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();
! 
!             // Best score first; equal scores are ordered by term key.
!             // $this->_matches is reordered along with the two sort columns.
!             array_multisort($this->_scores,   SORT_DESC, SORT_NUMERIC,
!                             $this->_termKeys, SORT_ASC,  SORT_STRING,
!                             $this->_matches);
! 
!             $termCount = 0;
!             require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!             foreach ($this->_matches as $id => $matchedTerm) {
!                 $subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
!                 $subquery->setBoost($this->_scores[$id]);
! 
!                 $rewrittenQuery->addSubquery($subquery);
! 
!                 $termCount++;
!                 if ($termCount >= self::MAX_CLAUSE_COUNT) {
!                     break;
!                 }
!             }
! 
!             return $rewrittenQuery;
!         }
!     }
! 
!     /**
!      * Convert an edit distance into a similarity value
!      *
!      * Returns 0 (no similarity) when the distance is negative, which is how
!      * levenshtein() reports over-long arguments on PHP versions before 8.0,
!      * or when the normalising length is not positive.
!      *
!      * @param integer $distance    Edit distance as returned by levenshtein()
!      * @param integer $normLength  Length the distance is normalised by
!      * @return float|integer
!      */
!     private function _distanceToSimilarity($distance, $normLength)
!     {
!         if ($distance < 0  ||  $normLength <= 0) {
!             return 0;
!         }
! 
!         return 1 - $distance/$normLength;
!     }
! 
!     /**
!      * Optimize query in the context of specified index
!      *
!      * Not supported for fuzzy queries: this method always throws. The query
!      * has to be rewritten with rewrite() first.
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      * @throws Zend_Search_Lucene_Exception  Always
!      */
!     public function optimize(Zend_Search_Lucene_Interface $index)
!     {
!         $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
! 
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception($message);
!     }
! 
!     /**
!      * Return query terms
!      *
!      * Returns the terms collected by the last rewrite() call.
!      *
!      * @return array
!      * @throws Zend_Search_Lucene_Exception  If rewrite() has not been run yet
!      */
!     public function getQueryTerms()
!     {
!         if ($this->_matches !== null) {
!             return $this->_matches;
!         }
! 
!         // The matches list is still in its initial null state
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
!     }
! 
!     /**
!      * Constructs an appropriate Weight implementation for this query.
!      *
!      * Not supported for fuzzy queries: this method always throws. The query
!      * has to be rewritten with rewrite() first.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return Zend_Search_Lucene_Search_Weight
!      * @throws Zend_Search_Lucene_Exception  Always
!      */
!     public function createWeight(Zend_Search_Lucene_Interface $reader)
!     {
!         $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
! 
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception($message);
!     }
! 
! 
!     /**
!      * Execute query in context of index reader
!      *
!      * Not supported for fuzzy queries: this method always throws. The query
!      * has to be rewritten with rewrite() first.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!      * @throws Zend_Search_Lucene_Exception  Always
!      */
!     public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
!     {
!         $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
! 
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception($message);
!     }
! 
!     /**
!      * Get document ids likely matching the query
!      *
!      * Not supported for fuzzy queries: this method always throws. The query
!      * has to be rewritten with rewrite() first.
!      *
!      * @return array
!      * @throws Zend_Search_Lucene_Exception  Always
!      */
!     public function matchedDocs()
!     {
!         $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
! 
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception($message);
!     }
! 
!     /**
!      * Score specified document
!      *
!      * Not supported for fuzzy queries: this method always throws. The query
!      * has to be rewritten with rewrite() first.
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      * @throws Zend_Search_Lucene_Exception  Always
!      */
!     public function score($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
! 
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception($message);
!     }
! 
!     /**
!      * Query specific matches highlighting
!      *
!      * Tokenizes the 'body' field of the highlighter's document with the default
!      * analyzer, collects every token that starts with the non-fuzzy prefix and
!      * whose similarity to the query term is strictly greater than the minimum
!      * similarity, and passes the collected words to the highlighter.
!      *
!      * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
!      */
!     protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
!     {
!         $words = array();
! 
!         require_once 'Zend/Search/Lucene/Index/Term.php';
!         $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
!         $prefixByteLength = strlen($prefix);
!         $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
! 
!         $termRest         = substr($this->_term->text, $prefixByteLength);
!         // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
!         $termRestLength   = strlen($termRest);
! 
!         $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
!         require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
!         $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
!         foreach ($tokens as $token) {
!             $termText = $token->getTermText();
! 
!             if (substr($termText, 0, $prefixByteLength) == $prefix) {
!                 // Calculate similarity
!                 $target       = substr($termText, $prefixByteLength);
!                 $targetLength = strlen($target);
! 
!                 $maxDistance = isset($this->_maxDistances[$targetLength])?
!                                    $this->_maxDistances[$targetLength] :
!                                    $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);
! 
!                 if ($termRestLength == 0) {
!                     // we don't have anything to compare.  That means if we just add
!                     // the letters for current term we get the new word
!                     $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $targetLength/$prefixUtf8Length);
!                 } else if ($targetLength == 0) {
!                     $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
!                 } else if ($maxDistance < abs($termRestLength - $targetLength)){
!                     //just adding the characters of term to target or vice-versa results in too many edits
!                     //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
!                     //given this optimal circumstance, the edit distance cannot be less than 5.
!                     //which is 8-3 or more precisely abs(3-8).
!                     //if our maximum edit distance is 4, then we can discard this word
!                     //without looking at it.
!                     $similarity = 0;
!                 } else {
!                     $distance = levenshtein($termRest, $target);
!                     // levenshtein() returns -1 for over-long arguments on PHP versions
!                     // before 8.0; treat that as "not similar" instead of a perfect match
!                     $similarity = ($distance < 0)? 0 : 1 - $distance/($prefixUtf8Length + min($termRestLength, $targetLength));
!                 }
! 
!                 if ($similarity > $this->_minimumSimilarity) {
!                     $words[] = $termText;
!                 }
!             }
!         }
! 
!         $highlighter->highlight($words);
!     }
! 
!     /**
!      * Print a query
!      *
!      * Produces "field:text~similarity^boost". The field part is left out when
!      * the term has no field, the similarity when it equals the default minimum
!      * similarity, and the boost part when the boost equals 1.
!      *
!      * @return string
!      */
!     public function __toString()
!     {
!         // It's used only for query visualisation, so we don't care about characters escaping
!         $queryString = '';
! 
!         if ($this->_term->field !== null) {
!             $queryString .= $this->_term->field . ':';
!         }
! 
!         $queryString .= $this->_term->text . '~';
! 
!         if ($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY) {
!             $queryString .= round($this->_minimumSimilarity, 4);
!         }
! 
!         if ($this->getBoost() != 1) {
!             $queryString .= '^' . round($this->getBoost(), 4);
!         }
! 
!         return $queryString;
!     }
! }
! 
--- 1,493 ----
! <?php
! /**
!  * Zend Framework
!  *
!  * LICENSE
!  *
!  * This source file is subject to the new BSD license that is bundled
!  * with this package in the file LICENSE.txt.
!  * It is also available through the world-wide-web at this URL:
!  * http://framework.zend.com/license/new-bsd
!  * If you did not receive a copy of the license and are unable to
!  * obtain it through the world-wide-web, please send an email
!  * to license at zend.com so we can send you a copy immediately.
!  *
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  * @version    $Id$
!  */
! 
! 
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
! 
! 
! /**
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  */
! class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Query
! {
!     /** Default minimum similarity */
!     const DEFAULT_MIN_SIMILARITY = 0.5;
! 
!     /**
!      * Maximum number of matched terms.
!      * Apache Lucene defines this limitation as boolean query maximum number of clauses:
!      * org.apache.lucene.search.BooleanQuery.getMaxClauseCount()
!      */
!     const MAX_CLAUSE_COUNT = 1024;
! 
!     /**
!      * Array of precalculated max distances
!      *
!      * keys are integers representing a word size
!      */
!     private $_maxDistances = array();
! 
!     /**
!      * Base searching term.
!      *
!      * @var Zend_Search_Lucene_Index_Term
!      */
!     private $_term;
! 
!     /**
!      * A value between 0 and 1 to set the required similarity
!      *  between the query term and the matching terms. For example, for a
!      *  _minimumSimilarity of 0.5 a term of the same length
!      *  as the query term is considered similar to the query term if the edit distance
!      *  between both terms is less than length(term)*0.5
!      *
!      * @var float
!      */
!     private $_minimumSimilarity;
! 
!     /**
!      * The length of common (non-fuzzy) prefix
!      *
!      * @var integer
!      */
!     private $_prefixLength;
! 
!     /**
!      * Matched terms.
!      *
!      * Matched terms list.
!      * It's filled during the search (rewrite operation) and may be used for search result
!      * post-processing
!      *
!      * Array of Zend_Search_Lucene_Index_Term objects
!      *
!      * @var array
!      */
!     private $_matches = null;
! 
!     /**
!      * Matched terms scores
!      *
!      * @var array
!      */
!     private $_scores = null;
! 
!     /**
!      * Array of the term keys.
!      * Used to sort terms in alphabetical order if terms have the same scores
!      *
!      * @var array
!      */
!     private $_termKeys = null;
! 
!     /**
!      * Default non-fuzzy prefix length
!      *
!      * @var integer
!      */
!     private static $_defaultPrefixLength = 3;
! 
!     /**
!      * Zend_Search_Lucene_Search_Query_Fuzzy constructor.
!      *
!      * @param Zend_Search_Lucene_Index_Term $term
!      * @param float   $minimumSimilarity
!      * @param integer $prefixLength
!      * @throws Zend_Search_Lucene_Exception
!      */
!     public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = null)
!     {
!         if ($minimumSimilarity < 0) {
!             require_once 'Zend/Search/Lucene/Exception.php';
!             throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be less than 0');
!         }
!         if ($minimumSimilarity >= 1) {
!             require_once 'Zend/Search/Lucene/Exception.php';
!             throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be greater than or equal to 1');
!         }
!         if ($prefixLength < 0) {
!             require_once 'Zend/Search/Lucene/Exception.php';
!             throw new Zend_Search_Lucene_Exception('prefixLength cannot be less than 0');
!         }
! 
!         $this->_term              = $term;
!         $this->_minimumSimilarity = $minimumSimilarity;
!         $this->_prefixLength      = ($prefixLength !== null)? $prefixLength : self::$_defaultPrefixLength;
!     }
! 
!     /**
!      * Get default non-fuzzy prefix length
!      *
!      * @return integer
!      */
!     public static function getDefaultPrefixLength()
!     {
!         return self::$_defaultPrefixLength;
!     }
! 
!     /**
!      * Set default non-fuzzy prefix length
!      *
!      * @param integer $defaultPrefixLength
!      */
!     public static function setDefaultPrefixLength($defaultPrefixLength)
!     {
!         self::$_defaultPrefixLength = $defaultPrefixLength;
!     }
! 
!     /**
!      * Calculate maximum distance for specified word length
!      *
!      * @param integer $prefixLength
!      * @param integer $termLength
!      * @param integer $length
!      * @return integer
!      */
!     private function _calculateMaxDistance($prefixLength, $termLength, $length)
!     {
!         $this->_maxDistances[$length] = (int) ((1 - $this->_minimumSimilarity)*(min($termLength, $length) + $prefixLength));
!         return $this->_maxDistances[$length];
!     }
! 
!     /**
!      * Re-write query into primitive queries in the context of specified index
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      * @throws Zend_Search_Lucene_Exception
!      */
!     public function rewrite(Zend_Search_Lucene_Interface $index)
!     {
!         $this->_matches  = array();
!         $this->_scores   = array();
!         $this->_termKeys = array();
! 
!         if ($this->_term->field === null) {
!             // Search through all fields
!             $fields = $index->getFieldNames(true /* indexed fields list */);
!         } else {
!             $fields = array($this->_term->field);
!         }
! 
!         require_once 'Zend/Search/Lucene/Index/Term.php';
!         $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
!         $prefixByteLength = strlen($prefix);
!         $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
! 
!         $termLength       = Zend_Search_Lucene_Index_Term::getLength($this->_term->text);
! 
!         $termRest         = substr($this->_term->text, $prefixByteLength);
!         // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
!         $termRestLength   = strlen($termRest);
! 
!         $scaleFactor = 1/(1 - $this->_minimumSimilarity);
! 
!         require_once 'Zend/Search/Lucene.php';
!         $maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
!         foreach ($fields as $field) {
!             $index->resetTermsStream();
! 
!             require_once 'Zend/Search/Lucene/Index/Term.php';
!             if ($prefix != '') {
!                 $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));
! 
!                 while ($index->currentTerm() !== null          &&
!                        $index->currentTerm()->field == $field  &&
!                        substr($index->currentTerm()->text, 0, $prefixByteLength) == $prefix) {
!                     // Calculate similarity
!                     $target = substr($index->currentTerm()->text, $prefixByteLength);
! 
!                     $maxDistance = isset($this->_maxDistances[strlen($target)])?
!                                        $this->_maxDistances[strlen($target)] :
!                                        $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, strlen($target));
! 
!                     if ($termRestLength == 0) {
!                         // we don't have anything to compare.  That means if we just add
!                         // the letters for current term we get the new word
!                         $similarity = (($prefixUtf8Length == 0)? 0 : 1 - strlen($target)/$prefixUtf8Length);
!                     } else if (strlen($target) == 0) {
!                         $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
!                     } else if ($maxDistance < abs($termRestLength - strlen($target))){
!                         //just adding the characters of term to target or vice-versa results in too many edits
!                         //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
!                         //given this optimal circumstance, the edit distance cannot be less than 5.
!                         //which is 8-3 or more precisesly abs(3-8).
!                         //if our maximum edit distance is 4, then we can discard this word
!                         //without looking at it.
!                         $similarity = 0;
!                     } else {
!                         $similarity = 1 - levenshtein($termRest, $target)/($prefixUtf8Length + min($termRestLength, strlen($target)));
!                     }
! 
!                     if ($similarity > $this->_minimumSimilarity) {
!                         $this->_matches[]  = $index->currentTerm();
!                         $this->_termKeys[] = $index->currentTerm()->key();
!                         $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
! 
!                         if ($maxTerms != 0  &&  count($this->_matches) > $maxTerms) {
!                             require_once 'Zend/Search/Lucene/Exception.php';
!                             throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!                         }
!                     }
! 
!                     $index->nextTerm();
!                 }
!             } else {
!                 $index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));
! 
!                 while ($index->currentTerm() !== null  &&  $index->currentTerm()->field == $field) {
!                     // Calculate similarity
!                     $target = $index->currentTerm()->text;
! 
!                     $maxDistance = isset($this->_maxDistances[strlen($target)])?
!                                        $this->_maxDistances[strlen($target)] :
!                                        $this->_calculateMaxDistance(0, $termRestLength, strlen($target));
! 
!                     if ($maxDistance < abs($termRestLength - strlen($target))){
!                         //just adding the characters of term to target or vice-versa results in too many edits
!                         //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
!                         //given this optimal circumstance, the edit distance cannot be less than 5.
!                         //which is 8-3 or more precisesly abs(3-8).
!                         //if our maximum edit distance is 4, then we can discard this word
!                         //without looking at it.
!                         $similarity = 0;
!                     } else {
!                         $similarity = 1 - levenshtein($termRest, $target)/min($termRestLength, strlen($target));
!                     }
! 
!                     if ($similarity > $this->_minimumSimilarity) {
!                         $this->_matches[]  = $index->currentTerm();
!                         $this->_termKeys[] = $index->currentTerm()->key();
!                         $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
! 
!                         if ($maxTerms != 0  &&  count($this->_matches) > $maxTerms) {
!                             require_once 'Zend/Search/Lucene/Exception.php';
!                             throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!                         }
!                     }
! 
!                     $index->nextTerm();
!                 }
!             }
! 
!             $index->closeTermsStream();
!         }
! 
!         if (count($this->_matches) == 0) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         } else if (count($this->_matches) == 1) {
!             require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!             return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
!         } else {
!             require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
!             $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();
! 
!             array_multisort($this->_scores,   SORT_DESC, SORT_NUMERIC,
!                             $this->_termKeys, SORT_ASC,  SORT_STRING,
!                             $this->_matches);
! 
!             $termCount = 0;
!             require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!             foreach ($this->_matches as $id => $matchedTerm) {
!                 $subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
!                 $subquery->setBoost($this->_scores[$id]);
! 
!                 $rewrittenQuery->addSubquery($subquery);
! 
!                 $termCount++;
!                 if ($termCount >= self::MAX_CLAUSE_COUNT) {
!                     break;
!                 }
!             }
! 
!             return $rewrittenQuery;
!         }
!     }
! 
!     /**
!      * Optimize query in the context of specified index
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function optimize(Zend_Search_Lucene_Interface $index)
!     {
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
!     }
! 
!     /**
!      * Return query terms
!      *
!      * @return array
!      * @throws Zend_Search_Lucene_Exception
!      */
!     public function getQueryTerms()
!     {
!         if ($this->_matches === null) {
!             require_once 'Zend/Search/Lucene/Exception.php';
!             throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
!         }
! 
!         return $this->_matches;
!     }
! 
!     /**
!      * Constructs an appropriate Weight implementation for this query.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return Zend_Search_Lucene_Search_Weight
!      * @throws Zend_Search_Lucene_Exception
!      */
!     public function createWeight(Zend_Search_Lucene_Interface $reader)
!     {
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
!     }
! 
! 
!     /**
!      * Execute query in context of index reader
!      * It also initializes necessary internal structures
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!      * @throws Zend_Search_Lucene_Exception
!      */
!     public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
!     {
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
!     }
! 
!     /**
!      * Get document ids likely matching the query
!      *
!      * It's an array with document ids as keys (performance considerations)
!      *
!      * @return array
!      * @throws Zend_Search_Lucene_Exception
!      */
!     public function matchedDocs()
!     {
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
!     }
! 
!     /**
!      * Score specified document
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      * @throws Zend_Search_Lucene_Exception
!      */
!     public function score($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
!     }
! 
!     /**
!      * Query specific matches highlighting
!      *
!      * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
!      */
!     protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
!     {
!         $words = array();
! 
!         require_once 'Zend/Search/Lucene/Index/Term.php';
!         $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
!         $prefixByteLength = strlen($prefix);
!         $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
! 
!         $termLength       = Zend_Search_Lucene_Index_Term::getLength($this->_term->text);
! 
!         $termRest         = substr($this->_term->text, $prefixByteLength);
!         // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
!         $termRestLength   = strlen($termRest);
! 
!         $scaleFactor = 1/(1 - $this->_minimumSimilarity);
! 
!         $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
!         require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
!         $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
!         foreach ($tokens as $token) {
!             $termText = $token->getTermText();
! 
!             if (substr($termText, 0, $prefixByteLength) == $prefix) {
!                 // Calculate similarity
!                 $target = substr($termText, $prefixByteLength);
! 
!                 $maxDistance = isset($this->_maxDistances[strlen($target)])?
!                                    $this->_maxDistances[strlen($target)] :
!                                    $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, strlen($target));
! 
!                 if ($termRestLength == 0) {
!                     // we don't have anything to compare.  That means if we just add
!                     // the letters for current term we get the new word
!                     $similarity = (($prefixUtf8Length == 0)? 0 : 1 - strlen($target)/$prefixUtf8Length);
!                 } else if (strlen($target) == 0) {
!                     $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
!                 } else if ($maxDistance < abs($termRestLength - strlen($target))){
!                     //just adding the characters of term to target or vice-versa results in too many edits
!                     //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
!                     //given this optimal circumstance, the edit distance cannot be less than 5.
!                     //which is 8-3 or more precisesly abs(3-8).
!                     //if our maximum edit distance is 4, then we can discard this word
!                     //without looking at it.
!                     $similarity = 0;
!                 } else {
!                     $similarity = 1 - levenshtein($termRest, $target)/($prefixUtf8Length + min($termRestLength, strlen($target)));
!                 }
! 
!                 if ($similarity > $this->_minimumSimilarity) {
!                     $words[] = $termText;
!                 }
!             }
!         }
! 
!         $highlighter->highlight($words);
!     }
! 
!     /**
!      * Print a query
!      *
!      * @return string
!      */
!     public function __toString()
!     {
!         // It's used only for query visualisation, so we don't care about characters escaping
!         return (($this->_term->field === null)? '' : $this->_term->field . ':')
!              . $this->_term->text . '~'
!              . (($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY)? round($this->_minimumSimilarity, 4) : '')
!              . (($this->getBoost() != 1)? '^' . round($this->getBoost(), 4) : '');
!     }
! }
! 

Modified: branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Insignificant.php
==============================================================================
*** branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Insignificant.php (original)
--- branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Insignificant.php Sat Nov 20 18:23:22 2010
***************
*** 1,139 ****
! <?php
! /**
!  * Zend Framework
!  *
!  * LICENSE
!  *
!  * This source file is subject to the new BSD license that is bundled
!  * with this package in the file LICENSE.txt.
!  * It is also available through the world-wide-web at this URL:
!  * http://framework.zend.com/license/new-bsd
!  * If you did not receive a copy of the license and are unable to
!  * obtain it through the world-wide-web, please send an email
!  * to license at zend.com so we can send you a copy immediately.
!  *
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  * @version    $Id$
!  */
! 
! 
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
! 
! 
! /**
!  * The insignificant query returns empty result, but doesn't limit result set as a part of other queries
!  *
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  */
! class Zend_Search_Lucene_Search_Query_Insignificant extends Zend_Search_Lucene_Search_Query
! {
!     /**
!      * Re-write query into primitive queries in the context of specified index
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function rewrite(Zend_Search_Lucene_Interface $index)
!     {
!         return $this;
!     }
! 
!     /**
!      * Optimize query in the context of specified index
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function optimize(Zend_Search_Lucene_Interface $index)
!     {
!         return $this;
!     }
! 
!     /**
!      * Constructs an appropriate Weight implementation for this query.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return Zend_Search_Lucene_Search_Weight
!      */
!     public function createWeight(Zend_Search_Lucene_Interface $reader)
!     {
!         require_once 'Zend/Search/Lucene/Search/Weight/Empty.php';
!         return new Zend_Search_Lucene_Search_Weight_Empty();
!     }
! 
!     /**
!      * Execute query in context of index reader
!      * It also initializes necessary internal structures
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!      */
!     public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
!     {
!         // Do nothing
!     }
! 
!     /**
!      * Get document ids likely matching the query
!      *
!      * It's an array with document ids as keys (performance considerations)
!      *
!      * @return array
!      */
!     public function matchedDocs()
!     {
!         return array();
!     }
! 
!     /**
!      * Score specified document
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      */
!     public function score($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         return 0;
!     }
! 
!     /**
!      * Return query terms
!      *
!      * @return array
!      */
!     public function getQueryTerms()
!     {
!         return array();
!     }
! 
!     /**
!      * Query specific matches highlighting
!      *
!      * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
!      */
!     protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
!     {
!         // Do nothing
!     }
! 
!     /**
!      * Print a query
!      *
!      * @return string
!      */
!     public function __toString()
!     {
!         return '<InsignificantQuery>';
!     }
! }
! 
--- 1,139 ----
! <?php
! /**
!  * Zend Framework
!  *
!  * LICENSE
!  *
!  * This source file is subject to the new BSD license that is bundled
!  * with this package in the file LICENSE.txt.
!  * It is also available through the world-wide-web at this URL:
!  * http://framework.zend.com/license/new-bsd
!  * If you did not receive a copy of the license and are unable to
!  * obtain it through the world-wide-web, please send an email
!  * to license at zend.com so we can send you a copy immediately.
!  *
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  * @version    $Id$
!  */
! 
! 
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
! 
! 
! /**
!  * The insignificant query returns empty result, but doesn't limit result set as a part of other queries
!  *
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  */
! class Zend_Search_Lucene_Search_Query_Insignificant extends Zend_Search_Lucene_Search_Query
! {
!     /**
!      * Re-write query into primitive queries in the context of specified index
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function rewrite(Zend_Search_Lucene_Interface $index)
!     {
!         return $this;
!     }
! 
!     /**
!      * Optimize query in the context of specified index
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function optimize(Zend_Search_Lucene_Interface $index)
!     {
!         return $this;
!     }
! 
!     /**
!      * Constructs an appropriate Weight implementation for this query.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return Zend_Search_Lucene_Search_Weight
!      */
!     public function createWeight(Zend_Search_Lucene_Interface $reader)
!     {
!         require_once 'Zend/Search/Lucene/Search/Weight/Empty.php';
!         return new Zend_Search_Lucene_Search_Weight_Empty();
!     }
! 
!     /**
!      * Execute query in context of index reader
!      * It also initializes necessary internal structures
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!      */
!     public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
!     {
!         // Do nothing
!     }
! 
!     /**
!      * Get document ids likely matching the query
!      *
!      * It's an array with document ids as keys (performance considerations)
!      *
!      * @return array
!      */
!     public function matchedDocs()
!     {
!         return array();
!     }
! 
!     /**
!      * Score specified document
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      */
!     public function score($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         return 0;
!     }
! 
!     /**
!      * Return query terms
!      *
!      * @return array
!      */
!     public function getQueryTerms()
!     {
!         return array();
!     }
! 
!     /**
!      * Query specific matches highlighting
!      *
!      * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
!      */
!     protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
!     {
!         // Do nothing
!     }
! 
!     /**
!      * Print a query
!      *
!      * @return string
!      */
!     public function __toString()
!     {
!         return '<InsignificantQuery>';
!     }
! }
! 

Modified: branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/MultiTerm.php
==============================================================================
*** branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/MultiTerm.php (original)
--- branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/MultiTerm.php Sat Nov 20 18:23:22 2010
***************
*** 1,668 ****
! <?php
! /**
!  * Zend Framework
!  *
!  * LICENSE
!  *
!  * This source file is subject to the new BSD license that is bundled
!  * with this package in the file LICENSE.txt.
!  * It is also available through the world-wide-web at this URL:
!  * http://framework.zend.com/license/new-bsd
!  * If you did not receive a copy of the license and are unable to
!  * obtain it through the world-wide-web, please send an email
!  * to license at zend.com so we can send you a copy immediately.
!  *
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  * @version    $Id$
!  */
! 
! 
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
! 
! 
! /**
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  */
! class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
! {
! 
!     /**
!      * Terms to find.
!      * Array of Zend_Search_Lucene_Index_Term
!      *
!      * @var array
!      */
!     private $_terms = array();
! 
!     /**
!      * Term signs.
!      * If true then term is required.
!      * If false then term is prohibited.
!      * If null then term is neither prohibited, nor required
!      *
!      * If array is null then all terms are required
!      *
!      * @var array
!      */
!     private $_signs;
! 
!     /**
!      * Result vector.
!      *
!      * @var array
!      */
!     private $_resVector = null;
! 
!     /**
!      * Terms positions vectors.
!      * Array of Arrays:
!      * term1Id => (docId => freq, ...)
!      * term2Id => (docId => freq, ...)
!      *
!      * @var array
!      */
!     private $_termsFreqs = array();
! 
! 
!     /**
!      * A score factor based on the fraction of all query terms
!      * that a document contains.
!      * float for conjunction queries
!      * array of float for non conjunction queries
!      *
!      * @var mixed
!      */
!     private $_coord = null;
! 
! 
!     /**
!      * Terms weights
!      * array of Zend_Search_Lucene_Search_Weight
!      *
!      * @var array
!      */
!     private $_weights = array();
! 
! 
!     /**
!      * Class constructor.  Create a new multi-term query object.
!      *
!      * if $signs array is omitted then all terms are required
!      * it differs from addTerm() behavior, but should never be used
!      *
!      * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
!      * @param array $signs    Array of signs.  Sign is boolean|null.
!      * @throws Zend_Search_Lucene_Exception
!      */
!     public function __construct($terms = null, $signs = null)
!     {
!         if (is_array($terms)) {
!             require_once 'Zend/Search/Lucene.php';
!             if (count($terms) > Zend_Search_Lucene::getTermsPerQueryLimit()) {
!                 throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!             }
! 
!             $this->_terms = $terms;
! 
!             $this->_signs = null;
!             // Check if all terms are required
!             if (is_array($signs)) {
!                 foreach ($signs as $sign ) {
!                     if ($sign !== true) {
!                         $this->_signs = $signs;
!                         break;
!                     }
!                 }
!             }
!         }
!     }
! 
! 
!     /**
!      * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
!      *
!      * The sign is specified as:
!      *     TRUE  - term is required
!      *     FALSE - term is prohibited
!      *     NULL  - term is neither prohibited, nor required
!      *
!      * @param  Zend_Search_Lucene_Index_Term $term
!      * @param  boolean|null $sign
!      * @return void
!      */
!     public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
!         if ($sign !== true || $this->_signs !== null) {       // Skip, if all terms are required
!             if ($this->_signs === null) {                     // Check, If all previous terms are required
!                 $this->_signs = array();
!                 foreach ($this->_terms as $prevTerm) {
!                     $this->_signs[] = true;
!                 }
!             }
!             $this->_signs[] = $sign;
!         }
! 
!         $this->_terms[] = $term;
!     }
! 
! 
!     /**
!      * Re-write query into primitive queries in the context of specified index
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function rewrite(Zend_Search_Lucene_Interface $index)
!     {
!         if (count($this->_terms) == 0) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
! 
!         // Check, that all fields are qualified
!         $allQualified = true;
!         foreach ($this->_terms as $term) {
!             if ($term->field === null) {
!                 $allQualified = false;
!                 break;
!             }
!         }
! 
!         if ($allQualified) {
!             return $this;
!         } else {
!             /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
!             require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
!             $query = new Zend_Search_Lucene_Search_Query_Boolean();
!             $query->setBoost($this->getBoost());
! 
!             require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!             foreach ($this->_terms as $termId => $term) {
!                 $subquery = new Zend_Search_Lucene_Search_Query_Term($term);
! 
!                 $query->addSubquery($subquery->rewrite($index),
!                                     ($this->_signs === null)?  true : $this->_signs[$termId]);
!             }
! 
!             return $query;
!         }
!     }
! 
!     /**
!      * Optimize query in the context of specified index
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function optimize(Zend_Search_Lucene_Interface $index)
!     {
!         $terms = $this->_terms;
!         $signs = $this->_signs;
! 
!         foreach ($terms as $id => $term) {
!             if (!$index->hasTerm($term)) {
!                 if ($signs === null  ||  $signs[$id] === true) {
!                     // Term is required
!                     require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!                     return new Zend_Search_Lucene_Search_Query_Empty();
!                 } else {
!                     // Term is optional or prohibited
!                     // Remove it from terms and signs list
!                     unset($terms[$id]);
!                     unset($signs[$id]);
!                 }
!             }
!         }
! 
!         // Check if all presented terms are prohibited
!         $allProhibited = true;
!         if ($signs === null) {
!             $allProhibited = false;
!         } else {
!             foreach ($signs as $sign) {
!                 if ($sign !== false) {
!                     $allProhibited = false;
!                     break;
!                 }
!             }
!         }
!         if ($allProhibited) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
! 
!         /**
!          * @todo make an optimization for repeated terms
!          * (they may have different signs)
!          */
! 
!         if (count($terms) == 1) {
!             // It's already checked, that it's not a prohibited term
! 
!             // It's one term query with one required or optional element
!             require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!             $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
!             $optimizedQuery->setBoost($this->getBoost());
! 
!             return $optimizedQuery;
!         }
! 
!         if (count($terms) == 0) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
! 
!         $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $signs);
!         $optimizedQuery->setBoost($this->getBoost());
!         return $optimizedQuery;
!     }
! 
! 
!     /**
!      * Returns query term
!      *
!      * @return array
!      */
!     public function getTerms()
!     {
!         return $this->_terms;
!     }
! 
! 
!     /**
!      * Return terms signs
!      *
!      * @return array
!      */
!     public function getSigns()
!     {
!         return $this->_signs;
!     }
! 
! 
!     /**
!      * Set weight for specified term
!      *
!      * @param integer $num
!      * @param Zend_Search_Lucene_Search_Weight_Term $weight
!      */
!     public function setWeight($num, $weight)
!     {
!         $this->_weights[$num] = $weight;
!     }
! 
! 
!     /**
!      * Constructs an appropriate Weight implementation for this query.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return Zend_Search_Lucene_Search_Weight
!      */
!     public function createWeight(Zend_Search_Lucene_Interface $reader)
!     {
!         require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
!         $this->_weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
!         return $this->_weight;
!     }
! 
! 
!     /**
!      * Calculate result vector for Conjunction query
!      * (like '+something +another')
!      *
!      * Terms are sorted by document frequency (rarest first, original position
!      * breaks ties) and then looked up one after another through a shared
!      * docs filter. The vector returned for the last term is used as the
!      * result set; term frequencies are fetched afterwards through the same
!      * filter.
!      *
!      * A query without terms yields an empty result vector.
!      *
!      * NOTE(review): array_multisort() renumbers the integer keys of
!      * $this->_terms. Confirm that weights registered through setWeight()
!      * are assigned after this point, or are keyed accordingly.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      */
!     private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
!     {
!         $this->_resVector = null;
! 
!         if (count($this->_terms) == 0) {
!             // Nothing to look up. Returning here keeps the empty result
!             // instead of falling through to array_flip() on an undefined value.
!             $this->_resVector = array();
!             return;
!         }
! 
!         // Order terms by selectivity
!         $docFreqs = array();
!         $ids      = array();
!         foreach ($this->_terms as $id => $term) {
!             $docFreqs[] = $reader->docFreq($term);
!             $ids[]      = $id; // Keeps original order for equally selective terms and avoids comparing term objects
!         }
!         array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
!                         $ids,      SORT_ASC, SORT_NUMERIC,
!                         $this->_terms);
! 
!         require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
!         $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
!         $termDocs   = array();
!         foreach ($this->_terms as $term) {
!             $termDocs = $reader->termDocs($term, $docsFilter);
!         }
!         // Treat last retrieved docs vector as a result set
!         // (filter collects data for other terms)
!         $this->_resVector = array_flip($termDocs);
! 
!         foreach ($this->_terms as $termId => $term) {
!             $this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
!         }
! 
!         // No ksort() is needed here: docs are returned ordered and the
!         // algorithms used above do not change the order of elements.
!     }
! 
! 
!     /**
!      * Calculate result vector for non Conjunction query
!      * (like '+something -another')
!      *
!      * Doc vectors of required terms are intersected (smallest first), doc
!      * vectors of optional terms are united and used only when there is no
!      * required term, and docs of prohibited terms are removed from the
!      * outcome. Term frequencies are stored for every term. The result
!      * vector is sorted by document id.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      */
!     private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
!     {
!         $requiredSets  = array();
!         $requiredSizes = array();
!         $requiredIds   = array(); // sorted along with the sizes so the doc arrays never get compared
! 
!         $optionalDocs   = array();
!         $prohibitedDocs = array();
! 
!         foreach ($this->_terms as $termId => $term) {
!             $docs = array_flip($reader->termDocs($term));
!             $sign = $this->_signs[$termId];
! 
!             if ($sign === true) {
!                 // required
!                 $requiredSets[]  = $docs;
!                 $requiredSizes[] = count($docs);
!                 $requiredIds[]   = $termId;
!             } elseif ($sign === false) {
!                 // prohibited: union by key
!                 $prohibitedDocs += $docs;
!             } else {
!                 // optional: union by key
!                 $optionalDocs += $docs;
!             }
! 
!             $this->_termsFreqs[$termId] = $reader->termFreqs($term);
!         }
! 
!         // Process the smallest required vector first
!         array_multisort($requiredSizes, SORT_ASC, SORT_NUMERIC,
!                         $requiredIds,   SORT_ASC, SORT_NUMERIC,
!                         $requiredSets);
! 
!         $required = null;
!         foreach ($requiredSets as $candidate) {
!             if ($required === null) {
!                 $required = $candidate;
!             } else {
!                 // Manual key intersection, kept as a workaround for the
!                 // array_intersect_key() slowness problem.
!                 $kept = array();
!                 foreach ($required as $docId => $value) {
!                     if (isset($candidate[$docId])) {
!                         $kept[$docId] = $value;
!                     }
!                 }
!                 $required = $kept;
!             }
! 
!             if (count($required) == 0) {
!                 // Already empty, remaining terms cannot add anything
!                 break;
!             }
!         }
! 
!         $result = ($required !== null) ? $required : $optionalDocs;
! 
!         if (count($prohibitedDocs) != 0) {
!             // Manual key difference, kept as a workaround for the
!             // array_diff_key() slowness problem. Iterate over the smaller side.
!             if (count($result) < count($prohibitedDocs)) {
!                 foreach (array_keys($result) as $docId) {
!                     if (isset($prohibitedDocs[$docId])) {
!                         unset($result[$docId]);
!                     }
!                 }
!             } else {
!                 foreach ($prohibitedDocs as $docId => $value) {
!                     unset($result[$docId]);
!                 }
!             }
!         }
! 
!         ksort($result, SORT_NUMERIC);
!         $this->_resVector = $result;
!     }
! 
! 
!     /**
!      * Score a document for a conjunction query (all terms are required).
!      *
!      * The score is the sum of tf * term weight * field norm over all terms,
!      * multiplied by the coordination factor and the query boost. The
!      * coordination factor is computed on first use and then cached.
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      */
!     public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         if ($this->_coord === null) {
!             $termsCount   = count($this->_terms);
!             $this->_coord = $reader->getSimilarity()->coord($termsCount, $termsCount);
!         }
! 
!         $total = 0.0;
! 
!         foreach ($this->_terms as $termId => $term) {
!             // No check for a zero term frequency: scoring is performed
!             // only for matched docs.
!             $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
!             $weight = $this->_weights[$termId]->getValue();
!             $norm   = $reader->norm($docId, $term->field);
! 
!             $total += $tf * $weight * $norm;
!         }
! 
!         return $total * $this->_coord * $this->getBoost();
!     }
! 
! 
!     /**
!      * Score a document for a non conjunction query (not all terms are required).
!      *
!      * Only terms that are not prohibited and that have a frequency entry for
!      * the document contribute tf * term weight * field norm. The sum is
!      * multiplied by the coordination factor for the number of contributing
!      * terms and by the query boost. The table of coordination factors is
!      * built on first use and then cached.
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      */
!     public function _nonConjunctionScore($docId, $reader)
!     {
!         if ($this->_coord === null) {
!             // Count the terms that are allowed to match (required or optional)
!             $maxCoord = 0;
!             foreach ($this->_signs as $sign) {
!                 if ($sign !== false) {
!                     $maxCoord++;
!                 }
!             }
! 
!             $this->_coord = array();
!             for ($count = 0; $count <= $maxCoord; $count++) {
!                 $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
!             }
!         }
! 
!         $total   = 0.0;
!         $matched = 0;
! 
!         foreach ($this->_terms as $termId => $term) {
!             if ($this->_signs[$termId] === false) {
!                 // prohibited terms never contribute
!                 continue;
!             }
!             if (!isset($this->_termsFreqs[$termId][$docId])) {
!                 // term does not occur in this document
!                 continue;
!             }
! 
!             $matched++;
! 
!             // No check for a zero term frequency: scoring is performed
!             // only for matched docs.
!             $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
!             $weight = $this->_weights[$termId]->getValue();
!             $norm   = $reader->norm($docId, $term->field);
! 
!             $total += $tf * $weight * $norm;
!         }
! 
!         return $total * $this->_coord[$matched] * $this->getBoost();
!     }
! 
!     /**
!      * Execute query in context of index reader
!      *
!      * Builds the result vector with the conjunction or the non conjunction
!      * algorithm, depending on whether a sign list is present, and then
!      * initializes the query weight.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  Accepted but not used by this implementation
!      */
!     public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
!     {
!         if ($this->_signs !== null) {
!             $this->_calculateNonConjunctionResult($reader);
!         } else {
!             // No sign list means that every term is required
!             $this->_calculateConjunctionResult($reader);
!         }
! 
!         // Initialize weight if it's not done yet
!         $this->_initWeight($reader);
!     }
! 
!     /**
!      * Get document ids likely matching the query
!      *
!      * Returns the result vector built by execute(). Document ids are the
!      * array keys (performance considerations). The value is null until a
!      * result vector has been calculated.
!      *
!      * @return array
!      */
!     public function matchedDocs()
!     {
!         return $this->_resVector;
!     }
! 
!     /**
!      * Score specified document
!      *
!      * Documents that are not part of the result vector score integer 0.
!      * Other documents are passed to the conjunction or the non conjunction
!      * score calculator, depending on whether a sign list is present.
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      */
!     public function score($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         if (!isset($this->_resVector[$docId])) {
!             return 0;
!         }
! 
!         if ($this->_signs !== null) {
!             return $this->_nonConjunctionScore($docId, $reader);
!         }
! 
!         return $this->_conjunctionScore($docId, $reader);
!     }
! 
!     /**
!      * Return the query terms that may match a document.
!      *
!      * Without a sign list all terms are returned as they are stored.
!      * Otherwise prohibited terms are left out and the remaining terms are
!      * returned as a newly indexed list.
!      *
!      * @return array
!      */
!     public function getQueryTerms()
!     {
!         if ($this->_signs === null) {
!             return $this->_terms;
!         }
! 
!         $matchingTerms = array();
! 
!         foreach ($this->_signs as $id => $sign) {
!             if ($sign === false) {
!                 // prohibited
!                 continue;
!             }
!             $matchingTerms[] = $this->_terms[$id];
!         }
! 
!         return $matchingTerms;
!     }
! 
!     /**
!      * Query specific matches highlighting
!      *
!      * Collects the text of every term that is not prohibited and passes
!      * the list to the highlighter in a single call.
!      *
!      * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
!      */
!     protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
!     {
!         $words = array();
! 
!         if ($this->_signs !== null) {
!             foreach ($this->_signs as $id => $sign) {
!                 if ($sign === false) {
!                     // prohibited terms are not highlighted
!                     continue;
!                 }
!                 $words[] = $this->_terms[$id]->text;
!             }
!         } else {
!             // No sign list: every term is required
!             foreach ($this->_terms as $term) {
!                 $words[] = $term->text;
!             }
!         }
! 
!         $highlighter->highlight($words);
!     }
! 
!     /**
!      * Print a query
!      *
!      * Terms are separated by a single space. Required terms are prefixed
!      * with '+', prohibited terms with '-', optional terms get no prefix.
!      * A term bound to a field is printed as 'field:text'. If the boost is
!      * not 1 the whole query is wrapped as '(...)^boost'.
!      *
!      * It's used only for query visualisation, so characters are not escaped.
!      *
!      * @return string
!      */
!     public function __toString()
!     {
!         $query       = '';
!         $isFirstTerm = true;
! 
!         foreach ($this->_terms as $id => $term) {
!             // The separator depends on the position, not on the key: keys do
!             // not have to start at 0 (optimize() removes entries without
!             // renumbering), which used to produce a leading space.
!             if (!$isFirstTerm) {
!                 $query .= ' ';
!             }
!             $isFirstTerm = false;
! 
!             if ($this->_signs === null || $this->_signs[$id] === true) {
!                 $query .= '+';
!             } else if ($this->_signs[$id] === false) {
!                 $query .= '-';
!             }
! 
!             if ($term->field !== null) {
!                 $query .= $term->field . ':';
!             }
!             $query .= $term->text;
!         }
! 
!         if ($this->getBoost() != 1) {
!             $query = '(' . $query . ')^' . round($this->getBoost(), 4);
!         }
! 
!         return $query;
!     }
! }
! 
--- 1,668 ----
! <?php
! /**
!  * Zend Framework
!  *
!  * LICENSE
!  *
!  * This source file is subject to the new BSD license that is bundled
!  * with this package in the file LICENSE.txt.
!  * It is also available through the world-wide-web at this URL:
!  * http://framework.zend.com/license/new-bsd
!  * If you did not receive a copy of the license and are unable to
!  * obtain it through the world-wide-web, please send an email
!  * to license at zend.com so we can send you a copy immediately.
!  *
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  * @version    $Id$
!  */
! 
! 
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
! 
! 
! /**
!  * @category   Zend
!  * @package    Zend_Search_Lucene
!  * @subpackage Search
!  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
!  * @license    http://framework.zend.com/license/new-bsd     New BSD License
!  */
! class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
! {
! 
!     /**
!      * Terms to find.
!      * Array of Zend_Search_Lucene_Index_Term
!      *
!      * @var array
!      */
!     private $_terms = array();
! 
!     /**
!      * Term signs.
!      * If true then term is required.
!      * If false then term is prohibited.
!      * If null then term is neither prohibited, nor required
!      *
!      * If array is null then all terms are required
!      *
!      * @var array
!      */
!     private $_signs;
! 
!     /**
!      * Result vector.
!      *
!      * @var array
!      */
!     private $_resVector = null;
! 
!     /**
!      * Term frequency vectors.
!      * Array of Arrays:
!      * term1Id => (docId => freq, ...)
!      * term2Id => (docId => freq, ...)
!      *
!      * @var array
!      */
!     private $_termsFreqs = array();
! 
! 
!     /**
!      * A score factor based on the fraction of all query terms
!      * that a document contains.
!      * float for conjunction queries
!      * array of float for non conjunction queries
!      *
!      * @var mixed
!      */
!     private $_coord = null;
! 
! 
!     /**
!      * Terms weights
!      * array of Zend_Search_Lucene_Search_Weight
!      *
!      * @var array
!      */
!     private $_weights = array();
! 
! 
!     /**
!      * Class constructor.  Create a new multi-term query object.
!      *
!      * When $terms is not an array the query starts empty and both arguments
!      * are ignored. When $signs is omitted, or every entry of $signs is
!      * strictly true, the sign list is stored as null, which means that all
!      * terms are required. This differs from addTerm(), where an omitted
!      * sign makes the term optional.
!      *
!      * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
!      * @param array $signs    Array of signs.  Sign is boolean|null.
!      * @throws Zend_Search_Lucene_Exception  if there are more terms than Zend_Search_Lucene::getTermsPerQueryLimit()
!      */
!     public function __construct($terms = null, $signs = null)
!     {
!         if (!is_array($terms)) {
!             // Nothing to initialise
!             return;
!         }
! 
!         require_once 'Zend/Search/Lucene.php';
!         $termsLimit = Zend_Search_Lucene::getTermsPerQueryLimit();
!         if (count($terms) > $termsLimit) {
!             throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!         }
! 
!         $this->_terms = $terms;
!         $this->_signs = null;
! 
!         if (!is_array($signs)) {
!             return;
!         }
! 
!         // Keep the sign list only if at least one term is not strictly required
!         foreach ($signs as $sign) {
!             if ($sign !== true) {
!                 $this->_signs = $signs;
!                 return;
!             }
!         }
!     }
! 
! 
!     /**
!      * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
!      *
!      * The sign is specified as:
!      *     TRUE  - term is required
!      *     FALSE - term is prohibited
!      *     NULL  - term is neither prohibited, nor required
!      *
!      * As long as only required terms are added no sign list is kept. The
!      * first term with another sign creates the list and marks all terms
!      * added before as required.
!      *
!      * @param  Zend_Search_Lucene_Index_Term $term
!      * @param  boolean|null $sign
!      * @return void
!      */
!     public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null)
!     {
!         $allRequiredSoFar = ($this->_signs === null);
! 
!         if ($sign === true && $allRequiredSoFar) {
!             // Still a pure conjunction, no sign list is needed
!             $this->_terms[] = $term;
!             return;
!         }
! 
!         if ($allRequiredSoFar) {
!             // Switch to an explicit sign list: previous terms are required
!             $this->_signs   = array();
!             $previousTerms  = count($this->_terms);
!             for ($i = 0; $i < $previousTerms; $i++) {
!                 $this->_signs[] = true;
!             }
!         }
! 
!         $this->_signs[] = $sign;
!         $this->_terms[] = $term;
!     }
! 
! 
!     /**
!      * Re-write query into primitive queries in the context of specified index
!      *
!      * A query without terms becomes an empty query. If every term is bound
!      * to a field the query itself is returned. Otherwise the query is
!      * converted into a boolean query with the same boost, holding one
!      * rewritten term subquery per term with the sign of that term.
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function rewrite(Zend_Search_Lucene_Interface $index)
!     {
!         if (count($this->_terms) == 0) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
! 
!         // Look for a term that is not bound to a field
!         $hasUnqualifiedTerm = false;
!         foreach ($this->_terms as $term) {
!             if ($term->field === null) {
!                 $hasUnqualifiedTerm = true;
!                 break;
!             }
!         }
! 
!         if (!$hasUnqualifiedTerm) {
!             return $this;
!         }
! 
!         // Transform multiterm query to boolean and apply rewrite() method to subqueries
!         require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
!         $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
!         $booleanQuery->setBoost($this->getBoost());
! 
!         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!         foreach ($this->_terms as $termId => $term) {
!             $termQuery = new Zend_Search_Lucene_Search_Query_Term($term);
!             $rewritten = $termQuery->rewrite($index);
! 
!             if ($this->_signs === null) {
!                 // No sign list: every term is required
!                 $sign = true;
!             } else {
!                 $sign = $this->_signs[$termId];
!             }
! 
!             $booleanQuery->addSubquery($rewritten, $sign);
!         }
! 
!         return $booleanQuery;
!     }
! 
!     /**
!      * Optimize query in the context of specified index
!      *
!      * Terms the index does not contain are handled first: a missing
!      * required term makes the whole query empty, a missing optional or
!      * prohibited term is dropped. The query is also empty when only
!      * prohibited terms (or no terms) remain. A single remaining term is
!      * returned as a term query, several terms as a new multi-term query.
!      * The boost is carried over to the returned term or multi-term query.
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query
!      */
!     public function optimize(Zend_Search_Lucene_Interface $index)
!     {
!         $terms = $this->_terms;
!         $signs = $this->_signs;
! 
!         foreach ($terms as $id => $term) {
!             if ($index->hasTerm($term)) {
!                 continue;
!             }
! 
!             if ($signs === null  ||  $signs[$id] === true) {
!                 // A required term is missing, nothing can match
!                 require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!                 return new Zend_Search_Lucene_Search_Query_Empty();
!             }
! 
!             // Term is optional or prohibited: remove it from both lists
!             unset($terms[$id], $signs[$id]);
!         }
! 
!         // Is there anything left that is not prohibited?
!         $hasMatchableTerm = ($signs === null);
!         if (!$hasMatchableTerm) {
!             foreach ($signs as $sign) {
!                 if ($sign !== false) {
!                     $hasMatchableTerm = true;
!                     break;
!                 }
!             }
!         }
!         if (!$hasMatchableTerm) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
! 
!         /**
!          * @todo make an optimization for repeated terms
!          * (they may have different signs)
!          */
! 
!         $termsCount = count($terms);
! 
!         if ($termsCount == 0) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
! 
!         if ($termsCount == 1) {
!             // The check above guarantees that this term is not prohibited,
!             // so it is a one term query with a required or optional element
!             require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!             $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
!         } else {
!             $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $signs);
!         }
! 
!         $optimizedQuery->setBoost($this->getBoost());
! 
!         return $optimizedQuery;
!     }
! 
! 
!     /**
!      * Return all terms of this query, whatever their sign.
!      *
!      * @return array  Array of Zend_Search_Lucene_Index_Term objects
!      */
!     public function getTerms()
!     {
!         return $this->_terms;
!     }
! 
! 
!     /**
!      * Return the signs of the query terms.
!      *
!      * Each entry is true (required), false (prohibited) or null (optional).
!      * The whole value is null when all terms are required.
!      *
!      * @return array|null
!      */
!     public function getSigns()
!     {
!         return $this->_signs;
!     }
! 
! 
!     /**
!      * Store the weight object for a single term.
!      *
!      * The score calculators read the weight back using the key of the term
!      * in the terms array, so $num has to be that key.
!      *
!      * @param integer $num  Key of the term the weight belongs to
!      * @param Zend_Search_Lucene_Search_Weight_Term $weight
!      */
!     public function setWeight($num, $weight)
!     {
!         $this->_weights[$num] = $weight;
!     }
! 
! 
!     /**
!      * Create the weight object for this query and remember it.
!      *
!      * The new Zend_Search_Lucene_Search_Weight_MultiTerm instance is stored
!      * in $this->_weight and is also handed back to the caller.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return Zend_Search_Lucene_Search_Weight
!      */
!     public function createWeight(Zend_Search_Lucene_Interface $reader)
!     {
!         require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
! 
!         $weight        = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
!         $this->_weight = $weight;
! 
!         return $weight;
!     }
! 
! 
!     /**
!      * Calculate result vector for Conjunction query
!      * (like '+something +another')
!      *
!      * Terms are sorted by document frequency (rarest first, original position
!      * breaks ties) and then looked up one after another through a shared
!      * docs filter. The vector returned for the last term is used as the
!      * result set; term frequencies are fetched afterwards through the same
!      * filter.
!      *
!      * A query without terms yields an empty result vector.
!      *
!      * NOTE(review): array_multisort() renumbers the integer keys of
!      * $this->_terms. Confirm that weights registered through setWeight()
!      * are assigned after this point, or are keyed accordingly.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      */
!     private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
!     {
!         $this->_resVector = null;
! 
!         if (count($this->_terms) == 0) {
!             // Nothing to look up. Returning here keeps the empty result
!             // instead of falling through to array_flip() on an undefined value.
!             $this->_resVector = array();
!             return;
!         }
! 
!         // Order terms by selectivity
!         $docFreqs = array();
!         $ids      = array();
!         foreach ($this->_terms as $id => $term) {
!             $docFreqs[] = $reader->docFreq($term);
!             $ids[]      = $id; // Keeps original order for equally selective terms and avoids comparing term objects
!         }
!         array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
!                         $ids,      SORT_ASC, SORT_NUMERIC,
!                         $this->_terms);
! 
!         require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
!         $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
!         $termDocs   = array();
!         foreach ($this->_terms as $term) {
!             $termDocs = $reader->termDocs($term, $docsFilter);
!         }
!         // Treat last retrieved docs vector as a result set
!         // (filter collects data for other terms)
!         $this->_resVector = array_flip($termDocs);
! 
!         foreach ($this->_terms as $termId => $term) {
!             $this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
!         }
! 
!         // No ksort() is needed here: docs are returned ordered and the
!         // algorithms used above do not change the order of elements.
!     }
! 
! 
!     /**
!      * Calculate result vector for non Conjunction query
!      * (like '+something -another')
!      *
!      * Doc vectors of required terms are intersected (smallest first), doc
!      * vectors of optional terms are united and used only when there is no
!      * required term, and docs of prohibited terms are removed from the
!      * outcome. Term frequencies are stored for every term. The result
!      * vector is sorted by document id.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      */
!     private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
!     {
!         $requiredSets  = array();
!         $requiredSizes = array();
!         $requiredIds   = array(); // sorted along with the sizes so the doc arrays never get compared
! 
!         $optionalDocs   = array();
!         $prohibitedDocs = array();
! 
!         foreach ($this->_terms as $termId => $term) {
!             $docs = array_flip($reader->termDocs($term));
!             $sign = $this->_signs[$termId];
! 
!             if ($sign === true) {
!                 // required
!                 $requiredSets[]  = $docs;
!                 $requiredSizes[] = count($docs);
!                 $requiredIds[]   = $termId;
!             } elseif ($sign === false) {
!                 // prohibited: union by key
!                 $prohibitedDocs += $docs;
!             } else {
!                 // optional: union by key
!                 $optionalDocs += $docs;
!             }
! 
!             $this->_termsFreqs[$termId] = $reader->termFreqs($term);
!         }
! 
!         // Process the smallest required vector first
!         array_multisort($requiredSizes, SORT_ASC, SORT_NUMERIC,
!                         $requiredIds,   SORT_ASC, SORT_NUMERIC,
!                         $requiredSets);
! 
!         $required = null;
!         foreach ($requiredSets as $candidate) {
!             if ($required === null) {
!                 $required = $candidate;
!             } else {
!                 // Manual key intersection, kept as a workaround for the
!                 // array_intersect_key() slowness problem.
!                 $kept = array();
!                 foreach ($required as $docId => $value) {
!                     if (isset($candidate[$docId])) {
!                         $kept[$docId] = $value;
!                     }
!                 }
!                 $required = $kept;
!             }
! 
!             if (count($required) == 0) {
!                 // Already empty, remaining terms cannot add anything
!                 break;
!             }
!         }
! 
!         $result = ($required !== null) ? $required : $optionalDocs;
! 
!         if (count($prohibitedDocs) != 0) {
!             // Manual key difference, kept as a workaround for the
!             // array_diff_key() slowness problem. Iterate over the smaller side.
!             if (count($result) < count($prohibitedDocs)) {
!                 foreach (array_keys($result) as $docId) {
!                     if (isset($prohibitedDocs[$docId])) {
!                         unset($result[$docId]);
!                     }
!                 }
!             } else {
!                 foreach ($prohibitedDocs as $docId => $value) {
!                     unset($result[$docId]);
!                 }
!             }
!         }
! 
!         ksort($result, SORT_NUMERIC);
!         $this->_resVector = $result;
!     }
! 
! 
!     /**
!      * Score a document for a conjunction query (all terms are required).
!      *
!      * The score is the sum of tf * term weight * field norm over all terms,
!      * multiplied by the coordination factor and the query boost. The
!      * coordination factor is computed on first use and then cached.
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      */
!     public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         if ($this->_coord === null) {
!             $termsCount   = count($this->_terms);
!             $this->_coord = $reader->getSimilarity()->coord($termsCount, $termsCount);
!         }
! 
!         $total = 0.0;
! 
!         foreach ($this->_terms as $termId => $term) {
!             // No check for a zero term frequency: scoring is performed
!             // only for matched docs.
!             $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
!             $weight = $this->_weights[$termId]->getValue();
!             $norm   = $reader->norm($docId, $term->field);
! 
!             $total += $tf * $weight * $norm;
!         }
! 
!         return $total * $this->_coord * $this->getBoost();
!     }
! 
! 
!     /**
!      * Score a document for a non conjunction query (not all terms are required).
!      *
!      * Only terms that are not prohibited and that have a frequency entry for
!      * the document contribute tf * term weight * field norm. The sum is
!      * multiplied by the coordination factor for the number of contributing
!      * terms and by the query boost. The table of coordination factors is
!      * built on first use and then cached.
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      */
!     public function _nonConjunctionScore($docId, $reader)
!     {
!         if ($this->_coord === null) {
!             // Count the terms that are allowed to match (required or optional)
!             $maxCoord = 0;
!             foreach ($this->_signs as $sign) {
!                 if ($sign !== false) {
!                     $maxCoord++;
!                 }
!             }
! 
!             $this->_coord = array();
!             for ($count = 0; $count <= $maxCoord; $count++) {
!                 $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
!             }
!         }
! 
!         $total   = 0.0;
!         $matched = 0;
! 
!         foreach ($this->_terms as $termId => $term) {
!             if ($this->_signs[$termId] === false) {
!                 // prohibited terms never contribute
!                 continue;
!             }
!             if (!isset($this->_termsFreqs[$termId][$docId])) {
!                 // term does not occur in this document
!                 continue;
!             }
! 
!             $matched++;
! 
!             // No check for a zero term frequency: scoring is performed
!             // only for matched docs.
!             $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
!             $weight = $this->_weights[$termId]->getValue();
!             $norm   = $reader->norm($docId, $term->field);
! 
!             $total += $tf * $weight * $norm;
!         }
! 
!         return $total * $this->_coord[$matched] * $this->getBoost();
!     }
! 
!     /**
!      * Execute query in context of index reader
!      *
!      * Builds the result vector with the conjunction or the non conjunction
!      * algorithm, depending on whether a sign list is present, and then
!      * initializes the query weight.
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  Accepted but not used by this implementation
!      */
!     public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
!     {
!         if ($this->_signs !== null) {
!             $this->_calculateNonConjunctionResult($reader);
!         } else {
!             // No sign list means that every term is required
!             $this->_calculateConjunctionResult($reader);
!         }
! 
!         // Initialize weight if it's not done yet
!         $this->_initWeight($reader);
!     }
! 
!     /**
!      * Get document ids likely matching the query
!      *
!      * Returns the result vector built by execute(). Document ids are the
!      * array keys (performance considerations). The value is null until a
!      * result vector has been calculated.
!      *
!      * @return array
!      */
!     public function matchedDocs()
!     {
!         return $this->_resVector;
!     }
! 
!     /**
!      * Score specified document
!      *
!      * Documents that are not part of the result vector score integer 0.
!      * Other documents are passed to the conjunction or the non conjunction
!      * score calculator, depending on whether a sign list is present.
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float
!      */
!     public function score($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         if (!isset($this->_resVector[$docId])) {
!             return 0;
!         }
! 
!         if ($this->_signs !== null) {
!             return $this->_nonConjunctionScore($docId, $reader);
!         }
! 
!         return $this->_conjunctionScore($docId, $reader);
!     }
! 
!     /**
!      * Return the query terms that may match a document.
!      *
!      * Without a sign list all terms are returned as they are stored.
!      * Otherwise prohibited terms are left out and the remaining terms are
!      * returned as a newly indexed list.
!      *
!      * @return array
!      */
!     public function getQueryTerms()
!     {
!         if ($this->_signs === null) {
!             return $this->_terms;
!         }
! 
!         $matchingTerms = array();
! 
!         foreach ($this->_signs as $id => $sign) {
!             if ($sign === false) {
!                 // prohibited
!                 continue;
!             }
!             $matchingTerms[] = $this->_terms[$id];
!         }
! 
!         return $matchingTerms;
!     }
! 
!     /**
!      * Query specific matches highlighting
!      *
!      * Collects the text of every term that is not prohibited and passes
!      * the list to the highlighter in a single call.
!      *
!      * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
!      */
!     protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
!     {
!         $words = array();
! 
!         if ($this->_signs !== null) {
!             foreach ($this->_signs as $id => $sign) {
!                 if ($sign === false) {
!                     // prohibited terms are not highlighted
!                     continue;
!                 }
!                 $words[] = $this->_terms[$id]->text;
!             }
!         } else {
!             // No sign list: every term is required
!             foreach ($this->_terms as $term) {
!                 $words[] = $term->text;
!             }
!         }
! 
!         $highlighter->highlight($words);
!     }
! 
!     /**
!      * Print a query
!      *
!      * Terms are separated by a single space. Required terms are prefixed
!      * with '+', prohibited terms with '-', optional terms get no prefix.
!      * A term bound to a field is printed as 'field:text'. If the boost is
!      * not 1 the whole query is wrapped as '(...)^boost'.
!      *
!      * It's used only for query visualisation, so characters are not escaped.
!      *
!      * @return string
!      */
!     public function __toString()
!     {
!         $query       = '';
!         $isFirstTerm = true;
! 
!         foreach ($this->_terms as $id => $term) {
!             // The separator depends on the position, not on the key: keys do
!             // not have to start at 0 (optimize() removes entries without
!             // renumbering), which used to produce a leading space.
!             if (!$isFirstTerm) {
!                 $query .= ' ';
!             }
!             $isFirstTerm = false;
! 
!             if ($this->_signs === null || $this->_signs[$id] === true) {
!                 $query .= '+';
!             } else if ($this->_signs[$id] === false) {
!                 $query .= '-';
!             }
! 
!             if ($term->field !== null) {
!                 $query .= $term->field . ':';
!             }
!             $query .= $term->text;
!         }
! 
!         if ($this->getBoost() != 1) {
!             $query = '(' . $query . ')^' . round($this->getBoost(), 4);
!         }
! 
!         return $query;
!     }
! }
! 




More information about the customisationdb-commits mailing list