View file humhub-1.2.0-beta.1/protected/vendor/nqxcode/zendsearch/tests/ZendSearch/Lucene/IndexTest.php

File size: 14.73Kb
<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 * @package   Zend_Search
 */

namespace ZendSearchTest\Lucene;

use ZendSearch\Lucene;
use ZendSearch\Lucene\Document;
use ZendSearch\Lucene\Index;

/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage UnitTests
 * @group      Zend_Search_Lucene
 */
class IndexTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Fully-qualified name of the interface every opened/created index is
     * expected to implement (kept as a string: the file avoids ::class).
     */
    const INDEX_INTERFACE = 'ZendSearch\Lucene\SearchIndexInterface';

    /**
     * Removes the index files written by the tests so each test starts from
     * an empty working directory.
     */
    public function tearDown()
    {
        $this->_clearDirectory(__DIR__ . '/_index/_files');

        // testDelete() works on a copy of the sample index stored in this
        // directory; clear it here so the copy is removed even when an
        // assertion in that test fails.
        $this->_clearDirectory(__DIR__ . '/_files');
    }

    /**
     * Deletes every regular file directly inside $dirName (sub-directories
     * and the directory itself are kept). Does nothing when $dirName is not
     * an existing directory.
     *
     * @param string $dirName
     */
    private function _clearDirectory($dirName)
    {
        if (!is_dir($dirName)) {
            return;
        }

        $dir = opendir($dirName);
        if ($dir === false) {
            // Without this guard readdir() would be called on a non-resource.
            return;
        }

        while (($file = readdir($dir)) !== false) {
            $path = $dirName . '/' . $file;
            if (!is_dir($path)) {
                // Best-effort removal: a file that cannot be deleted is skipped.
                @unlink($path);
            }
        }
        closedir($dir);
    }

    /**
     * Opens the read-only sample index fixture used by most tests.
     *
     * @return Lucene\SearchIndexInterface
     */
    private function _openSampleIndex()
    {
        return Lucene\Lucene::open(__DIR__ . '/_indexSample/_files');
    }

    /**
     * Creates a new index in the temporary working directory that
     * tearDown() clears.
     *
     * @return Lucene\SearchIndexInterface
     */
    private function _createTempIndex()
    {
        return Lucene\Lucene::create(__DIR__ . '/_index/_files');
    }

    /**
     * Re-opens the index previously written to the temporary working directory.
     *
     * @return Lucene\SearchIndexInterface
     */
    private function _openTempIndex()
    {
        return Lucene\Lucene::open(__DIR__ . '/_index/_files');
    }

    /**
     * Adds one document per *.html file found in the _indexSource/_files
     * fixture directory to $index. Each document gets a 'path' field, a
     * 'modified' keyword field (file mtime) and a 'contents' text field
     * (file body, declared as ISO-8859-1).
     *
     * @param Lucene\SearchIndexInterface $index         index to add documents to
     * @param boolean                     $pathAsKeyword true to store 'path' as a Keyword field,
     *                                                   false to store it as a Text field
     */
    private function _addSourceDocuments($index, $pathAsKeyword)
    {
        $indexSourceDir = __DIR__ . '/_indexSource/_files';

        $dir = opendir($indexSourceDir);
        if ($dir === false) {
            $this->fail('Unable to open fixture directory ' . $indexSourceDir);
        }

        while (($file = readdir($dir)) !== false) {
            $path = $indexSourceDir . '/' . $file;

            if (is_dir($path)) {
                continue;
            }
            // Only index files whose name ends in ".html" (case-insensitive).
            if (strcasecmp(substr($file, -5), '.html') !== 0) {
                continue;
            }

            $data = file_get_contents($path);
            if ($data === false) {
                closedir($dir);
                $this->fail('Unable to read fixture file ' . $path);
            }

            // Create new Document from a file
            $doc = new Document();
            if ($pathAsKeyword) {
                $doc->addField(Document\Field::Keyword('path', 'IndexSource/' . $file));
            } else {
                $doc->addField(Document\Field::Text('path', 'IndexSource/' . $file));
            }
            $doc->addField(Document\Field::Keyword('modified', filemtime($path)));
            $doc->addField(Document\Field::Text('contents', $data, 'ISO-8859-1'));

            // Add document to the index
            $index->addDocument($doc);
        }
        closedir($dir);
    }

    /**
     * Creating an index yields an object implementing the search index interface.
     */
    public function testCreate()
    {
        $index = $this->_createTempIndex();

        $this->assertInstanceOf(self::INDEX_INTERFACE, $index);
    }

    /**
     * Opening the sample index yields an object implementing the search index interface.
     */
    public function testOpen()
    {
        $index = $this->_openSampleIndex();

        $this->assertInstanceOf(self::INDEX_INTERFACE, $index);
    }

    /**
     * Opening the fixture stored under _nonCompoundIndexFiles also yields a search index.
     */
    public function testOpenNonCompound()
    {
        $index = Lucene\Lucene::open(__DIR__ . '/_indexSample/_nonCompoundIndexFiles');

        $this->assertInstanceOf(self::INDEX_INTERFACE, $index);
    }

    /**
     * The default search field starts out equal to null and can be changed.
     */
    public function testDefaultSearchField()
    {
        $currentDefaultSearchField = Lucene\Lucene::getDefaultSearchField();
        $this->assertEquals(null, $currentDefaultSearchField);

        Lucene\Lucene::setDefaultSearchField('anotherField');
        $changedDefaultSearchField = Lucene\Lucene::getDefaultSearchField();

        // Restore the static setting before asserting so that a failure
        // cannot leak 'anotherField' into other tests.
        Lucene\Lucene::setDefaultSearchField($currentDefaultSearchField);

        $this->assertEquals('anotherField', $changedDefaultSearchField);
    }

    /**
     * count() reports 10 for the sample index.
     */
    public function testCount()
    {
        $index = $this->_openSampleIndex();

        $this->assertEquals(10, $index->count());
    }

    /**
     * maxDoc() reports 10 for the sample index.
     */
    public function testMaxDoc()
    {
        $index = $this->_openSampleIndex();

        $this->assertEquals(10, $index->maxDoc());
    }

    /**
     * numDocs() reports 9 for the sample index.
     */
    public function testNumDocs()
    {
        $index = $this->_openSampleIndex();

        $this->assertEquals(9, $index->numDocs());
    }

    /**
     * In the sample index document 3 is not deleted and document 6 is.
     */
    public function testIsDeleted()
    {
        $index = $this->_openSampleIndex();

        $this->assertFalse($index->isDeleted(3));
        $this->assertTrue($index->isDeleted(6));
    }

    /**
     * setMaxBufferedDocs() is reflected by getMaxBufferedDocs().
     */
    public function testMaxBufferedDocs()
    {
        $index = $this->_openSampleIndex();

        $currentMaxBufferedDocs = $index->getMaxBufferedDocs();

        $index->setMaxBufferedDocs(234);
        $this->assertEquals(234, $index->getMaxBufferedDocs());

        $index->setMaxBufferedDocs($currentMaxBufferedDocs);
    }

    /**
     * setMaxMergeDocs() is reflected by getMaxMergeDocs().
     */
    public function testMaxMergeDocs()
    {
        $index = $this->_openSampleIndex();

        $currentMaxMergeDocs = $index->getMaxMergeDocs();

        $index->setMaxMergeDocs(34);
        $this->assertEquals(34, $index->getMaxMergeDocs());

        $index->setMaxMergeDocs($currentMaxMergeDocs);
    }

    /**
     * setMergeFactor() is reflected by getMergeFactor().
     */
    public function testMergeFactor()
    {
        $index = $this->_openSampleIndex();

        $currentMergeFactor = $index->getMergeFactor();

        $index->setMergeFactor(113);
        $this->assertEquals(113, $index->getMergeFactor());

        $index->setMergeFactor($currentMergeFactor);
    }

    /**
     * Searching the sample index for 'submitting' returns three hits.
     */
    public function testFind()
    {
        $index = $this->_openSampleIndex();

        $hits = $index->find('submitting');
        $this->assertCount(3, $hits);
    }

    /**
     * The sample index exposes the fields path, modified and contents.
     */
    public function testGetFieldNames()
    {
        $index = $this->_openSampleIndex();

        $this->assertEquals(
            array('path', 'modified', 'contents'),
            array_values($index->getFieldNames())
        );
    }

    /**
     * Document 3 of the sample index is a Document whose path is about-pear.html.
     */
    public function testGetDocument()
    {
        $index = $this->_openSampleIndex();

        $doc = $index->getDocument(3);

        $this->assertInstanceOf('ZendSearch\Lucene\Document', $doc);
        $this->assertEquals('IndexSource/about-pear.html', $doc->path);
    }

    /**
     * hasTerm() distinguishes a term present in 'contents' from an absent one.
     */
    public function testHasTerm()
    {
        $index = $this->_openSampleIndex();

        $this->assertTrue($index->hasTerm(new Index\Term('packages', 'contents')));
        $this->assertFalse($index->hasTerm(new Index\Term('nonusedword', 'contents')));
    }

    /**
     * termDocs() lists the ids of the documents containing 'packages'.
     */
    public function testTermDocs()
    {
        $index = $this->_openSampleIndex();

        $this->assertEquals(
            array(0, 2, 6, 7, 8),
            array_values($index->termDocs(new Index\Term('packages', 'contents')))
        );
    }

    /**
     * termPositions() maps each matching document id to the positions of 'packages'.
     */
    public function testTermPositions()
    {
        $index = $this->_openSampleIndex();

        $expectedPositions = array(
            0 => array(174),
            2 => array(40, 742),
            6 => array(6, 156, 163),
            7 => array(194),
            8 => array(55, 190, 405),
        );

        $this->assertEquals(
            $expectedPositions,
            $index->termPositions(new Index\Term('packages', 'contents'))
        );
    }

    /**
     * docFreq() reports 5 for 'packages' in 'contents'.
     */
    public function testDocFreq()
    {
        $index = $this->_openSampleIndex();

        $this->assertEquals(5, $index->docFreq(new Index\Term('packages', 'contents')));
    }

    /**
     * getSimilarity() returns an AbstractSimilarity instance.
     */
    public function testGetSimilarity()
    {
        $index = $this->_openSampleIndex();

        $this->assertInstanceOf(
            'ZendSearch\Lucene\Search\Similarity\AbstractSimilarity',
            $index->getSimilarity()
        );
    }

    /**
     * norm() for document 3 / field 'contents' is 0.054688 within 1e-6.
     */
    public function testNorm()
    {
        $index = $this->_openSampleIndex();

        $this->assertEquals(0.054688, $index->norm(3, 'contents'), '', 0.000001);
    }

    /**
     * The sample index reports that it has deletions.
     */
    public function testHasDeletions()
    {
        $index = $this->_openSampleIndex();

        $this->assertTrue($index->hasDeletions());
    }

    /**
     * Deleting a document is visible immediately and, after commit(), also
     * when the index is opened again. Runs on a throw-away copy of the sample
     * index so the fixture itself is never modified.
     */
    public function testDelete()
    {
        // Copy index sample into _files directory
        $sampleIndexDir = __DIR__ . '/_indexSample/_files';
        $tempIndexDir   = __DIR__ . '/_files';
        if (!is_dir($tempIndexDir)) {
            mkdir($tempIndexDir);
        }

        $this->_clearDirectory($tempIndexDir);

        $indexDir = opendir($sampleIndexDir);
        if ($indexDir === false) {
            $this->fail('Unable to open sample index directory ' . $sampleIndexDir);
        }
        while (($file = readdir($indexDir)) !== false) {
            if (!is_dir($sampleIndexDir . '/' . $file)) {
                copy($sampleIndexDir . '/' . $file, $tempIndexDir . '/' . $file);
            }
        }
        closedir($indexDir);

        $index = Lucene\Lucene::open($tempIndexDir);

        $this->assertFalse($index->isDeleted(2));
        $index->delete(2);
        $this->assertTrue($index->isDeleted(2));

        $index->commit();

        // Drop the first handle before opening the index a second time.
        unset($index);

        $index1 = Lucene\Lucene::open($tempIndexDir);
        $this->assertTrue($index1->isDeleted(2));
        unset($index1);
    }

    /**
     * An index filled from the HTML source fixtures can be re-opened afterwards.
     */
    public function testAddDocument()
    {
        $index = $this->_createTempIndex();

        // 'path' is stored as a Text field in this test.
        $this->_addSourceDocuments($index, false);

        // Drop the writing handle before re-opening the index.
        unset($index);

        $index1 = $this->_openTempIndex();
        $this->assertInstanceOf(self::INDEX_INTERFACE, $index1);
    }

    /**
     * After deleting the 'contributing.html' document and optimizing, the
     * re-opened index returns three hits for 'submitting'.
     */
    public function testOptimize()
    {
        $index = $this->_createTempIndex();

        $index->setMaxBufferedDocs(2);

        // 'path' is stored as a Keyword field so the whole path can be
        // looked up as a single term below.
        $this->_addSourceDocuments($index, true);
        unset($index);

        $index1 = $this->_openTempIndex();
        $this->assertInstanceOf(self::INDEX_INTERFACE, $index1);

        $pathTerm = new Index\Term('IndexSource/contributing.html', 'path');
        $contributingDocs = $index1->termDocs($pathTerm);
        foreach ($contributingDocs as $id) {
            $index1->delete($id);
        }
        $index1->optimize();
        unset($index1);

        $index2 = $this->_openTempIndex();
        $this->assertInstanceOf(self::INDEX_INTERFACE, $index2);

        $hits = $index2->find('submitting');
        $this->assertCount(3, $hits);
    }

    /**
     * terms() returns 607 terms for the sample index.
     */
    public function testTerms()
    {
        $index = $this->_openSampleIndex();

        $this->assertCount(607, $index->terms());
    }

    /**
     * Walking the terms stream from the start visits 607 terms.
     */
    public function testTermsStreamInterface()
    {
        $index = $this->_openSampleIndex();

        $terms = array();

        $index->resetTermsStream();
        while ($index->currentTerm() !== null) {
            $terms[] = $index->currentTerm();
            $index->nextTerm();
        }

        $this->assertCount(607, $terms);
    }

    /**
     * Walking the terms stream after skipTo('one') visits 244 terms.
     */
    public function testTermsStreamInterfaceSkipTo()
    {
        $index = $this->_openSampleIndex();

        $terms = array();

        $index->resetTermsStream();
        $index->skipTo(new Index\Term('one', 'contents'));

        while ($index->currentTerm() !== null) {
            $terms[] = $index->currentTerm();
            $index->nextTerm();
        }

        $this->assertCount(244, $terms);
    }

    /**
     * After skipTo('one') the stream yields 'one', 'only', 'open' in that order.
     */
    public function testTermsStreamInterfaceSkipToTermsRetrieving()
    {
        $index = $this->_openSampleIndex();

        $terms = array();

        $index->resetTermsStream();
        $index->skipTo(new Index\Term('one', 'contents'));

        $terms[] = $index->currentTerm();
        $terms[] = $index->nextTerm();
        $terms[] = $index->nextTerm();

        $index->closeTermsStream();

        $expectedTerms = array(
            new Index\Term('one', 'contents'),
            new Index\Term('only', 'contents'),
            new Index\Term('open', 'contents'),
        );

        $this->assertEquals($expectedTerms, $terms);
    }

    /**
     * skipTo() on an index whose only document has empty contents leaves no current term.
     */
    public function testTermsStreamInterfaceSkipToTermsRetrievingZeroTermsCase()
    {
        $index = $this->_createTempIndex();

        // Zero terms
        $doc = new Document();
        $doc->addField(Document\Field::Text('contents', ''));
        $index->addDocument($doc);

        unset($index);

        $index = $this->_openTempIndex();

        $index->resetTermsStream();
        $index->skipTo(new Index\Term('term', 'contents'));

        $this->assertNull($index->currentTerm());

        $index->closeTermsStream();
    }

    /**
     * skipTo('term') on an index containing only 'someterm' leaves no current term.
     */
    public function testTermsStreamInterfaceSkipToTermsRetrievingOneTermsCase()
    {
        $index = $this->_createTempIndex();

        // One term
        $doc = new Document();
        $doc->addField(Document\Field::Text('contents', 'someterm'));
        $index->addDocument($doc);

        unset($index);

        $index = $this->_openTempIndex();

        $index->resetTermsStream();
        $index->skipTo(new Index\Term('term', 'contents'));

        $this->assertNull($index->currentTerm());

        $index->closeTermsStream();
    }

    /**
     * skipTo('term') on an index containing 'someterm' and 'word' stops at 'word'.
     */
    public function testTermsStreamInterfaceSkipToTermsRetrievingTwoTermsCase()
    {
        $index = $this->_createTempIndex();

        // Two terms
        $doc = new Document();
        $doc->addField(Document\Field::Text('contents', 'someterm word'));
        $index->addDocument($doc);

        unset($index);

        $index = $this->_openTempIndex();

        $index->resetTermsStream();
        $index->skipTo(new Index\Term('term', 'contents'));

        $this->assertEquals(new Index\Term('word', 'contents'), $index->currentTerm());

        $index->closeTermsStream();
    }

    /**
     * isDeleted() can be called on a freshly added document before any
     * explicit commit.
     *
     * @group ZF-9680
     */
    public function testIsDeletedWithoutExplicitCommit()
    {
        $index = $this->_createTempIndex();

        $document = new Document;
        $document->addField(Document\Field::Keyword('_id', 'myId'));
        $document->addField(Document\Field::Keyword('bla', 'blubb'));
        $index->addDocument($document);

        $this->assertFalse($index->isDeleted(0));
    }
}