View file humhub-1.2.0-beta.1/protected/vendor/nqxcode/zendsearch/tests/ZendSearch/Lucene/DocumentTest.php

File size: 12.24Kb
<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 * @package   Zend_Search
 */

namespace ZendSearchTest\Lucene;

use ZendSearch\Lucene\Document;
use ZendSearch\Lucene;

/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage UnitTests
 * @group      Zend_Search_Lucene
 */
class DocumentTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Index directory that tearDown() must empty, or null when the current
     * test did not create an index.
     *
     * @var string|null
     */
    private $_indexDirToClear = null;

    /**
     * True once the current test has overridden the static
     * "exclude nofollow links" flag of Document\HTML.
     *
     * @var bool
     */
    private $_noFollowChanged = false;

    /**
     * Value the "exclude nofollow links" flag had before the current test
     * first changed it.
     *
     * @var mixed
     */
    private $_noFollowBackup = null;

    /**
     * Undoes the global side effects of a test, whether it passed or failed:
     * restores the static nofollow flag and empties the temporary index
     * directory.
     */
    protected function tearDown()
    {
        if ($this->_noFollowChanged) {
            Document\HTML::setExcludeNoFollowLinks($this->_noFollowBackup);
            $this->_noFollowChanged = false;
            $this->_noFollowBackup  = null;
        }

        if ($this->_indexDirToClear !== null) {
            $this->_clearDirectory($this->_indexDirToClear);
            $this->_indexDirToClear = null;
        }

        parent::tearDown();
    }

    /**
     * Sets the static "exclude nofollow links" flag, remembering the value it
     * had before the first override so tearDown() can put it back.
     *
     * @param bool $newValue value to pass to Document\HTML::setExcludeNoFollowLinks()
     */
    private function _setExcludeNoFollowLinks($newValue)
    {
        if (!$this->_noFollowChanged) {
            $this->_noFollowBackup  = Document\HTML::getExcludeNoFollowLinks();
            $this->_noFollowChanged = true;
        }

        Document\HTML::setExcludeNoFollowLinks($newValue);
    }

    /**
     * Removes all regular files directly inside $dirName (sub-directories are
     * left alone). Does nothing when the path is missing or not a directory.
     *
     * @param string $dirName directory to empty
     */
    private function _clearDirectory($dirName)
    {
        if (!file_exists($dirName) || !is_dir($dirName))  {
            return;
        }

        $dir = opendir($dirName);
        if ($dir === false) {
            // Without this guard readdir() would be handed a non-resource and
            // the "!== false" loop condition below could never terminate.
            return;
        }

        // remove files from temporary directory
        while (($file = readdir($dir)) !== false) {
            if (!is_dir($dirName . '/' . $file)) {
                // Best-effort cleanup: a file that cannot be removed is skipped.
                @unlink($dirName . '/' . $file);
            }
        }
        closedir($dir);
    }

    /**
     * A freshly created document has a boost of 1.
     */
    public function testCreate()
    {
        $document =  new Document();

        $this->assertEquals(1, $document->boost);
    }

    /**
     * Fields added to a document are reachable through getFieldNames(),
     * property access, getField() and getFieldValue(); a non-UTF-8 field is
     * returned unchanged by property access and converted by
     * getFieldUtf8Value().
     */
    public function testFields()
    {
        $document =  new Document();

        $document->addField(Document\Field::Text('title',      'Title'));
        $document->addField(Document\Field::Text('annotation', 'Annotation'));
        $document->addField(Document\Field::Text('body',       'Document body, document body, document body...'));

        $expectedNames = array('title', 'annotation', 'body');
        $actualNames   = $document->getFieldNames();
        $this->assertTrue(is_array($actualNames));
        // Compare in both directions so that neither unexpected nor missing
        // field names slip through.
        $this->assertEquals(0, count(array_diff($actualNames, $expectedNames)));
        $this->assertEquals(0, count(array_diff($expectedNames, $actualNames)));

        $this->assertEquals('Title',      $document->title);
        $this->assertEquals('Annotation', $document->annotation);
        $this->assertEquals('Document body, document body, document body...', $document->body);

        $this->assertEquals('Title',      $document->getField('title')->value);
        $this->assertEquals('Annotation', $document->getField('annotation')->value);
        $this->assertEquals('Document body, document body, document body...', $document->getField('body')->value);

        $this->assertEquals('Title',      $document->getFieldValue('title'));
        $this->assertEquals('Annotation', $document->getFieldValue('annotation'));
        $this->assertEquals('Document body, document body, document body...', $document->getFieldValue('body'));


        if (PHP_OS === 'AIX') {
            return; // tests below here not valid on AIX
        }

        $wordsWithUmlautsIso88591 = iconv('UTF-8', 'ISO-8859-1', 'Words with umlauts: åãü...');
        $document->addField(Document\Field::Text('description', $wordsWithUmlautsIso88591, 'ISO-8859-1'));
        $this->assertEquals($wordsWithUmlautsIso88591, $document->description);
        $this->assertEquals('Words with umlauts: åãü...', $document->getFieldUtf8Value('description'));
    }

    /**
     * addField() returns a Document, so calls can be chained.
     */
    public function testAddFieldMethodChaining()
    {
        $document =  new Document();
        $this->assertInstanceOf(
            'ZendSearch\Lucene\Document',
            $document->addField(Document\Field::Text('title', 'Title'))
        );

        $document = new Document();
        $chained  = $document->addField(Document\Field::Text('title',      'Title'))
                             ->addField(Document\Field::Text('annotation', 'Annotation'))
                             ->addField(Document\Field::Text('body',       'Document body, document body, document body...'));
        $this->assertInstanceOf('ZendSearch\Lucene\Document', $chained);
    }


    /**
     * highlight() wraps a matching word in a <b> tag carrying the requested
     * background colour.
     */
    public function testHtmlHighlighting()
    {
        $doc = Document\HTML::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
        $this->assertInstanceOf('ZendSearch\Lucene\Document\HTML', $doc);

        $doc->highlight('document', '#66ffff');
        $this->assertContains(
            '<b style="color:black;background-color:#66ffff">Document</b> body.',
            $doc->getHTML()
        );
    }

    /**
     * highlightExtended() delegates the markup of a match to a user callback
     * and forwards the extra parameters to it.
     */
    public function testHtmlExtendedHighlighting()
    {
        $doc = Document\HTML::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
        $this->assertInstanceOf('ZendSearch\Lucene\Document\HTML', $doc);

        $doc->highlightExtended('document',
                                array('\ZendSearchTest\Lucene\DocHighlightingContainer',
                                      'extendedHighlightingCallback'),
                                array('style="color:black;background-color:#ff66ff"',
                                      '(!!!)'));
        $this->assertContains(
            '<b style="color:black;background-color:#ff66ff">Document</b>(!!!) body.',
            $doc->getHTML()
        );
    }

    /**
     * highlight() accepts a list of words and highlights each of them.
     */
    public function testHtmlWordsHighlighting()
    {
        $doc = Document\HTML::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
        $this->assertInstanceOf('ZendSearch\Lucene\Document\HTML', $doc);

        $doc->highlight(array('document', 'body'), '#66ffff');
        $highlightedHTML = $doc->getHTML();
        $this->assertContains('<b style="color:black;background-color:#66ffff">Document</b>', $highlightedHTML);
        $this->assertContains('<b style="color:black;background-color:#66ffff">body</b>', $highlightedHTML);
    }

    /**
     * Malformed markup produced by the highlighting callback (an unclosed
     * <h3>) ends up well-formed in the resulting HTML.
     */
    public function testHtmlExtendedHighlightingCorrectWrongHtml()
    {
        $doc = Document\HTML::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
        $this->assertInstanceOf('ZendSearch\Lucene\Document\HTML', $doc);

        $doc->highlightExtended('document',
                                array('\ZendSearchTest\Lucene\DocHighlightingContainer',
                                      'extendedHighlightingCallback'),
                                array('style="color:black;background-color:#ff66ff"',
                                      '<h3>(!!!)' /* Wrong HTML here, <h3> tag is not closed */));
        $this->assertContains(
            '<b style="color:black;background-color:#ff66ff">Document</b><h3>(!!!)</h3> body.',
            $doc->getHTML()
        );
    }

    /**
     * Header links and body links are extracted from an HTML fixture file.
     */
    public function testHtmlLinksProcessing()
    {
        $doc =  Document\HTML::loadHTMLFile(__DIR__ . '/_indexSource/_files/contributing.documentation.html', true);
        $this->assertInstanceOf('ZendSearch\Lucene\Document\HTML', $doc);

        $this->assertEquals(
            array('index.html', 'contributing.html', 'contributing.bugs.html', 'contributing.wishlist.html'),
            array_values($doc->getHeaderLinks())
        );
        $this->assertEquals(
            array('contributing.bugs.html',
                  'contributing.wishlist.html',
                  'developers.documentation.html',
                  'faq.translators-revision-tracking.html',
                  'index.html',
                  'contributing.html'),
            array_values($doc->getLinks())
        );
    }


    /**
     * Text split by an inline tag (<b>Zend</b>Framework) is indexed as one
     * word, while text split by a block tag (<div>Foo</div>Bar) is not.
     *
     * @group ZF-4252
     */
    public function testHtmlInlineTagsIndexing()
    {
        $indexDir = __DIR__ . '/_index/_files';

        // Register the directory before creating the index so tearDown()
        // empties it even when an assertion below fails.
        $this->_indexDirToClear = $indexDir;
        $index = Lucene\Lucene::create($indexDir);

        $htmlString = '<html><head><title>Hello World</title></head>'
                    . '<body><b>Zend</b>Framework' . "\n" . ' <div>Foo</div>Bar ' . "\n"
                    . ' <strong>Test</strong></body></html>';

        $doc = Document\HTML::loadHTML($htmlString);

        $index->addDocument($doc);

        $hits = $index->find('FooBar');
        $this->assertEquals(0, count($hits));

        $hits = $index->find('ZendFramework');
        $this->assertEquals(1, count($hits));

        // Drop the index reference before tearDown() deletes its files.
        unset($index);
    }

    /**
     * Links declared in <area> tags are collected together with <a> links.
     *
     * @group ZF-8740
     */
    public function testHtmlAreaTags()
    {
        $html = '<HTML>'
                . '<HEAD><TITLE>Page title</TITLE></HEAD>'
                . '<BODY>'
                .   'Document body.'
                .   '<img src="img.png" width="640" height="480" alt="some image" usemap="#some_map" />'
                .   '<map name="some_map">'
                .     '<area shape="rect" coords="0,0,100,100" href="link3.html" alt="Link 3" />'
                .     '<area shape="rect" coords="200,200,300,300" href="link4.html" alt="Link 4" />'
                .   '</map>'
                .   '<a href="link1.html">Link 1</a>.'
                .   '<a href="link2.html" rel="nofollow">Link 1</a>.'
                . '</BODY>'
              . '</HTML>';

        // The previous flag value is restored in tearDown().
        $this->_setExcludeNoFollowLinks(false);
        $doc1 = Document\HTML::loadHTML($html);
        $this->assertInstanceOf('ZendSearch\Lucene\Document\HTML', $doc1);
        $this->assertEquals(
            array('link1.html', 'link2.html', 'link3.html', 'link4.html'),
            array_values($doc1->getLinks())
        );
    }

    /**
     * rel="nofollow" links are kept or dropped depending on the static
     * "exclude nofollow links" flag.
     */
    public function testHtmlNoFollowLinks()
    {
        $html = '<HTML>'
                . '<HEAD><TITLE>Page title</TITLE></HEAD>'
                . '<BODY>'
                .   'Document body.'
                .   '<a href="link1.html">Link 1</a>.'
                .   '<a href="link2.html" rel="nofollow">Link 1</a>.'
                . '</BODY>'
              . '</HTML>';

        // The previous flag value is restored in tearDown().
        $this->_setExcludeNoFollowLinks(false);
        $doc1 = Document\HTML::loadHTML($html);
        $this->assertInstanceOf('ZendSearch\Lucene\Document\HTML', $doc1);
        $this->assertEquals(array('link1.html', 'link2.html'), array_values($doc1->getLinks()));

        $this->_setExcludeNoFollowLinks(true);
        $doc2 = Document\HTML::loadHTML($html);
        $this->assertInstanceOf('ZendSearch\Lucene\Document\HTML', $doc2);
        $this->assertEquals(array('link1.html'), array_values($doc2->getLinks()));
    }

    /**
     * A .docx fixture yields title, description and a non-empty body; loading
     * a missing file raises an InvalidArgumentException whose message
     * contains "is not readable".
     */
    public function testDocx()
    {
        if (!class_exists('ZipArchive')) {
            $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
        }

        $docxDocument = Document\Docx::loadDocxFile(__DIR__ . '/_openXmlDocuments/test.docx', true);

        $this->assertInstanceOf('ZendSearch\Lucene\Document\Docx', $docxDocument);
        $this->assertEquals('Test document', $docxDocument->getFieldValue('title'));
        $this->assertEquals('This is a test document which can be used to demonstrate something.', $docxDocument->getFieldValue('description'));
        $this->assertNotEquals('', $docxDocument->getFieldValue('body'));

        try {
            Document\Docx::loadDocxFile(__DIR__ . '/_openXmlDocuments/dummy.docx', true);

            $this->fail('File not readable exception is expected.');
        } catch (Document\Exception\InvalidArgumentException $e) {
            if (strpos($e->getMessage(), 'is not readable') === false) {
                // Passthrough exception
                throw $e;
            }
        }
    }

    /**
     * A .pptx fixture yields title, description and a non-empty body.
     */
    public function testPptx()
    {
        if (!class_exists('ZipArchive')) {
            $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
        }

        $pptxDocument = Document\Pptx::loadPptxFile(__DIR__ . '/_openXmlDocuments/test.pptx', true);

        $this->assertInstanceOf('ZendSearch\Lucene\Document\Pptx', $pptxDocument);
        $this->assertEquals('Test document', $pptxDocument->getFieldValue('title'));
        $this->assertEquals('This is a test document which can be used to demonstrate something.', $pptxDocument->getFieldValue('description'));
        $this->assertNotEquals('', $pptxDocument->getFieldValue('body'));
    }

    /**
     * A .xlsx fixture yields title, description and a body containing "ipsum".
     */
    public function testXlsx()
    {
        if (!class_exists('ZipArchive')) {
            $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
        }

        $xlsxDocument = Document\Xlsx::loadXlsxFile(__DIR__ . '/_openXmlDocuments/test.xlsx', true);

        $this->assertInstanceOf('ZendSearch\Lucene\Document\Xlsx', $xlsxDocument);
        $this->assertEquals('Test document', $xlsxDocument->getFieldValue('title'));
        $this->assertEquals('This is a test document which can be used to demonstrate something.', $xlsxDocument->getFieldValue('description'));
        $this->assertNotEquals('', $xlsxDocument->getFieldValue('body'));
        $this->assertContains('ipsum', $xlsxDocument->getFieldValue('body'));
    }
}


/**
 * Holder for the static callback handed to highlightExtended() by the
 * extended-highlighting tests.
 */
class DocHighlightingContainer
{
    /**
     * Wraps the matched text in a <b> element and appends a trailing marker.
     *
     * @param string $stringToHighlight text placed inside the <b> element
     * @param string $param1            raw attribute string inserted into the opening tag
     * @param string $param2            text appended right after the closing tag
     * @return string the assembled markup
     */
    public static function extendedHighlightingCallback($stringToHighlight, $param1, $param2)
    {
        return sprintf('<b %s>%s</b>%s', $param1, $stringToHighlight, $param2);
    }
}