Your IP : 216.73.216.170


Current Path : /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/src/graminfo/
Upload File :
Current File : /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/src/graminfo/graminfo.php

<?php
 /**
 * This file is part of phpMorphy library
 *
 * Copyright (c) 2007-2008 Kamaev Vladimir <heromantor@users.sourceforge.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/**
 * Contract for readers of a phpMorphy "graminfo" file.
 *
 * NOTE: the interface name is misspelled ("Interace"); it is kept as is because
 * implementing classes reference it by this exact name.
 */
interface phpMorphy_GramInfo_Interace {
    /**
     * Returns the language of the graminfo file.
     *
     * @return string
     */
    public function getLocale();

    /**
     * Returns the encoding of the graminfo file.
     *
     * @return string
     */
    public function getEncoding();

    /**
     * Returns the size of one character
     * (cp1251 - 1, utf8 - 1, utf16 - 2, utf32 - 4 etc).
     *
     * @return int
     */
    public function getCharSize();

    /**
     * Returns the end-of-string marker
     * (usually a string of \0 characters, char_size + 1 long).
     *
     * @return string
     */
    public function getEnds();

    /**
     * Reads the graminfo header located at the given offset.
     *
     * @param int $offset
     * @return array
     */
    public function readGramInfoHeader($offset);

    /**
     * Returns the size of the header struct.
     */
    public function getGramInfoHeaderSize();

    /**
     * Reads the ancodes section for a header obtained from readGramInfoHeader().
     *
     * @param array $info
     * @return array
     */
    public function readAncodes($info);

    /**
     * Reads the flexias section for a header obtained from readGramInfoHeader().
     *
     * @param array $info
     * @return array
     */
    public function readFlexiaData($info);

    /**
     * Reads the offsets of all graminfo headers; each offset can later be
     * passed to readGramInfoHeader().
     *
     * @return array
     */
    public function readAllGramInfoOffsets();

    /**
     * Returns the file header data.
     */
    public function getHeader();

    /**
     * Reads all parts of speech.
     * NOTE(review): result shape is not visible from this file - confirm against implementations.
     */
    public function readAllPartOfSpeech();

    /**
     * Reads all grammems.
     * NOTE(review): result shape is not visible from this file - confirm against implementations.
     */
    public function readAllGrammems();

    /**
     * Reads all ancodes.
     * NOTE(review): result shape is not visible from this file - confirm against implementations.
     */
    public function readAllAncodes();
}
 
/**
 * Base class for graminfo readers.
 *
 * Parses and validates the fixed file header, then (through create()) loads
 * the concrete access class matching the storage type. Section-level reading
 * is left to subclasses.
 */
abstract class phpMorphy_GramInfo implements phpMorphy_GramInfo_Interace {
    /**
     * Number of bytes read from the start of the storage to parse the file header
     */
    const HEADER_SIZE = 128;
    
    /**
     * $resource  - value returned by phpMorphy_Storage::getResource()
     * $header    - parsed header array (see readHeader())
     * $ends      - end-of-string marker: "\0" repeated char_size + 1 times
     * $ends_size - length of $ends as reported by $GLOBALS['__phpmorphy_strlen']
     */
    protected
        $resource,
        $header,
        $ends,
        $ends_size;
    
    /**
     * Stores the resource and header and precomputes the end-of-string marker.
     *
     * Declared as __construct(): a method named after the class is not a
     * constructor since PHP 8.0, which would leave all properties unset.
     *
     * @param mixed $resource storage resource used by the concrete reader
     * @param array $header parsed header; must contain the 'char_size' key
     */
    protected function __construct($resource, $header) {
        $this->resource = $resource;
        $this->header = $header;
        
        $this->ends = str_repeat("\0", $header['char_size'] + 1);
        // The strlen implementation is looked up in $GLOBALS; it is defined outside this file
        $this->ends_size = $GLOBALS['__phpmorphy_strlen']($this->ends);
    }
    
    /**
     * Factory for graminfo readers.
     *
     * @param phpMorphy_Storage $storage
     * @param bool $lazy when true a phpMorphy_GramInfo_Proxy is returned and nothing is read yet
     * @return phpMorphy_GramInfo_Interace
     * @throws phpMorphy_Exception when the header cannot be parsed or fails validation
     */
    static function create(phpMorphy_Storage $storage, $lazy) {
        if($lazy) {
            return new phpMorphy_GramInfo_Proxy($storage);
        }
        
        $header = self::readHeader(
            $storage->read(0, self::HEADER_SIZE)
        );
        
        if(!self::validateHeader($header)) {
            throw new phpMorphy_Exception('Invalid graminfo format');
        }
        
        // The concrete class and the file declaring it are both derived from the storage type
        $storage_type = $storage->getTypeAsString();
        $file_path = dirname(__FILE__) . "/access/graminfo_{$storage_type}.php";
        $clazz = 'phpMorphy_GramInfo_' . ucfirst($storage_type);
        
        require_once($file_path);
        return new $clazz($storage->getResource(), $header);
    }
    
    function getLocale() {
        return $this->header['lang'];
    }
    
    function getEncoding() {
        return $this->header['encoding'];
    }
    
    function getCharSize() {
        return $this->header['char_size'];
    }
    
    function getEnds() {
        return $this->ends;
    }
    
    function getHeader() {
        return $this->header;
    }
    
    /**
     * Parses the raw header bytes.
     *
     * Layout: 24 unsigned 32-bit little-endian integers, followed by two
     * length-prefixed strings (one length byte, then the data) holding the
     * language and the encoding.
     *
     * @param string $headerRaw
     * @return array
     * @throws phpMorphy_Exception when the fixed part of the header cannot be unpacked
     */
    static protected function readHeader($headerRaw) {
        $header = unpack(
            'Vver/Vis_be/Vflex_count_old/' .
            'Vflex_offset/Vflex_size/Vflex_count/Vflex_index_offset/Vflex_index_size/' .
            'Vposes_offset/Vposes_size/Vposes_count/Vposes_index_offset/Vposes_index_size/' .
            'Vgrammems_offset/Vgrammems_size/Vgrammems_count/Vgrammems_index_offset/Vgrammems_index_size/' .
            'Vancodes_offset/Vancodes_size/Vancodes_count/Vancodes_index_offset/Vancodes_index_size/' .
            'Vchar_size/',
            $headerRaw
        );
        
        // unpack() fails on truncated input; stop here instead of indexing a non-array below
        if(!is_array($header)) {
            throw new phpMorphy_Exception('Invalid graminfo format');
        }
        
        // Skip the 24 four-byte integer fields unpacked above
        $offset = 24 * 4;
        $len = ord($GLOBALS['__phpmorphy_substr']($headerRaw, $offset++, 1));
        $header['lang'] = rtrim($GLOBALS['__phpmorphy_substr']($headerRaw, $offset, $len));
        
        $offset += $len;
        
        $len = ord($GLOBALS['__phpmorphy_substr']($headerRaw, $offset++, 1));
        $header['encoding'] = rtrim($GLOBALS['__phpmorphy_substr']($headerRaw, $offset, $len));
        
        return $header;
    }
    
    /**
     * Accepts only headers with version 3 whose 'is_be' field is not 1.
     *
     * @param array $header
     * @return bool
     */
    static protected function validateHeader($header) {
        if(
            3 != $header['ver'] ||
            1 == $header['is_be']
        ) {
            return false;
        }
        
        return true;
    }
    
    /**
     * Cuts the string at the first occurrence of the end-of-string marker.
     *
     * @param string $string
     * @return string the part before the marker, or the input unchanged when no marker is found
     */
    protected function cleanupCString($string) {
        if(false !== ($pos = $GLOBALS['__phpmorphy_strpos']($string, $this->ends))) {
            $string = $GLOBALS['__phpmorphy_substr']($string, 0, $pos);
        }
        
        return $string;
    }
    
    /**
     * Reads $count index entries starting at $offset.
     * The code below relies on a 0-based array with $count numeric entries.
     */
    abstract protected function readSectionIndex($offset, $count);
    
    /**
     * Reads a section index and converts it to a list of sizes.
     *
     * Each size is the difference between two consecutive index entries; the
     * last one is measured against first entry + $total_size.
     *
     * @param int $offset
     * @param int $count
     * @param int $total_size
     * @return array $count sizes, or an empty array when $count is 0
     */
    protected function readSectionIndexAsSize($offset, $count, $total_size) {
        if(!$count) {
            return array();
        }
        
        $index = $this->readSectionIndex($offset, $count);
        // Temporary sentinel so the last entry can be differenced like the others
        $index[$count] = $index[0] + $total_size;
        
        for($i = 0; $i < $count; $i++) {
            $index[$i] = $index[$i + 1] - $index[$i];
        }
        
        unset($index[$count]);
        
        return $index;
    }
}

/**
 * Pass-through decorator: every interface method is forwarded unchanged to the
 * wrapped instance. Subclasses override only the methods they want to alter.
 */
class phpMorphy_GramInfo_Decorator implements phpMorphy_GramInfo_Interace {
    /** Wrapped graminfo instance */
    protected $info;
    
    /**
     * Declared as __construct(): a method named after the class is not a
     * constructor since PHP 8.0, and subclasses call parent::__construct().
     *
     * @param phpMorphy_GramInfo_Interace $info instance to delegate to
     */
    function __construct(phpMorphy_GramInfo_Interace $info) {
        $this->info = $info;
    }
    
    function readGramInfoHeader($offset) { return $this->info->readGramInfoHeader($offset); }
    // The interface method takes no arguments; nothing is passed on
    function getGramInfoHeaderSize() { return $this->info->getGramInfoHeaderSize(); }
    function readAncodes($info) { return $this->info->readAncodes($info); }
    function readFlexiaData($info) { return $this->info->readFlexiaData($info); }
    function readAllGramInfoOffsets() { return $this->info->readAllGramInfoOffsets(); }
    function readAllPartOfSpeech() { return $this->info->readAllPartOfSpeech(); }
    function readAllGrammems() { return $this->info->readAllGrammems(); }
    function readAllAncodes() { return $this->info->readAllAncodes(); }
    
    function getLocale()  { return $this->info->getLocale(); }
    function getEncoding()  { return $this->info->getEncoding(); }
    function getCharSize()  { return $this->info->getCharSize(); }
    function getEnds() { return $this->info->getEnds(); }
    function getHeader() { return $this->info->getHeader(); }
}

/**
 * Lazy decorator: the real graminfo object is created on the first access to
 * the inherited $info property instead of in the constructor.
 */
class phpMorphy_GramInfo_Proxy extends phpMorphy_GramInfo_Decorator {
    /** Storage kept only until the real object has been created */
    protected $storage;
    
    /**
     * @param phpMorphy_Storage $storage source for the deferred phpMorphy_GramInfo::create() call
     */
    function __construct(phpMorphy_Storage $storage) {
        $this->storage = $storage;
        
        // Removing the declared property routes the first read of $this->info through __get()
        unset($this->info);
    }
    
    /**
     * Creates the wrapped object on first access to 'info'.
     *
     * @param string $propName
     * @return phpMorphy_GramInfo_Interace
     * @throws phpMorphy_Exception for any property name other than 'info'
     */
    function __get($propName) {
        if('info' != $propName) {
            throw new phpMorphy_Exception("Unknown prop name '$propName'");
        }
        
        // Non-lazy create; once assigned, the property exists again and __get() is no longer hit
        $this->info = phpMorphy_GramInfo::create($this->storage, false);
        unset($this->storage);
        
        return $this->info;
    }
}

/**
 * Lazy proxy that answers header-related queries from a header array loaded
 * from a PHP file, so those calls do not trigger creation of the real object.
 */
class phpMorphy_GramInfo_Proxy_WithHeader extends phpMorphy_GramInfo_Proxy {
    /**
     * $cache - header array returned by the included cache file
     * $ends  - end-of-string marker: "\0" repeated char_size + 1 times
     */
    protected
        $cache,
        $ends;
    
    /**
     * @param phpMorphy_Storage $storage passed on to the lazy proxy
     * @param string $cacheFile PHP file whose include() result is the header array
     * @throws phpMorphy_Exception when the file does not yield an array
     */
    function __construct(phpMorphy_Storage $storage, $cacheFile) {
        parent::__construct($storage);
        
        $this->cache = $this->readCache($cacheFile);
        
        // getCharSize() reads from the cache, so the cache has to be loaded first
        $marker_length = $this->getCharSize() + 1;
        $this->ends = str_repeat("\0", $marker_length);
    }
    
    /**
     * Includes the cache file and returns the array it produces.
     *
     * @param string $fileName
     * @return array
     * @throws phpMorphy_Exception when the include result is not an array
     */
    protected function readCache($fileName) {
        $result = include($fileName);
        
        if(is_array($result)) {
            return $result;
        }
        
        throw new phpMorphy_Exception("Can`t get header cache from '$fileName' file'");
    }
    
    /** @return string 'lang' entry of the cached header */
    function getLocale() { return $this->cache['lang']; }
    
    /** @return string 'encoding' entry of the cached header */
    function getEncoding() { return $this->cache['encoding']; }
    
    /** @return int 'char_size' entry of the cached header */
    function getCharSize() { return $this->cache['char_size']; }
    
    /** @return string marker computed in the constructor */
    function getEnds() { return $this->ends; }
    
    /** @return array the whole cached header */
    function getHeader() { return $this->cache; }
}

/**
 * Decorator that memoizes readFlexiaData() results for the lifetime of the object.
 */
class phpMorphy_GramInfo_RuntimeCaching extends phpMorphy_GramInfo_Decorator {
    /**
     * $flexia  - readFlexiaData() results keyed by $info['offset']
     * $ancodes - declared but not used by the code in this class
     */
    protected
        $flexia = array(),
        $ancodes = array();
    
    /**
     * Returns flexia data for the given header, reading it from the wrapped
     * object only on the first request for each offset.
     *
     * The cache lives in the declared $flexia property; writing to an
     * undeclared property would create a dynamic one, which is deprecated
     * since PHP 8.2.
     *
     * @param array $info header containing an 'offset' key
     * @return array
     */
    function readFlexiaData($info) {
        $offset = $info['offset'];
        
        if(!isset($this->flexia[$offset])) {
            $this->flexia[$offset] = $this->info->readFlexiaData($info);
        }
        
        return $this->flexia[$offset];
    }
}

/**
 * Decorator that serves readAncodes() from a pre-built, serialized cache and
 * falls back to the wrapped object for offsets missing from it.
 */
class phpMorphy_GramInfo_AncodeCache extends phpMorphy_GramInfo_Decorator {
    /** Counters of cache hits and misses, updated by readAncodes() */
    public
        $hits = 0,
        $miss = 0;

    /** Unserialized cache: ancodes keyed by header offset */
    protected
        $cache;

    /**
     * @param phpMorphy_GramInfo_Interace $inner instance used on cache misses
     * @param object $resource object providing read($offset, $length) and getFileSize()
     * @throws phpMorphy_Exception when the cache cannot be unserialized into an array
     */
    function __construct(phpMorphy_GramInfo_Interace $inner, $resource) {
        parent::__construct($inner);

        // SECURITY: unserialize() must only ever see trusted, locally generated cache data
        $cache = unserialize($resource->read(0, $resource->getFileSize()));

        // Besides false (unserialize failure), reject any non-array payload:
        // readAncodes() indexes the cache by offset
        if(!is_array($cache)) {
            throw new phpMorphy_Exception("Can`t read ancodes cache");
        }

        $this->cache = $cache;
    }

    /**
     * Returns ancodes for the given header, preferring the cache.
     *
     * @param array $info header containing an 'offset' key
     * @return array
     */
    function readAncodes($info) {
        $offset = $info['offset'];

        if(isset($this->cache[$offset])) {
            $this->hits++;

            return $this->cache[$offset];
        } else {
            // in theory misses never occur
            $this->miss++;

            return parent::readAncodes($info);
        }
    }
}