Warning: Cannot modify header information - headers already sent by (output started at /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code:102) in /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code on line 4

Warning: Cannot modify header information - headers already sent by (output started at /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code:102) in /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code on line 4

Warning: Cannot modify header information - headers already sent by (output started at /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code:102) in /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code on line 4

Warning: Cannot modify header information - headers already sent by (output started at /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code:102) in /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code on line 4

Warning: Cannot modify header information - headers already sent by (output started at /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code:102) in /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code on line 4

Warning: Cannot modify header information - headers already sent by (output started at /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code:102) in /var/www/iplanru/data/www/intesco.ru/d59ed/index.php(1) : eval()'d code(2) : eval()'d code on line 4
PK+[[!Ojgramtab_consts.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ if(!defined('PHPMORPHY_DIR')) { define('PHPMORPHY_DIR', dirname(__FILE__)); } require_once(PHPMORPHY_DIR . '/fsa/fsa.php'); require_once(PHPMORPHY_DIR . '/graminfo/graminfo.php'); require_once(PHPMORPHY_DIR . '/morphiers.php'); require_once(PHPMORPHY_DIR . '/gramtab.php'); require_once(PHPMORPHY_DIR . '/storage.php'); require_once(PHPMORPHY_DIR . '/source.php'); require_once(PHPMORPHY_DIR . '/langs_stuff/common.php'); class phpMorphy_Exception extends Exception { } // we need byte oriented string functions // with namespaces support we only need overload string functions in current namespace // but currently use this ugly hack. 
function phpmorphy_overload_mb_funcs($prefix) { $GLOBALS['__phpmorphy_strlen'] = "{$prefix}strlen"; $GLOBALS['__phpmorphy_strpos'] = "{$prefix}strpos"; $GLOBALS['__phpmorphy_strrpos'] = "{$prefix}strrpos"; $GLOBALS['__phpmorphy_substr'] = "{$prefix}substr"; $GLOBALS['__phpmorphy_strtolower'] = "{$prefix}strtolower"; $GLOBALS['__phpmorphy_strtoupper'] = "{$prefix}strtoupper"; $GLOBALS['__phpmorphy_substr_count'] = "{$prefix}substr_count"; } if(2 == (ini_get('mbstring.func_overload') & 2)) { phpmorphy_overload_mb_funcs('mb_orig_'); } else { phpmorphy_overload_mb_funcs(''); } class phpMorphy_FilesBundle { protected $dir, $lang; function phpMorphy_FilesBundle($dirName, $lang) { $this->dir = rtrim($dirName, "\\/" . DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR; $this->setLang($lang); } function getLang() { return $this->lang; } function setLang($lang) { $this->lang = $GLOBALS['__phpmorphy_strtolower']($lang); } function getCommonAutomatFile() { return $this->genFileName('common_aut'); } function getPredictAutomatFile() { return $this->genFileName('predict_aut'); } function getGramInfoFile() { return $this->genFileName('morph_data'); } function getGramInfoAncodesCacheFile() { return $this->genFileName('morph_data_ancodes_cache'); } function getAncodesMapFile() { return $this->genFileName('morph_data_ancodes_map'); } function getGramTabFile() { return $this->genFileName('gramtab'); } function getGramTabFileWithTextIds() { return $this->genFileName('gramtab_txt'); } function getDbaFile($type) { if(!isset($type)) { $type = 'db3'; } return $this->genFileName("common_dict_$type"); } function getGramInfoHeaderCacheFile() { return $this->genFileName('morph_data_header_cache'); } protected function genFileName($token, $extraExt = null) { return $this->dir . $token . '.' . $this->lang . (isset($extraExt) ? '.' . $extraExt : '') . 
'.bin'; } }; class phpMorphy_WordDescriptor_Collection_Serializer { function serialize(phpMorphy_WordDescriptor_Collection $collection, $asText) { $result = array(); foreach($collection as $descriptor) { $result[] = $this->processWordDescriptor($descriptor, $asText); } return $result; } protected function processWordDescriptor(phpMorphy_WordDescriptor $descriptor, $asText) { $forms = array(); $all = array(); foreach($descriptor as $word_form) { $forms[] = $word_form->getWord(); $all[] = $this->serializeGramInfo($word_form, $asText); } return array( 'forms' => $forms, 'all' => $all, 'common' => '', ); } protected function serializeGramInfo(phpMorphy_WordForm $wordForm, $asText) { if($asText) { return $wordForm->getPartOfSpeech() . ' ' . implode(',', $wordForm->getGrammems()); } else { return array( 'pos' => $wordForm->getPartOfSpeech(), 'grammems' => $wordForm->getGrammems() ); } } } class phpMorphy { const RESOLVE_ANCODES_AS_TEXT = 0; const RESOLVE_ANCODES_AS_DIALING = 1; const RESOLVE_ANCODES_AS_INT = 2; const NORMAL = 0; const IGNORE_PREDICT = 2; const ONLY_PREDICT = 3; const PREDICT_BY_NONE = 'none'; const PREDICT_BY_SUFFIX = 'by_suffix'; const PREDICT_BY_DB = 'by_db'; protected $storage_factory, $common_fsa, $common_source, $predict_fsa, $options, // variables with two underscores uses lazy paradigm, i.e. initialized at first time access //$__common_morphier, //$__predict_by_suf_morphier, //$__predict_by_db_morphier, //$__bulk_morphier, //$__word_descriptor_serializer, $helper, $last_prediction_type ; function __construct($dir, $lang = null, $options = array()) { $this->options = $options = $this->repairOptions($options); // TODO: use two versions of phpMorphy class i.e. phpMorphy_v3 { } ... 
phpMorphy_v2 extends phpMorphy_v3 if($dir instanceof phpMorphy_FilesBundle && is_array($lang)) { $this->initOldStyle($dir, $lang); } else { $this->initNewStyle($this->createFilesBundle($dir, $lang), $options); } $this->last_prediction_type = self::PREDICT_BY_NONE; } /** * @return phpMorphy_Morphier_Interface */ function getCommonMorphier() { return $this->__common_morphier; } /** * @return phpMorphy_Morphier_Interface */ function getPredictBySuffixMorphier() { return $this->__predict_by_suf_morphier; } /** * @return phpMorphy_Morphier_Interface */ function getPredictByDatabaseMorphier() { return $this->__predict_by_db_morphier; } /** * @return phpMorphy_Morphier_Bulk */ function getBulkMorphier() { return $this->__bulk_morphier; } /** * @return string */ function getEncoding() { return $this->helper->getGramInfo()->getEncoding(); } /** * @return string */ function getLocale() { return $this->helper->getGramInfo()->getLocale(); } /** * @return phpMorphy_GrammemsProvider_Base */ function getGrammemsProvider() { return clone $this->__grammems_provider; } /** * @return phpMorphy_GrammemsProvider_Base */ function getDefaultGrammemsProvider() { return $this->__grammems_provider; } /** * @return phpMorphy_Shm_Cache */ function getShmCache() { return $this->storage_factory->getShmCache(); } /** * @return bool */ function isLastPredicted() { return self::PREDICT_BY_NONE !== $this->last_prediction_type; } function getLastPredictionType() { return $this->last_prediction_type; } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * @return phpMorphy_WordDescriptor_Collection */ function findWord($word, $type = self::NORMAL) { if(is_array($word)) { $result = array(); foreach($word as $w) { $result[$w] = $this->invoke('getWordDescriptor', $w, $type); } return $result; } else { return $this->invoke('getWordDescriptor', $word, $type); } } /** * Alias for getBaseForm * * @param mixed $word - string or array of strings * @param mixed 
$type - prediction managment * @return array */ function lemmatize($word, $type = self::NORMAL) { return $this->getBaseForm($word, $type); } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * @return array */ function getBaseForm($word, $type = self::NORMAL) { return $this->invoke('getBaseForm', $word, $type); } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * @return array */ function getAllForms($word, $type = self::NORMAL) { return $this->invoke('getAllForms', $word, $type); } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * @return array */ function getPseudoRoot($word, $type = self::NORMAL) { return $this->invoke('getPseudoRoot', $word, $type); } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * @return array */ function getPartOfSpeech($word, $type = self::NORMAL) { return $this->invoke('getPartOfSpeech', $word, $type); } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * @return array */ function getAllFormsWithAncodes($word, $type = self::NORMAL) { return $this->invoke('getAllFormsWithAncodes', $word, $type); } /** * @param mixed $word - string or array of strings * @paradm bool $asText - represent graminfo as text or ancodes * @param mixed $type - prediction managment * @return array */ function getAllFormsWithGramInfo($word, $asText = true, $type = self::NORMAL) { if(false === ($result = $this->findWord($word, $type))) { return false; } $asText = (bool)$asText; if(is_array($word)) { $out = array(); foreach($result as $w => $r) { if(false !== $r) { $out[$w] = $this->processWordsCollection($r, $asText); } else { $out[$w] = false; } } return $out; } else { return $this->processWordsCollection($result, $asText); } } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * 
@return array */ function getAncode($word, $type = self::NORMAL) { return $this->invoke('getAncode', $word, $type); } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * @return array */ function getGramInfo($word, $type = self::NORMAL) { return $this->invoke('getGrammarInfo', $word, $type); } /** * @param mixed $word - string or array of strings * @param mixed $type - prediction managment * @return array */ function getGramInfoMergeForms($word, $type = self::NORMAL) { return $this->invoke('getGrammarInfoMergeForms', $word, $type); } protected function getAnnotForWord($word, $type) { return $this->invoke('getAnnot', $word, $type); } /** * @param string $word * @param mixed $ancode * @param mixed $commonAncode * @param bool $returnOnlyWord * @param mixed $callback * @param mixed $type * @return array */ function castFormByAncode($word, $ancode, $commonAncode = null, $returnOnlyWord = false, $callback = null, $type = self::NORMAL) { $resolver = $this->helper->getAncodesResolver(); $common_ancode_id = $resolver->unresolve($commonAncode); $ancode_id = $resolver->unresolve($ancode); $data = $this->helper->getGrammemsAndPartOfSpeech($ancode_id); if(isset($common_ancode_id)) { $data[1] = array_merge($data[1], $this->helper->getGrammems($common_ancode_id)); } return $this->castFormByGramInfo( $word, $data[0], $data[1], $returnOnlyWord, $callback, $type ); } /** * @param string $word * @param mixed $partOfSpeech * @param array $grammems * @param bool $returnOnlyWord * @param mixed $callback * @param mixed $type * @return array */ function castFormByGramInfo($word, $partOfSpeech, $grammems, $returnOnlyWord = false, $callback = null, $type = self::NORMAL) { if(false === ($annot = $this->getAnnotForWord($word, $type))) { return false; } return $this->helper->castFormByGramInfo($word, $annot, $partOfSpeech, $grammems, $returnOnlyWord, $callback); } /** * @param string $word * @param string $patternWord * @param mixed 
$essentialGrammems * @param bool $returnOnlyWord * @param mixed $callback * @param mixed $type * @return array */ function castFormByPattern($word, $patternWord, phpMorphy_GrammemsProvider_Interface $grammemsProvider = null, $returnOnlyWord = false, $callback = null, $type = self::NORMAL) { if(false === ($word_annot = $this->getAnnotForWord($word, $type))) { return false; } if(!isset($grammemsProvider)) { $grammemsProvider = $this->__grammems_provider; } $result = array(); foreach($this->getGramInfo($patternWord, $type) as $paradigm) { foreach($paradigm as $grammar) { $pos = $grammar['pos']; $essential_grammems = $grammemsProvider->getGrammems($pos); $grammems = false !== $essential_grammems ? array_intersect($grammar['grammems'], $essential_grammems): $grammar['grammems']; $res = $this->helper->castFormByGramInfo( $word, $word_annot, $pos, $grammems, $returnOnlyWord, $callback, $type ); if(count($res)) { $result = array_merge($result, $res); } } } return $returnOnlyWord ? array_unique($result) : $result; } // public interface end protected function processWordsCollection(phpMorphy_WordDescriptor_Collection $collection, $asText) { return $this->__word_descriptor_serializer->serialize($collection, $asText); } protected function invoke($method, $word, $type) { $this->last_prediction_type = self::PREDICT_BY_NONE; if($type === self::ONLY_PREDICT) { if(is_array($word)) { $result = array(); foreach($word as $w) { $result[$w] = $this->predictWord($method, $w); } return $result; } else { return $this->predictWord($method, $word); } } if(is_array($word)) { $result = $this->__bulk_morphier->$method($word); if($type !== self::IGNORE_PREDICT) { $not_found = $this->__bulk_morphier->getNotFoundWords(); for($i = 0, $c = count($not_found); $i < $c; $i++) { $word = $not_found[$i]; $result[$word] = $this->predictWord($method, $word); } } else { for($i = 0, $c = count($not_found); $i < $c; $i++) { $result[$not_found[$i]] = false; } } return $result; } else { if(false === ($result = 
$this->__common_morphier->$method($word))) { if($type !== self::IGNORE_PREDICT) { return $this->predictWord($method, $word); } } return $result; } } protected function predictWord($method, $word) { if(false !== ($result = $this->__predict_by_suf_morphier->$method($word))) { $this->last_prediction_type = self::PREDICT_BY_SUFFIX; return $result; } if(false !== ($result = $this->__predict_by_db_morphier->$method($word))) { $this->last_prediction_type = self::PREDICT_BY_DB; return $result; } return false; } //////////////// // init code //////////////// protected function initNewStyle(phpMorphy_FilesBundle $bundle, $options) { $this->options = $options = $this->repairOptions($options); $storage_type = $options['storage']; $storage_factory = $this->storage_factory = $this->createStorageFactory($options['shm']); $graminfo_as_text = $this->options['graminfo_as_text']; // fsa $this->common_fsa = $this->createFsa($storage_factory->open($storage_type, $bundle->getCommonAutomatFile(), false), false); // lazy $this->predict_fsa = $this->createFsa($storage_factory->open($storage_type, $bundle->getPredictAutomatFile(), true), true); // lazy // graminfo $graminfo = $this->createGramInfo($storage_factory->open($storage_type, $bundle->getGramInfoFile(), true), $bundle); // lazy // gramtab $gramtab = $this->createGramTab( $storage_factory->open( $storage_type, $graminfo_as_text ? 
$bundle->getGramTabFileWithTextIds() : $bundle->getGramTabFile(), true ) ); // always lazy // common source //$this->__common_source = $this->createCommonSource($bundle, $this->options['common_source']); $this->helper = $this->createMorphierHelper($graminfo, $gramtab, $graminfo_as_text, $bundle); } protected function createCommonSource(phpMorphy_FilesBundle $bundle, $opts) { $type = $opts['type']; switch($type) { case PHPMORPHY_SOURCE_FSA: return new phpMorphy_Source_Fsa($this->common_fsa); case PHPMORPHY_SOURCE_DBA: return new phpMorphy_Source_Dba( $bundle->getDbaFile($this->getDbaHandlerName(@$opts['opts']['handler'])), $opts['opts'] ); default: throw new phpMorphy_Exception("Unknown source type given '$type'"); } } protected function getDbaHandlerName($name) { return isset($name) ? $name : phpMorphy_Source_Dba::getDefaultHandler(); } protected function initOldStyle(phpMorphy_FilesBundle $bundle, $options) { $options = $this->repairOptions($options); switch($bundle->getLang()) { case 'rus': $bundle->setLang('ru_RU'); break; case 'eng': $bundle->setLang('en_EN'); break; case 'ger': $bundle->setLang('de_DE'); break; } $this->initNewStyle($bundle, $options); } protected function repairOldOptions($options) { $defaults = array( 'predict_by_suffix' => false, 'predict_by_db' => false, ); return (array)$options + $defaults; } protected function repairSourceOptions($options) { $defaults = array( 'type' => PHPMORPHY_SOURCE_FSA, 'opts' => null ); return (array)$options + $defaults; } protected function repairOptions($options) { $defaults = array( 'shm' => array(), 'graminfo_as_text' => true, 'storage' => PHPMORPHY_STORAGE_FILE, 'common_source' => $this->repairSourceOptions(@$options['common_source']), 'predict_by_suffix' => true, 'predict_by_db' => true, 'use_ancodes_cache' => false, 'resolve_ancodes' => self::RESOLVE_ANCODES_AS_TEXT ); return (array)$options + $defaults; } function __get($name) { switch($name) { case '__predict_by_db_morphier': 
$this->__predict_by_db_morphier = $this->createPredictByDbMorphier( $this->predict_fsa, $this->helper ); break; case '__predict_by_suf_morphier': $this->__predict_by_suf_morphier = $this->createPredictBySuffixMorphier( $this->common_fsa, $this->helper ); break; case '__bulk_morphier': $this->__bulk_morphier = $this->createBulkMorphier( $this->common_fsa, $this->helper ); break; case '__common_morphier': $this->__common_morphier = $this->createCommonMorphier( $this->common_fsa, $this->helper ); break; case '__word_descriptor_serializer': $this->__word_descriptor_serializer = $this->createWordDescriptorSerializer(); break; case '__grammems_provider': $this->__grammems_provider = $this->createGrammemsProvider(); break; default: throw new phpMorphy_Exception("Invalid prop name '$name'"); } return $this->$name; } //////////////////// // factory methods //////////////////// function createGrammemsProvider() { return phpMorphy_GrammemsProvider_Factory::create($this); } protected function createWordDescriptorSerializer() { return new phpMorphy_WordDescriptor_Collection_Serializer(); } protected function createFilesBundle($dir, $lang) { return new phpMorphy_FilesBundle($dir, $lang); } protected function createStorageFactory($options) { return new phpMorphy_Storage_Factory($options); } protected function createFsa(phpMorphy_Storage $storage, $lazy) { return phpMorphy_Fsa::create($storage, $lazy); } protected function createGramInfo(phpMorphy_Storage $graminfoFile, phpMorphy_FilesBundle $bundle) { //return new phpMorphy_GramInfo_RuntimeCaching(new phpMorphy_GramInfo_Proxy($storage)); //return new phpMorphy_GramInfo_RuntimeCaching(phpMorphy_GramInfo::create($storage, false)); $result = new phpMorphy_GramInfo_RuntimeCaching( new phpMorphy_GramInfo_Proxy_WithHeader( $graminfoFile, $bundle->getGramInfoHeaderCacheFile() ) ); if($this->options['use_ancodes_cache']) { return new phpMorphy_GramInfo_AncodeCache( $result, $this->storage_factory->open( $this->options['storage'], 
$bundle->getGramInfoAncodesCacheFile(), true ) // always lazy open ); } else { return $result; } } protected function createGramTab(phpMorphy_Storage $storage) { return new phpMorphy_GramTab_Proxy($storage); } protected function createAncodesResolverInternal(phpMorphy_GramTab_Interface $gramtab, phpMorphy_FilesBundle $bundle) { switch($this->options['resolve_ancodes']) { case self::RESOLVE_ANCODES_AS_TEXT: return array( 'phpMorphy_AncodesResolver_ToText', array($gramtab) ); case self::RESOLVE_ANCODES_AS_INT: return array( 'phpMorphy_AncodesResolver_AsIs', array() ); case self::RESOLVE_ANCODES_AS_DIALING: return array( 'phpMorphy_AncodesResolver_ToDialingAncodes', array( $this->storage_factory->open( $this->options['storage'], $bundle->getAncodesMapFile(), true ) // always lazy open ) ); default: throw new phpMorphy_Exception("Invalid resolve_ancodes option, valid values are RESOLVE_ANCODES_AS_DIALING, RESOLVE_ANCODES_AS_INT, RESOLVE_ANCODES_AS_TEXT"); } } protected function createAncodesResolver(phpMorphy_GramTab_Interface $gramtab, phpMorphy_FilesBundle $bundle, $lazy) { $result = $this->createAncodesResolverInternal($gramtab, $bundle); if($lazy) { return new phpMorphy_AncodesResolver_Proxy($result[0], $result[1]); } else { return phpMorphy_AncodesResolver_Proxy::instantinate($result[0], $result[1]); } } protected function createMorphierHelper( phpMorphy_GramInfo_Interace $graminfo, phpMorphy_GramTab_Interface $gramtab, $graminfoAsText, phpMorphy_FilesBundle $bundle ) { return new phpMorphy_Morphier_Helper( $graminfo, $gramtab, $this->createAncodesResolver($gramtab, $bundle, true), $graminfoAsText ); } protected function createCommonMorphier(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) { return new phpMorphy_Morphier_Common($fsa, $helper); } protected function createBulkMorphier(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) { return new phpMorphy_Morphier_Bulk($fsa, $helper); } protected function 
createPredictByDbMorphier(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) { if($this->options['predict_by_db']) { return new phpMorphy_Morphier_Predict_Database($fsa, $helper); } else { return new phpMorphy_Morphier_Empty(); } } protected function createPredictBySuffixMorphier(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) { if($this->options['predict_by_suffix']) { return new phpMorphy_Morphier_Predict_Suffix($fsa, $helper); } else { return new phpMorphy_Morphier_Empty(); } } }; PK+[[qu storage.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ define('PHPMORPHY_STORAGE_FILE', 'file'); define('PHPMORPHY_STORAGE_MEM', 'mem'); define('PHPMORPHY_STORAGE_SHM', 'shm'); abstract class phpMorphy_Storage { protected $file_name, $resource; function __construct($fileName) { $this->file_name = $fileName; $this->resource = $this->open($fileName); } function getFileName() { return $this->file_name; } function getResource() { return $this->resource; } function getTypeAsString() { return $this->getType(); } function read($offset, $len, $exactLength = true) { if($offset >= $this->getFileSize()) { throw new phpMorphy_Exception("Can`t read $len bytes beyond end of '" . $this->getFileName() . "' file, offset = $offset, file_size = " . 
$this->getFileSize()); } try { $result = $this->readUnsafe($offset, $len); } catch (Exception $e) { throw new phpMorphy_Exception("Can`t read $len bytes at $offset offset, from '" . $this->getFileName() . "' file: " . $e->getMessage()); } if($exactLength && $GLOBALS['__phpmorphy_strlen']($result) < $len) { throw new phpMorphy_Exception("Can`t read $len bytes at $offset offset, from '" . $this->getFileName() . "' file"); } return $result; } abstract function readUnsafe($offset, $len); abstract function getFileSize(); abstract function getType(); abstract protected function open($fileName); }; class phpMorphy_Storage_Proxy extends phpMorphy_Storage { protected $file_name, $type, $factory; function __construct($type, $fileName, $factory) { $this->file_name = $fileName; $this->type = $type; $this->factory = $factory; } function getFileName() { return $this->__obj->getFileName(); } function getResource() { return $this->__obj->getResource(); } function getFileSize() { return $this->__obj->getFileSize(); } function getType() { return $this->__obj->getType(); } function readUnsafe($offset, $len) { return $this->__obj->readUnsafe($offset, $len); } protected function open($fileName) { return $this->__obj->open($fileName); } function __get($propName) { if($propName === '__obj') { $this->__obj = $this->factory->open($this->type, $this->file_name, false); unset($this->file_name); unset($this->type); unset($this->factory); return $this->__obj; } throw new phpMorphy_Exception("Unknown '$propName' property"); } } class phpMorphy_Storage_File extends phpMorphy_Storage { function getType() { return PHPMORPHY_STORAGE_FILE; } function getFileSize() { if(false === ($stat = fstat($this->resource))) { throw new phpMorphy_Exception('Can`t invoke fstat for ' . $this->file_name . 
' file'); } return $stat['size']; } function readUnsafe($offset, $len) { if(0 !== fseek($this->resource, $offset)) { throw new phpMorphy_Exception("Can`t seek to $offset offset"); } return fread($this->resource, $len); } function open($fileName) { if(false === ($fh = fopen($fileName, 'rb'))) { throw new phpMorphy_Exception("Can`t open $this->file_name file"); } return $fh; } } class phpMorphy_Storage_Mem extends phpMorphy_Storage { function getType() { return PHPMORPHY_STORAGE_MEM; } function getFileSize() { return $GLOBALS['__phpmorphy_strlen']($this->resource); } function readUnsafe($offset, $len) { return $GLOBALS['__phpmorphy_substr']($this->resource, $offset, $len); } function open($fileName) { if(false === ($string = file_get_contents($fileName))) { throw new phpMorphy_Exception("Can`t read $fileName file"); } return $string; } } class phpMorphy_Storage_Shm extends phpMorphy_Storage { protected $descriptor; function __construct($fileName, $shmCache) { $this->cache = $shmCache; parent::__construct($fileName); } function getFileSize() { return $this->descriptor->getFileSize(); } function getType() { return PHPMORPHY_STORAGE_SHM; } function readUnsafe($offset, $len) { return shmop_read($this->resource['shm_id'], $this->resource['offset'] + $offset, $len); } function open($fileName) { $this->descriptor = $this->cache->get($fileName); return array( 'shm_id' => $this->descriptor->getShmId(), 'offset' => $this->descriptor->getOffset() ); } } class phpMorphy_Storage_Factory { protected $shm_cache, $shm_options; function __construct($shmOptions = array()) { $this->shm_options = $shmOptions; } function getShmCache() { if(!isset($this->shm_cache)) { $this->shm_cache = $this->createShmCache($this->shm_options); } return $this->shm_cache; } function open($type, $fileName, $lazy) { switch($type) { case PHPMORPHY_STORAGE_FILE: case PHPMORPHY_STORAGE_MEM: case PHPMORPHY_STORAGE_SHM: break; default: throw new phpMorphy_Exception("Invalid storage type $type specified"); } 
if($lazy) { return new phpMorphy_Storage_Proxy($type, $fileName, $this); } $clazz = 'phpMorphy_Storage_' . ucfirst($GLOBALS['__phpmorphy_strtolower']($type)); if($type != PHPMORPHY_STORAGE_SHM) { return new $clazz($fileName); } else { return new $clazz($fileName, $this->getShmCache()); } } protected function createShmCache($options) { require_once(PHPMORPHY_DIR . '/shm_utils.php'); return new phpMorphy_Shm_Cache($options, !empty($options['clear_on_create'])); } } PK+[[hͺ** morphiers.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ require_once(PHPMORPHY_DIR . '/gramtab.php'); require_once(PHPMORPHY_DIR . 
'/unicode.php'); // ---------------------------- // Morphier interface // ---------------------------- interface phpMorphy_Morphier_Interface { function getAnnot($word); function getBaseForm($word); function getAllForms($word); function getPseudoRoot($word); function getPartOfSpeech($word); function getWordDescriptor($word); function getAllFormsWithAncodes($word); function getAncode($word); function getGrammarInfoMergeForms($word); function getGrammarInfo($word); } class phpMorphy_Morphier_Empty implements phpMorphy_Morphier_Interface { function getAnnot($word) { return false; } function getBaseForm($word) { return false; } function getAllForms($word) { return false; } function getAllFormsWithGramInfo($word) { return false; } function getPseudoRoot($word) { return false; } function getPartOfSpeech($word) { return false; } function getWordDescriptor($word) { return false; } function getAllFormsWithAncodes($word) { return false; } function getAncode($word) { return false; } function getGrammarInfoMergeForms($word) { return false; } function getGrammarInfo($word) { return false; } function castFormByGramInfo($word, $partOfSpeech, $grammems, $returnWords = false, $callback = null) { return false; } } // ---------------------------- // Annot decoder // ---------------------------- interface phpMorphy_AnnotDecoder_Interface { function decode($annotsRaw, $withBase); }; abstract class phpMorphy_AnnotDecoder_Base implements phpMorphy_AnnotDecoder_Interface { const INVALID_ANCODE_ID = 0xFFFF; protected $ends, $unpack_str, $block_size; function __construct($ends) { $this->ends = $ends; $this->unpack_str = $this->getUnpackString(); $this->block_size = $this->getUnpackBlockSize(); } abstract protected function getUnpackString(); abstract protected function getUnpackBlockSize(); function decode($annotRaw, $withBase) { if(empty($annotRaw)) { throw new phpMorphy_Exception("Empty annot given"); } $unpack_str = $this->unpack_str; $unpack_size = $this->block_size; $result = 
unpack("Vcount/$unpack_str", $annotRaw); if(false === $result) { throw new phpMorphy_Exception("Invalid annot string '$annotRaw'"); } if($result['common_ancode'] == self::INVALID_ANCODE_ID) { $result['common_ancode'] = null; } $count = $result['count']; $result = array($result); if($count > 1) { for($i = 0; $i < $count - 1; $i++) { $res = unpack($unpack_str, $GLOBALS['__phpmorphy_substr']($annotRaw, 4 + ($i + 1) * $unpack_size, $unpack_size)); if($res['common_ancode'] == self::INVALID_ANCODE_ID) { $res['common_ancode'] = null; } $result[] = $res; } } if($withBase) { $items = explode($this->ends, $GLOBALS['__phpmorphy_substr']($annotRaw, 4 + $count * $unpack_size)); for($i = 0; $i < $count; $i++) { $result[$i]['base_prefix'] = $items[$i * 2]; $result[$i]['base_suffix'] = $items[$i * 2 + 1]; } } return $result; } } class phpMorphy_AnnotDecoder_Common extends phpMorphy_AnnotDecoder_Base { protected function getUnpackString() { return 'Voffset/vcplen/vplen/vflen/vcommon_ancode/vforms_count/vpacked_forms_count/vaffixes_size/vform_no/vpos_id'; // return 'Voffset/vcplen/vplen/vflen/vcommon_ancode/vforms_count/vpacked_forms_count/vaffixes_size/vpos_id'; } protected function getUnpackBlockSize() { return 22; } } class phpMorphy_AnnotDecoder_Predict extends phpMorphy_AnnotDecoder_Common { protected function getUnpackString() { // return 'Voffset/vcplen/vplen/vflen/vcommon_ancode/vforms_count/vpacked_forms_count/vaffixes_size/vform_no/vpos_id/vfreq'; return parent::getUnpackString() . 
// (tail of the `return parent::getUnpackString() .` expression in phpMorphy_AnnotDecoder_Predict)
'/vfreq';
    }

    protected function getUnpackBlockSize() {
        // one extra 16-bit field (freq)
        return parent::getUnpackBlockSize() + 2;
    }
}

/**
 * Creates and caches annotation decoders. One factory instance is kept per
 * end-of-string marker; each factory builds its decoders lazily.
 */
class phpMorphy_AnnotDecoder_Factory {
    protected static $instances = array();

    protected
        $cache_common,
        $cache_predict,
        $eos;

    protected function __construct($eos) {
        $this->eos = $eos;
    }

    /**
     * @param string $eos end-of-string marker handed to the decoders
     * @return phpMorphy_AnnotDecoder_Factory the shared factory for this marker
     */
    static function create($eos) {
        if(!isset(self::$instances[$eos])) {
            self::$instances[$eos] = new phpMorphy_AnnotDecoder_Factory($eos);
        }

        return self::$instances[$eos];
    }

    function getCommonDecoder() {
        if(!isset($this->cache_common)) {
            $this->cache_common = $this->instantinate('common');
        }

        return $this->cache_common;
    }

    function getPredictDecoder() {
        if(!isset($this->cache_predict)) {
            $this->cache_predict = $this->instantinate('predict');
        }

        return $this->cache_predict;
    }

    /**
     * Builds phpMorphy_AnnotDecoder_<Type> for the given type name.
     */
    protected function instantinate($type) {
        $clazz = 'phpMorphy_AnnotDecoder_' . ucfirst($GLOBALS['__phpmorphy_strtolower']($type));

        return new $clazz($this->eos);
    }
}

/**
 * Converts between internal ancode ids and their external representation.
 */
interface phpMorphy_AncodesResolver_Interface {
    function resolve($ancodeId);
    function unresolve($ancode);
}

/**
 * Lazy wrapper: the real resolver is constructed on first use.
 *
 * `__obj` is deliberately NOT declared as a property. The first read of
 * $this->__obj goes through __get(), which creates the resolver and stores it
 * as a real property, so later reads bypass __get().
 */
class phpMorphy_AncodesResolver_Proxy implements phpMorphy_AncodesResolver_Interface {
    protected
        $args,
        $class;
        //$__obj;

    /**
     * @param string $class class name of the real resolver
     * @param array $ctorArgs constructor arguments for that class
     */
    function __construct($class, $ctorArgs) {
        $this->class = $class;
        $this->args = $ctorArgs;
    }

    function unresolve($ancode) {
        return $this->__obj->unresolve($ancode);
    }

    function resolve($ancodeId) {
        return $this->__obj->resolve($ancodeId);
    }

    static function instantinate($class, $args) {
        $ref = new ReflectionClass($class);
        return $ref->newInstanceArgs($args);
    }

    function __get($propName) {
        if($propName === '__obj') {
            $this->__obj = $this->instantinate($this->class, $this->args);

            // construction data is no longer needed once the resolver exists
            unset($this->args);
            unset($this->class);

            return $this->__obj;
        }

        throw new phpMorphy_Exception("Unknown '$propName' property");
    }
}

/**
 * Resolver that maps ancode ids to strings (and back) through the gramtab.
 */
class phpMorphy_AncodesResolver_ToText implements phpMorphy_AncodesResolver_Interface {
    protected $gramtab;

    function __construct(phpMorphy_GramTab_Interface $gramtab) {
        $this->gramtab = $gramtab;
    }

    function resolve($ancodeId) {
// (body of phpMorphy_AncodesResolver_ToText::resolve())
if(!isset($ancodeId)) {
            return null;
        }

        return $this->gramtab->ancodeToString($ancodeId);
    }

    function unresolve($ancode) {
        return $this->gramtab->stringToAncode($ancode);
        //throw new phpMorphy_Exception("Can`t convert grammar info in text into ancode id");
    }
}

/**
 * Resolver backed by a serialized id => ancode map read from storage.
 */
class phpMorphy_AncodesResolver_ToDialingAncodes implements phpMorphy_AncodesResolver_Interface {
    protected
        $ancodes_map,
        $reverse_map;

    /**
     * @param phpMorphy_Storage $ancodesMap storage holding the serialized map
     * @throws phpMorphy_Exception when the map cannot be unserialized
     */
    function __construct(phpMorphy_Storage $ancodesMap) {
        // NOTE(review): unserialize() must only ever see trusted dictionary files
        if(false === ($this->ancodes_map = unserialize($ancodesMap->read(0, $ancodesMap->getFileSize())))) {
            throw new phpMorphy_Exception("Can`t open phpMorphy => Dialing ancodes map");
        }

        $this->reverse_map = array_flip($this->ancodes_map);
    }

    /**
     * @return mixed id for the given ancode, or null when $ancode is null
     * @throws phpMorphy_Exception for an ancode missing from the map
     */
    function unresolve($ancode) {
        if(!isset($ancode)) {
            return null;
        }

        if(!isset($this->reverse_map[$ancode])) {
            throw new phpMorphy_Exception("Unknwon ancode found '$ancode'");
        }

        return $this->reverse_map[$ancode];
    }

    /**
     * @return mixed ancode for the given id, or null when $ancodeId is null
     * @throws phpMorphy_Exception for an id missing from the map
     */
    function resolve($ancodeId) {
        if(!isset($ancodeId)) {
            return null;
        }

        if(!isset($this->ancodes_map[$ancodeId])) {
            throw new phpMorphy_Exception("Unknwon ancode id found '$ancodeId'");
        }

        return $this->ancodes_map[$ancodeId];
    }
}

/**
 * Identity resolver: ids and ancodes pass through unchanged.
 */
class phpMorphy_AncodesResolver_AsIs implements phpMorphy_AncodesResolver_Interface {
    // This ctor for ReflectionClass::newInstanceArgs($args) with $args = array()
    function __construct() { }

    function resolve($ancodeId) {
        return $ancodeId;
    }

    function unresolve($ancode) {
        return $ancode;
    }
}

// ----------------------------
// Helper
// ----------------------------
/**
 * Turns decoded annotations into user-facing results (base forms, all forms,
 * grammar info, ...) using the graminfo, the gramtab and an ancodes resolver.
 */
class phpMorphy_Morphier_Helper {
    protected
        $graminfo,
        $annot_decoder,
        $char_size,
        $ends,
        $gramtab,
        $ancodes_resolver,
        $gramtab_consts_included = false,
        $resolve_pos;

    function __construct(
        phpMorphy_GramInfo_Interace $graminfo,
        phpMorphy_GramTab_Interface $gramtab,
        phpMorphy_AncodesResolver_Interface $ancodesResolver,
        $resolvePartOfSpeech
    ) {
        $this->graminfo = $graminfo;
        $this->gramtab = $gramtab;
        $this->resolve_pos = (bool)$resolvePartOfSpeech;
        $this->ancodes_resolver = $ancodesResolver;
// (continues phpMorphy_Morphier_Helper::__construct())
$this->char_size = $graminfo->getCharSize();
        $this->ends = $graminfo->getEnds();
    }

    function setAnnotDecoder(phpMorphy_AnnotDecoder_Interface $annotDecoder) {
        $this->annot_decoder = $annotDecoder;
    }

    // getters
    function getEndOfString() {
        return $this->ends;
    }

    function getCharSize() {
        return $this->char_size;
    }

    function hasAnnotDecoder() {
        return isset($this->annot_decoder);
    }

    function getAnnotDecoder() {
        return $this->annot_decoder;
    }

    function getAncodesResolver() {
        return $this->ancodes_resolver;
    }

    function getGramInfo() {
        return $this->graminfo;
    }

    function getGramTab() {
        return $this->gramtab;
    }

    function isResolvePartOfSpeech() {
        return $this->resolve_pos;
    }

    // other
    function resolvePartOfSpeech($posId) {
        return $this->gramtab->resolvePartOfSpeechId($posId);
    }

    function getGrammems($ancodeId) {
        return $this->gramtab->getGrammems($ancodeId);
    }

    /**
     * @return array two-element list: part of speech, then grammems, for the ancode
     */
    function getGrammemsAndPartOfSpeech($ancodeId) {
        return array(
            $this->gramtab->getPartOfSpeech($ancodeId),
            $this->gramtab->getGrammems($ancodeId)
        );
    }

    /**
     * Returns the annotation's part of speech, resolved through the gramtab
     * when part-of-speech resolving was requested, otherwise the raw pos_id.
     */
    function extractPartOfSpeech($annot) {
        if($this->resolve_pos) {
            return $this->resolvePartOfSpeech($annot['pos_id']);
        } else {
            return $annot['pos_id'];
        }
    }

    /**
     * Asks the gramtab to include its constants (only when parts of speech
     * are resolved) and remembers that this was done.
     */
    protected function includeGramTabConsts() {
        if($this->isResolvePartOfSpeech()) {
            $this->gramtab->includeConsts();
        }

        $this->gramtab_consts_included = true;
    }

    // getters
    /**
     * @return phpMorphy_WordDescriptor_Collection descriptors for the word's annotations
     */
    function getWordDescriptor($word, $annots) {
        if(!$this->gramtab_consts_included) {
            $this->includeGramTabConsts();
        }

        return new phpMorphy_WordDescriptor_Collection($word, $annots, $this);
    }

    /**
     * Splits a word into its base (the word without $cplen + $plen leading and
     * $flen trailing bytes) and its common prefix (the first $cplen bytes).
     *
     * @return array two-element list: base, then prefix
     */
    protected function getBaseAndPrefix($word, $cplen, $plen, $flen) {
        if($flen) {
            $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen, -$flen);
        } else {
            if($cplen || $plen) {
                $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen);
            } else {
                $base = $word;
            }
        }

        $prefix = $cplen ?
// (tail of the `$prefix = $cplen ?` ternary in phpMorphy_Morphier_Helper::getBaseAndPrefix())
$GLOBALS['__phpmorphy_substr']($word, 0, $cplen) : '';

        return array($base, $prefix);
    }

    /**
     * @return array|false distinct parts of speech over all annotations, false when nothing was found
     */
    function getPartOfSpeech($word, $annots) {
        if(false === $annots) {
            return false;
        }

        $result = array();

        // array keys are used to de-duplicate
        foreach($this->decodeAnnot($annots, false) as $annot) {
            $result[$this->extractPartOfSpeech($annot)] = 1;
        }

        return array_keys($result);
    }

    /**
     * @return array|false distinct base forms of the word, false when nothing was found
     */
    function getBaseForm($word, $annots) {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, true);

        return $this->composeBaseForms($word, $annots);
    }

    /**
     * @return array|false distinct bases (word minus prefixes and flexia), false when nothing was found
     */
    function getPseudoRoot($word, $annots) {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, false);

        $result = array();

        foreach($annots as $annot) {
            list($base) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            $result[$base] = 1;
        }

        return array_keys($result);
    }

    /**
     * @return array|false distinct word forms, false when nothing was found
     */
    function getAllForms($word, $annots) {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, false);

        return $this->composeForms($word, $annots);
    }

    /**
     * Selects the forms of a word that match a part of speech and a set of
     * grammems, or that are accepted by a user callback.
     *
     * @param string $word
     * @param mixed $annots raw or decoded annotations, false when nothing was found
     * @param mixed $partOfSpeech required part of speech, null for any (ignored when $callback is given)
     * @param mixed $grammems grammems every selected form must have (ignored when $callback is given)
     * @param bool $returnWords true: return distinct form strings; false: return detail arrays
     * @param callable|null $callback invoked as ($form, $pos, $grammems, $formNo); a falsy result rejects the form
     * @return array|false
     * @throws phpMorphy_Exception when $callback is set but not callable
     */
    function castFormByGramInfo($word, $annots, $partOfSpeech, $grammems, $returnWords = false, $callback = null) {
        if(false === $annots) {
            return false;
        }

        if(isset($callback) && !is_callable($callback)) {
            throw new phpMorphy_Exception("Invalid callback given");
        }

        $result = array();
        $grammems = (array)$grammems;
        $partOfSpeech = isset($partOfSpeech) ? (string)$partOfSpeech : null;

        foreach($this->decodeAnnot($annots, false) as $annot) {
            $all_ancodes = $this->graminfo->readAncodes($annot);
            $flexias = $this->graminfo->readFlexiaData($annot);
            $common_ancode = $annot['common_ancode'];
            $common_grammems = isset($common_ancode) ? $this->gramtab->getGrammems($common_ancode) : array();

            list($base, $prefix) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            // i use strange $form_no handling for perfomance issue (no function call overhead)
            // $i walks the flexia list in (prefix, suffix) pairs; $form_no counts every ancode seen
            $i = 0;
            $form_no = 0;
            foreach($all_ancodes as $form_ancodes) {
                foreach($form_ancodes as $ancode) {
                    $form_pos = $this->gramtab->getPartOfSpeech($ancode);
                    $form_grammems = array_merge($this->gramtab->getGrammems($ancode), $common_grammems);
                    $form = $prefix . $flexias[$i] . $base . $flexias[$i + 1];

                    if(isset($callback)) {
                        if(!call_user_func($callback, $form, $form_pos, $form_grammems, $form_no)) {
                            $form_no++;
                            continue;
                        }
                    } else {
                        if(isset($partOfSpeech) && $form_pos !== $partOfSpeech) {
                            $form_no++;
                            continue;
                        }

                        // reject the form when any requested grammem is missing
                        if(count(array_diff($grammems, $form_grammems)) > 0) {
                            $form_no++;
                            continue;
                        }
                    }

                    if($returnWords) {
                        $result[$form] = 1;
                    } else {
                        $result[] = array(
                            'form' => $form,
                            'form_no' => $form_no,
                            'pos' => $form_pos,
                            'grammems' => $form_grammems
                        );
                    }

                    $form_no++;
                }

                $i += 2;
            }
        }

        return $returnWords ? array_keys($result) : $result;
    }

    /**
     * @return array|false per annotation: the resolved common ancode and the
     *                     resolved ancodes of the found form; false when nothing was found
     */
    function getAncode($annots) {
        if(false === $annots) {
            return false;
        }

        $result = array();

        foreach($this->decodeAnnot($annots, false) as $annot) {
            $all_ancodes = $this->graminfo->readAncodes($annot);

            $result[] = array(
                'common' => $this->ancodes_resolver->resolve($annot['common_ancode']),
                'all' => array_map(
                    array($this->ancodes_resolver, 'resolve'),
                    $all_ancodes[$annot['form_no']]
                )
            );
        }

        return $this->array_unique($result);
    }

    /**
     * array_unique() that also works on arrays of arrays. PHP older than
     * 5.2.9 has no SORT_REGULAR flag, so there the elements are compared by
     * their serialized form.
     */
    protected static function array_unique($array) {
        static $need_own;

        if(!isset($need_own)) {
            $need_own = -1 === version_compare(PHP_VERSION, '5.2.9');
        }

        if($need_own) {
            $result = array();

            foreach(array_keys(array_unique(array_map('serialize', $array))) as $key) {
                $result[$key] = $array[$key];
            }

            return $result;
        } else {
            return array_unique($array, SORT_REGULAR);
        }
    }

    /**
     * Like getGrammarInfo(), but merges the grammems of all ancodes of the
     * found form into a single entry per annotation.
     *
     * @return array|false
     */
    function getGrammarInfoMergeForms($annots) {
        if(false === $annots) {
            return false;
        }

        $result = array();

        foreach($this->decodeAnnot($annots, false) as $annot) {
            $all_ancodes = $this->graminfo->readAncodes($annot);
            $common_ancode = $annot['common_ancode'];
            $grammems = isset($common_ancode) ? $this->gramtab->getGrammems($common_ancode) : array();

            $forms_count = 0;
            $form_no = $annot['form_no'];

            foreach($all_ancodes[$form_no] as $ancode) {
                $grammems = array_merge($grammems, $this->gramtab->getGrammems($ancode));
                $forms_count++;
            }

            $grammems = array_unique($grammems);
            sort($grammems);

            // NOTE(review): $ancode below is whatever the loop above left behind; if the
            // ancode list of a form can be empty it is undefined or stale — confirm.
            $result[] = array(
                // part of speech identical across all joined forms
                'pos' => $this->gramtab->getPartOfSpeech($ancode),
                'grammems' => $grammems,
                'forms_count' => $forms_count,
                'form_no_low' => $form_no,
                'form_no_high' => $form_no + $forms_count,
            );
        }

        return $this->array_unique($result);
    }

    /**
     * @return array|false per annotation: a sorted, de-duplicated list of
     *                     pos/grammems/form_no entries for the found form
     */
    function getGrammarInfo($annots) {
        if(false === $annots) {
            return false;
        }

        $result = array();

        foreach($this->decodeAnnot($annots, false) as $annot) {
            $all_ancodes = $this->graminfo->readAncodes($annot);
            $common_ancode = $annot['common_ancode'];
            $common_grammems = isset($common_ancode) ?
// (tail of the `$common_grammems = isset($common_ancode) ?` ternary in getGrammarInfo())
$this->gramtab->getGrammems($common_ancode) : array();

            $info = array();

            $form_no = $annot['form_no'];
            foreach($all_ancodes[$form_no] as $ancode) {
                $grammems = //array_unique(
                    array_merge($common_grammems, $this->gramtab->getGrammems($ancode));
                //);

                sort($grammems);

                $info_item = array(
                    'pos' => $this->gramtab->getPartOfSpeech($ancode),
                    'grammems' => $grammems,
                    'form_no' => $form_no,
                );

                $info[] = $info_item;
            }

            $unique_info = $this->array_unique($info);
            sort($unique_info);
            $result[] = $unique_info;
        }

        return $this->array_unique($result);
    }

    /**
     * @param string $resolveType accepted for compatibility; not used by this implementation
     * @return array|false per annotation: forms with their resolved ancodes, false when nothing was found
     */
    function getAllFormsWithResolvedAncodes($word, $annots, $resolveType = 'no_resolve') {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, false);

        return $this->composeFormsWithResolvedAncodes($word, $annots);
    }

    /**
     * @param array $foundFormNo out-parameter: per annotation index, the 'low'/'high'
     *                           result positions occupied by the form that was looked up
     * @return array|false flat list of (form, ancode) pairs, false when nothing was found
     */
    function getAllFormsWithAncodes($word, $annots, &$foundFormNo = array()) {
        if(false === $annots) {
            return false;
        }

        $annots = $this->decodeAnnot($annots, false);

        return $this->composeFormsWithAncodes($word, $annots, $foundFormNo);
    }

    /**
     * @return array|false per annotation: the ancodes read from graminfo, false when nothing was found
     */
    function getAllAncodes($word, $annots) {
        if(false === $annots) {
            return false;
        }

        // Decode first, like every sibling getter: raw annotations are a packed
        // string and cannot be iterated. Already-decoded arrays pass through unchanged.
        $annots = $this->decodeAnnot($annots, false);

        $result = array();

        foreach($annots as $annot) {
            $result[] = $this->graminfo->readAncodes($annot);
        }

        return $result;
    }

    /**
     * Builds the distinct base forms for decoded annotations (decoded with base info).
     * When the looked-up form is form 0 the word itself is the base form.
     */
    protected function composeBaseForms($word, $annots) {
        $result = array();

        foreach($annots as $annot) {
            if($annot['form_no'] > 0) {
                list($base, $prefix) = $this->getBaseAndPrefix(
                    $word,
                    $annot['cplen'],
                    $annot['plen'],
                    $annot['flen']
                );

                $result[$prefix . $annot['base_prefix'] . $base . $annot['base_suffix']] = 1;
            } else {
                $result[$word] = 1;
            }
        }

        return array_keys($result);
    }

    /**
     * Builds the distinct word forms for decoded annotations by combining the
     * word's base with every (prefix, suffix) flexia pair.
     */
    protected function composeForms($word, $annots) {
        $result = array();

        foreach($annots as $annot) {
            list($base, $prefix) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            // read flexia
            $flexias = $this->graminfo->readFlexiaData($annot);

            for($i = 0, $c = count($flexias); $i < $c; $i += 2) {
                $result[$prefix . $flexias[$i] . $base .
// (tail of the `$result[$prefix . $flexias[$i] . $base .` key expression in composeForms())
$flexias[$i + 1]] = 1;
            }
        }

        return array_keys($result);
    }

    /**
     * For every annotation: all forms (one entry per form/ancode pair), the
     * resolved common ancode and the resolved ancode of each entry.
     *
     * @return array list of arrays with keys 'forms', 'common' and 'all'
     */
    protected function composeFormsWithResolvedAncodes($word, $annots) {
        $result = array();

        foreach($annots as $annotIdx => $annot) {
            list($base, $prefix) = $this->getBaseAndPrefix(
                $word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            $words = array();
            $ancodes = array();
            $common_ancode = $annot['common_ancode'];

            // read flexia
            $flexias = $this->graminfo->readFlexiaData($annot);
            $all_ancodes = $this->graminfo->readAncodes($annot);

            // flexias come in (prefix, suffix) pairs; pair number $i / 2 owns ancode list $i / 2
            for($i = 0, $c = count($flexias); $i < $c; $i += 2) {
                $form = $prefix . $flexias[$i] . $base . $flexias[$i + 1];

                $current_ancodes = $all_ancodes[$i / 2];
                foreach($current_ancodes as $ancode) {
                    $words[] = $form;
                    $ancodes[] = $this->ancodes_resolver->resolve($ancode);
                }
            }

            $result[] = array(
                'forms' => $words,
                'common' => $this->ancodes_resolver->resolve($common_ancode),
                'all' => $ancodes,
            );
        }

        return $result;
    }

    /**
     * Builds a flat list of (form, ancode) pairs over all annotations and
     * records, per annotation index, which result positions belong to the
     * form that was looked up.
     *
     * @param string $word the looked-up word
     * @param array $annots decoded annotations
     * @param array $foundFormNo out-parameter, filled with 'low'/'high' positions per annotation index
     * @return array
     */
    protected function composeFormsWithAncodes($word, $annots, &$foundFormNo) {
        $result = array();

        // $word is reused below for each generated form, so keep the looked-up
        // word separately: every annotation must derive base/prefix from it.
        $source_word = $word;

        foreach($annots as $annotIdx => $annot) {
            list($base, $prefix) = $this->getBaseAndPrefix(
                $source_word,
                $annot['cplen'],
                $annot['plen'],
                $annot['flen']
            );

            // read flexia
            $flexias = $this->graminfo->readFlexiaData($annot);
            $ancodes = $this->graminfo->readAncodes($annot);

            $found_form_no = $annot['form_no'];

            $foundFormNo = !is_array($foundFormNo) ? array() : $foundFormNo;

            for($i = 0, $c = count($flexias); $i < $c; $i += 2) {
                $form_no = $i / 2;
                $word = $prefix . $flexias[$i] . $base .
// (tail of the `$word = $prefix . $flexias[$i] . $base .` expression in composeFormsWithAncodes())
$flexias[$i + 1];

                if($found_form_no == $form_no) {
                    // remember which result positions the looked-up form occupies
                    $count = count($result);
                    $foundFormNo[$annotIdx]['low'] = $count;
                    $foundFormNo[$annotIdx]['high'] = $count + count($ancodes[$form_no]) - 1;
                }

                foreach($ancodes[$form_no] as $ancode) {
                    $result[] = array($word, $ancode);
                }
            }
        }

        return $result;
    }

    /**
     * Decodes raw annotations with the configured decoder; annotations that
     * are already an array are returned unchanged.
     */
    function decodeAnnot($annotsRaw, $withBase) {
        if(is_array($annotsRaw)) {
            return $annotsRaw;
        } else {
            return $this->annot_decoder->decode($annotsRaw, $withBase);
        }
    }
}

// ----------------------------
// WordDescriptor
// ----------------------------
// TODO: extend ArrayObject?
/**
 * Read-only, array-like list of phpMorphy_WordDescriptor objects, one per
 * annotation of a word.
 */
class phpMorphy_WordDescriptor_Collection implements Countable, IteratorAggregate, ArrayAccess {
    protected
        $word,
        $descriptors = array(),
        $helper;

    // The constructor always assigned this property without declaring it, which
    // made it a public dynamic property (deprecated since PHP 8.2). Declared
    // public so external readers keep working.
    public $annots;

    /**
     * @param string $word
     * @param mixed $annots raw or decoded annotations, false when nothing was found
     * @param phpMorphy_Morphier_Helper $helper
     */
    function __construct($word, $annots, phpMorphy_Morphier_Helper $helper) {
        $this->word = (string)$word;
        $this->annots = false === $annots ? false : $helper->decodeAnnot($annots, true);

        $this->helper = $helper;

        if(false !== $this->annots) {
            foreach($this->annots as $annot) {
                $this->descriptors[] = $this->createDescriptor($word, $annot, $helper);
            }
        }
    }

    protected function createDescriptor($word, $annot, phpMorphy_Morphier_Helper $helper) {
        return new phpMorphy_WordDescriptor($word, $annot, $helper);
    }

    /**
     * @throws phpMorphy_Exception when no descriptor exists at $index
     */
    function getDescriptor($index) {
        if(!$this->offsetExists($index)) {
            throw new phpMorphy_Exception("Invalid index '$index' specified");
        }

        return $this->descriptors[$index];
    }

    /**
     * @param mixed $poses one part of speech or a list of them
     * @return array descriptors having at least one of the given parts of speech
     */
    function getByPartOfSpeech($poses) {
        $result = array();
        settype($poses, 'array');

        foreach($this as $desc) {
            if($desc->hasPartOfSpeech($poses)) {
                $result[] = $desc;
            }
        }

        // return count($result) ? $result : false;
        return $result;
    }

    function offsetExists($off) {
        return isset($this->descriptors[$off]);
    }

    function offsetUnset($off) {
        throw new phpMorphy_Exception(__CLASS__ . " is not mutable");
    }

    function offsetSet($off, $value) {
        throw new phpMorphy_Exception(__CLASS__ . " is not mutable");
    }

    function offsetGet($off) {
        return $this->getDescriptor($off);
    }

    function count() {
        return count($this->descriptors);
    }

    function getIterator() {
        return new ArrayIterator($this->descriptors);
    }
}

/**
 * One concrete form of a word: the form text, its number, its part of speech
 * and its (sorted) grammems.
 */
class phpMorphy_WordForm {
    protected
        $word,
        $form_no,
        $pos_id,
        $grammems
        ;

    function __construct($word, $form_no, $pos_id, $grammems) {
        $this->word = (string)$word;
        $this->form_no = (int)$form_no;
        $this->pos_id = $pos_id;

        sort($grammems);
        $this->grammems = $grammems;
    }

    function getPartOfSpeech() {
        return $this->pos_id;
    }

    function getGrammems() {
        return $this->grammems;
    }

    /**
     * @param mixed $grammems one grammem or a list of them
     * @return bool true when the list is non-empty and every listed grammem is present
     */
    function hasGrammems($grammems) {
        $grammems = (array)$grammems;

        $grammes_count = count($grammems);
        return $grammes_count && count(array_intersect($grammems, $this->grammems)) == $grammes_count;
    }

    /**
     * @return bool true when both lists have the same size and $a has no element missing from $b
     */
    static function compareGrammems($a, $b) {
        return count($a) == count($b) && count(array_diff($a, $b)) == 0;
    }

    function getWord() {
        return $this->word;
    }

    function getFormNo() {
        return $this->form_no;
    }
}

/**
 * Lazy, read-only view over all forms produced by ONE annotation of a word.
 * Results of the helper calls are cached after first use.
 */
class phpMorphy_WordDescriptor implements Countable, ArrayAccess, IteratorAggregate {
    protected
        $word,
        $annot,
        $helper,
        $cached_forms,
        $cached_base,
        $cached_pseudo_root,
        $all_forms,
        $found_form_no,
        $common_ancode_grammems;

    function __construct($word, $annot, phpMorphy_Morphier_Helper $helper) {
        $this->word = (string)$word;
        // the helper works on lists of annotations, so wrap the single one
        $this->annot = array($annot);

        $this->helper = $helper;
    }

    function getPseudoRoot() {
        if(!isset($this->cached_pseudo_root)) {
            list($this->cached_pseudo_root) = $this->helper->getPseudoRoot($this->word, $this->annot);
        }

        return $this->cached_pseudo_root;
    }

    function getBaseForm() {
        if(!isset($this->cached_base)) {
            list($this->cached_base) = $this->helper->getBaseForm($this->word, $this->annot);
        }

        return $this->cached_base;
    }

    function getAllForms() {
        if(!isset($this->cached_forms)) {
            $this->cached_forms = $this->helper->getAllForms($this->word, $this->annot);
        }

        return $this->cached_forms;
    }

    /**
     * @throws phpMorphy_Exception when no form exists at $index
     */
    function getWordForm($index) {
        $this->readAllForms();

        if(!$this->offsetExists($index)) {
            throw
// (operand of the `throw` in phpMorphy_WordDescriptor::getWordForm())
new phpMorphy_Exception("Invalid index '$index' given");
        }

        return $this->all_forms[$index];
    }

    /**
     * Builds a phpMorphy_WordForm whose grammems are the annotation's common
     * grammems (looked up once and cached) plus those of the given ancode.
     */
    protected function createWordForm($word, $form_no, $ancode) {
        if(!isset($this->common_ancode_grammems)) {
            $common_ancode = $this->annot[0]['common_ancode'];

            $this->common_ancode_grammems = isset($common_ancode) ?
                $this->helper->getGrammems($common_ancode) :
                array();
        }

        list($pos_id, $all_grammems) = $this->helper->getGrammemsAndPartOfSpeech($ancode);

        return new phpMorphy_WordForm($word, $form_no, $pos_id, array_merge($this->common_ancode_grammems, $all_grammems));
    }

    /**
     * Loads (once) every form of this descriptor and remembers which
     * positions belong to the looked-up form.
     *
     * @return array list of phpMorphy_WordForm
     */
    protected function readAllForms() {
        if(!isset($this->all_forms)) {
            $result = array();

            $form_no = 0;

            $found_form_no = array();
            foreach($this->helper->getAllFormsWithAncodes($this->word, $this->annot, $found_form_no) as $form) {
                $word = $form[0];

                $result[] = $this->createWordForm($word, $form_no, $form[1]);

                $form_no++;
            }

            // this descriptor holds exactly one annotation, hence index 0
            $this->found_form_no = $found_form_no[0];
            $this->all_forms = $result;
        }

        return $this->all_forms;
    }

    protected function getFoundFormNoLow() {
        $this->readAllForms();

        return $this->found_form_no['low'];
    }

    protected function getFoundFormNoHigh() {
        $this->readAllForms();

        return $this->found_form_no['high'];
    }

    /**
     * @return array the word forms (low..high inclusive) that correspond to the looked-up word
     */
    function getFoundWordForm() {
        $result = array();
        for($i = $this->getFoundFormNoLow(), $c = $this->getFoundFormNoHigh() + 1; $i < $c; $i++) {
            $result[] = $this->getWordForm($i);
        }

        return $result;
    }

    /**
     * @return bool true when at least one form has all the given grammems
     */
    function hasGrammems($grammems) {
        settype($grammems, 'array');

        foreach($this as $wf) {
            if($wf->hasGrammems($grammems)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @return array forms having all the given grammems (possibly empty)
     */
    function getWordFormsByGrammems($grammems) {
        settype($grammems, 'array');
        $result = array();

        foreach($this as $wf) {
            if($wf->hasGrammems($grammems)) {
                $result[] = $wf;
            }
        }

        return $result;
        // return count($result) ? $result : false;
    }

    /**
     * @return bool true when at least one form has one of the given parts of speech (strict comparison)
     */
    function hasPartOfSpeech($poses) {
        settype($poses, 'array');

        foreach($this as $wf) {
            if(in_array($wf->getPartOfSpeech(), $poses, true)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @return array forms whose part of speech is one of the given ones (possibly empty)
     */
    function getWordFormsByPartOfSpeech($poses) {
        settype($poses, 'array');
        $result = array();

        foreach($this as $wf) {
            if(in_array($wf->getPartOfSpeech(), $poses, true)) {
                $result[] = $wf;
            }
        }

        return $result;
        // return count($result) ? $result : false;
    }

    function count() {
        return count($this->readAllForms());
    }

    function offsetExists($off) {
        $this->readAllForms();

        return isset($this->all_forms[$off]);
    }

    function offsetSet($off, $value) {
        throw new phpMorphy_Exception(__CLASS__ . " is not mutable");
    }

    function offsetUnset($off) {
        throw new phpMorphy_Exception(__CLASS__ . " is not mutable");
    }

    function offsetGet($off) {
        return $this->getWordForm($off);
    }

    function getIterator() {
        $this->readAllForms();

        return new ArrayIterator($this->all_forms);
    }
}

// ----------------------------
// Finders
// ----------------------------
/**
 * Finds the annotation of a word and exposes the decoder that understands it.
 */
interface phpMorphy_Morphier_Finder_Interface {
    function findWord($word);
    function decodeAnnot($raw, $withBase);
    function getAnnotDecoder();
}

/**
 * Finder base class with a one-entry cache: asking for the same word twice
 * in a row returns the previous result without a new search.
 */
abstract class phpMorphy_Morphier_Finder_Base implements phpMorphy_Morphier_Finder_Interface {
    protected
        $annot_decoder,
        $prev_word,
        $prev_result = false;

    function __construct(phpMorphy_AnnotDecoder_Interface $annotDecoder) {
        $this->annot_decoder = $annotDecoder;
    }

    /**
     * @return mixed whatever doFindWord() returns for the word
     */
    function findWord($word) {
        if($this->prev_word === $word) {
            return $this->prev_result;
        }

        $result = $this->doFindWord($word);

        $this->prev_word = $word;
        $this->prev_result = $result;

        return $result;
    }

    function getAnnotDecoder() {
        return $this->annot_decoder;
    }

    function decodeAnnot($raw, $withBase) {
        return $this->annot_decoder->decode($raw, $withBase);
    }

    /** Performs the actual (uncached) lookup. */
    abstract protected function doFindWord($word);
}

/**
 * Finder that looks the word up by walking the automaton from its root.
 */
class phpMorphy_Morphier_Finder_Common extends phpMorphy_Morphier_Finder_Base {
    protected
        $fsa,
        $root;

    function __construct(phpMorphy_Fsa_Interface $fsa,
// (second parameter of phpMorphy_Morphier_Finder_Common::__construct())
phpMorphy_AnnotDecoder_Interface $annotDecoder) {
        parent::__construct($annotDecoder);

        $this->fsa = $fsa;
        $this->root = $this->fsa->getRootTrans();
    }

    function getFsa() {
        return $this->fsa;
    }

    /**
     * @return mixed the annotation stored for the word, false when the walk
     *               fails or the word carries no annotation
     */
    protected function doFindWord($word) {
        $result = $this->fsa->walk($this->root, $word);

        if(!$result['result'] || null === $result['annot']) {
            return false;
        }

        return $result['annot'];
    }
}

/**
 * Finder that predicts by suffix: it keeps dropping the first character of
 * the word and retries the dictionary lookup with the shorter remainder.
 */
class phpMorphy_Morphier_Finder_Predict_Suffix extends phpMorphy_Morphier_Finder_Common {
    protected
        $min_suf_len,
        $unicode;

    /**
     * @param string $encoding encoding name handed to phpMorphy_UnicodeHelper::create()
     * @param int $minimalSuffixLength bound used to limit how many leading characters may be dropped
     */
    function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_AnnotDecoder_Interface $annotDecoder, $encoding, $minimalSuffixLength = 4) {
        parent::__construct($fsa, $annotDecoder);

        $this->min_suf_len = (int)$minimalSuffixLength;
        $this->unicode = phpMorphy_UnicodeHelper::create($encoding);
    }

    protected function doFindWord($word) {
        $word_len = $this->unicode->strlen($word);

        if(!$word_len) {
            return false;
        }

        for($i = 1, $c = $word_len - $this->min_suf_len; $i < $c; $i++) {
            // strip one leading character (its byte size depends on the encoding)
            $word = $GLOBALS['__phpmorphy_substr']($word, $this->unicode->firstCharSize($word));

            if(false !== ($result = parent::doFindWord($word))) {
                break;
            }
        }

        // $i < $c only when the loop was left through `break`, i.e. a suffix was found
        if($i < $c) {
            //$known_len = $word_len - $i;
            $unknown_len = $i;

            return $result;
            /*
            return $this->fixAnnots(
                $this->decodeAnnot($result, true),
                $unknown_len
            );
            */
        } else {
            return false;
        }
    }

    /**
     * Sets 'cplen' of every annotation to $len.
     */
    protected function fixAnnots($annots, $len) {
        for($i = 0, $c = count($annots); $i < $c; $i++) {
            $annots[$i]['cplen'] = $len;
        }

        return $annots;
    }
}

/**
 * Collector used during prediction: keeps, per part of speech, the collected
 * annotation with the highest 'freq'.
 */
class phpMorphy_Morphier_PredictCollector extends phpMorphy_Fsa_WordsCollector {
    protected
        $used_poses = array(),
        $annot_decoder,
        $collected = 0;

    function __construct($limit, phpMorphy_AnnotDecoder_Interface $annotDecoder) {
        parent::__construct($limit);

        $this->annot_decoder = $annotDecoder;
    }

    /**
     * @return bool false once more than $this->limit items were collected, true otherwise
     */
    function collect($path, $annotRaw) {
        if($this->collected > $this->limit) {
            return false;
        }

        $used_poses =& $this->used_poses;
        $annots = $this->decodeAnnot($annotRaw);

        for($i = 0, $c = count($annots); $i < $c; $i++) {
            $annot = $annots[$i];
// (continues the per-annotation loop of phpMorphy_Morphier_PredictCollector::collect())
$annot['cplen'] = $annot['plen'] = 0;

            $pos_id = $annot['pos_id'];

            if(isset($used_poses[$pos_id])) {
                // this part of speech was seen before: keep the more frequent annotation
                $result_idx = $used_poses[$pos_id];

                if($annot['freq'] > $this->items[$result_idx]['freq']) {
                    $this->items[$result_idx] = $annot;
                }
            } else {
                // first annotation with this part of speech: remember its position in $this->items
                $used_poses[$pos_id] = count($this->items);
                $this->items[] = $annot;
            }
        }

        $this->collected++;
        return true;
    }

    /** Resets the collector, including the per-part-of-speech bookkeeping. */
    function clear() {
        parent::clear();
        $this->collected = 0;
        $this->used_poses = array();
    }

    /** Decodes a raw annotation including base prefix/suffix data. */
    function decodeAnnot($annotRaw) {
        return $this->annot_decoder->decode($annotRaw, true);
    }
}

/**
 * Finder that predicts annotations from a prediction automaton which is
 * walked with the REVERSED word.
 */
class phpMorphy_Morphier_Finder_Predict_Databse extends phpMorphy_Morphier_Finder_Common {
    protected
        $collector,
        $unicode,
        $graminfo,
        $min_postfix_match;

    /**
     * @param string $encoding encoding name handed to phpMorphy_UnicodeHelper::create()
     * @param int $minPostfixMatch minimal number of matched characters required before candidates are collected
     * @param int $collectLimit limit handed to the collector
     */
    function __construct(
        phpMorphy_Fsa_Interface $fsa,
        phpMorphy_AnnotDecoder_Interface $annotDecoder,
        $encoding,
        phpMorphy_GramInfo_Interace $graminfo,
        $minPostfixMatch = 2,
        $collectLimit = 32
    ) {
        parent::__construct($fsa, $annotDecoder);

        $this->graminfo = $graminfo;
        $this->min_postfix_match = $minPostfixMatch;
        $this->collector = $this->createCollector($collectLimit, $this->getAnnotDecoder());

        $this->unicode = phpMorphy_UnicodeHelper::create($encoding);
    }

    // NOTE(review): phpmorphy_annot_decoder_new() is not defined in the visible part of
    // this file and nothing visible calls this method — possibly dead code; confirm.
    protected function createAnnotDecoder() {
        return phpmorphy_annot_decoder_new('predict');
    }

    /**
     * @return array|false matching annotations, false when none can be determined
     */
    protected function doFindWord($word) {
        $rev_word = $this->unicode->strrev($word);
        $result = $this->fsa->walk($this->root, $rev_word);

        if($result['result'] && null !== $result['annot']) {
            $annots = $result['annot'];
        } else {
            // only part of the reversed word was walked: measure the matched part in characters
            $match_len = $this->unicode->strlen($this->unicode->fixTrailing($GLOBALS['__phpmorphy_substr']($rev_word, 0, $result['walked'])));

            if(null === ($annots = $this->determineAnnots($result['last_trans'], $match_len))) {
                return false;
            }
        }

        // annotations taken straight from the automaton are still raw
        if(!is_array($annots)) {
            $annots = $this->collector->decodeAnnot($annots);
        }

        return $this->fixAnnots($word, $annots);
    }

    /**
     * Returns the annotation attached to $trans; when there is none and enough
     * of the word matched, collects candidate annotations reachable from $trans.
     */
    protected function determineAnnots($trans, $matchLen) {
        $annots = $this->fsa->getAnnot($trans);

        if(null == $annots && $matchLen >= $this->min_postfix_match) {
            $this->collector->clear();
// (continues the collecting branch of phpMorphy_Morphier_Finder_Predict_Databse::determineAnnots())
$this->fsa->collect(
                $trans,
                $this->collector->getCallback()
            );

            $annots = $this->collector->getItems();
        }

        return $annots;
    }

    /**
     * Keeps only the annotations whose flexia (prefix and suffix of the found
     * form) actually occur at the start and end of the word, and clears their
     * prefix lengths.
     *
     * @return array|false the remaining annotations, false when none remain
     */
    protected function fixAnnots($word, $annots) {
        $result = array();

        // remove all prefixes?
        for($i = 0, $c = count($annots); $i < $c; $i++) {
            $annot = $annots[$i];

            $annot['cplen'] = $annot['plen'] = 0;

            $flexias = $this->graminfo->readFlexiaData($annot, false);

            $prefix = $flexias[$annot['form_no'] * 2];
            $suffix = $flexias[$annot['form_no'] * 2 + 1];

            $plen = $GLOBALS['__phpmorphy_strlen']($prefix);
            $slen = $GLOBALS['__phpmorphy_strlen']($suffix);

            // an empty prefix/suffix always matches; lengths are computed once above
            if(
                (!$plen || $GLOBALS['__phpmorphy_substr']($word, 0, $plen) === $prefix) &&
                (!$slen || $GLOBALS['__phpmorphy_substr']($word, -$slen) === $suffix)
            ) {
                $result[] = $annot;
            }
        }

        return count($result) ? $result : false;
    }

    protected function createCollector($limit) {
        return new phpMorphy_Morphier_PredictCollector($limit, $this->getAnnotDecoder());
    }
}

// ----------------------------
// Morphiers
// ----------------------------
/**
 * Morphier built from a finder (locates the annotation of a word) and a
 * helper (turns annotations into results). The helper is cloned so that
 * assigning the finder's decoder does not affect the caller's instance.
 */
abstract class phpMorphy_Morphier_Base implements phpMorphy_Morphier_Interface {
    protected
        /**
         * @var phpMorphy_Morphier_Finder_Interface
         */
        $finder,
        /**
         * @var phpMorphy_Morphier_Helper
         */
        $helper;

    function __construct(phpMorphy_Morphier_Finder_Interface $finder, phpMorphy_Morphier_Helper $helper) {
        $this->finder = $finder;

        $this->helper = clone $helper;
        $this->helper->setAnnotDecoder($finder->getAnnotDecoder());
    }

    /**
     * @return phpMorphy_Morphier_Finder_Interface
     */
    function getFinder() {
        return $this->finder;
    }

    /**
     * @return phpMorphy_Morphier_Helper
     */
    function getHelper() {
        return $this->helper;
    }

    /**
     * @return array|false decoded annotations (with base info), false when the word is not found
     */
    function getAnnot($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->decodeAnnot($annots, true);
    }

    /**
     * @return phpMorphy_WordDescriptor_Collection|false false when the word is not found
     */
    function getWordDescriptor($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getWordDescriptor($word, $annots);
}

    // Every lookup below follows the same pattern: ask the finder for the word's
    // annotations, return false when the word is unknown, otherwise delegate to
    // the helper method of the same purpose.

    function getAllFormsWithAncodes($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getAllFormsWithResolvedAncodes($word, $annots);
    }

    function getPartOfSpeech($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getPartOfSpeech($word, $annots);
    }

    function getBaseForm($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getBaseForm($word, $annots);
    }

    function getPseudoRoot($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getPseudoRoot($word, $annots);
    }

    function getAllForms($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getAllForms($word, $annots);
    }

    function getAncode($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getAncode($annots);
    }

    function getGrammarInfo($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getGrammarInfo($annots);
    }

    function getGrammarInfoMergeForms($word) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        return $this->helper->getGrammarInfoMergeForms($annots);
    }

    /**
     * Returns the forms of $word that match the given part of speech and
     * grammems (or that the callback accepts).
     *
     * @param string $word
     * @param mixed $partOfSpeech required part of speech, null for any
     * @param mixed $grammems grammems every returned form must have
     * @param bool $returnOnlyWord true: only the form strings; false: detail arrays
     * @param callable|null $callback optional custom filter, see the helper method
     * @return array|false false when the word is not found
     */
    function castFormByGramInfo($word, $partOfSpeech, $grammems, $returnOnlyWord = false, $callback = null) {
        if(false === ($annots = $this->finder->findWord($word))) {
            return false;
        }

        // Forward the filter arguments: the helper requires $partOfSpeech and
        // $grammems, and without them no filtering could take place.
        return $this->helper->castFormByGramInfo(
            $word,
            $annots,
            $partOfSpeech,
            $grammems,
            $returnOnlyWord,
            $callback
        );
    }

    /**
     * Casts $word to the form taken by $patternWord.
     *
     * NOTE(review): phpMorphy_Morphier_Helper, as defined in this file, has no
     * castFormByPattern() method; unless a subclass provides it this call will
     * fail at runtime — confirm.
     *
     * @return mixed false when either word is not found
     */
    function castFormByPattern($word, $patternWord, $returnOnlyWord = false, $callback = null) {
        if(false === ($orig_annots = $this->finder->findWord($word))) {
            return false;
        }

        if(false === ($pattern_annots = $this->finder->findWord($patternWord))) {
            return false;
        }

        return $this->helper->castFormByPattern(
            $word, $orig_annots,
            $patternWord, $pattern_annots,
            $returnOnlyWord,
            $callback
        );
    }
};

class phpMorphy_Morphier_Common extends
// (parent class name of the `class phpMorphy_Morphier_Common extends` declaration)
// Dictionary morphier: plain automaton lookup with the common annotation decoder.
phpMorphy_Morphier_Base {
    function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) {
        parent::__construct(
            new phpMorphy_Morphier_Finder_Common(
                $fsa,
                $this->createAnnotDecoder($helper)
            ),
            $helper
        );
    }

    // decoder is shared per end-of-string marker through the factory
    protected function createAnnotDecoder(phpMorphy_Morphier_Helper $helper) {
        return phpMorphy_AnnotDecoder_Factory::create($helper->getGramInfo()->getEnds())->getCommonDecoder();
    }
};

/**
 * Morphier that predicts by suffix (finder: phpMorphy_Morphier_Finder_Predict_Suffix,
 * created with a minimal suffix length of 4).
 */
class phpMorphy_Morphier_Predict_Suffix extends phpMorphy_Morphier_Base {
    function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) {
        parent::__construct(
            new phpMorphy_Morphier_Finder_Predict_Suffix(
                $fsa,
                $this->createAnnotDecoder($helper),
                $helper->getGramInfo()->getEncoding(),
                4
            ),
            $helper
        );
    }

    protected function createAnnotDecoder(phpMorphy_Morphier_Helper $helper) {
        return phpMorphy_AnnotDecoder_Factory::create($helper->getGramInfo()->getEnds())->getCommonDecoder();
    }
}

/**
 * Morphier that predicts from the prediction automaton (finder:
 * phpMorphy_Morphier_Finder_Predict_Databse, minimal postfix match 2,
 * collect limit 32). Uses the predict annotation decoder.
 */
class phpMorphy_Morphier_Predict_Database extends phpMorphy_Morphier_Base {
    function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) {
        parent::__construct(
            new phpMorphy_Morphier_Finder_Predict_Databse(
                $fsa,
                $this->createAnnotDecoder($helper),
                $helper->getGramInfo()->getEncoding(),
                $helper->getGramInfo(),
                2,
                32
            ),
            $helper
        );
    }

    protected function createAnnotDecoder(phpMorphy_Morphier_Helper $helper) {
        return phpMorphy_AnnotDecoder_Factory::create($helper->getGramInfo()->getEnds())->getPredictDecoder();
    }
}

/**
 * Morphier that processes many words per call: its lookups take a list of
 * words and return results keyed by word.
 */
class phpMorphy_Morphier_Bulk implements phpMorphy_Morphier_Interface {
    protected
        $fsa,
        $root_trans,
        $helper,
        $notfound = array(),
        $graminfo;

    function __construct(phpMorphy_Fsa_Interface $fsa, phpMorphy_Morphier_Helper $helper) {
        $this->fsa = $fsa;
        $this->root_trans = $fsa->getRootTrans();

        // work on a private copy so the decoder assignment stays local
        $this->helper = clone $helper;
        $this->helper->setAnnotDecoder($this->createAnnotDecoder($helper));

        $this->graminfo = $helper->getGramInfo();
    }

    function getFsa() {
        return $this->fsa;
    }

    function getHelper() {
        return $this->helper;
    }

    function
// (name and body of the method whose `function` keyword ends the previous line)
getGraminfo() {
        return $this->graminfo;
    }

    /**
     * @return array words from the last lookup that were not found in the dictionary
     */
    function getNotFoundWords() {
        return $this->notfound;
    }

    protected function createAnnotDecoder(phpMorphy_Morphier_Helper $helper) {
        return new phpMorphy_AnnotDecoder_Common($helper->getGramInfo()->getEnds());
    }

    /**
     * @param array $word list of words (despite the singular name)
     * @return array word => list of decoded annotation sets
     */
    function getAnnot($word) {
        $result = array();

        foreach($this->findWord($word) as $annot => $words) {
            $annot = $this->helper->decodeAnnot($annot, true);

            foreach($words as $word) {
                $result[$word][] = $annot;
            }
        }

        return $result;
    }

    /** @return array word => list of base forms */
    function getBaseForm($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, true, false, false);
    }

    /** @return array word => list of all forms */
    function getAllForms($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, false, false);
    }

    /** @return array word => list of pseudo roots */
    function getPseudoRoot($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, true, false);
    }

    /** @return array word => list of parts of speech */
    function getPartOfSpeech($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, false, true);
    }

    /**
     * Looks up all words and feeds every distinct raw annotation to the named
     * helper method.
     *
     * @param array $words words to look up
     * @param string $method helper method name
     * @param bool $callWithWord true: call $method($word, $annot) once per word;
     *                           false: call $method($annot) once and share the result
     * @return array word => helper result
     */
    protected function processAnnotsWithHelper($words, $method, $callWithWord = false) {
        $result = array();

        foreach($this->findWord($words) as $annot_raw => $words) {
            if($GLOBALS['__phpmorphy_strlen']($annot_raw) == 0) continue;

            if($callWithWord) {
                foreach($words as $word) {
                    $result[$word] = $this->helper->$method($word, $annot_raw);
                }
            } else {
                $result_for_annot = $this->helper->$method($annot_raw);

                foreach($words as $word) {
                    $result[$word] = $result_for_annot;
                }
            }
        }

        return $result;
    }

    function getAncode($words) {
        return $this->processAnnotsWithHelper($words, 'getAncode');
    }

    function getGrammarInfoMergeForms($words) {
        return $this->processAnnotsWithHelper($words, 'getGrammarInfoMergeForms');
    }

    function getGrammarInfo($words) {
        return $this->processAnnotsWithHelper($words, 'getGrammarInfo');
    }

    function getAllFormsWithAncodes($words) {
        return $this->processAnnotsWithHelper($words, 'getAllFormsWithResolvedAncodes', true);
    }

    /**
     * @param array $word list of words (the parameter keeps the interface's singular name)
     * @return array word => phpMorphy_WordDescriptor_Collection
     */
    function getWordDescriptor($word) {
        // pass the actual parameter: the previous code referenced an undefined $words
        return $this->processAnnotsWithHelper($word,
'getWordDescriptor', true); } protected function findWord($words) { $unknown_words_annot = ''; $this->notfound = array(); list($labels, $finals, $dests) = $this->buildPatriciaTrie($words); $annots = array(); $unknown_words_annot = ''; $stack = array(0, '', $this->root_trans); $stack_idx = 0; $fsa = $this->fsa; // TODO: Improve this while($stack_idx >= 0) { $n = $stack[$stack_idx]; $path = $stack[$stack_idx + 1] . $labels[$n]; $trans = $stack[$stack_idx + 2]; $stack_idx -= 3; // TODO: Remove items from stack? (performance!!!) $is_final = $finals[$n] > 0; $result = false; if(false !== $trans && $n > 0) { $label = $labels[$n]; $result = $fsa->walk($trans, $label, $is_final); if($GLOBALS['__phpmorphy_strlen']($label) == $result['walked']) { $trans = $result['word_trans']; } else { $trans = false; } } if($is_final) { if(false !== $trans && isset($result['annot'])) { $annots[$result['annot']][] = $path; } else { //$annots[$unknown_words_annot][] = $path; $this->notfound[] = $path; } } if(false !== $dests[$n]) { foreach($dests[$n] as $dest) { $stack_idx += 3; $stack[$stack_idx] = $dest; $stack[$stack_idx + 1] = $path; $stack[$stack_idx + 2] = $trans; } } } return $annots; } protected function composeForms($annotsRaw, $onlyBase, $pseudoRoot, $partOfSpeech) { $result = array(); // process found annotations foreach($annotsRaw as $annot_raw => $words) { if($GLOBALS['__phpmorphy_strlen']($annot_raw) == 0) continue; foreach($this->helper->decodeAnnot($annot_raw, $onlyBase) as $annot) { if(!($onlyBase || $pseudoRoot)) { $flexias = $this->graminfo->readFlexiaData($annot); } $cplen = $annot['cplen']; $plen = $annot['plen']; $flen = $annot['flen']; if($partOfSpeech) { $pos_id = $this->helper->extractPartOfSpeech($annot); } foreach($words as $word) { if($flen) { $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen, -$flen); } else { if($cplen || $plen) { $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen); } else { $base = $word; } } $prefix = $cplen ? 
$GLOBALS['__phpmorphy_substr']($word, 0, $cplen) : ''; if($pseudoRoot) { $result[$word][$base] = 1; } else if($onlyBase) { $form = $prefix . $annot['base_prefix'] . $base . $annot['base_suffix']; $result[$word][$form] = 1; } else if($partOfSpeech) { $result[$word][$pos_id] = 1; } else { for($i = 0, $c = count($flexias); $i < $c; $i += 2) { $form = $prefix . $flexias[$i] . $base . $flexias[$i + 1]; $result[$word][$form] = 1; } } } } } for($keys = array_keys($result), $i = 0, $c = count($result); $i < $c; $i++) { $key = $keys[$i]; $result[$key] = array_keys($result[$key]); } return $result; } protected function buildPatriciaTrie($words) { if(!is_array($words)) { throw new phpMorphy_Exception("Words must be array"); } sort($words); $stack = array(); $prev_word = ''; $prev_word_len = 0; $prev_lcp = 0; $state_labels = array(); $state_finals = array(); $state_dests = array(); $state_labels[] = ''; $state_finals = '0'; $state_dests[] = array(); $node = 0; foreach($words as $word) { if($word == $prev_word) { continue; } $word_len = $GLOBALS['__phpmorphy_strlen']($word); // find longest common prefix for($lcp = 0, $c = min($prev_word_len, $word_len); $lcp < $c && $word[$lcp] == $prev_word[$lcp]; $lcp++); if($lcp == 0) { $stack = array(); $new_state_id = count($state_labels); $state_labels[] = $word; $state_finals .= '1'; $state_dests[] = false; $state_dests[0][] = $new_state_id; $node = $new_state_id; } else { $need_split = true; $trim_size = 0; // for split if($lcp == $prev_lcp) { $need_split = false; $node = $stack[count($stack) - 1]; } elseif($lcp > $prev_lcp) { if($lcp == $prev_word_len) { $need_split = false; } else { $need_split = true; $trim_size = $lcp - $prev_lcp; } $stack[] = $node; } else { $trim_size = $GLOBALS['__phpmorphy_strlen']($prev_word) - $lcp; for($stack_size = count($stack) - 1; ;--$stack_size) { $trim_size -= $GLOBALS['__phpmorphy_strlen']($state_labels[$node]); if($trim_size <= 0) { break; } if(count($stack) < 1) { throw new 
phpMorphy_Exception('Infinite loop posible'); } $node = array_pop($stack); } $need_split = $trim_size < 0; $trim_size = abs($trim_size); if($need_split) { $stack[] = $node; } else { $node = $stack[$stack_size]; } } if($need_split) { $node_key = $state_labels[$node]; // split $new_node_id_1 = count($state_labels); $new_node_id_2 = $new_node_id_1 + 1; // new_node_1 $state_labels[] = $GLOBALS['__phpmorphy_substr']($node_key, $trim_size); $state_finals .= $state_finals[$node]; $state_dests[] = $state_dests[$node]; // adjust old node $state_labels[$node] = $GLOBALS['__phpmorphy_substr']($node_key, 0, $trim_size); $state_finals[$node] = '0'; $state_dests[$node] = array($new_node_id_1); // append new node, new_node_2 $state_labels[] = $GLOBALS['__phpmorphy_substr']($word, $lcp); $state_finals .= '1'; $state_dests[] = false; $state_dests[$node][] = $new_node_id_2; $node = $new_node_id_2; } else { $new_node_id = count($state_labels); $state_labels[] = $GLOBALS['__phpmorphy_substr']($word, $lcp); $state_finals .= '1'; $state_dests[] = false; if(false !== $state_dests[$node]) { $state_dests[$node][] = $new_node_id; } else { $state_dests[$node] = array($new_node_id); } $node = $new_node_id; } } $prev_word = $word; $prev_word_len = $word_len; $prev_lcp = $lcp; } return array($state_labels, $state_finals, $state_dests); } } PK+[[s11 unicode.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ abstract class phpMorphy_UnicodeHelper { protected static $cache = array(); static function create($encoding) { $encoding = $GLOBALS['__phpmorphy_strtolower']($encoding); if(isset(self::$cache[$encoding])) { return self::$cache[$encoding]; } $result = self::doCreate($encoding); self::$cache[$encoding] = $result; return $result; } protected static function doCreate($encoding) { if(preg_match('~^(utf|ucs)(-)?([0-9]+)(-)?(le|be)?$~', $encoding, $matches)) { $utf_type = $matches[1]; $utf_base = (int)$matches[3]; $endiannes = ''; switch($utf_type) { case 'utf': if(!in_array($utf_base, array(8, 16, 32))) { throw new phpMorphy_Exception('Invalid utf base'); } break; case 'ucs': if(!in_array($utf_base, array(2, 4))) { throw new phpMorphy_Exception('Invalid ucs base'); } break; default: throw new phpMorphy_Exception('Internal error'); } if($utf_base > 8 || 'ucs' === $utf_type) { if(isset($matches[5])) { $endiannes = $matches[5] == 'be' ? 'be' : 'le'; } else { $tmp = pack('L', 1); $endiannes = ord($tmp[0]) == 0 ? 'be' : 'le'; } } if($utf_type == 'ucs' || $utf_base > 8) { $encoding_name = "$utf_type-$utf_base$endiannes"; } else { $encoding_name = "$utf_type-$utf_base"; } $clazz = "phpMorphy_UnicodeHelper_" . 
str_replace('-', '_', $encoding_name); return new $clazz($encoding_name); } else { return new phpMorphy_UnicodeHelper_singlebyte($encoding); } } abstract function firstCharSize($str); abstract function strrev($str); abstract function strlen($str); abstract function fixTrailing($str); } abstract class phpMorphy_UnicodeHelper_Base extends phpMorphy_UnicodeHelper { protected static $ICONV, $MB, $STRLEN_FOO ; protected $encoding, $strlen_foo, $iconv, $mb ; protected function __construct($encoding) { $this->encoding = $encoding; if(!isset(self::$ICONV) || !isset(self::$MB)) { if(false !== (self::$ICONV = extension_loaded('iconv'))) { self::$STRLEN_FOO = 'iconv_strlen'; } else if(false !== (self::$MB = extension_loaded('mbstring'))) { self::$STRLEN_FOO = 'mb_strlen'; } } } /* function fixTrailing($str) { $to = $this->encoding === 'utf-16' ? 'utf-32' : 'utf-16'; if(self::ICONV) { $new = @iconv($this->encoding, $to, $str); return @iconv($to, $this->encoding, $new); } else if(self::MB) { $new = @mb_convert_encoding($str, $to, $this->encoding); return @mb_convert_encoding($str, $this->encoding, $to); } else { $this->php_fixTrailing($str); } } */ function strlen($str) { if(isset(self::$STRLEN_FOO)) { $foo = self::$STRLEN_FOO; return $foo($str, $this->encoding); } else { return $this->php_strlen($str); } } protected abstract function php_strlen($str); } class phpMorphy_UnicodeHelper_MultiByteFixed extends phpMorphy_UnicodeHelper_Base { protected $size; protected function __construct($encoding, $size) { parent::__construct($encoding); $this->size = $size; } function firstCharSize($str) { return $this->size; } function strrev($str) { return implode('', array_reverse(str_split($str, $this->size))); } protected function php_strlen($str) { return $GLOBALS['__phpmorphy_strlen']($str) / $this->size; } function fixTrailing($str) { $len = $GLOBALS['__phpmorphy_strlen']($str); if(($len % $this->size) > 0) { return $GLOBALS['__phpmorphy_substr']($str, 0, floor($len / $this->size) * 
$this->size); } return $str; } } // single byte encoding class phpMorphy_UnicodeHelper_singlebyte extends phpMorphy_UnicodeHelper_Base { function firstCharSize($str) { return 1; } function strrev($str) { return strrev($str); } function strlen($str) { return $GLOBALS['__phpmorphy_strlen']($str); } function fixTrailing($str) { return $str; } protected function php_strlen($str) { return $GLOBALS['__phpmorphy_strlen']($str); } } // utf8 class phpMorphy_UnicodeHelper_utf_8 extends phpMorphy_UnicodeHelper_Base { protected $tails_length; protected function __construct($encoding) { parent::__construct($encoding); $this->tails_length = $this->getTailsLength(); } function firstCharSize($str) { return 1 + $this->tails_length[ord($str[0])]; } function strrev($str) { preg_match_all('/./us', $str, $matches); return implode('', array_reverse($matches[0])); /* $result = array(); for($i = 0, $c = $GLOBALS['__phpmorphy_strlen']($str); $i < $c;) { $len = 1 + $this->tails_length[ord($str[$i])]; $result[] = $GLOBALS['__phpmorphy_substr']($str, $i, $len); $i += $len; } return implode('', array_reverse($result)); */ } function fixTrailing($str) { $strlen = $GLOBALS['__phpmorphy_strlen']($str); if(!$strlen) { return ''; } $ord = ord($str[$strlen - 1]); if(($ord & 0x80) == 0) { return $str; } for($i = $strlen - 1; $i >= 0; $i--) { $ord = ord($str[$i]); if(($ord & 0xC0) == 0xC0) { $diff = $strlen - $i; $seq_len = $this->tails_length[$ord] + 1; $miss = $seq_len - $diff; if($miss) { return $GLOBALS['__phpmorphy_substr']($str, 0, -($seq_len - $miss)); } else { return $str; } } } return ''; } protected function php_strlen($str) { preg_match_all('/./us', $str, $matches); return count($matches[0]); } protected function getTailsLength() { return array( 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3, 4,4,4,4,5,5,0,0 ); } } // utf16 class phpMorphy_UnicodeHelper_utf_16_Base extends phpMorphy_UnicodeHelper_Base { protected $is_be, $char_fmt; protected function __construct($encoding, $isBigEndian) { parent::__construct($encoding); $this->is_be = (bool)$isBigEndian; $this->char_fmt = $isBigEndian ? 'n' : 'v'; } function firstCharSize($str) { list(, $ord) = unpack($this->char_fmt, $str); return $ord >= 0xD800 && $ord <= 0xDFFF ? 4 : 2; } function strrev($str) { $result = array(); $count = $GLOBALS['__phpmorphy_strlen']($str) / 2; $fmt = $this->char_fmt . $count; $words = array_reverse(unpack($fmt, $str)); for($i = 0; $i < $count; $i++) { $ord = $words[$i]; if($ord >= 0xD800 && $ord <= 0xDFFF) { // swap surrogates $t = $words[$i]; $words[$i] = $words[$i + 1]; $i++; $words[$i] = $t; } } array_unshift($words, $fmt); return call_user_func_array('pack', $words); } function fixTrailing($str) { $strlen = $GLOBALS['__phpmorphy_strlen']($str); if($strlen & 1) { $strlen--; $str = $GLOBALS['__phpmorphy_substr']($str, 0, $strlen); } if($strlen < 2) { return ''; } list(, $ord) = unpack($this->char_fmt, $GLOBALS['__phpmorphy_substr']($str, -2, 2)); if($this->isSurrogate($ord)) { if($strlen < 4) { return ''; } list(, $ord) = unpack($this->char_fmt, $GLOBALS['__phpmorphy_substr']($str, -4, 2)); if($this->isSurrogate($ord)) { // full surrogate pair return $str; } else { return $GLOBALS['__phpmorphy_substr']($str, 0, -2); } } return $str; } protected function php_strlen($str) { $count = $GLOBALS['__phpmorphy_strlen']($str) / 2; $fmt = $this->char_fmt . 
$count; foreach(unpack($fmt, $str) as $ord) { if($ord >= 0xD800 && $ord <= 0xDFFF) { $count--; } } return $count; } protected function isSurrogate($ord) { return $ord >= 0xD800 && $ord <= 0xDFFF; } } class phpMorphy_UnicodeHelper_utf_16le extends phpMorphy_UnicodeHelper_utf_16_Base { protected function __construct($encoding) { parent::__construct($encoding, false); } } class phpMorphy_UnicodeHelper_utf_16be extends phpMorphy_UnicodeHelper_utf_16_Base { protected function __construct($encoding) { parent::__construct($encoding, true); } } // utf32 class phpMorphy_UnicodeHelper_utf_32_Base extends phpMorphy_UnicodeHelper_MultiByteFixed { protected function __construct($encoding) { parent::__construct($encoding, 4); } } class phpMorphy_UnicodeHelper_utf_32le extends phpMorphy_UnicodeHelper_utf_32_Base { } class phpMorphy_UnicodeHelper_utf_32be extends phpMorphy_UnicodeHelper_utf_32_Base { } // ucs2, ucs4 class phpMorphy_UnicodeHelper_ucs_2le extends phpMorphy_UnicodeHelper_MultiByteFixed { protected function __construct($encoding) { parent::__construct($encoding, 2); } } class phpMorphy_UnicodeHelper_ucs_2be extends phpMorphy_UnicodeHelper_MultiByteFixed { protected function __construct($encoding) { parent::__construct($encoding, 2); } } class phpMorphy_UnicodeHelper_ucs_4le extends phpMorphy_UnicodeHelper_MultiByteFixed { protected function __construct($encoding) { parent::__construct($encoding, 4); } } class phpMorphy_UnicodeHelper_ucs_4be extends phpMorphy_UnicodeHelper_MultiByteFixed { protected function __construct($encoding) { parent::__construct($encoding, 4); } } PK+[[_ source.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ define('PHPMORPHY_SOURCE_FSA', 'fsa'); define('PHPMORPHY_SOURCE_DBA', 'dba'); define('PHPMORPHY_SOURCE_SQL', 'sql'); interface phpMorphy_Source_Interface { function getValue($key); } class phpMorphy_Source_Fsa implements phpMorphy_Source_Interface { protected $fsa, $root; function __construct(phpMorphy_Fsa_Interface $fsa) { $this->fsa = $fsa; $this->root = $fsa->getRootTrans(); } function getFsa() { return $this->fsa; } function getValue($key) { if(false === ($result = $this->fsa->walk($this->root, $key, true)) || !$result['annot']) { return false; } return $result['annot']; } } class phpMorphy_Source_Dba implements phpMorphy_Source_Interface { const DEFAULT_HANDLER = 'db3'; protected $handle; function __construct($fileName, $options = null) { $this->handle = $this->openFile($fileName, $this->repairOptions($options)); } function close() { if(isset($this->handle)) { dba_close($this->handle); $this->handle = null; } } static function getDefaultHandler() { return self::DEFAULT_HANDLER; } protected function openFile($fileName, $options) { if(false === ($new_filename = realpath($fileName))) { throw new phpMorphy_Exception("Can`t get realpath for '$fileName' file"); } $lock_mode = $options['lock_mode']; $handler = $options['handler']; $func = $options['persistent'] ? 
'dba_popen' : 'dba_open';

        // open read-only ("r") with the configured lock mode appended
        if(false === ($result = $func($new_filename, "r$lock_mode", $handler))) {
            // Fixed: the message used the undefined variable $fileFile; report the
            // file name the caller passed in, like the realpath() error above does.
            throw new phpMorphy_Exception("Can`t open '$fileName' file");
        }

        return $result;
    }

    /**
     * Fills in missing options with defaults.
     * Keys supplied by the caller win; absent keys are taken from $defaults.
     *
     * @param array|null $options
     * @return array options with 'lock_mode', 'handler' and 'persistent' set
     */
    protected function repairOptions($options) {
        $defaults = array(
            'lock_mode' => 'd',
            'handler' => self::getDefaultHandler(),
            'persistent' => false
        );

        return (array)$options + $defaults;
    }

    /**
     * Fetches the value stored under $key via dba_fetch().
     *
     * @param string $key
     * @return mixed whatever dba_fetch() returns for this key
     */
    function getValue($key) {
        return dba_fetch($key, $this->handle);
    }
}
PK+[[ڔ"" fsa/fsa.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */
interface phpMorphy_Fsa_Interface {
    /**
     * Return root transition of fsa
     * @return array
     */
    function getRootTrans();

    /**
     * Returns root state object
     * @return
     */
    function getRootState();

    /**
     * Returns alphabet i.e.
all chars used in automat
     * @return array
     */
    function getAlphabet();

    /**
     * Return annotation for given transition(if annotation flag is set for given trans)
     *
     * @param array $trans
     * @return string
     */
    function getAnnot($trans);

    /**
     * Find word in automat
     *
     * @param mixed $trans starting transition
     * @param string $word
     * @param bool $readAnnot read annot or simple check if word exists in automat
     * @return array walk result; phpMorphy_Fsa_Tree_Mem::walk() returns the keys
     *               'result' (bool, TRUE if word is found), 'last_trans', 'word_trans',
     *               'walked' (number of chars consumed) and 'annot'
     */
    function walk($trans, $word, $readAnnot = true);

    /**
     * Traverse automat and collect words
     * For each found words $callback function invoked with follow arguments:
     * call_user_func($callback, $word, $annot)
     * when $readAnnot is FALSE then $annot arg is always NULL
     *
     * @param mixed $startNode
     * @param mixed $callback callback function(in php format callback i.e. string or array(obj, method) or array(class, method)
     * @param bool $readAnnot read annot
     * @param string $path string to be append to all words
     */
    function collect($startNode, $callback, $readAnnot = true, $path = '');

    /**
     * Read state at given index
     *
     * @param int $index
     * @return array
     */
    function readState($index);

    /**
     * Unpack transition from binary form to array
     *
     * @param mixed $rawTranses may be array for convert more than one transitions
     * @return array
     */
    function unpackTranses($rawTranses);
}

abstract class phpMorphy_Fsa implements phpMorphy_Fsa_Interface {
    const HEADER_SIZE = 128;

    protected
        $resource,
        $header,
        $fsa_start,
        $root_trans,
        $alphabet;

    /**
     * Stores the storage resource and parsed header, then caches the root transition.
     *
     * Fixed: this used to be a PHP4-style constructor (a method named after the class).
     * Such methods are no longer treated as constructors since PHP 8.0, which left
     * every property uninitialised.
     *
     * @param mixed $resource storage resource the automaton is read from
     * @param array $header   parsed header as produced by readHeader()
     */
    protected function __construct($resource, $header) {
        $this->resource = $resource;
        $this->header = $header;
        $this->fsa_start = $header['fsa_offset'];
        $this->root_trans = $this->readRootTrans();
    }

    /**
     * Legacy PHP4-style entry point, kept so existing explicit calls keep working.
     * Declared after __construct() on purpose so that __construct() is the constructor.
     */
    protected function phpMorphy_Fsa($resource, $header) {
        $this->__construct($resource, $header);
    }

    /**
     * Factory: returns a lazy proxy when $lazy is set, otherwise reads and validates
     * the HEADER_SIZE-byte header from $storage and instantiates the matching
     * implementation class.
     *
     * @param phpMorphy_Storage $storage
     * @param bool $lazy
     * @return phpMorphy_Fsa_Interface
     * @throws phpMorphy_Exception when the header is invalid
     */
    // static
    static function create(phpMorphy_Storage $storage, $lazy) {
        if($lazy) {
            return new phpMorphy_Fsa_Proxy($storage);
        }

        $header = phpMorphy_Fsa::readHeader(
            $storage->read(0, self::HEADER_SIZE, true)
        );

        if(!phpMorphy_Fsa::validateHeader($header)) {
            throw new phpMorphy_Exception('Invalid fsa format');
        }
if($header['flags']['is_sparse']) { $type = 'sparse'; } else if($header['flags']['is_tree']) { $type = 'tree'; } else { throw new phpMorphy_Exception('Only sparse or tree fsa`s supported'); } $storage_type = $storage->getTypeAsString(); $file_path = dirname(__FILE__) . "/access/fsa_{$type}_{$storage_type}.php"; $clazz = 'phpMorphy_Fsa_' . ucfirst($type) . '_' . ucfirst($storage_type); require_once($file_path); return new $clazz( $storage->getResource(), $header ); } function getRootTrans() { return $this->root_trans; } function getRootState() { return $this->createState($this->getRootStateIndex()); } function getAlphabet() { if(!isset($this->alphabet)) { $this->alphabet = str_split($this->readAlphabet()); } return $this->alphabet; } protected function createState($index) { require_once(PHPMORPHY_DIR . '/fsa/fsa_state.php'); return new phpMorphy_State($this, $index); } static protected function readHeader($headerRaw) { if($GLOBALS['__phpmorphy_strlen']($headerRaw) != self::HEADER_SIZE) { throw new phpMorphy_Exception('Invalid header string given'); } $header = unpack( 'a4fourcc/Vver/Vflags/Valphabet_offset/Vfsa_offset/Vannot_offset/Valphabet_size/Vtranses_count/Vannot_length_size/' . 'Vannot_chunk_size/Vannot_chunks_count/Vchar_size/Vpadding_size/Vdest_size/Vhash_size', $headerRaw ); if(false === $header) { throw new phpMorphy_Exception('Can`t unpack header'); } $flags = array(); $raw_flags = $header['flags']; $flags['is_tree'] = $raw_flags & 0x01 ? true : false; $flags['is_hash'] = $raw_flags & 0x02 ? true : false; $flags['is_sparse'] = $raw_flags & 0x04 ? true : false; $flags['is_be'] = $raw_flags & 0x08 ? 
true : false;

        $header['flags'] = $flags;

        // size of one packed transition = sum of its field sizes from the header
        $header['trans_size'] = $header['char_size'] + $header['padding_size'] + $header['dest_size'] + $header['hash_size'];

        return $header;
    }

    /**
     * Checks that the parsed header describes a format this implementation reads:
     * fourcc 'meal', version 3, fixed field sizes, not big-endian and not hashed.
     *
     * @param array $header parsed header as produced by readHeader()
     * @return bool
     */
    // static
    static protected function validateHeader($header) {
        if(
            'meal' != $header['fourcc'] ||
            3 != $header['ver'] ||
            $header['char_size'] != 1 ||
            $header['padding_size'] > 0 ||
            $header['dest_size'] != 3 ||
            $header['hash_size'] != 0 ||
            $header['annot_length_size'] != 1 ||
            $header['annot_chunk_size'] != 1 ||
            $header['flags']['is_be'] ||
            $header['flags']['is_hash'] ||
            1 == 0
        ) {
            return false;
        }

        return true;
    }

    protected function getRootStateIndex() {
        return 0;
    }

    abstract protected function readRootTrans();
    abstract protected function readAlphabet();
};

/**
 * Callback target that accumulates word => annot pairs up to a fixed limit.
 */
class phpMorphy_Fsa_WordsCollector {
    protected
        $items = array(),
        $limit;

    /**
     * Fixed: replaces the PHP4-style constructor, which PHP 8.0+ no longer invokes
     * (leaving $limit unset so that collect() always refused new items).
     *
     * @param int $collectLimit maximum number of items to keep
     */
    function __construct($collectLimit) {
        $this->limit = $collectLimit;
    }

    /**
     * Legacy PHP4-style entry point, kept for explicit callers.
     * Declared after __construct() on purpose so that __construct() is the constructor.
     */
    function phpMorphy_Fsa_WordsCollector($collectLimit) {
        $this->__construct($collectLimit);
    }

    /**
     * Stores the pair while fewer than $limit items are held.
     *
     * @return bool true when stored, false once the limit is reached
     */
    function collect($word, $annot) {
        if(count($this->items) < $this->limit) {
            $this->items[$word] = $annot;
            return true;
        } else {
            return false;
        }
    }

    function getItems() {
        return $this->items;
    }

    function clear() {
        $this->items = array();
    }

    function getCallback() {
        return array($this, 'collect');
    }
};

/**
 * Forwards every phpMorphy_Fsa_Interface call to the wrapped $fsa.
 */
class phpMorphy_Fsa_Decorator implements phpMorphy_Fsa_Interface {
    protected $fsa;

    /**
     * Fixed: replaces the PHP4-style constructor, which PHP 8.0+ no longer invokes
     * (leaving $fsa null and every forwarded call failing).
     *
     * @param phpMorphy_Fsa_Interface $fsa automaton to delegate to
     */
    function __construct(phpMorphy_Fsa_Interface $fsa) {
        $this->fsa = $fsa;
    }

    /**
     * Legacy PHP4-style entry point, kept for explicit callers.
     * Declared after __construct() on purpose so that __construct() is the constructor.
     */
    function phpMorphy_Fsa_Decorator(phpMorphy_Fsa_Interface $fsa) {
        $this->__construct($fsa);
    }

    function getRootTrans() {
        return $this->fsa->getRootTrans();
    }

    function getRootState() {
        return $this->fsa->getRootState();
    }

    function getAlphabet() {
        return $this->fsa->getAlphabet();
    }

    function getAnnot($trans) {
        return $this->fsa->getAnnot($trans);
    }

    function walk($start, $word, $readAnnot = true) {
        return $this->fsa->walk($start, $word, $readAnnot);
    }

    function collect($start, $callback, $readAnnot = true, $path = '') {
        return $this->fsa->collect($start, $callback, $readAnnot, $path);
    }

    function readState($index) {
        return $this->fsa->readState($index);
    }

    function unpackTranses($transes) {
        return
$this->fsa->unpackTranses($transes);
    }
};

/**
 * Decorator that creates the real automaton only when it is first needed.
 */
class phpMorphy_Fsa_Proxy extends phpMorphy_Fsa_Decorator {
    protected $storage;

    function __construct(phpMorphy_Storage $storage) {
        $this->storage = $storage;
        // remove the declared property so the first read of $this->fsa goes through __get()
        unset($this->fsa);
    }

    /**
     * On first access to 'fsa', builds the real automaton from the stored storage,
     * assigns it to $this->fsa and drops the storage reference.
     *
     * @throws phpMorphy_Exception for any other property name
     */
    function __get($propName) {
        if($propName == 'fsa') {
            $this->fsa = phpMorphy_Fsa::create($this->storage, false);

            unset($this->storage);
            return $this->fsa;
        }

        throw new phpMorphy_Exception("Unknown prop name '$propName'");
    }
}
PK+[[k4 4 fsa/fsa_state.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */
/**
 * Common base for links: holds the automaton, the unpacked transition
 * and the raw transition it was unpacked from.
 */
class phpMorphy_Link_Base {
    protected
        $fsa,
        $trans,
        $raw_trans;

    /**
     * Fixed: replaces the PHP4-style constructor, which PHP 8.0+ no longer invokes
     * (leaving all three properties unset).
     *
     * @param phpMorphy_Fsa_Interface $fsa
     * @param array $trans    unpacked transition
     * @param mixed $rawTrans raw transition
     */
    function __construct(phpMorphy_Fsa_Interface $fsa, $trans, $rawTrans) {
        $this->fsa = $fsa;
        $this->trans = $trans;
        $this->raw_trans = $rawTrans;
    }

    /**
     * Legacy PHP4-style entry point, kept for explicit callers.
     * Declared after __construct() on purpose so that __construct() is the constructor.
     */
    function phpMorphy_Link_Base(phpMorphy_Fsa_Interface $fsa, $trans, $rawTrans) {
        $this->__construct($fsa, $trans, $rawTrans);
    }

    // intentionally empty here; both visible subclasses override it
    function isAnnotation() { }

    function getTrans() {
        return $this->trans;
    }

    function getFsa() {
        return $this->fsa;
    }

    function getRawTrans() {
        return $this->raw_trans;
    }
};

/**
 * This class represent "normal" link i.e.
link that points to automat state */
class phpMorphy_Link extends phpMorphy_Link_Base {
    function isAnnotation() {
        return false;
    }

    function getDest() {
        return $this->trans['dest'];
    }

    function getAttr() {
        return $this->trans['attr'];
    }

    function getTargetState() {
        return $this->createState($this->trans['dest']);
    }

    protected function createState($index) {
        return new phpMorphy_State($this->fsa, $index);
    }
}

/**
 * Link whose transition points to an annotation.
 */
class phpMorphy_Link_Annot extends phpMorphy_Link_Base {
    function isAnnotation() {
        return true;
    }

    function getAnnotation() {
        return $this->fsa->getAnnot($this->raw_trans);
    }
};

/**
 * One automaton state: the raw transitions read at an index plus their unpacked form.
 */
class phpMorphy_State {
    protected
        $fsa,
        $transes,
        $raw_transes;

    /**
     * Reads the state at $index and unpacks its transitions.
     *
     * Fixed: replaces the PHP4-style constructor, which PHP 8.0+ no longer invokes
     * (leaving the state empty).
     *
     * @param phpMorphy_Fsa_Interface $fsa
     * @param int $index state index passed to $fsa->readState()
     */
    function __construct(phpMorphy_Fsa_Interface $fsa, $index) {
        $this->fsa = $fsa;

        $this->raw_transes = $fsa->readState($index);
        $this->transes = $fsa->unpackTranses($this->raw_transes);
    }

    /**
     * Legacy PHP4-style entry point, kept for explicit callers.
     * Declared after __construct() on purpose so that __construct() is the constructor.
     */
    function phpMorphy_State(phpMorphy_Fsa_Interface $fsa, $index) {
        $this->__construct($fsa, $index);
    }

    /**
     * Wraps every transition of this state in a link object:
     * phpMorphy_Link_Annot when the 'term' flag is set, phpMorphy_Link otherwise.
     *
     * @return array list of link objects, in transition order
     */
    function getLinks() {
        $result = array();

        for($i = 0, $c = count($this->transes); $i < $c; $i++) {
            $trans = $this->transes[$i];

            if(!$trans['term']) {
                $result[] = $this->createNormalLink($trans, $this->raw_transes[$i]);
            } else {
                $result[] = $this->createAnnotLink($trans, $this->raw_transes[$i]);
            }
        }

        return $result;
    }

    function getSize() {
        return count($this->transes);
    }

    protected function createNormalLink($trans, $raw) {
        return new phpMorphy_Link($this->fsa, $trans, $raw);
    }

    protected function createAnnotLink($trans, $raw) {
        return new phpMorphy_Link_Annot($this->fsa, $trans, $raw);
    }
};
PK+[[LM!M!fsa/access/fsa_tree_mem.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details.
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /** * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/fsa/tpl/fsa.tpl.php on line 24
/* Wed, 15 Jan 2020 20:00:56 +0000, don`t change it! */

/**
 * Tree-layout FSA accessor that reads the automaton from the byte string kept
 * in $this->resource.
 *
 * Every transition is one 32-bit little-endian word (decoded with unpack('V')):
 *   - bits 0..7    attr:  byte label of the transition
 *   - bit  8       term:  the word refers to an annotation, not to a state
 *   - bit  9       llast: no left child inside the per-state search tree
 *   - bit  10      rlast: no right child inside the per-state search tree
 *   - bits 11..31  dest:  target state index, in 4-byte words from fsa_start
 *
 * Byte-oriented string access goes through $GLOBALS['__phpmorphy_strlen'] and
 * $GLOBALS['__phpmorphy_substr'], which are initialised outside this class.
 */
class phpMorphy_Fsa_Tree_Mem extends phpMorphy_Fsa {
    /**
     * Follows $word byte by byte, starting at transition $trans.
     *
     * @param int    $trans     Raw transition word to start from.
     * @param string $word      Bytes to consume.
     * @param bool   $readAnnot When true, the annotation of the reached state is looked up too.
     * @return array Keys: 'result' (bool), 'last_trans', 'word_trans', 'walked' (bytes consumed), 'annot'.
     * @throws phpMorphy_Exception When the search inside one state takes more than the allowed steps.
     */
    function walk($trans, $word, $readAnnot = true) {
        $__mem = $this->resource;
        $fsa_start = $this->fsa_start;

        for($i = 0, $c = $GLOBALS['__phpmorphy_strlen']($word); $i < $c; $i++) {
            $prev_trans = $trans;
            $char = ord($word[$i]);

            /////////////////////////////////
            // find char in state begin
            // tree version
            $result = true;
            $start_offset = $fsa_start + ((($trans >> 11) & 0x1FFFFF) << 2);

            // read first trans in state
            list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($__mem, $start_offset, 4));

            // If first trans is term (i.e. pointing to annot) then skip it
            if(($trans & 0x0100)) {
                // When this is the single transition in the state there is nothing to search
                if(($trans & 0x0200) && ($trans & 0x0400)) {
                    $result = false;
                } else {
                    $start_offset += 4;
                    list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($__mem, $start_offset, 4));
                }
            }

            // if all ok process the remaining transitions of the state
            if($result) {
                // Binary search over the state: node $idx has its children at
                // 2*$idx and 2*$idx+1, node 1 sits at $start_offset.
                for($idx = 1, $j = 0; ; $j++) {
                    $attr = ($trans & 0xFF);

                    if($attr == $char) {
                        $result = true;
                        break;
                    } else if($attr > $char) {
                        if(($trans & 0x0200)) {
                            $result = false;
                            break;
                        }

                        $idx = $idx << 1;
                    } else {
                        if(($trans & 0x0400)) {
                            $result = false;
                            break;
                        }

                        $idx = ($idx << 1) + 1;
                    }

                    // guard against corrupted data that never terminates the search
                    if($j > 255) {
                        throw new phpMorphy_Exception('Infinite recursion possible');
                    }

                    // read next trans
                    list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($__mem, $start_offset + (($idx - 1) << 2), 4));
                }
            }
            // find char in state end
            /////////////////////////////////

            if(!$result) {
                // keep the last transition that did match
                $trans = $prev_trans;
                break;
            }
        }

        $annot = null;
        $result = false;
        $prev_trans = $trans;

        if($i >= $c) {
            // Read annotation when we walked all chars in word
            $result = true;

            if($readAnnot) {
                // read annot trans
                list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($__mem, $fsa_start + ((($trans >> 11) & 0x1FFFFF) << 2), 4));

                if(0 == ($trans & 0x0100)) {
                    $result = false;
                } else {
                    $annot = $this->getAnnot($trans);
                }
            }
        }

        return array(
            'result' => $result,
            'last_trans' => $trans,
            'word_trans' => $prev_trans,
            'walked' => $i,
            'annot' => $annot
        );
    }

    /**
     * Depth-first enumeration of everything reachable from $startNode.
     *
     * $callback is invoked as callback($path, $annot) for every term transition;
     * a falsy return value stops the traversal.
     *
     * @param int      $startNode Raw transition whose target state is the root of the traversal.
     * @param callable $callback  Receiver of (path, annotation) pairs.
     * @param bool     $readAnnot When false the raw term transition is passed instead of the annotation.
     * @param string   $path      Prefix prepended to every reported path.
     * @return int Number of term transitions reported.
     */
    function collect($startNode, $callback, $readAnnot = true, $path = '') {
        $total = 0;

        $stack = array();
        $stack_idx = array();
        $start_idx = 0;
        // sentinel: popping it empties the stack and ends the outer loop
        array_push($stack, null);
        array_push($stack_idx, null);

        $state = $this->readState((($startNode) >> 11) & 0x1FFFFF);

        do {
            for($i = $start_idx, $c = count($state); $i < $c; $i++) {
                $trans = $state[$i];

                if(($trans & 0x0100)) {
                    $total++;

                    if($readAnnot) {
                        $annot = $this->getAnnot($trans);
                    } else {
                        $annot = $trans;
                    }

                    if(!call_user_func($callback, $path, $annot)) {
                        return $total;
                    }
                } else {
                    // descend; remember where to resume in the current state
                    $path .= chr(($trans & 0xFF));
                    array_push($stack, $state);
                    array_push($stack_idx, $i + 1);
                    $state = $this->readState((($trans) >> 11) & 0x1FFFFF);
                    $start_idx = 0;

                    break;
                }
            }

            if($i >= $c) {
                // state exhausted: go back to the parent and drop the last path byte
                $state = array_pop($stack);
                $start_idx = array_pop($stack_idx);
                $path = $GLOBALS['__phpmorphy_substr']($path, 0, -1);
            }
        } while(!empty($stack));

        return $total;
    }

    /**
     * Reads all raw transitions of the state stored at word index $index.
     *
     * The transitions are read as consecutive 32-bit words. $offset is advanced
     * after every read, including the first one, so each word is fetched exactly
     * once; phpMorphy_Fsa_Tree_File gets the same effect from the file pointer.
     *
     * @param int $index State index in 4-byte words from fsa_start.
     * @return int[] Raw transition words, a leading annotation transition first when present.
     */
    function readState($index) {
        $__mem = $this->resource;

        $result = array();

        $offset = $this->fsa_start + (($index) << 2);

        // read first trans and step past it
        list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($__mem, $offset, 4));
        $offset += 4;

        // check if first trans is pointer to annot, and not single in state
        // NOTE(review): walk() treats "single" as llast AND rlast, this test uses
        // llast OR rlast - confirm against the dictionary builder.
        if(($trans & 0x0100) && !(($trans & 0x0200) || ($trans & 0x0400))) {
            $result[] = $trans;

            list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($__mem, $offset, 4));
            $offset += 4;
        }

        // read rest: every missing llast/rlast flag announces one more transition
        for($expect = 1; $expect; $expect--) {
            if(!($trans & 0x0200)) $expect++;
            if(!($trans & 0x0400)) $expect++;

            $result[] = $trans;

            if($expect > 1) {
                list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($__mem, $offset, 4));
                $offset += 4;
            }
        }

        return $result;
    }

    /**
     * Decodes raw transition words into associative arrays.
     *
     * @param int|int[] $rawTranses One raw transition or a list of them.
     * @return array[] One array per transition with keys 'term', 'llast', 'rlast', 'attr', 'dest'.
     */
    function unpackTranses($rawTranses) {
        settype($rawTranses, 'array');
        $result = array();

        foreach($rawTranses as $rawTrans) {
            $result[] = array(
                'term' => ($rawTrans & 0x0100) ? true : false,
                'llast' => ($rawTrans & 0x0200) ? true : false,
                'rlast' => ($rawTrans & 0x0400) ? true : false,
                'attr' => ($rawTrans & 0xFF),
                'dest' => (($rawTrans) >> 11) & 0x1FFFFF,
            );
        }

        return $result;
    }

    /**
     * Reads the root transition, the first word at fsa_start.
     *
     * @return int Raw transition word.
     */
    protected function readRootTrans() {
        list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($this->resource, $this->fsa_start + 0, 4));

        return $trans;
    }

    /**
     * Returns the raw alphabet bytes described by the header.
     *
     * @return string
     */
    protected function readAlphabet() {
        return $GLOBALS['__phpmorphy_substr']($this->resource, $this->header['alphabet_offset'], $this->header['alphabet_size']);
    }

    /**
     * Fetches the annotation a term transition points at.
     *
     * The annotation offset is built from the attr byte (high part) and the dest
     * bits (low part); the first byte at that offset is the annotation length.
     *
     * @param int $trans Raw transition word.
     * @return string|null Annotation bytes, or null for non-term transitions and empty annotations.
     */
    function getAnnot($trans) {
        if(!($trans & 0x0100)) {
            return null;
        }

        $__mem = $this->resource;

        $offset =
            $this->header['annot_offset'] +
            ((($trans & 0xFF) << 21) | (($trans >> 11) & 0x1FFFFF));

        $len = ord($GLOBALS['__phpmorphy_substr']($__mem, $offset, 1));

        if($len) {
            $annot = $GLOBALS['__phpmorphy_substr']($__mem, $offset + 1, $len);
        } else {
            $annot = null;
        }

        return $annot;
    }
}

// Residue of the archive dump that followed this class on the same line:
// PK+[[5fsa/access/fsa_sparse_mem.phpnuW+A
//  *
//  * This library is free software; you can redistribute it and/or
//  * modify it under the terms of the GNU Lesser General Public
//  * License as published by the Free Software Foundation; either
//  * version 2 of the License, or (at your option) any later version.
//  *
//  * This library is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  * Lesser General Public License for more details.
//  *
//  * You should have received a copy of the GNU Lesser General Public
//  * License along with this library; if not, write to the
//  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
//  * Boston, MA 02111-1307, USA.
//  */
// /**
//  * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/fsa/tpl/fsa.tpl.php on line 24
/* Wed, 15 Jan 2020 20:00:56 +0000, don`t change it! */

/**
 * Sparse-layout FSA accessor working on the byte string kept in $this->resource.
 *
 * A transition is one 32-bit little-endian word:
 *   - bits 0..7    attr:  byte label
 *   - bit  8       term:  the word refers to an annotation
 *   - bit  9       empty: the slot is unused
 *   - bits 10..31  dest:  target state index, in 4-byte words from fsa_start
 *
 * For a state with index s, slot s holds the annotation transition (if any) and
 * slot s + 1 + b holds the transition for byte b.
 */
class phpMorphy_Fsa_Sparse_Mem extends phpMorphy_Fsa {
    /** Byte values of the alphabet; filled on first use by getAlphabetNum(). */
    protected $alphabet_num;

    /**
     * Consumes $word byte by byte, beginning at transition $trans.
     *
     * @param int    $trans     Raw transition word to start from.
     * @param string $word      Bytes to consume.
     * @param bool   $readAnnot Whether to fetch the annotation of the reached state.
     * @return array Keys: 'result', 'last_trans', 'word_trans', 'walked', 'annot'.
     */
    public function walk($trans, $word, $readAnnot = true) {
        $bytes = $this->resource;
        $base = $this->fsa_start;
        $length = $GLOBALS['__phpmorphy_strlen']($word);

        $pos = 0;
        while ($pos < $length) {
            $code = ord($word[$pos]);

            // sparse version: the slot for this byte is addressed directly
            $slot = $base + (((($trans >> 10) & 0x3FFFFF) + $code + 1) << 2);
            list(, $next) = unpack('V', $GLOBALS['__phpmorphy_substr']($bytes, $slot, 4));

            // NOTE(review): readState() additionally rejects slots with the term
            // bit set; this lookup does not - confirm whether that is intended.
            if (($next & 0x0200) || ($next & 0xFF) != $code) {
                // no such transition: $trans keeps the last one that matched
                break;
            }

            $trans = $next;
            $pos++;
        }

        $wordTrans = $trans;
        $annot = null;
        $ok = false;

        if ($pos >= $length) {
            // the whole word was consumed
            $ok = true;

            if ($readAnnot) {
                $annotSlot = $base + ((($trans >> 10) & 0x3FFFFF) << 2);
                list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($bytes, $annotSlot, 4));

                if ($trans & 0x0100) {
                    $annot = $this->getAnnot($trans);
                } else {
                    $ok = false;
                }
            }
        }

        return array(
            'result' => $ok,
            'last_trans' => $trans,
            'word_trans' => $wordTrans,
            'walked' => $pos,
            'annot' => $annot,
        );
    }

    /**
     * Depth-first traversal reporting every term transition below $startNode.
     *
     * @param int      $startNode Raw transition whose target state is the traversal root.
     * @param callable $callback  Called as callback($path, $annot); a falsy result stops the traversal.
     * @param bool     $readAnnot When false the raw transition is reported instead of the annotation.
     * @param string   $path      Prefix for reported paths.
     * @return int Number of term transitions reported.
     */
    public function collect($startNode, $callback, $readAnnot = true, $path = '') {
        $reported = 0;

        // both stacks start with a sentinel; popping it terminates the loop
        $parents = array(null);
        $resumeAt = array(null);
        $from = 0;

        $state = $this->readState(($startNode >> 10) & 0x3FFFFF);

        do {
            $descended = false;
            $size = count($state);

            for ($k = $from; $k < $size; $k++) {
                $trans = $state[$k];

                if ($trans & 0x0100) {
                    $reported++;
                    $annot = $readAnnot ? $this->getAnnot($trans) : $trans;

                    if (!call_user_func($callback, $path, $annot)) {
                        return $reported;
                    }

                    continue;
                }

                // ordinary transition: remember the position and go one level down
                $path .= chr($trans & 0xFF);
                $parents[] = $state;
                $resumeAt[] = $k + 1;
                $state = $this->readState(($trans >> 10) & 0x3FFFFF);
                $from = 0;
                $descended = true;
                break;
            }

            if (!$descended) {
                // current state is exhausted: return to the parent
                $state = array_pop($parents);
                $from = array_pop($resumeAt);
                $path = $GLOBALS['__phpmorphy_substr']($path, 0, -1);
            }
        } while (!empty($parents));

        return $reported;
    }

    /**
     * Collects the raw transitions of the state at word index $index.
     *
     * @param int $index State index in 4-byte words from fsa_start.
     * @return int[] Annotation transition first (when present), then one entry per alphabet byte that has a transition.
     */
    public function readState($index) {
        $bytes = $this->resource;
        $found = array();
        $cursor = $this->fsa_start + ($index << 2);

        // slot 0 of the state: annotation transition, if any
        list(, $head) = unpack('V', $GLOBALS['__phpmorphy_substr']($bytes, $cursor, 4));
        if ($head & 0x0100) {
            $found[] = $head;
        }

        // remaining slots are probed once per alphabet byte
        $cursor += 4;
        foreach ($this->getAlphabetNum() as $code) {
            list(, $trans) = unpack('V', $GLOBALS['__phpmorphy_substr']($bytes, $cursor + ($code << 2), 4));

            // skip slots flagged empty (0x0200) or term (0x0100)
            if ($trans & 0x0300) {
                continue;
            }

            if (($trans & 0xFF) == $code) {
                $found[] = $trans;
            }
        }

        return $found;
    }

    /**
     * Decodes raw transition words.
     *
     * @param int|int[] $rawTranses One raw transition or a list of them.
     * @return array[] Arrays with keys 'term', 'empty', 'attr', 'dest'.
     */
    public function unpackTranses($rawTranses) {
        $decoded = array();

        foreach ((array)$rawTranses as $raw) {
            $decoded[] = array(
                'term' => (bool)($raw & 0x0100),
                'empty' => (bool)($raw & 0x0200),
                'attr' => $raw & 0xFF,
                'dest' => ($raw >> 10) & 0x3FFFFF,
            );
        }

        return $decoded;
    }

    /**
     * Reads the root transition, stored one word after fsa_start.
     *
     * @return int
     */
    protected function readRootTrans() {
        $word = $GLOBALS['__phpmorphy_substr']($this->resource, $this->fsa_start + 4, 4);
        list(, $root) = unpack('V', $word);

        return $root;
    }

    /**
     * Returns the alphabet bytes located via the header.
     *
     * @return string
     */
    protected function readAlphabet() {
        $header = $this->header;

        return $GLOBALS['__phpmorphy_substr']($this->resource, $header['alphabet_offset'], $header['alphabet_size']);
    }

    /**
     * Resolves the annotation referenced by a term transition.
     *
     * @param int $trans Raw transition word.
     * @return string|null Null for non-term transitions and for zero-length annotations.
     */
    public function getAnnot($trans) {
        if (!($trans & 0x0100)) {
            return null;
        }

        $bytes = $this->resource;

        // attr supplies the high bits of the offset, dest the low 22 bits
        $relative = (($trans & 0xFF) << 22) | (($trans >> 10) & 0x3FFFFF);
        $offset = $this->header['annot_offset'] + $relative;

        // first byte is the length of the annotation that follows
        $len = ord($GLOBALS['__phpmorphy_substr']($bytes, $offset, 1));

        return $len ? $GLOBALS['__phpmorphy_substr']($bytes, $offset + 1, $len) : null;
    }

    /**
     * Alphabet as a list of byte values, computed once and cached.
     *
     * @return int[]
     */
    public function getAlphabetNum() {
        if (isset($this->alphabet_num)) {
            return $this->alphabet_num;
        }

        $this->alphabet_num = array_map('ord', $this->getAlphabet());

        return $this->alphabet_num;
    }
}

// Residue of the archive dump that followed this class on the same line:
// PK+[[ ""fsa/access/fsa_tree_shm.phpnuW+A
//  *
//  * This library is free software; you can redistribute it and/or
//  * modify it under the terms of the GNU Lesser General Public
//  * License as published by the Free Software Foundation; either
//  * version 2 of the License, or (at your option) any later version.
//  *
//  * This library is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  * Lesser General Public License for more details.
//  *
//  * You should have received a copy of the GNU Lesser General Public
//  * License along with this library; if not, write to the
//  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
//  * Boston, MA 02111-1307, USA.
//  */
// /**
//  * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/fsa/tpl/fsa.tpl.php on line 24
/* Wed, 15 Jan 2020 20:00:56 +0000, don`t change it! */

/**
 * Tree-layout FSA accessor that reads the automaton from a shared memory block.
 *
 * $this->resource is an array with the keys 'shm_id' (handle passed to
 * shmop_read()) and 'offset' (added to every read position).
 *
 * Every transition is one 32-bit little-endian word (decoded with unpack('V')):
 *   - bits 0..7    attr:  byte label of the transition
 *   - bit  8       term:  the word refers to an annotation, not to a state
 *   - bit  9       llast: no left child inside the per-state search tree
 *   - bit  10      rlast: no right child inside the per-state search tree
 *   - bits 11..31  dest:  target state index, in 4-byte words from fsa_start
 */
class phpMorphy_Fsa_Tree_Shm extends phpMorphy_Fsa {
    /**
     * Follows $word byte by byte, starting at transition $trans.
     *
     * @param int    $trans     Raw transition word to start from.
     * @param string $word      Bytes to consume.
     * @param bool   $readAnnot When true, the annotation of the reached state is looked up too.
     * @return array Keys: 'result' (bool), 'last_trans', 'word_trans', 'walked' (bytes consumed), 'annot'.
     * @throws phpMorphy_Exception When the search inside one state takes more than the allowed steps.
     */
    function walk($trans, $word, $readAnnot = true) {
        $__shm = $this->resource['shm_id'];
        $__offset = $this->resource['offset'];
        $fsa_start = $this->fsa_start;

        for($i = 0, $c = $GLOBALS['__phpmorphy_strlen']($word); $i < $c; $i++) {
            $prev_trans = $trans;
            $char = ord($word[$i]);

            /////////////////////////////////
            // find char in state begin
            // tree version
            $result = true;
            $start_offset = $fsa_start + ((($trans >> 11) & 0x1FFFFF) << 2);

            // read first trans in state
            list(, $trans) = unpack('V', shmop_read($__shm, $__offset + ($start_offset), 4));

            // If first trans is term (i.e. pointing to annot) then skip it
            if(($trans & 0x0100)) {
                // When this is the single transition in the state there is nothing to search
                if(($trans & 0x0200) && ($trans & 0x0400)) {
                    $result = false;
                } else {
                    $start_offset += 4;
                    list(, $trans) = unpack('V', shmop_read($__shm, $__offset + ($start_offset), 4));
                }
            }

            // if all ok process the remaining transitions of the state
            if($result) {
                // Binary search over the state: node $idx has its children at
                // 2*$idx and 2*$idx+1, node 1 sits at $start_offset.
                for($idx = 1, $j = 0; ; $j++) {
                    $attr = ($trans & 0xFF);

                    if($attr == $char) {
                        $result = true;
                        break;
                    } else if($attr > $char) {
                        if(($trans & 0x0200)) {
                            $result = false;
                            break;
                        }

                        $idx = $idx << 1;
                    } else {
                        if(($trans & 0x0400)) {
                            $result = false;
                            break;
                        }

                        $idx = ($idx << 1) + 1;
                    }

                    // guard against corrupted data that never terminates the search
                    if($j > 255) {
                        throw new phpMorphy_Exception('Infinite recursion possible');
                    }

                    // read next trans
                    list(, $trans) = unpack('V', shmop_read($__shm, $__offset + ($start_offset + (($idx - 1) << 2)), 4));
                }
            }
            // find char in state end
            /////////////////////////////////

            if(!$result) {
                // keep the last transition that did match
                $trans = $prev_trans;
                break;
            }
        }

        $annot = null;
        $result = false;
        $prev_trans = $trans;

        if($i >= $c) {
            // Read annotation when we walked all chars in word
            $result = true;

            if($readAnnot) {
                // read annot trans
                list(, $trans) = unpack('V', shmop_read($__shm, $__offset + ($fsa_start + ((($trans >> 11) & 0x1FFFFF) << 2)), 4));

                if(0 == ($trans & 0x0100)) {
                    $result = false;
                } else {
                    $annot = $this->getAnnot($trans);
                }
            }
        }

        return array(
            'result' => $result,
            'last_trans' => $trans,
            'word_trans' => $prev_trans,
            'walked' => $i,
            'annot' => $annot
        );
    }

    /**
     * Depth-first enumeration of everything reachable from $startNode.
     *
     * $callback is invoked as callback($path, $annot) for every term transition;
     * a falsy return value stops the traversal.
     *
     * @param int      $startNode Raw transition whose target state is the root of the traversal.
     * @param callable $callback  Receiver of (path, annotation) pairs.
     * @param bool     $readAnnot When false the raw term transition is passed instead of the annotation.
     * @param string   $path      Prefix prepended to every reported path.
     * @return int Number of term transitions reported.
     */
    function collect($startNode, $callback, $readAnnot = true, $path = '') {
        $total = 0;

        $stack = array();
        $stack_idx = array();
        $start_idx = 0;
        // sentinel: popping it empties the stack and ends the outer loop
        array_push($stack, null);
        array_push($stack_idx, null);

        $state = $this->readState((($startNode) >> 11) & 0x1FFFFF);

        do {
            for($i = $start_idx, $c = count($state); $i < $c; $i++) {
                $trans = $state[$i];

                if(($trans & 0x0100)) {
                    $total++;

                    if($readAnnot) {
                        $annot = $this->getAnnot($trans);
                    } else {
                        $annot = $trans;
                    }

                    if(!call_user_func($callback, $path, $annot)) {
                        return $total;
                    }
                } else {
                    // descend; remember where to resume in the current state
                    $path .= chr(($trans & 0xFF));
                    array_push($stack, $state);
                    array_push($stack_idx, $i + 1);
                    $state = $this->readState((($trans) >> 11) & 0x1FFFFF);
                    $start_idx = 0;

                    break;
                }
            }

            if($i >= $c) {
                // state exhausted: go back to the parent and drop the last path byte
                $state = array_pop($stack);
                $start_idx = array_pop($stack_idx);
                $path = $GLOBALS['__phpmorphy_substr']($path, 0, -1);
            }
        } while(!empty($stack));

        return $total;
    }

    /**
     * Reads all raw transitions of the state stored at word index $index.
     *
     * The transitions are read as consecutive 32-bit words. $offset is advanced
     * after every read, including the first one, so each word is fetched exactly
     * once; phpMorphy_Fsa_Tree_File gets the same effect from the file pointer.
     *
     * @param int $index State index in 4-byte words from fsa_start.
     * @return int[] Raw transition words, a leading annotation transition first when present.
     */
    function readState($index) {
        $__shm = $this->resource['shm_id'];
        $__offset = $this->resource['offset'];

        $result = array();

        $offset = $this->fsa_start + (($index) << 2);

        // read first trans and step past it
        list(, $trans) = unpack('V', shmop_read($__shm, $__offset + ($offset), 4));
        $offset += 4;

        // check if first trans is pointer to annot, and not single in state
        // NOTE(review): walk() treats "single" as llast AND rlast, this test uses
        // llast OR rlast - confirm against the dictionary builder.
        if(($trans & 0x0100) && !(($trans & 0x0200) || ($trans & 0x0400))) {
            $result[] = $trans;

            list(, $trans) = unpack('V', shmop_read($__shm, $__offset + ($offset), 4));
            $offset += 4;
        }

        // read rest: every missing llast/rlast flag announces one more transition
        for($expect = 1; $expect; $expect--) {
            if(!($trans & 0x0200)) $expect++;
            if(!($trans & 0x0400)) $expect++;

            $result[] = $trans;

            if($expect > 1) {
                list(, $trans) = unpack('V', shmop_read($__shm, $__offset + ($offset), 4));
                $offset += 4;
            }
        }

        return $result;
    }

    /**
     * Decodes raw transition words into associative arrays.
     *
     * @param int|int[] $rawTranses One raw transition or a list of them.
     * @return array[] One array per transition with keys 'term', 'llast', 'rlast', 'attr', 'dest'.
     */
    function unpackTranses($rawTranses) {
        settype($rawTranses, 'array');
        $result = array();

        foreach($rawTranses as $rawTrans) {
            $result[] = array(
                'term' => ($rawTrans & 0x0100) ? true : false,
                'llast' => ($rawTrans & 0x0200) ? true : false,
                'rlast' => ($rawTrans & 0x0400) ? true : false,
                'attr' => ($rawTrans & 0xFF),
                'dest' => (($rawTrans) >> 11) & 0x1FFFFF,
            );
        }

        return $result;
    }

    /**
     * Reads the root transition, the first word at fsa_start.
     *
     * @return int Raw transition word.
     */
    protected function readRootTrans() {
        $__shm = $this->resource['shm_id'];
        $__offset = $this->resource['offset'];

        list(, $trans) = unpack('V', shmop_read($__shm, $__offset + ($this->fsa_start + 0), 4));

        return $trans;
    }

    /**
     * Returns the raw alphabet bytes described by the header.
     *
     * @return string
     */
    protected function readAlphabet() {
        $__shm = $this->resource['shm_id'];
        $__offset = $this->resource['offset'];

        return shmop_read($__shm, $__offset + ($this->header['alphabet_offset']), $this->header['alphabet_size']);
    }

    /**
     * Fetches the annotation a term transition points at.
     *
     * The annotation offset is built from the attr byte (high part) and the dest
     * bits (low part); the first byte at that offset is the annotation length.
     *
     * @param int $trans Raw transition word.
     * @return string|null Annotation bytes, or null for non-term transitions and empty annotations.
     */
    function getAnnot($trans) {
        if(!($trans & 0x0100)) {
            return null;
        }

        $__shm = $this->resource['shm_id'];
        $__offset = $this->resource['offset'];

        $offset =
            $this->header['annot_offset'] +
            ((($trans & 0xFF) << 21) | (($trans >> 11) & 0x1FFFFF));

        $len = ord(shmop_read($__shm, $__offset + ($offset), 1));

        if($len) {
            $annot = shmop_read($__shm, $__offset + ($offset + 1), $len);
        } else {
            $annot = null;
        }

        return $annot;
    }
}

// Residue of the archive dump that followed this class on the same line:
// PK+[[Z]]fsa/access/fsa_sparse_file.phpnuW+A
//  *
//  * This library is free software; you can redistribute it and/or
//  * modify it under the terms of the GNU Lesser General Public
//  * License as published by the Free Software Foundation; either
//  * version 2 of the License, or (at your option) any later version.
//  *
//  * This library is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  * Lesser General Public License for more details.
//  *
//  * You should have received a copy of the GNU Lesser General Public
//  * License along with this library; if not, write to the
//  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
//  * Boston, MA 02111-1307, USA.
//  */
// /**
//  * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/fsa/tpl/fsa.tpl.php on line 24
/* Wed, 15 Jan 2020 20:00:56 +0000, don`t change it! */

/**
 * Sparse-layout FSA accessor reading from the file handle kept in $this->resource.
 *
 * A transition is one 32-bit little-endian word:
 *   - bits 0..7    attr:  byte label
 *   - bit  8       term:  the word refers to an annotation
 *   - bit  9       empty: the slot is unused
 *   - bits 10..31  dest:  target state index, in 4-byte words from fsa_start
 *
 * For a state with index s, slot s holds the annotation transition (if any) and
 * slot s + 1 + b holds the transition for byte b.
 */
class phpMorphy_Fsa_Sparse_File extends phpMorphy_Fsa {
    /** Byte values of the alphabet; filled on first use by getAlphabetNum(). */
    protected $alphabet_num;

    /**
     * Consumes $word byte by byte, beginning at transition $trans.
     *
     * @param int    $trans     Raw transition word to start from.
     * @param string $word      Bytes to consume.
     * @param bool   $readAnnot Whether to fetch the annotation of the reached state.
     * @return array Keys: 'result', 'last_trans', 'word_trans', 'walked', 'annot'.
     */
    public function walk($trans, $word, $readAnnot = true) {
        $handle = $this->resource;
        $base = $this->fsa_start;
        $length = $GLOBALS['__phpmorphy_strlen']($word);

        $pos = 0;
        while ($pos < $length) {
            $code = ord($word[$pos]);

            // sparse version: the slot for this byte is addressed directly
            fseek($handle, $base + (((($trans >> 10) & 0x3FFFFF) + $code + 1) << 2));
            list(, $next) = unpack('V', fread($handle, 4));

            // NOTE(review): readState() additionally rejects slots with the term
            // bit set; this lookup does not - confirm whether that is intended.
            if (($next & 0x0200) || ($next & 0xFF) != $code) {
                // no such transition: $trans keeps the last one that matched
                break;
            }

            $trans = $next;
            $pos++;
        }

        $wordTrans = $trans;
        $annot = null;
        $ok = false;

        if ($pos >= $length) {
            // the whole word was consumed
            $ok = true;

            if ($readAnnot) {
                fseek($handle, $base + ((($trans >> 10) & 0x3FFFFF) << 2));
                list(, $trans) = unpack('V', fread($handle, 4));

                if ($trans & 0x0100) {
                    $annot = $this->getAnnot($trans);
                } else {
                    $ok = false;
                }
            }
        }

        return array(
            'result' => $ok,
            'last_trans' => $trans,
            'word_trans' => $wordTrans,
            'walked' => $pos,
            'annot' => $annot,
        );
    }

    /**
     * Depth-first traversal reporting every term transition below $startNode.
     *
     * @param int      $startNode Raw transition whose target state is the traversal root.
     * @param callable $callback  Called as callback($path, $annot); a falsy result stops the traversal.
     * @param bool     $readAnnot When false the raw transition is reported instead of the annotation.
     * @param string   $path      Prefix for reported paths.
     * @return int Number of term transitions reported.
     */
    public function collect($startNode, $callback, $readAnnot = true, $path = '') {
        $reported = 0;

        // both stacks start with a sentinel; popping it terminates the loop
        $parents = array(null);
        $resumeAt = array(null);
        $from = 0;

        $state = $this->readState(($startNode >> 10) & 0x3FFFFF);

        do {
            $descended = false;
            $size = count($state);

            for ($k = $from; $k < $size; $k++) {
                $trans = $state[$k];

                if ($trans & 0x0100) {
                    $reported++;
                    $annot = $readAnnot ? $this->getAnnot($trans) : $trans;

                    if (!call_user_func($callback, $path, $annot)) {
                        return $reported;
                    }

                    continue;
                }

                // ordinary transition: remember the position and go one level down
                $path .= chr($trans & 0xFF);
                $parents[] = $state;
                $resumeAt[] = $k + 1;
                $state = $this->readState(($trans >> 10) & 0x3FFFFF);
                $from = 0;
                $descended = true;
                break;
            }

            if (!$descended) {
                // current state is exhausted: return to the parent
                $state = array_pop($parents);
                $from = array_pop($resumeAt);
                $path = $GLOBALS['__phpmorphy_substr']($path, 0, -1);
            }
        } while (!empty($parents));

        return $reported;
    }

    /**
     * Collects the raw transitions of the state at word index $index.
     *
     * @param int $index State index in 4-byte words from fsa_start.
     * @return int[] Annotation transition first (when present), then one entry per alphabet byte that has a transition.
     */
    public function readState($index) {
        $handle = $this->resource;
        $found = array();
        $cursor = $this->fsa_start + ($index << 2);

        // slot 0 of the state: annotation transition, if any
        fseek($handle, $cursor);
        list(, $head) = unpack('V', fread($handle, 4));
        if ($head & 0x0100) {
            $found[] = $head;
        }

        // remaining slots are probed once per alphabet byte
        $cursor += 4;
        foreach ($this->getAlphabetNum() as $code) {
            fseek($handle, $cursor + ($code << 2));
            list(, $trans) = unpack('V', fread($handle, 4));

            // skip slots flagged empty (0x0200) or term (0x0100)
            if ($trans & 0x0300) {
                continue;
            }

            if (($trans & 0xFF) == $code) {
                $found[] = $trans;
            }
        }

        return $found;
    }

    /**
     * Decodes raw transition words.
     *
     * @param int|int[] $rawTranses One raw transition or a list of them.
     * @return array[] Arrays with keys 'term', 'empty', 'attr', 'dest'.
     */
    public function unpackTranses($rawTranses) {
        $decoded = array();

        foreach ((array)$rawTranses as $raw) {
            $decoded[] = array(
                'term' => (bool)($raw & 0x0100),
                'empty' => (bool)($raw & 0x0200),
                'attr' => $raw & 0xFF,
                'dest' => ($raw >> 10) & 0x3FFFFF,
            );
        }

        return $decoded;
    }

    /**
     * Reads the root transition, stored one word after fsa_start.
     *
     * @return int
     */
    protected function readRootTrans() {
        $handle = $this->resource;

        fseek($handle, $this->fsa_start + 4);
        list(, $root) = unpack('V', fread($handle, 4));

        return $root;
    }

    /**
     * Returns the alphabet bytes located via the header.
     *
     * @return string
     */
    protected function readAlphabet() {
        $handle = $this->resource;

        fseek($handle, $this->header['alphabet_offset']);

        return fread($handle, $this->header['alphabet_size']);
    }

    /**
     * Resolves the annotation referenced by a term transition.
     *
     * @param int $trans Raw transition word.
     * @return string|null Null for non-term transitions and for zero-length annotations.
     */
    public function getAnnot($trans) {
        if (!($trans & 0x0100)) {
            return null;
        }

        $handle = $this->resource;

        // attr supplies the high bits of the offset, dest the low 22 bits
        $relative = (($trans & 0xFF) << 22) | (($trans >> 10) & 0x3FFFFF);
        fseek($handle, $this->header['annot_offset'] + $relative);

        // first byte is the length; the annotation follows immediately after it
        $len = ord(fread($handle, 1));

        return $len ? fread($handle, $len) : null;
    }

    /**
     * Alphabet as a list of byte values, computed once and cached.
     *
     * @return int[]
     */
    public function getAlphabetNum() {
        if (isset($this->alphabet_num)) {
            return $this->alphabet_num;
        }

        $this->alphabet_num = array_map('ord', $this->getAlphabet());

        return $this->alphabet_num;
    }
}

// Residue of the archive dump that followed this class on the same line:
// PK+[[7<:fsa/access/fsa_sparse_shm.phpnuW+A
//  *
//  * This library is free software; you can redistribute it and/or
//  * modify it under the terms of the GNU Lesser General Public
//  * License as published by the Free Software Foundation; either
//  * version 2 of the License, or (at your option) any later version.
//  *
//  * This library is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  * Lesser General Public License for more details.
//  *
//  * You should have received a copy of the GNU Lesser General Public
//  * License along with this library; if not, write to the
//  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
//  * Boston, MA 02111-1307, USA.
//  */
// /**
//  * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/fsa/tpl/fsa.tpl.php on line 24
/* Wed, 15 Jan 2020 20:00:56 +0000, don`t change it! */

/**
 * Sparse-layout FSA accessor reading from a shared memory block.
 *
 * $this->resource is an array with the keys 'shm_id' (handle passed to
 * shmop_read()) and 'offset' (added to every read position).
 *
 * A transition is one 32-bit little-endian word:
 *   - bits 0..7    attr:  byte label
 *   - bit  8       term:  the word refers to an annotation
 *   - bit  9       empty: the slot is unused
 *   - bits 10..31  dest:  target state index, in 4-byte words from fsa_start
 *
 * For a state with index s, slot s holds the annotation transition (if any) and
 * slot s + 1 + b holds the transition for byte b.
 */
class phpMorphy_Fsa_Sparse_Shm extends phpMorphy_Fsa {
    /** Byte values of the alphabet; filled on first use by getAlphabetNum(). */
    protected $alphabet_num;

    /**
     * Consumes $word byte by byte, beginning at transition $trans.
     *
     * @param int    $trans     Raw transition word to start from.
     * @param string $word      Bytes to consume.
     * @param bool   $readAnnot Whether to fetch the annotation of the reached state.
     * @return array Keys: 'result', 'last_trans', 'word_trans', 'walked', 'annot'.
     */
    public function walk($trans, $word, $readAnnot = true) {
        $shm = $this->resource['shm_id'];
        $shmBase = $this->resource['offset'];
        $base = $this->fsa_start;
        $length = $GLOBALS['__phpmorphy_strlen']($word);

        $pos = 0;
        while ($pos < $length) {
            $code = ord($word[$pos]);

            // sparse version: the slot for this byte is addressed directly
            $slot = $base + (((($trans >> 10) & 0x3FFFFF) + $code + 1) << 2);
            list(, $next) = unpack('V', shmop_read($shm, $shmBase + ($slot), 4));

            // NOTE(review): readState() additionally rejects slots with the term
            // bit set; this lookup does not - confirm whether that is intended.
            if (($next & 0x0200) || ($next & 0xFF) != $code) {
                // no such transition: $trans keeps the last one that matched
                break;
            }

            $trans = $next;
            $pos++;
        }

        $wordTrans = $trans;
        $annot = null;
        $ok = false;

        if ($pos >= $length) {
            // the whole word was consumed
            $ok = true;

            if ($readAnnot) {
                $annotSlot = $base + ((($trans >> 10) & 0x3FFFFF) << 2);
                list(, $trans) = unpack('V', shmop_read($shm, $shmBase + ($annotSlot), 4));

                if ($trans & 0x0100) {
                    $annot = $this->getAnnot($trans);
                } else {
                    $ok = false;
                }
            }
        }

        return array(
            'result' => $ok,
            'last_trans' => $trans,
            'word_trans' => $wordTrans,
            'walked' => $pos,
            'annot' => $annot,
        );
    }

    /**
     * Depth-first traversal reporting every term transition below $startNode.
     *
     * @param int      $startNode Raw transition whose target state is the traversal root.
     * @param callable $callback  Called as callback($path, $annot); a falsy result stops the traversal.
     * @param bool     $readAnnot When false the raw transition is reported instead of the annotation.
     * @param string   $path      Prefix for reported paths.
     * @return int Number of term transitions reported.
     */
    public function collect($startNode, $callback, $readAnnot = true, $path = '') {
        $reported = 0;

        // both stacks start with a sentinel; popping it terminates the loop
        $parents = array(null);
        $resumeAt = array(null);
        $from = 0;

        $state = $this->readState(($startNode >> 10) & 0x3FFFFF);

        do {
            $descended = false;
            $size = count($state);

            for ($k = $from; $k < $size; $k++) {
                $trans = $state[$k];

                if ($trans & 0x0100) {
                    $reported++;
                    $annot = $readAnnot ? $this->getAnnot($trans) : $trans;

                    if (!call_user_func($callback, $path, $annot)) {
                        return $reported;
                    }

                    continue;
                }

                // ordinary transition: remember the position and go one level down
                $path .= chr($trans & 0xFF);
                $parents[] = $state;
                $resumeAt[] = $k + 1;
                $state = $this->readState(($trans >> 10) & 0x3FFFFF);
                $from = 0;
                $descended = true;
                break;
            }

            if (!$descended) {
                // current state is exhausted: return to the parent
                $state = array_pop($parents);
                $from = array_pop($resumeAt);
                $path = $GLOBALS['__phpmorphy_substr']($path, 0, -1);
            }
        } while (!empty($parents));

        return $reported;
    }

    /**
     * Collects the raw transitions of the state at word index $index.
     *
     * @param int $index State index in 4-byte words from fsa_start.
     * @return int[] Annotation transition first (when present), then one entry per alphabet byte that has a transition.
     */
    public function readState($index) {
        $shm = $this->resource['shm_id'];
        $shmBase = $this->resource['offset'];
        $found = array();
        $cursor = $this->fsa_start + ($index << 2);

        // slot 0 of the state: annotation transition, if any
        list(, $head) = unpack('V', shmop_read($shm, $shmBase + ($cursor), 4));
        if ($head & 0x0100) {
            $found[] = $head;
        }

        // remaining slots are probed once per alphabet byte
        $cursor += 4;
        foreach ($this->getAlphabetNum() as $code) {
            list(, $trans) = unpack('V', shmop_read($shm, $shmBase + ($cursor + ($code << 2)), 4));

            // skip slots flagged empty (0x0200) or term (0x0100)
            if ($trans & 0x0300) {
                continue;
            }

            if (($trans & 0xFF) == $code) {
                $found[] = $trans;
            }
        }

        return $found;
    }

    /**
     * Decodes raw transition words.
     *
     * @param int|int[] $rawTranses One raw transition or a list of them.
     * @return array[] Arrays with keys 'term', 'empty', 'attr', 'dest'.
     */
    public function unpackTranses($rawTranses) {
        $decoded = array();

        foreach ((array)$rawTranses as $raw) {
            $decoded[] = array(
                'term' => (bool)($raw & 0x0100),
                'empty' => (bool)($raw & 0x0200),
                'attr' => $raw & 0xFF,
                'dest' => ($raw >> 10) & 0x3FFFFF,
            );
        }

        return $decoded;
    }

    /**
     * Reads the root transition, stored one word after fsa_start.
     *
     * @return int
     */
    protected function readRootTrans() {
        $shm = $this->resource['shm_id'];
        $shmBase = $this->resource['offset'];

        list(, $root) = unpack('V', shmop_read($shm, $shmBase + ($this->fsa_start + 4), 4));

        return $root;
    }

    /**
     * Returns the alphabet bytes located via the header.
     *
     * @return string
     */
    protected function readAlphabet() {
        $shm = $this->resource['shm_id'];
        $shmBase = $this->resource['offset'];

        return shmop_read($shm, $shmBase + ($this->header['alphabet_offset']), $this->header['alphabet_size']);
    }

    /**
     * Resolves the annotation referenced by a term transition.
     *
     * @param int $trans Raw transition word.
     * @return string|null Null for non-term transitions and for zero-length annotations.
     */
    public function getAnnot($trans) {
        if (!($trans & 0x0100)) {
            return null;
        }

        $shm = $this->resource['shm_id'];
        $shmBase = $this->resource['offset'];

        // attr supplies the high bits of the offset, dest the low 22 bits
        $relative = (($trans & 0xFF) << 22) | (($trans >> 10) & 0x3FFFFF);
        $offset = $this->header['annot_offset'] + $relative;

        // first byte is the length of the annotation that follows
        $len = ord(shmop_read($shm, $shmBase + ($offset), 1));

        return $len ? shmop_read($shm, $shmBase + ($offset + 1), $len) : null;
    }

    /**
     * Alphabet as a list of byte values, computed once and cached.
     *
     * @return int[]
     */
    public function getAlphabetNum() {
        if (isset($this->alphabet_num)) {
            return $this->alphabet_num;
        }

        $this->alphabet_num = array_map('ord', $this->getAlphabet());

        return $this->alphabet_num;
    }
}

// Residue of the archive dump that followed this class on the same line
// (the licence text continues on the next line of the dump):
// PK+[[t t fsa/access/fsa_tree_file.phpnuW+A
//  *
//  * This library is free software; you can redistribute it and/or
//  * modify it under the terms of the GNU Lesser General Public
//  * License as published by the Free Software Foundation; either
//  * version 2 of the License, or (at your option) any later version.
//  *
//  * This library is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  * Lesser General Public License for more details.
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /** * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/fsa/tpl/fsa.tpl.php on line 24
/* Wed, 15 Jan 2020 20:00:56 +0000, don`t change it! */

/**
 * Tree-layout FSA accessor reading from the file handle kept in $this->resource.
 *
 * Every transition is one 32-bit little-endian word (decoded with unpack('V')):
 *   - bits 0..7    attr:  byte label of the transition
 *   - bit  8       term:  the word refers to an annotation, not to a state
 *   - bit  9       llast: no left child inside the per-state search tree
 *   - bit  10      rlast: no right child inside the per-state search tree
 *   - bits 11..31  dest:  target state index, in 4-byte words from fsa_start
 */
class phpMorphy_Fsa_Tree_File extends phpMorphy_Fsa {
    /**
     * Consumes $word byte by byte, beginning at transition $trans.
     *
     * @param int    $trans     Raw transition word to start from.
     * @param string $word      Bytes to consume.
     * @param bool   $readAnnot Whether to fetch the annotation of the reached state.
     * @return array Keys: 'result', 'last_trans', 'word_trans', 'walked', 'annot'.
     * @throws phpMorphy_Exception When the search inside one state exceeds its step limit.
     */
    public function walk($trans, $word, $readAnnot = true) {
        $handle = $this->resource;
        $base = $this->fsa_start;
        $length = $GLOBALS['__phpmorphy_strlen']($word);

        for ($pos = 0; $pos < $length; $pos++) {
            $origin = $trans; // restored when the current byte cannot be matched
            $code = ord($word[$pos]);

            // tree version: locate the state and read its first transition
            $stateOffset = $base + ((($trans >> 11) & 0x1FFFFF) << 2);
            fseek($handle, $stateOffset);
            list(, $trans) = unpack('V', fread($handle, 4));

            $matched = true;

            // a leading term transition points to the annotation and is skipped
            if ($trans & 0x0100) {
                if (($trans & 0x0200) && ($trans & 0x0400)) {
                    // it is the only transition of the state: nothing to search
                    $matched = false;
                } else {
                    $stateOffset += 4;
                    fseek($handle, $stateOffset);
                    list(, $trans) = unpack('V', fread($handle, 4));
                }
            }

            if ($matched) {
                // binary search: node n has its children at 2n and 2n + 1
                $node = 1;
                $steps = 0;

                while (true) {
                    $label = $trans & 0xFF;

                    if ($label == $code) {
                        break;
                    }

                    if ($label > $code) {
                        if ($trans & 0x0200) {
                            $matched = false;
                            break;
                        }

                        $node = $node << 1;
                    } else {
                        if ($trans & 0x0400) {
                            $matched = false;
                            break;
                        }

                        $node = ($node << 1) + 1;
                    }

                    // guard against data that never terminates the search
                    if ($steps > 255) {
                        throw new phpMorphy_Exception('Infinite recursion possible');
                    }

                    fseek($handle, $stateOffset + (($node - 1) << 2));
                    list(, $trans) = unpack('V', fread($handle, 4));
                    $steps++;
                }
            }

            if (!$matched) {
                $trans = $origin;
                break;
            }
        }

        $wordTrans = $trans;
        $annot = null;
        $ok = false;

        if ($pos >= $length) {
            // the whole word was consumed
            $ok = true;

            if ($readAnnot) {
                fseek($handle, $base + ((($trans >> 11) & 0x1FFFFF) << 2));
                list(, $trans) = unpack('V', fread($handle, 4));

                if ($trans & 0x0100) {
                    $annot = $this->getAnnot($trans);
                } else {
                    $ok = false;
                }
            }
        }

        return array(
            'result' => $ok,
            'last_trans' => $trans,
            'word_trans' => $wordTrans,
            'walked' => $pos,
            'annot' => $annot,
        );
    }

    /**
     * Depth-first traversal reporting every term transition below $startNode.
     *
     * @param int      $startNode Raw transition whose target state is the traversal root.
     * @param callable $callback  Called as callback($path, $annot); a falsy result stops the traversal.
     * @param bool     $readAnnot When false the raw transition is reported instead of the annotation.
     * @param string   $path      Prefix for reported paths.
     * @return int Number of term transitions reported.
     */
    public function collect($startNode, $callback, $readAnnot = true, $path = '') {
        $reported = 0;

        // both stacks start with a sentinel; popping it terminates the loop
        $parents = array(null);
        $resumeAt = array(null);
        $from = 0;

        $state = $this->readState(($startNode >> 11) & 0x1FFFFF);

        do {
            $descended = false;
            $size = count($state);

            for ($k = $from; $k < $size; $k++) {
                $trans = $state[$k];

                if ($trans & 0x0100) {
                    $reported++;
                    $annot = $readAnnot ? $this->getAnnot($trans) : $trans;

                    if (!call_user_func($callback, $path, $annot)) {
                        return $reported;
                    }

                    continue;
                }

                // ordinary transition: remember the position and go one level down
                $path .= chr($trans & 0xFF);
                $parents[] = $state;
                $resumeAt[] = $k + 1;
                $state = $this->readState(($trans >> 11) & 0x1FFFFF);
                $from = 0;
                $descended = true;
                break;
            }

            if (!$descended) {
                // current state is exhausted: return to the parent
                $state = array_pop($parents);
                $from = array_pop($resumeAt);
                $path = $GLOBALS['__phpmorphy_substr']($path, 0, -1);
            }
        } while (!empty($parents));

        return $reported;
    }

    /**
     * Reads all raw transitions of the state stored at word index $index.
     *
     * After the initial fseek() the words are read back to back, so the file
     * pointer itself tracks the position of the next transition.
     *
     * @param int $index State index in 4-byte words from fsa_start.
     * @return int[] Raw transition words, a leading annotation transition first when present.
     */
    public function readState($index) {
        $handle = $this->resource;
        $found = array();

        fseek($handle, $this->fsa_start + ($index << 2));
        list(, $trans) = unpack('V', fread($handle, 4));

        // leading annotation pointer that carries neither llast nor rlast
        if (($trans & 0x0100) && !($trans & 0x0600)) {
            $found[] = $trans;
            list(, $trans) = unpack('V', fread($handle, 4));
        }

        // every missing llast/rlast flag announces one more transition to read
        $pending = 1;
        while ($pending) {
            if (!($trans & 0x0200)) {
                $pending++;
            }

            if (!($trans & 0x0400)) {
                $pending++;
            }

            $found[] = $trans;

            if ($pending > 1) {
                list(, $trans) = unpack('V', fread($handle, 4));
            }

            $pending--;
        }

        return $found;
    }

    /**
     * Decodes raw transition words.
     *
     * @param int|int[] $rawTranses One raw transition or a list of them.
     * @return array[] Arrays with keys 'term', 'llast', 'rlast', 'attr', 'dest'.
     */
    public function unpackTranses($rawTranses) {
        $decoded = array();

        foreach ((array)$rawTranses as $raw) {
            $decoded[] = array(
                'term' => (bool)($raw & 0x0100),
                'llast' => (bool)($raw & 0x0200),
                'rlast' => (bool)($raw & 0x0400),
                'attr' => $raw & 0xFF,
                'dest' => ($raw >> 11) & 0x1FFFFF,
            );
        }

        return $decoded;
    }

    /**
     * Reads the root transition, the first word at fsa_start.
     *
     * @return int
     */
    protected function readRootTrans() {
        $handle = $this->resource;

        fseek($handle, $this->fsa_start + 0);
        list(, $root) = unpack('V', fread($handle, 4));

        return $root;
    }

    /**
     * Returns the alphabet bytes located via the header.
     *
     * @return string
     */
    protected function readAlphabet() {
        $handle = $this->resource;

        fseek($handle, $this->header['alphabet_offset']);

        return fread($handle, $this->header['alphabet_size']);
    }

    /**
     * Resolves the annotation referenced by a term transition.
     *
     * @param int $trans Raw transition word.
     * @return string|null Null for non-term transitions and for zero-length annotations.
     */
    public function getAnnot($trans) {
        if (!($trans & 0x0100)) {
            return null;
        }

        $handle = $this->resource;

        // attr supplies the high bits of the offset, dest the low 21 bits
        $relative = (($trans & 0xFF) << 21) | (($trans >> 11) & 0x1FFFFF);
        fseek($handle, $this->header['annot_offset'] + $relative);

        // first byte is the length; the annotation follows immediately after it
        $len = ord(fread($handle, 1));

        return $len ? fread($handle, $len) : null;
    }
}

// Residue of the archive dump that followed this class on the same line:
// PK+[[Xf gramtab.phpnuW+A
//  *
//  * This library is free software; you can redistribute it and/or
//  * modify it under the terms of the GNU Lesser General Public
//  * License as published by the Free Software Foundation; either
//  * version 2 of the License, or (at your option) any later version.
//  *
//  * This library is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  * Lesser General Public License for more details.
//  *
//  * You should have received a copy of the GNU Lesser General Public
//  * License along with this library; if not, write to the
//  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
//  * Boston, MA 02111-1307, USA.
// */  (tail of the licence comment that precedes this point in the dump)

/**
 * Operations every grammatical table ("gramtab") implementation provides.
 */
interface phpMorphy_GramTab_Interface {
    /** Returns the grammeme ids attached to an ancode. */
    function getGrammems($ancodeId);

    /** Returns the part-of-speech id attached to an ancode. */
    function getPartOfSpeech($ancodeId);

    /** Maps one grammeme id, or an array of them, to grammeme name(s). */
    function resolveGrammemIds($ids);

    /** Maps a part-of-speech id to its name. */
    function resolvePartOfSpeechId($id);

    /** Loads the gramtab constant definitions. */
    function includeConsts();

    /** Renders an ancode as a string, optionally prefixed by the grammemes of a common ancode. */
    function ancodeToString($ancodeId, $commonAncode = null);

    /** Looks up the ancode id for a string produced by toString(). */
    function stringToAncode($string);

    /** Renders a part-of-speech id plus grammeme ids as a string. */
    function toString($partOfSpeechId, $grammemIds);
}

/**
 * Gramtab that knows nothing: every lookup yields an empty value.
 */
class phpMorphy_GramTab_Empty implements phpMorphy_GramTab_Interface {
    function getGrammems($ancodeId) { return array(); }
    function getPartOfSpeech($ancodeId) { return 0; }
    // mirrors the shape of the argument: array in, array out; scalar in, string out
    function resolveGrammemIds($ids) { return is_array($ids) ? array() : ''; }
    function resolvePartOfSpeechId($id) { return ''; }
    function includeConsts() { }
    function ancodeToString($ancodeId, $commonAncode = null) { return ''; }
    function stringToAncode($string) { return null; }
    function toString($partOfSpeechId, $grammemIds) { return ''; }
}

/**
 * Gramtab that defers creating the real phpMorphy_GramTab until first use.
 *
 * The real object is stored in the `__obj` property, which is created by
 * __get() on first access. The attribute below permits that dynamically
 * created property on PHP 8.2 and later; older PHP versions read the line as
 * an ordinary `#` comment.
 */
#[AllowDynamicProperties]
class phpMorphy_GramTab_Proxy implements phpMorphy_GramTab_Interface {
    /** Storage handed to phpMorphy_GramTab::create(); unset once the real object exists. */
    protected $storage;

    function __construct(phpMorphy_Storage $storage) {
        $this->storage = $storage;
    }

    function getGrammems($ancodeId) {
        return $this->__obj->getGrammems($ancodeId);
    }

    function getPartOfSpeech($ancodeId) {
        return $this->__obj->getPartOfSpeech($ancodeId);
    }

    function resolveGrammemIds($ids) {
        return $this->__obj->resolveGrammemIds($ids);
    }

    function resolvePartOfSpeechId($id) {
        return $this->__obj->resolvePartOfSpeechId($id);
    }

    function includeConsts() {
        return $this->__obj->includeConsts();
    }

    function ancodeToString($ancodeId, $commonAncode = null) {
        return $this->__obj->ancodeToString($ancodeId, $commonAncode);
    }

    function stringToAncode($string) {
        return $this->__obj->stringToAncode($string);
    }

    function toString($partOfSpeechId, $grammemIds) {
        return $this->__obj->toString($partOfSpeechId, $grammemIds);
    }

    /**
     * Creates the real gramtab the first time `__obj` is read.
     *
     * @param string $name Property being read.
     * @return phpMorphy_GramTab
     * @throws phpMorphy_Exception For any property other than `__obj`.
     */
    function __get($name) {
        if($name === '__obj') {
            // assigning here turns __obj into a real property, so later reads bypass __get()
            $this->__obj = phpMorphy_GramTab::create($this->storage);
            unset($this->storage);
            return $this->__obj;
        }

        throw new phpMorphy_Exception("Invalid prop name '$name'");
    }
}

/**
 * Gramtab backed by a serialized array with the keys 'grammems', 'poses' and
 * 'ancodes'.
 *
 * The reverse map used by stringToAncode() lives in the `__ancodes_map`
 * property, which __get() creates on first access. The attribute below permits
 * that dynamically created property on PHP 8.2 and later; older PHP versions
 * read the line as an ordinary `#` comment.
 */
#[AllowDynamicProperties]
class phpMorphy_GramTab implements phpMorphy_GramTab_Interface {
    protected
        $data,
        $ancodes,
        $grammems,
        // $__ancodes_map,
        $poses;

    /**
     * Loads and validates the table.
     *
     * @param phpMorphy_Storage $storage Source of the serialized table; read in full.
     * @throws phpMorphy_Exception When the payload is not an array carrying all three sections.
     */
    protected function __construct(phpMorphy_Storage $storage) {
        // NOTE(review): unserialize() must only ever be fed trusted dictionary
        // files; it is unsafe on attacker-controlled input.
        $data = unserialize($storage->read(0, $storage->getFileSize()));

        // unserialize() yields false on failure, but a well-formed payload of the
        // wrong shape has to be rejected as well before it is indexed below
        if(!is_array($data) || !isset($data['grammems'], $data['poses'], $data['ancodes'])) {
            throw new phpMorphy_Exception("Broken gramtab data");
        }

        $this->data = $data;
        $this->grammems = $data['grammems'];
        $this->poses = $data['poses'];
        $this->ancodes = $data['ancodes'];
    }

    // TODO: remove this
    /**
     * Factory wrapper around the protected constructor.
     *
     * @param phpMorphy_Storage $storage
     * @return phpMorphy_GramTab
     */
    static function create(phpMorphy_Storage $storage) {
        return new phpMorphy_GramTab($storage);
    }

    /**
     * @param int|string $ancodeId
     * @return mixed The 'grammem_ids' entry of the ancode.
     * @throws phpMorphy_Exception When the ancode id is unknown.
     */
    function getGrammems($ancodeId) {
        if(!isset($this->ancodes[$ancodeId])) {
            throw new phpMorphy_Exception("Invalid ancode id '$ancodeId'");
        }

        return $this->ancodes[$ancodeId]['grammem_ids'];
    }

    /**
     * @param int|string $ancodeId
     * @return mixed The 'pos_id' entry of the ancode.
     * @throws phpMorphy_Exception When the ancode id is unknown.
     */
    function getPartOfSpeech($ancodeId) {
        if(!isset($this->ancodes[$ancodeId])) {
            throw new phpMorphy_Exception("Invalid ancode id '$ancodeId'");
        }

        return $this->ancodes[$ancodeId]['pos_id'];
    }

    /**
     * Translates grammeme id(s) into name(s).
     *
     * @param mixed $ids A single id or an array of ids.
     * @return mixed A list of names for array input, a single name otherwise.
     * @throws phpMorphy_Exception When an id is unknown.
     */
    function resolveGrammemIds($ids) {
        if(is_array($ids)) {
            $result = array();

            foreach($ids as $id) {
                if(!isset($this->grammems[$id])) {
                    throw new phpMorphy_Exception("Invalid grammem id '$id'");
                }

                $result[] = $this->grammems[$id]['name'];
            }

            return $result;
        } else {
            if(!isset($this->grammems[$ids])) {
                throw new phpMorphy_Exception("Invalid grammem id '$ids'");
            }

            return $this->grammems[$ids]['name'];
        }
    }

    /**
     * @param int|string $id
     * @return mixed The 'name' entry of the part of speech.
     * @throws phpMorphy_Exception When the id is unknown.
     */
    function resolvePartOfSpeechId($id) {
        if(!isset($this->poses[$id])) {
            throw new phpMorphy_Exception("Invalid part of speech id '$id'");
        }

        return $this->poses[$id]['name'];
    }

    /** Loads gramtab_consts.php from PHPMORPHY_DIR (at most once). */
    function includeConsts() {
        require_once(PHPMORPHY_DIR . '/gramtab_consts.php');
    }

    /**
     * Renders "<pos id> <grammeme ids>" for an ancode; when $commonAncode is
     * given its grammeme ids are placed first, followed by a comma.
     *
     * @param int|string      $ancodeId
     * @param int|string|null $commonAncode
     * @return string
     * @throws phpMorphy_Exception When either ancode id is unknown.
     */
    function ancodeToString($ancodeId, $commonAncode = null) {
        if(isset($commonAncode)) {
            $commonAncode = implode(',', $this->getGrammems($commonAncode)) . ',';
        }

        return
            $this->getPartOfSpeech($ancodeId) . ' ' .
            $commonAncode .
            implode(',', $this->getGrammems($ancodeId));
    }

    /** Unimplemented stub: always returns null. */
    protected function findAncode($partOfSpeech, $grammems) {
    }

    /**
     * Reverse of toString(): finds the ancode id for a rendered string.
     *
     * @param string|null $string
     * @return mixed Ancode id, or null when $string is null.
     * @throws phpMorphy_Exception When no ancode renders to $string.
     */
    function stringToAncode($string) {
        if(!isset($string)) {
            return null;
        }

        // first access to __ancodes_map goes through __get(), which builds the map
        if(!isset($this->__ancodes_map[$string])) {
            throw new phpMorphy_Exception("Ancode with '$string' graminfo not found");
        }

        return $this->__ancodes_map[$string];
    }

    /**
     * @param mixed $partOfSpeechId
     * @param array $grammemIds
     * @return string "<pos id> <comma separated grammeme ids>"
     */
    function toString($partOfSpeechId, $grammemIds) {
        return $partOfSpeechId . ' ' . implode(',', $grammemIds);
    }

    /**
     * Builds the toString() => ancode id map over all ancodes.
     *
     * @return array
     */
    protected function buildAncodesMap() {
        $result = array();

        foreach($this->ancodes as $ancode_id => $data) {
            $key = $this->toString($data['pos_id'], $data['grammem_ids']);

            $result[$key] = $ancode_id;
        }

        return $result;
    }

    /**
     * Lazily materialises `__ancodes_map`.
     *
     * @param string $propName
     * @return array
     * @throws phpMorphy_Exception For any other property name.
     */
    function __get($propName) {
        switch($propName) {
            case '__ancodes_map':
                $this->__ancodes_map = $this->buildAncodesMap();
                return $this->__ancodes_map;
        }

        throw new phpMorphy_Exception("Unknown '$propName' property");
    }
}

// Residue of the archive dump that followed this class on the same line
// (the next archive member continues past the visible part of the dump):
// PK+[[6JJ shm_utils.phpnuW+A
//  *
//  * This library is free software; you can redistribute it and/or
//  * modify it under the terms of the GNU Lesser General Public
//  * License as published by the Free Software Foundation; either
//  * version 2 of the License, or (at your option) any later version.
//  *
//  * This library is distributed in the hope that it will be useful,
//  * but WITHOUT ANY WARRANTY; without even the implied warranty of
//  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  * Lesser General Public License for more details.
//  *
//  * You should have received a copy of the GNU Lesser General Public
//  * License along with this library; if not, write to the
//  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
//  * Boston, MA 02111-1307, USA.
*/ if(!defined('PHPMORPHY_SHM_SEGMENT_SIZE')) { define('PHPMORPHY_SHM_SEGMENT_SIZE', 1024 * 1024 * 24); } if(!defined('PHPMORPHY_SHM_SEGMENT_ID')) { define('PHPMORPHY_SHM_SEGMENT_ID', 0x54358308); } if(!defined('PHPMORPHY_SEMAPHORE_KEY')) { define('PHPMORPHY_SEMAPHORE_KEY', PHPMORPHY_SHM_SEGMENT_ID + 1); } if(!defined('PHPMORPHY_SHM_HEADER_MAX_SIZE')) { define('PHPMORPHY_SHM_HEADER_MAX_SIZE', 1024 * 32); } interface phpMorphy_Shm_Cache_Interface { function close(); function get($filePath); function clear(); function delete($filePath); function reload($filePath); function reloadIfExists($filePath); function free(); } class phpMorphy_Shm_Cache_FileDescriptor { private $shm_id, $file_size, $offset; function __construct($shmId, $fileSize, $offset) { $this->shm_id = $shmId; $this->file_size = $fileSize; $this->offset = $offset; } function getShmId() { return $this->shm_id; } function getFileSize() { return $this->file_size; } function getOffset() { return $this->offset; } } abstract class phpMorphy_Semaphore { abstract function lock(); abstract function unlock(); static function create($key, $empty = false) { if(!$empty) { if (0 == strcasecmp($GLOBALS['__phpmorphy_substr'](PHP_OS, 0, 3), 'WIN')) { $clazz = 'phpMorphy_Semaphore_Win'; } else { $clazz = 'phpMorphy_Semaphore_Nix'; } } else { $clazz = 'phpMorphy_Semaphore_Empty'; } return new $clazz($key); } }; class phpMorphy_Semaphore_Empty extends phpMorphy_Semaphore { function lock() { } function unlock() { } function remove() { } }; // TODO: implement this class phpMorphy_Semaphore_Win extends phpMorphy_Semaphore { const DIR_NAME = 'phpmorphy_semaphore'; const USLEEP_TIME = 100000; // 0.1s const MAX_SLEEP_TIME = 5000000; // 5sec protected $dir_path; protected function __construct($key) { $this->dir_path = $this->getTempDir() . DIRECTORY_SEPARATOR . self::DIR_NAME . 
"_$key"; register_shutdown_function(array($this, 'unlock')); } protected function getTempDir() { if(false === ($result = getenv('TEMP'))) { if(false === ($result = getenv('TMP'))) { throw new phpMorphy_Exception("Can`t get temporary directory"); } } return $result; } function lock() { for($i = 0; $i < self::MAX_SLEEP_TIME; $i += self::USLEEP_TIME) { if(!file_exists($this->dir_path)) { if(false !== @mkdir($this->dir_path, 0644)) { return true; } } usleep(self::USLEEP_TIME); } throw new phpMorphy_Exception("Can`t acquire semaphore"); } function unlock() { @rmdir($this->dir_path); } function remove() { } } class phpMorphy_Semaphore_Nix extends phpMorphy_Semaphore { const DEFAULT_PERM = 0644; private $sem_id = false; protected function __construct($key) { if(false === ($this->sem_id = sem_get($key, 1, self::DEFAULT_PERM, true))) { throw new phpMorphy_Exception("Can`t get semaphore for '$key' key"); } } function lock() { if(false === sem_acquire($this->sem_id)) { throw new phpMorphy_Exception("Can`t acquire semaphore"); } } function unlock() { if(false === sem_release($this->sem_id)) { throw new phpMorphy_Exception("Can`t release semaphore"); } } function remove() { sem_remove($this->sem_id); } } class phpMorphy_Shm_Header { protected $max_size, $segment_id, $files_map = array(), $free_map = array(); function __construct($segmentId, $maxSize) { $this->max_size = (int)$maxSize; $this->segment_id = $segmentId; $this->clear(); } function lookup($filePath) { if(!$this->exists($filePath)) { throw new phpMorphy_Exception("'$filePath' not found in shm"); } return $this->files_map[$this->normalizePath($filePath)]; } function exists($filePath) { return isset($this->files_map[$this->normalizePath($filePath)]); } function register($filePath, $fh) { if($this->exists($filePath)) { throw new phpMorphy_Exception("Can`t register, '$filePath' already exists"); } if(false === ($stat = fstat($fh))) { throw new phpMorphy_Exception("Can`t fstat '$filePath' file"); } $file_size = 
$stat['size']; $offset = $this->getBlock($file_size); $entry = array( 'offset' => $offset, 'mtime' => $stat['mtime'], 'size' => $file_size, 'shm_id' => $this->segment_id ); $this->files_map[$this->normalizePath($filePath)] = $entry; return $entry; } function delete($filePath) { $data = $this->lookup($filePath); unset($this->files_map[$this->normalizePath($filePath)]); $this->freeBlock($data['offset'], $data['size']); } function clear() { $this->files_map = array(); $this->free_map = array(0 => $this->max_size); } function getAllFiles() { return $this->files_map; } protected function registerBlock($offset, $size) { $old_size = $this->free_map[$offset]; if($old_size < $size) { throw new phpMorphy_Exception("Too small free block for register(free = $old_size, need = $size)"); } unset($this->free_map[$offset]); if($old_size > $size) { $this->free_map[$offset + $size] = $old_size - $size; } } protected function freeBlock($offset, $size) { $this->free_map[$offset] = $size; $this->defrag(); } protected function defrag() { ksort($this->free_map); $map_count = count($this->free_map); if($map_count < 2) { return; } $keys = array_keys($this->free_map); $i = 0; $prev_offset = $keys[$i]; for($i++; $i < $map_count; $i++) { $offset = $keys[$i]; if($prev_offset + $this->free_map[$prev_offset] == $offset) { // merge $this->free_map[$prev_offset] += $this->free_map[$offset]; unset($this->free_map[$offset]); } else { $prev_offset = $offset; } } } protected function getBlock($fileSize) { foreach($this->free_map as $offset => $size) { if($size >= $fileSize) { $this->registerBlock($offset, $fileSize); return $offset; } } throw new phpMorphy_Exception("Can`t find free space for $size block"); } protected function normalizePath($path) { return $path; } } class phpMorphy_Shm_Cache implements phpMorphy_Shm_Cache_Interface { const DEFAULT_MODE = 0644; const READ_BLOCK_SIZE = 8192; protected static $EXTENSION_PRESENT = null; protected $options, $semaphore, $segment ; function 
__construct($options = array(), $clear = false) { if(!isset(self::$EXTENSION_PRESENT)) { self::$EXTENSION_PRESENT = extension_loaded('shmop'); } if(!self::$EXTENSION_PRESENT) { throw new phpMorphy_Exception("shmop extension needed"); } $this->options = $options = $this->repairOptions($options); $this->semaphore = phpMorphy_Semaphore::create($options['semaphore_key'], $options['no_lock']); $this->segment = $this->getSegment($options['segment_id'], $options['segment_size']); if($clear) { $this->semaphore->remove(); $this->initHeaderObject($this->segment); } } static function clearSemaphore($semaphoreId = null) { $semaphoreId = isset($semaphoreId) ? $semaphoreId : PHPMORPHY_SEMAPHORE_KEY; $sem = phpMorphy_Semaphore::create($semaphoreId); return $sem->remove(); } protected function repairOptions($options) { $defaults = array( 'semaphore_key' => PHPMORPHY_SEMAPHORE_KEY, 'segment_id' => PHPMORPHY_SHM_SEGMENT_ID, 'segment_size' => PHPMORPHY_SHM_SEGMENT_SIZE, 'with_mtime' => false, 'header_max_size' => PHPMORPHY_SHM_HEADER_MAX_SIZE, 'no_lock' => false, ); return (array)$options + $defaults; } function close() { if(isset($this->segment)) { shmop_close($this->segment); $this->segment = null; } } protected function safeInvoke($filePath, $method) { $this->lock(); try { $header = $this->readHeader(); $result = $this->$method($filePath, $header); // writeHeader is atomic $this->writeHeader($this->segment, $header); $this->unlock(); return $result; } catch (Exception $e) { $this->unlock(); throw $e; } } protected function doGet($filePath, $header) { $result = array(); foreach((array)$filePath as $file) { $result[$file] = $this->getSingleFile($header, $file); } if(!is_array($filePath)) { $result = $result[$filePath]; } return $result; } function get($filePath) { if(!is_array($filePath)) { return $this->createFileDescriptor($this->safeInvoke($filePath, 'doGet')); } else { $result = array(); foreach($this->safeInvoke($filePath, 'doGet') as $file => $item) { $result[$file] = 
$this->createFileDescriptor($item); } return $result; } } protected function getSingleFile($header, $filePath) { try { $fh = false; if(false !== $header->exists($filePath)) { $result = $header->lookup($filePath); if(!$this->options['with_mtime']) { return $result; } if(false === ($mtime = filemtime($filePath))) { throw new phpMorphy_Exception("Can`t get mtime attribute for '$filePath' file"); } if($result['mtime'] === $mtime) { return $result; } $fh = $this->openFile($filePath); // update $header->delete($filePath); $result = $header->register($filePath, $fh); $this->saveFile($fh, $result['offset']); fclose($fh); return $result; } // register $fh = $this->openFile($filePath); $result = $header->register($filePath, $fh); $this->saveFile($fh, $result['offset']); fclose($fh); return $result; } catch (Exception $e) { if(isset($fh) && $fh !== false) { fclose($fh); } throw $e; } } protected function doClear($filePath, $header) { $header->clear(); } function clear() { $this->safeInvoke(null, 'doClear'); } protected function doDelete($filePath, $header) { foreach((array)$filePath as $file) { $hdr->delete($file); } } function delete($filePath) { $this->safeInvoke($filePath, 'doDelete'); } protected function doReload($filePath, $header) { $return = array(); foreach((array)$filePath as $file) { $fh = $this->openFile($file); // update $hdr->delete($file); $result = $hdr->register($file, $fh); $this->saveFile($fh, $result['offset']); fclose($fh); $fh = false; $return[$file] = $result; } if(!is_array($filePath)) { $return = $return[$filePath]; } return $return; } function reload($filePath) { if(!is_array($filePath)) { return $this->createFileDescriptor($this->safeInvoke($filePath, 'doReload')); } else { $result = array(); foreach($this->safeInvoke($filePath, 'doReload') as $file => $item) { $result[$file] = $this->createFileDescriptor($item); } return $result; } } function reloadIfExists($filePath) { try { return $this->reload($filePath); } catch (Exception $e) { return false; } 
} function free() { $this->lock(); if(false === shmop_delete($this->segment)) { throw new phpMorphy_Exception("Can`t delete $this->segment segment"); } $this->close(); $this->unlock(); } function getFilesList() { $this->lock(); $result = $this->readHeader()->getAllFiles(); $this->unlock(); return $result; } protected function createFileDescriptor($result) { return new phpMorphy_Shm_Cache_FileDescriptor($this->segment, $result['size'], $this->options['header_max_size'] + $result['offset']); } protected function openFile($filePath) { if(false === ($fh = fopen($filePath, 'rb'))) { throw new phpMorphy_Exception("Can`t open '$filePath' file"); } return $fh; } protected function lock() { $this->semaphore->lock(); } protected function unlock() { $this->semaphore->unlock(); } protected function getFilesOffset() { return $this->options['header_max_size']; } protected function getMaxOffset() { return $this->options['segment_size'] - 1; } protected function saveFile($fh, $offset) { if(false === ($stat = fstat($fh))) { throw new phpMorphy_Exception("Can`t fstat '$filePath'"); } $file_size = $stat['size']; $chunk_size = self::READ_BLOCK_SIZE; $max_offset = $offset + $file_size; if($max_offset >= $this->getMaxOffset()) { throw new phpMorphy_Exception("Can`t write '$filePath' file to $offset offset, not enough space"); } $i = 0; while(!feof($fh)) { $data = fread($fh, $chunk_size); if(false === (shmop_write($this->segment, $data, $this->getFilesOffset() + $offset + $i))) { throw new phpMorphy_Exception("Can`t write chunk of file '$filePath' to shm"); } $i += $chunk_size; } } protected function getSegment($segmentId, $segmentSize) { $this->lock(); try { $shm_id = $this->openSegment($segmentId, $segmentSize, $is_new); if($is_new) { $this->initHeaderObject($shm_id, false); } } catch (Exception $e) { $this->unlock(); throw $e; } $this->unlock(); return $shm_id; } protected function initHeaderObject($shmId, $lock = true) { if($lock) { $this->lock(); $this->writeHeader($shmId, 
$this->createHeader($shmId)); $this->unlock(); } else { $this->writeHeader($shmId, $this->createHeader($shmId)); } } protected function readHeader() { if(false === ($data = shmop_read($this->segment, 0, $this->getFilesOffset()))) { throw new phpMorphy_Exception("Can`t read header for " . $this->segment); } if(false === ($result = unserialize($data))) { throw new phpMorphy_Exception("Can`t unserialize header for " . $this->segment); } return $result; } protected function writeHeader($shmId, phpMorphy_Shm_Header $header) { $data = serialize($header); if($GLOBALS['__phpmorphy_strlen']($data) > $this->getFilesOffset()) { throw new phpMorphy_Exception("Too long header, try increase PHPMORPHY_SHM_HEADER_MAX_SIZE"); } if(false === shmop_write($shmId, $data, 0)) { throw new phpMorphy_Exception("Can`t write shm header"); } } protected function createHeader($shmId) { return new phpMorphy_Shm_Header($shmId, $this->options['segment_size']); } protected function openSegment($segmentId, $size, &$new = null) { $new = false; if(false === ($handle = @shmop_open($segmentId, 'w', 0, 0))) { if(false === ($handle = shmop_open($segmentId, 'n', self::DEFAULT_MODE, $size))) { throw new phpMorphy_Exception("Can`t create SHM segment with '$segmentId' id and $size size"); } $new = true; } return $handle; } } PK+[[*^^langs_stuff/common.phpnuW+Ainner = $inner; } function getGrammems($partOfSpeech) { return $this->inner->getGrammems($partOfSpeech); } } abstract class phpMorphy_GrammemsProvider_Base implements phpMorphy_GrammemsProvider_Interface { protected $all_grammems, $grammems = array(); function __construct() { $this->all_grammems = $this->flatizeArray($this->getAllGrammemsGrouped()); } abstract function getAllGrammemsGrouped(); function includeGroups($partOfSpeech, $names) { $grammems = $this->getAllGrammemsGrouped(); $names = array_flip((array)$names); foreach(array_keys($grammems) as $key) { if(!isset($names[$key])) { unset($grammems[$key]); } } $this->grammems[$partOfSpeech] = 
$this->flatizeArray($grammems); return $this; } function excludeGroups($partOfSpeech, $names) { $grammems = $this->getAllGrammemsGrouped(); foreach((array)$names as $key) { unset($grammems[$key]); } $this->grammems[$partOfSpeech] = $this->flatizeArray($grammems); return $this; } function resetGroups($partOfSpeech) { unset($this->grammems[$partOfSpeech]); return $this; } function resetGroupsForAll() { $this->grammems = array(); return $this; } static function flatizeArray($array) { return call_user_func_array('array_merge', $array); } function getGrammems($partOfSpeech) { if(isset($this->grammems[$partOfSpeech])) { return $this->grammems[$partOfSpeech]; } else { return $this->all_grammems; } } } class phpMorphy_GrammemsProvider_Empty extends phpMorphy_GrammemsProvider_Base { function getAllGrammemsGrouped() { return array(); } function getGrammems($partOfSpeech) { return false; } } abstract class phpMorphy_GrammemsProvider_ForFactory extends phpMorphy_GrammemsProvider_Base { protected $encoded_grammems; function __construct($encoding) { $this->encoded_grammems = $this->encodeGrammems($this->getGrammemsMap(), $encoding); parent::__construct(); } abstract function getGrammemsMap(); function getAllGrammemsGrouped() { return $this->encoded_grammems; } protected function encodeGrammems($grammems, $encoding) { $from_encoding = $this->getSelfEncoding(); if($from_encoding == $encoding) { return $grammems; } $result = array(); foreach($grammems as $key => $ary) { $new_key = iconv($from_encoding, $encoding, $key); $new_value = array(); foreach($ary as $value) { $new_value[] = iconv($from_encoding, $encoding, $value); } $result[$new_key] = $new_value; } return $result; } } class phpMorphy_GrammemsProvider_Factory { protected static $included = array(); static function create(phpMorphy $morphy) { $locale = $GLOBALS['__phpmorphy_strtolower']($morphy->getLocale()); if(!isset(self::$included[$locale])) { $file_name = PHPMORPHY_DIR . 
"/langs_stuff/$locale.php"; $class = "phpMorphy_GrammemsProvider_$locale"; if(is_readable($file_name)) { require($file_name); if(!class_exists($class)) { throw new phpMorphy_Exception("Class '$class' not found in '$file_name' file"); } self::$included[$locale] = call_user_func(array($class, 'instance'), $morphy); } else { self::$included[$locale] = new phpMorphy_GrammemsProvider_Empty($morphy); } } return self::$included[$locale]; } } PK+[[_&&langs_stuff/ru_ru.phpnuW+A array('', '', ''), '' => array('', ''), '' => array('', ''), '' => array('', '', '', '', '', '', '', '2'), '' => array('', ''), '' => array('', '', ''), ' ' => array(''), '' => array('1', '2', '3'), '' => array(''), ' ' => array(''), ' ' => array(''), '' => array('', ''), '' => array('', ''), ' ' => array(''), ); function getSelfEncoding() { return 'windows-1251'; } function getGrammemsMap() { return self::$grammems_map; } static function instance(phpMorphy $morphy) { $key = $morphy->getEncoding(); if(!isset(self::$instances[$key])) { $class = __CLASS__; self::$instances[$key] = new $class($key); } return self::$instances[$key]; } } PK+[[V&&graminfo/graminfo.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. 
*/ interface phpMorphy_GramInfo_Interace { /** * Returns langugage for graminfo file * @return string */ function getLocale(); /** * Return encoding for graminfo file * @return string */ function getEncoding(); /** * Return size of character (cp1251 - 1, utf8 - 1, utf16 - 2, utf32 - 4 etc) * @return int */ function getCharSize(); /** * Return end of string value (usually string with \0 value of char_size + 1 length) * @return string */ function getEnds(); /** * Reads graminfo header * * @param int $offset * @return array */ function readGramInfoHeader($offset); /** * Returns size of header struct */ function getGramInfoHeaderSize(); /** * Read ancodes section for header retrieved with readGramInfoHeader * * @param array $info * @return array */ function readAncodes($info); /** * Read flexias section for header retrieved with readGramInfoHeader * * @param array $info * @return array */ function readFlexiaData($info); /** * Read all graminfo headers offsets, which can be used latter for readGramInfoHeader method * @return array */ function readAllGramInfoOffsets(); function getHeader(); function readAllPartOfSpeech(); function readAllGrammems(); function readAllAncodes(); } abstract class phpMorphy_GramInfo implements phpMorphy_GramInfo_Interace { const HEADER_SIZE = 128; protected $resource, $header, $ends, $ends_size; protected function phpMorphy_GramInfo($resource, $header) { $this->resource = $resource; $this->header = $header; $this->ends = str_repeat("\0", $header['char_size'] + 1); $this->ends_size = $GLOBALS['__phpmorphy_strlen']($this->ends); } static function create(phpMorphy_Storage $storage, $lazy) { if($lazy) { return new phpMorphy_GramInfo_Proxy($storage); } $header = phpMorphy_GramInfo::readHeader( $storage->read(0, self::HEADER_SIZE) ); if(!phpMorphy_GramInfo::validateHeader($header)) { throw new phpMorphy_Exception('Invalid graminfo format'); } $storage_type = $storage->getTypeAsString(); $file_path = dirname(__FILE__) . 
"/access/graminfo_{$storage_type}.php"; $clazz = 'phpMorphy_GramInfo_' . ucfirst($storage_type); require_once($file_path); return new $clazz($storage->getResource(), $header); } function getLocale() { return $this->header['lang']; } function getEncoding() { return $this->header['encoding']; } function getCharSize() { return $this->header['char_size']; } function getEnds() { return $this->ends; } function getHeader() { return $this->header; } static protected function readHeader($headerRaw) { $header = unpack( 'Vver/Vis_be/Vflex_count_old/' . 'Vflex_offset/Vflex_size/Vflex_count/Vflex_index_offset/Vflex_index_size/' . 'Vposes_offset/Vposes_size/Vposes_count/Vposes_index_offset/Vposes_index_size/' . 'Vgrammems_offset/Vgrammems_size/Vgrammems_count/Vgrammems_index_offset/Vgrammems_index_size/' . 'Vancodes_offset/Vancodes_size/Vancodes_count/Vancodes_index_offset/Vancodes_index_size/' . 'Vchar_size/', $headerRaw ); $offset = 24 * 4; $len = ord($GLOBALS['__phpmorphy_substr']($headerRaw, $offset++, 1)); $header['lang'] = rtrim($GLOBALS['__phpmorphy_substr']($headerRaw, $offset, $len)); $offset += $len; $len = ord($GLOBALS['__phpmorphy_substr']($headerRaw, $offset++, 1)); $header['encoding'] = rtrim($GLOBALS['__phpmorphy_substr']($headerRaw, $offset, $len)); return $header; } static protected function validateHeader($header) { if( 3 != $header['ver'] || 1 == $header['is_be'] ) { return false; } return true; } protected function cleanupCString($string) { if(false !== ($pos = $GLOBALS['__phpmorphy_strpos']($string, $this->ends))) { $string = $GLOBALS['__phpmorphy_substr']($string, 0, $pos); } return $string; } abstract protected function readSectionIndex($offset, $count); protected function readSectionIndexAsSize($offset, $count, $total_size) { if(!$count) { return array(); } $index = $this->readSectionIndex($offset, $count); $index[$count] = $index[0] + $total_size; for($i = 0; $i < $count; $i++) { $index[$i] = $index[$i + 1] - $index[$i]; } unset($index[$count]); return 
$index; } }; class phpMorphy_GramInfo_Decorator implements phpMorphy_GramInfo_Interace { protected $info; function phpMorphy_GramInfo_Decorator(phpMorphy_GramInfo_Interace $info) { $this->info = $info; } function readGramInfoHeader($offset) { return $this->info->readGramInfoHeader($offset); } function getGramInfoHeaderSize() { return $this->info->getGramInfoHeaderSize($offset); } function readAncodes($info) { return $this->info->readAncodes($info); } function readFlexiaData($info) { return $this->info->readFlexiaData($info); } function readAllGramInfoOffsets() { return $this->info->readAllGramInfoOffsets(); } function readAllPartOfSpeech() { return $this->info->readAllPartOfSpeech(); } function readAllGrammems() { return $this->info->readAllGrammems(); } function readAllAncodes() { return $this->info->readAllAncodes(); } function getLocale() { return $this->info->getLocale(); } function getEncoding() { return $this->info->getEncoding(); } function getCharSize() { return $this->info->getCharSize(); } function getEnds() { return $this->info->getEnds(); } function getHeader() { return $this->info->getHeader(); } } class phpMorphy_GramInfo_Proxy extends phpMorphy_GramInfo_Decorator { protected $storage; function __construct(phpMorphy_Storage $storage) { $this->storage = $storage; unset($this->info); } function __get($propName) { if($propName == 'info') { $this->info = phpMorphy_GramInfo::create($this->storage, false); unset($this->storage); return $this->info; } throw new phpMorphy_Exception("Unknown prop name '$propName'"); } } class phpMorphy_GramInfo_Proxy_WithHeader extends phpMorphy_GramInfo_Proxy { protected $cache, $ends; function __construct(phpMorphy_Storage $storage, $cacheFile) { parent::__construct($storage); $this->cache = $this->readCache($cacheFile); $this->ends = str_repeat("\0", $this->getCharSize() + 1); } protected function readCache($fileName) { if(!is_array($result = include($fileName))) { throw new phpMorphy_Exception("Can`t get header cache from 
'$fileName' file'"); } return $result; } function getLocale() { return $this->cache['lang']; } function getEncoding() { return $this->cache['encoding']; } function getCharSize() { return $this->cache['char_size']; } function getEnds() { return $this->ends; } function getHeader() { return $this->cache; } } class phpMorphy_GramInfo_RuntimeCaching extends phpMorphy_GramInfo_Decorator { protected $flexia = array(), $ancodes = array(); function readFlexiaData($info) { $offset = $info['offset']; if(!isset($this->flexia_all[$offset])) { $this->flexia_all[$offset] = $this->info->readFlexiaData($info); } return $this->flexia_all[$offset]; } } class phpMorphy_GramInfo_AncodeCache extends phpMorphy_GramInfo_Decorator { public $hits = 0, $miss = 0; protected $cache; function __construct(phpMorphy_GramInfo_Interace $inner, $resource) { parent::__construct($inner); if(false === ($this->cache = unserialize($resource->read(0, $resource->getFileSize())))) { throw new phpMorphy_Exception("Can`t read ancodes cache"); } } function readAncodes($info) { $offset = $info['offset']; if(isset($this->cache[$offset])) { $this->hits++; return $this->cache[$offset]; } else { // in theory misses never occur $this->miss++; return parent::readAncodes($info); } } }PK+[[w graminfo/access/graminfo_mem.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /** * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/graminfo/tpl/graminfo.tpl.php on line 24
Thu, 16 Jan 2020 00:41:13 +0000, don`t change it! */ class phpMorphy_Graminfo_Mem extends phpMorphy_Graminfo { function getGramInfoHeaderSize() { return 20; } function readGramInfoHeader($offset) { $__mem = $this->resource; $result = unpack( 'vid/vfreq/vforms_count/vpacked_forms_count/vancodes_count/vancodes_offset/vancodes_map_offset/vaffixes_offset/vaffixes_size/vbase_size', $GLOBALS['__phpmorphy_substr']($__mem, $offset, 20) ); $result['offset'] = $offset; return $result; } protected function readAncodesMap($info) { $__mem = $this->resource; // TODO: this can be wrong due to aligning ancodes map section $offset = $info['offset'] + 20 + $info['forms_count'] * 2; $forms_count = $info['packed_forms_count']; return unpack("v$forms_count", $GLOBALS['__phpmorphy_substr']($__mem, $offset, $forms_count * 2)); } protected function splitAncodes($ancodes, $map) { $result = array(); for($i = 1, $c = count($map), $j = 1; $i <= $c; $i++) { $res = array(); for($k = 0, $kc = $map[$i]; $k < $kc; $k++, $j++) { $res[] = $ancodes[$j]; } $result[] = $res; } return $result; } function readAncodes($info) { $__mem = $this->resource; // TODO: this can be wrong due to aligning ancodes section $offset = $info['offset'] + 20; $forms_count = $info['forms_count']; $ancodes = unpack("v$forms_count", $GLOBALS['__phpmorphy_substr']($__mem, $offset, $forms_count * 2)); /* if(!$expand) { return $ancodes; } */ $map = $this->readAncodesMap($info); return $this->splitAncodes($ancodes, $map); } function readFlexiaData($info) { $__mem = $this->resource; $offset = $info['offset'] + 20; if(isset($info['affixes_offset'])) { $offset += $info['affixes_offset']; } else { $offset += $info['forms_count'] * 2 + $info['packed_forms_count'] * 2; } return explode($this->ends, $GLOBALS['__phpmorphy_substr']($__mem, $offset, $info['affixes_size'] - $this->ends_size)); } function readAllGramInfoOffsets() { return $this->readSectionIndex($this->header['flex_index_offset'], $this->header['flex_count']); } protected 
function readSectionIndex($offset, $count) { $__mem = $this->resource; return array_values(unpack("V$count", $GLOBALS['__phpmorphy_substr']($__mem, $offset, $count * 4))); } function readAllFlexia() { $__mem = $this->resource; $result = array(); $offset = $this->header['flex_offset']; foreach($this->readSectionIndexAsSize($this->header['flex_index_offset'], $this->header['flex_count'], $this->header['flex_size']) as $size) { $header = $this->readGramInfoHeader($offset); $affixes = $this->readFlexiaData($header); $ancodes = $this->readAncodes($header, true); $result[$header['id']] = array( 'header' => $header, 'affixes' => $affixes, 'ancodes' => $ancodes ); $offset += $size; } return $result; } function readAllPartOfSpeech() { $__mem = $this->resource; $result = array(); $offset = $this->header['poses_offset']; foreach($this->readSectionIndexAsSize($this->header['poses_index_offset'], $this->header['poses_count'], $this->header['poses_size']) as $size) { $res = unpack( 'vid/Cis_predict', $GLOBALS['__phpmorphy_substr']($__mem, $offset, 3) ); $result[$res['id']] = array( 'is_predict' => (bool)$res['is_predict'], 'name' => $this->cleanupCString($GLOBALS['__phpmorphy_substr']($__mem, $offset + 3, $size - 3)) ); $offset += $size; } return $result; } function readAllGrammems() { $__mem = $this->resource; $result = array(); $offset = $this->header['grammems_offset']; foreach($this->readSectionIndexAsSize($this->header['grammems_index_offset'], $this->header['grammems_count'], $this->header['grammems_size']) as $size) { $res = unpack( 'vid/Cshift', $GLOBALS['__phpmorphy_substr']($__mem, $offset, 3) ); $result[$res['id']] = array( 'shift' => $res['shift'], 'name' => $this->cleanupCString($GLOBALS['__phpmorphy_substr']($__mem, $offset + 3, $size - 3)) ); $offset += $size; } return $result; } function readAllAncodes() { $__mem = $this->resource; $result = array(); $offset = $this->header['ancodes_offset']; for($i = 0; $i < $this->header['ancodes_count']; $i++) { $res = 
unpack('vid/vpos_id', $GLOBALS['__phpmorphy_substr']($__mem, $offset, 4)); $offset += 4; list(, $grammems_count) = unpack('v', $GLOBALS['__phpmorphy_substr']($__mem, $offset, 2)); $offset += 2; $result[$res['id']] = array( 'pos_id' => $res['pos_id'], 'grammem_ids' => $grammems_count ? array_values(unpack("v$grammems_count", $GLOBALS['__phpmorphy_substr']($__mem, $offset, $grammems_count * 2))) : array(), 'offset' => $offset, ); $offset += $grammems_count * 2; } return $result; } } PK+[[!graminfo/access/graminfo_file.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /** * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/graminfo/tpl/graminfo.tpl.php on line 24
Thu, 16 Jan 2020 00:41:13 +0000, don`t change it! */ class phpMorphy_Graminfo_File extends phpMorphy_Graminfo { function getGramInfoHeaderSize() { return 20; } function readGramInfoHeader($offset) { $__fh = $this->resource; fseek($__fh, $offset); $result = unpack( 'vid/vfreq/vforms_count/vpacked_forms_count/vancodes_count/vancodes_offset/vancodes_map_offset/vaffixes_offset/vaffixes_size/vbase_size', fread($__fh, 20) ); $result['offset'] = $offset; return $result; } protected function readAncodesMap($info) { $__fh = $this->resource; // TODO: this can be wrong due to aligning ancodes map section $offset = $info['offset'] + 20 + $info['forms_count'] * 2; fseek($__fh, $offset); $forms_count = $info['packed_forms_count']; return unpack("v$forms_count", fread($__fh, $forms_count * 2)); } protected function splitAncodes($ancodes, $map) { $result = array(); for($i = 1, $c = count($map), $j = 1; $i <= $c; $i++) { $res = array(); for($k = 0, $kc = $map[$i]; $k < $kc; $k++, $j++) { $res[] = $ancodes[$j]; } $result[] = $res; } return $result; } function readAncodes($info) { $__fh = $this->resource; // TODO: this can be wrong due to aligning ancodes section $offset = $info['offset'] + 20; fseek($__fh, $offset); $forms_count = $info['forms_count']; $ancodes = unpack("v$forms_count", fread($__fh, $forms_count * 2)); /* if(!$expand) { return $ancodes; } */ $map = $this->readAncodesMap($info); return $this->splitAncodes($ancodes, $map); } function readFlexiaData($info) { $__fh = $this->resource; $offset = $info['offset'] + 20; if(isset($info['affixes_offset'])) { $offset += $info['affixes_offset']; } else { $offset += $info['forms_count'] * 2 + $info['packed_forms_count'] * 2; } fseek($__fh, $offset); return explode($this->ends, fread($__fh, $info['affixes_size'] - $this->ends_size)); } function readAllGramInfoOffsets() { return $this->readSectionIndex($this->header['flex_index_offset'], $this->header['flex_count']); } protected function readSectionIndex($offset, $count) { $__fh = 
$this->resource; fseek($__fh, $offset); return array_values(unpack("V$count", fread($__fh, $count * 4))); } function readAllFlexia() { $__fh = $this->resource; $result = array(); $offset = $this->header['flex_offset']; foreach($this->readSectionIndexAsSize($this->header['flex_index_offset'], $this->header['flex_count'], $this->header['flex_size']) as $size) { $header = $this->readGramInfoHeader($offset); $affixes = $this->readFlexiaData($header); $ancodes = $this->readAncodes($header, true); $result[$header['id']] = array( 'header' => $header, 'affixes' => $affixes, 'ancodes' => $ancodes ); $offset += $size; } return $result; } function readAllPartOfSpeech() { $__fh = $this->resource; $result = array(); $offset = $this->header['poses_offset']; foreach($this->readSectionIndexAsSize($this->header['poses_index_offset'], $this->header['poses_count'], $this->header['poses_size']) as $size) { fseek($__fh, $offset); $res = unpack( 'vid/Cis_predict', fread($__fh, 3) ); $result[$res['id']] = array( 'is_predict' => (bool)$res['is_predict'], 'name' => $this->cleanupCString(fread($__fh, $size - 3)) ); $offset += $size; } return $result; } function readAllGrammems() { $__fh = $this->resource; $result = array(); $offset = $this->header['grammems_offset']; foreach($this->readSectionIndexAsSize($this->header['grammems_index_offset'], $this->header['grammems_count'], $this->header['grammems_size']) as $size) { fseek($__fh, $offset); $res = unpack( 'vid/Cshift', fread($__fh, 3) ); $result[$res['id']] = array( 'shift' => $res['shift'], 'name' => $this->cleanupCString(fread($__fh, $size - 3)) ); $offset += $size; } return $result; } function readAllAncodes() { $__fh = $this->resource; $result = array(); $offset = $this->header['ancodes_offset']; fseek($__fh, $offset); for($i = 0; $i < $this->header['ancodes_count']; $i++) { $res = unpack('vid/vpos_id', fread($__fh, 4)); $offset += 4; list(, $grammems_count) = unpack('v', fread($__fh, 2)); $offset += 2; $result[$res['id']] = array( 
'pos_id' => $res['pos_id'], 'grammem_ids' => $grammems_count ? array_values(unpack("v$grammems_count", fread($__fh, $grammems_count * 2))) : array(), 'offset' => $offset, ); $offset += $grammems_count * 2; } return $result; } } PK+[[: graminfo/access/graminfo_shm.phpnuW+A * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /** * This file is autogenerated at
Warning: date(): It is not safe to rely on the system's timezone settings. You are *required* to use the date.timezone setting or the date_default_timezone_set() function. In case you used any of those methods and you are still getting this warning, you most likely misspelled the timezone identifier. We selected the timezone 'UTC' for now, but please set date.timezone to select your timezone. in /var/www/iplanru/data/www/i-plan.ru/libraries/phpmorphy/utils/autogen/graminfo/tpl/graminfo.tpl.php on line 24
Thu, 16 Jan 2020 00:41:13 +0000, don`t change it! */ class phpMorphy_Graminfo_Shm extends phpMorphy_Graminfo { function getGramInfoHeaderSize() { return 20; } function readGramInfoHeader($offset) { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; $result = unpack( 'vid/vfreq/vforms_count/vpacked_forms_count/vancodes_count/vancodes_offset/vancodes_map_offset/vaffixes_offset/vaffixes_size/vbase_size', shmop_read($__shm, $__offset + ($offset), 20) ); $result['offset'] = $offset; return $result; } protected function readAncodesMap($info) { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; // TODO: this can be wrong due to aligning ancodes map section $offset = $info['offset'] + 20 + $info['forms_count'] * 2; $forms_count = $info['packed_forms_count']; return unpack("v$forms_count", shmop_read($__shm, $__offset + ($offset), $forms_count * 2)); } protected function splitAncodes($ancodes, $map) { $result = array(); for($i = 1, $c = count($map), $j = 1; $i <= $c; $i++) { $res = array(); for($k = 0, $kc = $map[$i]; $k < $kc; $k++, $j++) { $res[] = $ancodes[$j]; } $result[] = $res; } return $result; } function readAncodes($info) { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; // TODO: this can be wrong due to aligning ancodes section $offset = $info['offset'] + 20; $forms_count = $info['forms_count']; $ancodes = unpack("v$forms_count", shmop_read($__shm, $__offset + ($offset), $forms_count * 2)); /* if(!$expand) { return $ancodes; } */ $map = $this->readAncodesMap($info); return $this->splitAncodes($ancodes, $map); } function readFlexiaData($info) { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; $offset = $info['offset'] + 20; if(isset($info['affixes_offset'])) { $offset += $info['affixes_offset']; } else { $offset += $info['forms_count'] * 2 + $info['packed_forms_count'] * 2; } return explode($this->ends, shmop_read($__shm, $__offset + ($offset), $info['affixes_size'] 
- $this->ends_size)); } function readAllGramInfoOffsets() { return $this->readSectionIndex($this->header['flex_index_offset'], $this->header['flex_count']); } protected function readSectionIndex($offset, $count) { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; return array_values(unpack("V$count", shmop_read($__shm, $__offset + ($offset), $count * 4))); } function readAllFlexia() { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; $result = array(); $offset = $this->header['flex_offset']; foreach($this->readSectionIndexAsSize($this->header['flex_index_offset'], $this->header['flex_count'], $this->header['flex_size']) as $size) { $header = $this->readGramInfoHeader($offset); $affixes = $this->readFlexiaData($header); $ancodes = $this->readAncodes($header, true); $result[$header['id']] = array( 'header' => $header, 'affixes' => $affixes, 'ancodes' => $ancodes ); $offset += $size; } return $result; } function readAllPartOfSpeech() { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; $result = array(); $offset = $this->header['poses_offset']; foreach($this->readSectionIndexAsSize($this->header['poses_index_offset'], $this->header['poses_count'], $this->header['poses_size']) as $size) { $res = unpack( 'vid/Cis_predict', shmop_read($__shm, $__offset + ($offset), 3) ); $result[$res['id']] = array( 'is_predict' => (bool)$res['is_predict'], 'name' => $this->cleanupCString(shmop_read($__shm, $__offset + ($offset + 3), $size - 3)) ); $offset += $size; } return $result; } function readAllGrammems() { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; $result = array(); $offset = $this->header['grammems_offset']; foreach($this->readSectionIndexAsSize($this->header['grammems_index_offset'], $this->header['grammems_count'], $this->header['grammems_size']) as $size) { $res = unpack( 'vid/Cshift', shmop_read($__shm, $__offset + ($offset), 3) ); $result[$res['id']] = array( 'shift' 
=> $res['shift'], 'name' => $this->cleanupCString(shmop_read($__shm, $__offset + ($offset + 3), $size - 3)) ); $offset += $size; } return $result; } function readAllAncodes() { $__shm = $this->resource['shm_id']; $__offset = $this->resource['offset']; $result = array(); $offset = $this->header['ancodes_offset']; for($i = 0; $i < $this->header['ancodes_count']; $i++) { $res = unpack('vid/vpos_id', shmop_read($__shm, $__offset + ($offset), 4)); $offset += 4; list(, $grammems_count) = unpack('v', shmop_read($__shm, $__offset + ($offset), 2)); $offset += 2; $result[$res['id']] = array( 'pos_id' => $res['pos_id'], 'grammem_ids' => $grammems_count ? array_values(unpack("v$grammems_count", shmop_read($__shm, $__offset + ($offset), $grammems_count * 2))) : array(), 'offset' => $offset, ); $offset += $grammems_count * 2; } return $result; } } PK+[[!Ojgramtab_consts.phpnuW+APK+[[M nkk Hcommon.phpnuW+APK+[[qu storage.phpnuW+APK+[[hͺ** lmorphiers.phpnuW+APK+[[s11 ӏunicode.phpnuW+APK+[[_ source.phpnuW+APK+[[ڔ"" fsa/fsa.phpnuW+APK+[[k4 4 Qfsa/fsa_state.phpnuW+APK+[[LM!M!fsa/access/fsa_tree_mem.phpnuW+APK+[[5^fsa/access/fsa_sparse_mem.phpnuW+APK+[[ "";fsa/access/fsa_tree_shm.phpnuW+APK+[[Z]]]fsa/access/fsa_sparse_file.phpnuW+APK+[[7<:{fsa/access/fsa_sparse_shm.phpnuW+APK+[[t t yfsa/access/fsa_tree_file.phpnuW+APK+[[Xf 9gramtab.phpnuW+APK+[[6JJ Qshm_utils.phpnuW+APK+[[*^^:!langs_stuff/common.phpnuW+APK+[[_&&1langs_stuff/ru_ru.phpnuW+APK+[[V&&I7graminfo/graminfo.phpnuW+APK+[[w ^graminfo/access/graminfo_mem.phpnuW+APK+[[!|graminfo/access/graminfo_file.phpnuW+APK+[[: ]graminfo/access/graminfo_shm.phpnuW+APKH9