Your IP : 216.73.216.170


Current Path : /var/www/iplanru/data/www/i-plan.ru/administrator/components/com_sef/controllers/
Upload File :
Current File : /var/www/iplanru/data/www/i-plan.ru/administrator/components/com_sef/controllers/crawler.php

<?php
/**
 * SEF component for Joomla!
 * 
 * @package   JoomSEF
 * @version   4.7.8
 * @author    ARTIO s.r.o., http://www.artio.net
 * @copyright Copyright (C) 2020 ARTIO s.r.o. 
 * @license   GNU/GPLv3 http://www.artio.net/license/gnu-general-public-license
 */

// Abort immediately unless the _JEXEC constant has been defined by the including application.
if (!defined('_JEXEC')) {
    die();
}

/**
 * Controller for the crawler view.
 *
 * Fetches a batch of URLs supplied in the request, extracts the internal
 * links found on each fetched page and returns them as a JSON response.
 */
class SEFControllerCrawler extends SEFController
{
    /**
     * Upper bound, in seconds, on the time spent crawling within one request.
     */
    const MAX_CRAWL_SECONDS = 10;

    /**
     * Maximum number of HTTP redirections followed for a single URL.
     */
    const MAX_REDIRECTS = 5;

    /**
     * constructor (registers additional tasks to methods)
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Selects the "crawler" view and delegates rendering to the parent controller.
     *
     * @param bool  $cachable  Not used by this method.
     * @param mixed $urlparams Not used by this method.
     * @return void
     */
    public function display($cachable = false, $urlparams = false)
    {
        // NOTE(review): the arguments are intentionally left unforwarded, exactly
        // as before; presumably they exist only to match the parent signature.
        JRequest::setVar('view', 'crawler');

        parent::display();
    }

    /**
     * Crawls the URLs given in the "url" request variable until the time budget
     * is exhausted, then prints a JSON object and terminates the request.
     *
     * The JSON object has two members:
     *  - crawled: number of URLs from the request that were processed,
     *  - found:   list of unique internal links discovered on those URLs.
     *
     * @return void
     */
    public function crawl()
    {
        // Suppress all normal output
        ob_start();

        // Use at most half of the allowed execution time, capped at
        // MAX_CRAWL_SECONDS. A max_execution_time of 0 means "no limit" (and
        // ini_get() may return false), so fall back to the cap in that case
        // instead of ending up with a zero-second budget.
        $configuredLimit = (int) ini_get('max_execution_time');
        if ($configuredLimit > 0) {
            $timeLimit = min($configuredLimit / 2, self::MAX_CRAWL_SECONDS);
        } else {
            $timeLimit = self::MAX_CRAWL_SECONDS;
        }
        $maxTime = $_SERVER['REQUEST_TIME'] + $timeLimit;

        // A single scalar "url" value is accepted as a one-element batch.
        // NOTE(review): these URLs come straight from the request and are
        // fetched server-side; confirm that access to this task is restricted.
        $urls = (array) JRequest::getVar('url', array());

        $crawled = 0;
        $found = array();

        // Crawl URLs
        foreach ($urls as $url) {
            // Find URLs on URL
            $this->_parseLinks($url, $found);

            // Count every processed entry (even unusable ones) so the caller
            // can tell how many of the submitted URLs were consumed.
            $crawled++;

            // Check time
            if (time() > $maxTime) {
                // We need to interrupt the script
                break;
            }
        }

        // Create response object
        $ret = new stdClass();
        $ret->crawled = $crawled;
        $ret->found = $found;

        // Discard anything that was printed while crawling
        ob_end_clean();

        // Write response
        echo json_encode($ret);
        jexit();
    }

    /**
     * Fetches a URL (following up to MAX_REDIRECTS redirections) and appends
     * every internal link found in its HTML anchors to $found.
     *
     * Links are normalised to absolute form by prefixing them with JURI::root().
     * Links that still contain "://" after the site root was removed, links with
     * a non-navigable scheme (mailto:, javascript:, ...), fragment-only links and
     * protocol-relative links to other hosts are ignored.
     *
     * @param string $url   URL to fetch; non-string or empty values are ignored.
     * @param array  $found List of already discovered links, extended in place
     *                      without duplicates.
     * @return void
     */
    public function _parseLinks($url, &$found)
    {
        if (!is_string($url) || trim($url) === '') {
            return;
        }

        // Get response, follow redirections
        $redirs = 0;
        do {
            $redirect = false;
            $response = SEFTools::PostRequest($url, null, null, 'get', null);

            if (!is_object($response)) {
                // Error (the request helper signalled failure, e.g. with false)
                return;
            }

            if ($redirs < self::MAX_REDIRECTS && $response->code >= 300 && $response->code < 400) {
                $target = $this->_getRedirectTarget($response->header);
                if ($target !== null) {
                    $url = $target;
                    $redirect = true;
                    $redirs++;
                }
            }
        } while ($redirect);

        // Only successfully fetched pages are parsed
        if ((int) $response->code !== 200) {
            return;
        }

        $matches = array();
        if (!preg_match_all('#<a\s[^>]*href=["\']([^"\']+)#i', (string) $response->content, $matches)) {
            // No anchors found (or the pattern failed to run)
            return;
        }

        $root = JURI::root();
        // Site root without its scheme, e.g. "//example.com/", used to recognise
        // protocol-relative links that point back to this site.
        $schemelessRoot = preg_replace('#^[a-z][a-z0-9+.\-]*:#i', '', $root);

        foreach ($matches[1] as $link) {
            $link = str_replace($root, '', $link);

            if (is_string($schemelessRoot) && $schemelessRoot !== ''
                && strpos($link, $schemelessRoot) === 0) {
                // Protocol-relative link to this site: treat as internal
                $link = (string) substr($link, strlen($schemelessRoot));
            }

            if (strpos($link, '://') !== false) {
                // Absolute external link, skip
                continue;
            }

            if ($this->_isNonCrawlableLink($link)) {
                continue;
            }

            // Internal link
            $link = $root . ltrim(html_entity_decode(urldecode($link)), '/');

            if (!in_array($link, $found, true)) {
                $found[] = $link;
            }
        }
    }

    /**
     * Extracts the target of the first "Location" header from a raw header block.
     *
     * The match is anchored to the start of a header line so that headers such
     * as "Content-Location" are not mistaken for it, and it does not depend on
     * the line being terminated by a carriage return.
     *
     * @param mixed $header Raw response headers.
     * @return string|null The trimmed redirect target, or null when none is present.
     */
    private function _getRedirectTarget($header)
    {
        if (!is_string($header)) {
            return null;
        }

        $match = array();
        if (!preg_match('/^Location:[ \t]*([^\r\n]*)/mi', $header, $match)) {
            return null;
        }

        $target = trim($match[1]);

        return ($target === '') ? null : $target;
    }

    /**
     * Tells whether a link (already stripped of the site root) cannot be
     * crawled as a page of this site.
     *
     * @param string $link Link with the site root removed.
     * @return bool True for fragment-only links, protocol-relative links and
     *              links using a non-navigable scheme.
     */
    private function _isNonCrawlableLink($link)
    {
        if ($link === '') {
            // Link to the site root itself
            return false;
        }

        if ($link[0] === '#') {
            // Fragment on the current page
            return true;
        }

        if (strncmp($link, '//', 2) === 0) {
            // Protocol-relative link to another host
            return true;
        }

        // Schemes that never contain "://" and therefore pass the external-link check
        return preg_match('#^\s*(?:mailto|javascript|tel|sms|callto|skype|data|file):#i', $link) === 1;
    }
}