* @link http://stupid.su/php-curl_multi/ * @licence GPL * @version 0.4 * * @todo stop on error_limit exceed * @todo "on the fly" change AngryCurlRequest fix * * @uses RollingCurl * @uses cURL * * @var array $debug_info - debug information * @var bool $debug_log - Enable/disable debug log * @var bool $console_mode - Enable/disable loggin information direct to 'user's browser on a fly' * @var array $array_alive_proxy - alive proxy array needed to transfer data from proxy filtering function in its callback * @var array $array_proxy - proxy list * @var array $array_url - url list to parse * @var array $array_useragent - useragents to change * @var bool $error_limit - Limit of invalid http responses before die, 0 - unlimited // not implemented yet * @var bool $array_valid_http_code- Array of valid http response codes, default // not implemented yet * @var int $n_proxy - proxies amount * @var int $n_useragent - useragents amount * @var int $n_url - urls amount * @var string $proxy_test_url - url address to connect to for testing proxies * @var string $proxy_valif_regexp - regexp needed to be shure that response hasn`t been modified by proxy * @var bool $use_proxy_list - Flag that is set in load_proxy_list method * @var bool $use_useragent_list - Flag that is set in load_useragent_list method */ class AngryCurl extends RollingCurl { public static $debug_info = array(); public static $debug_log = false; protected static $console_mode = false; protected static $array_alive_proxy=array(); protected $array_proxy = array(); protected $array_url = array(); protected $array_useragent = array(); protected $error_limit = 0; // not implemented yet protected $array_valid_http_code= array(200); // not implemented yet protected $n_proxy = 0; protected $n_useragent = 0; protected $n_url = 0; protected $proxy_test_url = 'http://google.com'; protected static $proxy_valid_regexp = ''; private $use_proxy_list = false; private $use_useragent_list = false; /** * AngryCurl constructor * * @throws AngryCurlException * * @param string $callback Callback function name * @param bool $debug_log Enable/disable writing log to $debug_info var (false by default to reduce memory consumption) * * @return void */ function __construct($callback = null, $debug_log = false) { self::$debug_log = $debug_log; # writing debug self::add_debug_msg("# Building"); # checking if cURL enabled if(!function_exists('curl_init')) { throw new AngryCurlException("(!) cURL is not enabled"); } parent::__construct($callback); } /** * Initializing console mode * * @return void */ public function init_console() { self::$console_mode = true; echo "
";
        
        # Internal Server Error fix in case no apache_setenv() function exists
        if (function_exists('apache_setenv'))
        {
            @apache_setenv('no-gzip', 1);
        }
        @ini_set('zlib.output_compression', 0);
        @ini_set('implicit_flush', 1);
        for ($i = 0; $i < ob_get_level(); $i++)
            ob_end_flush();
        ob_implicit_flush(1);
        
        # writing debug
        self::add_debug_msg("# Console mode activated");
    }

    /**
     * Request execution overload
     *
     * @access public
     *
     * @throws AngryCurlException
     * 
     * @param string $url Request URL
     * @param enum(GET/POST) $method
     * @param array $post_data
     * @param array $headers
     * @param array $options
     * 
     * @return bool
     */
    public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null)
    {
        if($this->n_proxy > 0 && $this->use_proxy_list)
        {
            $options[CURLOPT_PROXY]=$this->array_proxy[ mt_rand(0, $this->n_proxy-1) ];
        //    self::add_debug_msg("Using PROXY({$this->n_proxy}): ".$options[CURLOPT_PROXY]);
        }
        elseif($this->n_proxy < 1 && $this->use_proxy_list)
        {
            throw new AngryCurlException("(!) Option 'use_proxy_list' is set, but no alive proxy available");
        }
        
        if($this->n_useragent > 0 && $this->use_useragent_list)
        {
            $options[CURLOPT_USERAGENT]=$this->array_useragent[ mt_rand(0, $this->n_useragent-1) ];
        //    self::add_debug_msg("Using USERAGENT: ".$options[CURLOPT_USERAGENT]);
        }
        elseif($this->n_useragent < 1 && $this->use_useragent_list)
        {
            throw new AngryCurlException("(!) Option 'use_useragent_list' is set, but no useragents available");
        }

        parent::request($url, $method, $post_data, $headers, $options);
        return true;
    }
    
    /**
     * Starting connections function execution overload
     *
     * @access public
     *
     * @throws AngryCurlException
     *
     * @param int $window_size Max number of simultaneous connections
     *
     * @return string|bool
     */
    public function execute($window_size = null)
    {
        # checking $window_size var
        if($window_size == null)
        {
            self::add_debug_msg(" (!) Default threads amount value (5) is used");
        }
        elseif($window_size > 0 && is_int($window_size))
        {
            self::add_debug_msg(" * Threads set to:\t$window_size");
        }
        else
        {
            throw new AngryCurlException(" (!) Wrong threads amount in execute():\t$window_size");
        }
        
        # writing debug
        self::add_debug_msg(" * Starting connections");
        //var_dump($this->__get('requests'));
        
        $time_start = microtime(1);
        $result = parent::execute($window_size);
        $time_end = microtime(1);
        
        # writing debug
        self::add_debug_msg(" * Finished in ".round($time_end-$time_start,2)."s");
        
        return $result;
    }
    
    /**
     * Flushing requests map for re-using purposes
     *
     * @return void
     */
    public function flush_requests()
    {
        $this->__set('requests', array());
    }
    
    /**
     * Useragent list loading method
     *
     * @access public
     * 
     * @param string/array $input Input proxy data, could be an array or filename
     * @return integer Amount of useragents loaded
     */
    public function load_useragent_list($input)
    {
        # writing debug
        self::add_debug_msg("# Start loading useragent list");
        
        # defining proxiess
        if(is_array($input))
        {
            $this->array_useragent = $input;
        }
        else
        {        
            $this->array_useragent = $this->load_from_file($input);
        }
        
        # setting amount
        $this->n_useragent = count($this->array_useragent);
        
        # writing debug
        if($this->n_useragent > 0)
        {
            self::add_debug_msg("# Loaded useragents:\t{$this->n_useragent}");
        }
        else
        {
            throw new AngryCurlException("# (!) No useragents loaded");
        }
        
        # Setting flag to prevent using AngryCurl without useragents
        $this->use_useragent_list = true;
        
        return $this->n_useragent;
    }

    /**
     * Proxy list loading and filtering method
     *
     * @access public
     *
     * @throws AngryCurlException
     * 
     * @param string/array $input Input proxy data, could be an array or filename
     * @param integer $window_size Max number of simultaneous connections when testing
     * @param enum(http/socks5) $proxy_type
     * @param string $proxy_test_url URL needed for proxy test requests
     * @param regexp $proxy_valid_regexp Regexp needed to be shure that response hasn`t been modified by proxy
     * 
     * @return bool
     */
    public function load_proxy_list($input, $window_size = 5, $proxy_type = 'http', $proxy_test_url = 'http://google.com', $proxy_valid_regexp = null)
    {
        # writing debug
        self::add_debug_msg("# Start loading proxies");
        
        # defining proxiess
        if(is_array($input))
        {
            $this->array_proxy = $input;
        }
        else
        {
            $this->array_proxy = $this->load_from_file($input);
        }        
        
        # checking $window_size var
        if( intval($window_size) < 1 || !is_int($window_size) )
        {
            throw new AngryCurlException(" (!) Wrong threads amount in load_proxy_list():\t$window_size");
        }

        
        # setting proxy type
        if($proxy_type == 'socks5')
        {
            self::add_debug_msg(" * Proxy type set to:\tSOCKS5");
            $this->__set('options', array(CURLOPT_PROXYTYPE => CURLPROXY_SOCKS5));
        }
        else
        {
            self::add_debug_msg(" * Proxy type set to:\tHTTP");
        }
            
        # setting amount
        $this->n_proxy = count($this->array_proxy);
        self::add_debug_msg(" * Loaded proxies:\t{$this->n_proxy}");
        
        # filtering alive proxies
        if($this->n_proxy>0)
        {
            # removing duplicates
            $n_dup = count($this->array_proxy);
            # by array_values bug was fixed in random array indexes using mt_rand in request()
            $this->array_proxy = array_values( array_unique( $this->array_proxy) );
            $n_dup -= count($this->array_proxy);
            
            self::add_debug_msg(" * Removed duplicates:\t{$n_dup}");
            unset($n_dup);
            
            # updating amount
            $this->n_proxy = count($this->array_proxy);
            self::add_debug_msg(" * Unique proxies:\t{$this->n_proxy}");
            
            # setting url for testing proxies
            $this->proxy_test_url = $proxy_test_url;
            self::add_debug_msg(" * Proxy test URL:\t{$this->proxy_test_url}");
            
            # setting regexp for testing proxies
            if( !empty($proxy_valid_regexp) )
            {
                self::$proxy_valid_regexp = $proxy_valid_regexp;
                self::add_debug_msg(" * Proxy test RegExp:\t".self::$proxy_valid_regexp);
            }
            
            $this->filter_alive_proxy($window_size); 
        }
        else
        {
            throw new AngryCurlException(" (!) Proxies amount < 0 in load_proxy_list():\t{$this->n_proxy}");
        }
        
        # Setting flag to prevent using AngryCurl without proxies
        $this->use_proxy_list = true;   
    }
    
    /**
     * Filtering proxy array method, choosing alive proxy only
     *
     * @return void
     */
    public static function callback_proxy_check($response, $info, $request)
    {
        static $rid = 0;
        $rid++;
    
        if($info['http_code']!==200)
        {
            self::add_debug_msg("   $rid->\t".$request->options[CURLOPT_PROXY]."\tFAILED\t".$info['http_code']."\t".$info['total_time']."\t".$info['url']);
            return;
        }

        if(!empty(self::$proxy_valid_regexp) && !@preg_match('#'.self::$proxy_valid_regexp.'#', $response) )
        {
            self::add_debug_msg("   $rid->\t".$request->options[CURLOPT_PROXY]."\tFAILED\tRegExp match:\t".self::$proxy_valid_regexp."\t".$info['url']);
            return;
        }
            self::add_debug_msg("   $rid->\t".$request->options[CURLOPT_PROXY]."\tOK\t".$info['http_code']."\t".$info['total_time']."\t".$info['url']);
            self::$array_alive_proxy[] = $request->options[CURLOPT_PROXY];
    }
    
    /**
     * Filtering proxy array, choosing alive proxy only
     *
     * @throws AngryCurlException
     *
     * @param integer $window_size Max number of simultaneous connections when testing
     *
     * @return void
     */
    protected function filter_alive_proxy($window_size = 5)
    {
        # writing debug
        self::add_debug_msg("# Start testing proxies");
        
        # checking $window_size var
        if( intval($window_size) < 1 || !is_int($window_size) )
        {
            throw new AngryCurlException(" (!) Wrong threads amount in filter_alive_proxy():\t$window_size");
        }
        
        $buff_callback_func = $this->__get('callback');
        $this->__set('callback',array('AngryCurl', 'callback_proxy_check'));

        # adding requests to stack
        foreach($this->array_proxy as $id => $proxy)
        {
            # there won't be any regexp checks, just this :)
            if( strlen($proxy) > 4)
                $this->request($this->proxy_test_url, $method = "GET", null, null, array(CURLOPT_PROXY => $proxy) );
        }

        # run
        $this->execute($window_size);
        
        #flushing requests
        $this->__set('requests', array());

        # writing debug
        self::add_debug_msg("# Alive proxies:\t".count(self::$array_alive_proxy)."/".$this->n_proxy);
        
        # updating params
        $this->n_proxy = count(self::$array_alive_proxy);
        $this->array_proxy = self::$array_alive_proxy;
        $this->__set('callback', $buff_callback_func);
    }

    /**
     * Loading info from external files
     *
     * @access private
     * @param string $filename
     * @param string $delim
     * @return array
     */
    protected function load_from_file($filename, $delim = "\n")
    {
        $data;
        $fp = @fopen($filename, "r");
        
        if(!$fp)
        {
            self::add_debug_msg("(!) Failed to open file: $filename");
            return array();
        }
        
        $data = @fread($fp, filesize($filename) );
        fclose($fp);
        
        if(strlen($data)<1)
        {
            self::add_debug_msg("(!) Empty file: $filename");
            return array();
        }
        
        $array = explode($delim, $data);
        
        if(is_array($array) && count($array)>0)
        {
            foreach($array as $k => $v)
            {
                if(strlen( trim($v) ) > 0)
                    $array[$k] = trim($v);
            }
            return $array;
        }
        else
        {
            self::add_debug_msg("(!) Empty data array in file: $filename");
            return array();
        }
    }
    
    /**
     * Printing debug information method
     *
     * @access public
     * @return void
     */
    public static function print_debug()
    {
        echo "
";
        echo htmlspecialchars( implode("\n", self::$debug_info) );
        echo "
"; } /** * Logging method * * @access public * @param string $msg message * @return void */ public static function add_debug_msg($msg) { if(self::$debug_log) { self::$debug_info[] = $msg; } if(self::$console_mode) { echo htmlspecialchars($msg)."\r\n"; } } /** * AngryCurl destructor * * @return void */ function __destruct() { self::add_debug_msg("# Finishing ..."); parent::__destruct(); } } /** * AngryCurl custom exception */ class AngryCurlException extends Exception { public function __construct($message = "", $code = 0 /*For PHP < 5.3 compatibility omitted: , Exception $previous = null*/) { AngryCurl::add_debug_msg($message); parent::__construct($message, $code); } } /** * Class that represent a single curl request */ class AngryCurlRequest extends RollingCurlRequest { } ?>