* @link http://stupid.su/php-curl_multi/
 * @licence GPL
 * @version 0.4
 *
 * @todo stop on error_limit exceed
 * @todo "on the fly" change AngryCurlRequest fix
 *
 * @uses RollingCurl
 * @uses cURL
 *
 * @var array  $debug_info            - debug information
 * @var bool   $debug_log             - Enable/disable debug log
 * @var bool   $console_mode          - Enable/disable echoing log messages directly to the output as they happen
 * @var array  $array_alive_proxy     - alive proxies collected by the proxy-check callback
 * @var array  $array_proxy           - proxy list
 * @var array  $array_url             - url list to parse
 * @var array  $array_useragent       - useragents to rotate
 * @var int    $error_limit           - Limit of invalid http responses before die, 0 - unlimited // not implemented yet
 * @var array  $array_valid_http_code - Array of valid http response codes // not implemented yet
 * @var int    $n_proxy               - proxies amount
 * @var int    $n_useragent           - useragents amount
 * @var int    $n_url                 - urls amount
 * @var string $proxy_test_url        - url address to connect to for testing proxies
 * @var string $proxy_valid_regexp    - regexp used to make sure the response has not been modified by the proxy
 * @var bool   $use_proxy_list        - Flag that is set in load_proxy_list method
 * @var bool   $use_useragent_list    - Flag that is set in load_useragent_list method
 */
class AngryCurl extends RollingCurl
{
    public static $debug_info = array();
    public static $debug_log = false;
    protected static $console_mode = false;

    protected static $array_alive_proxy = array();
    protected $array_proxy = array();
    protected $array_url = array();
    protected $array_useragent = array();

    protected $error_limit = 0; // not implemented yet
    protected $array_valid_http_code = array(200); // not implemented yet

    protected $n_proxy = 0;
    protected $n_useragent = 0;
    protected $n_url = 0;

    protected $proxy_test_url = 'http://google.com';
    protected static $proxy_valid_regexp = '';

    private $use_proxy_list = false;
    private $use_useragent_list = false;

    /**
     * AngryCurl constructor
     *
     * @throws AngryCurlException when the cURL extension is not available
     *
     * @param string $callback  Callback function name
     * @param bool   $debug_log Enable/disable writing log to $debug_info var
     *                          (false by default to reduce memory consumption)
     *
     * @return void
     */
    public function __construct($callback = null, $debug_log = false)
    {
        self::$debug_log = $debug_log;

        # writing debug
        self::add_debug_msg("# Building");

        # checking if cURL enabled
        if (!function_exists('curl_init')) {
            throw new AngryCurlException("(!) cURL is not enabled");
        }

        parent::__construct($callback);
    }

    /**
     * Initializing console mode: log messages are echoed as they are added
     * and output buffering/compression is switched off.
     *
     * @return void
     */
    public function init_console()
    {
        self::$console_mode = true;

        # NOTE(review): as written this emits a single newline; it may be the
        # remains of stripped wrapper markup - confirm against the upstream file
        echo "\n";

        # Internal Server Error fix in case no apache_setenv() function exists
        if (function_exists('apache_setenv')) {
            @apache_setenv('no-gzip', 1);
        }
        @ini_set('zlib.output_compression', 0);
        @ini_set('implicit_flush', 1);

        # ob_get_level() shrinks with every ob_end_flush(), so the level is
        # captured once; comparing against the live value closes only about
        # half of the nested buffers
        $levels = ob_get_level();
        for ($i = 0; $i < $levels; $i++) {
            ob_end_flush();
        }
        ob_implicit_flush(1);

        # writing debug
        self::add_debug_msg("# Console mode activated");
    }

    /**
     * Request execution overload: picks a random proxy and/or useragent for
     * the request when the corresponding list has been loaded, then passes
     * the request on to the parent implementation.
     *
     * @access public
     *
     * @throws AngryCurlException when a list was requested but is empty
     *
     * @param string         $url       Request URL
     * @param enum(GET/POST) $method
     * @param array          $post_data
     * @param array          $headers
     * @param array          $options
     *
     * @return bool
     */
    public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null)
    {
        if ($this->use_proxy_list) {
            if ($this->n_proxy < 1) {
                throw new AngryCurlException("(!) Option 'use_proxy_list' is set, but no alive proxy available");
            }
            $options[CURLOPT_PROXY] = $this->array_proxy[mt_rand(0, $this->n_proxy - 1)];
        }

        if ($this->use_useragent_list) {
            if ($this->n_useragent < 1) {
                throw new AngryCurlException("(!) Option 'use_useragent_list' is set, but no useragents available");
            }
            $options[CURLOPT_USERAGENT] = $this->array_useragent[mt_rand(0, $this->n_useragent - 1)];
        }

        parent::request($url, $method, $post_data, $headers, $options);

        return true;
    }

    /**
     * Starting connections function execution overload
     *
     * @access public
     *
     * @throws AngryCurlException when $window_size is not a positive integer
     *
     * @param int $window_size Max number of simultaneous connections
     *
     * @return string|bool whatever the parent execute() returns
     */
    public function execute($window_size = null)
    {
        # checking $window_size var
        # loose comparison: 0, '' and false are treated like null ("not set")
        if ($window_size == null) {
            self::add_debug_msg(" (!) \nDefault threads amount value (5) is used");
        } elseif ($window_size > 0 && is_int($window_size)) {
            self::add_debug_msg(" * Threads set to:\t$window_size");
        } else {
            throw new AngryCurlException(" (!) Wrong threads amount in execute():\t$window_size");
        }

        # writing debug
        self::add_debug_msg(" * Starting connections");

        $time_start = microtime(true);
        $result = parent::execute($window_size);
        $time_end = microtime(true);

        # writing debug
        self::add_debug_msg(" * Finished in ".round($time_end - $time_start, 2)."s");

        return $result;
    }

    /**
     * Flushing requests map for re-using purposes
     *
     * @return void
     */
    public function flush_requests()
    {
        $this->__set('requests', array());
    }

    /**
     * Useragent list loading method
     *
     * @access public
     *
     * @throws AngryCurlException when no useragents could be loaded
     *
     * @param string/array $input Input useragent data, could be an array or filename
     *
     * @return integer Amount of useragents loaded
     */
    public function load_useragent_list($input)
    {
        # writing debug
        self::add_debug_msg("# Start loading useragent list");

        # defining useragents
        $this->array_useragent = is_array($input) ? $input : $this->load_from_file($input);

        # setting amount
        $this->n_useragent = count($this->array_useragent);

        # writing debug
        if ($this->n_useragent < 1) {
            throw new AngryCurlException("# (!) \nNo useragents loaded");
        }
        self::add_debug_msg("# Loaded useragents:\t{$this->n_useragent}");

        # Setting flag to prevent using AngryCurl without useragents
        $this->use_useragent_list = true;

        return $this->n_useragent;
    }

    /**
     * Proxy list loading and filtering method
     *
     * Loads the list, removes duplicates, tests every proxy against
     * $proxy_test_url and keeps only the ones that answered correctly.
     *
     * @access public
     *
     * @throws AngryCurlException on a wrong $window_size or an empty proxy list
     *
     * @param string/array      $input              Input proxy data, could be an array or filename
     * @param integer           $window_size        Max number of simultaneous connections when testing
     * @param enum(http/socks5) $proxy_type
     * @param string            $proxy_test_url     URL needed for proxy test requests
     * @param regexp            $proxy_valid_regexp Regexp used to make sure the response has not been
     *                                              modified by the proxy; empty means no check
     *
     * @return bool true once the list has been loaded and filtered
     */
    public function load_proxy_list($input, $window_size = 5, $proxy_type = 'http', $proxy_test_url = 'http://google.com', $proxy_valid_regexp = null)
    {
        # writing debug
        self::add_debug_msg("# Start loading proxies");

        # defining proxies
        $this->array_proxy = is_array($input) ? $input : $this->load_from_file($input);

        # checking $window_size var
        if (intval($window_size) < 1 || !is_int($window_size)) {
            throw new AngryCurlException(" (!) \nWrong threads amount in load_proxy_list():\t$window_size");
        }

        # setting proxy type
        if ($proxy_type == 'socks5') {
            self::add_debug_msg(" * Proxy type set to:\tSOCKS5");
            $this->__set('options', array(CURLOPT_PROXYTYPE => CURLPROXY_SOCKS5));
        } else {
            self::add_debug_msg(" * Proxy type set to:\tHTTP");
        }

        # setting amount
        $this->n_proxy = count($this->array_proxy);
        self::add_debug_msg(" * Loaded proxies:\t{$this->n_proxy}");

        if ($this->n_proxy < 1) {
            throw new AngryCurlException(" (!) \nProxies amount < 0 in load_proxy_list():\t{$this->n_proxy}");
        }

        # removing duplicates
        # array_values() re-indexes the list so that the random index picked
        # with mt_rand() in request() always exists
        $n_loaded = $this->n_proxy;
        $this->array_proxy = array_values(array_unique($this->array_proxy));

        # updating amount
        $this->n_proxy = count($this->array_proxy);
        self::add_debug_msg(" * Removed duplicates:\t".($n_loaded - $this->n_proxy));
        self::add_debug_msg(" * Unique proxies:\t{$this->n_proxy}");

        # setting url for testing proxies
        $this->proxy_test_url = $proxy_test_url;
        self::add_debug_msg(" * Proxy test URL:\t{$this->proxy_test_url}");

        # setting regexp for testing proxies
        # the property is static, so it is cleared when no regexp is given;
        # otherwise a regexp from an earlier call would silently stay active
        if (!empty($proxy_valid_regexp)) {
            self::$proxy_valid_regexp = $proxy_valid_regexp;
            self::add_debug_msg(" * Proxy test RegExp:\t".self::$proxy_valid_regexp);
        } else {
            self::$proxy_valid_regexp = '';
        }

        # filtering alive proxies
        $this->filter_alive_proxy($window_size);

        # Setting flag to prevent using AngryCurl without proxies
        $this->use_proxy_list = true;

        return true;
    }

    /**
     * Callback used while testing proxies: records the proxy of a request as
     * alive when the response code is 200 and, if a validation regexp is set,
     * the response body matches it.
     *
     * The regexp is wrapped in '#' delimiters, so a pattern containing an
     * unescaped '#' fails to compile and every proxy is reported as failed.
     *
     * @param string $response Response body
     * @param array  $info     Transfer info; 'http_code', 'total_time' and 'url' are read
     * @param object $request  Request object; its options[CURLOPT_PROXY] is read
     *
     * @return void
     */
    public static function callback_proxy_check($response, $info, $request)
    {
        # running number of the checked response, used in log lines only
        static $rid = 0;
        $rid++;

        $proxy = $request->options[CURLOPT_PROXY];

        if ($info['http_code'] !== 200) {
            self::add_debug_msg(" $rid->\t".$proxy."\tFAILED\t".$info['http_code']."\t".$info['total_time']."\t".$info['url']);
            return;
        }

        if (!empty(self::$proxy_valid_regexp) && !@preg_match('#'.self::$proxy_valid_regexp.'#', $response)) {
            self::add_debug_msg(" $rid->\t".$proxy."\tFAILED\tRegExp match:\t".self::$proxy_valid_regexp."\t".$info['url']);
            return;
        }

        self::add_debug_msg(" $rid->\t".$proxy."\tOK\t".$info['http_code']."\t".$info['total_time']."\t".$info['url']);
        self::$array_alive_proxy[] = $proxy;
    }

    /**
     * Filtering proxy array, choosing alive proxy only
     *
     * Queues one test request per proxy, runs them with the proxy-check
     * callback installed, then replaces the proxy list with the alive ones.
     * The request queue is emptied afterwards.
     *
     * @throws AngryCurlException on a wrong $window_size
     *
     * @param integer $window_size Max number of simultaneous connections when testing
     *
     * @return void
     */
    protected function filter_alive_proxy($window_size = 5)
    {
        # writing debug
        self::add_debug_msg("# Start testing proxies");

        # checking $window_size var
        if (intval($window_size) < 1 || !is_int($window_size)) {
            throw new AngryCurlException(" (!) \nWrong threads amount in filter_alive_proxy():\t$window_size");
        }

        $saved_callback = $this->__get('callback');
        $saved_use_proxy_list = $this->use_proxy_list;

        # the alive list is static: start from scratch so results of an
        # earlier run are not mixed into this one
        self::$array_alive_proxy = array();

        # while the flag is set request() replaces CURLOPT_PROXY with a random
        # proxy, which would defeat testing each proxy exactly once
        $this->use_proxy_list = false;

        $this->__set('callback', array('AngryCurl', 'callback_proxy_check'));

        try {
            # adding requests to stack
            foreach ($this->array_proxy as $proxy) {
                # there won't be any regexp checks, just this :)
                if (strlen($proxy) > 4) {
                    $this->request($this->proxy_test_url, "GET", null, null, array(CURLOPT_PROXY => $proxy));
                }
            }

            # run
            $this->execute($window_size);
        } catch (Exception $e) {
            # leave the object usable: drop queued test requests and put the
            # caller's callback and flag back before passing the error on
            $this->__set('requests', array());
            $this->__set('callback', $saved_callback);
            $this->use_proxy_list = $saved_use_proxy_list;
            throw $e;
        }

        # flushing requests
        $this->__set('requests', array());

        # writing debug
        self::add_debug_msg("# Alive proxies:\t".count(self::$array_alive_proxy)."/".$this->n_proxy);

        # updating params
        $this->n_proxy = count(self::$array_alive_proxy);
        $this->array_proxy = self::$array_alive_proxy;

        $this->__set('callback', $saved_callback);
        $this->use_proxy_list = $saved_use_proxy_list;
    }

    /**
     * Loading info from external files
     *
     * Reads the file, splits it by $delim, trims every entry and drops the
     * blank ones. Failures are logged and reported as an empty array.
     *
     * @access private
     *
     * @param string $filename
     * @param string $delim
     *
     * @return array list of non-empty, trimmed entries (empty array on failure)
     */
    protected function load_from_file($filename, $delim = "\n")
    {
        # warning suppressed on purpose: failure is reported through the log
        # and an empty result instead
        $data = @file_get_contents($filename);

        if ($data === false) {
            self::add_debug_msg("(!) Failed to open file: $filename");
            return array();
        }

        if (strlen($data) < 1) {
            self::add_debug_msg("(!) Empty file: $filename");
            return array();
        }

        # blank lines (e.g. the one after a trailing newline) must not become
        # entries, otherwise an empty useragent/proxy could be picked later
        $entries = array();
        foreach (explode($delim, $data) as $chunk) {
            $chunk = trim($chunk);
            if ($chunk !== '') {
                $entries[] = $chunk;
            }
        }

        if (count($entries) < 1) {
            self::add_debug_msg("(!) \nEmpty data array in file: $filename");
        }

        return $entries;
    }

    /**
     * Printing debug information method: echoes the collected log,
     * HTML-escaped, one message per line.
     *
     * @access public
     *
     * @return void
     */
    public static function print_debug()
    {
        # NOTE(review): the two empty echoes output nothing as written; they
        # look like stripped wrapper markup - confirm against the upstream file
        echo "";
        echo htmlspecialchars(implode("\n", self::$debug_info));
        echo "";
    }

    /**
     * Logging method: stores the message when the debug log is enabled and
     * echoes it (HTML-escaped) when console mode is active.
     *
     * @access public
     *
     * @param string $msg message
     *
     * @return void
     */
    public static function add_debug_msg($msg)
    {
        if (self::$debug_log) {
            self::$debug_info[] = $msg;
        }

        if (self::$console_mode) {
            echo htmlspecialchars($msg)."\r\n";
        }
    }

    /**
     * AngryCurl destructor
     *
     * @return void
     */
    public function __destruct()
    {
        self::add_debug_msg("# Finishing ...");
        parent::__destruct();
    }
}

/**
 * AngryCurl custom exception; every message is also written to the AngryCurl log
 */
class AngryCurlException extends Exception
{
    public function __construct($message = "", $code = 0 /*For PHP < 5.3 compatibility omitted: , Exception $previous = null*/)
    {
        AngryCurl::add_debug_msg($message);
        parent::__construct($message, $code);
    }
}

/**
 * Class that represent a single curl request
 */
class AngryCurlRequest extends RollingCurlRequest
{
}
?>