Thursday, April 14, 2011

Get redirect URL from headers (recursive)

Recursively obtain the redirect URL from web server page headers.
The method uses regular expressions and the PHP cURL library, and lets you specify the maximum number of consecutive (cascaded) redirects to follow.

Usage:

// Resolve the redirect target of $url, following at most 2 cascaded redirects.
$redirect_url = xDomainTools::getInstance()->getRedirectUrlRecursive($url, 2);

Class and methods:

/**
 * cURL-based helper for fetching pages and resolving HTTP redirect targets.
 *
 * Intended to be used through the shared instance returned by
 * xDomainTools::getInstance().
 */
class xDomainTools {

    /** @var xDomainTools|null Shared instance handed out by getInstance(). */
    static private $instance = null;

    /** @var string[] User-agent strings used when the caller does not supply one. */
    private $ua = array(
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.7) Gecko/20050414 Firefox/3.5.1',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
    );

    public function __construct() {}

    /**
     * Return the shared instance, creating it on first use.
     *
     * Declared static because every caller invokes it as
     * xDomainTools::getInstance(); a non-static declaration makes that call
     * a fatal error on PHP 8.
     *
     * @return xDomainTools
     */
    public static function getInstance() {
        if (self::$instance === null) {
            self::$instance = new self;
        }
        return self::$instance;
    }

    /**
     * Pick one of the configured user-agent strings at random.
     *
     * @return string
     */
    private function getUaRand() {
        // array_rand() only yields existing keys; rand(0, count()) has an
        // inclusive upper bound and could select one index past the end.
        return $this->ua[array_rand($this->ua)];
    }

    /**
     * Fetch a URL with cURL.
     *
     * Cookies are read from and written to cookies.txt next to this file.
     *
     * @param string $_url              URL to request.
     * @param string $_referer          Value for the Referer header.
     * @param string $_ua               User-agent string; a random configured one is used when empty.
     * @param bool   $_return_transfer  Return the response instead of printing it.
     * @param int    $_timeout          Seconds, applied to both the connect and the total timeout.
     * @param bool   $_including_header Include the response headers in the output.
     * @param bool   $_follow_location  Let cURL follow Location headers itself.
     *
     * @return string|bool The result of curl_exec(): the response string when
     *                     $_return_transfer is true, otherwise a boolean; false on failure.
     */
    public function getUrlContent($_url, $_referer = '', $_ua = '', $_return_transfer = true, $_timeout = 20, $_including_header = false, $_follow_location = true) {
        $cookie_file = dirname(__FILE__) . '/cookies.txt';
        $user_agent  = strlen((string) $_ua) ? $_ua : $this->getUaRand();

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $_url);
        curl_setopt($ch, CURLOPT_HEADER, $_including_header);
        curl_setopt($ch, CURLOPT_REFERER, $_referer);
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);

        curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $_follow_location);

        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $_timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, $_timeout);

        curl_setopt($ch, CURLOPT_RETURNTRANSFER, $_return_transfer);

        $response = curl_exec($ch);
        curl_close($ch);

        return $response;
    }

    /**
     * Return the Location target announced by a single redirect response.
     *
     * The URL is requested with headers included and without letting cURL
     * follow redirects. A redirect is recognised only when the response
     * contains "301 Moved Permanently" or the word "Redirect" (e.g.
     * "307 Temporary Redirect") ahead of an absolute http:// or https://
     * Location value; other redirect responses such as "302 Found" are not
     * recognised.
     *
     * @param string $_url URL to probe.
     *
     * @return string The redirect target, or '' when none was detected or the request failed.
     */
    public function getRedirectUrl($_url) {
        $response = $this->getUrlContent($_url, '', '', true, 20, true, false);
        if (!is_string($response)) {
            return '';
        }

        // Flatten line breaks to spaces so the Location value is always
        // terminated by a space, including when it is the last thing in the response.
        $flat = str_replace(array(chr(10), chr(11), chr(13)), ' ', $response) . ' ';

        $pattern = '`.*(301 Moved Permanently|Redirect).*Location: (https?://[^ ]+) .*`Ui';
        // Check the match result before reading $m[2]: on no match (or a PCRE
        // error) the capture group does not exist.
        if (preg_match($pattern, $flat, $m) === 1 && strlen($m[2])) {
            return $m[2];
        }
        return '';
    }

    /**
     * Follow a chain of redirects, up to a maximum number of hops.
     *
     * @param string $_url          URL to start from.
     * @param int    $_no_redirects Maximum number of redirects to follow; zero or a
     *                              negative value returns $_url without any request.
     * @param int    $_iteration    Internal recursion counter; callers should leave it at 0.
     *
     * @return string The last URL reached in the chain, or '' when $_url does not redirect at all.
     */
    public function getRedirectUrlRecursive($_url, $_no_redirects = 3, $_iteration = 0) {
        $_iteration++;
        // ">" rather than "==" so a zero or negative limit stops immediately
        // instead of never meeting the stop condition.
        if ($_iteration > $_no_redirects) {
            return $_url;
        }

        $url_redirect = $this->getRedirectUrl($_url);
        if (strlen($url_redirect)) {
            return $this->getRedirectUrlRecursive($url_redirect, $_no_redirects, $_iteration);
        }

        // The chain ended before the limit was reached. If at least one
        // redirect was already followed, $_url is the resolved target; on the
        // very first hop there was no redirect, so keep returning ''.
        return $_iteration > 1 ? $_url : '';
    }

}

No comments: