You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
168 lines
6.4 KiB
168 lines
6.4 KiB
<?php
/****************************************************
 * URL Class - Collection of URL Utilities
 *
 * Created By: Rick Hays
 * Date: 2017-01-08
 *
 * Revisions:
 *
 *****************************************************/
class URL
{
    /**
     * isValidURL - Tests whether a string looks like an http(s) URL.
     *
     * Accepts "http://" or "https://", a host made of dot-separated labels
     * (letters, digits, dashes), an optional ":port" and an optional path.
     *
     * @param string $url - Web Page Address to test.
     * @return int|false - 1 when the string matches, 0 when it does not
     *                     (FALSE only if the regex engine itself fails).
     */
    public function isValidURL($url)
    {
        // The dot between host labels must be escaped; an unescaped "." would
        // accept any character (including spaces) inside the host name.
        return preg_match('|^http(s)?://[a-z0-9-]+(\.[a-z0-9-]+)*(:[0-9]+)?(/.*)?$|i', $url);
    }

    /**
     * stripWWW - Strips a leading "www." off the incoming URL.
     *
     * Only a "www." at the very start of the host is removed. When the URL
     * carries an "http://", "https://" or "//" prefix in front of "www.",
     * that prefix is removed as well (this matches the historical output of
     * this method). Any other input is returned unchanged.
     *
     * @param string $inURL - Web Page Address to strip.
     * @return mixed - The address without its leading "www.".
     */
    public function stripWWW($inURL)
    {
        return preg_replace('#^(?:(?:https?:)?//)?www\.#i', '', $inURL);
    }

    /**
     * getDomainName - Extracts the last two dot-separated labels of the host.
     *
     * Example: "http://www.example.com/page" gives "example.com".
     * Because only the last two labels are kept, a host such as
     * "www.example.co.uk" yields "co.uk".
     *
     * @param string $inURL - Web Page Address (the scheme is optional).
     * @return mixed - The domain name, or NULL when no host can be found or
     *                 the host does not contain a dot.
     */
    public function getDomainName($inURL)
    {
        // Host = everything after the optional scheme up to the first slash.
        if(!preg_match("/^(https?:\/\/)?([^\/]+)/i", (string)$inURL, $matches))
        {
            return NULL;
        }
        $host = $matches[2];

        // Keep only the trailing "label.label" part of the host.
        if(!preg_match("/[^\.\/]+\.[^\.\/]+$/", $host, $matches))
        {
            return NULL;
        }
        return $matches[0];
    }

    /**
     * Status_Code - Returns the Status Code of a URL ie(200 or 404).
     *
     * The code is taken from the first status line reported by get_headers().
     * NOTE(review): when redirects are followed this is presumably the status
     * of the first response, not the final one - confirm against the
     * get_headers() documentation.
     *
     * @param string $URL - Web Page Address to test.
     * @return mixed - The status code as a string, or NULL when the headers
     *                 could not be fetched or the status line is malformed.
     */
    public function Status_Code($URL)
    {
        $headers = get_headers($URL, 1);
        if($headers === FALSE || !isset($headers[0]))
        {
            return NULL;
        }

        // Status line looks like "HTTP/1.1 200 OK"; the reason phrase is optional.
        $parts = explode(" ", $headers[0], 3);
        return isset($parts[1]) ? $parts[1] : NULL;
    }

    /**
     * FavIcon - Checks whether a FavIcon can be fetched for the site.
     * Note: Strip off the "http://" before calling.
     *
     * @param string $URL - Web Page Address to get the FavIcon for.
     * @return boolean - TRUE when the icon was loaded, FALSE otherwise.
     * @todo - Add file types ie (PNG, JPG, GIF) as an option.
     */
    public function FavIcon($URL)
    {
        $imageObject = self::loadFavIcon($URL);
        if($imageObject === FALSE)
        {
            return FALSE;
        }

        // The image is only needed as proof that the icon exists.
        self::releaseImage($imageObject);
        return TRUE;
    }

    /**
     * Save_FavIcon - Saves the Websites FavIcon to the File/Path.
     * Note: Strip off the "http://" before calling.
     *
     * @param string $URL       - Web Page Address to get the FavIcon for.
     * @param string $FILE_PATH - File and Path to save the FavIcon to.
     * @return boolean - TRUE when the icon was fetched and written,
     *                   FALSE when either step failed.
     * @todo - Add file types ie (PNG, JPG, GIF) as an option.
     */
    public function Save_FavIcon($URL, $FILE_PATH)
    {
        $imageObject = self::loadFavIcon($URL);
        if($imageObject === FALSE)
        {
            return FALSE;
        }

        // Save image to a file and report whether the write succeeded.
        $saved = imagepng($imageObject, $FILE_PATH);

        // Destroy object
        self::releaseImage($imageObject);
        return $saved;
    }

    /**
     * loadFavIcon - Fetches the FavIcon PNG for a domain from Google's s2 service.
     *
     * @param string $URL - Domain to look up (URL-encoded into the query string).
     * @return mixed - GD image (resource or GdImage object, depending on the
     *                 PHP version) or FALSE on failure.
     */
    private static function loadFavIcon($URL)
    {
        return imagecreatefrompng('http://www.google.com/s2/favicons?domain='.urlencode($URL));
    }

    /**
     * releaseImage - Frees a GD image when it is an old-style resource.
     *
     * GD images that are objects are released by PHP itself, so
     * imagedestroy() is only called for resources.
     *
     * @param mixed $image - GD image returned by loadFavIcon().
     */
    private static function releaseImage($image)
    {
        if(is_resource($image))
        {
            imagedestroy($image);
        }
    }

    /**
     * GetParam - Returns a Form Value
     *
     * Looks the name up in $_POST first, then in $_GET. A field that was
     * submitted with an empty value is returned as-is; the default is used
     * only when the field is present in neither array.
     *
     * @param string $param_name - name of form field.
     * @param string $default [optional] - Default value to pass back if $param_name was not submitted.
     * @return mixed - The submitted value or $default.
     */
    public function GetParam($param_name, $default=NULL)
    {
        if(isset($_POST[$param_name]))
        {
            return $_POST[$param_name];
        }
        if(isset($_GET[$param_name]))
        {
            return $_GET[$param_name];
        }
        return $default;
    }

    /**
     * Tests the User Agent to see if it is a mobile device
     *
     * The result is computed once and then cached in a static variable, so
     * later calls return the first answer without re-reading the User Agent.
     * A missing User Agent header is treated as "not mobile".
     *
     * @return boolean
     */
    public function is_MobileDevice()
    {
        // Uncomment the line below to force mobile;
        //return TRUE;
        $match1 = '/android|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i';
        $match2 = '/1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|e\-|e\/|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\-|2|g)|yas\-|your|zeto|zte\-/i';
        static $mobile = NULL;
        if($mobile === NULL)
        {
            $mobile = FALSE;
            // The header is absent for some clients (and on the command line).
            $useragent = isset($_SERVER['HTTP_USER_AGENT']) ? (string)$_SERVER['HTTP_USER_AGENT'] : '';
            if(preg_match($match1, $useragent) || preg_match($match2, $useragent))
            {
                // We're not safe to say it's mobile yet. "tosh" is the mobile os for Toshiba phones, but it matches Macintosh.
                if(!preg_match('/macintosh/i', $useragent))
                {
                    $mobile = TRUE;
                }
            }
        }
        return $mobile;
    }

    /**
     * Tests the User Agent to see if it is a Bot/Spider/Crawler
     *
     * The crawler names are matched case-sensitively anywhere in the
     * User Agent string.
     *
     * @return boolean
     *
     * REVISIONS:
     * 2012-04-18 RLH Found that there were some with NO USER AGENT, It was decided to count them as a spider.
     */
    public function is_SpiderBot()
    {
        $crawlers = 'AbachoBOT|accoona|AcioRobot|alexa|AltaVista|Ask Jeeves|ASPSeek|Baiduspider|bingbot|CocoCrawler|crawler|Dumbot|eStyle|FAST-WebCrawler|Feedfetcher-Google|Firefly|froogle|GeonaBot|Gigabot|girafabot|Googlebot|ia_archiver|IDBot|InfoSeek|inktomi|looksmart|Lycos|Mediapartners-Google|msnbot|MSRBOT|NationalDirectory|Openbot|rabaz|Rambler|Rankivabot|Scooter|Scrubby|Slurp|Sogou web spider|Spade|SurveyBot|TechnoratiSnoop|TECNOSEEK|Teoma|URL_Spider_SQL|WebAlta Crawler|WebBug|WebFindBot|Yahoo|Yammybot|YandexBot|ZyBorg';
        $useragent = isset($_SERVER['HTTP_USER_AGENT']) ? (string)$_SERVER['HTTP_USER_AGENT'] : '';

        // A missing or empty User Agent is counted as a spider (see REVISIONS).
        if($useragent === '')
        {
            return TRUE;
        }
        return (preg_match("/$crawlers/", $useragent) > 0);
    }

} // End URL Class
|
|
|
|
|