|  | Posted by Rick Stem on 04/26/07 22:52 
I have checkURL(http://globalwarmingawareness2007.org.uk, globalwarmingawareness2007.org.uk)
 
 I see almost everyone using regular expressions for this, but I don't
 completely trust them. I don't know whether this code is the best way to
 check that a user entered a valid URL and to avoid SQL injection through the URL.
 
 /**
  * Validate a user-supplied web address against a conservative character whitelist.
  *
  * "http://" is prepended before parsing, so $url is expected WITHOUT a scheme
  * (e.g. "example.com/path?a=b#top"). Each component is then checked:
  *   - host:     starts with a letter or digit, contains at least one dot, does
  *               not end with a dot, and every dot-separated label consists
  *               only of letters, digits, "-" and "_";
  *   - path:     only letters, digits, "-", ".", "_" and "/", with no run of
  *               two or more consecutive slashes;
  *   - query:    only letters, digits, "/", "-", "_", "=" and "&", with no run
  *               of two or more "=" / "&" characters;
  *   - fragment: only letters, digits, "-", "_" and ".".
  *
  * On any violation the script is terminated with die(), printing
  * $incorrect_input[1] followed by $name.
  *
  * NOTE: whitelisting characters narrows what reaches the database, but it is
  * not a substitute for parameterised queries when the value is stored.
  *
  * @param string $url  Address to validate, without the leading scheme.
  * @param string $name Label appended to the error message on failure.
  * @return string The original $url, unchanged, when every check passes.
  */
 function checkURL($url, $name)
 {
     global $incorrect_input;

     $data = parse_url("http://" . $url);
     if (!$data)
         die($incorrect_input[1].$name);

     // parse_url() omits components that are absent from the input, so every
     // key has to be guarded before it is read.
     $host     = isset($data['host'])     ? $data['host']     : '';
     $path     = isset($data['path'])     ? $data['path']     : '';
     $query    = isset($data['query'])    ? $data['query']    : '';
     $fragment = isset($data['fragment']) ? $data['fragment'] : '';

     // Host must start with a letter or digit (an empty host fails here too).
     if (!preg_match('/^[A-Za-z0-9]/i', $host))
         die($incorrect_input[1].$name);

     // Host must contain at least one "label." sequence, i.e. a dot.
     if (!preg_match('/([A-Za-z0-9]+\.)+/i', $host))
         die($incorrect_input[1].$name);

     // Host must not end with a dot.
     if (preg_match('/\.$/i', $host))
         die($incorrect_input[1].$name);

     // Every dot-separated label may only contain letters, digits, "-" and "_".
     // explode() replaces split(), which was removed in PHP 7.0.
     foreach (explode('.', $host) as $label)
     {
         if (preg_match('/[^A-Za-z0-9\-\_]+/i', $label))
             die($incorrect_input[1].$name);
     }

     // Path: only letters, digits, "-", ".", "_" and "/" are allowed.
     if ($path)
     {
         if (preg_match('/[^A-Za-z0-9\-\._\/]/', $path))
             die($incorrect_input[1].$name);
     }

     // Do not allow more than one consecutive slash in the path.
     if (preg_match('/[\/]{2,}/i', $path))
         die($incorrect_input[1].$name);

     if ($query)
     {
         // Query: only letters, digits, "/", "-", "_", "=" and "&".
         if (preg_match('/[^A-Za-z0-9\/\-\_\=\&]+/i', $query))
             die($incorrect_input[1].$name);
         // Reject runs such as "==", "&&" or "=&" (this includes empty values
         // that are followed by another parameter).
         if (preg_match('/[\=\&]{2,}/i', $query))
             die($incorrect_input[1].$name);
     }

     if ($fragment)
     {
         // Fragment: only letters, digits, "-", "_" and ".".
         if (preg_match('/[^A-Za-z0-9\-\_\.]+/i', $fragment))
             die($incorrect_input[1].$name);
     }

     return($url);
 }
 [Back to original message] |