|
Posted by caine on 02/25/07 13:24
I want to extract web data from a news feed page
http://everling.nierchi.net/mmubulletins.php.
Just want to extract necessary info between open n closing tags of
<title>, <category> and <link>. Whenever I initiated the extraction,
first news title is always "MMU Bulletin Board RSS Feed" with the
proper bulletin's link stored, but not the correct news title being
stored.
Necessary info only appears within <item> and </item> which consists
those <title>, <category> and <link>.
<?php
include 'connect.php';
$URL="http://everling.nierchi.net/mmubulletins.php";
$f = fopen($URL, "r");
if($f){
$pre = "";
while(!feof($f))
{
$pre= fread($f, 1000);
$source = $source.$pre;
}
}
else
{
echo 'Unable to open '.$URL.'.';
die;
}
//extract the date into database
$datetime = date("Y-n-j");
$total= substr_count($source, "<item>");
//extract necessary information into database
$pos=0;
for($loop=0;$loop<$total;$loop++)
{
$line1 = strpos($source, "<title>", $pos);
$end1 = strpos($source, "</title>", $line1);
$line1 = $line1 + 7;
$end1 = $end1 - $line1;
$title = substr($source, $line1, $end1);
$title = convert($title);
$line2 = $line1 + $end1 + 1;
$line2 = strpos($source, "<category>", $line2);
$end2 = strpos($source , "</category>" , $line2);
$line2 = $line2 + 10;
$end2 = $end2 - $line2;
$category = substr($source , $line2, $end2);
$category = convert($category);
$line3 = $line2 + $end2 + 1;
$line3 = strpos($source , "<link>" , $line3);
$end3 = strpos($source , "</link>" , $line3);
$line3 = $line3 + 6;
$end3 = $end3 - $line3;
$link = substr($source , $line3 , $end3);
$link = convert($link);
$pos = $line3 + $end3 + 1;
$qry = "INSERT INTO `bul_data` (`DATE`, `TITLE`,
`DEPARTMENT`,`CAMPUS`, `LINK`) VALUES
( '$datetime','$title','$category','', '$link')";
$res = mysql_query($qry) OR die(mysql_error());
}
function convert($string)
{
$string = htmlspecialchars($string,ENT_QUOTES);
return $string;
}
?>
[Back to original message]
|