Problem with perl grab/dump code
Date: 10/14/05
(Web Development) Keywords: rss, xml, seo
The code seems to complete its steps, but the dump output isn't working. I'm certain something is missing or misplaced that is causing this. Any help would be appreciated.
#!/usr/bin/perl -w
#
# Craigslist gearfinder
# see: http://www.blablabla.com
use strict;
use XML::Simple;
use LWP::Simple;
use Data::Dumper;
# This file is a script rather than a module, so it stays in package main.
# That is also the package into which the XML::Simple and LWP::Simple
# functions were imported above, so they remain callable from the code below.
# (A hyphenated name such as "cl-carfind" is not a valid Perl package name
# and stops the script from compiling.)
package main;
use Devel::Peek;    # exports Dump(), which writes a scalar's internals to STDERR
# Sample scalar inspected by match(), print_pos() and dump().
my $readonly = "Test";
# Apply a global word-character match to $readonly and hand the result back
# in the caller's context. In scalar context every successful call moves
# pos($readonly) past the character that matched.
sub match {
    return $readonly =~ /\w/g;
}
# Print the current regex match position of $readonly to STDOUT as
# "pos: <offset>" followed by a newline. Returns whatever print returns.
sub print_pos {
    return print "pos: ", pos($readonly), "\n";
}
# Write a Devel::Peek dump of $readonly to the file /tmp/dump.<pid>.
#
# Dump() always writes to STDERR, so STDERR is pointed at the dump file for
# the duration of the call and then restored. The chosen file name is
# announced on STDOUT first.
#
# Returns 1 on success; dies if STDERR cannot be saved, redirected or
# restored.
#
# NOTE: "dump" is also the name of a Perl builtin. Call this sub as &dump()
# or with its package-qualified name so the call cannot be taken for
# CORE::dump.
sub dump {
    my $dump_file = "/tmp/dump.$$";
    print "Dumping the data into $dump_file\n";

    # Keep a duplicate of the real STDERR so it can be put back afterwards.
    open my $saved_stderr, '>&', \*STDERR
        or die "Can't dup STDERR: $!";

    unless (open STDERR, '>', $dump_file) {
        # A failed re-open leaves STDERR closed; restore it first so the
        # error message below is actually visible.
        my $open_error = $!;
        open STDERR, '>&', $saved_stderr;
        die "Can't open $dump_file: $open_error";
    }

    Dump($readonly);

    # Buffered write errors only surface at close time.
    my $close_ok    = close STDERR;
    my $close_error = $!;

    open STDERR, '>&', $saved_stderr
        or die "Can't restore STDERR: $!";
    close $saved_stderr;

    warn "Error while writing $dump_file: $close_error\n" unless $close_ok;
    return 1;
}
1;
# When true, the feed loop prints the raw XML and the parsed items of the
# first feed it handles and then exits instead of searching.
my $debug = 0;

# RSS feeds to search, one per Craigslist site. Every feed URL has the form
# http://<subdomain>.craigslist.org/ele/index.rss, so only the subdomains are
# listed here. Each subdomain appears exactly once so that no feed is fetched
# (and no match reported) twice.
my @feeds = map { sprintf 'http://%s.craigslist.org/ele/index.rss', $_ } qw(
    albany allentown albuquerque anchorage annarbor asheville
    atlanta austin bakersfield baltimore batonrouge bham
    boise boston buffalo burlington chambana charleston
    charlotte chicago chico cincinnati cleveland columbia
    columbus dallas delaware dayton denver desmoines
    detroit elpaso eugene fortmyers fresno grandrapids
    greensboro harrisburg hartford houston honolulu humboldt
    indianapolis inlandempire ithaca jackson jacksonville kansascity
    knoxville littlerock lasvegas lexington losangeles louisville
    maine madison memphis miami milwaukee minneapolis
    mobile modesto montana monterey montgomery nashville
    nh newhaven newjersey newyork neworleans norfolk
    nd oklahomacity omaha orangecounty orlando pensacola
    philadelphia phoenix pittsburgh portland puertorico providence
    raleigh redding reno richmond rochester sacramento
    saltlakecity sanantonio sandiego www slo santabarbara
    savannah seattle shreveport sd spokane stlouis
    stockton syracuse tallahassee tampa toledo tucson
    tulsa washingtonDC westernmass westpalmbeach wv wichita
    wyoming calgary edmonton halifax montreal ottawa
    quebec saskatoon toronto vancouver victoria winnipeg
    buenosaires caracas costarica lima mexicocity rio
    santiago saopaulo tijuana amsterdam athens barcelona
    berlin brussels budapest copenhagen florence frankfurt
    geneva hamburg helsinki istanbul lyon madrid
    marseilles milan moscow munich naples oslo
    paris prague rome stpetersburg stockholm vienna
    warsaw zurich bangalore bangkok beijing chennai
    delhi hongkong hyderabad jakarta jerusalem kolkata
    manila mumbai osaka seoul shanghai singapore
    tokyo taipei telaviv cairo capetown johannesburg
    belfast birmingham bristol cardiff dublin edinburgh
    glasgow leeds liverpool london manchester newcastle
    adelaide auckland brisbane melbourne perth sydney
);
# Fetch each feed in @feeds, parse it, and print the title and link of every
# item whose title matches the search pattern. A feed that cannot be fetched
# or parsed is reported on STDERR and skipped, so one bad feed does not abort
# the whole run.
#
# When $debug is true, the first feed that is fetched and parsed is dumped
# (plain-text header, PID, raw XML, parsed items) and the script exits.
FEED:
for my $feed (@feeds) {
    # Package-qualified so the call resolves whatever package is current.
    # get() returns undef when the download fails.
    my $xml = LWP::Simple::get($feed);
    unless (defined $xml) {
        warn "Could not fetch $feed\n";
        next FEED;
    }

    # XMLin() dies on malformed XML, hence the eval.
    # ForceArray keeps {item} an array reference even when the feed holds a
    # single item; KeyAttr => [] stops XML::Simple from folding that array
    # into a hash keyed on an attribute.
    my $ref = eval {
        XML::Simple::XMLin($xml, ForceArray => ['item'], KeyAttr => []);
    };
    unless (ref $ref eq 'HASH') {
        warn "Could not parse $feed: ",
            ($@ || "unexpected document structure\n");
        next FEED;
    }
    my $items = $ref->{item} || [];

    if ($debug) {
        # The header has to precede any body output.
        print "Content-type: text/plain\r\n\r\n";
        print "PID: $$\n";
        print $xml;
        print Data::Dumper->Dump([$items]);
        exit;
    }

    ITEM:
    for my $item (@$items) {
        next ITEM unless ref $item eq 'HASH';

        my $title = $item->{title};
        my $url   = $item->{link};

        # XML::Simple yields a hash reference for an empty element; only a
        # plain string title can be matched.
        next ITEM unless defined $title && !ref $title;
        $url = '' if !defined $url || ref $url;

        # regex match goes here
        if ($title =~ /runner/i) {
            print "$title\n $url\n\n";
        }
    }
}
continue {
    # don't suck too much bandwidth; runs after skipped feeds as well
    sleep 2;
}
exit;
__END__
Source: http://www.livejournal.com/community/webdev/256422.html