#!/usr/bin/perl -w
use strict;
use HTML::TreeBuilder;
use LWP::Simple qw(get);
# element: extract the contents of an element by element id
# or class from a file or URL.
# Don Marti
# References:
# "Scanning HTML" by Sean M. Burke
# http://www.foo.be/docs/tpj/issues/vol5_3/tpj0503-0008.html
# perldoc HTML::TreeBuilder
# perldoc HTML::Element
# Main script: print the inner HTML of the first element whose id (or,
# failing that, whose class attribute) equals the first argument, reading
# the document from a local file or, if that cannot be opened, from a URL.
my $id = $ARGV[0];       # which id (or failing that, class) to get
my $document = $ARGV[1]; # filename or URL of the source

# Test for presence rather than truth so that an id or file name of "0"
# is still accepted.
die "Usage: $0 id document\n"
    if !defined($id) or $id eq '' or !defined($document) or $document eq '';

# Try the argument as a local file first.  Fall back to fetching it as a
# URL only when the file could not be opened (snarf_file returned undef);
# an empty local file is a valid, if uninteresting, document.
my $html = snarf_file($document);
$html = get($document) if !defined($html);
die "Can't get $document\n" if !defined($html);

my $tree = HTML::TreeBuilder->new();
$tree->parse($html);
$tree->eof;

# Prefer a match on id; only look at class when no id matches.  look_down
# compares the whole attribute value, so class="a b" does not match "a".
# The list needs real parentheses: a bare qw() as the loop list is a
# syntax error on Perl 5.18 and later.
foreach my $attr (qw(id class)) {
    my $e = $tree->look_down($attr, $id);
    if (defined($e)) {
        # content_list mixes child elements (objects) and plain text
        # strings; serialise the former and print the latter as they are.
        print join '', map( ref($_) ? $_->as_HTML : $_, $e->content_list);
        last;
    }
}

# Release the tree explicitly, as the HTML::TreeBuilder docs recommend.
$tree->delete;
# snarf_file: read an entire file into a single string.
#   $filename - path of the file to read
# Returns the file's contents ('' for an empty file), or undef if the file
# cannot be opened.  Dies if the handle cannot be closed.
sub snarf_file {
    my $filename = shift;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as part of the mode, and no global handle is left behind.
    # "return undef" is kept deliberately so that callers always receive
    # exactly one value, as before.
    open(my $in, '<', $filename) or return undef;

    # Undefine the input record separator for this scope only, so a single
    # read returns the whole file.
    local $/ = undef;
    my $result = <$in>;

    close($in) or die "Can't close $filename: $!\n";
    return $result;
}