use strict;
use warnings;
use HTML::TreeBuilder;
use LWP::Simple;
# Index page of the site being scraped. Link targets found on it are appended
# to this string, so it must keep its trailing slash.
my $url="http://plntfdb.bio.uni-potsdam.de/v3.0/";
my $page=get($url);
# URLs collected from the index page; filled in below and read by the
# per-page loop later in the script.
my @urls;
die "Couldn't get the URL $url!" unless defined $page;

# Build an element tree from the downloaded index page.
my $root = HTML::TreeBuilder->new();
$root->parse($page) or die "Could not to parse the page!";
$root->eof();

my @tds = $root->find_by_tag_name('td') or die "Could not find the tag:td";
foreach my $td (@tds) {
    # Scalar context: only the first <a> inside the cell is looked at.
    # (Named $anchor rather than $a so sort's package variable is not masked.)
    my $anchor = $td->find_by_tag_name('a');

    # A table cell without a link is not an error; skip it instead of
    # aborting the whole run.
    next unless defined $anchor;

    my $href = $anchor->attr('href');
    next unless defined $href;

    # NOTE(review): plain concatenation assumes every href is relative to
    # $url -- an absolute href would produce a broken address; confirm
    # against the live page.
    push @urls, $url . $href;
}

# The index tree is no longer needed; release it explicitly.
$root->delete;

# Still fail loudly when the page yielded no links at all.
die "Could not find the tag:a" unless @urls;
# For every collected URL, download the page and print one line to STDOUT:
#   <first word of h1> TAB <first p text> TAB <"SHOULD possess" domain> TAB
#   <"SHOULD NOT ... possess" domain, or "++"> TAB <reference text>
# In the reference text every "PUBMEDID:<digits>" token is followed by
# ">" + the href of the link carrying that text + "^". When the page has no
# element with id "refs", "<<<<<<" is printed in place of the references.
foreach my $ur (@urls) {
    my $content = get($ur);
    # Fail with the real cause instead of a confusing "id not found" later.
    die "Couldn't get the URL $ur!" unless defined $content;

    my $roots = HTML::TreeBuilder->new();
    # Report the page actually being parsed ($ur), not the index URL.
    $roots->parse($content) or die "Could not to parse the page:$ur!";
    $roots->eof();

    my @desc = $roots->find_by_attribute('id', 'subcontent')
        or die "Could not find the id";
    my $subcontent = $desc[0];
    # Flattened text of the block; both domain patterns are matched against it.
    my $subcontent_text = $subcontent->as_text;

    # Family name: first whitespace-delimited token of the first <h1>.
    # The capture is read only after a successful match, so a stale $1 from
    # an earlier match can never leak into the output.
    my $family_name = '';
    my $h1 = $subcontent->find_by_tag_name('h1');
    if (defined $h1 && $h1->as_text =~ /(\S+)/) {
        $family_name = $1;
    }
    else {
        warn "No family name found on $ur\n";
    }
    print $family_name."\t";

    # Description: text of the first <p>; empty field when there is none.
    my $pa = $subcontent->find_by_tag_name('p');
    my $description = '';
    if (defined $pa) {
        $description = $pa->as_text;
    }
    else {
        warn "No description paragraph found on $ur\n";
    }
    print $description."\t";

    # Required domain(s); empty field when the sentence is absent.
    my $domain_clu = '';
    if ($subcontent_text =~ /SHOULD possess (.+?) domain/) {
        $domain_clu = $1;
    }
    print $domain_clu."\t";

    # Forbidden domain(s); "++" marks a page without such a rule.
    if ($subcontent_text =~ /SHOULD NOT.+?possess (.+?) domain/) {
        my $domain_not = $1;
        print $domain_not."\t";
    }
    else {
        print "++"."\t";
    }

    my @ref = $roots->find_by_attribute('id', 'refs');
    if (@ref) {
        # Map whitespace-stripped link text => href for every link in the
        # references block. A block without links simply leaves this empty.
        my %hash_es;
        foreach my $hr ($ref[0]->find_by_tag_name('a')) {
            my $hr_text = $hr->as_text;
            $hr_text =~ s/\s//g;
            $hash_es{$hr_text} = $hr->attr('href');
        }

        # The capturing group makes split return the PUBMEDID tokens as
        # separate pieces between the surrounding text pieces.
        foreach my $es (split /(PUBMEDID:\d+)/, $ref[0]->as_text) {
            print $es;
            if ($es =~ /PUBMEDID:\d+/) {
                my $link = $hash_es{$es};
                # No matching link: keep the ">" "^" markers, empty target.
                $link = '' unless defined $link;
                print ">".$link."^";
            }
        }
    }
    else {
        print "<<<<<<";
    }

    print "\n";

    # Release this page's tree so memory does not grow with every page.
    $roots->delete;
}