#!/usr/bin/perl
use strict;
use Data::Dumper;
use LWP::UserAgent;
# Shared HTTP client used for every download in this script.
# The agent string is handed to the constructor rather than set afterwards;
# its trailing space is intentional and must be kept as-is (LWP documents
# that it appends its own default identifier to an agent value ending in
# whitespace).
our $ua = LWP::UserAgent->new(agent => "NuBrowser/10.5 ");
# Codon table: maps each upper-case DNA triplet to the one-letter code of the
# amino acid it encodes; '*' marks a stop codon.
# The table is built from per-amino-acid groups so that synonymous codons are
# listed side by side; the resulting hash holds all 64 triplets.
our %genetic_code;
foreach my $group (
    [ 'A', qw(GCA GCC GCG GCT)         ],    # Alanine
    [ 'C', qw(TGC TGT)                 ],    # Cysteine
    [ 'D', qw(GAC GAT)                 ],    # Aspartic acid
    [ 'E', qw(GAA GAG)                 ],    # Glutamic acid
    [ 'F', qw(TTC TTT)                 ],    # Phenylalanine
    [ 'G', qw(GGA GGC GGG GGT)         ],    # Glycine
    [ 'H', qw(CAC CAT)                 ],    # Histidine
    [ 'I', qw(ATA ATC ATT)             ],    # Isoleucine
    [ 'K', qw(AAA AAG)                 ],    # Lysine
    [ 'L', qw(CTA CTC CTG CTT TTA TTG) ],    # Leucine
    [ 'M', qw(ATG)                     ],    # Methionine
    [ 'N', qw(AAC AAT)                 ],    # Asparagine
    [ 'P', qw(CCA CCC CCG CCT)         ],    # Proline
    [ 'Q', qw(CAA CAG)                 ],    # Glutamine
    [ 'R', qw(AGA AGG CGA CGC CGG CGT) ],    # Arginine
    [ 'S', qw(AGC AGT TCA TCC TCG TCT) ],    # Serine
    [ 'T', qw(ACA ACC ACG ACT)         ],    # Threonine
    [ 'V', qw(GTA GTC GTG GTT)         ],    # Valine
    [ 'W', qw(TGG)                     ],    # Tryptophan
    [ 'Y', qw(TAC TAT)                 ],    # Tyrosine
    [ '*', qw(TAA TAG TGA)             ],    # Stop
) {
    my ($residue, @codons) = @$group;
    # Hash-slice assignment: every codon of the group gets the same residue.
    @genetic_code{@codons} = ($residue) x @codons;
}
# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

# Download the assignment page and take the chunk that follows the first
# <BR> tag as the DNA sequence, then print it with its translation.
my $ass = get("http://bioinf.gen.tcd.ie/ge3027/class4/assignments.html");
my @bits = split("<BR>",$ass);
# Fall back to an empty sequence when the page has no second <BR> chunk, so
# the prints below never operate on undef.
my $dna = defined $bits[1] ? $bits[1] : '';
print "dna\n",$dna;
print "\nprotein\n", protein($dna),"\n";

# Load both FASTA files as hashes of sequence ID => translated protein.
my %fasta1 = getFasta("http://bioinf.gen.tcd.ie/ge3027/class4/fasta1.txt");
print "fasta 1 \n", Dumper(\%fasta1);
my %fasta2 = getFasta("http://bioinf.gen.tcd.ie/ge3027/class4/fasta2.txt");
print "fasta 2 \n", Dumper(\%fasta2);

print "\n4. report the sequence IDs that are found only in the first and not in the second file\n";
foreach my $id (sort keys %fasta1) {
    # Test key presence, not truthiness: an ID whose translation is the empty
    # string is still present in the second file.
    print "$id " unless exists $fasta2{$id};
}

print "\n5. report the sequence IDs for which the sequences differ between the two files\n";
foreach my $id (sort keys %fasta1) {
    # A sequence missing from the second file is compared as the empty string.
    my $other = exists $fasta2{$id} ? $fasta2{$id} : '';
    # Proteins are strings, so compare with 'ne'. Numeric '!=' evaluates
    # non-numeric strings as 0 and would never report a difference.
    print "$id $other != $fasta1{$id}\n" if $other ne $fasta1{$id};
}
print "\n-\n";
foreach my $id (sort keys %fasta2) {
    # IDs present in both files were already handled by the loop above; only
    # IDs that exist solely in the second file remain to be reported here.
    next if exists $fasta1{$id};
    print "$id $fasta2{$id} != \n" if $fasta2{$id} ne '';
}
# get($url)
#
# Performs an HTTP GET on $url through the shared $ua user agent and returns
# the response body as delivered by HTTP::Response::content.
#
# Dies with the URL and the HTTP status line when the request does not
# succeed, so that the body of an error response is never handed to the
# callers as if it were sequence data.
sub get($){
    my ($url) = @_;
    my $res = $ua->request(HTTP::Request->new(GET => $url));
    die "GET $url failed: " . $res->status_line . "\n"
        unless $res->is_success;
    return $res->content;
}
# getFasta($url)
#
# Downloads the FASTA file at $url and returns a hash (as a flat list) that
# maps each record's header line, without the leading '>', to the protein
# translation of its DNA sequence.
#
# A record starts at every '>' found at the beginning of a line and runs to
# the next one. All lines after the header are joined, so sequences wrapped
# over several lines are translated in full. Blank lines between records and
# CRLF line endings are tolerated. Text before the first '>' is ignored.
# When the same header occurs twice, the later record wins.
sub getFasta($){
    my ($url) = @_;
    my $text = get($url);
    $text = '' unless defined $text;

    my %protein_of;
    # split() yields whatever precedes the first '>' as its first field
    # (normally the empty string); that field is not a record, so drop it.
    my (undef, @records) = split /^>/m, $text;
    foreach my $record (@records) {
        my ($head, @seq_lines) = split /\r?\n/, $record;
        next unless defined $head;    # a lone '>' at the very end of the file
        $protein_of{$head} = protein(join '', @seq_lines);
    }
    return %protein_of;
}
# protein($dna)
#
# Translates a DNA string into a string of one-letter amino-acid codes using
# the %genetic_code table ('*' marks a stop codon).
#
#   - Whitespace anywhere in the input is removed before translation.
#   - Input is upper-cased first, so lower-case sequences translate the same
#     way as upper-case ones.
#   - A triplet that is not in the table (for example one containing 'N') is
#     reported as 'X' instead of being silently dropped, which would shift or
#     shorten the protein without any trace.
#   - One or two leftover bases at the end are ignored.
#   - An undefined or empty input yields the empty string.
sub protein($){
    my ($dna) = @_;
    return '' unless defined $dna;

    # Work on a normalised copy: no whitespace, upper case only.
    (my $seq = uc $dna) =~ s/\s+//g;

    # int() makes the truncation to whole codons explicit.
    my @codons = unpack("a3" x int(length($seq) / 3), $seq);
    return join "",
        map { exists $genetic_code{$_} ? $genetic_code{$_} : 'X' } @codons;
}