View Single Post
  #2 (permalink)  
Old 06-10-2007, 05:13 PM
JakeCigar JakeCigar is offline
Novice
Join Date: Jun 2007
Posts: 14
iTrader: (0)
JakeCigar is on a distinguished road
#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;
# Data::Dumper supplies Dumper(), used below to print the parsed FASTA hashes.
# (The module name had been mangled to "Data:umper", which does not compile.)
use Data::Dumper;

# Single HTTP client shared by every download in this script; it is a package
# variable so the get() helper further down can reach it.
our $ua = LWP::UserAgent->new;
$ua->agent("NuBrowser/10.5 ");

# Codon table: maps each of the 64 upper-case DNA triplets to its one-letter
# amino-acid code; the three stop codons map to '*'.
# The table is written grouped by amino acid and expanded into one hash entry
# per codon by the loop below.
our %genetic_code;
for my $group (
    [ 'A', qw(GCA GCC GCG GCT) ],            # Alanine
    [ 'C', qw(TGC TGT) ],                    # Cysteine
    [ 'D', qw(GAC GAT) ],                    # Aspartic Acid
    [ 'E', qw(GAA GAG) ],                    # Glutamic Acid
    [ 'F', qw(TTC TTT) ],                    # Phenylalanine
    [ 'G', qw(GGA GGC GGG GGT) ],            # Glycine
    [ 'H', qw(CAC CAT) ],                    # Histidine
    [ 'I', qw(ATA ATC ATT) ],                # Isoleucine
    [ 'K', qw(AAA AAG) ],                    # Lysine
    [ 'L', qw(CTA CTC CTG CTT TTA TTG) ],    # Leucine
    [ 'M', qw(ATG) ],                        # Methionine
    [ 'N', qw(AAC AAT) ],                    # Asparagine
    [ 'P', qw(CCA CCC CCG CCT) ],            # Proline
    [ 'Q', qw(CAA CAG) ],                    # Glutamine
    [ 'R', qw(AGA AGG CGA CGC CGG CGT) ],    # Arginine
    [ 'S', qw(AGC AGT TCA TCC TCG TCT) ],    # Serine
    [ 'T', qw(ACA ACC ACG ACT) ],            # Threonine
    [ 'V', qw(GTA GTC GTG GTT) ],            # Valine
    [ 'W', qw(TGG) ],                        # Tryptophan
    [ 'Y', qw(TAC TAT) ],                    # Tyrosine
    [ '*', qw(TAA TAG TGA) ],                # Stop
) {
    my ($residue, @codons) = @{$group};
    # Hash-slice assignment: every codon in the group gets the same residue.
    @genetic_code{@codons} = ($residue) x @codons;
}



# ---------------------------------------------------------------------------
# Main script.
# ---------------------------------------------------------------------------

# Download the assignment page, cut it at every "<BR>" and treat the second
# piece as the DNA sequence to translate.
my $assignment_page = get("http://bioinf.gen.tcd.ie/ge3027/class4/assignments.html");
my @bits = split("<BR>",$assignment_page);

# Fall back to an empty string when the page had no "<BR>" at all (for
# example because the download returned nothing), so nothing undefined is
# printed or translated.
my $dna = defined $bits[1] ? $bits[1] : '';
print "dna\n",$dna;
print "\nprotein\n", protein($dna),"\n";


# Each hash maps a FASTA header (without the leading '>') to the protein
# translation of that record, as returned by getFasta().
my %fasta1 = getFasta("http://bioinf.gen.tcd.ie/ge3027/class4/fasta1.txt");
print "fasta 1 \n", Dumper(\%fasta1);

my %fasta2 = getFasta("http://bioinf.gen.tcd.ie/ge3027/class4/fasta2.txt");
print "fasta 2 \n", Dumper(\%fasta2);

print "\n4. report the sequence IDs that are found only in the first and not in the second file\n";
foreach my $id (sort keys %fasta1) {
    # exists(), not truthiness: an ID whose translation is the empty string
    # is still present in the second file.
    print "$id " unless exists $fasta2{$id};
}

print "\n5. report the sequence IDs for which the sequences differ between the two files\n";
foreach my $id (sort keys %fasta1) {
    my $first  = $fasta1{$id};
    my $second = exists $fasta2{$id} ? $fasta2{$id} : '';
    # The values are strings, so they must be compared with "ne". Numeric
    # "!=" turns every non-numeric string into 0 and never sees a difference.
    # An ID that is missing from the second file also counts as differing.
    print "$id $second != $first\n"
        if !exists $fasta2{$id} or $second ne $first;
}
print "\n-\n";
foreach my $id (sort keys %fasta2) {
    # IDs present in both files were already compared in the loop above;
    # only IDs that exist solely in the second file are left to report.
    next if exists $fasta1{$id};
    print "$id $fasta2{$id} != \n";
}


# get($url)
#
# Fetch $url with the shared $ua client using an HTTP GET request.
#
# Returns the response body as a string. When the response is not a success
# the status line is reported with warn() and an empty string is returned,
# so that the body of an error page is never handed back as if it were data.
#
# The former "($)" prototype is dropped: prototypes only change how calls are
# parsed, they do not validate arguments, and the calls that appear before
# this definition in the file were never affected by it.
sub get {
    my ($url) = @_;
    my $res = $ua->request(HTTP::Request->new(GET => $url));
    unless ($res->is_success) {
        warn "GET $url failed: ", $res->status_line, "\n";
        return '';
    }
    return $res->content;
}

# getFasta($url)
#
# Download a FASTA file with get() and translate every record with protein().
#
# Returns a hash (as a flat list) that maps each header line, without its
# leading '>', to the protein translation of that record's sequence.
#
# A record begins at a '>' in the first column and runs to the next such
# line, so:
#   - sequences wrapped over several lines are joined before translation
#     (previously only the first sequence line was used);
#   - blank lines between or inside records are irrelevant (previously
#     records were only recognised when separated by 2-3 newlines);
#   - CRLF / CR line endings are normalised first.
# Text in front of the first '>' is ignored, and so are records with an
# empty header.
#
# The former "($)" prototype is dropped: it did not validate anything, and
# the calls to this sub appear earlier in the file than this definition, so
# it was never applied to them.
sub getFasta {
    my ($url) = @_;
    my %protein_of;

    my $text = get($url);
    return %protein_of unless defined $text and length $text;

    $text =~ s/\r\n?/\n/g;

    # split keeps whatever precedes the first '>' as element 0 (an empty
    # string when the file starts with '>'); that element is not a record.
    my @records = split /^>/m, $text;
    shift @records;

    foreach my $record (@records) {
        my ($head, @sequence_lines) = split /\n/, $record;
        next unless defined $head and length $head;
        $protein_of{$head} = protein(join '', @sequence_lines);
    }
    return %protein_of;
}
# protein($dna)
#
# Translate a DNA string into a string of one-letter amino-acid codes using
# the %genetic_code table.
#
#   - All whitespace is removed before translation.
#   - The sequence is upper-cased first, because the table keys are
#     upper-case; lower- or mixed-case input used to translate to nothing.
#   - Codons are read from the first base in steps of three; one or two
#     left-over bases at the end are ignored.
#   - A triplet that is not in the table (e.g. one containing 'N') becomes
#     'X' instead of silently disappearing from the result.
#   - Undefined or empty input yields the empty string.
#
# The former "($)" prototype is dropped: it did not validate anything, and
# the calls to this sub appear earlier in the file than this definition, so
# it was never applied to them.
sub protein {
    my ($dna) = @_;
    return '' unless defined $dna;

    $dna =~ s/\s//g;
    $dna = uc $dna;

    # Number of complete codons; int() makes the truncation explicit.
    my $codon_count = int(length($dna) / 3);

    return join '',
        map { exists $genetic_code{$_} ? $genetic_code{$_} : 'X' }
        unpack('a3' x $codon_count, $dna);
}

__________________
Powered by Yahoo! Answers
Digg this Post! Del.Icio.Us this Post! Technorati this Post! Furl this Post! Mister Wong this Post! Newsvine this Post! Spurl this Post! Reddit this Post! Netscape this Post!