G::IO Annotation
Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods
Summary
G::IO::Annotation
Package variables
No package variables defined.
Included modules
G::Messenger
G::Seq::Primitive
Inherit
Exporter
Synopsis
 use G::IO::Annotation;
 G::IO::Annotation::annotate_with_glimmer();
 G::IO::Annotation::annotate_with_LORF();
Description
 Annotates the genome sequence. 
 Currently provides only immature (experimental) functions.
 Intended for internal use only.
Methods
annotate_with_LORF
No description
Code
annotate_with_glimmer
No description
Code
run_glimmer
No description
Code
Methods description
None available.
Methods code
annotate_with_LORFdescriptionprevnextTop
# annotate_with_LORF($gb [, $min [, $max]])
#
# Annotates $this with open reading frames found by a plain string scan of
# $gb->{SEQ} on both strands.  For every 'atg' the nearest in-frame stop codon
# (tag/taa/tga) whose distance from the start lies within [$min, $max]
# nucleotides is taken as the end of the ORF.  Each stop codon is used only
# once per strand, so only the first (most upstream) start that reaches it is
# recorded.
#
#   $gb  - hash-based object providing LOCUS, HEADER, COMMENT, FEATURE0 and
#          SEQ; these entries are copied into $this.  The scan looks for
#          lower-case codons only.
#   $min - minimum start-to-stop distance in nucleotides (default 50)
#   $max - maximum start-to-stop distance in nucleotides (default 200000)
#
# Every ORF is stored as $this->{"FEATURE$n"} and aliased as $this->{"CDS$n"}
# with 1-based, inclusive start/end coordinates on the forward strand; the
# stop codon is included in the feature.  Returns nothing useful.
#
# NOTE(review): the complement-strand coordinate mapping assumes complement()
# returns the *reverse* complement, as the original length-minus-position
# arithmetic already implied -- confirm against G::Seq::Primitive.
# NOTE(review): in-frame stop codons closer than $min are skipped rather than
# terminating the ORF (unchanged from the original) -- confirm this is wanted.
sub annotate_with_LORF {
    my $this = shift;
    my $gb   = shift;
    my $min  = shift || 50;
    my $max  = shift || 200000;

    $this->{"CDS0"}->{dummy} = 1;
    for my $key (qw/LOCUS HEADER COMMENT FEATURE0 SEQ/){
        $this->{$key} = $gb->{$key};
    }

    $this->{FEATURE0}->{on} = 1 unless(defined $this->{FEATURE0});

    my $length = length($gb->{SEQ});
    my $count  = 0;

    for my $strand (0, 1){
        my $seq = $strand ? complement($gb->{SEQ}) : $gb->{SEQ};

        # Stop codons already claimed by an upstream start on this strand.
        my %stop;

        # Start from -1 so that an 'atg' at offset 0 is not skipped.
        my $start = -1;
        while(0 <= ($start = index($seq, 'atg', $start + 1))){

            # 0-based offset of the first base of the chosen stop codon.
            # Reset for every start codon; carrying it over would make a
            # stale stop suppress all later ORFs.
            my $end = -1;

            for my $codon (qw/tag taa tga/){
                my $tmp = $start;
                while(0 <= ($tmp = index($seq, $codon, $tmp + 1))){
                    my $span = $tmp - $start;

                    # index() returns increasing offsets, so nothing further
                    # along can qualify once $max is exceeded.
                    last if $span > $max;

                    # A stop is in frame only when it lies a whole number of
                    # codons downstream of the start.
                    next if $span % 3 || $span < $min;

                    # First qualifying hit is the nearest one for this codon.
                    $end = $tmp if $end < 0 || $tmp < $end;
                    last;
                }
            }

            next if $end < 0;
            next if $stop{$end};
            $stop{$end} = 1;
            $count ++;

            my $last    = $end + 2;    # 0-based offset of the stop codon's last base
            my $feature = ($this->{"FEATURE$count"} ||= {});

            if ($strand){
                # Offset k on the reversed strand is forward position $length - k.
                $feature->{start}     = $length - $last;
                $feature->{end}       = $length - $start;
                $feature->{direction} = "complement";
            }else{
                $feature->{start}     = $start + 1;
                $feature->{end}       = $last  + 1;
                $feature->{direction} = 'direct';
            }

            $feature->{type}    = 'CDS';
            $feature->{gene}    = "FEATURE$count";
            $feature->{feature} = $count;
            $feature->{cds}     = $count;
            $feature->{on}      = 1;

            # CDS entry shares the same hash as the feature entry.
            $this->{"CDS$count"} = $feature;
        }
    }

    return;
}
annotate_with_glimmerdescriptionprevnextTop
# annotate_with_glimmer($file)
#
# Builds an annotation in $this from a FASTA file and a Glimmer coordinate
# file.
#
#   $file - path of the FASTA file.  The first whitespace-separated word of
#           each '>' header line becomes $this->{LOCUS}->{id}; every other
#           line is stripped of non-letters, lower-cased and appended to
#           $this->{SEQ}.
#
# Predictions are then read from 'g2.coord' in the current working directory.
# Fields 2 and 3 of every line are taken as the two gene coordinates; when
# the first is larger than the second the gene is recorded on the complement
# strand.  Each line produces a 'gene' feature, a 'CDS' feature (numbered
# directly after the gene feature) and a CDS entry pointing at the gene
# feature.  The CDS feature's translation is obtained through
# translate() and $this->get_geneseq(), both defined outside this block.
#
# Dies if either file cannot be opened.  Returns the result of closing the
# coordinate file.
sub annotate_with_glimmer {
    my $this = shift;
    my $file = shift;

    die("annotate_with_glimmer: no FASTA file given\n") unless defined $file;

    # Three-argument open with a lexical handle: the file name can no longer
    # be interpreted as an open mode or a command, and failure is reported.
    open(my $fasta, '<', $file)
        or die("annotate_with_glimmer: cannot open FASTA file '$file': $!");
    while(my $line = <$fasta>){
        if ($line =~ /^\>/){
            $line =~ s/\>//;
            my @header = split ' ', $line;
            $this->{LOCUS}->{id} = $header[0];
        }else{
            $line =~ s/[^a-zA-Z]//g;
            $this->{SEQ} .= lc($line);
        }
    }
    close($fasta);

    $this->{COMMENT} = "COMMENT     Annotated with G-language GAE. Arakawa et al. (2003) Bioinformatics\n";
    $this->{"CDS0"}->{dummy} = 1;
    $this->{"FEATURE0"}->{dummy} = 1;

    my $count = 0;
    my $cdscount = 0;
    open(my $glimmer, '<', 'g2.coord')
        or die("annotate_with_glimmer: cannot open 'g2.coord': $!");
    while(my $line = <$glimmer>){
        my @field = split ' ', $line;

        # Blank or truncated lines carry no coordinates; skip them instead of
        # creating features with undefined positions.
        next if @field < 3;

        $count ++;
        $cdscount ++;
        my $nextcount = $count + 1;
        my $gene_id   = sprintf "g%04d", $cdscount;

        my $gene = "FEATURE$count";        # 'gene' feature
        my $cds  = "FEATURE$nextcount";    # matching 'CDS' feature

        $this->{$gene}->{type} = 'gene';
        $this->{$gene}->{gene} = $gene_id;
        $this->{$gene}->{note} = "locus_tag: $gene_id";
        $this->{$gene}->{on}   = 1;

        $this->{"CDS$cdscount"}->{feature} = $count;
        $this->{"CDS$cdscount"}->{on}      = 1;

        $this->{$cds}->{cds}          = $cdscount;
        $this->{$cds}->{type}         = 'CDS';
        $this->{$cds}->{gene}         = $gene_id;
        $this->{$cds}->{function}     = "orf; Unknown";
        $this->{$cds}->{note}         = "predicted by Glimmer 2.2";
        $this->{$cds}->{codon_start}  = 1;
        $this->{$cds}->{transl_table} = 11;
        $this->{$cds}->{product}      = "orf, hypothetical protein";
        $this->{$cds}->{protein_id}   = "N/A";
        $this->{$cds}->{db_xref}      = "N/A";
        $this->{$cds}->{on}           = 1;

        # Coordinates are stored with start <= end; the original order of the
        # two fields decides the strand.
        my ($start, $end, $direction) = $field[1] > $field[2]
            ? ($field[2], $field[1], "complement")
            : ($field[1], $field[2], "direct");

        for my $name ("CDS$cdscount", $gene, $cds){
            $this->{$name}->{start}     = $start;
            $this->{$name}->{end}       = $end;
            $this->{$name}->{direction} = $direction;
        }

        $this->{$cds}->{translation} =
            translate($this->get_geneseq("FEATURE$nextcount"));

        # Second increment accounts for the CDS feature created above.
        $count ++;
    }
    return close($glimmer);
}
run_glimmerdescriptionprevnextTop
# run_glimmer($file)
#
# Runs the './run-glimmer2' script from the current working directory with
# $file as its single argument and returns the raw status from system().
#
# The command is executed in list form, without a shell, so that spaces or
# shell metacharacters in $file cannot split the argument or inject commands.
sub run_glimmer {
    my $this = shift;
    my $file = shift;

    my @command = ('./run-glimmer2');

    # With no file name the script is still started without arguments.
    push @command, $file if defined $file && length $file;

    # Indirect-object form guarantees no shell is used even for a
    # single-element list.
    return system { $command[0] } @command;
}
General documentation
AUTHORTop
Kazuharu Arakawa, gaou@sfc.keio.ac.jp