G::IO Handler
SummaryPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
 G::IO::Handler - Internal class with basic sequence manipulation methods
Package variables
No package variables defined.
Included modules
Carp
File::ShareDir ' :ALL '
G::DB::SDB
G::Messenger
G::Seq::AminoAcid
G::Seq::Primitive
Synopsis
 require G::IO::Handler;
 use base qw(G::IO::Handler);
Description
 Intended for internal use only. Super class for the core. Provides 
 the native methods.
Methods
_interpret_format
No description
Code
after_startcodon
No description
Code
after_stopcodon
No description
Code
annotate
No description
Code
around_startcodon
No description
Code
around_stopcodon
No description
Code
before_startcodon
No description
Code
before_stopcodon
No description
Code
cds
No description
Code
clone
No description
Code
del_key
No description
Code
disable_pseudogenes
No description
Code
disclose
No description
Code
feature
No description
Code
feature2id
No description
Code
filepath
No description
Code
find
No description
Code
gene
No description
Code
gene2id
No description
Code
get_cdsseq
No description
Code
get_exon
No description
Code
get_exons
No description
Code
get_gbkseq
No description
Code
get_geneseq
No description
Code
get_intron
No description
Code
getseq
No description
Code
hairpin_cut
No description
Code
id
No description
Code
intergenic
No description
Code
next_cds
No description
Code
next_feature
No description
Code
output
No description
Code
pos2feature
No description
Code
pos2gene
No description
Code
previous_cds
No description
Code
previous_feature
No description
Code
rRNA
No description
Code
relocate_origin
No description
Code
reverse_strand
No description
Code
seq
No description
Code
seq_info
No description
Code
set_essentiality
No description
Code
set_gene_aliases
No description
Code
set_generationtime
No description
Code
set_intergenic
No description
Code
startcodon
No description
Code
stopcodon
No description
Code
tRNA
No description
Code
Methods description
None available.
Methods code
_interpret_formatdescriptionprevnextTop
sub _interpret_format {
    my $filename = shift;
    my $ref = ref $filename;

    if ($filename =~ /^(.*?):(.*)/){
	unless(lc($1) =~ /(swall|swiss|sw|uniprot|genbank|genpept|embl|refseq)/){
	    croak("Unsupported database name for USA. Supported databases are\n" .
		"swiss, sw, uniprot, genbank, genpept, embl, or refseq\n");
	}
	return "usa";
    }elsif (lc($filename) =~ /\.(gb|gbk|gbank|genbank|nuccore)$/i){
	return 'genbank';
    }elsif (lc($filename) =~ /\.(fasta|fast|seq|fst|fa|fsa|nt|aa|ffn|fna)/i){
	return 'fasta';
    }elsif (lc($filename) =~ /\.(fastq|fq)/i){
	return 'fastq';
    }elsif (lc($filename) =~ /\.(gff3)/i){
	return 'gff3';
    }elsif (lc($filename) =~ /\.(gff2)/i){
	return 'gff2';
    }elsif (lc($filename) =~ /\.(gff)/i){
	return 'gff';
    }elsif ($filename =~ /^NC_\d+$/i ||
	    $filename =~ /^NP_\d+$/i ||
	    $filename =~ /^(?:[A-Z]|[A-Z][A-Z]).\d+$/i ||
	    $filename =~ /^NC_\d+\.\d+$/i ||
	    $filename =~ /^NP_\d+\.\d+$/i ||
	    $filename =~ /^(?:[A-Z]|[A-Z][A-Z]).\d+\.\d+$/i
	    ){
	return 'RefSeq';
    }elsif(-e $filename){
	my @tmp = split(/\./, $filename);
	my $format = $filename =~ /\./ ? pop(@tmp) : '';

	if (length $format){
	    return $format;
	}else{
	    open(FILE, $filename);
	    while(<FILE>){
		if(/^>/){
		    return 'fasta';
		}elsif(/^LOCUS/){
		    return 'genbank';
		}elsif(/^ID/){
		    return 'embl';
		}else{
		    carp("Unknown file format. Interpreting $filename as GenBank...\n");
		    return 'genbank';
		}
		last;
	    }
	    close(FILE);
	}
    }else{
	my $id = readFile("http://rest.g-language.org/gbp/orgname.cgi?id=$filename", 1);
	return ("named:$id") if(length($id));
    }
}
after_startcodondescriptionprevnextTop
sub after_startcodon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;
    my $length = shift || 100;

    if ($this->{$object}->{direction} eq 'complement'){
	my $start = $this->{$object}->{end} - 1 - 3 - $length + 1;
	if($start < 0){
	    $start = 0;
	    $length = $this->{$object}->{end} - 3;
	}
	return complement(substr($this->{SEQ}, $start, $length));
    }else{
	return substr($this->{SEQ}, $this->{$object}->{start} + 3 - 1, $length);
    }
}
after_stopcodondescriptionprevnextTop
sub after_stopcodon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;
    my $length = shift || 100;

    if ($this->{$object}->{direction} eq 'complement'){
	my $start = $this->{$object}->{start} - 1 - $length;
	if($start < 0){
	    $start = 0;
	    $length = $this->{$object}->{start};
	}
	return complement(substr($this->{SEQ}, $start, $length));
    }else{
	return substr($this->{SEQ}, $this->{$object}->{end} +1 - 1, $length);
    }
}
annotatedescriptionprevnextTop
sub annotate {
    my $this = shift;

    my($fh, $outfile) = tempfile(undef, SUFFIX=>'.gbk');
    $this->make_gb($outfile);

    require LWP::UserAgent;
    my $ua = LWP::UserAgent->new;
    my $id = $ua->post('http://rest.g-language.org/upload/upl.pl', 'Content_Type'=>'form-data', 'Content'=>['file'=>[$outfile]])->content;
    mirror("http://rest.g-language.org/$id/restauro", $outfile);
    my $new = new G::IO($outfile, "no cache", "no msg");
    return $new;
}
around_startcodondescriptionprevnextTop
sub around_startcodon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $gb = shift;
    my $cds = shift;
    my $before = shift || 0;
    my $after = shift || 0;
    my $option = shift;

    my $seq  = $gb->before_startcodon($cds, $before);
    $seq .= $gb->startcodon($cds) unless($option =~ /without/);
    $seq .= $gb->after_startcodon($cds, $after);

    return $seq;
}
around_stopcodondescriptionprevnextTop
sub around_stopcodon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $gb = shift;
    my $cds = shift;
    my $before = shift || 0;
    my $after = shift || 0;
    my $option = shift;

    my $seq  = $gb->before_stopcodon($cds, $before);
    $seq .= $gb->stopcodon($cds) unless($option =~ /without/);
    $seq .= $gb->after_stopcodon($cds, $after);

    return $seq;
}
before_startcodondescriptionprevnextTop
sub before_startcodon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;
    my $length = shift || 100;

    if ($this->{$object}->{direction} eq 'complement'){
	return complement(substr($this->{SEQ}, $this->{$object}->{end}, $length));
    }else{
	my $start = $this->{$object}->{start} - 1 - $length;
	if ($start < 0){
	    $start = 0;
	    $length = $this->{$object}->{start} - 1;
	}
	return substr($this->{SEQ}, $start, $length);
    }
}
before_stopcodondescriptionprevnextTop
sub before_stopcodon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;
    my $length = shift || 100;

    if ($this->{$object}->{direction} eq 'complement'){
	return complement(substr($this->{SEQ}, $this->{$object}->{start} + 3 - 1, $length));
    }else{
	my $start = $this->{$object}->{end} - 3 - 1 - $length + 1;
	if($start < 0){
	    $start = 0;
	    $length = $this->{$object}->{end} - 3;
	}
	return substr($this->{SEQ}, $start, $length);
    }
}
cdsdescriptionprevnextTop
sub cds {
    return feature($_[0], 'CDS', $_[1]);
}
clonedescriptionprevnextTop
sub clone {
    my $this = shift;
    my $sdbPath = _sdb_path();

    _set_sdb_path('/tmp');

    my $tmpfile = "GINTERNAL-" . time() . rand();
    sdb_save($this,$tmpfile);
    my $new = sdb_load($tmpfile);

    _set_sdb_path($sdbPath);
    unlink($tmpfile);

    return $new;
}
del_keydescriptionprevnextTop
sub del_key {
    $_[0]->{$_[1]}->{on} = 0;
    return 1;
}
disable_pseudogenesdescriptionprevnextTop
sub disable_pseudogenes {
    my $this = shift;

    foreach my $feature ($this->feature()){
	$this->{$feature}->{on} = 0 if ($this->{$feature}->{pseudo});
    }
}
disclosedescriptionprevnextTop
sub disclose {
    my $gb = shift;

    my $return = '';
    foreach my $level1 (keys %{$gb}){
	next if ($level1 eq 'SEQ');
	next unless($level1 =~ /LOCUS|FEATURE|HEADER/);
	if(ref($gb->{$level1}) =~ /HASH/){
	    foreach my $level2 (keys %{$gb->{$level1}}){
		$return .= join("\t", $level1, $level2, $gb->{$level1}->{$level2}) . "\n";
	    }
	}else{
	    $return .= join("\t", $level1, $gb->{$level1}) . "\n";
	}
    }

    return $return;
}
featuredescriptionprevnextTop
sub feature {
    my $this = shift;
    my $type  = shift;
    my $opt = shift;

    if($type eq 'all'){
	$opt = 'all';
	$type = '';
    }

    my $i = -1;
    my @feature;

    while(defined($this->{"FEATURE" . ($i + 1)})){
        $i ++;

	if(length($type)){
	    next unless ($this->{"FEATURE$i"}->{type} eq $type);
	}

	if ($opt ne 'all' && defined $this->{"FEATURE$i"}->{on}){
	    next if ($this->{"FEATURE$i"}->{on} == 0);
	}

        push (@feature, "FEATURE$i");
    }

    return @feature;
}
feature2iddescriptionprevnextTop
sub feature2id {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }

    return 'FEATURE' . $_[0]->{$_[1]}->{feature};
}
filepathdescriptionprevnextTop
sub filepath {
    return $_[0]->{FILENAME};
}
finddescriptionprevnextTop
sub find {
    my $this = shift;

    my @args = @_;
    my (@keywords, %keyhash, @results);
    my $i = 0;
    my $print = 0;

    while(defined $args[$i]){
        if (substr($args[$i], 0, 1) eq '-' && substr($args[$i], 1, 1) !~ /[0-9]/){
	    if (substr($args[$i],1) eq 'print'){
		$print = 1;
		$i +=2;
	    }else{
		$keyhash{substr($args[$i],1)} = $args[$i + 1];
		$i += 2;
	    }
        }else{
	    push(@keywords, $args[$i]);
            $i ++;
        }
    }

    foreach my $feat ($this->feature()){
	my $flag = 0;

	foreach my $key (keys %keyhash){
	    my $val = $keyhash{$key};

	    unless($this->{$feat}->{$key} =~ /$val/i){
		$flag = 1;
		last;
	    }
	}

	next if ($flag);

	foreach my $key (@keywords){
	    unless(join('%%%___%%%', values(%{$this->{$feat}})) =~ /$key/i){
		$flag = 1;
		last;
	    }
	}
    
	push(@results, $feat) unless($flag);
    }

    if(msg_ask_interface() eq 'Shell' || $print){
	foreach my $feat (@results){
	    my $gene = $this->{$feat}->{gene} || $this->{$feat}->{locus_tag} || $feat;
	    my $ec = $this->{$feat}->{EC_number};
	    $ec =~ s/\s+/,/g;
	    $ec = '(' . $ec . ')' if (length $ec);

	    msg_send(
		     sprintf("     %s\t%s\t%s\t%s..%s\t%s\t%s %s\n", $feat, $gene, $this->{$feat}->{type}, 
			     $this->{$feat}->{start}, $this->{$feat}->{end}, $this->{$feat}->{direction}, $this->{$feat}->{product}, $ec)
		     );
	}
    }

    return @results;
}
genedescriptionprevnextTop
sub gene {
    return feature($_[0], 'gene', $_[1]);
}
gene2iddescriptionprevnextTop
sub gene2id {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    return length($_[0]->{$_[1]}->{feature}) ? 'FEATURE' . $_[0]->{$_[1]}->{feature} : '';
}
get_cdsseqdescriptionprevnextTop
sub get_cdsseq {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;

    my $cdsseq = '';

    if($this->{$object}->{start} > $this->{$object}->{end}){
	$cdsseq = substr($this->{SEQ}, $this->{$object}->{start} - 1) . 
	    $this->get_gbkseq(1, $this->{$object}->{end});
    }else{
	$cdsseq = $this->get_gbkseq($this->{$object}->{start}, 
				    $this->{$object}->{end});
    }

    $cdsseq = &complement($cdsseq) 
	if ($this->{$object}->{direction} eq 'complement');

    return $cdsseq;
}
get_exondescriptionprevnextTop
sub get_exon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $cds = shift;

    return unless (length $this->{$cds}->{join});

    my $seq = '';

    foreach my $line (split(/,/, $this->{$cds}->{join})){
	my $complement = $line =~ tr/c//d;
	my ($start, $end) = split(/\.\./, $line, 2);
	my $tmp = $this->get_gbkseq($start, $end);
	$tmp = complement($tmp) if ($complement);
	$seq .= $tmp;
    }

    $seq = complement($seq) if ($this->{$cds}->{direction} eq 'complement');
    return $seq;
}
get_exonsdescriptionprevnextTop
sub get_exons {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $cds = shift;

    return unless (length $this->{$cds}->{join});

    my @seq;

    foreach my $line (split(/,/, $this->{$cds}->{join})){
	my $complement = $line =~ tr/c//d;
	my ($start, $end) = split(/\.\./, $line, 2);
	my $tmp = $this->get_gbkseq($start, $end);
	$tmp = complement($tmp) if ($complement);
	
	if($this->{$cds}->{direction} eq 'complement'){
	    unshift(@seq, complement($tmp));
	}else{
	    push(@seq, $tmp);
	}
    }

    return @seq;
}
get_gbkseqdescriptionprevnextTop
sub get_gbkseq {
    if (scalar(@_) < 3){
	carp("Not enough arguments.");
	return;
    }
    return getseq($_[0], $_[1] - 1, $_[2] - 1, $_[3]);
}
get_geneseqdescriptionprevnextTop
sub get_geneseq {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;

    my $geneseq = $this->get_gbkseq($this->{$object}->{start}, 
				   $this->{$object}->{end});
    if ($this->{$object}->{join}){
	$geneseq = $this->get_exon($object);
    }elsif ($this->{$object}->{direction} eq 'complement'){
	$geneseq = &complement($geneseq);
    }

    my $codon_start = $this->{$object}->{codon_start} || 1;
    return substr($geneseq, $codon_start - 1);
}
get_introndescriptionprevnextTop
sub get_intron {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $cds = shift;

    return unless (length $this->{$cds}->{join});

    my @join = split(/\.\./, $this->{$cds}->{join});
    shift @join;
    pop @join;
    my @seq;

    foreach my $line (@join){
	$line =~ s/c//g;
	my ($start, $end) = split(/,/, $line, 2);
	my $tmp = $this->get_gbkseq($start + 1, $end - 1);
	$tmp = '' if($end - 2  - $start < 0);
	push (@seq, $tmp);
    }

    return @seq;
}
getseqdescriptionprevnextTop
sub getseq {
    if (scalar(@_) < 3){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $start = shift;
    my $end = shift;
    my $option = shift;

    if($start < $end){
	return substr($this->{SEQ}, $start, $end-$start+1);
    }else{
	if($option =~ /circ/){
	    return substr($this->{SEQ}, $start) . substr($this->{SEQ}, 0, $end + 1);
	}else{
	    my ($start2, $end2) = sort {$a <=> $b} ($start, $end);
	    return substr($this->{SEQ}, $start2, $end2-$start2+1);
	}
    }
}
hairpin_cutdescriptionprevnextTop
sub hairpin_cut {
    system('firefox http://www.toychan.net/afro/');
    return "\n==============\n!!!Afro Man!!!===============\n\n";
}
iddescriptionprevnextTop
sub id {
    return $_[0]->{LOCUS}->{id};
}
intergenicdescriptionprevnextTop
sub intergenic {
    my $this = shift;
    my $opt = shift || '';
    my $i = 0;
    my @cds;

    set_intergenic($this) unless(defined($this->{INTER1}));

    while($this->{"INTER" . ($i + 1)}){
	$i ++;

	if ($opt ne 'all'){
	    next if ($this->{"INTER$i"}->{on} == 0);
	}
	
	push (@cds, "INTER$i");
    }

    return @cds;
}
next_cdsdescriptionprevnextTop
sub next_cds {
    return next_feature($_[0], $_[1], 'CDS');
}
next_featuredescriptionprevnextTop
sub next_feature {
    my $this = shift;
    my $feature = shift || 'FEATURE0';
    my $opt = shift;

    my @features = $this->feature($opt);
    return undef if ($feature eq $features[-1]);

    $feature = $this->{$feature}->{left} if ($feature =~ /^INTER/);

    my $i = $this->{$feature}->{feature};
    $i ++;

    while(%{$this->{"FEATURE$i"}}){
	my $feat = "FEATURE$i";
	$i ++;

	if(length($opt)){
	    next unless($this->{$feat}->{type} eq $opt);
	}

	return $feat;
    }
}
outputdescriptionprevnextTop
sub output {
    my $gb = shift;
    my $file = shift;
    my $option = shift;

    $option = _interpret_format($file) unless(length $option);

    if (lc($option) eq 'genbank'){
	$gb->make_gb($file);
    }elsif(length $option){
	my $in;

	my ($fh, $outfile) = tempfile(undef, SUFFIX=>'.gbk');
	$gb->make_gb($outfile);

	require LWP::UserAgent;
	my $ua = LWP::UserAgent->new;
            my $response = $ua->post("http://rest.g-language.org/emboss/",
                                     Content_Type=>'multipart/form-data',
                                     Content=>[file1 => [$outfile], 'arg'=>"seqret/osformat2=$option/-feature"]
                                     );

	if ($response->is_success) {
	    writeFile($response->decoded_content, $file);
	} else {
	    die("Error in output:\n" . $response->status_line . "\n File format\" $option\"of\" $file\" is not supported.");
	}

	unlink($outfile);
    }else{
	&msg_error("G::output - Unknown format to output.");
    }
}
pos2featuredescriptionprevnextTop
sub pos2feature {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $pos = shift;
    my $pos2 = shift;

    if(length($pos2) > 1){
	my @genes = ();

	foreach my $feat ($this->feature()){
	    next if ($feat eq 'FEATURE0');

	    if ($this->{$feat}->{end} >= $pos && $pos2 >= $this->{$feat}->{start}){
		push(@genes, $feat);
	    }elsif ($pos2 < $this->{$feat}->{start}){
		return @genes;
	    }
	}

    }else{

	foreach my $feat ($this->feature()){
	    next if ($feat eq 'FEATURE0');

	    if ($pos >= $this->{$feat}->{start} && $pos <= $this->{$feat}->{end}){
		return $feat;
	    }elsif ($pos < $this->{$feat}->{start}){
		return '';
	    }
	}
    }
}
pos2genedescriptionprevnextTop
sub pos2gene {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $pos = shift;
    my $pos2 = shift;

    if(length($pos2) > 1){
	my @genes = ();

	foreach my $feat ($this->cds()){
	    if ($this->{$feat}->{end} >= $pos && $pos2 >= $this->{$feat}->{start}){
		push(@genes, $feat);
	    }elsif ($pos2 < $this->{$feat}->{start}){
		return @genes;
	    }
	}

    }else{
	foreach my $feat ($this->cds()){
	    if ($pos >= $this->{$feat}->{start} && $pos <= $this->{$feat}->{end}){
		return $feat;
	    }elsif ($pos < $this->{$feat}->{start}){
		return '';
	    }
	}
    }
}
previous_cdsdescriptionprevnextTop
sub previous_cds {
    return previous_feature($_[0], $_[1], 'CDS');
}
previous_featuredescriptionprevnextTop
sub previous_feature {
    my $this = shift;
    my $feature = shift || 'FEATURE0';
    my $opt = shift;

    return undef if ($feature eq 'FEATURE0');
    $feature = $this->{$feature}->{right} if ($feature =~ /^INTER/);

    my $i = $this->{$feature}->{feature};
    $i --;

    while(%{$this->{"FEATURE$i"}}){
	my $feat = "FEATURE$i";
	$i --;

	if(length($opt)){
	    next unless($this->{$feat}->{type} eq $opt);
	}

	return $feat;
    }
}
rRNAdescriptionprevnextTop
sub rRNA {
    my $this = shift;
    my $option = shift;

    if($option =~ /\d+S|SSU|LSU/){
	my %rRNA;

	foreach ($this->feature('rRNA')){
	    if($this->{$_}->{product} =~ /([0-9\.]+S)/i){
		push(@{$rRNA{uc($1)}}, $_);
	    }elsif($this->{$_}->{gene} =~ /rrf/){
		push(@{$rRNA{'5S'}}, $_);
	    }elsif($this->{$_}->{gene} =~ /rrl/){
		push(@{$rRNA{'23S'}}, $_);
	    }elsif($this->{$_}->{gene} =~ /rrs/){
		push(@{$rRNA{'16S'}}, $_);
	    }elsif($this->{$_}->{gene} =~ /([0-9\.]+S)/i){
		push(@{$rRNA{uc($1)}}, $_);
	    }elsif($this->{$_}->{locus_tag} =~ /([0-9]+S)/i){
		push(@{$rRNA{uc($1)}}, $_);
	    }elsif($this->{$_}->{note} =~ /([0-9\.]+S)+?/){
		push(@{$rRNA{uc($1)}}, $_);
	    }elsif($this->{$_}->{product} =~ /(SSU|small subunit) ribosomal RNA/i){
		push(@{$rRNA{'SSU'}}, $_);
	    }elsif($this->{$_}->{product} =~ /(LSU|large subunit) ribosomal RNA/i){
		push(@{$rRNA{'LSU'}}, $_);
	    }
	}

	if(defined $rRNA{$option}){
	    my @result = sort {length($this->get_geneseq($b))<=>length($this->get_geneseq($a))} @{$rRNA{$option}};
	    return wantarray ? @result : $result[0];
	}
    }else{
	return feature($this, 'rRNA', $option);
    }
}
relocate_origindescriptionprevnextTop
sub relocate_origin {
    require G::IO;
    my $this = new G::IO("blessed");
    my $tmp = shift;
    my $gb = $tmp->clone();
    my $pos = shift;
    croak("New start position\( in Perl coordinate\) must be given.\n") unless($pos =~ /\d/);

    $this->{LOCUS} = $gb->{LOCUS};
    $this->{HEADER} = $gb->{HEADER};
    $this->{COMMENT} = $gb->{COMMENT};
    $this->{SEQ} = substr($gb->{SEQ}, $pos) . substr($gb->{SEQ}, 0, $pos);
    $this->{FEATURE0} = $gb->{FEATURE0};

    my (@before, @after);
    my @features = $gb->feature();
    shift @features;
    foreach my $feature (@features){
	if($gb->{$feature}->{start} >= $pos + 1){
	    push(@after, $feature);
	}else{
	    push(@before, $feature);
	}
    }

    my $f = 0;
    my $c = 0;
    foreach my $feature (@after, @before){
	$f ++;
	$this->{"FEATURE$f"} = $gb->{$feature};
	$this->{"FEATURE$f"}->{feature} = $f;
	if($gb->{$feature}->{type} eq 'CDS'){
	    $c ++;
	    $this->{"FEATURE$f"}->{cds} = $c;
	}

	if($gb->{$feature}->{end} >= $pos + 1 && $gb->{$feature}->{start} < $pos + 1){
	    $this->{"FEATURE$f"}->{start} += length($gb->{SEQ}) - $pos;
	    $this->{"FEATURE$f"}->{end} -= $pos;

	    if(length $this->{"FEATURE$f"}->{join}){
		msg_error("Warning: overriding join definition for FEATURE$f.\n");
		msg_error("  this is likely to destroy positional features of this gene entry.\n");
	    }

	    $this->{"FEATURE$f"}->{join} = sprintf("%d\.\.%d,%d\.\.%d", $this->{"FEATURE$f"}->{start},
						   length($gb->{SEQ}), 1, $this->{"FEATURE$f"}->{end});
	    next;
	}

	my $lng = length($gb->{SEQ}) - $pos;
	if($gb->{$feature}->{start} >= $pos + 1){
	    $lng = -$pos;
	}

	$this->{"FEATURE$f"}->{start} += $lng;
	$this->{"FEATURE$f"}->{end} += $lng;

	if(defined $this->{"FEATURE$f"}->{"join"}){
	    my @join = split(/\,/,$this->{"FEATURE$f"}->{"join"});
	    my @num = ();
	    my @new_join = ();

	    foreach(@join){
		if(tr/c/c/){
		    @num = split(/\.\./,$_);
		    push (@new_join, sprintf ("c%d\.\.%d", $num[0] + $lng, $num[1] + $lng));
		} else {
		    @num = split(/\.\./,$_);
		    push (@new_join, sprintf ("%d\.\.%d",  $num[0] + $lng, $num[1] + $lng));
		}
	    }
	    $this->{"FEATURE$f"}->{join} = join(',', @new_join);
	}
    }
    
    $this->set_gene_aliases();

    return $this;
}
reverse_stranddescriptionprevnextTop
sub reverse_strand {
    require G::IO;
    my $this = new G::IO("blessed");
    my $tmpG = shift;
    my $gb = $tmpG->clone();

    $this->{LOCUS} = $gb->{LOCUS};
    $this->{HEADER} = $gb->{HEADER};
    $this->{COMMENT} = $gb->{COMMENT};
    $this->{SEQ} = complement($gb->{SEQ});
    $this->{FEATURE0} = $gb->{FEATURE0};

    my @feat = $gb->feature();
    shift @feat;
    my (@features, @tmp);
    foreach my $feature (@feat){
	if($gb->{$feature}->{type} eq 'gene'){
	    unshift(@features, @tmp);
	    @tmp = ($feature);
	}else{
	    push(@tmp, $feature);
	}
    }
    unshift(@features, @tmp);

    my $f = 0;
    my $c = 0;
    my $lng = length($gb->{SEQ}) + 1;
    foreach my $feature (@features){
	$f ++;
	$this->{"FEATURE$f"} = $gb->{$feature};
	$this->{"FEATURE$f"}->{feature} = $f;

	if($gb->{$feature}->{type} eq 'CDS'){
	    $c ++;
	    $this->{"FEATURE$f"}->{cds} = $c;
	}

	my ($start, $end) = ($lng - $gb->{$feature}->{end}, $lng - $gb->{$feature}->{start});
	$this->{"FEATURE$f"}->{start} = $start;
	$this->{"FEATURE$f"}->{end}   = $end;
	$this->{"FEATURE$f"}->{direction} = $gb->{$feature}->{direction} eq 'direct' ? 'complement' : 'direct';

	if(defined $this->{"FEATURE$f"}->{"join"}){
	    my @join = split(/\,/,$this->{"FEATURE$f"}->{"join"});
	    my @num = ();
	    my @new_join = ();

	    foreach(@join){
		if(tr/c/c/){
		    @num = split(/\.\./,$_);
		    push (@new_join, sprintf ("c%d\.\.%d", $lng - $num[1], $lng - $num[0]));
		} else {
		    @num = split(/\.\./,$_);
		    push (@new_join, sprintf ("%d\.\.%d",  $lng - $num[1], $lng - $num[0]));
		}
	    }
	    $this->{"FEATURE$f"}->{join} = join(',', reverse(@new_join));
	}
    }
    $this->set_gene_aliases();

    return $this;
}
seqdescriptionprevnextTop
sub seq {
    return $_[0]->{SEQ};
}
seq_infodescriptionprevnextTop
sub seq_info {
    my $this = shift;
    my $nomsg = shift;
    my $type = shift;

    my $length = length($this->{SEQ});

    my $a = $this->{SEQ} =~ tr/a/a/;
    my $t = $this->{SEQ} =~ tr/t/t/;
    my $g = $this->{SEQ} =~ tr/g/g/;
    my $c = $this->{SEQ} =~ tr/c/c/;
    my $symbol = $this->{SEQ} =~ tr/0123456789 _-/0123456789 _-/;
    my $others = $length - $a - $t - $g - $c;
    my $msg;

    unless($nomsg){
	$msg .= sprintf "\n\nAccession Number: %s\n", $this->{LOCUS}->{id} if(length($this->{LOCUS}->{id}));
	$msg .= sprintf "\nDefinition: %s\n", $this->{DEFINITION} if(length($this->{DEFINITION}));
	$msg .= sprintf "\n  Merged LOCUS Count : %9d\n" , scalar(grep{/LOCUS/} keys(%$this)) - 2 if(exists($this->{LOCUS2}));
	$msg .= sprintf "\n  Length of Sequence : %9d\n" , $length;
	
	if($length == 0){
	    $msg .= sprintf "\n  No Sequence Found.\n\n";
	}elsif($symbol > 5){
	    $msg .= sprintf "\n  Sequence seems to be invalid (neither nucleotide nor amino acid).\n\n";
	}elsif($others > $a + $t + $g + $c && $type != 1){
	    amino_info($this);
	    return;
	}else{
	    $msg .= sprintf "           A Content : %9d (%.2f\%)\n" , $a , $a / $length * 100;
$msg .= sprintf " T Content : %9d (%.2f\%)\n" , $t , $t / $length * 100;
$msg .= sprintf " G Content : %9d (%.2f\%)\n" , $g , $g / $length * 100;
$msg .= sprintf " C Content : %9d (%.2f\%)\n" , $c , $c / $length * 100;
$msg .= sprintf " Others : %9d (%.2f\%)\n" , $others, $others / $length * 100;
$msg .= sprintf " AT Content : %.2f\%\n" , ($a + $t) / $length * 100;
$msg .= sprintf " GC Content : %.2f\%\n\n" , ($g + $c) / $length * 100;
my ($genes, $cds, $trna, $rrna) = (scalar($this->gene()), scalar($this->cds()), scalar($this->tRNA()), scalar($this->rRNA())); if($cds > 0){ $msg .= sprintf " Number of genes : %d (CDSs: %s, tRNAs: %s, rRNAs: %s)\n " , $genes, $cds, $trna, $rrna; } } &msg_send($msg); } return ($a, $t, $g, $c);
}
set_essentialitydescriptionprevnextTop
sub set_essentiality {
    my $gb = shift;

    if ($gb->{LOCUS}->{id} eq 'U00096' || $gb->{LOCUS}->{id} eq 'NC_000913'){

        my $filename = dist_file('g-language', 'data/PECData.dat');
	my $i = 1;
	open(FILE, $filename) || die($!);
	while(<FILE>){
	    chomp;
	    my @hoge = split(/\t/);
	    my @list = ($hoge[0], split(/,/, $hoge[1]));

	    my $id;
	    foreach my $gene (@list){
		$id = $gb->gene2id($gene);
		last if(length $id);
	    }
	    
	    if($hoge[7] == 1){
		$gb->{$id}->{essentiality} = 1;		
	    }elsif($hoge[7] == 2){
		$gb->{$id}->{essentiality} = -1;
	    }
	}
	close(FILE);
   }else{
       msg_error("No essentiality data for this species.\n\n");
   }

    return 1;
}
set_gene_aliasesdescriptionprevnextTop
sub set_gene_aliases {
    my $this = shift;

    foreach my $feat ($this->feature()){
	next unless ($this->{$feat}->{type} =~ /CDS|RNA/);
	
	if(length $this->{$feat}->{gene}){
	    $this->{$this->{$feat}->{gene}} = $this->{$feat};
	}
	
	if(length $this->{$feat}->{locus_tag}){
	    $this->{$this->{$feat}->{locus_tag}} = $this->{$feat};
	}
	
	if($this->{$feat}->{type} eq 'CDS'){
	    $this->{'CDS' . $this->{$feat}->{cds}} = $this->{$feat};
	}
    }
}
set_generationtimedescriptionprevnextTop
sub set_generationtime {
    my $gb = shift;

    my $filename = dist_file('g-language', 'data/generationtime.dat');
    for my $line (readFile($filename, 1)){
	next unless($line =~ $gb->{LOCUS}->{id});

	my @F = split(/\t/, $line);
	$gb->{FEATURE0}->{'generationTime'} = $F[3];
	$gb->{FEATURE0}->{'optimumGrowthTemperature'} = $F[2];
    }

    unless(length($gb->{FEATURE0}->{'generationTime'})){
	msg_error("No generation time data for this species.\n\n");
    }

    return 1;
}
set_intergenicdescriptionprevnextTop
sub set_intergenic {
    my $gb = shift;
    return if(defined($gb->{INTER1}));

    my $num = 1;
    my $i = 0;
    my $interface = msg_ask_interface();
    msg_interface("NULL");
    my @cds = $gb->find(-type=>"CDS|RNA");

    foreach my $feature (@cds){
	if($i == 0 && $gb->{$feature}->{start} < $gb->{$feature}->{end}){
	    $gb->{"INTER$num"}->{start} = 1;
	    $gb->{"INTER$num"}->{end} = $gb->{$feature}->{start} - 1;
	    $gb->{"INTER$num"}->{direction} = "direct";
	    $gb->{"INTER$num"}->{left} = undef;
	    $gb->{"INTER$num"}->{right} = $feature;
	    $gb->{"INTER$num"}->{on} = 1;
	    $num++;
	}elsif($i == $#cds && $gb->{$cds[-1]}->{end} + 1 < $gb->{SEQ}){
	    $gb->{"INTER$num"}->{start} = $gb->{$cds[-1]}->{end} + 1;
	    $gb->{"INTER$num"}->{end} = length $gb->{SEQ};
	    $gb->{"INTER$num"}->{direction} = "direct";
	    $gb->{"INTER$num"}->{left} = $feature;
	    $gb->{"INTER$num"}->{right} = undef;
	    $gb->{"INTER$num"}->{on} = 1;
	    $num++;
	}elsif($i > 0){
	    if($gb->{$cds[$i - 1]}->{end} < $gb->{$feature}->{start}){
		$gb->{"INTER$num"}->{start} = $gb->{$cds[$i - 1]}->{end} + 1;
		$gb->{"INTER$num"}->{end} = $gb->{$feature}->{start} - 1;
		$gb->{"INTER$num"}->{direction} = "direct";
		$gb->{"INTER$num"}->{left} = $cds[$i - 1];
		$gb->{"INTER$num"}->{right} = $feature;
		$gb->{"INTER$num"}->{on} = 1;
		$num++;
	    }
	}
	$i++;
    }

    msg_interface($interface);
}
startcodondescriptionprevnextTop
sub startcodon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    return substr($_[0]->get_geneseq($_[1]), 0, 3);
}
stopcodondescriptionprevnextTop
sub stopcodon {
    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    return substr($_[0]->get_geneseq($_[1]), -3, 3);
}
tRNAdescriptionprevnextTop
sub tRNA {
    return feature($_[0], 'tRNA', $_[1]);
}
General documentation
AUTHORTop
Kazuharu Arakawa, gaou@sfc.keio.ac.jp
$gb->filepath()Top
   Name: $gb->filepath()   -   get the location path of input file

   Description:
         Returns the file path used to load the sequence.

   REST: 
      http://rest.g-language.org/ecoli/filepath
$gb->clone()Top
   Name: $gb->clone()   -   create a copy of the G instance

   Description:
         Returns cloned G instance, which is a new G instance with
         identical data. 

   REST: 
      http://rest.g-language.org/NC_000913/clone
$gb->annotate()Top
   Name: $gb->annotate()   -   reannotate the genome using Restauro-G

   Description:
      This method reannotates the entire genome, if coding sequences are already available.
      Added features includes:
      rs_des: description of protein
      rs_xr:  database cross references, including EMBL, RefSeq, KEGG, BioCyc, BRENDA, GO, InterPro, Pfam
      rs_com: comments
  Usage: 
      $new_gb = $gb->annotate();

   REST: 
      http://rest.g-language.org/NC_000913/annotate
References: 1. Tamaki S, Arakawa K, Kono N, Tomita M (2007) "Restauro-G: A Rapid Genome Re-Annotation System for Comparative Genomics", Genomics Proteomics Bioinformatics, 5(1): 53-58. Author: Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20120830-01 initial posting
$gb->disclose()Top
   Name: $gb->disclose()   -   create a tab delimited list of all features

   Description:
         Returns tab delimited list of all feature table.
         Used for Bio::Glite.

   REST: 
      http://rest.g-language.org/NC_000913/disclose
$gb->find()Top
   Name: $gb->find()   -   search through the genome data object with keywords

   Description:
         This method provides powerful means to search within the G-language genome
         data object with keywords. Given a set of keywords, this method returns
         the list of feature IDs corresponding to the search query. In G-language Shell,
         search results are also directly printed out (you can specify -print=>1 option
         in API mode to print within your program).

         eg. @features = $gb->find('RNA', 'tyrosine');    # multiple keywords are allowed.

         Keywords can be specific to each of the feature attributes:

         eg. $gb->find(-type=>'CDS', -product=>'metabolism', 'subunit');

         Regular expressions are allowed for keywords:

         eg. $gb->find(-type=>'CDS', -EC_number=>'^2.7.');

   REST: 
      http://rest.g-language.org/NC_000913/product=metabolism/
$gb->getseq()Top
   Name: $gb->getseq()   -   get nucleotide sequence of the given positions (Perl coordinates)

   Description:
         Given the start and end positions (starting from 0 as in Perl),
         returns the sequence specified.

         eg. $gb->getseq(2,4); # returns the 3rd, 4th, and 5th nucleotides.

   Options:
       -circular   when the first position is larger than the second position,
                   retrieves the sequece spanning across the end of the circular
                   chromosome. (ex: $gb->getseq(4639670, 5, -circular))

   REST: 
      http://rest.g-language.org/NC_000913/getseq/1/3
$gb->get_gbkseq()Top
   Name: $gb->get_gbkseq()   -   get nucleotide sequence of the given positions (GenBank coordinates)

   Description:
         Given the start and end positions (starting from 1 as in 
         Genbank), returns the sequence specified.

         eg. $gb->get_gbkseq(1,3); # returns the 1st, 2nd, and 3rd nucleotides.

   Options:
       -circular   when the first position is larger than the second position,
                   retrieves the sequece spanning across the end of the circular
                   chromosome. (ex: $gb->getseq(4639670, 5, -circular))

   REST: 
      http://rest.g-language.org/NC_000913/get_gbkseq/1/3
$gb->get_cdsseq()Top
   Name: $gb->get_cdsseq()   -   get nucleotide sequence of the given CDS

   Description:
         Given a CDS ID, returns the CDS sequence. 
         'complement' is properly parsed.

         eg. $gb->get_cdsseq('recA'); # returns the 'recA' gene sequence.

   REST: 
      http://rest.g-language.org/NC_000913/get_cdsseq/recA
$gb->around_startcodon()Top
   Name: $gb->around_startcodon()   -   get the sequence around the startcodon of the given CDS

   Description:
         Given a CDS ID, lengths before startcodon and after start codon, 
         returns the sequence around of start codon.

         eg. $gb->around_startcodon('FEATURE5239', 100, 100); 
             # returns 100 bp sequence before and after the start codon of 'CDS1',
             # including the start codon itself

   Options:
         Optional Fourth argument containing a string "without" returns only the sequence
         before and after the start codon, and without the stat codon itself.
         eg. $gb->around_startcodon('FEATURE5239', 100, 100, "without");

   REST: 
      http://rest.g-language.org/NC_000913/around_startcodon/recA/200/200
$gb->around_stopcodon()Top
   Name: $gb->around_stopcodon()   -   get the sequence around the stopcodon of the given CDS

   Description:
         Given a CDS ID, lengths before stopcodon and after stop codon, 
         returns the sequence around of stop codon.

         eg. $gb->around_stopcodon('FEATURE5239', 100, 100); 
             # returns 100 bp sequence before and after the stop codon of 'CDS1',
             # including the stop codon itself

   Options:
         Optional Fourth argument containing a string "without" returns only the sequence
         before and after the stop codon, and without the stat codon itself.
         eg. $gb->around_stopcodon('FEATURE5239', 100, 100, "without");

   REST: 
      http://rest.g-language.org/NC_000913/around_startcodon/recA/200/200
$gb->before_startcodon()Top
   Name: $gb->before_startcodon()   -   get the upstream sequence of the given CDS

   Description:
         Given a CDS ID and length, returns the sequence upstream of 
         start codon.

         eg. $gb->before_startcodon('CDS1', 100); 
             # returns 100 bp sequence upstream of the start codon of 'CDS1'.

   Options:
         Second argument specifying the length of sequence to retrieve is
         optional. (default: 100).

   REST: 
      http://rest.g-language.org/NC_000913/before_startcodon/recA/200
$gb->after_startcodon()Top
   Name: $gb->after_startcodon()   -   get the sequence downstream of start codon of the given CDS

   Description:
         Given a CDS ID and length, returns the sequence downstream of 
         start codon.

         eg. $gb->after_startcodon('CDS1', 100); 
             # returns 100 bp sequence downstream of the start codon of 'CDS1'.

   Options:
         Second argument specifying the length of sequence to retrieve is
         optional. (default: 100).

   REST: 
      http://rest.g-language.org/NC_000913/after_startcodon/recA/200
$gb->before_stopcodon()Top
   Name: $gb->before_stopcodon()   -   get the sequence upstream of stop codon of the given CDS

   Description:
         Given a CDS ID and length, returns the sequence upstream of 
         stop codon.

         eg. $gb->before_stopcodon('CDS1', 100); 
             # returns 100 bp sequence upstream of the stop codon of 'CDS1'.

   Options:
         Second argument specifying the length of sequence to retrieve is
         optional. (default: 100).

   REST: 
      http://rest.g-language.org/NC_000913/before_stopcodon/recA/200
$gb->after_stopcodon()Top
   Name: $gb->after_stopcodon()   -   get the downstream sequence of the given CDS

   Description:
         Given a CDS ID and length, returns the sequence downstream of 
         stop codon.

         eg. $gb->after_stopcodon('CDS1', 100); 
             # returns 100 bp sequence downstream of the stop codon of 'CDS1'.

   Options:
         Second argument specifying the length of sequence to retrieve is
         optional. (default: 100).

   REST: 
      http://rest.g-language.org/NC_000913/after_stopcodon/recA/200
$gb->startcodon()Top
   Name: $gb->startcodon()   -   get the start codon of the given CDS

   Description:
         Given a CDS ID, returns the start codon.

         eg. $gb->startcodon("FEATURE$i"); # returns 'atg'

   REST: 
      http://rest.g-language.org/NC_000913/startcodon/recA
$gb->stopcodon()Top
   Name: $gb->stopcodon()   -   get the stop codon of the given CDS

   Description:
         Given a CDS ID, returns the stop codon.

         eg. $gb->stopcodon("FEATURE$i"); # returns 'tag'

   REST: 
      http://rest.g-language.org/NC_000913/stopcodon/recA
$gb->pos2feature()Top
   Name: $gb->pos2feature()   -   get a feature ID from position

   Description:
         Given a GenBank position (sequence starting from position 1) 
         returns the feature ID (ex. FEATURE123) of the feature at
         the given position. If multiple features exist for the given
         position, the first feature to appear is returned. Returns 
         NULL if no feature exists.

         When two positions are specified, all features within given 
         range is returned as an array of feature IDs.

   REST: 
      http://rest.g-language.org/NC_000913/pos2feature/1024
$gb->gene2id()Top
   Name: $gb->gene2id()   -   get a feature ID from canonical gene name

   Description:
         Given a GenBank gene name, returns the feature ID (ex. FEATURE123). 
         Returns NULL if no gene exists.

   REST: 
      http://rest.g-language.org/NC_000913/gene2id/recA
$gb->pos2gene()Top
   Name: $gb->pos2gene()   -   get a feature ID of CDS from position

   Description:
         Given a GenBank position (sequence starting from position 1) 
         returns the feature ID (ex. FEATURE123) of the gene at
         the given position. If multiple genes exists for the given
         position, the first gene to appear is returned. Returns 
         NULL if no gene exists.

         When two positions are specified, all genes within given 
         range is returned as an array of feature IDs.

   REST: 
      http://rest.g-language.org/NC_000913/pos2gene/1024
$gb->get_geneseq()Top
   Name: $gb->get_geneseq()   -   get nucleotide sequence of the given gene

   Description:
         Given a CDS ID, returns the CDS sequence, or the exon sequence
         If introns are present.
         'complement' is properly parsed, and introns are spliced out.
         Codon start position is used when codon_start key is available.

         eg. $gb->get_geneseq('recA'); # returns the 'recA' gene sequence or exon.

   REST: 
      http://rest.g-language.org/NC_000913/get_geneseq/recA
$gb->get_intron()Top
   Name: $gb->intron()   -   get a list of intron sequences of the given CDS

   Description:
         Given a CDS ID, returns the intron sequences as array of 
         sequences.

         eg. $gb->get_intron('CDS1'); 
             # returns ($1st_intron, $2nd_intron,..)

   REST: 
      http://rest.g-language.org/NC_000913/get_intron/recA
$gb->get_exon()Top
   Name: $gb->get_exon()   -   get a concatenated exon sequence of the given CDS

   Description:
         Given a CDS ID, returns the exon sequence.
         'complement' is properly parsed, and introns are spliced out.

         eg. $gb->get_exon('CDS1'); returns the 'CDS1' exon.

   REST: 
      http://rest.g-language.org/NC_000913/get_exon/recA
$gb->get_exons()Top
   Name: $gb->get_exons()   -   get a list of exon sequences of the given CDS

   Description:
         Given a CDS ID, returns the sequences of exon sequence.
         'complement' is properly parsed, and introns are spliced out.

         eg. $gb->get_exons('CDS1'); returns the array of exon sequences.

   REST: 
      http://rest.g-language.org/NG_016275/get_exons/IL10RA
$gb->seq_info()Top
   Name: $gb->seq_info()   -   display basic statistics about the data

   Description:
         Prints the basic information of the genome to STDOUT.

   REST: 
      http://rest.g-language.org/NC_000913/seq_info
$gb->disable_pseudogenes()Top
   Name: $gb->disable_pseudogenes()   -   turns all pseudogenes off

   Description:
         Turns off all pseudogenes by setting $genome->{$feature}->{on}
         to 0 when $genome->{$feature}->{pseudo} is true.

   REST: 
      http://rest.g-language.org/NC_000913/disable_pseudogenes
$gb->feature()Top
   Name: $gb->feature()   -   get a list of feature IDs

   Description:
         Returns the array of all feature IDs.
         Features are ignored when $gb->{$feature}->{on} is 0.

         eg.
           foreach ($gb->feature()){
               $gb->get_cdsseq($_);
           }
           #prints all feature sequences.

         Optionally, feature type can be supplied to return only the
         specifies features.

         eg. $gb->feature("tRNA"); # returns feature IDs only for tRNAs

         Option of "all" always returns all features regardless of the
         value of $gb->{$feature}->{on}.

   REST: 
      http://rest.g-language.org/NC_000913/feature
$gb->rRNA()Top
   Name: $gb->rRNA()   -   get a list of feature IDs of rRNAs

   Description:
         Returns the array of all feature IDs of rRNAs.

   Options:
         Second optional argument can specify the rRNA type 
         (5S, 16S, 23S, 5.8S, 18S, 28S). When used in scalar context,
         only the longest match is returned.

         ex: 
            @SSU = $gb->rRNA('16S');
            $longestSSU = $gb->rRNA('16S');

   REST: 
      http://rest.g-language.org/NC_000913/rRNA/16S
$gb->tRNA()Top
   Name: $gb->tRNA()   -   get a list of feature IDs of tRNAs

   Description:
         Returns the array of all feature IDs of tRNAs.

   REST: 
      http://rest.g-language.org/NC_000913/tRNA
$gb->gene()Top
   Name: $gb->gene()   -   get a list of feature IDs of genes

   Description:
         Returns the array of all feature IDs of genes.

   REST: 
      http://rest.g-language.org/NC_000913/gene
$gb->cds()Top
   Name: $gb->cds()   -   get a list of CDS IDs

   Description:
         Returns the array of all feature IDs of CDS.
         Features are ignored when $gb->{FEATURE$i}->{on} OR
         $gb->{CDS$i}->{on} is 0.

         !CAUTION! the object name is actually the FEATURE ID,
         to enable access to all feature values. However, most of the
         time you do not need to be aware of this difference.

         eg.
           foreach ($gb->cds()){
               $gb->get_geneseq($_);
           }
           #prints all gene sequences.

         Option of "all" always returns all features regardless of the
         value of $gb->{$feature}->{on}.

   REST: 
      http://rest.g-language.org/NC_000913/cds
$gb->next_feature()Top
   Name: $gb->next_feature()   -   get the next feature ID

   Description:
         Given a feature ID, returns the ID of the next feature.
         Second argument can be used to specify the type of the 
         next feature.

         eg. $gb->next_feature(FEATURE1234); # returns 'FEATURE1235'
             $gb->next_feature(FEATURE1234, 'tRNA'); 
             # returns next feature ID whose type is 'tRNA'

   REST: 
      http://rest.g-language.org/NC_000913/next_feature/recA
$gb->previous_feature()Top
   Name: $gb->previous_feature()   -   get the previous feature ID

   Description:
         Given a feature ID, returns the ID of the previous feature.
         Second argument can be used to specify the type of the 
         next feature.

         eg. $gb->previous_feature(FEATURE1234); # returns 'FEATURE1233'
             $gb->previous_feature(FEATURE1234, 'tRNA'); 
             # returns previous feature ID whose type is 'tRNA'

   REST: 
      http://rest.g-language.org/NC_000913/previous_feature/recA
$gb->next_cds()Top
   Name: $gb->next_cds()   -   get the feature ID of next CDS

   Description:
         Given a feature ID, returns the ID of the next cds.
         This is same as $gb->next_feature($featureID, 'CDS');

   REST: 
      http://rest.g-language.org/NC_000913/next_cds/recA
$gb->previous_cds()Top
   Name: $gb->previous_cds()   -   get the feature ID of previous CDS

   Description:
         Given a feature ID, returns the ID of the previous cds.
         This is same as $gb->previous_feature($featureID, 'CDS');

   REST: 
      http://rest.g-language.org/NC_000913/previous_cds/recA
$gb->intergenic()Top
   Name: $gb->intergenic()   -   get a list of IDs of intergenic regions

   Description:
         Returns the array of all IDs of intergenic regions. 
         Here "intergenic region" is defined as the region in a genome
         between coding and stable RNA genes. 

   REST: 
      http://rest.g-language.org/NC_000913/intergenic
$gb->id()Top
   Name: $gb->id()   -   get the sequence ID from G instance

   Description:
         Returns the sequence ID. Same as $gb->{LOCUS}->{id};

   REST: 
      http://rest.g-language.org/ecoli/id
$gb->seq()Top
   Name: $gb->seq()   -   get the sequence data from G instance

   Description:
         Returns the entire sequence. Same as $gb->{SEQ};

   REST: 
      http://rest.g-language.org/ecoli/seq
$gb->del_key()Top
   Name: $gb->del_key()   -   delete a data object from G instance

   Description:
         Given a object, deletes it from the G instance structure
         eg. $gb->del_key('FEATURE1'); # deletes 'FEATURE1' hash

   REST: 
      http://rest.g-language.org/NC_000913/del_key/FEATURE1
$gb->relocate_origin()Top
   Name: $gb->relocate_origin()   -   create a G instance starting at given position

   Description:
         Returns a G instance starting at given position, assuming circular
         chromosome. All information, including the sequence and feature 
         annotations are moved. Note that the given position is Perl position
         and NOT GenBank position. GenBank position -1 equals Perl position.

   Usage:
      $new = $gb->relocate_origin($position);

      This method would probably be most useful in conjunction with
      find_ori_ter(), to create a G instance starting from the 
      origin of replication, as follows:

        ($ori, $ter) = find_ori_ter($gb);
        $new = $gb->relocate_origin($ori);

      Several of related methods can be concatenated. For example, 
      to create a GenBank file of complementary DNA strand starting
      from the origin of replication, do the following:

        $gb->reverse_strand()->relocate_origin($ori)->output("out.gbk");

   REST: 
      http://rest.g-language.org/NC_000913/relocate_origin
$gb->reverse_strand()Top
   Name: $gb->reverse_strand()   -   create a G instance on complementary DNA strand

   Description:
         Returns a G instance for the complementary DNA strand. 
         All information, including the sequence and feature annotations
         is switched to reflect that of the complementary DNA strand. 
         In other words, gene order, direction of genes (either direct or 
         complement), and positions are reversed. 

   Usage: 
      $new = $gb->reverse_strand();

   REST: 
      http://rest.g-language.org/NC_000913/reverse_strand
$gb->output()Top
   Name: $gb->output()   -   output the G instance data to file

   Description:
         Given a filename and an option, outputs the G-language data object 
         to the specified file in a flat-file database of a given format.
         The options are the same as those of new().  Default format is 'GenBank'.

         eg. $gb->output("my_genome.embl", "EMBL");
             $gb->output("my_genome.gbk"); # with GenBank you can ommit the option.
             $gb->output("my_genome.gff3");

   REST: 
      http://rest.g-language.org/NC_000913/output/embl (convert input genome to EMBL format)
$gb->set_generationtime()Top
   Name: $gb->set_generationtime()   -   add generation time data for bacterial genomes

   Description:
         This method adds generation time data for bacterial genomes,
         based on the data in Reference 1.

         $gb->{FEATURE0}->{generationTime} is the generation time (hour), and
         $gb->{feature0}->{optimumGrowthTemperature} is the optimum growth temperature in Celsius.

  References: 
    1. Vieira-Silva S and Rocha RPC (2010) "The systemic imprint of growth and its uses in 
       ecological (meta)genomics", PLoS Genetics, 6(1):e1000808

  REST: 
      http://rest.g-language.org/NC_000913/set_generationtime
Author: Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
HISTORY: 20110224-01 initial posting
$gb->set_essentiality()Top
   Name: $gb->set_essentiality()   -   add gene essentiality data for E.coli genome 

   Description:
         This method adds gene essentiality data for E.coli genome,
         based on the data in PEC database (http://www.shigen.nig.ac.jp/ecoli/pec/index.jsp)
Only works for U00096 or NC_000913 E.coli K-12 MG1655 flatfile.
$gb->{$cds}->{essentiality} is 1 for essential, -1 for non-essential genes. References: 1. Hashimoto M et al. (2005) "Cell size and nucleoid organization of engineered Escherichia coli cells with a reduced genome", Mol Microbiol, 55(1):137-149 REST: http://rest.g-language.org/NC_000913/set_essentiality
Author: Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
HISTORY: 20100123-01 bundled the data in distribution, moved the function to G::IO::Handler 20070219-01 initial posting