Handler documentation.

sub _interpret_format {

    my $filename = shift;
    my $ref = ref $filename;

    if ($filename =~ /^(.*?):(.*)/){
	unless(lc($1) =~ /(swall|swiss|sw|uniprot|genbank|genpept|embl|refseq)/){
	    croak("Unsupported database name for USA. Supported databases are\n" .
		"swiss, sw, uniprot, genbank, genpept, embl, or refseq\n");
	}
	return "usa";
    }elsif (lc($filename) =~ /\.(gb|gbk|gbank|genbank|nuccore)$/i){
	return 'genbank';
    }elsif (lc($filename) =~ /\.(fasta|fast|seq|fst|fa|fsa|nt|aa|ffn|fna)/i){
	return 'fasta';
    }elsif (lc($filename) =~ /\.(fastq|fq)/i){
	return 'fastq';
    }elsif (lc($filename) =~ /\.(gff3)/i){
	return 'gff3';
    }elsif (lc($filename) =~ /\.(gff2)/i){
	return 'gff2';
    }elsif (lc($filename) =~ /\.(gff)/i){
	return 'gff';
    }elsif ($filename =~ /^NC_\d+$/i ||
	    $filename =~ /^NP_\d+$/i ||
	    $filename =~ /^(?:[A-Z]|[A-Z][A-Z]).\d+$/i ||
	    $filename =~ /^NC_\d+\.\d+$/i ||
	    $filename =~ /^NP_\d+\.\d+$/i ||
	    $filename =~ /^(?:[A-Z]|[A-Z][A-Z]).\d+\.\d+$/i
	    ){
	return 'RefSeq';
    }elsif(-e $filename){
	my @tmp = split(/\./, $filename);
	my $format = $filename =~ /\./ ? pop(@tmp) : '';

	if (length $format){
	    return $format;
	}else{
	    open(FILE, $filename);
	    while(<FILE>){
		if(/^>/){
		    return 'fasta';
		}elsif(/^LOCUS/){
		    return 'genbank';
		}elsif(/^ID/){
		    return 'embl';
		}else{
		    carp("Unknown file format. Interpreting $filename as GenBank...\n");
		    return 'genbank';
		}
		last;
	    }
	    close(FILE);
	}
    }else{
	my $id = readFile("http://rest.g-language.org/gbp/orgname.cgi?id=$filename", 1);
	return ("named:$id") if(length($id));
    }

}

sub after_startcodon {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;
    my $length = shift || 100;

    if ($this->{$object}->{direction} eq 'complement'){
	my $start = $this->{$object}->{end} - 1 - 3 - $length + 1;
	if($start < 0){
	    $start = 0;
	    $length = $this->{$object}->{end} - 3;
	}
	return complement(substr($this->{SEQ}, $start, $length));
    }else{
	return substr($this->{SEQ}, $this->{$object}->{start} + 3 - 1, $length);
    }

}

sub after_stopcodon {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;
    my $length = shift || 100;

    if ($this->{$object}->{direction} eq 'complement'){
	my $start = $this->{$object}->{start} - 1 - $length;
	if($start < 0){
	    $start = 0;
	    $length = $this->{$object}->{start};
	}
	return complement(substr($this->{SEQ}, $start, $length));
    }else{
	return substr($this->{SEQ}, $this->{$object}->{end} +1 - 1, $length);
    }

}

sub annotate {

    my $this = shift;

    my($fh, $outfile) = tempfile(undef, SUFFIX=>'.gbk');
    $this->make_gb($outfile);

    require LWP::UserAgent;
    my $ua = LWP::UserAgent->new;
    my $id = $ua->post('http://rest.g-language.org/upload/upl.pl', 'Content_Type'=>'form-data', 'Content'=>['file'=>[$outfile]])->content;
    mirror("http://rest.g-language.org/$id/restauro", $outfile);
    my $new = new G::IO($outfile, "no cache", "no msg");
    return $new;

}

sub around_startcodon {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $gb = shift;
    my $cds = shift;
    my $before = shift || 0;
    my $after = shift || 0;
    my $option = shift;

    my $seq  = $gb->before_startcodon($cds, $before);
    $seq .= $gb->startcodon($cds) unless($option =~ /without/);
    $seq .= $gb->after_startcodon($cds, $after);

    return $seq;

}

sub around_stopcodon {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $gb = shift;
    my $cds = shift;
    my $before = shift || 0;
    my $after = shift || 0;
    my $option = shift;

    my $seq  = $gb->before_stopcodon($cds, $before);
    $seq .= $gb->stopcodon($cds) unless($option =~ /without/);
    $seq .= $gb->after_stopcodon($cds, $after);

    return $seq;

}

sub before_startcodon {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;
    my $length = shift || 100;

    if ($this->{$object}->{direction} eq 'complement'){
	return complement(substr($this->{SEQ}, $this->{$object}->{end}, $length));
    }else{
	my $start = $this->{$object}->{start} - 1 - $length;
	if ($start < 0){
	    $start = 0;
	    $length = $this->{$object}->{start} - 1;
	}
	return substr($this->{SEQ}, $start, $length);
    }

}

sub before_stopcodon {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;
    my $length = shift || 100;

    if ($this->{$object}->{direction} eq 'complement'){
	return complement(substr($this->{SEQ}, $this->{$object}->{start} + 3 - 1, $length));
    }else{
	my $start = $this->{$object}->{end} - 3 - 1 - $length + 1;
	if($start < 0){
	    $start = 0;
	    $length = $this->{$object}->{end} - 3;
	}
	return substr($this->{SEQ}, $start, $length);
    }

}

sub disclose {

    my $gb = shift;

    my $return = '';
    foreach my $level1 (keys %{$gb}){
	next if ($level1 eq 'SEQ');
	next unless($level1 =~ /LOCUS|FEATURE|HEADER/);
	if(ref($gb->{$level1}) =~ /HASH/){
	    foreach my $level2 (keys %{$gb->{$level1}}){
		$return .= join("\t", $level1, $level2, $gb->{$level1}->{$level2}) . "\n";
	    }
	}else{
	    $return .= join("\t", $level1, $gb->{$level1}) . "\n";
	}
    }

    return $return;

}

sub feature {

    my $this = shift;
    my $type  = shift;
    my $opt = shift;

    if($type eq 'all'){
	$opt = 'all';
	$type = '';
    }

    my $i = -1;
    my @feature;

    while(defined($this->{"FEATURE" . ($i + 1)})){
        $i ++;

	if(length($type)){
	    next unless ($this->{"FEATURE$i"}->{type} eq $type);
	}

	if ($opt ne 'all' && defined $this->{"FEATURE$i"}->{on}){
	    next if ($this->{"FEATURE$i"}->{on} == 0);
	}

        push (@feature, "FEATURE$i");
    }

    return @feature;

}

sub find {

    my $this = shift;

    my @args = @_;
    my (@keywords, %keyhash, @results);
    my $i = 0;
    my $print = 0;

    while(defined $args[$i]){
        if (substr($args[$i], 0, 1) eq '-' && substr($args[$i], 1, 1) !~ /[0-9]/){
	    if (substr($args[$i],1) eq 'print'){
		$print = 1;
		$i +=2;
	    }else{
		$keyhash{substr($args[$i],1)} = $args[$i + 1];
		$i += 2;
	    }
        }else{
	    push(@keywords, $args[$i]);
            $i ++;
        }
    }

    foreach my $feat ($this->feature()){
	my $flag = 0;

	foreach my $key (keys %keyhash){
	    my $val = $keyhash{$key};

	    unless($this->{$feat}->{$key} =~ /$val/i){
		$flag = 1;
		last;
	    }
	}

	next if ($flag);

	foreach my $key (@keywords){
	    unless(join('%%%___%%%', values(%{$this->{$feat}})) =~ /$key/i){
		$flag = 1;
		last;
	    }
	}
    
	push(@results, $feat) unless($flag);
    }

    if(msg_ask_interface() eq 'Shell' || $print){
	foreach my $feat (@results){
	    my $gene = $this->{$feat}->{gene} || $this->{$feat}->{locus_tag} || $feat;
	    my $ec = $this->{$feat}->{EC_number};
	    $ec =~ s/\s+/,/g;
	    $ec = '(' . $ec . ')' if (length $ec);

	    msg_send(
		     sprintf("     %s\t%s\t%s\t%s..%s\t%s\t%s %s\n", $feat, $gene, $this->{$feat}->{type}, 
			     $this->{$feat}->{start}, $this->{$feat}->{end}, $this->{$feat}->{direction}, $this->{$feat}->{product}, $ec)
		     );
	}
    }

    return @results;

}

sub get_cdsseq {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;

    my $cdsseq = '';

    if($this->{$object}->{start} > $this->{$object}->{end}){
	$cdsseq = substr($this->{SEQ}, $this->{$object}->{start} - 1) . 
	    $this->get_gbkseq(1, $this->{$object}->{end});
    }else{
	$cdsseq = $this->get_gbkseq($this->{$object}->{start}, 
				    $this->{$object}->{end});
    }

    $cdsseq = &complement($cdsseq) 
	if ($this->{$object}->{direction} eq 'complement');

    return $cdsseq;

}

sub get_exon {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $cds = shift;

    return unless (length $this->{$cds}->{join});

    my $seq = '';

    foreach my $line (split(/,/, $this->{$cds}->{join})){
	my $complement = $line =~ tr/c//d;
	my ($start, $end) = split(/\.\./, $line, 2);
	my $tmp = $this->get_gbkseq($start, $end);
	$tmp = complement($tmp) if ($complement);
	$seq .= $tmp;
    }

    $seq = complement($seq) if ($this->{$cds}->{direction} eq 'complement');
    return $seq;

}

sub get_exons {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $cds = shift;

    return unless (length $this->{$cds}->{join});

    my @seq;

    foreach my $line (split(/,/, $this->{$cds}->{join})){
	my $complement = $line =~ tr/c//d;
	my ($start, $end) = split(/\.\./, $line, 2);
	my $tmp = $this->get_gbkseq($start, $end);
	$tmp = complement($tmp) if ($complement);
	
	if($this->{$cds}->{direction} eq 'complement'){
	    unshift(@seq, complement($tmp));
	}else{
	    push(@seq, $tmp);
	}
    }

    return @seq;

}

sub get_geneseq {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $object = shift;

    my $geneseq = $this->get_gbkseq($this->{$object}->{start}, 
				   $this->{$object}->{end});
    if ($this->{$object}->{join}){
	$geneseq = $this->get_exon($object);
    }elsif ($this->{$object}->{direction} eq 'complement'){
	$geneseq = &complement($geneseq);
    }

    my $codon_start = $this->{$object}->{codon_start} || 1;
    return substr($geneseq, $codon_start - 1);

}

sub get_intron {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $cds = shift;

    return unless (length $this->{$cds}->{join});

    my @join = split(/\.\./, $this->{$cds}->{join});
    shift @join;
    pop @join;
    my @seq;

    foreach my $line (@join){
	$line =~ s/c//g;
	my ($start, $end) = split(/,/, $line, 2);
	my $tmp = $this->get_gbkseq($start + 1, $end - 1);
	$tmp = '' if($end - 2  - $start < 0);
	push (@seq, $tmp);
    }

    return @seq;

}

sub getseq {

    if (scalar(@_) < 3){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $start = shift;
    my $end = shift;
    my $option = shift;

    if($start < $end){
	return substr($this->{SEQ}, $start, $end-$start+1);
    }else{
	if($option =~ /circ/){
	    return substr($this->{SEQ}, $start) . substr($this->{SEQ}, 0, $end + 1);
	}else{
	    my ($start2, $end2) = sort {$a <=> $b} ($start, $end);
	    return substr($this->{SEQ}, $start2, $end2-$start2+1);
	}
    }

}

sub next_feature {

    my $this = shift;
    my $feature = shift || 'FEATURE0';
    my $opt = shift;

    my @features = $this->feature($opt);
    return undef if ($feature eq $features[-1]);

    $feature = $this->{$feature}->{left} if ($feature =~ /^INTER/);

    my $i = $this->{$feature}->{feature};
    $i ++;

    while(%{$this->{"FEATURE$i"}}){
	my $feat = "FEATURE$i";
	$i ++;

	if(length($opt)){
	    next unless($this->{$feat}->{type} eq $opt);
	}

	return $feat;
    }

}

sub output {

    my $gb = shift;
    my $file = shift;
    my $option = shift;

    $option = _interpret_format($file) unless(length $option);

    if (lc($option) eq 'genbank'){
	$gb->make_gb($file);
    }elsif(length $option){
	my $in;

	my ($fh, $outfile) = tempfile(undef, SUFFIX=>'.gbk');
	$gb->make_gb($outfile);

	require LWP::UserAgent;
	my $ua = LWP::UserAgent->new;
            my $response = $ua->post("http://rest.g-language.org/emboss/",
                                     Content_Type=>'multipart/form-data',
                                     Content=>[file1 => [$outfile], 'arg'=>"seqret/osformat2=$option/-feature"]
                                     );

	if ($response->is_success) {
	    writeFile($response->decoded_content, $file);
	} else {
	    die("Error in output:\n" . $response->status_line . "\n File format\" $option\"of\" $file\" is not supported.");
	}

	unlink($outfile);
    }else{
	&msg_error("G::output - Unknown format to output.");
    }

}

sub pos2feature {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $pos = shift;
    my $pos2 = shift;

    if(length($pos2) > 1){
	my @genes = ();

	foreach my $feat ($this->feature()){
	    next if ($feat eq 'FEATURE0');

	    if ($this->{$feat}->{end} >= $pos && $pos2 >= $this->{$feat}->{start}){
		push(@genes, $feat);
	    }elsif ($pos2 < $this->{$feat}->{start}){
		return @genes;
	    }
	}

    }else{

	foreach my $feat ($this->feature()){
	    next if ($feat eq 'FEATURE0');

	    if ($pos >= $this->{$feat}->{start} && $pos <= $this->{$feat}->{end}){
		return $feat;
	    }elsif ($pos < $this->{$feat}->{start}){
		return '';
	    }
	}
    }

}

sub pos2gene {

    if (scalar(@_) < 2){
	carp("Not enough arguments.");
	return;
    }
    my $this = shift;
    my $pos = shift;
    my $pos2 = shift;

    if(length($pos2) > 1){
	my @genes = ();

	foreach my $feat ($this->cds()){
	    if ($this->{$feat}->{end} >= $pos && $pos2 >= $this->{$feat}->{start}){
		push(@genes, $feat);
	    }elsif ($pos2 < $this->{$feat}->{start}){
		return @genes;
	    }
	}

    }else{
	foreach my $feat ($this->cds()){
	    if ($pos >= $this->{$feat}->{start} && $pos <= $this->{$feat}->{end}){
		return $feat;
	    }elsif ($pos < $this->{$feat}->{start}){
		return '';
	    }
	}
    }

}

sub previous_feature {

    my $this = shift;
    my $feature = shift || 'FEATURE0';
    my $opt = shift;

    return undef if ($feature eq 'FEATURE0');
    $feature = $this->{$feature}->{right} if ($feature =~ /^INTER/);

    my $i = $this->{$feature}->{feature};
    $i --;

    while(%{$this->{"FEATURE$i"}}){
	my $feat = "FEATURE$i";
	$i --;

	if(length($opt)){
	    next unless($this->{$feat}->{type} eq $opt);
	}

	return $feat;
    }

}

sub rRNA {

    my $this = shift;
    my $option = shift;

    if($option =~ /\d+S|SSU|LSU/){
	my %rRNA;

	foreach ($this->feature('rRNA')){
	    if($this->{$_}->{product} =~ /([0-9\.]+S)/i){
		push(@{$rRNA{uc($1)}}, $_);
	    }elsif($this->{$_}->{gene} =~ /rrf/){
		push(@{$rRNA{'5S'}}, $_);
	    }elsif($this->{$_}->{gene} =~ /rrl/){
		push(@{$rRNA{'23S'}}, $_);
	    }elsif($this->{$_}->{gene} =~ /rrs/){
		push(@{$rRNA{'16S'}}, $_);
	    }elsif($this->{$_}->{gene} =~ /([0-9\.]+S)/i){
		push(@{$rRNA{uc($1)}}, $_);
	    }elsif($this->{$_}->{locus_tag} =~ /([0-9]+S)/i){
		push(@{$rRNA{uc($1)}}, $_);
	    }elsif($this->{$_}->{note} =~ /([0-9\.]+S)+?/){
		push(@{$rRNA{uc($1)}}, $_);
	    }elsif($this->{$_}->{product} =~ /(SSU|small subunit) ribosomal RNA/i){
		push(@{$rRNA{'SSU'}}, $_);
	    }elsif($this->{$_}->{product} =~ /(LSU|large subunit) ribosomal RNA/i){
		push(@{$rRNA{'LSU'}}, $_);
	    }
	}

	if(defined $rRNA{$option}){
	    my @result = sort {length($this->get_geneseq($b))<=>length($this->get_geneseq($a))} @{$rRNA{$option}};
	    return wantarray ? @result : $result[0];
	}
    }else{
	return feature($this, 'rRNA', $option);
    }

}

sub relocate_origin {

    require G::IO;
    my $this = new G::IO("blessed");
    my $tmp = shift;
    my $gb = $tmp->clone();
    my $pos = shift;
    croak("New start position\( in Perl coordinate\) must be given.\n") unless($pos =~ /\d/);

    $this->{LOCUS} = $gb->{LOCUS};
    $this->{HEADER} = $gb->{HEADER};
    $this->{COMMENT} = $gb->{COMMENT};
    $this->{SEQ} = substr($gb->{SEQ}, $pos) . substr($gb->{SEQ}, 0, $pos);
    $this->{FEATURE0} = $gb->{FEATURE0};

    my (@before, @after);
    my @features = $gb->feature();
    shift @features;
    foreach my $feature (@features){
	if($gb->{$feature}->{start} >= $pos + 1){
	    push(@after, $feature);
	}else{
	    push(@before, $feature);
	}
    }

    my $f = 0;
    my $c = 0;
    foreach my $feature (@after, @before){
	$f ++;
	$this->{"FEATURE$f"} = $gb->{$feature};
	$this->{"FEATURE$f"}->{feature} = $f;
	if($gb->{$feature}->{type} eq 'CDS'){
	    $c ++;
	    $this->{"FEATURE$f"}->{cds} = $c;
	}

	if($gb->{$feature}->{end} >= $pos + 1 && $gb->{$feature}->{start} < $pos + 1){
	    $this->{"FEATURE$f"}->{start} += length($gb->{SEQ}) - $pos;
	    $this->{"FEATURE$f"}->{end} -= $pos;

	    if(length $this->{"FEATURE$f"}->{join}){
		msg_error("Warning: overriding join definition for FEATURE$f.\n");
		msg_error("  this is likely to destroy positional features of this gene entry.\n");
	    }

	    $this->{"FEATURE$f"}->{join} = sprintf("%d\.\.%d,%d\.\.%d", $this->{"FEATURE$f"}->{start},
						   length($gb->{SEQ}), 1, $this->{"FEATURE$f"}->{end});
	    next;
	}

	my $lng = length($gb->{SEQ}) - $pos;
	if($gb->{$feature}->{start} >= $pos + 1){
	    $lng = -$pos;
	}

	$this->{"FEATURE$f"}->{start} += $lng;
	$this->{"FEATURE$f"}->{end} += $lng;

	if(defined $this->{"FEATURE$f"}->{"join"}){
	    my @join = split(/\,/,$this->{"FEATURE$f"}->{"join"});
	    my @num = ();
	    my @new_join = ();

	    foreach(@join){
		if(tr/c/c/){
		    @num = split(/\.\./,$_);
		    push (@new_join, sprintf ("c%d\.\.%d", $num[0] + $lng, $num[1] + $lng));
		} else {
		    @num = split(/\.\./,$_);
		    push (@new_join, sprintf ("%d\.\.%d",  $num[0] + $lng, $num[1] + $lng));
		}
	    }
	    $this->{"FEATURE$f"}->{join} = join(',', @new_join);
	}
    }
    
    $this->set_gene_aliases();

    return $this;

}

sub reverse_strand {

    require G::IO;
    my $this = new G::IO("blessed");
    my $tmpG = shift;
    my $gb = $tmpG->clone();

    $this->{LOCUS} = $gb->{LOCUS};
    $this->{HEADER} = $gb->{HEADER};
    $this->{COMMENT} = $gb->{COMMENT};
    $this->{SEQ} = complement($gb->{SEQ});
    $this->{FEATURE0} = $gb->{FEATURE0};

    my @feat = $gb->feature();
    shift @feat;
    my (@features, @tmp);
    foreach my $feature (@feat){
	if($gb->{$feature}->{type} eq 'gene'){
	    unshift(@features, @tmp);
	    @tmp = ($feature);
	}else{
	    push(@tmp, $feature);
	}
    }
    unshift(@features, @tmp);

    my $f = 0;
    my $c = 0;
    my $lng = length($gb->{SEQ}) + 1;
    foreach my $feature (@features){
	$f ++;
	$this->{"FEATURE$f"} = $gb->{$feature};
	$this->{"FEATURE$f"}->{feature} = $f;

	if($gb->{$feature}->{type} eq 'CDS'){
	    $c ++;
	    $this->{"FEATURE$f"}->{cds} = $c;
	}

	my ($start, $end) = ($lng - $gb->{$feature}->{end}, $lng - $gb->{$feature}->{start});
	$this->{"FEATURE$f"}->{start} = $start;
	$this->{"FEATURE$f"}->{end}   = $end;
	$this->{"FEATURE$f"}->{direction} = $gb->{$feature}->{direction} eq 'direct' ? 'complement' : 'direct';

	if(defined $this->{"FEATURE$f"}->{"join"}){
	    my @join = split(/\,/,$this->{"FEATURE$f"}->{"join"});
	    my @num = ();
	    my @new_join = ();

	    foreach(@join){
		if(tr/c/c/){
		    @num = split(/\.\./,$_);
		    push (@new_join, sprintf ("c%d\.\.%d", $lng - $num[1], $lng - $num[0]));
		} else {
		    @num = split(/\.\./,$_);
		    push (@new_join, sprintf ("%d\.\.%d",  $lng - $num[1], $lng - $num[0]));
		}
	    }
	    $this->{"FEATURE$f"}->{join} = join(',', reverse(@new_join));
	}
    }
    $this->set_gene_aliases();

    return $this;

}

sub seq_info {

    my $this = shift;
    my $nomsg = shift;
    my $type = shift;

    my $length = length($this->{SEQ});

    my $a = $this->{SEQ} =~ tr/a/a/;
    my $t = $this->{SEQ} =~ tr/t/t/;
    my $g = $this->{SEQ} =~ tr/g/g/;
    my $c = $this->{SEQ} =~ tr/c/c/;
    my $symbol = $this->{SEQ} =~ tr/0123456789 _-/0123456789 _-/;
    my $others = $length - $a - $t - $g - $c;
    my $msg;

    unless($nomsg){
	$msg .= sprintf "\n\nAccession Number: %s\n", $this->{LOCUS}->{id} if(length($this->{LOCUS}->{id}));
	$msg .= sprintf "\nDefinition: %s\n", $this->{DEFINITION} if(length($this->{DEFINITION}));
	$msg .= sprintf "\n  Merged LOCUS Count : %9d\n" , scalar(grep{/LOCUS/} keys(%$this)) - 2 if(exists($this->{LOCUS2}));
	$msg .= sprintf "\n  Length of Sequence : %9d\n" , $length;
	
	if($length == 0){
	    $msg .= sprintf "\n  No Sequence Found.\n\n";
	}elsif($symbol > 5){
	    $msg .= sprintf "\n  Sequence seems to be invalid (neither nucleotide nor amino acid).\n\n";
	}elsif($others > $a + $t + $g + $c && $type != 1){
	    amino_info($this);
	    return;
	}else{
	    $msg .= sprintf "           A Content : %9d (%.2f\%)\n" , $a , $a / $length * 100;
	    $msg .= sprintf "           T Content : %9d (%.2f\%)\n" , $t , $t / $length * 100;
	    $msg .= sprintf "           G Content : %9d (%.2f\%)\n" , $g , $g / $length * 100;
	    $msg .= sprintf "           C Content : %9d (%.2f\%)\n" , $c , $c / $length * 100;
	    $msg .= sprintf "              Others : %9d (%.2f\%)\n" , $others,  $others / $length * 100;
	    $msg .= sprintf "          AT Content :    %.2f\%\n"    , ($a + $t) / $length * 100;
	    $msg .= sprintf "          GC Content :    %.2f\%\n\n"  , ($g + $c) / $length * 100;

	    my ($genes, $cds, $trna, $rrna) = (scalar($this->gene()), scalar($this->cds()), scalar($this->tRNA()), scalar($this->rRNA()));
	    if($cds > 0){
		$msg .= sprintf "      Number of genes :    %d (CDSs: %s, tRNAs: %s, rRNAs: %s)\n "    , $genes, $cds, $trna, $rrna;
	    }
	}
	&msg_send($msg);
    }

    return ($a, $t, $g, $c);

}

sub set_essentiality {

    my $gb = shift;

    if ($gb->{LOCUS}->{id} eq 'U00096' || $gb->{LOCUS}->{id} eq 'NC_000913'){

        my $filename = dist_file('g-language', 'data/PECData.dat');
	my $i = 1;
	open(FILE, $filename) || die($!);
	while(<FILE>){
	    chomp;
	    my @hoge = split(/\t/);
	    my @list = ($hoge[0], split(/,/, $hoge[1]));

	    my $id;
	    foreach my $gene (@list){
		$id = $gb->gene2id($gene);
		last if(length $id);
	    }
	    
	    if($hoge[7] == 1){
		$gb->{$id}->{essentiality} = 1;		
	    }elsif($hoge[7] == 2){
		$gb->{$id}->{essentiality} = -1;
	    }
	}
	close(FILE);
   }else{
       msg_error("No essentiality data for this species.\n\n");
   }

    return 1;

}

sub set_gene_aliases {

    my $this = shift;

    foreach my $feat ($this->feature()){
	next unless ($this->{$feat}->{type} =~ /CDS|RNA/);
	
	if(length $this->{$feat}->{gene}){
	    $this->{$this->{$feat}->{gene}} = $this->{$feat};
	}
	
	if(length $this->{$feat}->{locus_tag}){
	    $this->{$this->{$feat}->{locus_tag}} = $this->{$feat};
	}
	
	if($this->{$feat}->{type} eq 'CDS'){
	    $this->{'CDS' . $this->{$feat}->{cds}} = $this->{$feat};
	}
    }

}

sub set_generationtime {

    my $gb = shift;

    my $filename = dist_file('g-language', 'data/generationtime.dat');
    for my $line (readFile($filename, 1)){
	next unless($line =~ $gb->{LOCUS}->{id});

	my @F = split(/\t/, $line);
	$gb->{FEATURE0}->{'generationTime'} = $F[3];
	$gb->{FEATURE0}->{'optimumGrowthTemperature'} = $F[2];
    }

    unless(length($gb->{FEATURE0}->{'generationTime'})){
	msg_error("No generation time data for this species.\n\n");
    }

    return 1;

}

sub set_intergenic {

    my $gb = shift;
    return if(defined($gb->{INTER1}));

    my $num = 1;
    my $i = 0;
    my $interface = msg_ask_interface();
    msg_interface("NULL");
    my @cds = $gb->find(-type=>"CDS|RNA");

    foreach my $feature (@cds){
	if($i == 0 && $gb->{$feature}->{start} < $gb->{$feature}->{end}){
	    $gb->{"INTER$num"}->{start} = 1;
	    $gb->{"INTER$num"}->{end} = $gb->{$feature}->{start} - 1;
	    $gb->{"INTER$num"}->{direction} = "direct";
	    $gb->{"INTER$num"}->{left} = undef;
	    $gb->{"INTER$num"}->{right} = $feature;
	    $gb->{"INTER$num"}->{on} = 1;
	    $num++;
	}elsif($i == $#cds && $gb->{$cds[-1]}->{end} + 1 < $gb->{SEQ}){
	    $gb->{"INTER$num"}->{start} = $gb->{$cds[-1]}->{end} + 1;
	    $gb->{"INTER$num"}->{end} = length $gb->{SEQ};
	    $gb->{"INTER$num"}->{direction} = "direct";
	    $gb->{"INTER$num"}->{left} = $feature;
	    $gb->{"INTER$num"}->{right} = undef;
	    $gb->{"INTER$num"}->{on} = 1;
	    $num++;
	}elsif($i > 0){
	    if($gb->{$cds[$i - 1]}->{end} < $gb->{$feature}->{start}){
		$gb->{"INTER$num"}->{start} = $gb->{$cds[$i - 1]}->{end} + 1;
		$gb->{"INTER$num"}->{end} = $gb->{$feature}->{start} - 1;
		$gb->{"INTER$num"}->{direction} = "direct";
		$gb->{"INTER$num"}->{left} = $cds[$i - 1];
		$gb->{"INTER$num"}->{right} = $feature;
		$gb->{"INTER$num"}->{on} = 1;
		$num++;
	    }
	}
	$i++;
    }

    msg_interface($interface);

}

   Name: $gb->annotate()   -   reannotate the genome using Restauro-G

   Description:
      This method reannotates the entire genome, if coding sequences are already available.
      Added features includes:
      rs_des: description of protein
      rs_xr:  database cross references, including EMBL, RefSeq, KEGG, BioCyc, BRENDA, GO, InterPro, Pfam
      rs_com: comments
  Usage: 
      $new_gb = $gb->annotate();

   REST: 
      http://rest.g-language.org/NC_000913/annotate

   References:
      1. Tamaki S, Arakawa K, Kono N, Tomita M (2007) "Restauro-G: A Rapid Genome Re-Annotation 
         System for Comparative Genomics", Genomics Proteomics Bioinformatics, 5(1): 53-58.

   Author:
      Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

   History:
      20120830-01 initial posting

   Name: $gb->find()   -   search through the genome data object with keywords

   Description:
         This method provides powerful means to search within the G-language genome
         data object with keywords. Given a set of keywords, this method returns
         the list of feature IDs corresponding to the search query. In G-language Shell,
         search results are also directly printed out (you can specify -print=>1 option
         in API mode to print within your program).

         eg. @features = $gb->find('RNA', 'tyrosine');    # multiple keywords are allowed.

         Keywords can be specific to each of the feature attributes:

         eg. $gb->find(-type=>'CDS', -product=>'metabolism', 'subunit');

         Regular expressions are allowed for keywords:

         eg. $gb->find(-type=>'CDS', -EC_number=>'^2.7.');

   REST: 
      http://rest.g-language.org/NC_000913/product=metabolism/

   Name: $gb->getseq()   -   get nucleotide sequence of the given positions (Perl coordinates)

   Description:
         Given the start and end positions (starting from 0 as in Perl),
         returns the sequence specified.

         eg. $gb->getseq(2,4); # returns the 3rd, 4th, and 5th nucleotides.

   Options:
       -circular   when the first position is larger than the second position,
                   retrieves the sequece spanning across the end of the circular
                   chromosome. (ex: $gb->getseq(4639670, 5, -circular))

   REST: 
      http://rest.g-language.org/NC_000913/getseq/1/3

   Name: $gb->get_gbkseq()   -   get nucleotide sequence of the given positions (GenBank coordinates)

   Description:
         Given the start and end positions (starting from 1 as in 
         Genbank), returns the sequence specified.

         eg. $gb->get_gbkseq(1,3); # returns the 1st, 2nd, and 3rd nucleotides.

   Options:
       -circular   when the first position is larger than the second position,
                   retrieves the sequece spanning across the end of the circular
                   chromosome. (ex: $gb->getseq(4639670, 5, -circular))

   REST: 
      http://rest.g-language.org/NC_000913/get_gbkseq/1/3

   Name: $gb->around_startcodon()   -   get the sequence around the startcodon of the given CDS

   Description:
         Given a CDS ID, lengths before startcodon and after start codon, 
         returns the sequence around of start codon.

         eg. $gb->around_startcodon('FEATURE5239', 100, 100); 
             # returns 100 bp sequence before and after the start codon of 'CDS1',
             # including the start codon itself

   Options:
         Optional Fourth argument containing a string "without" returns only the sequence
         before and after the start codon, and without the stat codon itself.
         eg. $gb->around_startcodon('FEATURE5239', 100, 100, "without");

   REST: 
      http://rest.g-language.org/NC_000913/around_startcodon/recA/200/200

   Name: $gb->around_stopcodon()   -   get the sequence around the stopcodon of the given CDS

   Description:
         Given a CDS ID, lengths before stopcodon and after stop codon, 
         returns the sequence around of stop codon.

         eg. $gb->around_stopcodon('FEATURE5239', 100, 100); 
             # returns 100 bp sequence before and after the stop codon of 'CDS1',
             # including the stop codon itself

   Options:
         Optional Fourth argument containing a string "without" returns only the sequence
         before and after the stop codon, and without the stat codon itself.
         eg. $gb->around_stopcodon('FEATURE5239', 100, 100, "without");

   REST: 
      http://rest.g-language.org/NC_000913/around_startcodon/recA/200/200

   Name: $gb->before_startcodon()   -   get the upstream sequence of the given CDS

   Description:
         Given a CDS ID and length, returns the sequence upstream of 
         start codon.

         eg. $gb->before_startcodon('CDS1', 100); 
             # returns 100 bp sequence upstream of the start codon of 'CDS1'.

   Options:
         Second argument specifying the length of sequence to retrieve is
         optional. (default: 100).

   REST: 
      http://rest.g-language.org/NC_000913/before_startcodon/recA/200

   Name: $gb->after_startcodon()   -   get the sequence downstream of start codon of the given CDS

   Description:
         Given a CDS ID and length, returns the sequence downstream of 
         start codon.

         eg. $gb->after_startcodon('CDS1', 100); 
             # returns 100 bp sequence downstream of the start codon of 'CDS1'.

   Options:
         Second argument specifying the length of sequence to retrieve is
         optional. (default: 100).

   REST: 
      http://rest.g-language.org/NC_000913/after_startcodon/recA/200

   Name: $gb->before_stopcodon()   -   get the sequence upstream of stop codon of the given CDS

   Description:
         Given a CDS ID and length, returns the sequence upstream of 
         stop codon.

         eg. $gb->before_stopcodon('CDS1', 100); 
             # returns 100 bp sequence upstream of the stop codon of 'CDS1'.

   Options:
         Second argument specifying the length of sequence to retrieve is
         optional. (default: 100).

   REST: 
      http://rest.g-language.org/NC_000913/before_stopcodon/recA/200

   Name: $gb->after_stopcodon()   -   get the downstream sequence of the given CDS

   Description:
         Given a CDS ID and length, returns the sequence downstream of 
         stop codon.

         eg. $gb->after_stopcodon('CDS1', 100); 
             # returns 100 bp sequence downstream of the stop codon of 'CDS1'.

   Options:
         Second argument specifying the length of sequence to retrieve is
         optional. (default: 100).

   REST: 
      http://rest.g-language.org/NC_000913/after_stopcodon/recA/200

   Name: $gb->pos2feature()   -   get a feature ID from position

   Description:
         Given a GenBank position (sequence starting from position 1) 
         returns the feature ID (ex. FEATURE123) of the feature at
         the given position. If multiple features exist for the given
         position, the first feature to appear is returned. Returns 
         NULL if no feature exists.

         When two positions are specified, all features within given 
         range is returned as an array of feature IDs.

   REST: 
      http://rest.g-language.org/NC_000913/pos2feature/1024

   Name: $gb->pos2gene()   -   get a feature ID of CDS from position

   Description:
         Given a GenBank position (sequence starting from position 1) 
         returns the feature ID (ex. FEATURE123) of the gene at
         the given position. If multiple genes exists for the given
         position, the first gene to appear is returned. Returns 
         NULL if no gene exists.

         When two positions are specified, all genes within given 
         range is returned as an array of feature IDs.

   REST: 
      http://rest.g-language.org/NC_000913/pos2gene/1024

   Name: $gb->get_geneseq()   -   get nucleotide sequence of the given gene

   Description:
         Given a CDS ID, returns the CDS sequence, or the exon sequence
         If introns are present.
         'complement' is properly parsed, and introns are spliced out.
         Codon start position is used when codon_start key is available.

         eg. $gb->get_geneseq('recA'); # returns the 'recA' gene sequence or exon.

   REST: 
      http://rest.g-language.org/NC_000913/get_geneseq/recA

   Name: $gb->get_exons()   -   get a list of exon sequences of the given CDS

   Description:
         Given a CDS ID, returns the sequences of exon sequence.
         'complement' is properly parsed, and introns are spliced out.

         eg. $gb->get_exons('CDS1'); returns the array of exon sequences.

   REST: 
      http://rest.g-language.org/NG_016275/get_exons/IL10RA

   Name: $gb->feature()   -   get a list of feature IDs

   Description:
         Returns the array of all feature IDs.
         Features are ignored when $gb->{$feature}->{on} is 0.

         eg.
           foreach ($gb->feature()){
               $gb->get_cdsseq($_);
           }
           #prints all feature sequences.

         Optionally, feature type can be supplied to return only the
         specifies features.

         eg. $gb->feature("tRNA"); # returns feature IDs only for tRNAs

         Option of "all" always returns all features regardless of the
         value of $gb->{$feature}->{on}.

   REST: 
      http://rest.g-language.org/NC_000913/feature

   Name: $gb->rRNA()   -   get a list of feature IDs of rRNAs

   Description:
         Returns the array of all feature IDs of rRNAs.

   Options:
         Second optional argument can specify the rRNA type 
         (5S, 16S, 23S, 5.8S, 18S, 28S). When used in scalar context,
         only the longest match is returned.

         ex: 
            @SSU = $gb->rRNA('16S');
            $longestSSU = $gb->rRNA('16S');

   REST: 
      http://rest.g-language.org/NC_000913/rRNA/16S

   Name: $gb->cds()   -   get a list of CDS IDs

   Description:
         Returns the array of all feature IDs of CDS.
         Features are ignored when $gb->{FEATURE$i}->{on} OR
         $gb->{CDS$i}->{on} is 0.

         !CAUTION! the object name is actually the FEATURE ID,
         to enable access to all feature values. However, most of the
         time you do not need to be aware of this difference.

         eg.
           foreach ($gb->cds()){
               $gb->get_geneseq($_);
           }
           #prints all gene sequences.

         Option of "all" always returns all features regardless of the
         value of $gb->{$feature}->{on}.

   REST: 
      http://rest.g-language.org/NC_000913/cds

   Name: $gb->next_feature()   -   get the next feature ID

   Description:
         Given a feature ID, returns the ID of the next feature.
         Second argument can be used to specify the type of the 
         next feature.

         eg. $gb->next_feature(FEATURE1234); # returns 'FEATURE1235'
             $gb->next_feature(FEATURE1234, 'tRNA'); 
             # returns next feature ID whose type is 'tRNA'

   REST: 
      http://rest.g-language.org/NC_000913/next_feature/recA

   Name: $gb->previous_feature()   -   get the previous feature ID

   Description:
         Given a feature ID, returns the ID of the previous feature.
         Second argument can be used to specify the type of the 
         next feature.

         eg. $gb->previous_feature(FEATURE1234); # returns 'FEATURE1233'
             $gb->previous_feature(FEATURE1234, 'tRNA'); 
             # returns previous feature ID whose type is 'tRNA'

   REST: 
      http://rest.g-language.org/NC_000913/previous_feature/recA

   Name: $gb->relocate_origin()   -   create a G instance starting at given position

   Description:
         Returns a G instance starting at given position, assuming circular
         chromosome. All information, including the sequence and feature 
         annotations are moved. Note that the given position is Perl position
         and NOT GenBank position. GenBank position -1 equals Perl position.

   Usage:
      $new = $gb->relocate_origin($position);

      This method would probably be most useful in conjunction with
      find_ori_ter(), to create a G instance starting from the 
      origin of replication, as follows:

        ($ori, $ter) = find_ori_ter($gb);
        $new = $gb->relocate_origin($ori);

      Several of related methods can be concatenated. For example, 
      to create a GenBank file of complementary DNA strand starting
      from the origin of replication, do the following:

        $gb->reverse_strand()->relocate_origin($ori)->output("out.gbk");

   REST: 
      http://rest.g-language.org/NC_000913/relocate_origin

   Name: $gb->reverse_strand()   -   create a G instance on complementary DNA strand

   Description:
         Returns a G instance for the complementary DNA strand. 
         All information, including the sequence and feature annotations
         is switched to reflect that of the complementary DNA strand. 
         In other words, gene order, direction of genes (either direct or 
         complement), and positions are reversed. 

   Usage: 
      $new = $gb->reverse_strand();

   REST: 
      http://rest.g-language.org/NC_000913/reverse_strand

   Name: $gb->output()   -   output the G instance data to file

   Description:
         Given a filename and an option, outputs the G-language data object 
         to the specified file in a flat-file database of a given format.
         The options are the same as those of new().  Default format is 'GenBank'.

         eg. $gb->output("my_genome.embl", "EMBL");
             $gb->output("my_genome.gbk"); # with GenBank you can ommit the option.
             $gb->output("my_genome.gff3");

   REST: 
      http://rest.g-language.org/NC_000913/output/embl (convert input genome to EMBL format)

   Name: $gb->set_generationtime()   -   add generation time data for bacterial genomes

   Description:
         This method adds generation time data for bacterial genomes,
         based on the data in Reference 1.

         $gb->{FEATURE0}->{generationTime} is the generation time (hour), and
         $gb->{feature0}->{optimumGrowthTemperature} is the optimum growth temperature in Celsius.

  References: 
    1. Vieira-Silva S and Rocha RPC (2010) "The systemic imprint of growth and its uses in 
       ecological (meta)genomics", PLoS Genetics, 6(1):e1000808

  REST: 
      http://rest.g-language.org/NC_000913/set_generationtime

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  HISTORY:
     20110224-01 initial posting

   Name: $gb->set_essentiality()   -   add gene essentiality data for E.coli genome 

   Description:
         This method adds gene essentiality data for E.coli genome,
         based on the data in PEC database (http://www.shigen.nig.ac.jp/ecoli/pec/index.jsp)
         Only works for U00096 or NC_000913 E.coli K-12 MG1655 flatfile.

         $gb->{$cds}->{essentiality} is 1 for essential, -1 for non-essential genes.

  References:
    1. Hashimoto M et al. (2005) "Cell size and nucleoid organization of engineered 
       Escherichia coli cells with a reduced genome", Mol Microbiol, 55(1):137-149

  REST: 
      http://rest.g-language.org/NC_000913/set_essentiality

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  HISTORY:
     20100123-01 bundled the data in distribution, moved the function to G::IO::Handler
     20070219-01 initial posting

_interpret_format	No description	Code
after_startcodon	No description	Code
after_stopcodon	No description	Code
annotate	No description	Code
around_startcodon	No description	Code
around_stopcodon	No description	Code
before_startcodon	No description	Code
before_stopcodon	No description	Code
cds	No description	Code
clone	No description	Code
del_key	No description	Code
disable_pseudogenes	No description	Code
disclose	No description	Code
feature	No description	Code
feature2id	No description	Code
filepath	No description	Code
find	No description	Code
gene	No description	Code
gene2id	No description	Code
get_cdsseq	No description	Code
get_exon	No description	Code
get_exons	No description	Code
get_gbkseq	No description	Code
get_geneseq	No description	Code
get_intron	No description	Code
getseq	No description	Code
hairpin_cut	No description	Code
id	No description	Code
intergenic	No description	Code
next_cds	No description	Code
next_feature	No description	Code
output	No description	Code
pos2feature	No description	Code
pos2gene	No description	Code
previous_cds	No description	Code
previous_feature	No description	Code
rRNA	No description	Code
relocate_origin	No description	Code
reverse_strand	No description	Code
seq	No description	Code
seq_info	No description	Code
set_essentiality	No description	Code
set_gene_aliases	No description	Code
set_generationtime	No description	Code
set_intergenic	No description	Code
startcodon	No description	Code
stopcodon	No description	Code
tRNA	No description	Code