G IO
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
G::IO - G-language v.2 core class
Package variables
Privates (from "my" definitions)
$outfile = ''
$infile = ''
$loaded = 0
$this = {}
Included modules
File::Basename
File::ShareDir ' :ALL '
G::DB::SDB
G::IO::Annotation
G::Messenger
Inherit
G::DB::Handler G::IO::GenBankI G::IO::GenBankO G::IO::Handler
Synopsis
 use G::IO;
 @ISA = (G::IO);
Description
 Inherits all necessary classes.
 Intended for internal use only.
 This is the central core module of G-language GAE.
Methods
DESTROY
No description
Code
_bp2gb
No description
Code
_generate_tmpfilename
No description
Code
bioperl
No description
Code
filepath
No description
Code
interpret_format
No description
Code
lastInstance
No description
Code
loaded_msg
No description
Code
multi_locus
No description
Code
new
No description
Code
output
No description
Code
set_gene_aliases
No description
Code
Methods description
None available.
Methods code
DESTROYdescriptionprevnextTop
sub DESTROY {
    my $self = shift;

    undef %{$self};
    unlink($outfile);
    $self->close_gb();
}
_bp2gbdescriptionprevnextTop
sub _bp2gb {
    my $bpobj = shift;

    my $tmpname = "G-INTERNAL-" . $ENV{USER} . time() . rand();
    write_sequence(">/tmp/$tmpname.gbk", "genbank", $bpobj);

    my $this = new G::IO("/tmp/$tmpname.gbk", "no msg");
    unlink("/tmp/$tmpname.gbk");

    return $this;
}
_generate_tmpfilenamedescriptionprevnextTop
sub _generate_tmpfilename {
    return '/tmp/G-INTERNAL-' . $ENV{USER} . time() . rand() .'.gbk';
}
bioperldescriptionprevnextTop
sub bioperl {
    require Bio::SeqIO;
    my $this = shift;
    my $outfile = _generate_tmpfilename();
    $this->output($outfile);
    
    my $in = Bio::SeqIO->new(-format => "genbank", -file =>$outfile);
    my $bp = $in->next_seq();
    unlink($outfile);
    return $bp;
}
filepathdescriptionprevnextTop
sub filepath {
    my $this = shift;

    return $infile;
}
interpret_formatdescriptionprevnextTop
sub interpret_format {
    my $filename = shift;
    my $ref = ref $filename;
    if ($ref =~ /Bio::Seq/){
	return 'bioperl';
    }elsif ($filename =~ /^(.*?):(.*)/){
	unless(lc($1) =~ /(swiss|genbank|genpept|embl|refseq)/){
	    die("Unsupported database name for USA. Supported databases are\n" .
		"swiss, genbank, genpept, embl, or refseq\n");
	}
	return "usa";
    }elsif (lc($filename) =~ /\.(gb|gbk|gbank|genbank)$/i){
	return 'genbank';
    }elsif (lc($filename) =~ /\.(fasta|fast|seq|fst|fa|fsa|nt|aa)/i){
	return 'fasta';
    }elsif ($filename =~ /^NC_\d+$/i){
	return 'RefSeq';
    }elsif ($filename =~ /^NP_\d+$/i){
	return 'net GenPept';
    }elsif ($filename =~ /^(?:[A-Z]|[A-Z][A-Z]).\d+$/i){
	return 'net GenBank';
    }else{
	require Bio::SeqIO;
	my $format = Bio::SeqIO->_guess_format($filename);

	if (length $format){
	    return $format;
	}else{
	    warn("Unknown file format. Interpreting $filename as GenBank...\n");
	    return 'genbank';
	}
    }
}
lastInstancedescriptionprevnextTop
sub lastInstance {
    return $this;
}
loaded_msgdescriptionprevnextTop
sub loaded_msg {
    my $this = shift;

    $loaded ++;
    return if ($loaded > 1);

    my $print =
	qq(
	     __/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/
                
                   G-language  Genome Analysis Environment v.$VERSION

 
                             http://www.g-language.org/

              Please cite: 
                 Arakawa K. et al. (2003) Bioinformatics.
                 Arakawa K. et al. (2006) Journal of Pestice Science.

 	      License: GNU General Public License
	      Copyright (C) 2001-2008 G-language Project
	      Institute for Advanced Biosciences, Keio University, JAPAN 

	     __/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/
\n);
&msg_error($print);
}
multi_locusdescriptionprevnextTop
sub multi_locus {
    my $this = shift;
    my $gb = shift;
    my $lng;
    my $i = 0;
    my $f = 1;
    my $c = 1;
    
    do{
        my $F = 1;
        $lng = length($this->{"SEQ"});
        $this->{"LOCUS$i"}   = $gb->{"LOCUS"};
        $this->{"HEADER$i"}  = $gb->{"HEADER"};
        $this->{"COMMENT$i"} = $gb->{"COMMENT"};

        while(exists($gb->{"FEATURE$F"})){
            $this->{"FEATURE$f"}              = $gb->{"FEATURE$F"};
            $this->{"FEATURE$f"}->{"start"}   = $gb->{"FEATURE$F"}->{"start"} + $lng;
            $this->{"FEATURE$f"}->{"end"}     = $gb->{"FEATURE$F"}->{"end"} + $lng;
            $this->{"FEATURE$f"}->{"locus"}   = $i;
	    $this->{"FEATURE$f"}->{"feature"} = $f;

	    if($gb->{"FEATURE$F"}->{"type"} eq "CDS"){
                if(defined $gb->{"FEATURE$F"}->{"join"}){
                    my @join = split(/\,/,$gb->{"FEATURE$F"}->{"join"});
		    my @num = ();
		    my @new_join = ();

                    foreach(@join){
                        if(tr/c/c/){
                            @num = split(/\.\./,$_);
                            push (@new_join, sprintf ("c%d\.\.%d", $num[0] + $lng, $num[1] + $lng));
                        } else {
                            @num = split(/\.\./,$_);
                            push (@new_join, sprintf ("%d\.\.%d",  $num[0] + $lng, $num[1] + $lng));
                        }
                    }
                    $this->{"FEATURE$f"}->{join} = join(',', @new_join);
                }
		$this->{"FEATURE$f"}->{"cds"} = $c;
                $c++;
            }
            $f++;
            $F++;
        }
        $this->{"SEQ"} .= $gb->{"SEQ"};
        $i++;

    }while($gb->next_locus("no msg"));

    $this->{"FEATURE0"}->{"type"}  = "source";
    $this->{"FEATURE0"}->{"start"} = 1;
    $this->{"FEATURE0"}->{"end"}   = length($this->{"SEQ"});
    $this->{"LOCUS"}   = $this->{"LOCUS1"};
    $this->{"HEADER"}  = $this->{"HEADER1"};
    $this->{"COMMENT"} = $this->{"COMMENT1"};
}
newdescriptionprevnextTop
sub new {
    my $pkg = shift;
    my $filename = shift;

    if(length($filename) < 1){
	$filename = dist_file('g-language', 'genomes/ecoli.gbk');
	msg_error("WARNING: no genome file specified. Loading Escherichia coli K12 MG1655 genome as a sample.\n");
    }elsif(
	   $filename eq 'ecoli' || 
	   $filename eq 'bsub' || 
	   $filename eq 'pyro' || 
	   $filename eq 'mgen' || 
	   $filename eq 'cyano'
	   ){
	$filename = dist_file('g-language', "genomes/$filename.gbk");
    }

    my @options = @_;
    $this = {};
    my $tmp = {};
    $infile = $filename;
    $filename =~ s/\~\//sprintf("%s\/",$ENV{HOME})/e;
    
    bless $this;

    my $no_msg = 0;
    my $without_annotation = 0;
    my $multiple_locus = 0;
    my $long_sequence = 0;
    my $bioperl = 0;
    my $netgbk = 0;
    my $longest_ORF_annotation = 0;
    my $glimmer_annotation = 0;
    my $no_cache = 0;
    my $force_cache = 0;
    my $format = '';
    my $locus_msg = '';

    return $this if ($filename eq 'blessed');

    foreach my $opt_tmp (@options){
	my $opt = lc($opt_tmp);

	if ($opt =~ /no msg/){
	    $no_msg = 1;
	}elsif ($opt =~ /without annotation/){
	    $without_annotation = 1;
	    $no_cache = 1;
	}elsif ($opt =~ /multiple locus/){
	    $multiple_locus = 1;
	    $no_cache = 1;
	}elsif ($opt =~ /long sequence/){
	    $long_sequence = 1;
	    $no_cache = 1;
	}elsif ($opt =~ /bioperl/){
	    $bioperl = 1;
	    $format = 'bioperl';
	}elsif ($opt =~ /longest orf/){
	    $longest_ORF_annotation = 1;
	}elsif ($opt =~ /glimmer annotation/){
	    $glimmer_annotation = 1;
	}elsif ($opt =~ /no cache/){
	    $no_cache = 1;
	}elsif ($opt =~ /force cache/){
	    $force_cache = 1;
	    $no_cache = 1;
	}elsif ($opt =~ /net GenBank/){
	    $netgbk = 1; 
	    $format = 'net genbank';
	}elsif (   lc($opt) eq 'fasta' || lc($opt) eq 'embl' 
		|| lc($opt) eq 'swiss' || lc($opt) eq 'scf' 
		|| lc($opt) eq 'pir'   || lc($opt) eq 'gcg' 
		|| lc($opt) eq 'raw'   || lc($opt) eq 'ace'
		|| lc($opt) eq 'game'  || lc($opt) eq 'phd'
		|| lc($opt) eq 'qual'  || lc($opt) eq 'bsml' 
		|| lc($opt) eq 'genbank'){
	    $format = lc($opt);
	}else{
	    unless (lc($opt) eq 'net genbank'){
		warn("Warning: Unknown Option $opt at\" new G\"\n");
	    }
	}
    }

    $this->loaded_msg() unless ($no_msg);

    $format = interpret_format($filename) unless(length $format);

    if($format eq 'RefSeq'){
	$format = 'usa';
	$filename = "refseq:$filename";
    }elsif($format eq 'net GenBank'){
	$format = 'usa';
	$filename = "genbank:$filename";
    }elsif($format eq 'net GenPept'){
	$format = 'usa';
	$filename = "genpept:$filename";
    }

    my @stat = stat($filename);
    my $basename = basename($filename) if (-e $filename);
    my $cachedFile = sprintf("%s/.glang/cache-%s-%s-%s", $ENV{HOME}, $basename, $stat[7], $stat[9]);

    if (-e $cachedFile && $no_cache == 0){
	$this = sdb_load(sprintf("cache-%s-%s-%s", $basename, $stat[7], $stat[9]));
	$this->seq_info() unless ($no_msg);
	my $now = time();
	utime $now, $now, $cachedFile;
    }elsif ($bioperl || $format eq 'bioperl'){
        $this = _bp2gb($filename);
	$this->seq_info() unless ($no_msg);

    }elsif ($format eq 'usa'){
	my ($dbname, $entryname) = split(/:/, $filename, 2);

	msg_error("Retrieving sequence from " . uc($dbname) . "...\n");

	my $bp;

	eval {
	    $bp = get_sequence($dbname, $entryname);
	};
	die("$@ Could not retrieve $filename\n") if $@;

        $this = _bp2gb($bp);
	$this->seq_info() unless($no_msg);
	$this->{BIOPERL} =  $bp;
    }else{


	if ($format eq 'embl'){
	    $this = new G::IO::EmblI;
	}elsif ($format eq 'fasta'){
	    $this = new G::IO::FastaI;
	}elsif ($format ne 'genbank'){

	    $outfile = _generate_tmpfilename();
	    my $in = read_sequence($filename);
	    my $out = write_sequence(">$outfile", "genbank", $in);

	    $filename = $outfile;
	    $locus_msg = 'no msg';
	    $format = 'genbank';
	}

	*GENBANK = $this->open_gb($filename);

	if ($multiple_locus){
	    my $tmp = $this->clone();
	    undef(%$this);

#	    $this = new G::IO("blessed");
# my $tmp = new G::IO($filename, "no msg", $format, "no cache");
multi_locus($this, $tmp); $this->set_gene_aliases(); $this->seq_info() unless($no_msg); }else{ if ($without_annotation){ $this->goto_origin(); $this->getnucs(); $this->seq_info() unless ($no_msg); warn("This feature is deprecated. It will be removed in future releases."); }elsif ($long_sequence){ $this->read_locus(); $this->read_header(); $this->read_features(); $this->{origin} = tell *GENBANK; warn("This feature is deprecated. It will be removed in future releases."); }else{ $this->next_locus($no_msg); if($force_cache || $no_cache == 0){ sdb_save($this, sprintf("cache-%s-%s-%s", $basename, $stat[7], $stat[9])); } } } } if ($longest_ORF_annotation){ my $new = new G::IO("blessed"); bless $new; annotate_with_LORF($new, $this); return $new; }elsif ($glimmer_annotation){ my $new = new G::IO("blessed"); bless $new; open(FASTA, '>/tmp/out.fasta') || die ($!); printf FASTA ">%s\n%s\n", $this->{LOCUS}->{id}, $this->{SEQ}; close(FASTA); run_glimmer($this, '/tmp/out.fasta'); annotate_with_glimmer($new, '/tmp/out.fasta'); unlink('/tmp/out.fasta'); return $new; } return $this;
}
outputdescriptionprevnextTop
sub output {
    my $gb = shift;
    my $file = shift;
    my $option = shift;

    $option = interpret_format($file) unless(length $option);

    if (lc($option) eq 'genbank'){
	$gb->make_gb($file);
    }elsif(length $option){
	my $in;

	if(length $gb->{BIOPERL}){
	    $in = $gb->{BIOPERL};
	}else{
	    my $outfile = _generate_tmpfilename();
	    $gb->make_gb($outfile);
	    $in = read_sequence($outfile);
	    unlink($outfile);
	}

	write_sequence(">$file", $option, $in);
    }else{
	&msg_error("G::output - Unknown format to output.");
    }
}
set_gene_aliasesdescriptionprevnextTop
sub set_gene_aliases {
    my $this = shift;

    foreach my $feat ($this->feature()){
	next unless ($this->{$feat}->{type} =~ /CDS|RNA/);
	
	if(length $this->{$feat}->{gene}){
	    $this->{$this->{$feat}->{gene}} = $this->{$feat};
	}
	
	if(length $this->{$feat}->{locus_tag}){
	    $this->{$this->{$feat}->{locus_tag}} = $this->{$feat};
	}
	
	if($this->{$feat}->{type} eq 'CDS'){
	    $this->{'CDS' . $this->{$feat}->{cds}} = $this->{$feat};
	}
    }
}
General documentation
AUTHORTop
Kazuharu Arakawa, gaou@sfc.keio.ac.jp