G IO
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
G::IO - G-language System v.2 core class
Package variables
Privates (from "my" definitions)
@instances;
$loaded = 0
Included modules
Carp
File::Basename
File::ShareDir ' :ALL '
G::DB::SDB
G::IO::Annotation
G::Messenger
LWP::Simple
autouse ' File::Temp ' => qw ( tempfile )
Inherit
G::DB::Handler G::IO::GenBankI G::IO::GenBankO G::IO::Handler
Synopsis
 use G::IO;
 use base qw(G::IO);
Description
 Inherits all necessary classes.
 Intended for internal use only.
 This is the central core module of G-language GAE.
Methods
lastInstance
No description
Code
loaded_msg
No description
Code
multi_locus
No description
Code
new
No description
Code
Methods description
None available.
Methods code
lastInstancedescriptionprevnextTop
sub lastInstance {
    return $instances[0];
}
loaded_msgdescriptionprevnextTop
sub loaded_msg {
    my $this = shift;

    $loaded ++;
    return if ($loaded > 1);

    my $print =
	qq(
	     __/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/
                
                   G-language  Genome Analysis Environment v.$VERSION

 
                             http://www.g-language.org/

              Please cite: 
                 Arakawa K. et al. (2003) Bioinformatics.
                 Arakawa K. et al. (2006) Journal of Pestice Science.
	         Arakawa K. et al. (2008) Genes, Genomes and Genomics.

 	      License: GNU General Public License
	      Copyright (C) 2001-2016 G-language Project
	      Institute for Advanced Biosciences, Keio University, JAPAN 

	     __/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/__/
\n);
&msg_error($print);
}
multi_locusdescriptionprevnextTop
sub multi_locus {
    my $this = shift;
    my $gb  = shift;
    my $len = shift || 0;
    my $lng;
    my $i = 0;
    my $f = 1;
    my $c = 1;
    
#    msg_error("Multiple locus Length Sub: $len");
do{ my $F = 1; $lng = length($this->{"SEQ"}); $this->{"LOCUS$i"} = $gb->{"LOCUS"}; $this->{"LOCUS$i"}->{start} = $lng + 1; $this->{"LOCUS$i"}->{end} = $lng + 1 + length($gb->{SEQ}); $this->{"HEADER$i"} = $gb->{"HEADER"}; $this->{"COMMENT$i"} = $gb->{"COMMENT"}; while(exists($gb->{"FEATURE$F"})){ $this->{"FEATURE$f"} = $gb->{"FEATURE$F"}; $this->{"FEATURE$f"}->{"start"} = $gb->{"FEATURE$F"}->{"start"} + $lng; $this->{"FEATURE$f"}->{"end"} = $gb->{"FEATURE$F"}->{"end"} + $lng; $this->{"FEATURE$f"}->{"locus"} = $i; $this->{"FEATURE$f"}->{"feature"} = $f; if($gb->{"FEATURE$F"}->{"type"} eq "CDS"){ if(defined $gb->{"FEATURE$F"}->{"join"}){ my @join = split(/\,/,$gb->{"FEATURE$F"}->{"join"}); my @num = (); my @new_join = (); foreach(@join){ if(tr/c/c/){ @num = split(/\.\./,$_); push (@new_join, sprintf ("c%d\.\.%d", $num[0] + $lng, $num[1] + $lng)); } else { @num = split(/\.\./,$_); push (@new_join, sprintf ("%d\.\.%d", $num[0] + $lng, $num[1] + $lng)); } } $this->{"FEATURE$f"}->{join} = join(',', @new_join); } $this->{"FEATURE$f"}->{"cds"} = $c; $c++; } $f++; $F++; } $this->{"SEQ"} .= $gb->{"SEQ"} . 'n' x $len; $i++; }while($gb->next_locus("no msg")); $this->{"FEATURE0"}->{"type"} = "source"; $this->{"FEATURE0"}->{"start"} = 1; $this->{"FEATURE0"}->{"end"} = length($this->{"SEQ"}); $this->{"LOCUS"} = $this->{"LOCUS1"}; $this->{"HEADER"} = $this->{"HEADER1"}; $this->{"COMMENT"} = $this->{"COMMENT1"};
}
newdescriptionprevnextTop
sub new {
    my $pkg = shift;
    my $filename = shift;

    if(length($filename) < 1){
	$filename = dist_file('g-language', 'genomes/ecoli.gbk');
	msg_error("WARNING: no genome file specified. Loading Escherichia coli K12 MG1655 genome as a sample.\n");
    }elsif(
	   lc($filename) eq 'ecoli' || 
	   lc($filename) eq 'bsub'  || 
	   lc($filename) eq 'pyro'  || 
	   lc($filename) eq 'mgen'  || 
	   lc($filename) eq 'cyano' ||
	   lc($filename) eq 'bbur'  ||
	   lc($filename) eq 'lambda'  ||
	   lc($filename) eq 'plasmidf'
	   ){
	$filename = dist_file('g-language', "genomes/$filename.gbk");
    }

    my @options = @_;
    my $this = {};
    my $tmp = {};

    $filename =~ s/\~\//sprintf("%s\/",$ENV{HOME})/e;
    
    bless $this;

    my $no_msg                 = 0;
    my $multiple_locus         = 0;
    my $multiple_locus_len     = 0;
    my $longest_ORF_annotation = 0;
    my $glimmer_annotation     = 0;
    my $no_cache               = 0;
    my $force_cache            = 0;
    my $gzip                   = 0;
    my $format                 = '';
    my $locus_msg              = '';

    return $this if ($filename eq 'blessed');

    $gzip = $filename =~ s/\.gz$//;
    
    foreach my $opt_tmp (@options){
	next if(length($opt_tmp) < 1);

	my $opt = lc($opt_tmp);

	if ($opt =~ /no msg/){
	    $no_msg = 1;
	}elsif ($opt =~ /multiple locus\D*(\d*)/){
	    $multiple_locus = 1;
	    $multiple_locus_len = $1;
#	    msg_error("Multiple locus len: $multiple_locus_len");
$no_cache = 1; }elsif ($opt =~ /longest orf/){ $longest_ORF_annotation = 1; }elsif ($opt =~ /glimmer annotation/){ $glimmer_annotation = 1; }elsif ($opt =~ /no cache/){ $no_cache = 1; }elsif ($opt =~ /force cache/){ $force_cache = 1; $no_cache = 1; }elsif ( $opt eq 'fasta' || $opt eq 'embl' || $opt eq 'swiss' || $opt eq 'scf' || $opt eq 'pir' || $opt eq 'gcg' || $opt eq 'raw' || $opt eq 'ace' || $opt eq 'game' || $opt eq 'phd' || $opt eq 'qual' || $opt eq 'bsml' || $opt eq 'fastq' || $opt eq 'genbank' ){ $format = $opt; }else{ # warn("Warning: Unknown Option $opt at \"new G\"\n");
} } $this->loaded_msg() unless ($no_msg); $format = G::IO::Handler::_interpret_format($filename) unless(length $format); if($format =~ /^named:(.*)$/){ $format = 'usa'; $filename = "genbank:$1"; }elsif($format eq 'RefSeq'){ $format = 'usa'; $filename = "genbank:$filename"; }elsif($format eq 'fastq' || $format eq 'fasta'){ $no_cache = 1 unless($force_cache); } my @stat = stat($filename); my $basename = basename($filename) if (-e $filename); my $cachedFile = sprintf("%s/.glang/cache-%s-%s-%s", $ENV{HOME}, $basename, $stat[7], $stat[9]); if (-e $cachedFile && $no_cache == 0){ $this = sdb_load(sprintf("cache-%s-%s-%s", $basename, $stat[7], $stat[9])); $this->seq_info() unless ($no_msg); my $now = time(); utime $now, $now, $cachedFile; }elsif ($format eq 'usa'){ my ($dbname, $entryname) = split(/:/, $filename, 2); if($dbname eq 'swiss' || $dbname eq 'sw' || $dbname eq 'uniprot' || $dbname eq 'swissprot' || $dbname eq 'swall'){ $dbname = 'uniprot'; } if($dbname eq 'refseq' || $dbname eq 'genbank'){ $dbname = 'nuccore'; } msg_error("Retrieving sequence $entryname from " . uc($dbname) . "...\n"); if($dbname eq 'nuccore' || $dbname eq 'embl'){ my $path = _sdb_path() . $entryname . '.' . $dbname; mirror('http://togows.dbcls.jp/entry/' . $dbname . '/'. $entryname, $path); if($no_msg){ $this = new G::IO($path, 'no msg'); }else{ $this = new G::IO($path); } croak('Sequence not found in database at http://togows.dbcls.jp/entry/' . $dbname . '/'. $entryname) unless(length($this->{SEQ})); }elsif($dbname eq 'uniprot'){ my $path = _sdb_path() . $entryname . '.' . $dbname; mirror('http://togows.dbcls.jp/entry/' . $dbname . '/'. $entryname, $path); if($no_msg){ $this = new G::IO($path, 'no msg', 'swiss'); }else{ $this = new G::IO($path, 'swiss'); } croak('Sequence not found in database at http://togows.dbcls.jp/entry/' . $dbname . '/'. $entryname) unless(length($this->{SEQ})); }else{ die("Unsupported USA: $filename\n"); } }else{ if ($format eq 'embl'){ $this = new G::IO::EmblI; }elsif ($format eq 'fasta'){ $this = new G::IO::FastaI; }elsif ($format eq 'fastq'){ $this = new G::IO::FastQI; }elsif ($format ne 'genbank'){ my($fh, $outfile) = tempfile(undef, SUFFIX=>'.gbk'); require LWP::UserAgent; my $ua = LWP::UserAgent->new; my $response = $ua->post("http://rest.g-language.org/emboss/", Content_Type=>'multipart/form-data', Content=>[file1 => [$filename], 'arg'=>'seqret/osformat2=genbank/-feature'] ); if ($response->is_success) { print $fh $response->decoded_content; } else { die $response->status_line; } $filename = $outfile; $locus_msg = 'no msg'; $format = 'genbank'; } my $fh = $this->open_gb($filename . ($gzip ? '.gz' : '')); if ($multiple_locus){ my $tmp = $this->clone(); undef(%$this); multi_locus($this, $tmp, $multiple_locus_len); $this->set_gene_aliases(); $this->seq_info() unless($no_msg); }else{ $this->next_locus($no_msg); if($force_cache || $no_cache == 0){ sdb_save($this, sprintf("cache-%s-%s-%s", $basename, $stat[7], $stat[9])); } } } if ($longest_ORF_annotation){ my $new = new G::IO("blessed"); bless $new; annotate_with_LORF($new, $this); return $new; }elsif ($glimmer_annotation){ my $new = new G::IO("blessed"); bless $new; open(FASTA, '>/tmp/out.fasta') || die ($!); printf FASTA ">%s\n%s\n", $this->{LOCUS}->{id}, $this->{SEQ}; close(FASTA); run_glimmer($this, '/tmp/out.fasta'); annotate_with_glimmer($new, '/tmp/out.fasta'); unlink('/tmp/out.fasta'); return $new; } shift @instances; push(@instances, $this); return $this;
}
General documentation
AUTHORTop
Kazuharu Arakawa, gaou@sfc.keio.ac.jp