G::IO GenBankI
Package variablesGeneral documentationMethods
Package variables
Privates (from "my" definitions)
%FH;
Included modules
Carp
G::Messenger
IO::File
IO::Zlib
Synopsis
No synopsis!
Description
No description!
Methods
DESTROY
No description
Code
getnucs
No description
Code
next_locus
No description
Code
open_gb
No description
Code
read_features
No description
Code
read_header
No description
Code
read_locus
No description
Code
Methods description
None available.
Methods code
DESTROYdescriptionprevnextTop
sub DESTROY {
    return unless($FH{$_[0]->{INSTANCE}});
    close ($FH{$_[0]->{INSTANCE}});
    delete($FH{$_[0]->{INSTANCE}});
}
getnucsdescriptionprevnextTop
sub getnucs {
    my $this = shift;

    my $fh = $FH{$this->{INSTANCE}};
    while(<$fh>){
	last if (/\/\//);
	s/[^A-Za-z]//g;
	$this->{"SEQ"} .= lc($_);
    }
}
next_locusdescriptionprevnextTop
sub next_locus {
    my $this = shift;
    my $msg  = shift;

    my $file = $this->{FILENAME};
    my $inst = $this->{INSTANCE};

    undef %$this;

    $this->{FILENAME} = $file;
    $this->{INSTANCE} = $inst;

    $this->read_locus($msg);
    $this->read_header();
    $this->read_features() unless($this->{G_INTERNAL_MSG} eq 'skip');
    $this->getnucs();

    if (length($this->{SEQ}) > 0){
	$this->set_gene_aliases();
	$this->seq_info() unless($msg);
	return 1;
    }else{
	return 0;
    }
}
open_gbdescriptionprevnextTop
sub open_gb {
    my $this = shift;
    my $filename = shift;
    $this->{FILENAME} = $filename;
    $this->{INSTANCE} = $filename . '-' . time() . '-' . rand();

    if($filename =~ /\.gz$/){
        $FH{$this->{INSTANCE}} = new IO::Zlib;
        $FH{$this->{INSTANCE}}->open($filename, 'rb') or croak("Error at G::IO::GenBankI: $!\n");;
    }else{
        $FH{$this->{INSTANCE}} = new IO::File;
        open($FH{$this->{INSTANCE}}, $filename) or croak("Error at G::IO::GenBankI: $!\n");
    }

    return $FH{$this->{INSTANCE}};
}
read_featuresdescriptionprevnextTop
sub read_features {
    local($_);
    my $this = shift;
    my $num = -1;
    my $cds = 0;
    my $transexc = 0;
    $this->{"CDS0"}->{dummy} = 1;

    my $fh = $FH{$this->{INSTANCE}};
    while(<$fh>){
	my $tmpline;
        JUMPLABEL: if(length($tmpline)){
	    $_ = $tmpline;
	};

	if (/^BASE COUNT/){
	    s/BASE COUNT //g;
	    $this->{"BASE_COUNT"} = $_;
	}elsif (/^ORIGIN/){
	    last;
	}elsif (/^ {5}(\S+)\s+(.*)$/ && $_ !~ /\//){
	    my $key = $1;
	    my $feature = $2;
	    $transexc = 0;
	    $num ++;

	    $this->{"FEATURE$num"}->{feature} = $num;
	    $this->{"FEATURE$num"}->{type} = $key;
	    $this->{"FEATURE$num"}->{on} = 1;
	    
	    if ($this->{"FEATURE$num"}->{type} eq "CDS"){
		$cds ++;
		$this->{"FEATURE$num"}->{cds} = $cds;
	    }

	    s/\. \./\.\./g; #for (1. .2) type irregular format
s/\^/\.\./g; #for (1^2) type irregular format
my $part_left = tr/\<//d; #for (<1..2) type irregular format
my $part_right = tr/\>//d; #for (1..2>) type irregular format
$this->{"FEATURE$num"}->{"partial"} = "$part_left $part_right"; if (/join/){ if (/complement\(join/){ $this->{"FEATURE$num"}->{"direction"} = "complement"; s/complement//; }else{ $this->{"FEATURE$num"}->{"direction"} = "direct"; } my $fth = $_; while(<$fh>){ $tmpline = $_; last if($tmpline =~ /\// || $tmpline =~ /ORIGIN/ || $tmpline =~ /^ {5}\S+/); $fth .= $tmpline; } substr($fth, 0, 19) = ''; $fth =~ s/join//g; $fth =~ s/\(//g; $fth =~ s/\)//g; $fth =~ s/ //g; $fth =~ s/\n//g; $fth =~ s/complement/c/g; my @tmpfth2; for my $field (split(/,/, $fth)){ if($field =~ /\.\./){ push(@tmpfth2, $field); }else{ push(@tmpfth2, "$field..$field"); } } $fth = join(',', @tmpfth2); my $tmpfth = $fth; $tmpfth =~ s/c//g; my @choparray = split(/\.\./, $tmpfth); $this->{"FEATURE$num"}->{"start"} = shift @choparray; $this->{"FEATURE$num"}->{"end"} = pop @choparray; $this->{"FEATURE$num"}->{"join"} = $fth; if ($fth =~ /\:/){ $this->{"FEATURE$num"}->{"partial"} = $fth; } goto JUMPLABEL; }elsif (/\?/){ $this->{"FEATURE$num"}->{"type"} = "partial_$key"; $this->{"FEATURE$num"}->{"partial"} = $feature; msg_error("Partial feature: $feature\n"); }elsif (/complement\((contig\d+)\:(\d+)\.\.(\d+)\)/){ $this->{"FEATURE$num"}->{"direction"} = "complement"; $this->{"FEATURE$num"}->{"start"} = $2; $this->{"FEATURE$num"}->{"end"} = $3; $this->{"FEATURE$num"}->{"contig"} = $1; }elsif (/(contig\d+)\:(\d+)\.\.(\d+)/){ $this->{"FEATURE$num"}->{"direction"} = "direct"; $this->{"FEATURE$num"}->{"start"} = $2; $this->{"FEATURE$num"}->{"end"} = $3; $this->{"FEATURE$num"}->{"contig"} = $1; }elsif (/complement\((\d+)\.\.(\d+)\)/){ $this->{"FEATURE$num"}->{"direction"} = "complement"; $this->{"FEATURE$num"}->{"start"} = $1; $this->{"FEATURE$num"}->{"end"} = $2; }elsif (/(\d+)\.\.(\d+)/){ $this->{"FEATURE$num"}->{"direction"} = "direct"; $this->{"FEATURE$num"}->{"start"} = $1; $this->{"FEATURE$num"}->{"end"} = $2; }elsif (/\s+complement\((\d+)\)/){ $this->{"FEATURE$num"}->{"direction"} = "complement"; $this->{"FEATURE$num"}->{"start"} = $1; $this->{"FEATURE$num"}->{"end"} = $1; }elsif (/\s+(\d+)/){ $this->{"FEATURE$num"}->{"direction"} = "direct"; $this->{"FEATURE$num"}->{"start"} = $1; $this->{"FEATURE$num"}->{"end"} = $1; }elsif (/replace\((\d+)\,\"/){ $this->{"FEATURE$num"}->{"direction"} = "direct"; $this->{"FEATURE$num"}->{"direction"} = "complement" if (/complement/); $this->{"FEATURE$num"}->{"start"} = $1; $this->{"FEATURE$num"}->{"end"} = $1; $this->{"FEATURE$num"}->{"partial"} = $_; }elsif (/(\d+).*\.\..*(\d+)/){ $this->{"FEATURE$num"}->{"direction"} = "direct"; $this->{"FEATURE$num"}->{"direction"} = "complement" if (/complement/); $this->{"FEATURE$num"}->{"start"} = $1; $this->{"FEATURE$num"}->{"end"} = $2; $this->{"FEATURE$num"}->{"partial"} = $_; }else{ msg_error("Irregular location feature: $key $feature\n"); } }else{ if (/\/(\w+)=\"([^\"]+)\"/){ if(length($this->{"FEATURE$num"}->{"$1"})){ $this->{"FEATURE$num"}->{"$1"} .= "\t" . $2; }else{ $this->{"FEATURE$num"}->{"$1"} = $2; } }elsif (/\/(\w+)=\"([^\"]+)/){ my $tag = $1; my $tmp = $2; my $line; while(<$fh>){ if (!/\"/){ $tmp .= $_; }elsif (/([^\"]+)\"/){ $tmp .= $1; last; } } $tmp =~ s/\s+/ /g; $tmp =~ s/ //g if ($tag eq 'translation'); if(length($this->{"FEATURE$num"}->{"$tag"})){ $this->{"FEATURE$num"}->{"$tag"} .= "\t" . $tmp; }else{ $this->{"FEATURE$num"}->{"$tag"} = $tmp; } }elsif (/\/(\w+)=(\d+)/){ $this->{"FEATURE$num"}->{$1} = $2; }elsif (/\/(\w+)=\((.*)\)/){ my $key = $1; my $val = $2; if ($key eq 'transl_except'){ if ($transexc == 0){ $this->{"FEATURE$num"}->{$key} = $2; }else{ $this->{"FEATURE$num"}->{"$key" . $transexc} = $2; } $transexc ++; }else{ $this->{"FEATURE$num"}->{$key} = $val; } }elsif (/\/(\w+)=(.*)/){ $this->{"FEATURE$num"}->{$1} = $2; }elsif (/\/(\w+)/){ $this->{"FEATURE$num"}->{$1} .= '1'; } } }
}
read_headerdescriptionprevnextTop
sub read_header {
    my $this = shift;
    my $line = '';

    my $fh = $FH{$this->{INSTANCE}};
    while($line = <$fh>){
	last if ($line =~ /^FEATURES/);
	if($line =~ /^ORIGIN/){
	    $this->{G_INTERNAL_MSG} = 'skip';
	    last;
	}
	$this->{HEADER} .= $line;
    }

    $this->{HEADER} =~ /((Archaea|Bacteria|Eukaryota|Viruses|other sequences);*.*?)\n\S/s;
    $this->{TAXONOMY}->{0} = $1;
    $this->{TAXONOMY}->{0} =~ s/\n//g;
    $this->{TAXONOMY}->{0} =~ s/\s+ / /g;
    $this->{TAXONOMY}->{0} =~ s/; /;/g;
    $this->{TAXONOMY}->{0} =~ s/Bacillales/Bacilli;Bacillales/;
    $this->{TAXONOMY}->{0} =~ s/Lactobacillales/Bacilli;Lactobacillales/;
    $this->{TAXONOMY}->{0} =~ s/Staphylococcus/Staphylococcaceae;Staphylococcus/;
    $this->{TAXONOMY}->{0} =~ s/Synechococcus/Synechococcales;Synechococcaceae;Synechococcus/;

    my $i = 1;
    foreach my $entry (split(/;/, $this->{TAXONOMY}->{0})){
	$this->{TAXONOMY}->{$i} = $entry;
	$i ++;
    }
		       
    $this->{TAXONOMY}->{all}    = $this->{TAXONOMY}->{0};
    $this->{TAXONOMY}->{domain} = $this->{TAXONOMY}->{1};
    $this->{TAXONOMY}->{phylum} = $this->{TAXONOMY}->{2};
    $this->{TAXONOMY}->{class}  = $this->{TAXONOMY}->{3};
    $this->{TAXONOMY}->{order}  = $this->{TAXONOMY}->{4};
    $this->{TAXONOMY}->{family} = $this->{TAXONOMY}->{5};
    
    $this->{HEADER}     =~ /DEFINITION\s+(.+?)\n\S/s; 
    $this->{DEFINITION} = $1;
    $this->{HEADER}     =~ /ACCESSION\s+(.+?)\n\S/s; 
    $this->{ACCESSION}  = $1;
    $this->{HEADER}     =~ /SOURCE\s+(.+?)\n(?:  \S|\S)/s; 
    $this->{SOURCE}     = $1;
    $this->{HEADER}     =~ /ORGANISM\s+(.+?)\n\s+(?:Archaea|Bacteria|Eukaryota)/s; 
    $this->{ORGANISM}   = $1;
    $this->{$_} =~ s/ +/ /g foreach(qw(DEFINITION ACCESSION SOURCE ORGANISM));
    $this->{$_} =~ s/\n//g  foreach(qw(DEFINITION ACCESSION SOURCE ORGANISM));

    my $tmp = $this->{HEADER};
    $tmp =~ s/\n/ /g;
    $tmp =~ s/ +/ /g;
    if($tmp =~ /The reference sequence (?:was derived from|is identical to) (.+?)\./){
	$this->{GBKID} = $1;
    }else{
	unless($this->{LOCUS}->{id} !~ /^NC_/){
	    $this->{GBKID} = $this->{LOCUS}->{id};
	}
    }

    my @organism = split(/ /, $this->{ORGANISM});
    shift @organism if($organism[0] =~ /Candidatus/); 
    $this->{TAXONOMY}->{genus}   = shift @organism;
    $this->{TAXONOMY}->{species} = shift @organism;
}
read_locusdescriptionprevnextTop
sub read_locus {
    my $this = shift;
    my $msg = shift;
    my $tmp = '';
    local($_);

    my $fh = $FH{$this->{INSTANCE}};
    while(<$fh>){
	next unless (/LOCUS/);

	my @locus_line = split;
	shift @locus_line;

	if ($#locus_line == 6){
	    $this->{"LOCUS"}->{"circular"} = $_ =~ /circular/ ? 1:0;
	    ($this->{"LOCUS"}->{"id"},
	     $this->{"LOCUS"}->{"length"},
	     undef,
	     $this->{"LOCUS"}->{"nucleotide"},
	     undef,
	     $this->{"LOCUS"}->{"type"},
	     $this->{"LOCUS"}->{"date"}) = @locus_line;
	}elsif ($#locus_line == 5){
	    $this->{"LOCUS"}->{"circular"} = $_ =~ /circular/ ? 1:0;
	    ($this->{"LOCUS"}->{"id"},
	     $this->{"LOCUS"}->{"length"},
	     undef,
	     $this->{"LOCUS"}->{"nucleotide"},
	     $this->{"LOCUS"}->{"type"},
	     $this->{"LOCUS"}->{"date"}) = @locus_line;
	}else{
	    msg_error("ERROR: Unknown LOCUS definition\n") if ($msg ne 'no msg');
	    $this->{"LOCUS"}->{"circular"} = $_ =~ /circular/ ? 1:0;
	    $this->{"LOCUS"}->{"id"} = shift @locus_line;
	}
	last;
    }
}
General documentation
No general documentation available.