G::Tools Blast
SummaryIncluded librariesPackage variablesDescriptionGeneral documentationMethods
Summary
  G::Tools::Blast - Wrapper interface to BLAST
Package variables
No package variables defined.
Included modules
G::Messenger
G::Seq::Primitive
SelfLoader
SubOpt
Inherit
Exporter
Synopsis
No synopsis!
Description
    This class is a part of G-language Genome Analysis Environment, 
    collecting interfaces to BLAST.
Methods
_blast
No description
Code
_gblaster
No description
Code
_get_swissprot_for_blast
No description
Code
annotate_with_swissprot
No description
Code
blastallDescriptionCode
sp_blast
No description
Code
Methods description
blastallcode    nextTop
  Name: blastall   -   runs BLAST via DDBJ web service

  Description:
    This is a wrapper around DDBJ-REST BLAST web service provided by 
    http://www.xml.nig.ac.jp/doc/detail/Blast.html
BLAST is the most popular local alignment search tool.
Options and usages are identical to that of regular blastall command line tool. Supported databases includes: DDBJ, DDBJ_EXEST, DDBJNEW, DDBJNEW_EXEST, DAD, PDB, PDBSH, PRF PROTEIN, SWISS, WORMPEP, UNIPROT, TREMBL Run this function without any arguments to see full listing. Usage: blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, -i=>'test.fasta'); or blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, -i=>$AminoAcidSeq); or blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, $AminoAcidSeq); Options: All optional parameters are identical to that of commandline BLAST. Run this function without any arguments to see full listing. References: 1. Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ (1997) "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs.", Nucleic Acids Res., 25(17):3389-3402. 2. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ (1990) "Basic local alignment search tool.", J Mol Biol., 215(3):403-410. Author: Kazuki Oshita (t07122ko@sfc.keio.ac.jp)
Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20080213-01 initial posting
Methods code
_blastdescriptionprevnextTop
sub _blast {
    &opt_default(p=>'blastn',qr=>'off',input=>"file");
    my @args=opt_get(@_);

    my $qr=&opt_val("qr");
    my $input=&opt_val("input");
    my $seq;
    my @param;
    my $param;
    my %opt;
    my @tmp;
    my $num;

    $opt{d}=shift @args;
    if($input eq 'seq'){
	$seq=shift @args;
	opendir(DIR,'/tmp');
	@tmp=readdir(DIR);
	$num=$#tmp+1+time;
	@tmp=keys(%$seq);
	close(DIR);
	open(FILE,'>/tmp/blast_'.$num.'.seq');
        print FILE ">$tmp[0]\n";
        print FILE $$seq{$tmp[0]},"\n";
	close(FILE);
	$opt{i}='/tmp/blast_'.$num.'.seq'; 
    }
    else{
	$opt{i}=shift @args;
    }

    $opt{p}=opt_val("p");
    $opt{e}=opt_val("e");
    $opt{m}=opt_val("m");
$opt{o}=opt_val("o"); $opt{F}=opt_val("F"); $opt{G}=opt_val("G"); $opt{E}=opt_val("E"); $opt{X}=opt_val("X"); $opt{I}=opt_val("I"); $opt{q}=opt_val("q");
$opt{r}
=opt_val("r"); $opt{v}=opt_val("v"); $opt{b}=opt_val("b"); $opt{f}=opt_val("f"); $opt{g}=opt_val("g"); $opt{Q}=opt_val("Q"); $opt{D}=opt_val("D"); $opt{a}=opt_val("a"); $opt{J}=opt_val("J"); $opt{M}=opt_val("M"); $opt{W}=opt_val("W"); $opt{z}=opt_val("z"); $opt{K}=opt_val("K"); $opt{P}=opt_val("P"); $opt{Y}=opt_val("Y"); $opt{S}=opt_val("S"); $opt{T}=opt_val("T"); $opt{U}=opt_val("U"); $opt{y}=opt_val("y"); $opt{Z}=opt_val("Z"); $opt{n}=opt_val("n"); $opt{A}=opt_val("A"); $opt{w}=opt_val("w"); $opt{t}=opt_val("t"); $opt{L}=opt_val("L"); $opt{O}=opt_val("O"); $opt{l}=opt_val("l"); $opt{R}=opt_val("R"); foreach(sort keys(%opt)){ next if($opt{$_} eq ''); push(@param,'-'.$_); push(@param,$opt{$_}); } $param=join(' ',@param); system('blastall',@param) if($qr eq "off"); system('qr',"blastall $param") if($qr eq "on"); unlink('/tmp/blast_'.$num.'.seq') if($input eq 'seq' && $qr ne 'on'); return $param; } sub _formatdb{ &opt_default(p=>'F',o=>'T'); my @args=opt_get(@_); my $file=shift @args; my @param; my %opt; $opt{p}=opt_val('p'); $opt{o}=opt_val('o'); $opt{t}=opt_val('t'); $opt{l}=opt_val('l'); $opt{a}=opt_val('a'); $opt{b}=opt_val('b'); $opt{e}=opt_val('e'); $opt{n}=opt_val('n'); $opt{v}=opt_val('v'); $opt{s}=opt_val('s');
$opt{V}=opt_val('V');
$opt{A}=opt_val('A'); foreach(sort keys(%opt)){ next if($opt{$_} eq ''); push(@param,'-'.$_); push(@param,$opt{$_}); } system('formatdb','-i',"$file",@param); return '-i '."$file ".join(' ',@param); } sub _blast_tp_finder{ &opt_default(output => 'file',outfilename=>'parsed_blast_list',header => 'on'); my @args = opt_get(@_); my $file=shift; my $output = opt_val('output'); my $outfilename = opt_val('outfilename'); my $header = opt_val('header'); my @id_query; my $query_name; my @query; my @query_num; my $s1=0; my @query_num_sort; my $query_start; my $query_end; my @sbject_num; my @sbject_num_sort; my $sbject_start; my $sbject_end; my $length; my @identities; my $subject_name; my @strand; my @sbject; my @infile; my @score; my $evalue; my $this={}; my $p=0; my $redundancy=''; open(OUTFILE,'>'.$outfilename) if($output eq 'file'); open(FILE,"$file"); while(<FILE>){ # if(/Query=/){
# @id_query=split(/\s/,$_);
# $_=~s/Query= //;
# $_=~tr/\n//d;
# $query_name=$_;
# msg_send("$query_name\n");
# }
if(/Query= (\d+)_(\d+)_(.+)/){ $query_name=$3; $query_name=~tr/\n//d;
$redundancy = $1; msg_send("$redundancy\n"); } if(/^>/){ $s1=0; $subject_name=$_; $subject_name=~tr/>//d;
$subject_name=~tr/\n//d;
msg_send("$redundancy".'_vs_'."$subject_name\n"); print OUTFILE "$redundancy".'_vs_'."$subject_name\n" ; next if($redundancy == $subject_name); } &msg_send("\n\tEVALUE\tQUERY\tDATABASE\tLENGTH\tQUERY_START\tQUERY_END\tDATABASE_START\tDATABASE_END\tMATCH\tIDENTITY\tSTRAND\n-------------------------------------------------------------------------------------------------------\n") if ($header eq 'on' && $output eq 'STDOUT' && $s1 == 0); print OUTFILE "\n\tEVALUE\tQUERY\tDATABASE\tLENGTH\tQUERY_START\tQUERY_END\tDATABASE_START\tDATABASE_END\tMATCH\tIDENTITY\tSTRAND\n-------------------------------------------------------------------------------------------------------\n" if($header eq 'on' && $output eq 'file'); if(/Length =(.+)/){ $s1 = 1; $length = $1; $length =~tr/\n//d;
} if(/Identities =/ && $s1 ==1){ @identities = split(/\s/,$_); } if(/Strand/ && $s1 ==1){ @strand=split(/\s/,$_); } if(/Query:/){ @query=split(/[\s]+/,$_); push(@query_num,$query[1]); push(@query_num,$query[3]); $s1=1; } if(/Score / || />/ || /Lambda/){ $this->{$p}->{"evalue"} = $evalue; &msg_send("$evalue") if($output eq 'STDOUT'); print OUTFILE "$evalue" if($output eq 'file'); @score=split(/Expect/,$_); $evalue=$score[1]; $evalue=~tr/=//d;
$evalue=~tr/\n//d;
@query_num_sort=sort {$b<=>$a} @query_num; $query_start=pop @query_num_sort; $query_start=~tr/\s//;
$query_end=shift @query_num_sort; $query_end=~tr/\s//;
@sbject_num_sort=sort {$b<=>$a} @sbject_num; $sbject_start=pop @sbject_num_sort; $sbject_start=~tr/\s//;
$sbject_end=shift @sbject_num_sort; $sbject_end=~tr/\s//;
$identities[4] =~ tr/\s\(\)//d; if($query_start!=""){ $this->{$p}->{"query_name"} = $query_name; $this->{$p}->{"subject_name"} = $subject_name; $this->{$p}->{"length"} = $length; $this->{$p}->{"query_start"} = $query_start; $this->{$p}->{"query_end"} = $query_end; $this->{$p}->{"subject_start"} = $sbject_start; $this->{$p}->{"subject_end"} = $sbject_end; $this->{$p}->{"identity"} = $identities[3]; $this->{$p}->{"percentage"} = $identities[4]; $this->{$p}->{"strand"} = $strand[5]; &msg_send("\t$query_name\t$subject_name\t$length\t$query_start\t$query_end\t$sbject_start\t$sbject_end\t$identities[3]\t$identities[4]\t$strand[5]\n") if ($output eq 'STDOUT'); print OUTFILE "\t$query_name\t$subject_name\t$length\t$query_start\t$query_end\t$sbject_start\t$sbject_end\t$identities[3]\t$identities[4]\t$strand[5]\n" if ($output eq 'file'); $p++; } @query=(); @query_num=(); @sbject=(); @sbject_num=(); @identities=(); # msg_send('.');
} if($s1==1){ if(/Sbjct:/){ @sbject=split(/\s+/,$_); push(@sbject_num,$sbject[1]); push(@sbject_num,$sbject[3]); } } } return $this; close(FILE); close(OUTFILE); } sub mapping_blast2{ require Bio::Tools::Run::StandAloneBlast; msg_send("####################BLAST_SEARCHSTART#####################\n"); &opt_default(clustering=>"off", p=>"blastn",database=>"genome_file",query=>"cDNA_dir\/", makedir=>"Blast_result\/",resultfile=>"blastresultlist"); ## my args
my @args=opt_get(@_); my $clustering=&opt_val("clustering"); my $p=&opt_val("p"); my $database=&opt_val("database"); my $query=&opt_val("query"); my $makedir=&opt_val("makedir"); my $resultfile=&opt_val("resultfile"); ##Using Blast
my %opt; my($num,$clusterdata,$count,$factory,$paramnum,$blast_report,$item,$item2); my(@cdnapass,@readpart,@selectedcdna,@blastoptname,@param,@params); my($cdnaid,$parseresult1,$parseresult2,$icount,$jcount,$kcount,$queryplace,$memory); my(@blastresultfiles,@blastdata,@genomeid); if($clustering ne 'off'){ open(FILE, $clustering); $num=0; while(<FILE>){ if($_=~ /^[a-zA-Z0-9]/){ if($_=~ /,/){ $queryplace=$query; $clusterdata=$_; @readpart= split(/,/, $clusterdata); @cdnapass[$num]="$query"."$readpart[0]".".seq"; $selectedcdna[$num]="$readpart[0]".".seq"; chomp @cdnapass[$num]; $num ++; } } } close FILE; } if($clustering eq 'off'){ if(-f $query){ $cdnapass[0]=$query; $selectedcdna[0]=~ s/\s+\///g; $num=1; } else{ opendir(R_DIR,"$query"); $queryplace=$query; @selectedcdna=readdir(R_DIR); $num=@selectedcdna; if($selectedcdna[0]=~ /\./){ shift(@selectedcdna); $num --; } if($selectedcdna[0]=~ /\.\./){ shift(@selectedcdna); $num --; } $count=0; foreach $item2(@selectedcdna){ $cdnapass[$count]="$query"."$item2"; $count++; } closedir(R_DIR); } } $opt{p}=opt_val("p"); $opt{e}=opt_val("e"); $opt{m}=opt_val("m");
$opt{o}=opt_val("o"); $opt{F}=opt_val("F"); $opt{G}=opt_val("G"); $opt{E}=opt_val("E"); $opt{X}=opt_val("X"); $opt{I}=opt_val("I"); $opt{q}=opt_val("q");
$opt{r}
=opt_val("r"); $opt{v}=opt_val("v"); $opt{b}=opt_val("b"); $opt{f}=opt_val("f"); $opt{g}=opt_val("g"); $opt{Q}=opt_val("Q"); $opt{D}=opt_val("D"); $opt{a}=opt_val("a"); $opt{J}=opt_val("J"); $opt{M}=opt_val("M"); $opt{W}=opt_val("W"); $opt{z}=opt_val("z"); $opt{K}=opt_val("K"); $opt{P}=opt_val("P"); $opt{Y}=opt_val("Y"); $opt{S}=opt_val("S"); $opt{T}=opt_val("T"); $opt{U}=opt_val("U"); $opt{y}=opt_val("y"); $opt{Z}=opt_val("Z"); $opt{n}=opt_val("n"); $opt{A}=opt_val("A"); $opt{w}=opt_val("w"); $opt{t}=opt_val("t"); $opt{L}=opt_val("L"); $opt{O}=opt_val("O"); $opt{l}=opt_val("l"); $opt{R}=opt_val("R"); foreach(sort keys(%opt)){ next if($opt{$_} eq ''); push(@blastoptname,$_); push(@param,$opt{$_}); } unless($makedir=~ /none/){ system("mkdir $makedir"); } for($count=0;$count<$num;$count++){ msg_send("Searching_start: NO $count\n ") ; if($makedir=~ /none/){ @params = ('program' => "$p",'database' => "$database",'outfile' => "$selectedcdna[$count]".".out", '_READMETHOD' => 'Blast'); } else{ @params = ('program' => "$p",'database' => "$database",'outfile' => $makedir."/$selectedcdna[$count]".".out", '_READMETHOD' => 'Blast'); } $factory = Bio::Tools::Run::StandAloneBlast->new(@params); $paramnum=0; foreach $item(@blastoptname){ $factory->$item($param[$paramnum]); $paramnum ++; } msg_send("$cdnapass[$count]\n"); $blast_report = $factory->blastall($cdnapass[$count]); msg_send("finish\n"); } ##############################################################
#################BLAST PARSER #######################
##############################################################
opendir(R_DIR,"$makedir"); @blastresultfiles=readdir(R_DIR); $num=@blastresultfiles; if($blastresultfiles[0]=~ /\./){ shift(@blastresultfiles); $num --; } if($blastresultfiles[0]=~ /\.\./){ shift(@blastresultfiles); $num --; } print "###################BLAST_PARSING_START####################\n"; for($count=0;$count<$num;$count++){ &blastparser(-open=>"$makedir"."$blastresultfiles[$count]" ,-resultfile=>"$resultfile") } $count=-1; $icount=-1; $kcount=0; $jcount=0; open(FILE9, "$resultfile"); while(<FILE9>){ if($_=~ /=/){ $icount++; $jcount=0; $kcount=0; $count++; $parseresult1=<FILE9>; chomp $parseresult1; @genomeid=split(/\t/,$parseresult1); $genomeid[0]=~ s/>//g; $genomeid[0]=~ s/ +//g; $parseresult2=<FILE9>; chomp $parseresult2; @blastdata=split(/\t/,$parseresult2); $memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]"; $memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0]; $memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1]; $memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9]; $memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11]; $memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13]; $memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database; } if($_=~ />/){ $jcount++; $kcount=0; chomp $_; $parseresult1=$_; chomp $parseresult1; @genomeid=split(/\t/,$parseresult1); $genomeid[0]=~ s/>//g; $genomeid[0]=~ s/ +//g; $parseresult2=<FILE9>; chomp $parseresult2; @blastdata=split(/\t/,$parseresult2); $memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]"; $memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0]; $memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1]; $memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9]; $memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11]; $memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13]; $memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database; $kcount++; } if($_=~ /^[0-9]/){ $kcount++; $parseresult2=$_; chomp $parseresult2; @blastdata=split(/\t/,$parseresult2); $memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]"; $memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0]; $memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1]; $memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9]; $memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11]; $memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13]; $memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database; } } close FILE9; msg_send("finish\n"); return $memory; } sub blastcutting{ &opt_default(basenum=>"30000",distance=>"50000", open=>"none", multicut=>"on",multidir=>"multientry\/",upper=>"0",logfilename=>"blastcut.log",resultdir=>"blastcut\/" ); my @args=opt_get(@_); my $multidir=&opt_val("multidir"); my $basenum=&opt_val("basenum"); my $open=&opt_val("open"); my $upper=&opt_val("upper"); my $logfilename=&opt_val("logfilename"); my $resultdir=&opt_val("resultdir"); my $multicut=&opt_val("multicut"); my $distance=&opt_val("distance"); my $this = shift @args; my %opt; my($count,$acount,$bcount,$ccount,$hcount,$icount,$flag,$flag2,$entryname,$multigene,$tmp,$genomenum); my($cutsentence,$sentence1,$sentence2,$cDNA_dir); my(@hinum,@strand,@database_start,@database_end,@database_name,@database_bits,@query_name); my(@max,@max_start,@max_end,@cDNA_pass); $flag=1; $flag2=1; my $memory; msg_send("#####################GENOME_CUT_START#####################\n"); msg_send("DEVIDE FASTA FILE...\n"); $hcount=0; $count=0; $acount=0; $icount=0; if($multicut eq "on"){ system("mkdir $multidir"); if($open=~ /none/){ open(FILE,"$this->{'0'}->{'0'}->{'0'}->{database_place}"); } else{ open(FILE,"$open"); } while(<FILE>){ if($_=~ />/){ if($flag=0){ close W_FILE; $flag=1; } $entryname=$_; chomp $entryname; $entryname=~ s/>//g; $entryname=~ s/ //g; open(W_FILE,">$multidir"."$entryname"); print W_FILE ">$entryname\n"; $flag=0; } else{ $multigene=$_; $multigene=~ s/\r\n/\n/g; chomp $multigene; $multigene=~ s/[^ATGCatgcNn]/N/g; print W_FILE $multigene; } } close FILE; close W_FILE; } $flag=1; while($flag==1){ $flag2=1; while($flag2==1){ if($this->{$hcount}->{$icount}->{$count}->{strand}=~ /s/ ){ $strand[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{strand}; $database_start[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_start}; $database_end[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_end}; $database_name[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_name}; $query_name[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{query_name}; $database_bits[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_bits}; $count ++; } else{ $max[$hcount][$icount]=$count; $icount++; $count=0; unless($this->{$hcount}->{$icount}->{$count}->{strand}=~ /[PM]/ ){ $flag2=0; } } } $hinum[$hcount]=$icount; $hcount ++; $icount=0; unless($this->{$hcount}->{$icount}->{$count}->{strand}=~ /[PM]/ ){ $flag=0; } } for($ccount=0;$ccount<$hcount;$ccount++){ for($bcount=0;$bcount<$hinum[$ccount];$bcount++){ if($strand[$ccount][$bcount][0] eq "Plus"){ $max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][0]; $max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][0]; for($acount=0;$acount<$max[$ccount][$bcount];$acount++){ if($strand[$ccount][$bcount][$acount] eq "Plus"){ if($max_start[$ccount][$bcount]>$database_start[$ccount][$bcount][$acount] && $max_start[$ccount][$bcount]-$database_start[$ccount][$bcount][$acount]+1<$distance ){ $max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][$acount]; } if($max_end[$ccount][$bcount]<$database_end[$ccount][$bcount][$acount] && $database_end[$ccount][$bcount][$acount]-$max_end[$ccount][$bcount]+1<$distance){ $max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][$acount]; } } } } else{ $max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][0]; $max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][0]; for($acount=0;$acount<$max[$ccount][$bcount];$acount++){ if($strand[$ccount][$bcount][$acount] eq "Minus"){ if($max_start[$ccount][$bcount]<$database_start[$ccount][$bcount][$acount] && $database_start[$ccount][$bcount][$acount] - $max_start[$ccount][$bcount]+1<$distance){ $max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][$acount]; } if($max_end[$ccount][$bcount]>$database_end[$ccount][$bcount][$acount] && $max_end[$ccount][$bcount]-$database_end[$ccount][$bcount][$acount] +1 <$distance){ $max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][$acount]; } } } $tmp=$max_start[$ccount][$bcount]; $max_start[$ccount][$bcount]=$max_end[$ccount][$bcount]; $max_end[$ccount][$bcount]=$tmp; } } } msg_send("PRINTING SEQUENCE...\n"); $cDNA_dir=$query_name[0][0][0]; $cDNA_dir=~ s/=//g; @cDNA_pass=split(/\//,$cDNA_dir); pop(@cDNA_pass); $cDNA_dir=join('/',@cDNA_pass); open(ROG,">$logfilename"); print ROG "cDNA: $cDNA_dir\n"; print ROG "Genome: $resultdir\n\n"; system("mkdir $resultdir"); for($ccount=0;$ccount<$hcount;$ccount++){ if($upper==0){$upper=$hinum[$ccount];} if($upper>$hinum[$ccount]){$upper=$hinum[$ccount];} for($bcount=0;$bcount<$upper;$bcount++){ print ROG "$query_name[$ccount][$bcount][0]\n"; $memory->{$ccount}->{Query_Path}->{$bcount} ="$query_name[$ccount][$bcount][0]"; $memory->{$query_name[$ccount][$bcount][0]}->{Query_Path}->{$bcount} ="$query_name[$ccount][$bcount][0]"; open(FILE,"$multidir"."$database_name[$ccount][$bcount][0]"); while(<FILE>){ if($_=~ />/){ $sentence1=$_; $sentence2=<FILE>; } } chomp $sentence2; $genomenum=length($sentence2); $max_start[$ccount][$bcount]=$max_start[$ccount][$bcount]-$basenum; $max_end[$ccount][$bcount]=$max_end[$ccount][$bcount]+$basenum; if($max_start[$ccount][$bcount]<1){$max_start[$ccount][$bcount]=1;} if($max_end[$ccount][$bcount]>$genomenum){$max_end[$ccount][$bcount]=$genomenum;} print ROG "$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]\n\n"; $memory->{$ccount}->{Genome_Path}->{$bcount} ="$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]"; $memory->{$query_name[$ccount][$bcount][0]}->{Genome_Path}->{$bcount} ="$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]"; $memory->{$ccount}->{Genome_Start}->{$bcount} = $max_start[$ccount][$bcount]; $memory->{$query_name[$ccount][$bcount][0]}->{Genome_Start}->{$bcount} = $max_start[$ccount][$bcount]; $memory->{$ccount}->{Genome_End}->{$bcount} =$max_end[$ccount][$bcount]; $memory->{$query_name[$ccount][$bcount][0]}->{Genome_End}->{$bcount} =$max_end[$ccount][$bcount]; open(W_FILE,">$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]"); $cutsentence=substr($sentence2,$max_start[$ccount][$bcount]-1,$max_end[$ccount][$bcount]-$max_start[$ccount][$bcount]+1); print W_FILE ">$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]\n"; print W_FILE "$cutsentence"; close FILE; close W_FILE; } } $memory->{Genome_ID}=$multidir; msg_send("BLAST_COMPLETE\n"); return $memory; } sub _post_blast_clusterer{ &opt_default(e=>'e-100',filename=>'blast_clusters.tmp',output=>'f'); my @args = &opt_get(@_); my $file = shift @args; my $eval = &opt_val('e'); my $filename = &opt_val('filename'); my $output = &opt_val('output'); my $clstlist; my $this = {}; my $i = 0; my ($query,$subject,$tmp,$clstlist); my @list = (); my @clusters = (); my %red; open(FILE,$file); open(OUTFILE,">$filename"); while(<FILE>){ if(/^=(.+)/){ if($i > 0){ $clstlist = join(',',@list); push (@clusters,$clstlist); print OUTFILE ("$clstlist\n")if($output eq 'f'); } $i++; @list = (); $clstlist = ''; $query = $1; push(@list,$query); } elsif(/^>(.+)\s+([0-9]+)/){ $subject = $1; $subject =~ s/\s\n//g; next if($subject eq $query); while(<FILE>){ if(/^.+\s+[0-9]+\s+(.+)\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+(.+)\s+([0-9])+\s+([0-9])+\s+([0-9])+\s+([0-9])+/){ if($1 <= $eval){ push(@list,$subject); msg_send("."); $this->{$query}->{$subject}->{strand} = $2; $this->{$query}->{$subject}->{qstart} = $3; $this->{$query}->{$subject}->{sstart} = $4; $this->{$query}->{$subject}->{qend} = $5; $this->{$query}->{$subject}->{send} = $6; } } last; } } } $clstlist = join(',',@list); push (@clusters,$clstlist); print OUTFILE ("$clstlist\n")if($output eq 'f'); @list = (); close(FILE); close(OUTFILE); return @clusters; } sub _list_clusterer { &opt_default(filename=>'cluster_list',sort=>'on',split=>','); &opt_get(@_); my $input = shift; my $filename = opt_val('filename'); my $sort = opt_val('sort'); my $split = opt_val('split'); my @clusters=(); my @array=(); my ($cluster,$new,$key); my $a; my $c=1; my $i=0; my $key; my %hash; open(INFILE,"$input"); open(OUTFILE,">$filename"); open(TMP,">tmp.txt"); while(<INFILE>){ chomp; @array = split(/$split/); $cluster = $c; $new = 1; msg_send("."); foreach $key (sort keys %hash){ foreach $a (@array){ $a =~ s/\s//g; if($key eq $a){ $cluster = $hash{$key}; $new = 0; last; } } if($new == 0){ last; } } if($new == 1){ foreach $a (@array){ $hash{$a} = $c; } $c++; print TMP $c,"\n"; } else{ foreach $a (@array){ $hash{$a} = $cluster; } print TMP $cluster, "\n"; } } close(INFILE); for($i=1;$i<=$c;$i++){ foreach $key (keys %hash){ if($hash{$key} == $i){ print OUTFILE $key, ","; } } print OUTFILE "\n"; } close(INFILE); close(OUTFILE); _list_sorter("$filename",-filename=>"$filename.sort",-split=>"$split") if ($sort ne 'off'); } sub _list_sorter { &opt_default(split=>',',filename=>"sort_list"); my @args = &opt_get(@_); my $input = shift @args; my $filename = &opt_val('filename'); my $split = &opt_val('split'); my $list; my (@array,@narray,@flist); open(IN,"$input"); open(OUT,">$filename"); while(<IN>){ chomp; @array = split(/$split/); @narray = (); foreach(sort @array){ $_ =~ s/\s//g; push(@narray,$_)if($_ ne ''); } if(scalar @narray > 1){ $list = join(',',@narray); }else{ $list = $_; } push(@flist,$list); print OUT "$list\n"; } close(IN); close(OUT); return(@flist); foreach(@flist){ msg_send("$_\n"); } } sub blastparser{ &opt_default(open=>"result" ,filename=>"blastresultlist"); my @args=opt_get(@_); my $open=&opt_val("open"); my $resultfile=&opt_val("filename"); my $this = shift @args; my $seq; my $param; my @param; my %opt; my @blastoptname; my $sentence; ##
my $sentence1; ##
my $sentence2; ##
my $sentence3; ##
my $sentence4; ##
my $sentence5; ##
my $sentence20; ##
my $lA; ##
my $lB; ##
my $lC; ##
my $lD; ##
my $lE; ##
my $lF; ##
my $lG; my @lva; ##
my @lvb; ##
my @sentenceA; ##
my @sentenceB; ##
my @sentenceC; ##
my $sentenceem; ##
my @partse; ##
my @partse2; ##
my @partig; ##
my @partig2; ##
my @partig3; ##
my @partig12; ##
my @partig13; ##
open(FILE, $open); open(FILE2, ">>$resultfile"); print FILE2 "QUERY\n"; print FILE2 "DATABASE\t"; print FILE2 "DATABASE_ALLBITS\n"; print FILE2 "SCOREBITS\t"; print FILE2 "SCORE\t"; print FILE2 "EXPECT\t"; print FILE2 "IDENTITIES(A\/\t"; print FILE2 "B)\t"; print FILE2 "%MATCH\t"; print FILE2 "GAP(A\/\t"; print FILE2 "B)\t"; print FILE2 "%GAP\t"; print FILE2 "STRAND\t"; print FILE2 "QUERY_START\t"; print FILE2 "SBJCT_START\t"; print FILE2 "QUERY_END\t"; print FILE2 "SBJCT_END\t\n"; print FILE2 "---------------------------------------------------------------------------------------------\n"; $lA=0; $lB=0; $lC=0; $lD=0; $lE=0; $lF=0; $lG=0; while(<FILE>){ if($_=~ /Query= /){ $sentence=$_; $sentence=~ s/Query= //g; print FILE2 "=$sentence"; } if($_=~ />/ || $sentence3 =~ />/){ if($_=~ />/){ chomp $_; print FILE2 "$_\t";##h
$sentence20=<FILE>; chomp $sentence20; $sentence20=~ s/ //g; $sentence20=~ s/Length=//g; print FILE2 "$sentence20\n"; ##h
} if($sentence3 =~ />/){ chomp $sentence3; print FILE2 "$sentence3\t"; ##h
chomp $sentence20; $sentence20=~ s/ //g; $sentence20=~ s/Length=//g; print FILE2 "$sentence20\n"; ##h
$sentence3=$sentenceem; } } if($_=~ /^ Score/ || $sentence4 =~ /Score/){ if($_=~ /Score/){ $sentenceA[$lA]=$_; chomp $sentenceA[$lA]; @partse=split(/,/,$sentenceA[$lA]); $partse[0]=~ s/ +//g; $partse[0]=~ s/Score=//g; $partse[0]=~ s/bits//g; $partse[0]=~ s/\)//g; @partse2=split(/\(/,$partse[0]); $partse[1]=~ s/ +//g; $partse[1]=~ s/Expect=//g; print FILE2 "$partse2[0]\t"; print FILE2 "$partse2[1]\t"; print FILE2 "$partse[1]\t"; $lA++; $sentenceA[$lA]=<FILE>; chomp $sentenceA[$lA]; @partig=split(/,/,$sentenceA[$lA]); $partig[0]=~ s/ +//g; $partig[0]=~ s/Identities=//g; $partig[0]=~ s/%\)//g; @partig2=split(/\//,$partig[0]); print FILE2 "$partig2[0]\t"; @partig3=split(/\(/,$partig2[1]); print FILE2 "$partig3[0]\t"; print FILE2 "$partig3[1]\t"; $partig[1]=~ s/ +//g; $partig[1]=~ s/Gaps=//g; $partig[1]=~ s/%\)//g; @partig12=split(/\//,$partig[1]); @partig13=split(/\(/,$partig12[1]); if($partig[1]=~ /[0-9]/){ print FILE2 "$partig12[0]\t"; print FILE2 "$partig13[0]\t"; print FILE2 "$partig13[1]\t"; } else{ print FILE2 "0\t"; print FILE2 "0\t"; print FILE2 "0\t"; } $lA++; $sentenceA[$lA]=<FILE>; chomp $sentenceA[$lA]; $sentenceA[$lA]=~ s/ //g; $sentenceA[$lA]=~ s/Strand=//g; $sentenceA[$lA]=~ s/Plus\///g; print FILE2 "$sentenceA[$lA]\t"; $lA++; } if($sentence4=~ /Score/){ $sentenceA[$lA]=$sentence4; chomp $sentenceA[$lA]; @partse=split(/,/,$sentenceA[$lA]); $partse[0]=~ s/ +//g; $partse[0]=~ s/Score=//g; $partse[0]=~ s/bits//g; $partse[0]=~ s/\)//g; @partse2=split(/\(/,$partse[0]); $partse[1]=~ s/ +//g; $partse[1]=~ s/Expect=//g; print FILE2 "$partse2[0]\t"; print FILE2 "$partse2[1]\t"; print FILE2 "$partse[1]\t"; $lA++; $sentenceA[$lA]=$_; chomp $sentenceA[$lA]; @partig=split(/,/,$sentenceA[$lA]); $partig[0]=~ s/ +//g; $partig[0]=~ s/Identities=//g; $partig[0]=~ s/%\)//g; @partig2=split(/\//,$partig[0]); print FILE2 "$partig2[0]\t"; @partig3=split(/\(/,$partig2[1]); print FILE2 "$partig3[0]\t"; print FILE2 "$partig3[1]\t"; $partig[1]=~ s/ +//g; $partig[1]=~ s/Gaps=//g; $partig[1]=~ s/%\)//g; @partig12=split(/\//,$partig[1]); @partig13=split(/\(/,$partig12[1]); if($partig[1]=~ /[0-9]/){ print FILE2 "$partig12[0]\t"; print FILE2 "$partig13[0]\t"; print FILE2 "$partig13[1]\t"; } else{ print FILE2 "0\t"; print FILE2 "0\t"; print FILE2 "0\t"; } $lA++; $sentenceA[$lA]=<FILE>; chomp $sentenceA[$lA]; $sentenceA[$lA]=~ s/ //g; $sentenceA[$lA]=~ s/Strand=//g; $sentenceA[$lA]=~ s/Plus\///g; print FILE2 "$sentenceA[$lA]\t"; $lA++; $sentence4=$sentenceem; } } if($_=~ /Query:/){ $sentenceB[$lB]=$_; $lB ++; $sentence1=<FILE>; $sentence2=<FILE>; $sentenceC[$lC]=$sentence2; $lC++; while(<FILE>){ if($_=~ /Query:/){ $sentenceB[$lB]=$_; ##print $sentenceB[$lB];
$lB ++; $sentence1=<FILE>; $sentence2=<FILE>; $sentenceC[$lC]=$sentence2; $lC++; } if($_=~ />/ || $_ =~ /Score/ || $_ =~ / Database/){ if($_=~ />/){ $sentence3=$_; @lva=split(/\s+/,$sentenceB[$lD]); @lvb=split(/\s+/,$sentenceC[$lE]); print FILE2 "$lva[1]\t"; print FILE2 "$lvb[1]\t"; @lva=split(/\s+/,$sentenceB[$lB-1]); @lvb=split(/\s+/,$sentenceC[$lC-1]); print FILE2 "$lva[3]\t"; print FILE2 "$lvb[3]\t\n"; $lD=$lB; $lE=$lC; $sentence20=<FILE>; last; } if($_=~ /Score/){ $sentence4=$_; @lva=split(/\s+/,$sentenceB[$lD]); @lvb=split(/\s+/,$sentenceC[$lE]); print FILE2 "$lva[1]\t"; print FILE2 "$lvb[1]\t"; @lva=split(/\s+/,$sentenceB[$lB-1]); @lvb=split(/\s+/,$sentenceC[$lC-1]); print FILE2 "$lva[3]\t"; print FILE2 "$lvb[3]\t\n"; $lD=$lB; $lE=$lC; last; } if($_=~ /Database/){ $sentence5=$_; @lva=split(/\s+/,$sentenceB[$lD]); @lvb=split(/\s+/,$sentenceC[$lE]); print FILE2 "$lva[1]\t"; print FILE2 "$lvb[1]\t"; @lva=split(/\s+/,$sentenceB[$lB-1]); @lvb=split(/\s+/,$sentenceC[$lC-1]); print FILE2 "$lva[3]\t"; print FILE2 "$lvb[3]\t\n"; $lD=$lB; $lE=$lC; last; } } } } } close FILE; close FILE2; } 1;
}
_gblasterdescriptionprevnextTop
sub _gblaster {
    my $options = shift;

    my @blast = `blastall $options`;
    my @result;
    foreach my $tmp (@blast){
	chomp($tmp);
	my ($query, $subject, $percent, $length, undef, undef, $qstart, $qend, 
	    $sstart, $send, $eval, $score) = split(/\s+/, $tmp, 12);
	push(@result, [$query, $subject, $percent, $length, $qstart, $qend, 
		       $sstart, $send, $eval, $score]);
    }

    return @result;
}
_get_swissprot_for_blastdescriptionprevnextTop
sub _get_swissprot_for_blast {
    my $force = shift;
    my $cwd = getcwd();

    unless(-e "/tmp/glang/swissprot" && $force eq ''){
	mkdir("/tmp/glang", 0777);
	chdir("/tmp/glang");
	system("wget ftp://ftp.ncbi.nih.gov/blast/db/FASTA/swissprot.gz -O swissprot.gz");
	system("gunzip swissprot.gz");
	system("formatdb -i swissprot -p T");
	chdir($cwd);
    }

    return 1;
}
annotate_with_swissprotdescriptionprevnextTop
sub annotate_with_swissprot {
    opt_default(eval=>"10e-25");
    my @argv = opt_get(@_);
    my $gb = opt_as_gb(shift @argv);
    
    my $eval = opt_val("eval");

    foreach my $cds ($gb->cds()){
	my $seq = $gb->{$cds}->{translation};
	$seq = translate($gb->get_geneseq($cds)) unless(length $seq);
	
	my $result = sp_blast($seq);
	if($result->{0}->{eval} <= $eval){
	    $gb->{$cds}->{sp_blast} = 
		sprintf("%.2f pct identical to sw:%s\( %s\) gi:%s e-val: %s score: %s", 
			$result->{0}->{identity}, $result->{0}->{accession},
			$result->{0}->{entry}, $result->{0}->{gi}, 
			$result->{0}->{eval}, $result->{0}->{score}
			);
	    $gb->{$cds}->{sp_xref} = $result->{0}->{entry};
	    $gb->{$cds}->{sp_xref_ac} = $result->{0}->{accession};
	}
    }

    return 1;
}
blastalldescriptionprevnextTop
sub blastall {
    require LWP::UserAgent;
    require G::IO;
    my @argv = opt_get(@_);
    my %data = opt_val();

    my $program  = $data{'p'} || 'blastp';
    my $database = $data{'d'} || 'SWISS';
    my $outfile  = $data{'o'} || 'stdout';
    my $infile   = $data{'i'} || shift @argv;

    delete($data{'p'});
    delete($data{'d'});
    delete($data{'o'});
    delete($data{'i'});

    my $query = $infile;
    if (-e $infile) {
	my $gb = new G::IO($infile, 'fasta', 'no msg');
	$query = $gb->{SEQ};
    }

    my $option = '';
    foreach my $key (keys %data){
	$option .= '-' . $key . ' ' . $data{$key};
    }

    my $ua = LWP::UserAgent->new;
    my $request = HTTP::Request->new('POST',"http://xml.nig.ac.jp/rest/Invoke");
    $request->content_type("application/x-www-form-urlencoded");
    
    if ($program && $database && $infile) {
	$request->content("service=Blast&method=searchParam&program=$program&database=$database&query=$query&param=$option");
    } else {
	msg_error << "__HELP__"

blastall arguments:

-p Program Name [String]
-d Database [String]
default = nr
-i Query File [File In]
default = stdin
-e Expectation value (E) [Real]
default = 10.0
-m alignment view options:
0 = pairwise,
1 = query-anchored showing identities,
2 = query-anchored no identities,
3 = flat query-anchored, show identities,
4 = flat query-anchored, no identities,
5 = query-anchored no identities and blunt ends,
6 = flat query-anchored, no identities and blunt ends,
7 = XML Blast output,
8 = tabular,
9 tabular with comment lines
10 ASN, text
11 ASN, binary [Integer]
default = 0
range from 0 to 11
-o BLAST report Output File [File Out] Optional
default = stdout
-F Filter query sequence (DUST with blastn, SEG with others) [String]
default = T
-G Cost to open a gap (-1 invokes default behavior) [Integer]
default = -1
-E Cost to extend a gap (-1 invokes default behavior) [Integer]
default = -1
-X X dropoff value for gapped alignment (in bits) (zero invokes default behavior)
blastn 30, megablast 20, tblastx 0, all others 15 [Integer]
default = 0
-I Show GI's in deflines [T/F]
default = F
-q Penalty for a nucleotide mismatch (blastn only) [Integer]
default = -3
-r Reward for a nucleotide match (blastn only) [Integer]
default = 1
-v Number of database sequences to show one-line descriptions for (V) [Integer]
default = 500
-b Number of database sequence to show alignments for (B) [Integer]
default = 250
-f Threshold for extending hits, default if zero
blastp 11, blastn 0, blastx 12, tblastn 13
tblastx 13, megablast 0 [Real]
default = 0
-g Perform gapped alignment (not available with tblastx) [T/F]
default = T
-Q Query Genetic code to use [Integer]
default = 1
-D DB Genetic code (for tblast[nx] only) [Integer]
default = 1
-a Number of processors to use [Integer]
default = 1
-O SeqAlign file [File Out] Optional
-J Believe the query defline [T/F]
default = F
-M Matrix [String]
default = BLOSUM62
-W Word size, default if zero (blastn 11, megablast 28, all others 3) [Integer]
default = 0
-z Effective length of the database (use zero for the real size) [Real]
default = 0
-K Number of best hits from a region to keep (off by default, if used a value of 100 is recommended) [Integer]
default = 0
-P 0 for multiple hit, 1 for single hit (does not apply to blastn) [Integer]
default = 0
-Y Effective length of the search space (use zero for the real size) [Real]
default = 0
-S Query strands to search against database (for blast[nx], and tblastx)
3 is both, 1 is top, 2 is bottom [Integer]
default = 3
-T Produce HTML output [T/F]
default = F
-l Restrict search of database to list of GI's [String] Optional
-U Use lower case filtering of FASTA sequence [T/F] Optional
-y X dropoff value for ungapped extensions in bits (0.0 invokes default behavior)
blastn 20, megablast 10, all others 7 [Real]
default = 0.0
-Z X dropoff value for final gapped alignment in bits (0.0 invokes default behavior)
blastn/megablast 50, tblastx 0, all others 25 [Integer]
default = 0
-R PSI-TBLASTN checkpoint file [File In] Optional
-n MegaBlast search [T/F]
default = F
-L Location on query sequence [String] Optional
-A Multiple Hits window size, default if zero (blastn/megablast 0, all others 40 [Integer]
default = 0
-w Frame shift penalty (OOF algorithm for blastx) [Integer]
default = 0
-t Length of the largest intron allowed in a translated nucleotide sequence when linking multiple distinct alignments. (0 invokes default behavior; a negative value disables linking.) [Integer]
default = 0
-B Number of concatenated queries, for blastn and tblastn [Integer] Optional
default = 0
-V Force use of the legacy BLAST engine [T/F] Optional
default = F
-C Use composition-based statistics for blastp or tblastn:
As first character:
D or d: default (equivalent to T)
0 or F or f: no composition-based statistics
1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
2: Composition-based score adjustment as in Bioinformatics 21:902-911,
2005, conditioned on sequence properties
3: Composition-based score adjustment as in Bioinformatics 21:902-911,
2005, unconditionally
For programs other than tblastn, must either be absent or be D, F or 0.
As second character, if first character is equivalent to 1, 2, or 3:
U or u: unified p-value combining alignment p-value and compositional p-value in round 1 only
[String]
default = D
-s Compute locally optimal Smith-Waterman alignments (This option is only
available for gapped tblastn.) [T/F]
default = F

__HELP__
msg_error "Displays information on the currently available databases\n"; msg_error "=========================================================\n"; $request->content("service=Blast&method=getSupportDatabaseList"); } my $response; my $callback = sub { for (@_) { msg_send($_) unless ref($_) =~ /HTTP::/ || /LWP::/; } }; if ($outfile && lc($outfile) ne "stdout") { $response = $ua->request( $request,$outfile ); } else { $response = $ua->request( $request, $callback, 30000 ); }
}
sp_blastdescriptionprevnextTop
sub sp_blast {
    my $seq = shift;

    _get_swissprot_for_blast();

    my $filename = to_fasta($seq, -filename=>"/tmp/seq.fasta");
    my @table = _gblaster("-p blastp -d /tmp/glang/swissprot -i $filename -m8");

    my $data = {};
    my $i = 0;

    foreach my $line (@table){
	my @gi = split(/\|/, $$line[1]);
	$data->{$i}->{identity} = $$line[2];
	$data->{$i}->{length} = $$line[3];
	$data->{$i}->{eval} = $$line[8];
	$data->{$i}->{score} = $$line[9];
	$data->{$i}->{gi} = $gi[1];
	$data->{$i}->{accession} = $gi[3];
	$data->{$i}->{entry} = $gi[4];
	$i ++;
    }

    return $data;
}
General documentation
No general documentation available.