sub _blast
{ &opt_default(p=>'blastn',qr=>'off',input=>"file");
my @args=opt_get(@_);
my $qr=&opt_val("qr");
my $input=&opt_val("input");
my $seq;
my @param;
my $param;
my %opt;
my @tmp;
my $num;
$opt{d}=shift @args;
if($input eq 'seq'){
$seq=shift @args;
opendir(DIR,'/tmp');
@tmp=readdir(DIR);
$num=$#tmp+1+time;
@tmp=keys(%$seq);
close(DIR);
open(FILE,'>/tmp/blast_'.$num.'.seq');
print FILE ">$tmp[0]\n";
print FILE $$seq{$tmp[0]},"\n";
close(FILE);
$opt{i}='/tmp/blast_'.$num.'.seq';
}
else{
$opt{i}=shift @args;
}
$opt{p}=opt_val("p");
$opt{e}=opt_val("e");
$opt{m}=opt_val("m"); $opt{o}=opt_val("o");
$opt{F}=opt_val("F");
$opt{G}=opt_val("G");
$opt{E}=opt_val("E");
$opt{X}=opt_val("X");
$opt{I}=opt_val("I");
$opt{q}=opt_val("q"); $opt{r}=opt_val("r");
$opt{v}=opt_val("v");
$opt{b}=opt_val("b");
$opt{f}=opt_val("f");
$opt{g}=opt_val("g");
$opt{Q}=opt_val("Q");
$opt{D}=opt_val("D");
$opt{a}=opt_val("a");
$opt{J}=opt_val("J");
$opt{M}=opt_val("M");
$opt{W}=opt_val("W");
$opt{z}=opt_val("z");
$opt{K}=opt_val("K");
$opt{P}=opt_val("P");
$opt{Y}=opt_val("Y");
$opt{S}=opt_val("S");
$opt{T}=opt_val("T");
$opt{U}=opt_val("U");
$opt{y}=opt_val("y");
$opt{Z}=opt_val("Z");
$opt{n}=opt_val("n");
$opt{A}=opt_val("A");
$opt{w}=opt_val("w");
$opt{t}=opt_val("t");
$opt{L}=opt_val("L");
$opt{O}=opt_val("O");
$opt{l}=opt_val("l");
$opt{R}=opt_val("R");
foreach(sort keys(%opt)){
next if($opt{$_} eq '');
push(@param,'-'.$_);
push(@param,$opt{$_});
}
$param=join(' ',@param);
system('blastall',@param) if($qr eq "off");
system('qr',"blastall $param") if($qr eq "on");
unlink('/tmp/blast_'.$num.'.seq') if($input eq 'seq' && $qr ne 'on');
return $param;
}
sub _formatdb{
&opt_default(p=>'F',o=>'T');
my @args=opt_get(@_);
my $file=shift @args;
my @param;
my %opt;
$opt{p}=opt_val('p');
$opt{o}=opt_val('o');
$opt{t}=opt_val('t');
$opt{l}=opt_val('l');
$opt{a}=opt_val('a');
$opt{b}=opt_val('b');
$opt{e}=opt_val('e');
$opt{n}=opt_val('n');
$opt{v}=opt_val('v');
$opt{s}=opt_val('s'); $opt{V}=opt_val('V'); $opt{A}=opt_val('A');
foreach(sort keys(%opt)){
next if($opt{$_} eq '');
push(@param,'-'.$_);
push(@param,$opt{$_});
}
system('formatdb','-i',"$file",@param);
return '-i '."$file ".join(' ',@param);
}
sub _blast_tp_finder{
&opt_default(output => 'file',outfilename=>'parsed_blast_list',header => 'on');
my @args = opt_get(@_);
my $file=shift;
my $output = opt_val('output');
my $outfilename = opt_val('outfilename');
my $header = opt_val('header');
my @id_query;
my $query_name;
my @query;
my @query_num;
my $s1=0;
my @query_num_sort;
my $query_start;
my $query_end;
my @sbject_num;
my @sbject_num_sort;
my $sbject_start;
my $sbject_end;
my $length;
my @identities;
my $subject_name;
my @strand;
my @sbject;
my @infile;
my @score;
my $evalue;
my $this={};
my $p=0;
my $redundancy='';
open(OUTFILE,'>'.$outfilename) if($output eq 'file');
open(FILE,"$file");
while(<FILE>){
if(/Query= (\d+)_(\d+)_(.+)/){
$query_name=$3;
$query_name=~tr/\n//d; $redundancy = $1;
msg_send("$redundancy\n");
}
if(/^>/){
$s1=0;
$subject_name=$_;
$subject_name=~tr/>//d; $subject_name=~tr/\n//d; msg_send("$redundancy".'_vs_'."$subject_name\n");
print OUTFILE "$redundancy".'_vs_'."$subject_name\n" ;
next if($redundancy == $subject_name);
}
&msg_send("\n\tEVALUE\tQUERY\tDATABASE\tLENGTH\tQUERY_START\tQUERY_END\tDATABASE_START\tDATABASE_END\tMATCH\tIDENTITY\tSTRAND\n-------------------------------------------------------------------------------------------------------\n") if ($header eq 'on' && $output eq 'STDOUT' && $s1 == 0);
print OUTFILE "\n\tEVALUE\tQUERY\tDATABASE\tLENGTH\tQUERY_START\tQUERY_END\tDATABASE_START\tDATABASE_END\tMATCH\tIDENTITY\tSTRAND\n-------------------------------------------------------------------------------------------------------\n" if($header eq 'on' && $output eq 'file');
if(/Length =(.+)/){
$s1 = 1;
$length = $1;
$length =~tr/\n//d; }
if(/Identities =/ && $s1 ==1){
@identities = split(/\s/,$_);
}
if(/Strand/ && $s1 ==1){
@strand=split(/\s/,$_);
}
if(/Query:/){
@query=split(/[\s]+/,$_);
push(@query_num,$query[1]);
push(@query_num,$query[3]);
$s1=1;
}
if(/Score / || />/ || /Lambda/){
$this->{$p}->{"evalue"} = $evalue;
&msg_send("$evalue") if($output eq 'STDOUT');
print OUTFILE "$evalue" if($output eq 'file');
@score=split(/Expect/,$_);
$evalue=$score[1];
$evalue=~tr/=//d; $evalue=~tr/\n//d; @query_num_sort=sort {$b<=>$a} @query_num;
$query_start=pop @query_num_sort;
$query_start=~tr/\s//; $query_end=shift @query_num_sort;
$query_end=~tr/\s//; @sbject_num_sort=sort {$b<=>$a} @sbject_num;
$sbject_start=pop @sbject_num_sort;
$sbject_start=~tr/\s//; $sbject_end=shift @sbject_num_sort;
$sbject_end=~tr/\s//; $identities[4] =~ tr/\s\(\)//d;
if($query_start!=""){
$this->{$p}->{"query_name"} = $query_name;
$this->{$p}->{"subject_name"} = $subject_name;
$this->{$p}->{"length"} = $length;
$this->{$p}->{"query_start"} = $query_start;
$this->{$p}->{"query_end"} = $query_end;
$this->{$p}->{"subject_start"} = $sbject_start;
$this->{$p}->{"subject_end"} = $sbject_end;
$this->{$p}->{"identity"} = $identities[3];
$this->{$p}->{"percentage"} = $identities[4];
$this->{$p}->{"strand"} = $strand[5];
&msg_send("\t$query_name\t$subject_name\t$length\t$query_start\t$query_end\t$sbject_start\t$sbject_end\t$identities[3]\t$identities[4]\t$strand[5]\n") if ($output eq 'STDOUT');
print OUTFILE "\t$query_name\t$subject_name\t$length\t$query_start\t$query_end\t$sbject_start\t$sbject_end\t$identities[3]\t$identities[4]\t$strand[5]\n" if ($output eq 'file');
$p++;
}
@query=();
@query_num=();
@sbject=();
@sbject_num=();
@identities=();
}
if($s1==1){
if(/Sbjct:/){
@sbject=split(/\s+/,$_);
push(@sbject_num,$sbject[1]);
push(@sbject_num,$sbject[3]);
}
}
}
return $this;
close(FILE);
close(OUTFILE);
}
sub mapping_blast2{
require Bio::Tools::Run::StandAloneBlast;
msg_send("####################BLAST_SEARCHSTART#####################\n");
&opt_default(clustering=>"off", p=>"blastn",database=>"genome_file",query=>"cDNA_dir\/",
makedir=>"Blast_result\/",resultfile=>"blastresultlist");
my @args=opt_get(@_);
my $clustering=&opt_val("clustering");
my $p=&opt_val("p");
my $database=&opt_val("database");
my $query=&opt_val("query");
my $makedir=&opt_val("makedir");
my $resultfile=&opt_val("resultfile");
my %opt;
my($num,$clusterdata,$count,$factory,$paramnum,$blast_report,$item,$item2);
my(@cdnapass,@readpart,@selectedcdna,@blastoptname,@param,@params);
my($cdnaid,$parseresult1,$parseresult2,$icount,$jcount,$kcount,$queryplace,$memory);
my(@blastresultfiles,@blastdata,@genomeid);
if($clustering ne 'off'){
open(FILE, $clustering);
$num=0;
while(<FILE>){
if($_=~ /^[a-zA-Z0-9]/){
if($_=~ /,/){
$queryplace=$query;
$clusterdata=$_;
@readpart= split(/,/, $clusterdata);
@cdnapass[$num]="$query"."$readpart[0]".".seq";
$selectedcdna[$num]="$readpart[0]".".seq";
chomp @cdnapass[$num];
$num ++;
}
}
}
close FILE;
}
if($clustering eq 'off'){
if(-f $query){
$cdnapass[0]=$query;
$selectedcdna[0]=~ s/\s+\///g;
$num=1;
}
else{
opendir(R_DIR,"$query");
$queryplace=$query;
@selectedcdna=readdir(R_DIR);
$num=@selectedcdna;
if($selectedcdna[0]=~ /\./){
shift(@selectedcdna);
$num --;
}
if($selectedcdna[0]=~ /\.\./){
shift(@selectedcdna);
$num --;
}
$count=0;
foreach $item2(@selectedcdna){
$cdnapass[$count]="$query"."$item2";
$count++;
}
closedir(R_DIR);
}
}
$opt{p}=opt_val("p");
$opt{e}=opt_val("e");
$opt{m}=opt_val("m"); $opt{o}=opt_val("o");
$opt{F}=opt_val("F");
$opt{G}=opt_val("G");
$opt{E}=opt_val("E");
$opt{X}=opt_val("X");
$opt{I}=opt_val("I");
$opt{q}=opt_val("q"); $opt{r}=opt_val("r");
$opt{v}=opt_val("v");
$opt{b}=opt_val("b");
$opt{f}=opt_val("f");
$opt{g}=opt_val("g");
$opt{Q}=opt_val("Q");
$opt{D}=opt_val("D");
$opt{a}=opt_val("a");
$opt{J}=opt_val("J");
$opt{M}=opt_val("M");
$opt{W}=opt_val("W");
$opt{z}=opt_val("z");
$opt{K}=opt_val("K");
$opt{P}=opt_val("P");
$opt{Y}=opt_val("Y");
$opt{S}=opt_val("S");
$opt{T}=opt_val("T");
$opt{U}=opt_val("U");
$opt{y}=opt_val("y");
$opt{Z}=opt_val("Z");
$opt{n}=opt_val("n");
$opt{A}=opt_val("A");
$opt{w}=opt_val("w");
$opt{t}=opt_val("t");
$opt{L}=opt_val("L");
$opt{O}=opt_val("O");
$opt{l}=opt_val("l");
$opt{R}=opt_val("R");
foreach(sort keys(%opt)){
next if($opt{$_} eq '');
push(@blastoptname,$_);
push(@param,$opt{$_});
}
unless($makedir=~ /none/){
system("mkdir $makedir");
}
for($count=0;$count<$num;$count++){
msg_send("Searching_start: NO $count\n ") ;
if($makedir=~ /none/){
@params = ('program' => "$p",'database' => "$database",'outfile' => "$selectedcdna[$count]".".out", '_READMETHOD' => 'Blast');
}
else{
@params = ('program' => "$p",'database' => "$database",'outfile' => $makedir."/$selectedcdna[$count]".".out", '_READMETHOD' => 'Blast');
}
$factory = Bio::Tools::Run::StandAloneBlast->new(@params);
$paramnum=0;
foreach $item(@blastoptname){
$factory->$item($param[$paramnum]);
$paramnum ++;
}
msg_send("$cdnapass[$count]\n");
$blast_report = $factory->blastall($cdnapass[$count]);
msg_send("finish\n");
}
opendir(R_DIR,"$makedir");
@blastresultfiles=readdir(R_DIR);
$num=@blastresultfiles;
if($blastresultfiles[0]=~ /\./){
shift(@blastresultfiles);
$num --;
}
if($blastresultfiles[0]=~ /\.\./){
shift(@blastresultfiles);
$num --;
}
print "###################BLAST_PARSING_START####################\n";
for($count=0;$count<$num;$count++){
&blastparser(-open=>"$makedir"."$blastresultfiles[$count]" ,-resultfile=>"$resultfile")
}
$count=-1;
$icount=-1;
$kcount=0;
$jcount=0;
open(FILE9, "$resultfile");
while(<FILE9>){
if($_=~ /=/){
$icount++;
$jcount=0;
$kcount=0;
$count++;
$parseresult1=<FILE9>;
chomp $parseresult1;
@genomeid=split(/\t/,$parseresult1);
$genomeid[0]=~ s/>//g;
$genomeid[0]=~ s/ +//g;
$parseresult2=<FILE9>;
chomp $parseresult2;
@blastdata=split(/\t/,$parseresult2);
$memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]";
$memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0];
$memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1];
$memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9];
$memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11];
$memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13];
$memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database;
}
if($_=~ />/){
$jcount++;
$kcount=0;
chomp $_;
$parseresult1=$_;
chomp $parseresult1;
@genomeid=split(/\t/,$parseresult1);
$genomeid[0]=~ s/>//g;
$genomeid[0]=~ s/ +//g;
$parseresult2=<FILE9>;
chomp $parseresult2;
@blastdata=split(/\t/,$parseresult2);
$memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]";
$memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0];
$memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1];
$memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9];
$memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11];
$memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13];
$memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database;
$kcount++;
}
if($_=~ /^[0-9]/){
$kcount++;
$parseresult2=$_;
chomp $parseresult2;
@blastdata=split(/\t/,$parseresult2);
$memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]";
$memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0];
$memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1];
$memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9];
$memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11];
$memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13];
$memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database;
}
}
close FILE9;
msg_send("finish\n");
return $memory;
}
sub blastcutting{
&opt_default(basenum=>"30000",distance=>"50000", open=>"none", multicut=>"on",multidir=>"multientry\/",upper=>"0",logfilename=>"blastcut.log",resultdir=>"blastcut\/" );
my @args=opt_get(@_);
my $multidir=&opt_val("multidir");
my $basenum=&opt_val("basenum");
my $open=&opt_val("open");
my $upper=&opt_val("upper");
my $logfilename=&opt_val("logfilename");
my $resultdir=&opt_val("resultdir");
my $multicut=&opt_val("multicut");
my $distance=&opt_val("distance");
my $this = shift @args;
my %opt;
my($count,$acount,$bcount,$ccount,$hcount,$icount,$flag,$flag2,$entryname,$multigene,$tmp,$genomenum);
my($cutsentence,$sentence1,$sentence2,$cDNA_dir);
my(@hinum,@strand,@database_start,@database_end,@database_name,@database_bits,@query_name);
my(@max,@max_start,@max_end,@cDNA_pass);
$flag=1;
$flag2=1;
my $memory;
msg_send("#####################GENOME_CUT_START#####################\n");
msg_send("DEVIDE FASTA FILE...\n");
$hcount=0;
$count=0;
$acount=0;
$icount=0;
if($multicut eq "on"){
system("mkdir $multidir");
if($open=~ /none/){
open(FILE,"$this->{'0'}->{'0'}->{'0'}->{database_place}");
}
else{
open(FILE,"$open");
}
while(<FILE>){
if($_=~ />/){
if($flag=0){
close W_FILE;
$flag=1;
}
$entryname=$_;
chomp $entryname;
$entryname=~ s/>//g;
$entryname=~ s/ //g;
open(W_FILE,">$multidir"."$entryname");
print W_FILE ">$entryname\n";
$flag=0;
}
else{
$multigene=$_;
$multigene=~ s/\r\n/\n/g;
chomp $multigene;
$multigene=~ s/[^ATGCatgcNn]/N/g;
print W_FILE $multigene;
}
}
close FILE;
close W_FILE;
}
$flag=1;
while($flag==1){
$flag2=1;
while($flag2==1){
if($this->{$hcount}->{$icount}->{$count}->{strand}=~ /s/ ){
$strand[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{strand};
$database_start[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_start};
$database_end[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_end};
$database_name[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_name};
$query_name[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{query_name};
$database_bits[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_bits};
$count ++;
}
else{
$max[$hcount][$icount]=$count;
$icount++;
$count=0;
unless($this->{$hcount}->{$icount}->{$count}->{strand}=~ /[PM]/ ){
$flag2=0;
}
}
}
$hinum[$hcount]=$icount;
$hcount ++;
$icount=0;
unless($this->{$hcount}->{$icount}->{$count}->{strand}=~ /[PM]/ ){
$flag=0;
}
}
for($ccount=0;$ccount<$hcount;$ccount++){
for($bcount=0;$bcount<$hinum[$ccount];$bcount++){
if($strand[$ccount][$bcount][0] eq "Plus"){
$max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][0];
$max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][0];
for($acount=0;$acount<$max[$ccount][$bcount];$acount++){
if($strand[$ccount][$bcount][$acount] eq "Plus"){
if($max_start[$ccount][$bcount]>$database_start[$ccount][$bcount][$acount] && $max_start[$ccount][$bcount]-$database_start[$ccount][$bcount][$acount]+1<$distance ){
$max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][$acount];
}
if($max_end[$ccount][$bcount]<$database_end[$ccount][$bcount][$acount] && $database_end[$ccount][$bcount][$acount]-$max_end[$ccount][$bcount]+1<$distance){
$max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][$acount];
}
}
}
}
else{
$max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][0];
$max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][0];
for($acount=0;$acount<$max[$ccount][$bcount];$acount++){
if($strand[$ccount][$bcount][$acount] eq "Minus"){
if($max_start[$ccount][$bcount]<$database_start[$ccount][$bcount][$acount] && $database_start[$ccount][$bcount][$acount] - $max_start[$ccount][$bcount]+1<$distance){
$max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][$acount];
}
if($max_end[$ccount][$bcount]>$database_end[$ccount][$bcount][$acount] && $max_end[$ccount][$bcount]-$database_end[$ccount][$bcount][$acount] +1 <$distance){
$max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][$acount];
}
}
}
$tmp=$max_start[$ccount][$bcount];
$max_start[$ccount][$bcount]=$max_end[$ccount][$bcount];
$max_end[$ccount][$bcount]=$tmp;
}
}
}
msg_send("PRINTING SEQUENCE...\n");
$cDNA_dir=$query_name[0][0][0];
$cDNA_dir=~ s/=//g;
@cDNA_pass=split(/\//,$cDNA_dir);
pop(@cDNA_pass);
$cDNA_dir=join('/',@cDNA_pass);
open(ROG,">$logfilename");
print ROG "cDNA: $cDNA_dir\n";
print ROG "Genome: $resultdir\n\n";
system("mkdir $resultdir");
for($ccount=0;$ccount<$hcount;$ccount++){
if($upper==0){$upper=$hinum[$ccount];}
if($upper>$hinum[$ccount]){$upper=$hinum[$ccount];}
for($bcount=0;$bcount<$upper;$bcount++){
print ROG "$query_name[$ccount][$bcount][0]\n";
$memory->{$ccount}->{Query_Path}->{$bcount} ="$query_name[$ccount][$bcount][0]";
$memory->{$query_name[$ccount][$bcount][0]}->{Query_Path}->{$bcount} ="$query_name[$ccount][$bcount][0]";
open(FILE,"$multidir"."$database_name[$ccount][$bcount][0]");
while(<FILE>){
if($_=~ />/){
$sentence1=$_;
$sentence2=<FILE>;
}
}
chomp $sentence2;
$genomenum=length($sentence2);
$max_start[$ccount][$bcount]=$max_start[$ccount][$bcount]-$basenum;
$max_end[$ccount][$bcount]=$max_end[$ccount][$bcount]+$basenum;
if($max_start[$ccount][$bcount]<1){$max_start[$ccount][$bcount]=1;}
if($max_end[$ccount][$bcount]>$genomenum){$max_end[$ccount][$bcount]=$genomenum;}
print ROG "$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]\n\n";
$memory->{$ccount}->{Genome_Path}->{$bcount} ="$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]";
$memory->{$query_name[$ccount][$bcount][0]}->{Genome_Path}->{$bcount} ="$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]";
$memory->{$ccount}->{Genome_Start}->{$bcount} = $max_start[$ccount][$bcount];
$memory->{$query_name[$ccount][$bcount][0]}->{Genome_Start}->{$bcount} = $max_start[$ccount][$bcount];
$memory->{$ccount}->{Genome_End}->{$bcount} =$max_end[$ccount][$bcount];
$memory->{$query_name[$ccount][$bcount][0]}->{Genome_End}->{$bcount} =$max_end[$ccount][$bcount];
open(W_FILE,">$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]");
$cutsentence=substr($sentence2,$max_start[$ccount][$bcount]-1,$max_end[$ccount][$bcount]-$max_start[$ccount][$bcount]+1);
print W_FILE ">$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]\n";
print W_FILE "$cutsentence";
close FILE;
close W_FILE;
}
}
$memory->{Genome_ID}=$multidir;
msg_send("BLAST_COMPLETE\n");
return $memory;
}
sub _post_blast_clusterer{
&opt_default(e=>'e-100',filename=>'blast_clusters.tmp',output=>'f');
my @args = &opt_get(@_);
my $file = shift @args;
my $eval = &opt_val('e');
my $filename = &opt_val('filename');
my $output = &opt_val('output');
my $clstlist;
my $this = {};
my $i = 0;
my ($query,$subject,$tmp,$clstlist);
my @list = ();
my @clusters = ();
my %red;
open(FILE,$file);
open(OUTFILE,">$filename");
while(<FILE>){
if(/^=(.+)/){
if($i > 0){
$clstlist = join(',',@list);
push (@clusters,$clstlist);
print OUTFILE ("$clstlist\n")if($output eq 'f');
}
$i++;
@list = ();
$clstlist = '';
$query = $1;
push(@list,$query);
}
elsif(/^>(.+)\s+([0-9]+)/){
$subject = $1;
$subject =~ s/\s\n//g;
next if($subject eq $query);
while(<FILE>){
if(/^.+\s+[0-9]+\s+(.+)\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+(.+)\s+([0-9])+\s+([0-9])+\s+([0-9])+\s+([0-9])+/){
if($1 <= $eval){
push(@list,$subject);
msg_send(".");
$this->{$query}->{$subject}->{strand} = $2;
$this->{$query}->{$subject}->{qstart} = $3;
$this->{$query}->{$subject}->{sstart} = $4;
$this->{$query}->{$subject}->{qend} = $5;
$this->{$query}->{$subject}->{send} = $6;
}
}
last;
}
}
}
$clstlist = join(',',@list);
push (@clusters,$clstlist);
print OUTFILE ("$clstlist\n")if($output eq 'f');
@list = ();
close(FILE);
close(OUTFILE);
return @clusters;
}
sub _list_clusterer {
&opt_default(filename=>'cluster_list',sort=>'on',split=>',');
&opt_get(@_);
my $input = shift;
my $filename = opt_val('filename');
my $sort = opt_val('sort');
my $split = opt_val('split');
my @clusters=();
my @array=();
my ($cluster,$new,$key);
my $a;
my $c=1;
my $i=0;
my $key;
my %hash;
open(INFILE,"$input");
open(OUTFILE,">$filename");
open(TMP,">tmp.txt");
while(<INFILE>){
chomp;
@array = split(/$split/);
$cluster = $c;
$new = 1;
msg_send(".");
foreach $key (sort keys %hash){
foreach $a (@array){
$a =~ s/\s//g;
if($key eq $a){
$cluster = $hash{$key};
$new = 0;
last;
}
}
if($new == 0){
last;
}
}
if($new == 1){
foreach $a (@array){
$hash{$a} = $c;
}
$c++;
print TMP $c,"\n";
}
else{
foreach $a (@array){
$hash{$a} = $cluster;
}
print TMP $cluster, "\n";
}
}
close(INFILE);
for($i=1;$i<=$c;$i++){
foreach $key (keys %hash){
if($hash{$key} == $i){
print OUTFILE $key, ",";
}
}
print OUTFILE "\n";
}
close(INFILE);
close(OUTFILE);
_list_sorter("$filename",-filename=>"$filename.sort",-split=>"$split") if ($sort ne 'off');
}
sub _list_sorter {
&opt_default(split=>',',filename=>"sort_list");
my @args = &opt_get(@_);
my $input = shift @args;
my $filename = &opt_val('filename');
my $split = &opt_val('split');
my $list;
my (@array,@narray,@flist);
open(IN,"$input");
open(OUT,">$filename");
while(<IN>){
chomp;
@array = split(/$split/);
@narray = ();
foreach(sort @array){
$_ =~ s/\s//g;
push(@narray,$_)if($_ ne '');
}
if(scalar @narray > 1){
$list = join(',',@narray);
}else{
$list = $_;
}
push(@flist,$list);
print OUT "$list\n";
}
close(IN);
close(OUT);
return(@flist);
foreach(@flist){
msg_send("$_\n");
}
}
sub blastparser{
&opt_default(open=>"result" ,filename=>"blastresultlist");
my @args=opt_get(@_);
my $open=&opt_val("open");
my $resultfile=&opt_val("filename");
my $this = shift @args;
my $seq;
my $param;
my @param;
my %opt;
my @blastoptname;
my $sentence; my $sentence1; my $sentence2; my $sentence3; my $sentence4; my $sentence5;
my $sentence20;
my $lA; my $lB; my $lC; my $lD; my $lE; my $lF; my $lG;
my @lva; my @lvb;
my @sentenceA; my @sentenceB; my @sentenceC; my $sentenceem; my @partse; my @partse2; my @partig; my @partig2; my @partig3; my @partig12; my @partig13;
open(FILE, $open);
open(FILE2, ">>$resultfile");
print FILE2 "QUERY\n";
print FILE2 "DATABASE\t";
print FILE2 "DATABASE_ALLBITS\n";
print FILE2 "SCOREBITS\t";
print FILE2 "SCORE\t";
print FILE2 "EXPECT\t";
print FILE2 "IDENTITIES(A\/\t";
print FILE2 "B)\t";
print FILE2 "%MATCH\t";
print FILE2 "GAP(A\/\t";
print FILE2 "B)\t";
print FILE2 "%GAP\t";
print FILE2 "STRAND\t";
print FILE2 "QUERY_START\t";
print FILE2 "SBJCT_START\t";
print FILE2 "QUERY_END\t";
print FILE2 "SBJCT_END\t\n";
print FILE2 "---------------------------------------------------------------------------------------------\n";
$lA=0;
$lB=0;
$lC=0;
$lD=0;
$lE=0;
$lF=0;
$lG=0;
while(<FILE>){
if($_=~ /Query= /){
$sentence=$_;
$sentence=~ s/Query= //g;
print FILE2 "=$sentence";
}
if($_=~ />/ || $sentence3 =~ />/){
if($_=~ />/){
chomp $_;
print FILE2 "$_\t"; $sentence20=<FILE>;
chomp $sentence20;
$sentence20=~ s/ //g;
$sentence20=~ s/Length=//g;
print FILE2 "$sentence20\n"; }
if($sentence3 =~ />/){
chomp $sentence3;
print FILE2 "$sentence3\t"; chomp $sentence20;
$sentence20=~ s/ //g;
$sentence20=~ s/Length=//g;
print FILE2 "$sentence20\n"; $sentence3=$sentenceem;
}
}
if($_=~ /^ Score/ || $sentence4 =~ /Score/){
if($_=~ /Score/){
$sentenceA[$lA]=$_;
chomp $sentenceA[$lA];
@partse=split(/,/,$sentenceA[$lA]);
$partse[0]=~ s/ +//g;
$partse[0]=~ s/Score=//g;
$partse[0]=~ s/bits//g;
$partse[0]=~ s/\)//g;
@partse2=split(/\(/,$partse[0]);
$partse[1]=~ s/ +//g;
$partse[1]=~ s/Expect=//g;
print FILE2 "$partse2[0]\t";
print FILE2 "$partse2[1]\t";
print FILE2 "$partse[1]\t";
$lA++;
$sentenceA[$lA]=<FILE>;
chomp $sentenceA[$lA];
@partig=split(/,/,$sentenceA[$lA]);
$partig[0]=~ s/ +//g;
$partig[0]=~ s/Identities=//g;
$partig[0]=~ s/%\)//g;
@partig2=split(/\//,$partig[0]);
print FILE2 "$partig2[0]\t";
@partig3=split(/\(/,$partig2[1]);
print FILE2 "$partig3[0]\t";
print FILE2 "$partig3[1]\t";
$partig[1]=~ s/ +//g;
$partig[1]=~ s/Gaps=//g;
$partig[1]=~ s/%\)//g;
@partig12=split(/\//,$partig[1]);
@partig13=split(/\(/,$partig12[1]);
if($partig[1]=~ /[0-9]/){
print FILE2 "$partig12[0]\t";
print FILE2 "$partig13[0]\t";
print FILE2 "$partig13[1]\t";
}
else{
print FILE2 "0\t";
print FILE2 "0\t";
print FILE2 "0\t";
}
$lA++;
$sentenceA[$lA]=<FILE>;
chomp $sentenceA[$lA];
$sentenceA[$lA]=~ s/ //g;
$sentenceA[$lA]=~ s/Strand=//g;
$sentenceA[$lA]=~ s/Plus\///g;
print FILE2 "$sentenceA[$lA]\t";
$lA++;
}
if($sentence4=~ /Score/){
$sentenceA[$lA]=$sentence4;
chomp $sentenceA[$lA];
@partse=split(/,/,$sentenceA[$lA]);
$partse[0]=~ s/ +//g;
$partse[0]=~ s/Score=//g;
$partse[0]=~ s/bits//g;
$partse[0]=~ s/\)//g;
@partse2=split(/\(/,$partse[0]);
$partse[1]=~ s/ +//g;
$partse[1]=~ s/Expect=//g;
print FILE2 "$partse2[0]\t";
print FILE2 "$partse2[1]\t";
print FILE2 "$partse[1]\t";
$lA++;
$sentenceA[$lA]=$_;
chomp $sentenceA[$lA];
@partig=split(/,/,$sentenceA[$lA]);
$partig[0]=~ s/ +//g;
$partig[0]=~ s/Identities=//g;
$partig[0]=~ s/%\)//g;
@partig2=split(/\//,$partig[0]);
print FILE2 "$partig2[0]\t";
@partig3=split(/\(/,$partig2[1]);
print FILE2 "$partig3[0]\t";
print FILE2 "$partig3[1]\t";
$partig[1]=~ s/ +//g;
$partig[1]=~ s/Gaps=//g;
$partig[1]=~ s/%\)//g;
@partig12=split(/\//,$partig[1]);
@partig13=split(/\(/,$partig12[1]);
if($partig[1]=~ /[0-9]/){
print FILE2 "$partig12[0]\t";
print FILE2 "$partig13[0]\t";
print FILE2 "$partig13[1]\t";
}
else{
print FILE2 "0\t";
print FILE2 "0\t";
print FILE2 "0\t";
}
$lA++;
$sentenceA[$lA]=<FILE>;
chomp $sentenceA[$lA];
$sentenceA[$lA]=~ s/ //g;
$sentenceA[$lA]=~ s/Strand=//g;
$sentenceA[$lA]=~ s/Plus\///g;
print FILE2 "$sentenceA[$lA]\t";
$lA++;
$sentence4=$sentenceem;
}
}
if($_=~ /Query:/){
$sentenceB[$lB]=$_;
$lB ++;
$sentence1=<FILE>;
$sentence2=<FILE>;
$sentenceC[$lC]=$sentence2;
$lC++;
while(<FILE>){
if($_=~ /Query:/){
$sentenceB[$lB]=$_;
$lB ++;
$sentence1=<FILE>;
$sentence2=<FILE>;
$sentenceC[$lC]=$sentence2;
$lC++;
}
if($_=~ />/ || $_ =~ /Score/ || $_ =~ / Database/){
if($_=~ />/){
$sentence3=$_;
@lva=split(/\s+/,$sentenceB[$lD]);
@lvb=split(/\s+/,$sentenceC[$lE]);
print FILE2 "$lva[1]\t";
print FILE2 "$lvb[1]\t";
@lva=split(/\s+/,$sentenceB[$lB-1]);
@lvb=split(/\s+/,$sentenceC[$lC-1]);
print FILE2 "$lva[3]\t";
print FILE2 "$lvb[3]\t\n";
$lD=$lB;
$lE=$lC;
$sentence20=<FILE>;
last;
}
if($_=~ /Score/){
$sentence4=$_;
@lva=split(/\s+/,$sentenceB[$lD]);
@lvb=split(/\s+/,$sentenceC[$lE]);
print FILE2 "$lva[1]\t";
print FILE2 "$lvb[1]\t";
@lva=split(/\s+/,$sentenceB[$lB-1]);
@lvb=split(/\s+/,$sentenceC[$lC-1]);
print FILE2 "$lva[3]\t";
print FILE2 "$lvb[3]\t\n";
$lD=$lB;
$lE=$lC;
last;
}
if($_=~ /Database/){
$sentence5=$_;
@lva=split(/\s+/,$sentenceB[$lD]);
@lvb=split(/\s+/,$sentenceC[$lE]);
print FILE2 "$lva[1]\t";
print FILE2 "$lvb[1]\t";
@lva=split(/\s+/,$sentenceB[$lB-1]);
@lvb=split(/\s+/,$sentenceC[$lC-1]);
print FILE2 "$lva[3]\t";
print FILE2 "$lvb[3]\t\n";
$lD=$lB;
$lE=$lC;
last;
}
}
}
}
}
close FILE;
close FILE2;
}
1;} |
sub blastall
{ require LWP::UserAgent;
require G::IO;
my @argv = opt_get(@_);
my %data = opt_val();
my $program = $data{'p'} || 'blastp';
my $database = $data{'d'} || 'SWISS';
my $outfile = $data{'o'} || 'stdout';
my $infile = $data{'i'} || shift @argv;
delete($data{'p'});
delete($data{'d'});
delete($data{'o'});
delete($data{'i'});
my $query = $infile;
if (-e $infile) {
my $gb = new G::IO($infile, 'fasta', 'no msg');
$query = $gb->{SEQ};
}
my $option = '';
foreach my $key (keys %data){
$option .= '-' . $key . ' ' . $data{$key};
}
my $ua = LWP::UserAgent->new;
my $request = HTTP::Request->new('POST',"http://xml.nig.ac.jp/rest/Invoke");
$request->content_type("application/x-www-form-urlencoded");
if ($program && $database && $infile) {
$request->content("service=Blast&method=searchParam&program=$program&database=$database&query=$query¶m=$option");
} else {
msg_error << "__HELP__"
blastall arguments:
-p Program Name [String] -d Database [String] default = nr -i Query File [File In] default = stdin -e Expectation value (E) [Real] default = 10.0 -m alignment view options: 0 = pairwise, 1 = query-anchored showing identities, 2 = query-anchored no identities, 3 = flat query-anchored, show identities, 4 = flat query-anchored, no identities, 5 = query-anchored no identities and blunt ends, 6 = flat query-anchored, no identities and blunt ends, 7 = XML Blast output, 8 = tabular, 9 tabular with comment lines 10 ASN, text 11 ASN, binary [Integer] default = 0 range from 0 to 11 -o BLAST report Output File [File Out] Optional default = stdout -F Filter query sequence (DUST with blastn, SEG with others) [String] default = T -G Cost to open a gap (-1 invokes default behavior) [Integer] default = -1 -E Cost to extend a gap (-1 invokes default behavior) [Integer] default = -1 -X X dropoff value for gapped alignment (in bits) (zero invokes default behavior) blastn 30, megablast 20, tblastx 0, all others 15 [Integer] default = 0 -I Show GI's in deflines [T/F] default = F -q Penalty for a nucleotide mismatch (blastn only) [Integer] default = -3 -r Reward for a nucleotide match (blastn only) [Integer] default = 1 -v Number of database sequences to show one-line descriptions for (V) [Integer] default = 500 -b Number of database sequence to show alignments for (B) [Integer] default = 250 -f Threshold for extending hits, default if zero blastp 11, blastn 0, blastx 12, tblastn 13 tblastx 13, megablast 0 [Real] default = 0 -g Perform gapped alignment (not available with tblastx) [T/F] default = T -Q Query Genetic code to use [Integer] default = 1 -D DB Genetic code (for tblast[nx] only) [Integer] default = 1 -a Number of processors to use [Integer] default = 1 -O SeqAlign file [File Out] Optional -J Believe the query defline [T/F] default = F -M Matrix [String] default = BLOSUM62 -W Word size, default if zero (blastn 11, megablast 28, all others 3) [Integer] default = 0 -z Effective length of the database (use zero for the real size) [Real] default = 0 -K Number of best hits from a region to keep (off by default, if used a value of 100 is recommended) [Integer] default = 0 -P 0 for multiple hit, 1 for single hit (does not apply to blastn) [Integer] default = 0 -Y Effective length of the search space (use zero for the real size) [Real] default = 0 -S Query strands to search against database (for blast[nx], and tblastx) 3 is both, 1 is top, 2 is bottom [Integer] default = 3 -T Produce HTML output [T/F] default = F -l Restrict search of database to list of GI's [String] Optional -U Use lower case filtering of FASTA sequence [T/F] Optional -y X dropoff value for ungapped extensions in bits (0.0 invokes default behavior) blastn 20, megablast 10, all others 7 [Real] default = 0.0 -Z X dropoff value for final gapped alignment in bits (0.0 invokes default behavior) blastn/megablast 50, tblastx 0, all others 25 [Integer] default = 0 -R PSI-TBLASTN checkpoint file [File In] Optional -n MegaBlast search [T/F] default = F -L Location on query sequence [String] Optional -A Multiple Hits window size, default if zero (blastn/megablast 0, all others 40 [Integer] default = 0 -w Frame shift penalty (OOF algorithm for blastx) [Integer] default = 0 -t Length of the largest intron allowed in a translated nucleotide sequence when linking multiple distinct alignments. (0 invokes default behavior; a negative value disables linking.) [Integer] default = 0 -B Number of concatenated queries, for blastn and tblastn [Integer] Optional default = 0 -V Force use of the legacy BLAST engine [T/F] Optional default = F -C Use composition-based statistics for blastp or tblastn: As first character: D or d: default (equivalent to T) 0 or F or f: no composition-based statistics 1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001 2: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties 3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally For programs other than tblastn, must either be absent or be D, F or 0. As second character, if first character is equivalent to 1, 2, or 3: U or u: unified p-value combining alignment p-value and compositional p-value in round 1 only [String] default = D -s Compute locally optimal Smith-Waterman alignments (This option is only available for gapped tblastn.) [T/F] default = F
__HELP__
msg_error "Displays information on the currently available databases\n";
msg_error "=========================================================\n";
$request->content("service=Blast&method=getSupportDatabaseList");
}
my $response;
my $callback = sub {
for (@_) {
msg_send($_) unless ref($_) =~ /HTTP::/ || /LWP::/;
}
};
if ($outfile && lc($outfile) ne "stdout") {
$response = $ua->request( $request,$outfile );
} else {
$response = $ua->request( $request, $callback, 30000 );
}} |