# set_operon - attach operon annotation from RegulonDB to a genome object.
#
# Arguments (after being passed through opt_get()):
#   $gb - genome object; read via $gb->{LOCUS}->{id}, $gb->gene2id() and
#         $gb->cds(), and updated in place.
#
# Behaviour:
#   * Only genomes whose LOCUS id is 'U00096' or 'NC_000913' are supported;
#     for anything else a message is emitted with msg_error() and nothing
#     is changed.
#   * OperonSet.txt is mirrored from RegulonDB into
#     $HOME/.glang/data/OperonSet.txt and parsed.
#   * For every operon listed with two or more genes, each gene that can be
#     resolved with gene2id() gets
#         $gb->{$cds}->{operon}  = operon name
#         $gb->{$cds}->{operonN} = 1-based position inside the operon,
#     counted by ascending start for 'forward' operons and by descending
#     start otherwise.
#   * Every CDS left without an operon gets operonN = 0.
#
# Returns 1. Dies if the data file is missing after mirroring or cannot be
# opened.
sub set_operon {
    my @args = opt_get(@_);
    my $gb   = shift @args;

    my $locus_id = $gb->{LOCUS}->{id};
    unless ($locus_id eq 'U00096' || $locus_id eq 'NC_000913') {
        msg_error("No Operon data for this species.\n\n");
        return 1;
    }

    my $url = "http://regulondb.ccg.unam.mx:80/data/OperonSet.txt";
    my $dir = $ENV{HOME} . '/.glang/data/OperonSet.txt';

    # A previously mirrored copy is accepted when the download fails, so only
    # the existence of the local file is checked.
    mirror($url, $dir);
    die("setOperon: cannot retrieve data from RegulonDB.") unless (-e $dir);

    # The header is considered finished once the "Columns:" line plus the
    # "(1) .. (4)" column descriptions have been seen, i.e. five lines.
    # NOTE(review): if RegulonDB changes the number of described columns this
    # count no longer matches and no data rows are parsed - confirm against
    # the current file format.
    my $HEADER_LINES      = 5;
    my $header_lines_seen = 0;

    # Three-argument open with a lexical handle: the mode cannot be injected
    # through the path and no package-global FILE handle is touched.
    open(my $fh, '<', $dir) || die("setOperon: cannot open $dir: $!");

    # Read into a lexical instead of $_ so that neither the caller's $_ nor
    # the helpers called below can interfere with the parser.
    LINE:
    while (my $line = <$fh>) {
        chomp $line;

        if ($line =~ /^Columns\:/ || $line =~ /^\t\(\d\)\s/) {
            $header_lines_seen++;
            next LINE;
        }

        # Everything outside the data section is echoed to the user.
        if ($header_lines_seen != $HEADER_LINES) {
            msg_error($line, "\n");
            next LINE;
        }

        my ($operon, $num, $direction, $genes) = split(/\t/, $line, 4);

        # Blank or truncated rows carry no gene list; skip them quietly
        # rather than comparing and splitting undefined values.
        next LINE unless (defined $genes);
        next LINE unless ($num >= 2);

        # CDS id => start coordinate for the members of this operon.
        my %start_of;
        foreach my $genepair (split(/,/, $genes)) {
            my ($gene, $locustag) = split(/\|/, $genepair, 2);

            # Prefer the locus tag; fall back to the gene name.
            my $cds = $gb->gene2id($locustag);
            $cds = $gb->gene2id($gene) unless (length $cds);
            next unless ($cds);

            $gb->{$cds}->{operon} = $operon;
            $start_of{$cds}       = $gb->{$cds}->{start};
        }

        # Number the members by start position: ascending for 'forward'
        # operons, descending for all others.
        my @ordered = $direction eq 'forward'
            ? sort { $start_of{$a} <=> $start_of{$b} } keys %start_of
            : sort { $start_of{$b} <=> $start_of{$a} } keys %start_of;

        my $position = 1;
        foreach my $cds (@ordered) {
            $gb->{$cds}->{operonN} = $position++;
        }
    }
    close($fh);

    # CDSs that belong to no operon are marked with position 0.
    foreach my $cds ($gb->cds()) {
        $gb->{$cds}->{operonN} = 0 unless (length $gb->{$cds}->{operon});
    }

    return 1;
}
perl(1).