G::Tools Literature
Included libraries | Package variables | General documentation | Methods
Package variables
No package variables defined.
Included modules
G::Messenger
SelfLoader
SubOpt
Inherit
Exporter
Synopsis
No synopsis!
Description
No description!
Methods
KeySearch
No description
Code
PDFtoTEXT
No description
Code
PubMedSearch
No description
Code
WordCount
No description
Code
Methods description
None available.
Methods code
KeySearch | description | prev | next | Top
# KeySearch(DIR, KEY)
#
# Counts occurrences of KEY in every document found in DIR and writes a
# report, sorted by descending count, to "DIR/DIR.key".
#
#   DIR - directory holding the documents; "*.log" and "*.key" files are
#         skipped, "*.pdf" files are first converted with PDFtoTEXT, and
#         every other entry is counted as a text file.
#   KEY - key word handed to WordCount for each document.
#
# Returns the number of documents examined (0 when the directory is empty).
# Returns nothing, after a warning, when DIR cannot be read or the report
# file cannot be created.
sub KeySearch {
    &opt_default();
    my @args=opt_get(@_);

    my $dir=shift @args;
    my $key=shift @args;
    my @txts;
    my %hash;
    # Counters start at 0 so that the report prints "0" rather than an
    # empty string when nothing of that kind was found.
    my $i=0;
    my $pdf=0;
    my $txt=0;

    my $dh;
    unless(opendir($dh, $dir)){
        warn "KeySearch: cannot open directory \"$dir\": $!\n";
        return;
    }
    my @files=readdir($dh);
    closedir($dh);

    my $report="$dir/$dir".'.key';
    my $key_fh;
    unless(open($key_fh, '>', $report)){
        warn "KeySearch: cannot write \"$report\": $!\n";
        return;
    }

    foreach my $file (@files){
        next if($file eq '.' || $file eq '..');
        next if($file =~ /\.log$/);
        next if($file =~ /\.key$/);

        # Anchored so that only real "*.pdf" names are converted; this is
        # the same suffix PDFtoTEXT rewrites to ".txt".
        if($file =~ /\.pdf$/){
            push(@txts, PDFtoTEXT("$dir/$file"));
            $pdf++;
        }
        else{
            push(@txts, "$dir/$file");
            $txt++;
        }
    }

    foreach my $path (@txts){
        # Normalise "no hit" to 0 so the numeric sort below never sees undef.
        $hash{$path}=WordCount($path, $key) || 0;
        $i++;
    }

    print $key_fh '**************************************************************'."\n".'**** Key Search (1.00)  Key Word Count from PDF Documents ****'."\n".'**************************************************************'."\n\n";

    print $key_fh "Key: $key\n";
    print $key_fh "Directory: $dir\n";
    print $key_fh "Paper: $i\( PDF: $pdf files   TXT: $txt files\)\n";
    print $key_fh "---------------------------------\n\n";

    # Highest count first; ties are broken by path to keep the report stable.
    foreach my $path (sort{$hash{$b} <=> $hash{$a} || $a cmp $b}keys(%hash)){
        # Show the name relative to $dir, even when $dir itself contains '/'.
        my $label=$path;
        if(index($path, "$dir/") == 0){
            $label=substr($path, length($dir)+1);
        }
        print $key_fh "$label: $hash{$path}\n";
    }
    close($key_fh) or warn "KeySearch: error closing \"$report\": $!\n";

    return $i;
}
PDFtoTEXT | description | prev | next | Top
# PDFtoTEXT(PDF_PATH)
#
# Runs the external "pdftotext" program on PDF_PATH and returns the path of
# the text file it is expected to produce: PDF_PATH with a trailing ".pdf"
# replaced by ".txt" (unchanged if there is no such suffix).
#
# The path is returned even if the conversion fails; a warning is emitted in
# that case so the failure is not silent.
sub PDFtoTEXT {
    &opt_default();
    my @args=opt_get(@_);

    my $pdf=shift @args;

    # List form of system() bypasses the shell, so file names containing
    # spaces or shell metacharacters are passed through untouched.
    my $status=system('pdftotext', $pdf);
    if($status != 0){
        warn "PDFtoTEXT: pdftotext failed for \"$pdf\" (status $status)\n";
    }

    $pdf=~s/\.pdf$/\.txt/;
    return $pdf;
}
PubMedSearch | description | prev | next | Top
# PubMedSearch(QUERY, [options])
#
# Queries PubMed through the NCBI Entrez query.fcgi URL built below, then for
# every result link found in the response fetches the abstract page and tries
# to download the full-text PDF using publisher-specific page scraping.
# For each paper N either "$dir/N.pdf" (a response body starting with "%PDF")
# or "$dir/N.txt" (the abstract data collected so far) is written.  Progress
# is printed to STDOUT and logged to "$dir/$dir.log", and a summary of the
# counters is emitted at the end.  If a key word was given,
# KeySearch($dir, $key) is run afterwards.
#
# Options (defaults are registered via opt_default; NOTE(review): opt_default,
# opt_get and opt_val are helpers defined outside this sub, presumably in
# SubOpt - confirm their exact semantics there):
#   limit - sent as "dispmax" in the query URL (default 500)
#   dir   - output directory, must not exist yet (default 'PUBMED'.time)
#   key   - key word handed to KeySearch (default '')
#
# Returns early (bare return) when the directory already exists or when the
# PubMed request is not successful.
sub PubMedSearch {
    # Network modules are loaded at run time, only when this sub is called.
    require LWP::Simple;
    require LWP::UserAgent;
    require HTTP::Cookies;
    
    my $time=time;
    &opt_default(limit=>500, dir=>'PUBMED'.$time, key=>'');
    my @args=opt_get(@_);
    
    my $query=shift @args;
    my $limit=opt_val('limit');
    my $dir=opt_val('dir');
    my $key=opt_val('key');

    my $com;
    my @date;
    my $req;
    my $res;
    my $ua;
    my $i;
    my $frag;
    my $frag2;
    my @line;
    my @line2;
    my @line3;
    my $url;
    my $url2;
    my $tmp;
    my $tmp2;
    my $head;
    my $abst;
    my $abstract;
    my $title;
    my $authors;
    my $affiliation;
    my $journal;
    my $pmid;
    # Outcome counters reported in the final summary.
    my $download=0;
    my $invalid=0;
    my $permit=0;
    my $nopdf=0;
    my $notfound=0;
    
    # Refuse to reuse an existing output directory.
    $tmp=-d "$dir";
    if($tmp == 1){
	print "\"$dir\" : The directory has already existed.\n";
	return;
    }

    # localtime() gives years since 1900 and a 0-based month.
    @date=localtime($time);
    $date[5]=$date[5]+1900;
    $date[4]=$date[4]+1;

    print '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print "Query word is\" $query\".\n";
    print "Search limit is\" $limit\".\n";
    print "Key word is\" $key\".\n" if($key);
    print "---------------------------------\n";

    # NOTE(review): neither mkdir nor open is checked for failure here.
    mkdir("$dir",0777);
    open(LOG, ">$dir/$dir".'.log');

    print LOG '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print LOG "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print LOG "Query: $query\n";
    print LOG "Limit: $limit\n";
    print LOG "Key: $key\n" if($key);
    print LOG "Directory: $dir\n";
    print LOG "Log file: $dir\/$dir\.log\n";
    print LOG "Key file: $dir\/$dir\.key\n" if($key);
    print LOG "---------------------------------\n\n";

    # Spaces in the query become '+' for use in the URL; no other escaping
    # is applied.
    $query =~ tr/ /+/;

    # Cookies are kept in "lwpcookies.txt" (relative path) and saved
    # automatically.
    $ua = LWP::UserAgent->new;
    $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt", autosave => 1));
    
    $com='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?SUBMIT=y&DB=PubMed&cmd=&term='.$query.'&dispmax='.$limit;
    $req = HTTP::Request->new(GET => $com);
    $res = $ua->request($req);
    
    # NOTE(review): LOG is left open on this early return.
    unless($res->is_success){
	print "Error occured: PubMed isn't available.\n";
	print "---------------------------------\n";
	return;
    }

    # Every response line matching the table-cell pattern below is treated as
    # one search hit; $1 is the link to that paper's abstract page.
    @line=split(/\n/,$res->as_string);
    foreach(@line){
	if(/\<td width\=\"100\%\"\>\<font size\=\"\-1\"\>\<a href\=\"(.*)\"\>.*/){
	    $i++;
	    $frag = 0;
	    $frag2 = 0;
	    $journal = "";
	    $title = "";
	    $authors = "";
	    $affiliation = "";
	    $abstract = "";
	    $pmid = "";
	    
	    # Removing "amp;" turns "&amp;" in the scraped link into "&".
	    $url=$1;
	    $url =~ s/amp\;//g;
	    $req = HTTP::Request->new(GET => "$url");
	    $abst = $ua->request($req);

#######################################################
# Abstract #
#######################################################
# Flags used for the rest of this paper:
#   $frag  - set to 1 once a request for this paper's document succeeded or
#            a "not found"/"not permitted" outcome was recorded for it.
#   $frag2 - set to 1 when a full-text link was found on the abstract page.
# After the link loop, $frag == 0 selects the fallback outcomes
# ("Not permitted"/"Invalid format" when $frag2 == 1, "no PDF" otherwise).
# NOTE(review): "$frag = 1; $frag = 2;" below looks like the second
# assignment was meant for $frag2 - confirm.  The final tests use the bitwise
# '&' on two comparison results where '&&' was presumably intended.
if($abst->content =~ /\<input name\=\"uid\" type\=\"checkbox\" value\=\"\d+\"\>\<b\>1\: \<\/b\>(.*)\<\/td\>/){ $journal=$1; if($journal =~ /Error occured\: cannot get document summary/){ $journal = 'Error occured! cannot get document summary'; print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n"; close(TXT); print LOG "\[PAPER $i\]\n"; print LOG "Journal: $journal\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the abstract.\n"; print LOG "---------------------------------\n\n"; $notfound++; $frag = 1; $frag = 2; next; } } if($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)\<br\>\<br\>(PMID\: .*)\<\/dd\>\n/){ $title=$1; $authors=$2; $affiliation=$3; $abstract=$4; $pmid=$5; } elsif($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)(PMID\: .*)\<\/dd\>\n/){ $title=$1; $authors=$2; $affiliation=$3; $abstract=$4; $pmid=$5; } print LOG "\[PAPER $i\]\n"; print LOG "Journal: $journal\n"; print LOG "Title: $title\n"; print LOG "Authors: $authors\n"; print LOG "$pmid\n"; @line2=split(/\n/,$abst->content); foreach(@line2){ if(/\<dd\>\<SPAN\>\<a href\=\"(.*)\" OnClick.*/){ $frag2 = 1; $url=$1; $url='http://www.ncbi.nlm.nih.gov:80'.$url; #######################################################
# Direct Access #
#######################################################
# When the link itself ends in ".pdf" it is fetched directly; the body is
# saved as a PDF only if it starts with "%PDF", otherwise just the abstract.
if($url =~ /\.pdf$/){ $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url. Only abstract is saved.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } last; } #######################################################
# Access to the site #
#######################################################
# A HEAD request is issued first and $url is replaced by the request URI
# stored in that response (presumably the location after redirects - confirm),
# then the page is fetched with GET.  The publisher-specific sections that
# follow dispatch on this $url.
$url =~ s/amp\;//g; $url =~ s/amp\%3[Bb]//g; $req = HTTP::Request->new(HEAD => "$url"); $head = $ua->request($req); $url = $head->{_request}->{_uri}; $url =~ s/amp\;//g; $url =~ s/amp\%3[Bb]//g; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); unless($res->is_success){ print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the page. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $notfound++; $url="not found"; $frag = 1; } #######################################################
# Springer #
#######################################################
if($url =~ /\.springer\./){ my $spfrag; $url =~ s/index.html//; @line3=split(/\n/, $res->content); foreach(@line3){ if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){ $url2=$url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); last; } elsif(/Otherwise click \<a href\=\"\.\.\/\.\.(.*)\"\>here\!\<\/a\>\<\/p\>/){ $tmp = $1; $url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $url =~ s/index.html//; @line2=split(/\n/, $res->content); foreach(@line2){ if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){ $url2=$url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); last; } } last; } elsif(/\<a href\=\"\.\.\/\.\.(.*\.pdf)\"\>PDF/ || /\<a HREF\=\"\.\.\/\.\.(.*\.pdf)\"\>Article in PDF format/){ $spfrag =1; $tmp=$1; $url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; last; } } } last if($spfrag == 1); if($res->content =~ /.*\"(.*\.pdf)\".*/){ $url2 = $url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Springer-ny #
#######################################################
if($url =~ /\.springer-ny\./){ $url =~ s/index\.html//; $url2=$url.'paper/index.html'; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } #######################################################
# Catchword #
#######################################################
if($url =~ /\.catchword\./){ my $catchword; if($res->content =~ /SRC\=\"(.*)\" NAME\=\"toolbar\"/){ $url='http://www.catchword.com/'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); @line3=split('\n', $res->content); foreach(@line3){ if($res->content =~ /\<a href\=\"(.*)\"alt\=\"full document\"/){ $catchword=1; $url='http://www.catchword.com/'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } if($catchword == 0){ $frag = 1; print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; } } } #######################################################
# InterScience #
#######################################################
elsif($url =~ /\.interscience\./){ if($res->as_string =~ /.*\"(.*\.pdf)\".*/){ $url2 = 'http://www3.interscience.wiley.com'.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Wiley #
#######################################################
if($url =~ /doi\.wiley\.com/){ if($res->content =~ /\<h1\>Error\<\/h1\>/){ print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the page. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $notfound++; $url="not found"; $frag = 1; } } #######################################################
# Synergy #
#######################################################
elsif($url =~ /\.blackwell-synergy\./){ if($res->as_string =~ /\<a href\=\"javascript\:newWindow\(\'(.*\.x\/pdf)\'.*/){ $url2 = 'http://www.blackwell-synergy.com'.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->as_string =~ /\<a href\=\"(.*pdf.*)\"\>/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } #######################################################
# EMBO #
#######################################################
elsif($url =~ /\/\/emboj\./){ if($res->as_string =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/){ $url = 'http://emboj.oupjournals.org'.$1; $url =~ s/content/reprint/; $url = $url.'.pdf'; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# JVMS #
#######################################################
elsif($url =~ /\/\/jvms\./){ if($res->content =~ /\<a href\=\"(.*)\"\>PDF/){ $url = 'http://jvms.jstage.jst.go.jp'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstr\
act is saved.\n"
; print LOG "--------------------------------\
-\n\n"
; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# J Biol Chem, J Clinical Inv and Neurology #
#######################################################
elsif($url =~ /\/\/(www\.jbc\.org)/ || $url =~ /\/\/(www\.jci\.org)/ || $url =~ /\/\/(www\.neurology\.org)/ || $url =~ /\/\/(circ\.ahajournals\.org)/ || $url =~ /\/\/(www\.pnas\.org)/ || $url =~ /\/\/(www\.fasebj\.org)/ || $url =~ /\/\/(www\.jneurosci\.org)/ || $url =~ /\/\/(bioinformatics\.oupjournals\.org)/){ $tmp=0; $tmp2=$1; if($res->content =~ /window\.location \= \"(.*)\"\;/){ $url='http://'.$tmp2.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $tmp=1; } elsif($res->content =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/ || $res->content =~ /\<A HREF\=\"(.*)\"\>Screen \(PDF\)/){ $url='http://'.$tmp2.$1.'.pdf'; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $tmp=1; } if($tmp == 1){ if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Nature #
#######################################################
elsif($url =~ /\/\/www\.nature\.com/){ if($res->content =~ /Full text.*\"(.*)\"\>PDF/){ $url='http://www.nature.com'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Portlandpress #
#######################################################
elsif($url =~ /\/\/cs\.portlandpress\.com/){ if($res->content =~ /\<A class\=\"sidelinks\" HREF\=\"(.*\.pdf)\"\>\<img src/){ $url='http://cs.portlandpress.com'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Elsevier #
#######################################################
elsif($url =~ /\/\/linkinghub\.elsevier\.com/){ if($res->content =~ /\<a HREF\=\"(.*)\"\>\<img border.*src\=\"http\:\/\/www\.sciencedirect\.com\//){ $tmp = $1; $tmp =~ s/amp\;//g; $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); } if($res->content =~ /.*\"(.*\.pdf)\".*/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } else{ $frag=1; print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } } #######################################################
# ScienceDirect and Others #
#######################################################
else{ if($res->as_string =~ /.*\"(.*\.pdf)\".*/){ $tmp = $1; $tmp =~ s/UADB\/xppview\/// if($url =~ /\.acs\.org\//); $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); if($res->is_success){ if($res->content =~ /^\%PDF.*/){ $frag = 1; open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } else{ if($url =~ /(http\:\/\/.*?)\/.*/){ $tmp=$1.$tmp; } $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } elsif($res->as_string =~ /.*\"(http\:\/\/.*pdf.*)\".*/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } } if($frag == 0 & $frag2 == 1){ if($url =~ /.*\.sciencedirect\..*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; } else{ print "$i\.txt: Invalid format from $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Invalid format. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $invalid++; } } elsif($frag == 0 & $frag2 == 0){ print "$i\.txt: There is no PDF document.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL:\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: There is no PDF document. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $nopdf++; } } } print "Searched from ".$i." papers.","\n"; print "Downloaded: $download\n"; print "Not permitted: $permit\n"; print "Not found: $notfound\n"; print "Invalid format: $invalid\n"; print "No PDF: $nopdf\n"; if($i-$nopdf != 0){ print sprintf("%d",$download/($i-$nopdf)*100)."\% of papers have been downloaded.\n\n";
} else{ print "0% of papers have been downloaded.\n\n"; } print "Directory is\" $dir\".\n"; print "Log file is\" $dir\/$dir\.log\"\n"; print "Key file is\" $dir\/$dir\.key\"\n" if($key); print LOG "Total: $i\n"; print LOG "Downloaded: $download\n"; print LOG "Not permitted: $permit\n"; print LOG "Not found: $notfound\n"; print LOG "Invalid format: $invalid\n"; print LOG "No PDF: $nopdf\n"; if($i-$nopdf != 0){ print LOG "Accuracy: ".sprintf("%d",$download/($i-$nopdf)*100)."\%\n";
} else{ print LOG "Accuracy: 0%\n"; } close(LOG); if($key){ KeySearch($dir, $key); }
}
WordCount | description | prev | next | Top
# WordCount(FILE, QUERY)
#
# Returns the number of non-overlapping occurrences of the literal string
# QUERY in FILE, counted line by line (case-sensitive).
#
#   FILE  - path of the text file to scan.
#   QUERY - key word; it is matched literally, so characters such as
#           '.', '(' or '+' have no special meaning.
#
# Always returns a number: 0 when there is no match, when QUERY is empty or
# undefined, or when FILE cannot be opened (a warning is emitted for the
# latter).
sub WordCount {
    &opt_default();
    my @args=opt_get(@_);

    my $file=shift @args;
    my $query=shift @args;
    my $count=0;

    # An empty pattern would make Perl reuse the last successful regex, so
    # an empty or missing key word is treated as "no occurrences".
    return $count unless(defined $file && defined $query && length $query);

    my $fh;
    unless(open($fh, '<', $file)){
        warn "WordCount: cannot open \"$file\": $!\n";
        return $count;
    }

    while(my $line=<$fh>){
        # A global match in list context returns one element per hit; \Q..\E
        # keeps regex metacharacters in the key word literal.
        my @hits=($line =~ /\Q$query\E/g);
        $count+=scalar(@hits);
    }
    close($fh);

    return $count;
}
General documentation
No general documentation available.