home *** CD-ROM | disk | FTP | other *** search
- # ------------------------------------------------------------------------------
- # HTTP ROUTINES LIBRARY by A.I.Studio / Igor Afanasyev
- # ------------------------------------------------------------------------------
-
- use Socket;
-
- select(STDOUT); $| = 1;
-
- %COOKIES_VALUES = ();
- %COOKIES_DOMAINS = ();
- %COOKIES_PATHS = ();
-
- $TRACE_PROGRESS = $ENV{'TRACE_PROGRESS'};
- $TRACE_HEADER = $ENV{'TRACE_HEADER'};
- $TRACE_BODY = $ENV{'TRACE_BODY'};
- $TRACE_COOKIES = $ENV{'TRACE_COOKIES'};
-
- $Referer = $ENV{'HTTP_REFERER'};
- ($Referer eq '') && ($Referer = 'http://www.watznew.com/');
-
- $UserAgent = $ENV{'HTTP_USER_AGENT'};
- ($UserAgent eq '') && ($UserAgent = 'WatzNew Agent Perl Script (www.watznew.com)'); # 'Mozilla/4.0 (compatible; MSIE 5.0; Win32)'
-
- undef $HttpHeader; # this variable contains document header after HTTP request is done
- undef $HttpBody; # this variable contains document body after HTTP request is done
- undef $LastHost; # these variables contain the last used host, port and path
- undef $LastPort;
- undef $LastPath;
-
- # ------------------------------------------------------------------------------
- sub INF {
- # ------------------------------------------------------------------------------
- my ($s) = @_;
- print "INF: $s\n";
- }
-
- # ------------------------------------------------------------------------------
- sub MSG {
- # ------------------------------------------------------------------------------
- my ($s) = @_;
- print "MSG: $s\n";
- }
-
- # ------------------------------------------------------------------------------
- sub ERR {
- # ------------------------------------------------------------------------------
- my ($s) = @_;
- print "ERR: $s\n";
- exit;
- }
-
- # ------------------------------------------------------------------------------
- sub Squeeze {
- # ------------------------------------------------------------------------------
- my ($s) = @_;
- $s =~ s/ / /gi;
- $s =~ s/[\r\t\n ]+/ /gi;
- $s =~ s/^[\r\t\n ]//gi;
- $s =~ s/[\r\t\n ]$//gi;
- return $s;
- }
-
- # ------------------------------------------------------------------------------
- sub PrepareParam {
- # ------------------------------------------------------------------------------
- my ($s) = @_;
- $s =~ s/<.*?>/ /gi;
- $s =~ s/"/"/gi;
- $s = &Squeeze($s);
- return $s;
- } # end of sub
-
- # ------------------------------------------------------------------------------
- sub ConvertHtmlCodes {
- # ------------------------------------------------------------------------------
- my ($s) = @_;
- $s =~ s/ / /gi;
- $s =~ s/"/"/gi;
- $s =~ s/</</gi;
- $s =~ s/>/>/gi;
- return $s;
- } # end of sub
-
- # ------------------------------------------------------------------------------
- sub SplitUrl { # splits url to ($Host,$Port,$Path) array
- # ------------------------------------------------------------------------------
- my ($Url) = @_;
-
- my $Host,$Port,$Path;
-
- if ($Url =~ m|^http(s*?)://(.*)|i) {
- $Secure = ($1 eq 's');
- $Host = $2;
- $Port = 80;
- $Path = "/";
- ($Host =~ s|^([^/]+)/(.*)$|$1|) && ($Path = $2);
- ($Host =~ s/:(\d+)$//) && ($Port = $1);
- ($Host =~ s/:(\D+)$//) && ($Port = 80);
- $Path = '/'.$Path;
- } else {
- &ERR("Wrong url syntax: $Url");
- }
-
- return ($Host,$Port,$Path);
- } # end of sub
-
-
- # ------------------------------------------------------------------------------
- sub ExpandUrl {
- # ------------------------------------------------------------------------------
- my ($Url,$BaseUrl) = @_;
- my $h,$p,$b;
-
- $b = $BaseUrl.'/';
- $b =~ s|//$|/|g;
-
- if ($b =~ m|^(http[s]{0,1}://(.+?))/|i) {
- $h = $1;
- } else { # wrong base url syntax
- return $Url;
- }
-
- ($BaseUrl =~ m|(.+/)(.*?)$|) && ($p = $1);
-
- ($Url =~ m|^http[s]{0,1}://|i) && (return $Url);
- ($Url =~ m|^/|) && (return $h.$Url);
- return $p.$Url;
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub ExpandUrls {
- # ------------------------------------------------------------------------------
- my ($Html,$BaseUrl) = @_;
-
- while ($Html =~ m/<A (.*?)HREF=["]*(.*?)["]*[> ]/gi) {
- my $path = $2;
- my $match = $&;
- my $subst = '__A__HREF="'.&ExpandUrl($path,$BaseUrl).'">';
- $Html =~ s/\Q$match\E/$subst/gi;
- }
-
- $Html =~ s/__A__HREF/<A HREF/gi;
-
- return $Html;
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub MakeCookies { # sends COOKIES
- # ------------------------------------------------------------------------------
- my ($Domain,$Path) = @_;
- my $s = '';
-
- foreach $c (keys %COOKIES_VALUES) {
- my $v = $COOKIES_VALUES{$c};
- my $d = $COOKIES_DOMAINS{$c};
- my $p = $COOKIES_PATHS{$c};
-
- if ($Domain =~ m/\Q$d\E$/) {
- if ($Path =~ m/^\Q$p\E/) {
- ($s ne '') && ($s .= '; ');
- $s .= "$c=$v";
- $TRACE_COOKIES && print "\t## Cookie: [$c]=[$v]\n";
- }
- }
- }
-
- if ($s ne '') {
- $TRACE_COOKIES && print "\n";
- return "Cookie: $s\n";
- } else {
- return '';
- }
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub HttpPost { # sends POST request and returns a page
- # ------------------------------------------------------------------------------
- my ($Host,$Port,$Path,$PostData,$SkipBodyMode) = @_;
- my $PostDataLength = length($PostData);
-
- $Path = '/' if ($Path eq '');
-
- $LastHost = $Host;
- $LastPort = $Port;
- $LastPath = $Path;
-
- $TRACE_PROGRESS && print "\n\t## HttpPost [$Host] [$Port] [$Path]\n";
- $TRACE_PROGRESS && print "\t## Referer [$Referer]\n\n";
-
- &INF("Looking for $Host");
-
- $iaddr = inet_aton($Host) || &ERR("Can't locate host: $Host");
- $paddr = sockaddr_in($Port, $iaddr);
- $proto = getprotobyname('tcp');
- socket(SOCK, PF_INET, SOCK_STREAM, $proto) || &ERR("$!");
- connect(SOCK, $paddr) || &ERR("$!");
-
- select(SOCK); $| = 1; select(STDOUT);
-
- &INF("Sending request to server");
-
- my $headers =
- "POST $Path HTTP/1.1\n".
- "Host: $Host\n".
- "Referer: $Referer\n".
- "Cache-Control: no-cache\n".
- "Accept: */*\n".
- "Accept-Charset: windows-1251\n".
- "User-Agent: $UserAgent\n".
- "Content-type: application/x-www-form-urlencoded\n".
- "Content-length: $PostDataLength\n".
- "Connection: close\n";
-
- $headers .= &MakeCookies($Host,$Path);
-
- print SOCK $headers;
- $headers =~ s/\n/\n\t>> /g;
- $TRACE_PROGRESS && print "\t>> $headers\n\n";
-
- print SOCK "\n$PostData\n";
-
- $Referer = "http://$Host:$Port$Path";
-
- return &ProcessServerResponce($SkipBodyMode);
- } # end of sub
-
- # ------------------------------------------------------------------------------
- sub HttpGet { # sends GET request and returns a page
- # ------------------------------------------------------------------------------
- my ($Host,$Port,$Path,$SkipBodyMode) = @_;
-
- $TRACE_PROGRESS && print "\n\t## HttpGet [$Host] [$Port] [$Path]\n";
- $TRACE_PROGRESS && print "\t## Referer [$Referer]\n\n";
-
- $Path = '/' if ($Path eq '');
-
- $LastHost = $Host;
- $LastPort = $Port;
- $LastPath = $Path;
-
- &INF("Looking for $Host");
-
- $iaddr = inet_aton($Host) || &ERR("Can't locate host: $Host");
- $paddr = sockaddr_in($Port, $iaddr);
- $proto = getprotobyname('tcp');
- socket(SOCK, PF_INET, SOCK_STREAM, $proto) || &ERR("$!");
- connect(SOCK, $paddr) || &ERR("$!");
-
- select(SOCK); $| = 1; select(STDOUT);
-
- &INF("Sending request to server");
-
- my $headers =
- "GET $Path HTTP/1.1\n".
- "Host: $Host\n".
- "Referer: $Referer\n".
- "Cache-Control: no-cache\n".
- "Accept: */*\n".
- "Accept-Charset: windows-1251\n".
- "User-Agent: $UserAgent\n".
- "Connection: close\n";
-
- $headers .= &MakeCookies($Host,$Path);
-
- print SOCK $headers;
- $headers =~ s/\n/\n\t>> /g;
- $TRACE_PROGRESS && print "\t>> $headers\n\n";
-
- print SOCK "\n";
-
- $Referer = "$Host:$Port$Path";
-
- return &ProcessServerResponce($SkipBodyMode);
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub _HttpProcessAutoRedirect { # handles automatic auto-redirection
- # ------------------------------------------------------------------------------
- my ($c, $HaltOnError) = @_;
- undef my $done;
- undef my $Url;
-
- do {
- while (($c == 301) || ($c == 302) || ($c == 303)) {
- if ($HttpHeader =~ m/Location:[ ]*(.+?)\n/i) {
- $Url = $1;
- $c = &HttpGet(&SplitUrl($Url));
- } else {
- &ERR("Can't find the address to redirect");
- }
- }
-
- $done = 1;
-
- if (($c == 200) && ($HttpBody =~ m/<META HTTP-EQUIV=["]*Refresh["]* CONTENT=["]*(.*?)URL=(.*?)["]*>/si)) {
- my $u = $2;
- if ($Url ne $u) {
- undef $done;
- $Url = $u;
- $c = &HttpGet(&SplitUrl($Url));
- }
- }
-
- } until ($done);
-
- ($HaltOnError) && ($c ne 200) && &ERR("HTTP Error: $c");
-
- return ($c, $Url);
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub HttpGetUrl { # sends GET request
- # ------------------------------------------------------------------------------
- my ($Url,$SkipBodyMode,$HaltOnError) = @_;
- my $c = &HttpGet(&SplitUrl($Url),$SkipBodyMode);
- ($HaltOnError) && ($c ne 200) && &ERR("HTTP Error: $c");
- return $c;
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub HttpGetUrlAutoRedirect { # sends GET request and handles auto-redirection
- # ------------------------------------------------------------------------------
- my ($Url,$SkipBodyMode,$HaltOnError) = @_;
- return &_HttpProcessAutoRedirect(&HttpGet(&SplitUrl($Url),$SkipBodyMode),$HaltOnError);
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub HttpPostUrl { # sends POST request
- # ------------------------------------------------------------------------------
- my ($Url,$PostData,$SkipBodyMode,$HaltOnError) = @_;
- my $c = &HttpPost(&SplitUrl($Url),$PostData,$SkipBodyMode);
- ($HaltOnError) && ($c ne 200) && &ERR("HTTP Error: $c");
- return $c;
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub HttpPostUrlAutoRedirect { # sends POST request and handles auto-redirection
- # ------------------------------------------------------------------------------
- my ($Url,$PostData,$SkipBodyMode,$HaltOnError) = @_;
- return &_HttpProcessAutoRedirect(&HttpPost(&SplitUrl($Url),$PostData,$SkipBodyMode),$HaltOnError);
- } # end sub
-
- # ------------------------------------------------------------------------------
- sub ProcessServerResponce {
- # ------------------------------------------------------------------------------
- $HttpBody = '';
- $HttpHeader = '';
-
- my $OldBytesRead = 0;
- my $OldReadTime;
-
- my ($SkipBodyMode) = @_;
-
- $Code = -1;
-
- do {
- do {
- $Responce = <SOCK>;
- $Responce =~ s/\r//g;
- $HttpHeader .= $Responce;
- $TRACE_HEADER && print "\t<< $Responce";
- $Responce =~ s/\n//g;
- } until ($Responce eq '');
-
- ($Responce eq '') && ($Code = 0);
- $HttpHeader =~ m/^HTTP\/\d+?\.\d+? (\d+)/ && ($Code = $1);
-
- $TRACE_PROGRESS && &INF("HTTP status code: $Code");
-
- if (($Code ge 100) && ($Code lt 200)) {
- $Code = -1;
- $HttpHeader = '';
- }
- } until ($Code ge 0);
-
- # extracting cookies
- my %TMP_COOKIES_VALUES = ();
- my %TMP_COOKIES_PATHS = ();
- my %TMP_COOKIES_DOMAINS = ();
-
- pos $HttpHeader = 0;
- while ($HttpHeader =~ m/Set-Cookie:[ ]*(.*?)=(.*?)\n/gi) {
- my $c = $1;
- my $v = $2.';';
- my $expires, $path, $domain;
-
- $expires = '';
- if ($v =~ m/; expires=(.*?);/i) {
- $expires = $1;
- }
-
- $path = '';
- if ($v =~ m/; path=(.*?);/i) {
- $path = $1;
- }
-
- $domain = '';
- if ($v =~ m/; domain=(.*?);/i) {
- $domain = $1;
- }
-
- $v =~ s/;.*//;
-
- my $e = 0;
- if ($expires =~ m/^(\w+) (\w+) (\d+) (\d+):(\d+):(\d+) (\d+)$/i) { # i.e. 'Wed Dec 31 16:00:01 1969'
- my($lsec,$lmin,$lhour,$lmday,$lmon,$lyear,$lwday,$lyday,$lisdst) = localtime(time);
- $lyear += 1900;
-
- my $mname = $2;
- my $day = $3;
- my $hour = $4;
- my $min = $5;
- my $sec = $6;
- my $year = $7;
-
- if ($year < $lyear) {
- $e = 1;
- }
- }
-
- if ($e == 1) {
- $TRACE_COOKIES && print "\t-- Removing cookie: [$c]\n";
-
- delete $TMP_COOKIES_VALUES{$c};
- delete $TMP_COOKIES_PATHS{$c};
- delete $TMP_COOKIES_DOMAINS{$c};
-
- delete $COOKIES_VALUES{$c};
- delete $COOKIES_PATHS{$c};
- delete $COOKIES_DOMAINS{$c};
- } else {
- $TRACE_COOKIES && print "\t++ Cookie: [$c]=[$v], Path=[$path], Domain=[$domain]\n";
-
- $TMP_COOKIES_VALUES{$c} = $v;
- $TMP_COOKIES_PATHS{$c} = $path;
- $TMP_COOKIES_DOMAINS{$c} = $domain;
- }
- }
-
- foreach $c (keys %TMP_COOKIES_VALUES) {
- $COOKIES_VALUES{$c} = $TMP_COOKIES_VALUES{$c};
- $COOKIES_PATHS{$c} = $TMP_COOKIES_PATHS{$c};
- $COOKIES_DOMAINS{$c} = $TMP_COOKIES_DOMAINS{$c};
- }
- # end extracting cookies
-
- local $skip = ($SkipBodyMode eq always) || ($SkipBodyMode eq undef) && ($Code ne 200);
- ($SkipBodyMode eq never) && ($skip = undef);
-
- $TRACE_PROGRESS && $skip && &INF("Body skipped");
-
- if (!$skip) {
- $TRACE_PROGRESS && &INF("Loading document");
-
- $OldReadTime = time;
-
- while (my $Responce = <SOCK>) {
- $Responce =~ s/\r//g;
- $HttpBody .= $Responce;
- $TRACE_BODY && print "\t:: $Responce";
-
- if ($TRACE_PROGRESS) {
- my $read = length($HttpBody);
- my $rtime = time;
-
- if (($rtime - $OldReadTime > 10) || ($read - $OldBytesRead >= 5000)) {
- # Fire an event if 10+ seconds passed or 5000+ bytes read
- &INF("$read bytes read");
- $OldBytesRead = $read;
- $OldReadTime = $rtime;
- }
- }
- }
- }
-
- close(SOCK) || &ERR("$!");
-
- return $Code;
- } # end sub
-
- # ------------------------------------------------------------------------------
-
- 1; # return true