18,19c18,21
< # 05-May-2004, ver 0.94  (check for new versions, http://www.brendangregg.com)
< #			 (or run a web search for "chaosreader")
---
> # 11-Sep-2011, ver 0.95
> # (http://www.brendangregg.com/chaosreader.html seems to be outdated.
> # E-mail to the address brendan@sun.com mentioned on that web site returned
> # with an error.)
32c34
< # USAGE: chaosreader [-aehikqrvxAHIRTUXY] [-D dir] 
---
> # USAGE: chaosreader [-aehiknqrvxAHIRTUXY] [-D dir] 
63a66
> #    -n, --names           # Include hostnames in hyperlinked HTTPlog (HTML)
199c202
< # SEE ALSO: ethereal (GUI packet viewer), dsniff (sniffing toolkit)
---
> # SEE ALSO: wireshark (GUI packet viewer), dsniff (sniffing toolkit)
201a205,206
> #            Copyright (c) 2008 Indian Larry.
> #            Copyright (c) 2011 Jens Lechtenbörger.
203,219c208,223
< #  This program is free software; you can redistribute it and/or
< #  modify it under the terms of the GNU General Public License
< #  as published by the Free Software Foundation; either version 2
< #  of the License, or (at your option) any later version. 
< #
< #  This program is distributed in the hope that it will be useful,
< #  but WITHOUT ANY WARRANTY; without even the implied warranty of
< #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
< #  GNU General Public License for more details. 
< #
< #  You should have received a copy of the GNU General Public License
< #  along with this program; if not, write to the Free Software Foundation, 
< #  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
< #
< #  (http://www.gnu.org/copyleft/gpl.html)
< #
< # Author: Brendan Gregg  [Sydney, Australia]
---
> # This program is free software: you can redistribute it and/or modify
> # it under the terms of the GNU General Public License as published by
> # the Free Software Foundation, either version 3 of the License, or
> # (at your option) any later version.
> #
> # This program is distributed in the hope that it will be useful,
> # but WITHOUT ANY WARRANTY; without even the implied warranty of
> # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> # GNU General Public License for more details.
> #
> # You should have received a copy of the GNU General Public License
> # along with this program.  If not, see <http://www.gnu.org/licenses/>.
> #
> # Authors: Brendan Gregg      [Sydney, Australia]
> #          Indian Larry       [http://refrequelate.blogspot.com/]
> #          Jens Lechtenbörger [Münster, Germany]
240a245,260
> # 11-Sep-2011, Jens Lechtenbörger:
> # - Switch from GPLv2 to GPLv3
> # - Integrate diff from
> #   http://refrequelate.blogspot.com/2008/07/more-de-chunking-chaosreader-patch.html
> #   to reassemble chunked HTTP transfers.
> # - Parse Linux cooked captures, which result from listening on the "any"
> #   interface.  (Chaosreader 0.94 does not produce any output for such
> #   pcap files.)
> # - Use HTTP content-type to identify file types such as HTML, XML,
> #   Javascript, CSS; use those types for better file extensions than
> #   "data".
> # - Uncompress gzip'ed data.
> # - Add new command line switch to show host names in HTTPlog and to
> #   create href-links from HTTPlog rows to the corresponding rows in
> #   the table on index.html.
> # - Several minor improvements (see comments with "JL:").
244a265
> use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
861a883,887
> 	    # JL: Try linux cooked capture
> 	    ($lptype,$lladdr_type,$lladdr_len,
> 	     $ether_src,$ll_dummy,$ether_type,$ether_data) =
> 		unpack('nnnH12nH4a*',$packet_data) unless $decoded;
> 	    if ($ether_type ne "0800") {
862a889
> 	    }
1630c1657,1659
< 	$Index{HTML}[$number] = "<tr><td><i>$number.</i></td>" .
---
> 	# JL: Added id attribute as link target
> 	$Index{HTML}[$number] = "<tr id=\"$number\">" .
> 	 "<td><i>$number.</i></td>" .
1758c1787
< 		&Process_HTTP($session_id);
---
> 		&Process_HTTP($session_id,$number);
2162a2192
> # JL: Added $number parameter (row id on index.html); $host is a new local.
2165c2195
< 	my ($junk,$var,$value,$term,$data,$request,$site,$post,$get,$reply);
---
> 	my ($junk,$var,$value,$term,$data,$request,$host,$site,$post,$get,$reply);
2173a2204
> 	my $number = shift;
2220a2252,2254
> 		### JL: Get the host string
> 		($host) = $request =~ /Host:\s(\S*)\s/is;
> 
2225c2259,2264
< 			$site = "http://${dest}$site";
---
> 		        # JL: Prefer hostname over IP address
> 		        if ($Arg{httplog_html}) {
> 			    $site = "http://${host}$site";
> 			} else { 
> 			    $site = "http://${dest}$site";
> 			}
2230,2231c2269,2271
< 		($type) = $reply =~ /Content-Type:\s(\S*)/s;
< 		($size) = $reply =~ /Content-Length:\s(\S*)/s;
---
> 		# JL: Be careful to use case insensitive matching
> 		($type) = $reply =~ /Content-Type:\s(\S*)/is;
> 		($size) = $reply =~ /Content-Length:\s(\S*)/is;
2238,2241c2278,2280
< 		 sprintf("%9d.%03d %6d %s %s/%03d %d %s %s %s %s%s/%s %s\n",
< 		 int($time),(($time - int($time))*1000),($duration*1000),
< 		 $src,$result,$status,$size,"GET",$site,"-","NONE","",
< 		 "-",$type);
---
> 			Print_Log_Line($number,$time,$duration,
> 				$src,$dest,$result,$status,$size,
> 				"GET",$site,"-","NONE","","-",$type);
2250a2290,2291
> 		### JL: Get the host string
> 		($host) = $request =~ /Host:\s(\S*)/is;
2254,2255c2295,2296
< 		($type) = $reply =~ /Content-Type:\s(\S*)/s;
< 		($size) = $reply =~ /Content-Length:\s(\S*)/s;
---
> 		($type) = $reply =~ /Content-Type:\s(\S*)/is;
> 		($size) = $reply =~ /Content-Length:\s(\S*)/is;
2262,2265c2303,2305
< 		 sprintf("%9d.%03d %6d %s %s/%03d %d %s %s %s %s%s/%s %s\n",
< 		 int($time),(($time - int($time))*1000),($duration*1000),
< 		 $src,$result,$status,$size,"POST",$site,"-","NONE","",
< 		 "-",$type);
---
> 		    Print_Log_Line($number,$time,$duration,
> 				   $src,$dest,$result,$status,$size,
> 				   "POST",$site,"-","NONE","","-",$type);
2569c2609
< 		print "Chaosreader ver 0.94\n\n";
---
> 		print "Chaosreader ver 0.95\n\n";
2648c2688
< <a href="httplog.text"><font color="blue"><b>HTTP Proxy Log</b></font></a> 
---
> <a href="$Arg{httplog_name}"><font color="blue"><b>HTTP Proxy Log</b></font></a> 
2934a2975,3006
> # JL: Format one line for the HTTPlog and return it as a string.
> #
> # Arguments: session number, time, duration, source, destination, result,
> # status, size, method, site, then optional fields of which only the last
> # one (the content type) is used.  Callers pass the placeholder fields
> # "-","NONE","","-" before the type, so the type is taken from the end
> # of the argument list, not from the 11th position.
> sub Print_Log_Line {
> 	my ($number,$time,$duration,$src,$dest,$result,$status,$size,
> 	    $method,$site) = @_;
> 	my $type = $_[-1];
> 	# Split the time stamp into whole seconds and milliseconds.
> 	my $secs = int($time);
> 	my $msecs = ($time - $secs) * 1000;
> 	#
> 	if ($Arg{httplog_html}) {
> 	    # HTML flavour: link back to the session row on index.html and
> 	    # include the destination as an additional column.
> 	    return sprintf("<pre><a href=\"index.html#%d\">%d</a>" .
> 		    " %9d.%03d %6d " .
> 		    "%-15s %-15s %s/%03d %d %s %s %s %s%s/%s %s</pre><br/>\n",
> 		    $number,$number,$secs,$msecs,($duration*1000),
> 		    $src,$dest,$result,$status,$size,
> 		    $method,$site,"-","NONE","","-",$type);
> 	}
> 	# Plain text flavour: same layout as the pre-0.95 httplog.text.
> 	return sprintf("%9d.%03d %6d %s %s/%03d %d %s %s %s %s%s/%s %s\n",
> 		$secs,$msecs,($duration*1000),$src,$result,$status,$size,
> 		$method,$site,"-","NONE","","-",$type);
> }
> 
> 
2942c3014,3015
< 	#  Create HTTPlog.text
---
> 	#  Create httplog file
>         # JL: Don't use hardcoded filename
2944,2945c3017
< 	open(FILE,">httplog.text") || die "ERROR29: creating HTTP log: $!\n";
< 
---
>         open(FILE,">$Arg{httplog_name}") || die "ERROR29: creating HTTP log: $!\n";
2962a3035
> 	elsif ($data =~ /^.PNG/) 	        { $type = "png"; } # JL
2969a3043
> 	elsif ($data =~ /<xml/i) 		{ $type = "xml"; } # JL
2982a3057
> 	return 1 if ($ext eq "png"); # JL
5354a5430,5446
> 		### JL: Chunk Check, patch from http://refrequelate.blogspot.com/2008/07/more-de-chunking-chaosreader-patch.html
> 		if ( $http_header =~ /Transfer-Encoding: chunked/ ) {
> 		    my $new_http_data="";
> 		    my $chunksize=-1;
> 		    my $pos=0;
> 		    until ($chunksize==0) {
> 			my $eolpos=index($http_data,"\r\n",$pos);
> 			$chunksize=hex(substr($http_data,$pos,$eolpos - $pos));
> 			$pos=($eolpos+2);
> 			if ($chunksize > 0) {
> 			    $new_http_data.=substr($http_data,$pos,$chunksize);
> 			}
> 			$pos+=($chunksize+2);
> 		    }
> 		    $http_data=$new_http_data;
> 		}
> 
5359a5452,5488
> 		### JL: Content Type treatment
> 		my $http_content_type = "";
> 		if ( $http_header =~ /Content-Type: text\/html/i ) {
> 		    $http_content_type = ".html";
> 		}
> 		elsif ( $http_header =~ /Content-Type: (application|text)\/((x-)?javascript|x-js)/i ) {
> 		    $http_content_type = ".js";
> 		}
> 		elsif ( $http_header =~ /Content-Type: (application|text)\/json/i ) {
> 		    $http_content_type = ".json";
> 		}
> 		elsif ( $http_header =~ /Content-Type: text\/css/i ) {
> 		    $http_content_type = ".css";
> 		}
> 		elsif ( $http_header =~ /Content-Type: text\/plain/i ) {
> 		    $http_content_type = ".txt";
> 		}
> 		elsif ( $http_header =~ /Content-Type: text\/xml/i ) {
> 		    $http_content_type = ".xml";
> 		}
> 		elsif ( $http_header =~ /Content-Type: image\/x-icon/i ) {
> 		    $http_content_type = ".icon";
> 		}
> 		# JL: The following three are useful if the image is
> 		# gz-compressed (should not be necessary, but happens).
> 		# In that case, http_type indicates "gz" but not the
> 		# image type.
> 		elsif ( $http_header =~ /Content-Type: image\/jpeg/i ) {
> 		    $http_content_type = ".jpeg";
> 		}
> 		elsif ( $http_header =~ /Content-Type: image\/gif/i ) {
> 		    $http_content_type = ".gif";
> 		}
> 		elsif ( $http_header =~ /Content-Type: image\/png/i ) {
> 		    $http_content_type = ".png";
> 		}
> 
5361,5362c5490,5495
< 	        $filename = "session_${numtext}.part_$parttext${ext}." .
< 		 "$http_type";
---
> 		# JL: Create filename based on Content-Type
> 	        my $filename = "session_${numtext}.part_$parttext${ext}";
> 		$filename .= "$http_content_type";
> 		if ( ($http_content_type eq "") or ($http_type eq "gz") ) {
> 		    $filename .= ".$http_type";
> 		}
5368a5502,5512
> 		### JL: gz decompressing
> 		if ( $http_type eq "gz" ) {
> 		    my $gunzipped = substr($filename, 0, length($filename) - 3);
> 		    my $gunzip_failed = 0;
> 		    gunzip $filename => $gunzipped
> 			or $gunzip_failed = 1;
> 		    if ( $gunzip_failed == 0 ) {
> 			$filename = $gunzipped;
> 		    }
> 		}
> 
6302a6447,6448
> 	$Arg{httplog_html} = 0; # JL: Should we create HTTPlog in HTML?
> 	$Arg{httplog_name} = "httplog.text"; # JL: Old default as variable
6316a6463
> 				"n|names" => \$opt_n, # JL: new option
6365a6513,6514
> 	$Arg{httplog_html} = 1 if $opt_n;
> 	$Arg{httplog_name} = "httplog.html" if $opt_n;
6521c6670
<         print "USAGE: chaosreader [-aehikqrvxAHIRTUXY] [-D dir] 
---
>         print "USAGE: chaosreader [-aehiknqrvxAHIRTUXY] [-D dir] 
6545c6694
<         print "Version 0.94, 01-May-2004
---
>         print "Version 0.95, 11-Sep-2011
6547c6696
< USAGE: chaosreader [-aehikqrvxAHIRTUXY] [-D dir] 
---
> USAGE: chaosreader [-aehiknqrvxAHIRTUXY] [-D dir] 
6578a6728
>    -n, --names           # Include hostnames in hyperlinked HTTPlog (HTML)

