#! /usr/bin/perl
#
# Assuming that you have perl, and that you have changed the permissions on
# this file so that it can be executable ("chmod u+x diff_col"),
# then the usage is:
#
#   diff_col [colnum1] [colnum2] [threshold] [file1] [file2]
#
# [colnum1] and [colnum2] are zero-based column indices (0 is the first
# column; a negative index counts back from the last column of each row).
# [threshold] is in percent.
#
# written by: Jaideep Singh
# date: March 24, 2007
# email: singhj AT jlab DOT org
#
# desc: compares column [colnum1] of file1 with column [colnum2] of file2,
#       row by row.
#       outputs the data when the magnitude of the relative difference,
#       200*(two-one)/(two+one), is at least the threshold in pct.
#       non-numeric fields are compared as strings instead.
#       skips over lines starting with "#"
#       columns are assumed to be separated by some whitespace

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Reads one column from an already-opened file.
#   $fh    - input filehandle
#   $label - file name used in the out-of-bounds message
#   $col   - index of the column to extract
# Returns a reference to an array holding one value per data line (lines
# starting with "#" are skipped). A row that lacks the requested column
# contributes an empty string, and a single notice is printed for the file.
sub read_column {
    my ($fh, $label, $col) = @_;

    my @values;
    my $shortest;    # last index of the shortest row lacking the column
    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ /^#/;

        # split ' ' drops leading whitespace and splits on whitespace runs.
        my @parts = split ' ', $line;
        my $value = $parts[$col];
        if (!defined $value) {
            # Every field produced by split is defined, so an undefined
            # value means this row does not have the requested column.
            if (!defined $shortest or $#parts < $shortest) {
                $shortest = $#parts;
            }
            $value = '';
        }
        push @values, $value;
    }

    if (defined $shortest) {
        print "column number is out of bounds in $label ($shortest)\n";
    }
    return \@values;
}

my $usage = "Usage: diff_col [colnum1] [colnum2] [threshold] [file1] [file2]\n";

# Validate the command line before touching any argument.
die $usage unless @ARGV == 5;
my ($colnum1, $colnum2, $thresh, $file1, $file2) = @ARGV;
unless ($colnum1 =~ /\A-?\d+\z/
    and $colnum2 =~ /\A-?\d+\z/
    and looks_like_number($thresh))
{
    die $usage;
}

# Open both files up front so a bad file name aborts before any output.
open my $fh1, '<', $file1 or die "Having trouble opening $file1: $!\n";
open my $fh2, '<', $file2 or die "Having trouble opening $file2: $!\n";

my $data1 = read_column($fh1, $file1, $colnum1);
my $data2 = read_column($fh2, $file2, $colnum2);

close $fh1;
close $fh2;

my $num1 = scalar @{$data1};
my $num2 = scalar @{$data2};

# Only the rows present in both files are compared.
my $num;
if ($num1 == $num2) {
    print "number of rows match! $num1\n";
    $num = $num1;
}
else {
    print "number of rows do not match: $num1 != $num2\n";
    $num = $num1 < $num2 ? $num1 : $num2;
}

my $badcol0   = 0;    # numeric rows with one == -two (sum is zero)
my $badcolnum = 0;    # numeric rows at or above the threshold
my $badcolstr = 0;    # non-numeric rows whose strings differ

for my $n (0 .. $num - 1) {
    my $one = $data1->[$n];
    my $two = $data2->[$n];

    if (looks_like_number($one) and looks_like_number($two)) {
        my $sum = $one + $two;
        if ($sum == 0) {
            # Both values are zero: identical, nothing to report.
            next if $one == 0;

            # Equal magnitude, opposite sign: the relative difference would
            # divide by zero, so report the absolute difference instead.
            my $diff = sprintf "%.2F", $two - $one;
            print "$n \t $one \t $two \t $diff \t sum is 0.0\n";
            $badcol0++;
        }
        else {
            # Difference relative to the mean of the two values, in percent.
            my $ratio = 200.0 * ($two - $one) / $sum;
            if (abs($ratio) >= $thresh) {
                $ratio = sprintf "%.2F", $ratio;
                print "$n \t $one \t $two \t $ratio \n";
                $badcolnum++;
            }
        }
    }
    elsif ($one ne $two) {
        print "$n \t $one \t $two \t strings \n";
        $badcolstr++;
    }
}

print "\"bad\" sum = 0 columns: $badcol0 out of $num\n"   unless $badcol0 == 0;
print "\"bad\" nummber columns: $badcolnum out of $num\n" unless $badcolnum == 0;
print "\"bad\" string columns: $badcolstr out of $num\n"  unless $badcolstr == 0;