#!/usr/bin/perl -w
# summarize_epics.perl [filename]
#
# by:    Jaideep Singh
#
# date:  01/26/2006
#
# about: reads a raw EPICS data text file and creates a summary file
#   creates a file:
#   (0) first line is the prescalar string
#   (1) afterwards, each line is an EPICS variable
#   (2) if the variable is numeric, then the line will be:
#
#       variable name<TAB>number of values<TAB>minimum<TAB>maximum<TAB>mean<TAB>standard deviation
#   (3) the numeric variables come at the beginning of the file and are
#       separated from the alphanumeric variables by a "-----" line
#   (4) if the variable is alphanumeric, then the line will be:
#
#       variable name<TAB>number of distinct values<TAB>first value<TAB>number of occurrences<TAB>second value<TAB>number of occurrences ...
#
#   Note that some EPICS-like stuff still leaks through...
#
# input file is of the format:
#   first line might be prescalar string
#   all subsequent lines:
#       timestamp<TAB>seconds from 0000 Jan 1 2003<TAB>variable name<TAB>variable value
#
#
# EPICS info is injected into the data stream at certain time intervals.
# More important stuff is logged more often.
# Immediately preceding one log cycle, a timestamp is grabbed.
# Each variable is recorded in ASCII format on its own line:
#
# [variable name, up to 30 characters][at least one blank space][up to 20 characters for value]
#
# for example:
#
# IPM1H04B.XPOS                  -0.0408508
# 000000000111111111122222222223333333333444444444455
# 123456789012345678901234567890123456789012345678901
#
# Variable names can have ".", ":", "+", "_" as well as alphanumeric characters

use strict;
use warnings;

# A value is numeric only if the WHOLE field is a decimal or scientific
# notation literal (optional sign, digits with optional fraction, optional
# exponent).  Fields such as "e", "-", "." or "1.2.3" are made of "numeric"
# characters but are not numbers, so they are summarized as strings.
my $NUMBER_RE = qr/\A[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\z/;

# getting name of file to search + ARGUMENT ERROR HANDLING
my $filename = $ARGV[0];
die "Uh, what file do you want me to look at dude?: Too few arguments\n"
    unless (defined $filename and length $filename);
die "Scared and confused: Too many arguments\n" unless (@ARGV < 2);

# three-argument open: the file name is never interpreted as a mode or a pipe
open(my $in, '<', $filename)
    or die "d'oh!: - $filename - not found ($!)\n";

my $prescalars;    # last prescalar string seen in the input (if any)
my %data_num;      # variable name -> { count, min, max, sum, sum_sq }
my %data_str;      # variable name -> { value -> number of occurrences }

while (defined(my $line = <$in>)) {
    chomp $line;

    # a line holding eight comma-separated "psN=M" settings is the prescalar string
    if ($line =~ /(ps\d=\d+(?:,ps\d=\d+){7})/) {
        $prescalars = $1;
        next;
    }
    next if ($line eq "");

    # fields: timestamp, seconds, variable name, variable value
    my @fields = split(/\t/, $line);
    my $name   = defined $fields[2] ? $fields[2] : "";
    my $value  = defined $fields[3] ? $fields[3] : "";

    if ($value =~ $NUMBER_RE) {
        my $stats = $data_num{$name};
        if (not $stats) {
            # first value seen for this variable
            $data_num{$name} = {
                "count"  => 1,
                "min"    => $value,
                "max"    => $value,
                "sum"    => $value,
                "sum_sq" => $value * $value,
            };
        }
        else {
            $stats->{"min"} = $value if ($stats->{"min"} > $value);
            $stats->{"max"} = $value if ($stats->{"max"} < $value);
            $stats->{"count"}++;
            $stats->{"sum"}    += $value;
            $stats->{"sum_sq"} += $value * $value;
        }
    }
    else {
        $data_str{$name}{$value}++;
    }
}
close($in);

# setting up output file: <input base name up to the first "."> + "_summary.txt",
# written to the current directory
my @path_name_parts = split(/\//, $filename);
my @file_name_parts = split(/\./, $path_name_parts[$#path_name_parts]);
my $file_out        = $file_name_parts[0] . "_summary.txt";

open(my $out, '>', $file_out)
    or die "d'oh!: - $file_out - could not be opened for writing ($!)\n";

print $out "$prescalars\n" if ($prescalars);

# numeric variables: name, count, min, max, mean, standard deviation
foreach my $variable_name (sort keys %data_num) {
    my $stats = $data_num{$variable_name};
    my $count = $stats->{"count"};
    my $mean  = $stats->{"sum"} / $count;

    # population standard deviation; abs() guards against a tiny negative
    # variance caused by round-off, and a constant variable is exactly 0
    my $std = ($stats->{"max"} == $stats->{"min"})
        ? 0.0
        : sqrt(abs($stats->{"sum_sq"} / $count - $mean * $mean));

    my @columns = map { sprintf("%.9E", $_) }
        ($stats->{"min"}, $stats->{"max"}, $mean, $std);
    print $out join("\t", $variable_name, $count, @columns), "\n";
}

print $out "-----\n";

# alphanumeric variables: name, number of distinct values, then each value
# followed by its number of occurrences
foreach my $variable_name (sort keys %data_str) {
    my $occurrences = $data_str{$variable_name};
    my @columns     = map { ($_, $occurrences->{$_}) } sort keys %$occurrences;
    print $out join("\t", $variable_name, scalar(keys %$occurrences), @columns), "\n";
}

# buffered write errors only surface at close
close($out) or die "d'oh!: - $file_out - could not be written ($!)\n";