6 # Don't edit the line below, it must look exactly like this.
7 # Everything above this line will be replaced #
21 $opt_debug_subroutine);
# Option table handed to GetOptions: each key is an option specification and
# each value is a reference to the variable that receives the parsed value.
# "h|?" selects the short usage text, "help" the longer help message.
# NOTE(review): presumably this is Getopt::Long's GetOptions -- the import is
# outside this excerpt, confirm before relying on the ':s' / ':0' semantics.
23 my $opts = { "h|?" => \$opt_help,
24 "help" => \$opt_long_help,
25 "columns:s" => \$opt_columns,
26 "ignore_columns:s" => \$opt_ignore_columns,
27 "debug:0" => \$opt_debug,
28 "debug_package:s" => \$opt_debug_package,
29 "debug_subroutine:s" => \$opt_debug_subroutine };
# Parse the command line; the result of GetOptions is kept in $res.
# NOTE(review): no check of $res is visible in this excerpt.
31 my $res = GetOptions( %{$opts} );
35 if ( scalar( @ARGV ) < 1 and !($opt_help or $opt_long_help) ){
36 print "At least one list file must be specified. Use 'data_stats -h' for help.\n";
41 if($opt_help or $opt_long_help) {
48 data_stats [ -h | -? ] [ -help ]
50 [ -ignore_columns='string' ]
52 [ -debug_package='string' ]
53 [ -debug_subroutine='string' ]
58 if( $opt_long_help and !$opt_help ){
63 data_stats calculates and prints simple statistics for data
64 files. Simple statistics are max, min, mean, median, range and
65 standard deviation for each column.
69 Calculate statistics for all columns in file.dta
73 Calculate statistics for all columns except WGT and AGE.
75 $ data_stats --ignore_columns=WGT,AGE
77 Calculate statistics for WGT and AGE only
79 $ data_stats --columns=WGT,AGE
83 The options are given here in their long form. Any option may be
84 abbreviated to any nonconflicting prefix. The -threads option may
85 be abbreviated to -t(or even -thr) but -debug may not be
86 abbreviated to -d because it conflicts with -debug_package and
89 The following options are valid:
93 Print a list of options.
98 By default data_stats prints statistics for all columns of the
99 data set. But if you give a comma separated list of column headers
100 or column numbers with the -columns option, then data_stats will
101 print statistics for those columns only.
104 -ignore_columns='strings'
106 ignore_columns works the opposite way of -columns; it lets you
107 select column headers or column numbers for which data_stats
108 should not print statistics. It also takes a comma separated list.
112 Print this, longer, help message.
119 This is mainly for developers who wish to debug PsN. By default
120 'integer' is zero but you can try setting it to '1' and you might
121 get some helpful warnings. If you run into problems that require
122 support, you might be told to crank this number up and send the
126 -debug_package='string'
128 Default value is: empty string
130 If used together with '-debug' it is possible to choose which part
131 of PsN you want to see debug messages from. Again this is mostly
135 -debug_subroutine='string'
137 Default value is: empty string
139 With this option it is possible to specify, with even finer
140 granularity, which part of PsN you want to see debug messages
141 from. This is definitely only for developers.
# Forward the values of the -debug, -debug_package and -debug_subroutine
# options to the debug class.
148 debug
-> level
( $opt_debug );
149 debug
-> package( $opt_debug_package );
150 debug
-> subroutine
( $opt_debug_subroutine );
# Only the first positional argument is used as the data file here; it is
# passed as the 'filename' argument to data->new.
152 my $datafile = $ARGV[0];
153 my $dataobj = data
-> new
('filename'=> $datafile);
157 if( $opt_ignore_columns ){
158 foreach my $ign ( split( /,/ , $opt_ignore_columns ) ){
159 $ignore_columns{$ign} = 1;
166 @columns = split( /,/, $opt_columns)
168 @columns = @
{$dataobj -> header
};
172 foreach my $function ( ' 'x8
, 'Min', 'Max', 'Mean', 'Median', 'Range', 'SD', ){
173 printf "%-8s", $function;
174 foreach my $head ( @columns ){
175 unless( $ignore_columns{$head} ){
177 printf "%-8s", $head;
179 my $function = lc( $function );
180 if( $head =~ /[^\d]/ ){
181 printf "%-8d", $dataobj -> $function( column_head
=> $head );
183 printf "%-8d", $dataobj -> $function( column
=> $head );