functionalized version 2, implemented version 1 using functions in version 2
[PsN.git] / bin / data_stats
blobc20ca57a86158972154f4fb8399911b0d52e1fea
1 #!/usr/bin/perl
3 use FindBin qw($Bin);
4 use lib "$Bin/../lib";
6 # Don't edit the line below, it must look exactly like this.
7 # Everything above this line will be replaced #
9 use PsN;
10 use data;
11 use debug;
12 use strict;
13 use Getopt::Long;
# Destination variables for the command-line options.  Each one stays
# undefined unless the corresponding option is given.
my ($opt_help,
    $opt_long_help,
    $opt_columns,
    $opt_ignore_columns,
    $opt_debug,
    $opt_debug_package,
    $opt_debug_subroutine);

# Getopt::Long specification: option name (with type suffix) => destination.
# ':s' takes an optional string value; ':0' takes an optional integer and
# stores 0 when the option is given without a value.
my $opts = { "h|?"                => \$opt_help,
             "help"               => \$opt_long_help,
             "columns:s"          => \$opt_columns,
             "ignore_columns:s"   => \$opt_ignore_columns,
             "debug:0"            => \$opt_debug,
             "debug_package:s"    => \$opt_debug_package,
             "debug_subroutine:s" => \$opt_debug_subroutine };

my $res = GetOptions( %{$opts} );

# GetOptions returns false after reporting invalid options on STDERR.
# Leave with a non-zero status so that calling shells and scripts can
# detect the failure (a bare 'exit' would report success).
exit 1 unless $res;
# A data file name is required unless the user only asked for help.
# This is a usage error: report it on STDERR and exit with a non-zero
# status so that callers can tell the run failed.
if ( scalar( @ARGV ) < 1 and !($opt_help or $opt_long_help) ){
    print STDERR "A data file must be specified. Use 'data_stats -h' for help.\n";
    exit 1;
}
# Help output.  -h / -? prints the short usage summary only; -help prints
# the summary followed by the long description.  Both terminate the script
# with a success status.  The here-documents are single-quoted, so nothing
# inside them is interpolated.
if($opt_help or $opt_long_help) {
    print <<'ENDHELP';

    data_stats

    Usage:

    data_stats [ -h | -? ] [ -help ]
               [ -columns='string' ]
               [ -ignore_columns='string' ]
               [ -debug='integer' ]
               [ -debug_package='string' ]
               [ -debug_subroutine='string' ]
               datafile

ENDHELP

    if( $opt_long_help and !$opt_help ){

        print <<'ENDHELP';
    Description:

      data_stats calculates and prints simple statistics for data
      files. Simple statistics are max, min, mean, median, range and
      standard deviation for each column.

    Examples:

      Calculate statistics for all columns in file.dta

      $ data_stats file.dta

      Calculate statistics for all columns except WGT and AGE.

      $ data_stats --ignore_columns=WGT,AGE file.dta

      Calculate statistics for WGT and AGE only

      $ data_stats --columns=WGT,AGE file.dta

    Options:

      The options are given here in their long form. Any option may be
      abbreviated to any nonconflicting prefix. The -columns option may
      be abbreviated to -c (or even -col) but -debug may not be
      abbreviated to -d because it conflicts with -debug_package and
      -debug_subroutine.

      The following options are valid:

      -h | -?

      Print a list of options.


      -columns='strings'

      By default data_stats prints statistics for all columns of the
      data set. But if you give a comma separated list of column headers
      or column numbers with the -columns option, then data_stats will
      print statistics for those columns only.


      -ignore_columns='strings'

      ignore_columns works the opposite way of -columns, it lets you
      select column headers or column numbers for which data_stats
      should not print statistics. It also takes a comma separated list.

      -help

      Print this, longer, help message.


      -debug='integer'

      Default value is: 0

      This is mainly for developers who wish to debug PsN. By default
      'integer' is zero but you can try setting it to '1' and you might
      get some helpful warnings. If you run in to problems that require
      support, you might be told to crank this number up and send the
      output to us.


      -debug_package='string'

      Default value is: empty string

      If used together with '-debug' it is possible to choose which part
      of PsN you want to see debug messages from. Again this is mostly
      for developers.


      -debug_subroutine='string'

      Default value is: empty string

      With this option it is possible to specify, with even finer
      granularity, which part of PsN you want to see debug messages
      from. This is definitely only for developers.

ENDHELP
    }
    exit;
}
# Hand the debug-related option values over to the debug class.
debug->level( $opt_debug );
debug->package( $opt_debug_package );
debug->subroutine( $opt_debug_subroutine );

# Only the first non-option argument is used as the data file name.
my ($datafile) = @ARGV;
my $dataobj    = data->new( filename => $datafile );
# Lookup table of the comma separated entries given with -ignore_columns.
# It stays empty when the option is absent (or has a false value).
my %ignore_columns =
    $opt_ignore_columns
    ? map { ( $_ => 1 ) } split( /,/, $opt_ignore_columns )
    : ();
# Columns to report: the comma separated -columns list when given,
# otherwise every entry of the data object's header.
my @columns = $opt_columns
    ? split( /,/, $opt_columns )
    : @{ $dataobj->header };
# Print a table: one header row naming the reported columns, then one row
# per statistic.  Every cell is left-justified in an 8 character field.

# Columns actually reported: everything in @columns that was not listed
# with -ignore_columns.
my @shown_columns = grep { !$ignore_columns{$_} } @columns;

# Render one statistic as a table cell.  Whole numbers are printed as
# integers; anything else keeps four significant digits.  (Printing every
# value with %d silently dropped the fractional part of the mean, median
# and standard deviation.)
my $format_stat = sub {
    my ($value) = @_;
    return $value == int( $value )
        ? sprintf( '%-8d',   $value )
        : sprintf( '%-8.4g', $value );
};

# Header row: an empty cell above the statistic names, then the columns.
printf "%-8s", ' ' x 8;
foreach my $head ( @shown_columns ){
    printf "%-8s", $head;
}
print "\n";

foreach my $function ( 'Min', 'Max', 'Mean', 'Median', 'Range', 'SD' ){
    printf "%-8s", $function;

    # The data object method is the lower-cased statistic name.
    my $method = lc( $function );

    foreach my $head ( @shown_columns ){
        # Entries containing a non-digit are passed as 'column_head',
        # purely numeric entries as 'column'.
        my $selector = $head =~ /\D/ ? 'column_head' : 'column';

        # Call in list context, as the original printf argument list did.
        my ($value) = $dataobj->$method( $selector => $head );

        my $cell = $format_stat->( $value );
        print $cell;
    }
    print "\n";
}

print "\n";