bug in model parameters
[PsN.git] / bin / data_stats
blob00ae866b515e86f17cb894acc5eeb390276f6858
1 #!/usr/bin/perl
3 use FindBin qw($Bin);
4 use lib "$Bin/../lib";
6 # Don't edit the line below, it must look exactly like this.
7 # Everything above this line will be replaced #
9 use PsN;
10 use data;
11 use debug;
12 use strict;
13 use Getopt::Long;
# Destination variables for the command-line options parsed below.
my ($opt_help,
    $opt_long_help,
    $opt_columns,
    $opt_ignore_columns,
    $opt_debug,
    $opt_debug_package,
    $opt_debug_subroutine);

# Getopt::Long specification. ':s' takes an optional string value and
# ':0' takes an optional integer that is set to 0 when the value is omitted.
my $opts = { "h|?"                => \$opt_help,
             "help"               => \$opt_long_help,
             "columns:s"          => \$opt_columns,
             "ignore_columns:s"   => \$opt_ignore_columns,
             "debug:0"            => \$opt_debug,
             "debug_package:s"    => \$opt_debug_package,
             "debug_subroutine:s" => \$opt_debug_subroutine };

my $res = GetOptions( %{$opts} );

# GetOptions returns false when the command line is invalid. Leave with a
# non-zero status so that calling shells and scripts can detect the failure
# (a bare 'exit' would report success).
exit 1 unless $res;
# A data file is mandatory unless the user only asked for help. The
# complaint goes to STDERR and the exit status is non-zero because this is
# a usage error, not normal output.
if ( scalar( @ARGV ) < 1 and !($opt_help or $opt_long_help) ){
    print STDERR "A data file must be specified. Use 'data_stats -h' for help.\n";
    exit 1;
}
# Help output. -h / -? prints the short usage summary only; -help prints the
# summary followed by the long description. When both are given the short
# form wins. The script exits successfully after printing help.
if( $opt_help or $opt_long_help ) {
    print <<'ENDHELP';

    data_stats

    Usage:

    data_stats [ -h | -? ] [ -help ]
               [ -columns='string' ]
               [ -ignore_columns='string' ]
               [ -debug='integer' ]
               [ -debug_package='string' ]
               [ -debug_subroutine='string' ]
               datafile

ENDHELP

    if( $opt_long_help and !$opt_help ){

        print <<'ENDHELP';
    Description:

      data_stats calculates and prints simple statistics for data
      files. Simple statistics are max, min, mean, median, range and
      standard deviation for each column.

    Examples:

      Calculate statistics for all columns in file.dta

      $ data_stats file.dta

      Calculate statistics for all columns except WGT and AGE.

      $ data_stats --ignore_columns=WGT,AGE file.dta

      Calculate statistics for WGT and AGE only

      $ data_stats --columns=WGT,AGE file.dta

    Options:

      The options are given here in their long form. Any option may be
      abbreviated to any nonconflicting prefix. The -columns option may
      be abbreviated to -c (or even -col) but -debug may not be
      abbreviated to -d because it conflicts with -debug_package and
      -debug_subroutine.

      The following options are valid:

      -h | -?

      Print a list of options.


      -columns='string'

      By default data_stats prints statistics for all columns of the
      data set. But if you give a comma separated list of column headers
      or column numbers with the -columns option, then data_stats will
      print statistics for those columns only.


      -ignore_columns='string'

      -ignore_columns works the opposite way of -columns: it lets you
      select column headers or column numbers for which data_stats
      should not print statistics. It also takes a comma separated list.

      -help

      Print this, longer, help message.


      -debug='integer'

      Default value is: 0

      This is mainly for developers who wish to debug PsN. By default
      'integer' is zero but you can try setting it to '1' and you might
      get some helpful warnings. If you run into problems that require
      support, you might be told to crank this number up and send the
      output to us.


      -debug_package='string'

      Default value is: empty string

      If used together with '-debug' it is possible to choose which part
      of PsN you want to see debug messages from. Again this is mostly
      for developers.


      -debug_subroutine='string'

      Default value is: empty string

      With this option it is possible to specify, with even finer
      granularity, which part of PsN you want to see debug messages
      from. This is definitely only for developers.

ENDHELP
    }
    exit;
}
# Hand the debug-related command-line values to the debug class, in the
# same order as before: level, then package, then subroutine.
my @debug_settings = (
    [ level      => $opt_debug ],
    [ package    => $opt_debug_package ],
    [ subroutine => $opt_debug_subroutine ],
);

foreach my $setting ( @debug_settings ) {
    my ( $setter, $value ) = @{$setting};
    debug -> $setter( $value );
}
# Only the first non-option argument is used as the data file.
my $datafile = $ARGV[0];
my $dataobj  = data -> new( 'filename' => $datafile );

# Lookup table of the entries given with -ignore_columns (comma separated);
# empty when the option was not used.
my %ignore_columns =
    $opt_ignore_columns
    ? ( map { ( $_ => 1 ) } split( /,/, $opt_ignore_columns ) )
    : ();

# Columns to report: the comma separated -columns list when given,
# otherwise every entry of the data object's header.
my @columns =
    $opt_columns
    ? ( split( /,/, $opt_columns ) )
    : @{ $dataobj -> header };
# Print the statistics table: one header row with the selected columns,
# then one row per statistic (Min, Max, Mean, Median, Range, SD), followed
# by a blank line.

# Render a statistic as text. Integer-valued results are printed as
# integers, exactly as before. Anything else keeps its fractional part
# (five significant digits) instead of being truncated by a %d conversion,
# which made Mean, Median and SD misleading. A missing value is shown as
# 'NA' rather than as a fake 0.
sub _format_stat {
    my ( $value ) = @_;
    return 'NA' unless defined $value and length $value;
    return $value == int( $value )
        ? sprintf( '%d',   $value )
        : sprintf( '%.5g', $value );
}

# Left-justify a cell. Output is identical to the old "%-8s" for text of up
# to seven characters, but longer text is still followed by one space so
# that neighbouring cells never run together.
sub _cell {
    my ( $text ) = @_;
    return sprintf( '%-7s ', $text );
}

my @shown_columns = grep { !$ignore_columns{$_} } @columns;

# Header row: an empty label cell followed by the column names.
print _cell( '' ), ( map { _cell( $_ ) } @shown_columns ), "\n";

foreach my $function ( 'Min', 'Max', 'Mean', 'Median', 'Range', 'SD' ){
    # The data object's statistic methods are the lower-cased row labels.
    my $method = lc( $function );
    print _cell( $function );
    foreach my $head ( @shown_columns ){
        # Purely numeric entries are passed as column numbers, everything
        # else as column header names.
        my $selector = ( $head =~ /[^\d]/ ) ? 'column_head' : 'column';
        print _cell( _format_stat( $dataobj -> $method( $selector => $head ) ) );
    }
    print "\n";
}

print "\n";