9 validate_gff3.pl -gff3_file <gff3_file> [-ontology_file <ontology_file1> -ontology_file <ontology_file2> ...]
10 -out <out_file_prefix> [-config <config_file>]
11 [-db_type <db_type>] [-db_dir <db_dir>] [-dbname <dbname>] [-username <username>] [-password <password>]
12 [-verbose <0|1|2>] [-silent <0|1>] [-max_messages <number>]
16 This script analyzes a gff3 file and validates a number of points. It uses the GFF3::Validator module for analysis. For
17 further information on analysis steps, please refer to validate_gff3.pod.
21 The script uses a MySQL or SQLite database to analyze the gff3 file. The gff3 file is parsed and
22 content relevant to the analysis is loaded into the database. Use of database (as opposed to
23 performing analysis in memory) makes processing of large files feasible and significantly
24 increases overall processing speed. At the end of the analysis a
25 report is generated that lists errors and warnings ordered by line numbers. The report file
26 can be easily processed using grep and other Unix text processing tools.
28 The usage of the script follows with descriptions of command-line parameters:
30 validate_gff3.pl -gff3_file <gff3_file> [-ontology_file <ontology_file1> -ontology_file <ontology_file2> ...]
31 -out <out_file_prefix> [-config <config_file>]
32 [-db_type <db_type>] [-db_dir <db_dir>]
33 [-dbname <dbname>] [-username <username>] [-password <password>]
34 [-verbose <0|1|2>] [-silent <0|1>] [-max_messages <number>]
36 -gff3_file : (Required) Name of gff3 file to process.
37 -ontology_file : (Optional) Name of ontology file, multiple files can be specified.
38 Command-line ontology files and ontology files provided as directives
39 are merged and used for analysis. If neither is provided, or none is accessible,
40 default ontology file is retrieved and used.
41 -out : (Required) Prefix to name log and report files, these become <out>.log and <out>.report
42 -config : (Optional) Name of config file; defaults to validate_gff3.cfg in the script's directory (see documentation in validate_gff3.cfg provided in the package
44 -db_type : (Optional) Type of database ('mysql' or 'sqlite').
46 -db_dir : (Optional) Directory to store temp sqlite database files
47 If not available, retrieved from config file (temp_dir param)
48 -dbname : (Optional) Name of MySQL database/SQLite db file to use for analysis.
49 If not available, retrieved from config file.
50 If db_type is 'sqlite' and no dbname is specified and none available in config file, a temp db is used
51 -username : (Optional) Username for analysis database (must have write privileges).
52 If not available, retrieved from config file.
53 If not available, defaults to "".
54 -password : (Optional) Password for analysis database.
55 If not available, retrieved from config file.
56 If not available, defaults to "".
57 -verbose : (Optional) Verbosity of logging.
59 1: Initialization information
60 2: + Progress information
62 If not available, defaults to 2.
63 -silent : (Optional) Whether to suppress logging to screen
66 1: Don't log to screen
67 If not available, defaults to 0.
68 -max_messages : (Optional) Whether to report all errors/warnings
70 0: Report all messages
71 <number>: Exit and report after <number> messages
72 If not available, defaults to 0.
81 use lib
"$FindBin::RealBin/lib";
# Usage text shown when option parsing fails or a required option is missing.
# -gff3_file and -out are mandatory; everything in [brackets] is optional.
# -max_messages is listed here because the option parser accepts it.
my $usage = qq[$FindBin::Script -gff3_file <gff3_file> [-ontology_file <ontology_file1> -ontology_file <ontology_file2> ...]
       -out <out_file_prefix>
       [-config <config_file>]
       [-db_type <db_type>] [-db_dir <db_dir>]
       [-dbname <dbname>] [-username <username>] [-password <password>]
       [-verbose <0|1|2>] [-silent <0|1>]
       [-max_messages <number>]];
95 # Parse command-line params
# Each option is declared with "=s", i.e. it takes a string value and is
# stored in the referenced variable. ontology_files is bound to an array
# reference, which lets the option be given more than once.
# NOTE(review): the spec is spelled "ontology_files" while the documentation
# uses -ontology_file; this presumably works through Getopt::Long's option
# abbreviation -- confirm that auto-abbreviation is not disabled.
# NOTE(review): $out is required by the check that follows, but no "out=s"
# spec is visible in this view -- confirm it is declared on a line not shown.
# If parsing fails, the script dies with the usage text.
109 my $result = GetOptions
("gff3_file=s" => \
$gff3_file,
110 "ontology_files=s" => \
@ontology_files,
112 "config=s" => \
$config,
113 "db_type=s" => \
$db_type,
114 "db_dir=s" => \
$db_dir,
115 "dbname=s" => \
$dbname,
116 "username=s" => \
$username,
117 "password=s" => \
$password,
118 "verbose=s" => \
$verbose,
119 "silent=s" => \
$silent,
120 "max_messages=s" => \
$max_messages,
121 ) or die("Usage: $usage\n");
# The gff3 input file and the output prefix are both mandatory;
# print the usage text and stop when either one is missing.
unless ($gff3_file and $out) {
    die("Usage: $usage\n");
}
# When -config was not supplied, use the config file that sits next to the script.
$config = "$FindBin::RealBin/validate_gff3.cfg" unless $config;

# Read the config file (with -CComments switched off) and flatten it into %config.
my $config_obj = Config::General->new(
    -ConfigFile => $config,
    -CComments  => 0,
);
my %config = $config_obj->getall;
# Populate defaults from config

# Database type: lower-cased, defaulting to 'mysql' when -db_type is absent.
# The "|| ''" keeps lc() from warning on an undefined value.
$db_type = lc($db_type || '') || 'mysql';
croak("Unrecognized database type ($db_type)!") unless $db_type =~ /^(mysql|sqlite)$/;

# Database directory: the command-line value wins, otherwise the config
# file's temp_dir entry is used.
$db_dir ||= $config{temp_dir};

# Check that a directory name is known *before* touching the filesystem.
# Testing/creating an empty name would fail in mkdir with a message that
# contains no directory name, and this guard would never be reached.
croak("Cannot determine db dir!") unless $db_dir;

# Create the directory on first use and try to open it up for all users;
# failing to change the mode is only a warning.
unless (-d $db_dir) {
    mkdir $db_dir or die "$db_dir does not exist, and can't create it\n";
    chmod 0777, $db_dir or warn "WARNING: could not set global temp dir $db_dir world-writable\n";
}
# Work out the DBI data source string ($datasource) from the database type
# and the optional -dbname value.
# Case 1: explicit name + MySQL -> MySQL DSN for that database.
145 if ($dbname && $db_type eq 'mysql') {
146 $datasource = "DBI:mysql:dbname=$dbname";
# Case 2: explicit name + SQLite -> SQLite DSN for that name.
148 elsif ($dbname && $db_type eq 'sqlite') {
149 $datasource = "DBI:SQLite:dbname=$dbname";
# Case 3: no name + MySQL -> take the complete data source from the config file.
151 elsif (!$dbname && $db_type eq 'mysql') {
152 $datasource = $config{datasource
};
# Case 4: no name + SQLite -> create a temp file and use it as the database.
# NOTE(review): the remaining tempfile() arguments are not visible in this
# view; confirm where the file is created and whether it is removed afterwards.
154 elsif (!$dbname && $db_type eq 'sqlite') {
155 my ($temp_fh, $temp_file) = File
::Temp
::tempfile
("validate_gff3_sqlite_XXXXX",
160 $datasource = "DBI:SQLite:dbname=$temp_file";
# NOTE(review): presumably the final fallback branch (its header is not
# visible here); it reuses the config file's datasource entry.
163 $datasource = $config{datasource
}; # Placeholder
# Without a data source there is nothing to connect to, so stop here.
165 croak
("Cannot determine database name!") unless $datasource;
# Log and report file names are derived from the -out prefix.
168 my $log_file = "$out.log";
169 my $report_file = "$out.report";
171 # Create validator object
# The validator receives the config file name, the input file, the database
# connection settings, logging/reporting settings and the ontology file list.
# NOTE(review): $silent is parsed from the command line, but no -silent
# argument is visible in this call -- confirm it is passed on a line not shown.
172 my $validator = GFF3
::Validator
->new(-config
=> $config,
173 -gff3_file
=> $gff3_file,
174 -datasource
=> $datasource,
175 -username
=> $username,
176 -password
=> $password,
177 -verbose
=> $verbose,
179 -max_messages
=> $max_messages,
180 -log_file
=> $log_file,
181 -report_file
=> $report_file,
182 -ontology_files
=> \
@ontology_files,
183 -table_id
=> "", # Currently do not use table id feature within the command-line version
# Run the analysis steps on the validator, one after another, in this order.
for my $step (
    'create_tables',              # Create/Reset tables to store the data
    'load_analysis_dbs',          # Load gff3 analysis database
    'validate_unique_ids',        # Validate unique ids
    'load_ontology',              # Load ontology(s) into memory
    'validate_ontology_terms',    # Validate ontology terms
    'validate_parentage',         # Validate parentage
    'validate_derives_from',      # Validate derives_from
    'dump_report',                # Dump an error report
) {
    $validator->$step();
}

# $validator->cleanup; # Currently, do not clean up within the command-line version

# Mark the end of the run in the log.
$validator->log("# [END]");
219 Payan Canaran <canaran@cshl.edu>
223 $Id: validate_gff3.pl,v 1.1 2007/12/03 14:20:23 canaran Exp $
227 - SQLite support adapted from patch contributed by Robert Buels <rmb32@cornell.edu>.
229 =head1 COPYRIGHT AND LICENSE
231 Copyright (c) 2006-2007 Cold Spring Harbor Laboratory
233 This program is free software; you can redistribute it and/or modify it
234 under the same terms as Perl itself. See DISCLAIMER.txt for
235 disclaimers of warranty.