make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / load_people.pl
blobcc9de1dce026cf6504c94f5e7f84fe49726a3bd8
1 #!/usr/bin/perl
3 =head1
5 load_people.pl - loading user accounts into cxgn databases
7 =head1 SYNOPSIS
9 load_people.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
13 =head2 ARGUMENTS
15 =over 5
17 =item -H
19 host name (required) e.g. "localhost"
21 =item -D
23 database name (required) e.g. "cxgn_cassava"
25 =item -i
27 path to infile (required)
29 =back
31 =head2 FLAGS
33 =over 5
35 =item -t
37 Test run. Rolling back at the end.
39 =back
41 =head1 DESCRIPTION
43 This script loads user account data into the sgn_people.sp_person and sgn_people.sp_login table. Each column in the spreadsheet represents a single user, and one row will be added to sp_login and one row to sp_person.
45 The input file is xlsx format, and should have the following columns (column order is not important):
47 username
48 first_name
49 last_name
50 email
51 organization
52 address
53 country
54 phone
55 research_keywords
56 research_interests
57 webpage
59 The first four columns listed are required.
61 The script outputs the username, first_name, last_name, email and assigned initial random password. This can be used to send the password to the user.
63 =head1 AUTHOR
65 Lukas Mueller <lam87@cornell.edu>, Nov 2022
67 =cut
69 use strict;
71 use Getopt::Std;
72 use Data::Dumper;
73 use JSON::Any;
74 use JSON::PP;
75 use Carp qw /croak/ ;
76 use Try::Tiny;
77 use Pod::Usage;
78 use Spreadsheet::XLSX;
79 use Bio::Chado::Schema;
80 use CXGN::People::Person;
81 use CXGN::People::Schema;
82 use CXGN::DB::InsertDBH;
83 use SGN::Model::Cvterm;
84 use Text::Iconv;
85 use Crypt::RandPasswd;
86 our ($opt_H, $opt_D, $opt_i, $opt_t);
88 getopts('H:i:tD:');
90 my $dbhost = $opt_H;
91 my $dbname = $opt_D;
92 my $file = $opt_i;
94 print STDERR "Input file: $file\n";
95 print STDERR "DB host: $dbhost\n";
96 print STDERR "DB name: $dbname\n";
97 print STDERR "Rollback: $opt_t\n";
99 if (!$opt_H || !$opt_D || !$opt_i) {
100 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file)\n");
103 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
104 dbname=>$dbname,
105 dbargs => {AutoCommit => 1,
106 RaiseError => 1}
110 my $people_schema = CXGN::People::Schema->connect( sub { $dbh->get_actual_dbh() } );
112 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
113 $dbh->do('SET search_path TO public,sgn,sgn_people');
115 my $converter = Text::Iconv->new("utf-8", "windows-1251");
117 my $excel = Spreadsheet::XLSX->new($opt_i, $converter);
119 my $coderef = sub {
121 foreach my $sheet (@{$excel->{Worksheet}}) {
123 printf("Sheet: %s\n", $sheet->{Name});
125 $sheet->{MaxRow} ||= $sheet->{MinRow};
127 print STDERR "MIN ROW = ".$sheet->{MinRow}."\n";
129 # parse header
131 my @required_headers = qw | first_name last_name username email |;
133 my %ch;
134 my @header;
136 foreach my $row ($sheet->{MinRow} .. $sheet->{MaxRow}) {
137 @header = @{$sheet->{Cells}->[0]};
139 for(my $i =0; $i< @header; $i++) {
140 print STDERR $header[$i]->{Val}."\n";
141 $ch{$header[$i]->{Val} } = $i;
145 print STDERR "HEADERS: ".Dumper(\%ch);
146 print STDERR "REQUIRED: ".Dumper(\@required_headers);
148 my @missing_headers;
149 foreach my $h (@required_headers) {
150 if (!exists($ch{$h})) {
151 push @missing_headers, $h;
155 if (@missing_headers) {
156 print STDERR "Required headers include: ". join(", ", @required_headers)."\n";
157 print STDERR "Missing: ".join(", ", @missing_headers)."\n";
159 die "Check file format for header requirements.";
162 foreach my $row (1 .. $sheet->{MaxRow}) {
164 $sheet->{MaxCol} ||= $sheet->{MinCol};
166 my @fields = map { $_ ->{Val} } @{$sheet->{Cells}->[$row]};
168 my %data;
169 for(my $n =0; $n< @header; $n++) {
170 if ($fields[$n]) {
171 $data{$header[$n]->{Val}} = $fields[$n];
175 print STDERR "DATA: ".Dumper(\%data);
177 if (!$data{username} || !$data{email} || !$data{first_name} || !$data{last_name}) {
178 print STDERR "Not enough information provided in row ".join(", ", @fields).". Skipping.\n";
179 next();
182 my $row = $people_schema->resultset("SpPerson")->find( { username => $data{username} } );
183 if ($row) {
184 print STDERR "Username $data{username} already exists in the database. Skipping this row.\n";
185 next();
188 $row = $people_schema->resultset("SpPerson")->find( { contact_email => $data{email} } );
189 if ($row) {
190 print STDERR "Email $data{contact_email} already exists in the database. Skipping this row.\n";
191 next();
194 my $password =Crypt::RandPasswd->word( 8 , 8 );
195 if ($data{username}) {
196 my $login = CXGN::People::Login->new($dbh);
198 $login->set_username($data{username});
199 $login->set_private_email($data{email});
200 $login->set_pending_email($data{email});
201 $login->set_organization($data{organization});
202 $login->set_password($password);
204 print "$data{first_name}\t$data{last_name}\t$data{username}\t$data{email}\t$password\n";
207 my $sp_person_id = $login->store();
209 print STDERR "SP PERSON ID = ".$sp_person_id."\n";
211 my $person = CXGN::People::Person->new($dbh, $sp_person_id);
213 $person->set_first_name($data{first_name});
214 $person->set_last_name($data{last_name});
215 $person->set_contact_email($data{email});
216 $person->set_address($data{address});
217 $person->set_country($data{country});
218 $person->set_phone_number($data{phone_number});
219 $person->set_research_keywords($data{research_keywords});
220 $person->set_research_interests($data{research_interests});
221 $person->set_webpage($data{webpage});
222 $person->store();
228 try {
229 $schema->txn_do($coderef);
230 if (!$opt_t) {
231 print "Transaction succeeded! Commiting user data! \n\n";
233 else {
234 die "Not storing, rolling back\n";
237 } catch {
238 # Transaction failed
239 die "An error occured! Rolling back!" . $_ . "\n";
242 $dbh->disconnect();