9 use MogileFS::Util qw(error_code);
12 find_mogclient_or_skip();
14 my $sto = eval { temp_store(); };
18 plan skip_all => "Can't create temporary test database: $@";
25 my ($hostA_ip, $hostB_ip, $hostC_ip) = (qw/127.0.1.1 127.0.1.2 127.0.1.3/);
29 $mogroot{1} = File::Temp::tempdir( CLEANUP => 1 );
30 $mogroot{2} = File::Temp::tempdir( CLEANUP => 1 );
31 $mogroot{3} = File::Temp::tempdir( CLEANUP => 1 );
32 my $dev2host = { 1 => 1, 2 => 1,
35 foreach (sort { $a <=> $b } keys %$dev2host) {
36 my $root = $mogroot{$dev2host->{$_}};
37 mkdir("$root/dev$_") or die "Failed to create dev$_ dir: $!";
40 my $ms1 = create_mogstored($hostA_ip, $mogroot{1});
41 ok($ms1, "got mogstored1");
42 my $ms2 = create_mogstored($hostB_ip, $mogroot{2});
43 ok($ms2, "got mogstored2");
45 while (! -e "$mogroot{1}/dev1/usage" ||
46 ! -e "$mogroot{2}/dev4/usage") { # keep waiting until BOTH mogstored roots have written a usage file
47 print "Waiting on usage...\n";
51 my $tmptrack = create_temp_tracker($sto);
54 my $mogc = MogileFS::Client->new(
56 hosts => [ "127.0.0.1:7001" ],
58 my $be = $mogc->{backend}; # gross, reaching inside of MogileFS::Client
60 # test some basic commands to backend
61 ok($be->do_request("test", {}), "test ping worked");
63 ok($tmptrack->mogadm("domain", "add", "testdom"), "created test domain");
64 ok($tmptrack->mogadm("class", "add", "testdom", "1copy", "--mindevcount=1"), "created 1copy class in testdom");
65 ok($tmptrack->mogadm("class", "add", "testdom", "2copies", "--mindevcount=2"), "created 2copies class in testdom");
67 ok($tmptrack->mogadm("host", "add", "hostA", "--ip=$hostA_ip", "--status=alive"), "created hostA");
68 ok($tmptrack->mogadm("host", "add", "hostB", "--ip=$hostB_ip", "--status=alive"), "created hostB");
70 ok($tmptrack->mogadm("device", "add", "hostA", 1), "created dev1 on hostA");
71 ok($tmptrack->mogadm("device", "add", "hostA", 2), "created dev2 on hostA");
72 ok($tmptrack->mogadm("device", "add", "hostB", 3), "created dev3 on hostB");
73 ok($tmptrack->mogadm("device", "add", "hostB", 4), "created dev4 on hostB");
75 my $ms3 = create_mogstored($hostC_ip, $mogroot{3});
76 ok($ms3, "got mogstored3");
77 ok($tmptrack->mogadm("host", "add", "hostC", "--ip=$hostC_ip", "--status=alive"), "created hostC");
78 ok($tmptrack->mogadm("device", "add", "hostC", 5), "created dev5 on hostC");
79 ok($tmptrack->mogadm("device", "add", "hostC", 6), "created dev6 on hostC");
83 my $was = $be->{timeout}; # can't use local on phash :(
85 ok($be->do_request("do_monitor_round", {}), "waited for monitor")
86 or die "Failed to wait for monitor";
87 ok($be->do_request("do_monitor_round", {}), "waited for monitor")
88 or die "Failed to wait for monitor";
89 $be->{timeout} = $was;
92 # create a couple hundred files now
94 diag("Creating $n_files files...");
95 for my $n (1..$n_files) {
96 my $fh = $mogc->new_file("manyhundred_$n", "2copies")
97 or die "Failed to create manyhundred_$n: " . $mogc->errstr;
98 my $data = "File number $n.\n" x 128;
100 close($fh) or die "Failed to close manyhundred_$n";
101 diag("created $n/$n_files") if $n % 10 == 0;
103 pass("Created a ton of files");
105 # wait for replication to go down
106 # We need to wait for BOTH queues to be empty before we continue to rebalance.
107 # If there is anything left w/ a devid that we rebalance away from, there would
108 # be a failure when the HTTP delete happens simultaneously with the replication.
109 # This will manifest as subtest 48 failing often...
112 my ($to_repl_rows, $to_queue_rows);
115 $to_repl_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_replicate");
116 $to_queue_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_queue");
117 last if $to_repl_rows == 0 && $to_queue_rows == 0; # COUNT(*) values are numbers: compare numerically; done once both queues are empty
118 diag("Files to replicate: file_to_replicate=$to_repl_rows file_to_queue=$to_queue_rows");
121 die "Failed to replicate all $n_files files" if $to_repl_rows || $to_queue_rows;
122 pass("Replicated all $n_files files");
125 # Create a rebalance object and test a few things.
126 use MogileFS::Device;
128 use MogileFS::Config;
129 use MogileFS::Rebalance;
130 use MogileFS::Factory::Host;
131 use MogileFS::Factory::Device;
132 use Data::Dumper qw/Dumper/;
134 my $dfac = MogileFS::Factory::Device->get_factory;
135 my $hfac = MogileFS::Factory::Host->get_factory;
137 $hfac->set($_) for $sto->get_all_hosts;   # hand every host from the store to the host factory
138 $dfac->set($_) for $sto->get_all_devices; # hand every device from the store to the device factory
139 my @devs = $dfac->get_all;
141 ### Hacks to make tests work :/
142 $MogileFS::Config::skipconfig = 1;
143 MogileFS::Config->load_config;
144 for my $h ($hfac->get_all) {
145 print "hostid: ", $h->id, " name: ", $h->hostname, "\n";
146 $h->{observed_state} = "reachable";
149 print "Dev: ", $d->id;
150 print " free: ", $d->percent_free;
151 print " used: ", $d->percent_full;
153 $d->{observed_state} = "writeable";
156 ### Actual rebalance tests.
157 my ($devfids, $devfids2, $saved_state);
158 my $rebal_pol = "from_hosts=1 fid_age=old limit_type=device limit_by=none to_all_devs=0 to_hosts=3 leave_in_drain_mode=0";
160 my $rebal = MogileFS::Rebalance->new;
161 ok($rebal->policy($rebal_pol), "set rebalance policy"); # every assertion is named so a failure identifies the step
162 ok($rebal->init(\@devs), "initialized rebalance with device list");
163 ok($devfids = $rebal->next_fids_to_rebalance(\@devs, $sto, 5), "next_fids_to_rebalance returned fids (first call, 5)");
164 ok($devfids2 = $rebal->next_fids_to_rebalance(\@devs, $sto, 8), "next_fids_to_rebalance returned fids (second call, 8)");
165 ok($saved_state = $rebal->save_state, "saved rebalance state");
166 # print Dumper($rebal), "\n";
172 #print Dumper($saved_state), "\n";
173 #print Dumper($devfids), "\n";
174 #print Dumper($devfids2), "\n";
178 my $rebal = MogileFS::Rebalance->new;
179 ok($rebal->policy($rebal_pol), "set rebalance policy on fresh object"); # named assertions: failures point at the exact step
180 ok($rebal->load_state($saved_state), "loaded saved rebalance state");
181 ok($devfids2 = $rebal->next_fids_to_rebalance(\@devs, $sto, 3), "next_fids_to_rebalance returned fids after load_state (3)");
182 # print Dumper($rebal), "\n";
188 #print Dumper($saved_state), "\n";
189 #print Dumper($devfids2), "\n";
191 # ensure all devices are still marked alive.
192 ok($tmptrack->mogadm("device", "mark", "hostA", 1, "alive"), "dev1 alive");
193 ok($tmptrack->mogadm("device", "mark", "hostA", 2, "alive"), "dev2 alive");
194 ok($tmptrack->mogadm("device", "mark", "hostB", 3, "alive"), "dev3 alive");
195 ok($tmptrack->mogadm("device", "mark", "hostB", 4, "alive"), "dev4 alive");
196 ok($tmptrack->mogadm("device", "mark", "hostC", 5, "alive"), "dev5 alive");
197 ok($tmptrack->mogadm("device", "mark", "hostC", 6, "alive"), "dev6 alive");
200 my $moga = MogileFS::Admin->new(
202 hosts => [ "127.0.0.1:7001" ],
205 ok(! defined $moga->rebalance_stop);
208 # Quickly test the "no dupes" policy.
209 # ensures that source devices are properly filtered.
210 my $rebal_pol_dupes = "from_devices=1";
211 ok($res = $moga->rebalance_set_policy($rebal_pol_dupes));
212 if (! defined $res) {
213 print "Admin error: ", $moga->errstr, "\n";
215 ok($res = $moga->rebalance_test);
217 for my $dev (sort split /,/, $res->{ddevs}) {
222 ok($res = $moga->rebalance_set_policy($rebal_pol));
223 if (! defined $res) {
224 print "Admin error: ", $moga->errstr, "\n";
226 ok($res = $moga->rebalance_test);
227 #print "Test result: ", Dumper($res), "\n\n";
228 ok(! defined $moga->rebalance_status);
229 if (! defined $res) {
230 print "Admin error: ", $moga->errstr, "\n";
232 #print "Status results: ", Dumper($res), "\n\n";
233 ok($res = $moga->rebalance_start);
234 if (! defined $res) {
235 print "Admin error: ", $moga->errstr, "\n";
238 # print "Start results: ", Dumper($res), "\n\n";
241 # This sleep should be replaced with a "rebalance status" check to confirm
242 # it's been started. Otherwise there's up to two seconds where JobMaster might
243 # not have seen the start request yet. Lowered the sleep from 5 to 3.
248 my ($to_repl_rows, $to_queue_rows);
251 $to_repl_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_replicate");
252 $to_queue_rows = $dbh->selectrow_array("SELECT COUNT(*) FROM file_to_queue");
253 last if $to_repl_rows == 0 && $to_queue_rows == 0; # COUNT(*) values are numbers: compare numerically; done once both queues are empty
254 diag("Files to rebalance: file_to_replicate=$to_repl_rows file_to_queue=$to_queue_rows");
257 die "Failed to rebalance all files" if $to_repl_rows || $to_queue_rows;
258 pass("Rebalanced all files"); # message now matches the "rebalance" wording of the surrounding diag/die lines
261 # TODO: Verify that files moved from devs 1,2 to 5,6
262 # select devid, count(*) from file_on group by devid;
264 # TODO: Verify that devices are left in drain mode or not left in drain mode.
266 # NOTE: The above just does some barebones testing. I was using the Dumper
267 # to visually inspect.
268 # For the enterprising, more tests are needed:
269 # - fiddle mbused/mbfree for devices and test the percentages
270 # - test move limits (count, size, etc)
273 my ($tries, $code) = @_;
275 return 1 if $code->();