kill a little more dead fsck/Checker code
[MogileFS-Server.git] / lib / MogileFS / Worker / Fsck.pm
blob5fb4e24fed07eef0acf63a6fe4e1511ae12ae238
1 package MogileFS::Worker::Fsck;
3 use strict;
4 use base 'MogileFS::Worker';
5 use fields (
6 'opt_nostat', # bool: do we trust mogstoreds? skipping size stats?
7 );
8 use MogileFS::Util qw(every error debug);
9 use MogileFS::Config;
10 use MogileFS::Server;
11 use List::Util ();
12 use Time::HiRes ();
# Numeric result codes.  Nothing else in this file refers to them; they
# are kept because code outside this file may use them by their
# fully-qualified names.
#
# EV_*: four-character event codes handed to $fid->fsck_log() whenever
# the checker notices or repairs something.
use constant {
    SUCCESS   => 0,
    TEMPORARY => 1,
    PERMANENT => 2,
    REPLICATE => 3,

    EV_NO_PATHS         => "NOPA",
    EV_POLICY_VIOLATION => "POVI",
    EV_FILE_MISSING     => "MISS",
    EV_BAD_LENGTH       => "BLEN",
    EV_CANT_FIX         => "GONE",
    EV_START_SEARCH     => "SRCH",
    EV_FOUND_FID        => "FOND",
    EV_RE_REPLICATE     => "REPL",
    EV_BAD_COUNT        => "BCNT",
};
29 use POSIX ();
31 my $nowish; # approximate unixtime, updated once per loop.
# Watchdog timeout for this worker type.  Always 120; the unit is
# presumably seconds and the value is presumably read by the
# MogileFS::Worker base class -- confirm against the caller.
sub watchdog_timeout {
    return 120;
}
# Main loop of the fsck worker.
#
# Lowers its own scheduling priority, then hands a callback to every()
# with a 2.0 interval.  Each invocation of the callback:
#   1. fetches the pending 'fsck' queue entries,
#   2. drops entries whose FID no longer exists (dequeuing them),
#   3. bulk-loads the device ids of the remaining FIDs,
#   4. runs check_fid() on each one, dequeuing it only when check_fid()
#      reports that it was handled.  FIDs for which check_fid() returns
#      false stay queued so they are retried on a later round.
sub work {
    my $self = shift;

    # this can be CPU-intensive.  let's nice ourselves down.
    POSIX::nice(10);

    my $sto = Mgd::get_store();

    every(2.0, sub {
        my $sleep_set = shift;
        $nowish = time();
        local $Mgd::nowish = $nowish;

        my $queue_todo = $self->queue_todo('fsck');
        # This counts the same as a $self->still_alive;
        $self->send_to_parent('worker_bored 50 fsck');
        return unless @{$queue_todo};
        return unless $self->validate_dbh;

        my @fids = ();
        while (my $todo = shift @{$queue_todo}) {
            my $fid = MogileFS::FID->new($todo->{fid});
            unless ($fid->exists) {
                # FID stopped existing before being checked: take it off
                # the queue and do NOT check it.  Without the 'next' the
                # vanished FID would still be pushed below, run through
                # check_fid()/fix_fid(), and be dequeued a second time.
                $sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE);
                next;
            }
            push(@fids, $fid);
        }
        return unless @fids;

        # re-read on every round so a settings change takes effect
        # without restarting the worker.
        $self->{opt_nostat} = MogileFS::Config->server_setting('fsck_opt_policy_only') || 0;
        MogileFS::FID->mass_load_devids(@fids);

        # don't sleep in loop, next round, since we found stuff to work on
        # this round...
        $sleep_set->(0);

        foreach my $fid (@fids) {
            if (!$self->check_fid($fid)) {
                # some connectivity problem... retry this fid later.
                # (don't dequeue it)
                $self->still_alive;
                next;
            }
            $sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE);
        }
    });
}
# Check a single FID and, if anything looks wrong, try to repair it.
#
# Arguments:
#   $fid - MogileFS::FID whose ->devids data has already been loaded.
#
# Returns:
#   STALLED (0) if the check could not be completed because of a
#               reachability problem, or because fix_fid() died; the
#               caller should retry the FID later.
#   HANDLED (1) if the FID was checked, whether or not problems were
#               found.  When problems are found they are logged through
#               $fid->fsck_log and fix_fid() is invoked.
#
# Dies if the size check fails without recording a known reason.
use constant STALLED => 0;
use constant HANDLED => 1;
sub check_fid {
    my ($self, $fid) = @_;

    # Slow path shared by every failure case below: run fix_fid() and
    # translate its outcome into STALLED/HANDLED.
    my $fix = sub {
        my $fixed = eval { $self->fix_fid($fid) };
        if (! defined $fixed) {
            # fix_fid() died.  Grab $@ before making any further method
            # call, and report the numeric fid id rather than the
            # stringified FID object.
            my $why = $@;
            error("Fsck stalled for fid " . $fid->id . ": $why");
            return STALLED;
        }
        $fid->fsck_log(EV_CANT_FIX) if ! $fixed;

        # that might've all taken awhile, let's update our approximate time
        $nowish = $self->still_alive;
        return HANDLED;
    };

    # first obviously broken case: no devids even presumed to exist.
    unless ($fid->devids) {
        # first, log this weird condition.
        $fid->fsck_log(EV_NO_PATHS);

        # weird, schedule a fix (which will do a search over all
        # devices as a last-ditch effort to locate it)
        return $fix->();
    }

    # first, see if the assumed devids meet the replication policy for
    # the fid's class.
    unless ($fid->devids_meet_policy) {
        # log a policy violation
        $fid->fsck_log(EV_POLICY_VIOLATION);
        return $fix->();
    }

    # This is a simple fixup case
    unless (scalar($fid->devids) == $fid->devcount) {
        # log a bad count
        $fid->fsck_log(EV_BAD_COUNT);

        # TODO: We could fix this without a complete fix pass
        # $fid->update_devcount();
        return $fix->();
    }

    # in the fast case, do nothing else (don't check if assumed file
    # locations are actually there).  in the fast case, all we do is
    # check the replication policy, which is already done, so finish.
    return HANDLED if $self->{opt_nostat};

    # stat each device to see if it's still there.  on first problem,
    # stop and go into the slow(er) fix function.
    my $err;
    my $rv = $self->parallel_check_sizes([ $fid->devfids ], sub {
        my ($dfid, $disk_size) = @_;
        if (! defined $disk_size) {
            my $dev = $dfid->device;
            # We end up checking is_perm_dead twice, but that's the way the
            # flow goes...
            if ($dev->dstate->is_perm_dead) {
                $err = "needfix";
                return 0;
            }
            error("Connectivity problem reaching device " . $dev->id . " on host " . $dev->host->ip . "\n");
            $err = "stalled";
            return 0;
        }
        return 1 if $disk_size == $fid->length;
        $err = "needfix";
        # Note: not doing fsck_log, as fix_fid will log status for each device.
        return 0;
    });

    if ($rv) {
        return HANDLED;
    } elsif ($err eq "stalled") {
        return STALLED;
    } elsif ($err eq "needfix") {
        return $fix->();
    } else {
        die "Unknown error checking fid sizes in parallel.\n";
    }
}
# Look up the on-disk size of every entry in $dflist and hand each
# (entry, size) pair to $cb.
#
# Arguments:
#   $dflist - array ref of objects accepted by $self->size_on_disk
#   $cb     - code ref called as $cb->($entry, $size); a false return
#             stops the walk immediately
#
# Returns 0 as soon as $cb returns false, 1 if $cb accepted every entry
# (including when $dflist is empty).
#
# Despite the name, the entries are currently processed one at a time,
# in list order.
sub parallel_check_sizes {
    my ($self, $dflist, $cb) = @_;

    for my $devfid (@{$dflist}) {
        my $bytes      = $self->size_on_disk($devfid);
        my $keep_going = $cb->($devfid, $bytes);
        return 0 if !$keep_going;
    }

    return 1;
}
# Slow path: called when check_fid() found something wrong with a FID.
# Works out which recorded copies are really present, searches every
# eligible device if none are, drops the file_on mappings of the copies
# that turned out to be bad, and schedules re-replication when the
# surviving copies no longer satisfy the replication policy.
#
# Arguments:
#   $fid - MogileFS::FID to repair
#
# Returns HANDLED (true) if the situation was dealt with, CANT_FIX (0)
# if no intact copy could be found anywhere.
# Dies on errors such as an unreachable device.
use constant CANT_FIX => 0;
sub fix_fid {
    my ($self, $fid) = @_;
    debug(sprintf("Fixing FID %d", $fid->id));

    # This should happen first, since the fid gets awkwardly reloaded...
    $fid->update_devcount;

    # candidate copies to inspect: one DevFID per device this fid is
    # recorded as living on.
    my @dfids = map { MogileFS::DevFID->new($_, $fid) } $fid->devids;

    my @intact_devs;   # devices holding a copy of the right length
    my @stale_devs;    # devices where the copy is missing, wrong, or dead
    my %visited;       # devid -> count; each device is inspected once

    # Inspect every entry of @dfids that has not been visited yet.  When
    # called with a true argument ("desperate" mode) it stops at the first
    # intact copy and does not log per-device problems.
    my $scan_copies = sub {
        my $desperate = shift;

        for my $dfid (@dfids) {
            my $dev = $dfid->device;
            next if $visited{$dev->id}++;

            # Got a dead link, but reaper hasn't cleared it yet?
            if ($dev->dstate->is_perm_dead) {
                push @stale_devs, $dev;
                next;
            }

            my $disk_size = $self->size_on_disk($dfid);
            die "dev " . $dev->id . " unreachable" unless defined $disk_size;

            if ($disk_size == $fid->length) {
                push @intact_devs, $dfid->device;
                # in a desperate search a single hit is all we need.
                return if $desperate;
                next;
            }

            # In desperate mode nearly every device is expected to lack
            # the file, so logging each miss would only be noise.
            if (!$desperate) {
                my $event = ($disk_size == -1) ? EV_FILE_MISSING : EV_BAD_LENGTH;
                $fid->fsck_log($event, $dev);
            }

            push @stale_devs, $dfid->device;
        }
    };

    $scan_copies->();

    # Nothing intact where it was supposed to be: look on every device
    # that is eligible for an fsck search.
    if (!@intact_devs) {
        $fid->fsck_log(EV_START_SEARCH);

        # Devices already inspected above are skipped by $scan_copies.
        my @searchable = grep { $_->dstate->should_fsck_search_on }
                         Mgd::device_factory()->get_all;
        @dfids = List::Util::shuffle(
            map { MogileFS::DevFID->new($_, $fid) } @searchable
        );
        $scan_copies->("desperate");

        # still can't fix it?
        return CANT_FIX unless @intact_devs;

        # found it after all: record where it really lives.
        $fid->fsck_log(EV_FOUND_FID);
        $fid->note_on_device($intact_devs[0]); # at least one good one.

        # fall through to check policy (which will most likely be
        # wrong, with only one file_on record...) and re-replicate
    }

    # remove the file_on mappings for devices that were bogus/missing.
    for my $stale (@stale_devs) {
        error("removing file_on mapping for fid=" . $fid->id . ", dev=" . $stale->id);
        $fid->forget_about_device($stale);
    }

    # in case the devcount or similar was fixed.
    $fid->want_reload;

    # Note: this will reload devids, if they called 'note_on_device'
    # or 'forget_about_device'
    if (!$fid->devids_meet_policy) {
        $fid->enqueue_for_replication(in => 1);
        $fid->fsck_log(EV_RE_REPLICATE);
        return HANDLED;
    }

    # Clean up the device count if it's wrong
    if (scalar($fid->devids) != $fid->devcount) {
        $fid->update_devcount();
        $fid->fsck_log(EV_BAD_COUNT);
    }

    return HANDLED;
}
# Size of the copy described by $dfid, as reported by
# $dfid->size_on_disk.
#
# Returns undef without contacting the device when the device's state
# is permanently dead; otherwise returns whatever $dfid->size_on_disk
# returns.  Callers in this file treat undef as "device unreachable".
#
# NOTE(review): the previous comment said a missing file yields 0, but
# fix_fid() in this file tests for -1 -- confirm the sentinel against
# the DevFID implementation.
sub size_on_disk {
    my ($self, $dfid) = @_;

    my $device_is_dead = $dfid->device->dstate->is_perm_dead;
    return $device_is_dead ? undef : $dfid->size_on_disk;
}
313 # Local Variables:
314 # mode: perl
315 # c-basic-indent: 4
316 # indent-tabs-mode: nil
317 # End: