% THIS IS A COPY, DO NOT EDIT IT!!
% REFER TO THE ORIGINAL: /home/rodrigo/Dropbox/papers//bibliography.bib
@incollection{springerlink:aguilera98,
  author      = {Aguilera, Marcos Kawazoe and Chen, Wei and Toueg, Sam},
  affiliation = {Cornell University, Computer Science Department, Ithaca NY 14853-7501, USA {aguilera,weichen,sam}@cs.cornell.edu},
  title       = {Failure Detection and Consensus in the Crash-Recovery Model},
  booktitle   = {Distributed Computing},
  series      = {Lecture Notes in Computer Science},
  editor      = {Kutten, Shay},
  publisher   = {Springer Berlin / Heidelberg},
}
% NOTE: the citation key keeps its original spelling ("viera09") so that
% existing \cite commands continue to resolve.
@inproceedings{labos2009:viera09,
  address     = {Estoril, Lisbon, Portugal},
  author      = {Vieira, Gustavo M. D. and Zwaenepoel, Willy and Buzato, Luiz E.},
  booktitle   = {Proceedings of the 39th {International} {Conference} on {Dependable} {Systems} and {Networks} ({DSN})},
  details     = {http://infoscience.epfl.ch/record/135193},
  documenturl = {http://infoscience.epfl.ch/getfile.py?recid=135193&mode=best},
  location    = {Estoril, Lisbon, Portugal},
  oai-id      = {oai:infoscience.epfl.ch:135193},
  oai-set     = {conf; fulltext-public; fulltext},
  title       = {Dynamic {Content} {Web} {Applications}: {Crash}, {Failover}, and {Recovery} {Analysis}},
  url         = {http://www.dsn.org/},
}
@article{Chandra:1996:WFD:234533.234549,
  author   = {Chandra, Tushar Deepak and Hadzilacos, Vassos and Toueg, Sam},
  title    = {The weakest failure detector for solving consensus},
  url      = {http://doi.acm.org/10.1145/234533.234549},
  doi      = {10.1145/234533.234549},
  address  = {New York, NY, USA},
  keywords = {Byzantine Generals' problem, agreement problem, asynchronous systems, atomic broadcast, commit problem, consensus problem, crash failures, failure detection, fault-tolerance, message passing, partial synchrony, processor failures},
}
@inproceedings{Aguilera:2001:SLE:645958.676119,
  author    = {Aguilera, Marcos Kawazoe and Delporte-Gallet, Carole and Fauconnier, Hugues and Toueg, Sam},
  title     = {Stable Leader Election},
  booktitle = {Proceedings of the 15th International Conference on Distributed Computing},
  isbn      = {3-540-42605-1},
  url       = {http://portal.acm.org/citation.cfm?id=645958.676119},
  publisher = {Springer-Verlag},
  address   = {London, UK},
}
@inproceedings{vieira08:_trepl,
  author    = {Vieira, Gustavo M. D. and Buzato, Luiz E.},
  title     = {Treplica: {Ubiquitous} {Replication}},
  booktitle = {Proceedings of the 26th Brazilian Symposium on Computer Networks and Distributed Systems},
}
@inproceedings{Aguilera:2007:SNP:1294261.1294278,
  author    = {Aguilera, Marcos K. and Merchant, Arif and Shah, Mehul and Veitch, Alistair and Karamanolis, Christos},
  title     = {Sinfonia: a new paradigm for building scalable distributed systems},
  booktitle = {Proceedings of twenty-first {ACM} {SIGOPS} symposium on Operating systems principles},
  isbn      = {978-1-59593-591-5},
  location  = {Stevenson, Washington, USA},
  url       = {http://doi.acm.org/10.1145/1294261.1294278},
  doi       = {10.1145/1294261.1294278},
  address   = {New York, NY, USA},
  keywords  = {distributed systems, fault tolerance, scalability, shared memory, transactions, two-phase commit},
}
@article{Lamport:1978_clocks,
  author   = {Lamport, Leslie},
  title    = {Time, clocks, and the ordering of events in a distributed system},
  journal  = {Commun. ACM},
  url      = {http://doi.acm.org/10.1145/359545.359563},
  doi      = {10.1145/359545.359563},
  address  = {New York, NY, USA},
  keywords = {clock synchronization, computer networks, distributed systems, multiprocess systems},
}
@inproceedings{Isard:2007:DDD:1272996.1273005,
  author    = {Isard, Michael and Budiu, Mihai and Yu, Yuan and Birrell, Andrew and Fetterly, Dennis},
  title     = {Dryad: distributed data-parallel programs from sequential building blocks},
  booktitle = {Proceedings of the 2nd {ACM} {SIGOPS}/{EuroSys} European Conference on Computer Systems 2007},
  series    = {EuroSys '07},
  isbn      = {978-1-59593-636-3},
  location  = {Lisbon, Portugal},
  url       = {http://doi.acm.org/10.1145/1272996.1273005},
  doi       = {10.1145/1272996.1273005},
  address   = {New York, NY, USA},
  keywords  = {cluster computing, concurrency, dataflow, distributed programming},
}
@article{Defago:2004,
  author   = {D{\'e}fago, Xavier and Schiper, Andr{\'e} and Urb{\'a}n, P{\'e}ter},
  title    = {Total order broadcast and multicast algorithms: Taxonomy and survey},
  journal  = {ACM Comput. Surv.},
  url      = {http://doi.acm.org/10.1145/1041680.1041682},
  doi      = {10.1145/1041680.1041682},
  address  = {New York, NY, USA},
  keywords = {Distributed systems, agreement problems, atomic broadcast, atomic multicast, classification, distributed algorithms, fault-tolerance, global ordering, group communication, message passing, survey, taxonomy, total ordering},
}
@techreport{hadzilacos94,
  author      = {Hadzilacos, Vassos and Toueg, Sam},
  title       = {A modular approach to the specification and implementation of fault-tolerant broadcasts},
  institution = {Department of Computer Science, Cornell University},
  address     = {Ithaca, NY},
}
@inproceedings{Rodrigues:2000,
  author    = {Rodrigues, L. and Raynal, M.},
  title     = {Atomic Broadcast in Asynchronous Crash-Recovery Distributed Systems},
  booktitle = {Proceedings of the 20th International Conference on Distributed Computing Systems ({ICDCS} 2000)},
  series    = {ICDCS '00},
  isbn      = {0-7695-0601-1},
  url       = {http://portal.acm.org/citation.cfm?id=850927.851790},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  keywords  = {Distributed Algorithms, Distributed Fault Tolerant Systems, Communication Protocols},
}
@article{Chandra:1996:UFD:226643.226647,
  author   = {Chandra, Tushar Deepak and Toueg, Sam},
  title    = {Unreliable failure detectors for reliable distributed systems},
  url      = {http://doi.acm.org/10.1145/226643.226647},
  doi      = {10.1145/226643.226647},
  address  = {New York, NY, USA},
  keywords = {Byzantine Generals' problem, agreement problem, asynchronous systems, atomic broadcast, commit problem, consensus problem, crash failures, failure detection, fault-tolerance, message passing, partial synchrony, processor failures},
}
@inproceedings{Schroeder:2007:DFR:1267903.1267904,
  author    = {Schroeder, Bianca and Gibson, Garth A.},
  title     = {Disk failures in the real world: what does an {MTTF} of 1,000,000 hours mean to you?},
  booktitle = {Proceedings of the 5th {USENIX} conference on File and Storage Technologies},
  location  = {San Jose, CA},
  url       = {http://portal.acm.org/citation.cfm?id=1267903.1267904},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
}
@article{Dwork:1988:CPP:42282.42283,
  author  = {Dwork, Cynthia and Lynch, Nancy and Stockmeyer, Larry},
  title   = {Consensus in the presence of partial synchrony},
  url     = {http://doi.acm.org/10.1145/42282.42283},
  doi     = {10.1145/42282.42283},
  address = {New York, NY, USA},
}
@article{Boichat_deconstructingpaxos,
  author  = {Boichat, Romain and Dutta, Partha and Fr{\o}lund, Svend and Guerraoui, Rachid},
  title   = {Deconstructing {Paxos}},
  journal = {SIGACT News},
}
@article{Lamport:1998:PP:279227.279229,
  author   = {Lamport, Leslie},
  title    = {The part-time parliament},
  journal  = {ACM Trans. Comput. Syst.},
  url      = {http://doi.acm.org/10.1145/279227.279229},
  doi      = {10.1145/279227.279229},
  address  = {New York, NY, USA},
  keywords = {state machines, three-phase commit, voting},
}
@article{Elnozahy:2002:SRP:568522.568525,
  author     = {Elnozahy, E. N. (Mootaz) and Alvisi, Lorenzo and Wang, Yi-Min and Johnson, David B.},
  title      = {A survey of rollback-recovery protocols in message-passing systems},
  journal    = {ACM Comput. Surv.},
  issue_date = {September 2002},
  url        = {http://doi.acm.org/10.1145/568522.568525},
  doi        = {10.1145/568522.568525},
  address    = {New York, NY, USA},
  keywords   = {message logging, rollback-recovery},
}
@inproceedings{Koo:1986:CRD:324493.325074,
  author    = {Koo, Richard and Toueg, Sam},
  title     = {Checkpointing and rollback-recovery for distributed systems},
  booktitle = {Proceedings of 1986 {ACM} Fall joint computer conference},
  isbn      = {0-8186-4743-4},
  location  = {Dallas, Texas, United States},
  pages     = {1150--1158},
  url       = {http://portal.acm.org/citation.cfm?id=324493.325074},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
}
% NOTE(review): the "journal" value names a conference; entry type left as
% exported so existing rendering does not change -- consider @inproceedings.
@article{10.1109/CCGRID.2010.40,
  author    = {Bautista Gomez, Leonardo Arturo and Maruyama, Naoya and Cappello, Franck and Matsuoka, Satoshi},
  title     = {Distributed Diskless Checkpoint for Large Scale Systems},
  journal   = {Cluster Computing and the Grid, IEEE International Symposium on},
  isbn      = {978-0-7695-4039-9},
  doi       = {10.1109/CCGRID.2010.40},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}
@article{Charron-Bost:2007:HDF:1233481.1233496,
  author  = {Charron-Bost, Bernadette and Schiper, Andr{\'e}},
  title   = {Harmful dogmas in fault tolerant distributed computing},
  journal = {SIGACT News},
  url     = {http://doi.acm.org/10.1145/1233481.1233496},
  doi     = {10.1145/1233481.1233496},
  address = {New York, NY, USA},
}
@inproceedings{Camargos:2007:MP:1281100.1281150,
  author    = {Camargos, L{\'a}saro Jonas and Schmidt, Rodrigo Malta and Pedone, Fernando},
  title     = {Multicoordinated {Paxos}},
  booktitle = {Proceedings of the twenty-sixth annual {ACM} symposium on Principles of distributed computing},
  isbn      = {978-1-59593-616-5},
  location  = {Portland, Oregon, USA},
  url       = {http://doi.acm.org/10.1145/1281100.1281150},
  doi       = {10.1145/1281100.1281150},
  address   = {New York, NY, USA},
  keywords  = {Paxos, atomic broadcast, consensus, generalized, multicoordinated},
}
@inproceedings{Fischer:1983:CPU:647891.739594,
  author    = {Fischer, Michael J.},
  title     = {The Consensus Problem in Unreliable Distributed Systems (A Brief Survey)},
  booktitle = {Proceedings of the 1983 International {FCT}-Conference on Fundamentals of Computation Theory},
  isbn      = {3-540-12689-9},
  url       = {http://portal.acm.org/citation.cfm?id=647891.739594},
  publisher = {Springer-Verlag},
  address   = {London, UK},
}
% NOTE(review): the "journal" value names a conference; entry type left as
% exported so existing rendering does not change -- consider @inproceedings.
@article{10.1109/SRDS.2009.25,
  author    = {Shahmirzadi, Omid and Mena, Sergio and Schiper, Andre},
  title     = {Relaxed Atomic Broadcast: State-Machine Replication Using Bounded Memory},
  journal   = {Reliable Distributed Systems, IEEE Symposium on},
  doi       = {10.1109/SRDS.2009.25},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}
@inproceedings{Okun:2002:NSR:829526.831119,
  author    = {Okun, Michael and Barak, Amnon},
  title     = {On Node State Reconstruction for Fault Tolerant Distributed Algorithms},
  booktitle = {Proceedings of the 21st {IEEE} Symposium on Reliable Distributed Systems},
  isbn      = {0-7695-1659-9},
  url       = {http://portal.acm.org/citation.cfm?id=829526.831119},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  keywords  = {Distributed algorithms, fault tolerance, state reconstruction, recovery},
}
@inproceedings{Chandra:2007:PML:1281100.1281103,
  author    = {Chandra, Tushar D. and Griesemer, Robert and Redstone, Joshua},
  title     = {{Paxos} made live: an engineering perspective},
  booktitle = {Proceedings of the twenty-sixth annual {ACM} symposium on Principles of distributed computing},
  isbn      = {978-1-59593-616-5},
  location  = {Portland, Oregon, USA},
  url       = {http://doi.acm.org/10.1145/1281100.1281103},
  doi       = {10.1145/1281100.1281103},
  address   = {New York, NY, USA},
  keywords  = {Paxos, experiences, fault-tolerance, implementation},
}
% NOTE(review): the key says 2002 but the booktitle names the 1991
% conference -- confirm the year field. Key kept so citations still resolve.
@inproceedings{juang2002crash,
  title     = {Crash recovery with little overhead},
  author    = {Juang, T. T. Y. and Venkatesan, S.},
  booktitle = {Distributed Computing Systems, 1991., 11th International Conference on},
}
@inproceedings{Freiling:2009:MCA:1729641.1730101,
  author    = {Freiling, Felix C. and Lambertz, Christian and Majster-Cederbaum, Mila},
  title     = {Modular Consensus Algorithms for the Crash-Recovery Model},
  booktitle = {Proceedings of the 2009 International Conference on Parallel and Distributed Computing, Applications and Technologies},
  series    = {PDCAT '09},
  isbn      = {978-0-7695-3914-0},
  url       = {http://dx.doi.org/10.1109/PDCAT.2009.50},
  doi       = {10.1109/PDCAT.2009.50},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  keywords  = {asynchronous systems, consensus, fault tolerance, process crash and recovery, stable storage},
}
@techreport{oliveira97:consensus,
  author      = {Oliveira, R. and Guerraoui, R. and Schiper, A.},
  title       = {Consensus in the crash-recover model},
  institution = {D{\'e}partement d'Informatique, Ecole Polytechnique F{\'e}d{\'e}rale},
  address     = {Lausanne, Switzerland},
}
@inproceedings{Freiling:2008:ECA:1432291.1432332,
  author    = {Freiling, Felix C. and Lambertz, Christian and Majster-Cederbaum, Mila},
  title     = {Easy Consensus Algorithms for the Crash-Recovery Model},
  booktitle = {Proceedings of the 22nd international symposium on Distributed Computing},
  isbn      = {978-3-540-87778-3},
  location  = {Arcachon, France},
  url       = {http://dx.doi.org/10.1007/978-3-540-87779-0_39},
  doi       = {10.1007/978-3-540-87779-0_39},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
}
@article{Vieira:2008:CRF:1390853.1390875,
  author    = {Vieira, Gustavo M. D. and Buzato, Luiz E.},
  title     = {On the coordinator's rule for {Fast Paxos}},
  journal   = {Inf. Process. Lett.},
  url       = {http://portal.acm.org/citation.cfm?id=1390853.1390875},
  doi       = {10.1016/j.ipl.2008.03.001},
  publisher = {Elsevier North-Holland, Inc.},
  address   = {Amsterdam, The Netherlands},
  keywords  = {Consensus, Crash-recovery, Distributed systems, Paxos},
}
@article{Schneider:1990:IFS:98163.98167,
  author  = {Schneider, Fred B.},
  title   = {Implementing fault-tolerant services using the state machine approach: a tutorial},
  journal = {ACM Comput. Surv.},
  url     = {http://doi.acm.org/10.1145/98163.98167},
  doi     = {10.1145/98163.98167},
  address = {New York, NY, USA},
}
@article{lamport1978implementation,
  title   = {The implementation of reliable distributed multiprocess systems},
  author  = {Lamport, L.},
  journal = {Computer Networks (1976)},
}
@inproceedings{Oki:1988:VRN:62546.62549,
  author    = {Oki, Brian M. and Liskov, Barbara H.},
  title     = {Viewstamped Replication: A New Primary Copy Method to Support Highly-Available Distributed Systems},
  booktitle = {Proceedings of the seventh annual {ACM} Symposium on Principles of distributed computing},
  isbn      = {0-89791-277-2},
  location  = {Toronto, Ontario, Canada},
  url       = {http://doi.acm.org/10.1145/62546.62549},
  doi       = {10.1145/62546.62549},
  address   = {New York, NY, USA},
}
@article{Lamport:1982:BGP:357172.357176,
  author     = {Lamport, Leslie and Shostak, Robert and Pease, Marshall},
  title      = {The {Byzantine} Generals Problem},
  journal    = {ACM Trans. Program. Lang. Syst.},
  issue_date = {July 1982},
  url        = {http://doi.acm.org/10.1145/357172.357176},
  doi        = {10.1145/357172.357176},
  address    = {New York, NY, USA},
}
@article{Neiger:1990:AIF:83334.83337,
  author    = {Neiger, Gil and Toueg, Sam},
  title     = {Automatically increasing the fault-tolerance of distributed algorithms},
  journal   = {J. Algorithms},
  url       = {http://portal.acm.org/citation.cfm?id=83334.83337},
  doi       = {10.1016/0196-6774(90)90019-B},
  publisher = {Academic Press, Inc.},
  address   = {Duluth, MN, USA},
}
@techreport{vieira10:implementing-tr,
  author      = {Vieira, Gustavo and Buzato, Luiz},
  title       = {Implementation of an Object-Oriented Specification for Active Replication Using Consensus},
  institution = {Instituto de Computa{\c{c}}{\~a}o, Universidade Estadual de Campinas},
}
% NOTE: the citation key keeps the exporter's misspelling ("Reynal") so
% existing \cite commands still resolve; the author field is corrected.
@article{Reynal:2005:SIF:1052796.1052806,
  author  = {Raynal, Michel},
  title   = {A short introduction to failure detectors for asynchronous distributed systems},
  journal = {SIGACT News},
  url     = {http://doi.acm.org/10.1145/1052796.1052806},
  doi     = {10.1145/1052796.1052806},
  address = {New York, NY, USA},
}
@inproceedings{Bonnet:2010:CAD:1825731.1826088,
  author    = {Bonnet, Fran{\c{c}}ois and Raynal, Michel},
  title     = {Consensus in Anonymous Distributed Systems: Is There a Weakest Failure Detector?},
  booktitle = {Proceedings of the 2010 24th {IEEE} International Conference on Advanced Information Networking and Applications},
  isbn      = {978-0-7695-4018-4},
  url       = {http://dx.doi.org/10.1109/AINA.2010.19},
  doi       = {10.1109/AINA.2010.19},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}
@inproceedings{Gupta:2001:SED:383962.384010,
  author    = {Gupta, Indranil and Chandra, Tushar D. and Goldszmidt, Germ{\'a}n S.},
  title     = {On scalable and efficient distributed failure detectors},
  booktitle = {Proceedings of the twentieth annual {ACM} symposium on Principles of distributed computing},
  isbn      = {1-58113-383-9},
  location  = {Newport, Rhode Island, United States},
  url       = {http://doi.acm.org/10.1145/383962.384010},
  doi       = {10.1145/383962.384010},
  address   = {New York, NY, USA},
  keywords  = {accuracy, distributed systems, efficiency, failure detectors, scalability},
}
@inproceedings{Xiong:2009:SFD:1632708.1633468,
  author    = {Xiong, Naixue and Yang, Yan and Cao, Ming and He, Jing and Shu, Lei},
  title     = {A Survey on Fault-Tolerance in Distributed Network Systems},
  booktitle = {Proceedings of the 2009 International Conference on Computational Science and Engineering - Volume 02},
  isbn      = {978-0-7695-3823-5},
  pages     = {1065--1070},
  url       = {http://dx.doi.org/10.1109/CSE.2009.497},
  doi       = {10.1109/CSE.2009.497},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  keywords  = {Failure detector, Fault-tolerance, Network Systems, Quality-of-service},
}
% NOTE(review): same title, authors, ISBN and DOI as the entry keyed
% Xiong:2009:SFD:1632708.1633468 -- likely a duplicate. Both keys are kept
% because either may be cited.
@article{10.1109/CSE.2009.497,
  author    = {Xiong, Naixue and Yang, Yan and Cao, Ming and He, Jing and Shu, Lei},
  title     = {A Survey on Fault-Tolerance in Distributed Network Systems},
  journal   = {Computational Science and Engineering, IEEE International Conference on},
  isbn      = {978-0-7695-3823-5},
  doi       = {10.1109/CSE.2009.497},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}
@article{Chockler:2001:GCS:503112.503113,
  author     = {Chockler, Gregory V. and Keidar, Idit and Vitenberg, Roman},
  title      = {Group communication specifications: a comprehensive study},
  journal    = {ACM Comput. Surv.},
  issue_date = {December 2001},
  url        = {http://doi.acm.org/10.1145/503112.503113},
  doi        = {10.1145/503112.503113},
  address    = {New York, NY, USA},
  keywords   = {Group communication systems, partitionable group membership, process group membership, specifications of group communication systems, view synchrony, virtual synchrony},
}
@inproceedings{Hurfin:1998:CAS:829523.830974,
  author    = {Hurfin, M. and Most{\'e}faoui, A. and Raynal, M.},
  title     = {Consensus in Asynchronous Systems Where Processes Can Crash and Recover},
  booktitle = {Proceedings of the 17th {IEEE} Symposium on Reliable Distributed Systems},
  isbn      = {0-8186-9218-9},
  url       = {http://portal.acm.org/citation.cfm?id=829523.830974},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}
@inproceedings{Pinheiro:2007:FTL:1267903.1267905,
  author    = {Pinheiro, Eduardo and Weber, Wolf-Dietrich and Barroso, Luiz Andr{\'e}},
  title     = {Failure trends in a large disk drive population},
  booktitle = {Proceedings of the 5th {USENIX} conference on File and Storage Technologies},
  location  = {San Jose, CA},
  url       = {http://portal.acm.org/citation.cfm?id=1267903.1267905},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
}
@article{gray07:empirical,
  author   = {Gray, J. and van Ingen, C.},
  title    = {Empirical Measurements of Disk Failure Rates and Error Rates},
  journal  = {ArXiv Computer Science e-prints},
  eprint   = {arXiv:cs/0701166},
  keywords = {Computer Science - Databases, Computer Science - Architecture},
  adsurl   = {http://adsabs.harvard.edu/abs/2007cs........1166G},
}
% NOTE(review): the "journal" value names a conference; entry type left as
% exported so existing rendering does not change -- consider @inproceedings.
@article{10.1109/SRDS.2008.9,
  author    = {Warns, Timo and Storm, Christian and Hasselbring, Wilhelm},
  title     = {Availability of Globally Distributed Nodes: An Empirical Evaluation},
  journal   = {Reliable Distributed Systems, IEEE Symposium on},
  doi       = {10.1109/SRDS.2008.9},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}
% NOTE(review): the "journal" value names a conference; entry type left as
% exported so existing rendering does not change -- consider @inproceedings.
@article{10.1109/SRDS.2010.17,
  author    = {Menderico, Raphael Marcos and Garcia, Islene Calciolari},
  title     = {Diskless Checkpointing with Rollback-Dependency Trackability},
  journal   = {Reliable Distributed Systems, IEEE Symposium on},
  doi       = {10.1109/SRDS.2010.17},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}
@article{Chandy:1985:DSD:214451.214456,
  author  = {Chandy, K. Mani and Lamport, Leslie},
  title   = {Distributed snapshots: determining global states of distributed systems},
  journal = {ACM Trans. Comput. Syst.},
  url     = {http://doi.acm.org/10.1145/214451.214456},
  doi     = {10.1145/214451.214456},
  address = {New York, NY, USA},
}
@article{Randell:1978:RIC:356725.356729,
  author  = {Randell, B. and Lee, P. and Treleaven, P. C.},
  title   = {Reliability Issues in Computing System Design},
  journal = {ACM Comput. Surv.},
  url     = {http://doi.acm.org/10.1145/356725.356729},
  doi     = {10.1145/356725.356729},
  address = {New York, NY, USA},
}
746 @article
{Pease
:1980:RAP
:322186.322188,
747 author = {Pease
, M. and Shostak
, R. and Lamport
, L.
},
748 title = {Reaching Agreement in the Presence of Faults
},
757 url
= {http
://doi.acm.org
/10.1145/322186.322188},
758 doi
= {http
://doi.acm.org
/10.1145/322186.322188},
761 address = {New York
, NY
, USA
},