Bibliography improved.
[mymsc.git] / bibliography.bib
blob351abfdf40c7cf3941d134cc71d17d3cf4ad1f35
1 % THIS IS A COPY, DO NOT EDIT IT!!
2 % REFER TO ORIGINAL /home/rodrigo/Dropbox/papers//bibliography.bib
% Vieira, Zwaenepoel and Buzato (DSN 2009): crash, failover and recovery analysis of dynamic-content web applications.
% NOTE(review): the export left doi, publisher and series blank; they are omitted until real values are known.
@inproceedings{labos2009:viera09,
  author      = {Vieira, Gustavo M. D. and Zwaenepoel, Willy and Buzato, Luiz E.},
  title       = {Dynamic Content Web Applications: Crash, Failover, and Recovery Analysis},
  booktitle   = {Proceedings of the 39th International Conference on Dependable Systems and Networks ({DSN})},
  pages       = {229--238},
  year        = {2009},
  address     = {Estoril, Lisbon, Portugal},
  location    = {Estoril, Lisbon, Portugal},
  url         = {http://www.dsn.org/},
  affiliation = {EPFL},
  unit        = {LABOS},
  details     = {http://infoscience.epfl.ch/record/135193},
  documenturl = {http://infoscience.epfl.ch/getfile.py?recid=135193&mode=best},
  oai-id      = {oai:infoscience.epfl.ch:135193},
  oai-set     = {conf; fulltext-public; fulltext},
  review      = {REVIEWED},
  status      = {PUBLISHED},
}
% Chandra, Hadzilacos and Toueg (J. ACM 43(4), 1996): the weakest failure detector for solving consensus.
@article{Chandra:1996:WFD:234533.234549,
  author    = {Chandra, Tushar D. and Hadzilacos, Vassos and Toueg, Sam},
  title     = {The weakest failure detector for solving consensus},
  journal   = {Journal of the ACM},
  volume    = {43},
  number    = {4},
  pages     = {685--722},
  numpages  = {38},
  month     = jul,
  year      = {1996},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0004-5411},
  doi       = {10.1145/234533.234549},
  url       = {http://doi.acm.org/10.1145/234533.234549},
  acmid     = {234549},
  keywords  = {Byzantine Generals' problem, agreement problem, asynchronous systems, atomic broadcast, commit problem, consensus problem, crash failures, failure detection, fault-tolerance, message passing, partial synchrony, processor failures},
}
% Aguilera, Delporte-Gallet, Fauconnier and Toueg (DISC 2001): stable leader election.
@inproceedings{Aguilera:2001:SLE:645958.676119,
  author    = {Aguilera, Marcos K. and Delporte-Gallet, Carole and Fauconnier, Hugues and Toueg, Sam},
  title     = {Stable Leader Election},
  booktitle = {Proceedings of the 15th International Conference on Distributed Computing},
  series    = {DISC '01},
  pages     = {108--122},
  numpages  = {15},
  year      = {2001},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  isbn      = {3-540-42605-1},
  url       = {http://portal.acm.org/citation.cfm?id=645958.676119},
  acmid     = {676119},
}
% Vieira and Buzato (26th Brazilian Symposium on Computer Networks and Distributed Systems, 2008): Treplica.
@inproceedings{vieira08:_trepl,
  author    = {Vieira, Gustavo M. D. and Buzato, Luiz E.},
  title     = {{Treplica}: Ubiquitous Replication},
  booktitle = {Proceedings of the 26th Brazilian Symposium on Computer Networks and Distributed Systems},
  year      = {2008},
}
% Aguilera, Merchant, Shah, Veitch and Karamanolis (SOSP 2007): Sinfonia.
@inproceedings{Aguilera:2007:SNP:1294261.1294278,
  author    = {Aguilera, Marcos K. and Merchant, Arif and Shah, Mehul and Veitch, Alistair and Karamanolis, Christos},
  title     = {{Sinfonia}: a new paradigm for building scalable distributed systems},
  booktitle = {Proceedings of twenty-first ACM SIGOPS symposium on Operating systems principles},
  series    = {SOSP '07},
  pages     = {159--174},
  numpages  = {16},
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Stevenson, Washington, USA},
  isbn      = {978-1-59593-591-5},
  doi       = {10.1145/1294261.1294278},
  url       = {http://doi.acm.org/10.1145/1294261.1294278},
  acmid     = {1294278},
  keywords  = {distributed systems, fault tolerance, scalability, shared memory, transactions, two-phase commit},
}
% Lamport (Commun. ACM 21(7), 1978): time, clocks, and the ordering of events in a distributed system.
@article{Lamport:1978_clocks,
  author    = {Lamport, Leslie},
  title     = {Time, clocks, and the ordering of events in a distributed system},
  journal   = {Communications of the ACM},
  volume    = {21},
  number    = {7},
  pages     = {558--565},
  numpages  = {8},
  month     = jul,
  year      = {1978},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0001-0782},
  doi       = {10.1145/359545.359563},
  url       = {http://doi.acm.org/10.1145/359545.359563},
  acmid     = {359563},
  keywords  = {clock synchronization, computer networks, distributed systems, multiprocess systems},
}
% Isard, Budiu, Yu, Birrell and Fetterly (EuroSys 2007): Dryad.
@inproceedings{Isard:2007:DDD:1272996.1273005,
  author    = {Isard, Michael and Budiu, Mihai and Yu, Yuan and Birrell, Andrew and Fetterly, Dennis},
  title     = {{Dryad}: distributed data-parallel programs from sequential building blocks},
  booktitle = {Proceedings of the 2nd ACM SIGOPS/EuroSys European Conference on Computer Systems 2007},
  series    = {EuroSys '07},
  pages     = {59--72},
  numpages  = {14},
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Lisbon, Portugal},
  isbn      = {978-1-59593-636-3},
  doi       = {10.1145/1272996.1273005},
  url       = {http://doi.acm.org/10.1145/1272996.1273005},
  acmid     = {1273005},
  keywords  = {cluster computing, concurrency, dataflow, distributed programming},
}
% Defago, Schiper and Urban (ACM Comput. Surv. 36(4), 2004): taxonomy and survey of total order broadcast and multicast algorithms.
@article{Defago:2004,
  author    = {D{\'e}fago, Xavier and Schiper, Andr{\'e} and Urb{\'a}n, P{\'e}ter},
  title     = {Total order broadcast and multicast algorithms: Taxonomy and survey},
  journal   = {ACM Computing Surveys},
  volume    = {36},
  number    = {4},
  pages     = {372--421},
  numpages  = {50},
  month     = dec,
  year      = {2004},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0360-0300},
  doi       = {10.1145/1041680.1041682},
  url       = {http://doi.acm.org/10.1145/1041680.1041682},
  acmid     = {1041682},
  keywords  = {Distributed systems, agreement problems, atomic broadcast, atomic multicast, classification, distributed algorithms, fault-tolerance, global ordering, group communication, message passing, survey, taxonomy, total ordering},
}
% Hadzilacos and Toueg (Cornell technical report, May 1994): modular specification and implementation of fault-tolerant broadcasts.
% The report identifier was stored in the sort-fallback field "key"; it belongs in "number".
@techreport{hadzilacos94,
  author      = {Hadzilacos, Vassos and Toueg, Sam},
  title       = {A modular approach to the specification and implementation of fault-tolerant broadcasts},
  institution = {Department of Computer Science, Cornell University},
  number      = {TR94-1425},
  address     = {Ithaca, NY},
  month       = may,
  year        = {1994},
}
% Rodrigues and Raynal (ICDCS 2000): atomic broadcast in asynchronous crash-recovery distributed systems.
% NOTE(review): the export gives only the first page ("288--"); fill in the last page once verified.
@inproceedings{Rodrigues:2000,
  author    = {Rodrigues, Lu{\'\i}s and Raynal, Michel},
  title     = {Atomic Broadcast in Asynchronous Crash-Recovery Distributed Systems},
  booktitle = {Proceedings of the 20th International Conference on Distributed Computing Systems ({ICDCS} 2000)},
  series    = {ICDCS '00},
  pages     = {288--},
  year      = {2000},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {0-7695-0601-1},
  url       = {http://portal.acm.org/citation.cfm?id=850927.851790},
  acmid     = {851790},
  keywords  = {Distributed Algorithms, Distributed Fault Tolerant Systems, Communication Protocols},
}
% Chandra and Toueg (J. ACM 43(2), 1996): unreliable failure detectors for reliable distributed systems.
@article{Chandra:1996:UFD:226643.226647,
  author    = {Chandra, Tushar D. and Toueg, Sam},
  title     = {Unreliable failure detectors for reliable distributed systems},
  journal   = {Journal of the ACM},
  volume    = {43},
  number    = {2},
  pages     = {225--267},
  numpages  = {43},
  month     = mar,
  year      = {1996},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0004-5411},
  doi       = {10.1145/226643.226647},
  url       = {http://doi.acm.org/10.1145/226643.226647},
  acmid     = {226647},
  keywords  = {Byzantine Generals' problem, agreement problem, asynchronous systems, atomic broadcast, commit problem, consensus problem, crash failures, failure detection, fault-tolerance, message passing, partial synchrony, processor failures},
}
% Schroeder and Gibson (5th USENIX conference on File and Storage Technologies, 2007): disk failures in the real world.
@inproceedings{Schroeder:2007:DFR:1267903.1267904,
  author    = {Schroeder, Bianca and Gibson, Garth A.},
  title     = {Disk failures in the real world: what does an {MTTF} of 1,000,000 hours mean to you?},
  booktitle = {Proceedings of the 5th USENIX conference on File and Storage Technologies},
  pages     = {1--16},
  articleno = {1},
  year      = {2007},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
  location  = {San Jose, CA},
  url       = {http://portal.acm.org/citation.cfm?id=1267903.1267904},
  acmid     = {1267904},
}
% Dwork, Lynch and Stockmeyer (J. ACM 35(2), 1988): consensus in the presence of partial synchrony.
@article{Dwork:1988:CPP:42282.42283,
  author    = {Dwork, Cynthia and Lynch, Nancy and Stockmeyer, Larry},
  title     = {Consensus in the presence of partial synchrony},
  journal   = {Journal of the ACM},
  volume    = {35},
  number    = {2},
  pages     = {288--323},
  numpages  = {36},
  month     = apr,
  year      = {1988},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0004-5411},
  doi       = {10.1145/42282.42283},
  url       = {http://doi.acm.org/10.1145/42282.42283},
  acmid     = {42283},
}
% Boichat, Dutta, Frolund and Guerraoui (SIGACT News 34(1), 2003): deconstructing Paxos.
@article{Boichat_deconstructingpaxos,
  author    = {Boichat, Romain and Dutta, Partha and Fr{\o}lund, Svend and Guerraoui, Rachid},
  title     = {Deconstructing {Paxos}},
  journal   = {SIGACT News},
  volume    = {34},
  number    = {1},
  pages     = {47--67},
  numpages  = {21},
  month     = mar,
  year      = {2003},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0163-5700},
  doi       = {10.1145/637437.637447},
  url       = {http://doi.acm.org/10.1145/637437.637447},
  acmid     = {637447},
}
% Lamport (ACM Trans. Comput. Syst. 16(2), 1998): the part-time parliament.
@article{Lamport:1998:PP:279227.279229,
  author    = {Lamport, Leslie},
  title     = {The part-time parliament},
  journal   = {ACM Transactions on Computer Systems},
  volume    = {16},
  number    = {2},
  pages     = {133--169},
  numpages  = {37},
  month     = may,
  year      = {1998},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0734-2071},
  doi       = {10.1145/279227.279229},
  url       = {http://doi.acm.org/10.1145/279227.279229},
  acmid     = {279229},
  keywords  = {state machines, three-phase commit, voting},
}
% Elnozahy, Alvisi, Wang and Johnson (ACM Comput. Surv. 34(3), 2002): survey of rollback-recovery protocols in message-passing systems.
@article{Elnozahy:2002:SRP:568522.568525,
  author     = {Elnozahy, E. N. (Mootaz) and Alvisi, Lorenzo and Wang, Yi-Min and Johnson, David B.},
  title      = {A survey of rollback-recovery protocols in message-passing systems},
  journal    = {ACM Computing Surveys},
  volume     = {34},
  number     = {3},
  pages      = {375--408},
  numpages   = {34},
  month      = sep,
  year       = {2002},
  issue_date = {September 2002},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0360-0300},
  doi        = {10.1145/568522.568525},
  url        = {http://doi.acm.org/10.1145/568522.568525},
  acmid      = {568525},
  keywords   = {message logging, rollback-recovery},
}
% Koo and Toueg (1986 ACM Fall joint computer conference): checkpointing and rollback-recovery for distributed systems.
@inproceedings{Koo:1986:CRD:324493.325074,
  author    = {Koo, Richard and Toueg, Sam},
  title     = {Checkpointing and rollback-recovery for distributed systems},
  booktitle = {Proceedings of 1986 ACM Fall joint computer conference},
  series    = {ACM '86},
  pages     = {1150--1158},
  numpages  = {9},
  year      = {1986},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  location  = {Dallas, Texas, United States},
  isbn      = {0-8186-4743-4},
  url       = {http://portal.acm.org/citation.cfm?id=324493.325074},
  acmid     = {325074},
}
% Bautista Gomez, Maruyama, Cappello and Matsuoka (2010): distributed diskless checkpoint for large scale systems.
% The export typed this symposium paper as an article with a placeholder volume of 0; it is recorded as a proceedings paper here.
% NOTE(review): the first author's surname is taken to be the compound "Bautista Gomez" - confirm.
@inproceedings{10.1109/CCGRID.2010.40,
  author    = {Bautista Gomez, Leonardo Arturo and Maruyama, Naoya and Cappello, Franck and Matsuoka, Satoshi},
  title     = {Distributed Diskless Checkpoint for Large Scale Systems},
  booktitle = {IEEE International Symposium on Cluster Computing and the Grid},
  pages     = {63--72},
  year      = {2010},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  isbn      = {978-0-7695-4039-9},
  doi       = {10.1109/CCGRID.2010.40},
}
% Charron-Bost and Schiper (SIGACT News 38(1), 2007): harmful dogmas in fault tolerant distributed computing.
@article{Charron-Bost:2007:HDF:1233481.1233496,
  author    = {Charron-Bost, Bernadette and Schiper, Andr{\'e}},
  title     = {Harmful dogmas in fault tolerant distributed computing},
  journal   = {SIGACT News},
  volume    = {38},
  number    = {1},
  pages     = {53--61},
  numpages  = {9},
  month     = mar,
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0163-5700},
  doi       = {10.1145/1233481.1233496},
  url       = {http://doi.acm.org/10.1145/1233481.1233496},
  acmid     = {1233496},
}
% Camargos, Schmidt and Pedone (PODC 2007): Multicoordinated Paxos.
@inproceedings{Camargos:2007:MP:1281100.1281150,
  author    = {Camargos, L{\'a}saro Jonas and Schmidt, Rodrigo Malta and Pedone, Fernando},
  title     = {Multicoordinated {Paxos}},
  booktitle = {Proceedings of the twenty-sixth annual ACM symposium on Principles of distributed computing},
  series    = {PODC '07},
  pages     = {316--317},
  numpages  = {2},
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Portland, Oregon, USA},
  isbn      = {978-1-59593-616-5},
  doi       = {10.1145/1281100.1281150},
  url       = {http://doi.acm.org/10.1145/1281100.1281150},
  acmid     = {1281150},
  keywords  = {Paxos, atomic broadcast, consensus, generalized, multicoordinated},
}
% Fischer (FCT 1983): brief survey of the consensus problem in unreliable distributed systems.
@inproceedings{Fischer:1983:CPU:647891.739594,
  author    = {Fischer, Michael J.},
  title     = {The Consensus Problem in Unreliable Distributed Systems (A Brief Survey)},
  booktitle = {Proceedings of the 1983 International FCT-Conference on Fundamentals of Computation Theory},
  pages     = {127--140},
  numpages  = {14},
  year      = {1983},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  isbn      = {3-540-12689-9},
  url       = {http://portal.acm.org/citation.cfm?id=647891.739594},
  acmid     = {739594},
}
% Shahmirzadi, Mena and Schiper (2009): relaxed atomic broadcast, state-machine replication using bounded memory.
% The export typed this symposium paper as an article with a placeholder volume of 0; it is recorded as a proceedings paper here.
@inproceedings{10.1109/SRDS.2009.25,
  author    = {Shahmirzadi, Omid and Mena, Sergio and Schiper, Andr{\'e}},
  title     = {Relaxed Atomic Broadcast: State-Machine Replication Using Bounded Memory},
  booktitle = {IEEE Symposium on Reliable Distributed Systems},
  pages     = {3--11},
  year      = {2009},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  issn      = {1060-9857},
  doi       = {10.1109/SRDS.2009.25},
}
% Okun and Barak (SRDS 2002): node state reconstruction for fault tolerant distributed algorithms.
% NOTE(review): the export gives only the first page ("160--"); fill in the last page once verified.
@inproceedings{Okun:2002:NSR:829526.831119,
  author    = {Okun, Michael and Barak, Amnon},
  title     = {On Node State Reconstruction for Fault Tolerant Distributed Algorithms},
  booktitle = {Proceedings of the 21st IEEE Symposium on Reliable Distributed Systems},
  series    = {SRDS '02},
  pages     = {160--},
  year      = {2002},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {0-7695-1659-9},
  url       = {http://portal.acm.org/citation.cfm?id=829526.831119},
  acmid     = {831119},
  keywords  = {Distributed algorithms, fault tolerance, state reconstruction, recovery},
}
% Chandra, Griesemer and Redstone (PODC 2007): Paxos made live, an engineering perspective.
@inproceedings{Chandra:2007:PML:1281100.1281103,
  author    = {Chandra, Tushar D. and Griesemer, Robert and Redstone, Joshua},
  title     = {{Paxos} made live: an engineering perspective},
  booktitle = {Proceedings of the twenty-sixth annual ACM symposium on Principles of distributed computing},
  series    = {PODC '07},
  pages     = {398--407},
  numpages  = {10},
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Portland, Oregon, USA},
  isbn      = {978-1-59593-616-5},
  doi       = {10.1145/1281100.1281103},
  url       = {http://doi.acm.org/10.1145/1281100.1281103},
  acmid     = {1281103},
  keywords  = {Paxos, experiences, fault-tolerance, implementation},
}
% Juang and Venkatesan (11th International Conference on Distributed Computing Systems): crash recovery with little overhead.
% NOTE(review): year changed from 2002 to 1991 to agree with the 1991 proceedings named in the original booktitle; the citation key is left unchanged.
@inproceedings{juang2002crash,
  author       = {Juang, T. T. Y. and Venkatesan, S.},
  title        = {Crash recovery with little overhead},
  booktitle    = {Proceedings of the 11th International Conference on Distributed Computing Systems},
  pages        = {454--461},
  year         = {1991},
  organization = {IEEE},
  isbn         = {0818621443},
}
% Freiling, Lambertz and Majster-Cederbaum (PDCAT 2009): modular consensus algorithms for the crash-recovery model.
@inproceedings{Freiling:2009:MCA:1729641.1730101,
  author    = {Freiling, Felix C. and Lambertz, Christian and Majster-Cederbaum, Mila},
  title     = {Modular Consensus Algorithms for the Crash-Recovery Model},
  booktitle = {Proceedings of the 2009 International Conference on Parallel and Distributed Computing, Applications and Technologies},
  series    = {PDCAT '09},
  pages     = {287--292},
  numpages  = {6},
  year      = {2009},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-0-7695-3914-0},
  doi       = {10.1109/PDCAT.2009.50},
  url       = {http://dx.doi.org/10.1109/PDCAT.2009.50},
  acmid     = {1730101},
  keywords  = {asynchronous systems, consensus, fault tolerance, process crash and recovery, stable storage},
}
% Oliveira, Guerraoui and Schiper (technical report 97-239, August 1997): consensus in the crash-recover model.
% NOTE(review): "de Lausanne" was appended to the institution name on the strength of the address field - confirm.
@techreport{oliveira97:consensus,
  author      = {Oliveira, Rui and Guerraoui, Rachid and Schiper, Andr{\'e}},
  title       = {Consensus in the crash-recover model},
  institution = {D{\'e}partement d'Informatique, Ecole Polytechnique F{\'e}d{\'e}rale de Lausanne},
  number      = {97-239},
  address     = {Lausanne, Switzerland},
  month       = aug,
  year        = {1997},
}
% Freiling, Lambertz and Majster-Cederbaum (DISC 2008): easy consensus algorithms for the crash-recovery model.
@inproceedings{Freiling:2008:ECA:1432291.1432332,
  author    = {Freiling, Felix C. and Lambertz, Christian and Majster-Cederbaum, Mila},
  title     = {Easy Consensus Algorithms for the Crash-Recovery Model},
  booktitle = {Proceedings of the 22nd international symposium on Distributed Computing},
  series    = {DISC '08},
  pages     = {507--508},
  numpages  = {2},
  year      = {2008},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  location  = {Arcachon, France},
  isbn      = {978-3-540-87778-3},
  doi       = {10.1007/978-3-540-87779-0_39},
  url       = {http://dx.doi.org/10.1007/978-3-540-87779-0_39},
  acmid     = {1432332},
}
% Vieira and Buzato (Inf. Process. Lett. 107(5), 2008): the coordinator's rule for Fast Paxos.
@article{Vieira:2008:CRF:1390853.1390875,
  author    = {Vieira, Gustavo M. D. and Buzato, Luiz E.},
  title     = {On the coordinator's rule for {Fast Paxos}},
  journal   = {Information Processing Letters},
  volume    = {107},
  number    = {5},
  pages     = {183--187},
  numpages  = {5},
  month     = aug,
  year      = {2008},
  publisher = {Elsevier North-Holland, Inc.},
  address   = {Amsterdam, The Netherlands},
  issn      = {0020-0190},
  doi       = {10.1016/j.ipl.2008.03.001},
  url       = {http://portal.acm.org/citation.cfm?id=1390853.1390875},
  acmid     = {1390875},
  keywords  = {Consensus, Crash-recovery, Distributed systems, Paxos},
}
% Schneider (ACM Comput. Surv. 22(4), 1990): tutorial on implementing fault-tolerant services with the state machine approach.
@article{Schneider:1990:IFS:98163.98167,
  author    = {Schneider, Fred B.},
  title     = {Implementing fault-tolerant services using the state machine approach: a tutorial},
  journal   = {ACM Computing Surveys},
  volume    = {22},
  number    = {4},
  pages     = {299--319},
  numpages  = {21},
  month     = dec,
  year      = {1990},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0360-0300},
  doi       = {10.1145/98163.98167},
  url       = {http://doi.acm.org/10.1145/98163.98167},
  acmid     = {98167},
}
% Lamport (Computer Networks 2(2), 1978): the implementation of reliable distributed multiprocess systems.
@article{lamport1978implementation,
  author    = {Lamport, Leslie},
  title     = {The implementation of reliable distributed multiprocess systems},
  journal   = {Computer Networks (1976)},
  volume    = {2},
  number    = {2},
  pages     = {95--114},
  year      = {1978},
  publisher = {Elsevier},
  issn      = {0376-5075},
}
% Oki and Liskov (PODC 1988): Viewstamped Replication, a primary copy method for highly-available distributed systems.
@inproceedings{Oki:1988:VRN:62546.62549,
  author    = {Oki, Brian M. and Liskov, Barbara H.},
  title     = {Viewstamped Replication: A New Primary Copy Method to Support Highly-Available Distributed Systems},
  booktitle = {Proceedings of the seventh annual ACM Symposium on Principles of distributed computing},
  series    = {PODC '88},
  pages     = {8--17},
  numpages  = {10},
  year      = {1988},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Toronto, Ontario, Canada},
  isbn      = {0-89791-277-2},
  doi       = {10.1145/62546.62549},
  url       = {http://doi.acm.org/10.1145/62546.62549},
  acmid     = {62549},
}
% Lamport, Shostak and Pease (ACM Trans. Program. Lang. Syst. 4(3), 1982): the Byzantine Generals Problem.
@article{Lamport:1982:BGP:357172.357176,
  author     = {Lamport, Leslie and Shostak, Robert and Pease, Marshall},
  title      = {The {Byzantine} Generals Problem},
  journal    = {ACM Transactions on Programming Languages and Systems},
  volume     = {4},
  number     = {3},
  pages      = {382--401},
  numpages   = {20},
  month      = jul,
  year       = {1982},
  issue_date = {July 1982},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0164-0925},
  doi        = {10.1145/357172.357176},
  url        = {http://doi.acm.org/10.1145/357172.357176},
  acmid      = {357176},
}
% Neiger and Toueg (J. Algorithms 11(3), 1990): automatically increasing the fault-tolerance of distributed algorithms.
@article{Neiger:1990:AIF:83334.83337,
  author    = {Neiger, Gil and Toueg, Sam},
  title     = {Automatically increasing the fault-tolerance of distributed algorithms},
  journal   = {Journal of Algorithms},
  volume    = {11},
  number    = {3},
  pages     = {374--419},
  numpages  = {46},
  month     = sep,
  year      = {1990},
  publisher = {Academic Press, Inc.},
  address   = {Duluth, MN, USA},
  issn      = {0196-6774},
  doi       = {10.1016/0196-6774(90)90019-B},
  url       = {http://portal.acm.org/citation.cfm?id=83334.83337},
  acmid     = {83337},
}
% Vieira and Buzato (technical report IC-10-26, August 2010): object-oriented specification for active replication using consensus.
@techreport{vieira10:implementing-tr,
  author      = {Vieira, Gustavo M. D. and Buzato, Luiz E.},
  title       = {Implementation of an Object-Oriented Specification for Active Replication Using Consensus},
  institution = {Instituto de Computa{\c{c}}{\~a}o, Universidade Estadual de Campinas},
  number      = {IC-10-26},
  month       = aug,
  year        = {2010},
}
% Raynal (SIGACT News 36(1), 2005): a short introduction to failure detectors for asynchronous distributed systems.
% NOTE(review): author surname changed from "Reynal" to "Raynal", matching the other "Raynal, Michel" entries in this file; the citation key is left unchanged.
@article{Reynal:2005:SIF:1052796.1052806,
  author    = {Raynal, Michel},
  title     = {A short introduction to failure detectors for asynchronous distributed systems},
  journal   = {SIGACT News},
  volume    = {36},
  number    = {1},
  pages     = {53--70},
  numpages  = {18},
  month     = mar,
  year      = {2005},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0163-5700},
  doi       = {10.1145/1052796.1052806},
  url       = {http://doi.acm.org/10.1145/1052796.1052806},
  acmid     = {1052806},
}
% Bonnet and Raynal (AINA 2010): consensus in anonymous distributed systems and the weakest failure detector question.
@inproceedings{Bonnet:2010:CAD:1825731.1826088,
  author    = {Bonnet, Fran{\c{c}}ois and Raynal, Michel},
  title     = {Consensus in Anonymous Distributed Systems: Is There a Weakest Failure Detector?},
  booktitle = {Proceedings of the 2010 24th IEEE International Conference on Advanced Information Networking and Applications},
  series    = {AINA '10},
  pages     = {206--213},
  numpages  = {8},
  year      = {2010},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-0-7695-4018-4},
  doi       = {10.1109/AINA.2010.19},
  url       = {http://dx.doi.org/10.1109/AINA.2010.19},
  acmid     = {1826088},
}
% Gupta, Chandra and Goldszmidt (PODC 2001): scalable and efficient distributed failure detectors.
@inproceedings{Gupta:2001:SED:383962.384010,
  author    = {Gupta, Indranil and Chandra, Tushar D. and Goldszmidt, Germ{\'a}n S.},
  title     = {On scalable and efficient distributed failure detectors},
  booktitle = {Proceedings of the twentieth annual ACM symposium on Principles of distributed computing},
  series    = {PODC '01},
  pages     = {170--179},
  numpages  = {10},
  year      = {2001},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Newport, Rhode Island, United States},
  isbn      = {1-58113-383-9},
  doi       = {10.1145/383962.384010},
  url       = {http://doi.acm.org/10.1145/383962.384010},
  acmid     = {384010},
  keywords  = {accuracy, distributed systems, efficiency, failure detectors, scalability},
}
% Xiong, Yang, Cao, He and Shu (CSE 2009): survey on fault-tolerance in distributed network systems.
% NOTE(review): the same paper also appears in this file under the key 10.1109/CSE.2009.497.
@inproceedings{Xiong:2009:SFD:1632708.1633468,
  author    = {Xiong, Naixue and Yang, Yan and Cao, Ming and He, Jing and Shu, Lei},
  title     = {A Survey on Fault-Tolerance in Distributed Network Systems},
  booktitle = {Proceedings of the 2009 International Conference on Computational Science and Engineering - Volume 02},
  series    = {CSE '09},
  pages     = {1065--1070},
  numpages  = {6},
  year      = {2009},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-0-7695-3823-5},
  doi       = {10.1109/CSE.2009.497},
  url       = {http://dx.doi.org/10.1109/CSE.2009.497},
  acmid     = {1633468},
  keywords  = {Failure detector, Fault-tolerance, Network Systems, Quality-of-service},
}
% Xiong, Yang, Cao, He and Shu (2009): survey on fault-tolerance in distributed network systems.
% The export typed this conference paper as an article; it is recorded as a proceedings paper here.
% NOTE(review): the same paper also appears in this file under the key Xiong:2009:SFD:1632708.1633468.
@inproceedings{10.1109/CSE.2009.497,
  author    = {Xiong, Naixue and Yang, Yan and Cao, Ming and He, Jing and Shu, Lei},
  title     = {A Survey on Fault-Tolerance in Distributed Network Systems},
  booktitle = {IEEE International Conference on Computational Science and Engineering},
  volume    = {2},
  pages     = {1065--1070},
  year      = {2009},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  isbn      = {978-0-7695-3823-5},
  doi       = {10.1109/CSE.2009.497},
}
% Chockler, Keidar and Vitenberg (ACM Comput. Surv. 33(4), 2001): comprehensive study of group communication specifications.
@article{Chockler:2001:GCS:503112.503113,
  author     = {Chockler, Gregory V. and Keidar, Idit and Vitenberg, Roman},
  title      = {Group communication specifications: a comprehensive study},
  journal    = {ACM Computing Surveys},
  volume     = {33},
  number     = {4},
  pages      = {427--469},
  numpages   = {43},
  month      = dec,
  year       = {2001},
  issue_date = {December 2001},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0360-0300},
  doi        = {10.1145/503112.503113},
  url        = {http://doi.acm.org/10.1145/503112.503113},
  acmid      = {503113},
  keywords   = {Group communication systems, partitionable group membership, process group membership, specifications of group communication systems, view synchrony, virtual synchrony},
}
% Hurfin, Mostefaoui and Raynal (SRDS 1998): consensus in asynchronous systems where processes can crash and recover.
% NOTE(review): the export gives only the first page ("280--"); fill in the last page once verified.
@inproceedings{Hurfin:1998:CAS:829523.830974,
  author    = {Hurfin, Michel and Most{\'e}faoui, Achour and Raynal, Michel},
  title     = {Consensus in Asynchronous Systems Where Processes Can Crash and Recover},
  booktitle = {Proceedings of the 17th IEEE Symposium on Reliable Distributed Systems},
  series    = {SRDS '98},
  pages     = {280--},
  year      = {1998},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {0-8186-9218-9},
  url       = {http://portal.acm.org/citation.cfm?id=829523.830974},
  acmid     = {830974},
}
% Pinheiro, Weber and Barroso (5th USENIX conference on File and Storage Technologies, 2007): failure trends in a large disk drive population.
% The export's degenerate range "2--2" is recorded as the single page 2.
@inproceedings{Pinheiro:2007:FTL:1267903.1267905,
  author    = {Pinheiro, Eduardo and Weber, Wolf-Dietrich and Barroso, Luiz Andr{\'e}},
  title     = {Failure trends in a large disk drive population},
  booktitle = {Proceedings of the 5th USENIX conference on File and Storage Technologies},
  pages     = {2},
  numpages  = {1},
  year      = {2007},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
  location  = {San Jose, CA},
  url       = {http://portal.acm.org/citation.cfm?id=1267903.1267905},
  acmid     = {1267905},
}
% Gray and van Ingen (arXiv e-print cs/0701166, January 2007): empirical measurements of disk failure rates and error rates.
@article{gray07:empirical,
  author        = {Gray, Jim and van Ingen, Catherine},
  title         = {Empirical Measurements of Disk Failure Rates and Error Rates},
  journal       = {ArXiv Computer Science e-prints},
  month         = jan,
  year          = {2007},
  eprint        = {cs/0701166},
  archiveprefix = {arXiv},
  keywords      = {Computer Science - Databases, Computer Science - Architecture},
  adsurl        = {http://adsabs.harvard.edu/abs/2007cs........1166G},
}
% Warns, Storm and Hasselbring (2008): empirical evaluation of the availability of globally distributed nodes.
% The export typed this symposium paper as an article with a placeholder volume of 0; it is recorded as a proceedings paper here.
@inproceedings{10.1109/SRDS.2008.9,
  author    = {Warns, Timo and Storm, Christian and Hasselbring, Wilhelm},
  title     = {Availability of Globally Distributed Nodes: An Empirical Evaluation},
  booktitle = {IEEE Symposium on Reliable Distributed Systems},
  pages     = {279--284},
  year      = {2008},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  issn      = {1060-9857},
  doi       = {10.1109/SRDS.2008.9},
}
% Menderico and Garcia (2010): diskless checkpointing with rollback-dependency trackability.
% The export typed this symposium paper as an article with a placeholder volume of 0; it is recorded as a proceedings paper here.
@inproceedings{10.1109/SRDS.2010.17,
  author    = {Menderico, Raphael Marcos and Garcia, Islene Calciolari},
  title     = {Diskless Checkpointing with Rollback-Dependency Trackability},
  booktitle = {IEEE Symposium on Reliable Distributed Systems},
  pages     = {275--281},
  year      = {2010},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  issn      = {1060-9857},
  doi       = {10.1109/SRDS.2010.17},
}
% Chandy and Lamport (ACM Trans. Comput. Syst. 3(1), 1985): distributed snapshots.
@article{Chandy:1985:DSD:214451.214456,
  author    = {Chandy, K. Mani and Lamport, Leslie},
  title     = {Distributed snapshots: determining global states of distributed systems},
  journal   = {ACM Transactions on Computer Systems},
  volume    = {3},
  number    = {1},
  pages     = {63--75},
  numpages  = {13},
  month     = feb,
  year      = {1985},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0734-2071},
  doi       = {10.1145/214451.214456},
  url       = {http://doi.acm.org/10.1145/214451.214456},
  acmid     = {214456},
}
% Randell, Lee and Treleaven (ACM Comput. Surv. 10(2), 1978): reliability issues in computing system design.
@article{Randell:1978:RIC:356725.356729,
  author    = {Randell, Brian and Lee, Pete and Treleaven, Phillip C.},
  title     = {Reliability Issues in Computing System Design},
  journal   = {ACM Computing Surveys},
  volume    = {10},
  number    = {2},
  pages     = {123--165},
  numpages  = {43},
  month     = jun,
  year      = {1978},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0360-0300},
  doi       = {10.1145/356725.356729},
  url       = {http://doi.acm.org/10.1145/356725.356729},
  acmid     = {356729},
}
% Pease, Shostak and Lamport (J. ACM 27(2), 1980): reaching agreement in the presence of faults.
@article{Pease:1980:RAP:322186.322188,
  author    = {Pease, Marshall and Shostak, Robert and Lamport, Leslie},
  title     = {Reaching Agreement in the Presence of Faults},
  journal   = {Journal of the ACM},
  volume    = {27},
  number    = {2},
  pages     = {228--234},
  numpages  = {7},
  month     = apr,
  year      = {1980},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0004-5411},
  doi       = {10.1145/322186.322188},
  url       = {http://doi.acm.org/10.1145/322186.322188},
  acmid     = {322188},
}
% Song, van Renesse, Schneider and Dolev (ICDCN 2008): the building blocks of consensus.
@inproceedings{Song:2008:BBC:1785854.1785862,
  author    = {Song, Yee Jiun and van Renesse, Robbert and Schneider, Fred B. and Dolev, Danny},
  title     = {The building blocks of consensus},
  booktitle = {Proceedings of the 9th international conference on Distributed computing and networking},
  series    = {ICDCN'08},
  pages     = {54--72},
  numpages  = {19},
  year      = {2008},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  location  = {Kolkata, India},
  isbn      = {3-540-77443-2, 978-3-540-77443-3},
  url       = {http://portal.acm.org/citation.cfm?id=1785854.1785862},
  acmid     = {1785862},
}
% Hayashibara, Urban, Schiper and Katayama (EPFL report, 2002): performance comparison of the Paxos and Chandra-Toueg consensus algorithms.
@techreport{HUS+02/LSR,
  author      = {Hayashibara, Naohiro and Urb{\'a}n, P{\'e}ter and Schiper, Andr{\'e} and Katayama, Takuya},
  title       = {Performance Comparison Between the {Paxos} and {Chandra-Toueg} Consensus Algorithms},
  institution = {EPFL},
  year        = {2002},
  unit        = {LSR},
  details     = {http://infoscience.epfl.ch/record/49945},
  documenturl = {http://infoscience.epfl.ch/getfile.py?recid=49945},
  oai-id      = {oai:infoscience.epfl.ch:49945},
  oai-set     = {fulltext; report; fulltext-public},
}
% Burrows (OSDI 2006): the Chubby lock service for loosely-coupled distributed systems.
@inproceedings{Burrows:2006:CLS:1298455.1298487,
  author    = {Burrows, Mike},
  title     = {The {Chubby} lock service for loosely-coupled distributed systems},
  booktitle = {Proceedings of the 7th symposium on Operating systems design and implementation},
  series    = {OSDI '06},
  pages     = {335--350},
  numpages  = {16},
  year      = {2006},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
  location  = {Seattle, Washington},
  isbn      = {1-931971-47-1},
  url       = {http://portal.acm.org/citation.cfm?id=1298455.1298487},
  acmid     = {1298487},
}
% Camargos, Pedone and Wieloch (SIGOPS Oper. Syst. Rev. 41(3), 2007): Sprint, a middleware for high-performance transaction processing.
% NOTE(review): the same paper also appears in this file as the proceedings entry Camargos:2007:SMH:1272996.1273036.
@article{Camargos:2007:SMH:1272998.1273036,
  author    = {Camargos, L{\'a}saro and Pedone, Fernando and Wieloch, Marcin},
  title     = {{Sprint}: a middleware for high-performance transaction processing},
  journal   = {ACM SIGOPS Operating Systems Review},
  volume    = {41},
  number    = {3},
  pages     = {385--398},
  numpages  = {14},
  month     = mar,
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0163-5980},
  doi       = {10.1145/1272998.1273036},
  url       = {http://doi.acm.org/10.1145/1272998.1273036},
  acmid     = {1273036},
  keywords  = {middleware, parallel databases, partitioning, replication},
}
% Camargos, Pedone and Wieloch (EuroSys 2007): Sprint, a middleware for high-performance transaction processing.
% NOTE(review): the same paper also appears in this file as the journal entry Camargos:2007:SMH:1272998.1273036.
@inproceedings{Camargos:2007:SMH:1272996.1273036,
  author    = {Camargos, L{\'a}saro and Pedone, Fernando and Wieloch, Marcin},
  title     = {{Sprint}: a middleware for high-performance transaction processing},
  booktitle = {Proceedings of the 2nd ACM SIGOPS/EuroSys European Conference on Computer Systems 2007},
  series    = {EuroSys '07},
  pages     = {385--398},
  numpages  = {14},
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Lisbon, Portugal},
  isbn      = {978-1-59593-636-3},
  doi       = {10.1145/1272996.1273036},
  url       = {http://doi.acm.org/10.1145/1272996.1273036},
  acmid     = {1273036},
  keywords  = {middleware, parallel databases, partitioning, replication},
}
% MacCormick et al., "Boxwood: abstractions as the foundation for storage
% infrastructure", 2004 symposium paper published by the USENIX Association.
% Fixes the "Opearting" typo in the booktitle and protects "Boxwood".
% NOTE(review): pages 8--8 with numpages 1 looks like an export placeholder
% rather than the real page range -- confirm against the printed proceedings.
@inproceedings{MacCormick:2004:BAF:1251254.1251262,
  author    = {MacCormick, John and Murphy, Nick and Najork, Marc and Thekkath, Chandramohan A. and Zhou, Lidong},
  title     = {{Boxwood}: abstractions as the foundation for storage infrastructure},
  booktitle = {Proceedings of the 6th conference on Symposium on Operating Systems Design \& Implementation - Volume 6},
  year      = {2004},
  location  = {San Francisco, CA},
  pages     = {8--8},
  numpages  = {1},
  url       = {http://portal.acm.org/citation.cfm?id=1251254.1251262},
  acmid     = {1251262},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
}
% Saito et al., "FAB: building distributed enterprise disk arrays from
% commodity components" -- ASPLOS-XI proceedings entry, pages 48--58.
% The acronym "FAB" is braced against sentence-casing and the DOI is stored
% bare, without the resolver prefix.
@inproceedings{Saito:2004:FBD:1024393.1024400,
  author    = {Saito, Yasushi and Fr{\o}lund, Svend and Veitch, Alistair and Merchant, Arif and Spence, Susan},
  title     = {{FAB}: building distributed enterprise disk arrays from commodity components},
  booktitle = {Proceedings of the 11th International Conference on Architectural Support for Programming Languages and Operating Systems},
  series    = {ASPLOS-XI},
  year      = {2004},
  isbn      = {1-58113-804-0},
  location  = {Boston, MA, USA},
  pages     = {48--58},
  numpages  = {11},
  url       = {http://doi.acm.org/10.1145/1024393.1024400},
  doi       = {10.1145/1024393.1024400},
  acmid     = {1024400},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {consensus, disk array, erasure coding, replication, storage, voting},
}
% Saito et al., "FAB" -- SIGOPS Oper. Syst. Rev. 38(5) entry.
% Shares authors, title, pages and acmid with the ASPLOS-XI entry
% Saito:2004:FBD:1024393.1024400, so it is likely the same paper; cite one
% key consistently.
% The issue is stored in "number" (classic styles ignore "issue"), the month
% uses the predefined macro, and the DOI is stored bare.
@article{Saito:2004:FBD:1037949.1024400,
  author    = {Saito, Yasushi and Fr{\o}lund, Svend and Veitch, Alistair and Merchant, Arif and Spence, Susan},
  title     = {{FAB}: building distributed enterprise disk arrays from commodity components},
  journal   = {SIGOPS Oper. Syst. Rev.},
  volume    = {38},
  number    = {5},
  month     = oct,
  year      = {2004},
  issn      = {0163-5980},
  pages     = {48--58},
  numpages  = {11},
  url       = {http://doi.acm.org/10.1145/1037949.1024400},
  doi       = {10.1145/1037949.1024400},
  acmid     = {1024400},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {consensus, disk array, erasure coding, replication, storage, voting},
}
% Saito et al., "FAB" -- SIGARCH Comput. Archit. News 32(5) entry.
% Shares authors, title, pages and acmid with the ASPLOS-XI entry
% Saito:2004:FBD:1024393.1024400, so it is likely the same paper; cite one
% key consistently.
% The issue is stored in "number" (classic styles ignore "issue"), the month
% uses the predefined macro, and the DOI is stored bare.
@article{Saito:2004:FBD:1037947.1024400,
  author    = {Saito, Yasushi and Fr{\o}lund, Svend and Veitch, Alistair and Merchant, Arif and Spence, Susan},
  title     = {{FAB}: building distributed enterprise disk arrays from commodity components},
  journal   = {SIGARCH Comput. Archit. News},
  volume    = {32},
  number    = {5},
  month     = oct,
  year      = {2004},
  issn      = {0163-5964},
  pages     = {48--58},
  numpages  = {11},
  url       = {http://doi.acm.org/10.1145/1037947.1024400},
  doi       = {10.1145/1037947.1024400},
  acmid     = {1024400},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {consensus, disk array, erasure coding, replication, storage, voting},
}
% Saito et al., "FAB" -- SIGPLAN Not. 39(11) entry.
% Shares authors, title, pages and acmid with the ASPLOS-XI entry
% Saito:2004:FBD:1024393.1024400, so it is likely the same paper; cite one
% key consistently.
% The issue is stored in "number" (classic styles ignore "issue"), the month
% uses the predefined macro, and the DOI is stored bare.
@article{Saito:2004:FBD:1037187.1024400,
  author    = {Saito, Yasushi and Fr{\o}lund, Svend and Veitch, Alistair and Merchant, Arif and Spence, Susan},
  title     = {{FAB}: building distributed enterprise disk arrays from commodity components},
  journal   = {SIGPLAN Not.},
  volume    = {39},
  number    = {11},
  month     = oct,
  year      = {2004},
  issn      = {0362-1340},
  pages     = {48--58},
  numpages  = {11},
  url       = {http://doi.acm.org/10.1145/1037187.1024400},
  doi       = {10.1145/1037187.1024400},
  acmid     = {1024400},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {consensus, disk array, erasure coding, replication, storage, voting},
}
% Vieira, PhD thesis, 2010 (title and school are in Portuguese).
% Spelling corrected ("baseada", "Estadual", "Computacao" with its tilde) and
% accented letters written as LaTeX escapes so the entry also works with
% 8-bit classic BibTeX. The month uses the predefined macro.
@phdthesis{vieira10:thesis,
  author  = {Vieira, Gustavo M. D.},
  title   = {Uma arquitetura de software para replica{\c{c}}{\~a}o baseada em consenso},
  school  = {Universidade Estadual de Campinas, Instituto de Computa{\c{c}}{\~a}o},
  year    = {2010},
  address = {Brasil},
  month   = nov,
}
% Garcia-Molina and Spauster, "Message ordering in a multicast environment",
% pages 354--361.
% The original booktitle string named the 1989 (9th) conference while the
% year field said 2002; the year now agrees with the booktitle.
% The citation key is kept as-is so existing cites still resolve.
% The title is no longer double-braced, so styles can apply their own casing.
@inproceedings{garcia2002message,
  title        = {Message ordering in a multicast environment},
  author       = {Garcia-Molina, Hector and Spauster, Annemarie},
  booktitle    = {Proceedings of the 9th International Conference on Distributed Computing Systems},
  pages        = {354--361},
  isbn         = {0818619538},
  year         = {1989},
  organization = {IEEE},
}
% Cristian, Mishra and Alvarez, "High-performance asynchronous atomic
% broadcast", Distributed Systems Engineering 4(2), 1997.
% Author names use the unambiguous "Last, First" form, like the other
% entries in this file.
% NOTE(review): pages holds a single page number (109), probably only the
% first page of the article -- confirm the full range.
@article{cristian97:high_performance,
  author  = {Cristian, Flaviu and Mishra, Shivakant and Alvarez, Guillermo},
  title   = {High-performance asynchronous atomic broadcast},
  journal = {Distributed Systems Engineering},
  volume  = {4},
  number  = {2},
  pages   = {109},
  url     = {http://stacks.iop.org/0967-1846/4/i=2/a=005},
  year    = {1997},
}
% Gopal and Toueg, "Reliable Broadcast in Synchronous and Asynchronous
% Environments (Preliminary Version)", 3rd International Workshop on
% Distributed Algorithms, 1989, pages 110--123.
% Field values are unchanged; the entry is only re-laid-out and given its
% closing brace so it parses on its own.
@inproceedings{Gopal:1989:RBS:645946.675018,
  author    = {Gopal, Ajei S. and Toueg, Sam},
  title     = {Reliable Broadcast in Synchronous and Asynchronous Environments (Preliminary Version)},
  booktitle = {Proceedings of the 3rd International Workshop on Distributed Algorithms},
  year      = {1989},
  isbn      = {3-540-51687-5},
  pages     = {110--123},
  numpages  = {14},
  url       = {http://portal.acm.org/citation.cfm?id=645946.675018},
  acmid     = {675018},
  publisher = {Springer-Verlag},
  address   = {London, UK},
}
% Bar-Joseph, Keidar and Lynch, "Early-Delivery Dynamic Atomic Broadcast",
% DISC '02, pages 1--16.
% The publisher address had the country duplicated ("London, UK, UK").
@inproceedings{Bar-Joseph:2002:EDA:645959.676132,
  author    = {Bar-Joseph, Ziv and Keidar, Idit and Lynch, Nancy},
  title     = {Early-Delivery Dynamic Atomic Broadcast},
  booktitle = {Proceedings of the 16th International Conference on Distributed Computing},
  series    = {DISC '02},
  year      = {2002},
  isbn      = {3-540-00073-9},
  pages     = {1--16},
  numpages  = {16},
  url       = {http://portal.acm.org/citation.cfm?id=645959.676132},
  acmid     = {676132},
  publisher = {Springer-Verlag},
  address   = {London, UK},
}
% Birman and Joseph, "Exploiting virtual synchrony in distributed systems",
% SOSP '87, pages 123--138.
% The DOI is stored bare (no resolver prefix); booktitle capitalisation is
% normalised and the acronym ACM is braced.
@inproceedings{Birman:1987:EVS:41457.37515,
  author    = {Birman, Kenneth and Joseph, Thomas},
  title     = {Exploiting virtual synchrony in distributed systems},
  booktitle = {Proceedings of the Eleventh {ACM} Symposium on Operating Systems Principles},
  series    = {SOSP '87},
  year      = {1987},
  isbn      = {0-89791-242-X},
  location  = {Austin, Texas, United States},
  pages     = {123--138},
  numpages  = {16},
  url       = {http://doi.acm.org/10.1145/41457.37515},
  doi       = {10.1145/41457.37515},
  acmid     = {37515},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% Felber, Defago, Guerraoui and Oser, "Failure Detectors as First Class
% Objects", 1999.
% The original stored a symposium name in the journal field of an article
% entry; it is now an inproceedings entry with that name as booktitle.
% The placeholder "volume = 0" is dropped, the DOI is stored bare, and the
% accented surname is written as a BibTeX special character.
% NOTE(review): pages holds a single page number (132), probably only the
% first page -- confirm the full range.
@inproceedings{10.1109/DOA.1999.794001,
  author    = {Felber, Pascal and D{\'e}fago, Xavier and Guerraoui, Rachid and Oser, Philipp},
  title     = {Failure Detectors as First Class Objects},
  booktitle = {International Symposium on Distributed Objects and Applications},
  isbn      = {0-7695-0182-6},
  year      = {1999},
  pages     = {132},
  doi       = {10.1109/DOA.1999.794001},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}
978 @article{Aguilera:2000:FDC:1035750.1035753,
979 author = {Aguilera, Marcos Kawazoe and Chen, Wei and Toueg, Sam},
980 title = {Failure detection and consensus in the crash-recovery model},
981 journal = {Distrib. Comput.},
982 volume = {13},
983 issue = {2},
984 month = {April},
985 year = {2000},
986 issn = {0178-2770},
987 pages = {99--125},
988 numpages = {27},
989 url = {http://portal.acm.org/citation.cfm?id=1035750.1035753},
990 doi = {10.1007/s004460050070},
991 acmid = {1035753},
992 publisher = {Springer-Verlag},
993 address = {London, UK},
994 keywords = {asynchronous systems, consensus, failure detection, fault tolerance, process crash, process recovery, stable storage},