BPicture: Fix archive constructor.
[haiku.git] / src / add-ons / kernel / file_systems / cdda / cdda.cpp
blob9de57360ec75410d0d94f2b702fd521f2a6e6b41
1 /*
2 * Copyright 2007-2010, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
7 #include "cdda.h"
9 #include <KernelExport.h>
10 #include <device/scsi.h>
12 #include <algorithm>
13 #include <ctype.h>
14 #include <errno.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <strings.h>
// Layout of a single CD-Text pack as it is parsed by parse_pack_data().
// With the fields below a pack is 18 bytes (4 header bytes, 12 text bytes,
// 2 CRC bytes), provided _PACKED removes all padding.
20 struct cdtext_pack_data {
// pack type; compared against kTrackID, kArtistID, ... by the parser
21 uint8 id;
// track the text belongs to; read_cdtext() treats 0 as the album itself
22 uint8 track;
// sequence number; the parser expects it to grow by one with every pack
23 uint8 number;
// number of characters of the current string that were already stored in
// the previous pack (the parser copies them from the tail of that pack)
24 uint8 character_position : 4;
// only printed by the disabled debug output in this file
25 uint8 block_number : 3;
// set when the text uses two bytes per character (not supported yet)
26 uint8 double_byte : 1;
// payload; strings are null terminated and may span several packs
27 char text[12];
// NOTE(review): never checked anywhere in this file
28 uint8 crc[2];
29 } _PACKED;
// Pack type IDs. is_string_id() treats everything from kTrackID up to and
// including kMessageID as a pack that carries text.
31 enum {
// title; read_cdtext() stores track 0 as the album name
32 kTrackID = 0x80,
// performer; read_cdtext() stores track 0 as the album artist
33 kArtistID = 0x81,
// last of the text carrying pack types
34 kMessageID = 0x85,
// size of the buffer read_cdtext() allocates for the raw CD-Text data
37 static const uint32 kBufferSize = 16384;
// size of the sense data buffer allocated for raw device commands
38 static const uint32 kSenseSize = 1024;
41 // #pragma mark - string functions
/*!	Duplicates \a string on the heap.
	Returns \c NULL if \a string is \c NULL or empty, or if strdup() fails.
	The caller has to free() the returned string.
*/
static char *
copy_string(const char *string)
{
	if (string != NULL && string[0] != '\0')
		return strdup(string);

	return NULL;
}
54 static char *
55 to_utf8(const char* string)
57 char buffer[256];
58 size_t out = 0;
60 // TODO: assume CP1252 or ISO-8859-1 character set for now
61 while (uint32 c = (uint8)string[0]) {
63 if (c < 0x80) {
64 if (out >= sizeof(buffer) - 1)
65 break;
66 // ASCII character: no change needed
67 buffer[out++] = c;
68 } else {
69 if (c < 0xA0) {
70 // Windows CP-1252 - Use a lookup table
71 static const uint32 lookup[] = {
72 0x20AC, 0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
73 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0, 0x017D, 0,
74 0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
75 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0, 0x017E, 0x0178
78 c = lookup[c - 0x80];
81 // Convert to 2 or 3-byte representation
82 if (c == 0) {
83 // invalid character, ignore
84 } else if (c < 0x800) {
85 if (out >= sizeof(buffer) - 2)
86 break;
87 buffer[out++] = 0xc0 | (c >> 6);
88 buffer[out++] = 0x80 | (c & 0x3f);
89 } else {
90 if (out >= sizeof(buffer) - 3)
91 break;
92 buffer[out++] = 0xe0 | (c >> 12);
93 buffer[out++] = 0x80 | ((c >> 6) & 0x3f);
94 buffer[out++] = 0x80 | (c & 0x3f);
98 string++;
100 buffer[out++] = '\0';
102 char *copy = (char *)malloc(out);
103 if (copy == NULL)
104 return NULL;
106 memcpy(copy, buffer, out);
107 return copy;
/*!	Returns whether \a c is a character that should be stripped from the
	start of a string: white space, '-', '/', or '\'.
*/
static bool
is_garbage(char c)
{
	// The <ctype.h> functions are only defined for values representable as
	// unsigned char (or EOF); the strings handled here are UTF-8 and thus
	// contain bytes that are negative when char is signed.
	return isspace((unsigned char)c) || c == '-' || c == '/' || c == '\\';
}
118 static void
119 sanitize_string(char *&string)
121 if (string == NULL)
122 return;
124 // strip garbage at the start
126 uint32 length = strlen(string);
127 uint32 garbage = 0;
128 while (is_garbage(string[garbage])) {
129 garbage++;
132 length -= garbage;
133 if (garbage)
134 memmove(string, string + garbage, length + 1);
136 // strip garbage from the end
138 while (length > 1 && isspace(string[length - 1])) {
139 string[--length] = '\0';
142 if (!string[0]) {
143 // free string if it's empty
144 free(string);
145 string = NULL;
/*!	Finds the first occurrence of \a find in \a string, ignores case.
	Returns a pointer into \a string, or \c NULL if there is no match or
	if either argument is \c NULL. An empty \a find matches at the start
	of \a string.
*/
static char*
find_string(const char *string, const char *find)
{
	if (string == NULL || find == NULL)
		return NULL;

	// tolower() is only defined for unsigned char values, hence the casts
	const int first = tolower((unsigned char)find[0]);
	if (first == '\0')
		return (char *)string;

	// The first character is compared by hand, strncasecmp() only has to
	// look at the remainder.
	const char *rest = find + 1;
	const size_t restLength = strlen(rest);

	for (; string[0] != '\0'; string++) {
		if (tolower((unsigned char)string[0]) != first)
			continue;
		if (strncasecmp(string + 1, rest, restLength) == 0)
			return (char *)string;
	}

	return NULL;
}
175 static void
176 cut_string(char *string, const char *cut)
178 if (string == NULL || cut == NULL)
179 return;
181 char *found = find_string(string, cut);
182 if (found != NULL) {
183 uint32 foundLength = strlen(found);
184 uint32 cutLength = strlen(cut);
185 memmove(found, found + cutLength, foundLength + 1 - cutLength);
190 static void
191 sanitize_album(cdtext &text)
193 cut_string(text.album, text.artist);
194 sanitize_string(text.album);
196 if (text.album != NULL && !strcasecmp(text.album, "My CD")) {
197 // don't laugh, people really do that!
198 free(text.album);
199 text.album = NULL;
202 if ((text.artist == NULL || text.artist[0] == '\0') && text.album != NULL) {
203 // try to extract artist from album
204 char *space = strstr(text.album, " ");
205 if (space != NULL) {
206 space[0] = '\0';
207 text.artist = text.album;
208 text.album = copy_string(space + 2);
210 sanitize_string(text.artist);
211 sanitize_string(text.album);
217 static void
218 sanitize_titles(cdtext &text)
220 for (uint8 i = 0; i < text.track_count; i++) {
221 cut_string(text.titles[i], "(Album Version)");
222 sanitize_string(text.titles[i]);
223 sanitize_string(text.artists[i]);
225 if (text.artists[i] != NULL && text.artist != NULL
226 && !strcasecmp(text.artists[i], text.artist)) {
227 // if the title artist is the same as the main artist, remove it
228 free(text.artists[i]);
229 text.artists[i] = NULL;
232 if (text.titles[i] != NULL && text.titles[i][0] == '\t' && i > 0)
233 text.titles[i] = copy_string(text.titles[i - 1]);
/*!	Checks whether all letters in \a string share the same case.
	\a upper and \a first carry the state over several calls: as long as
	\a first is \c true, no letter has been seen yet; the first letter found
	determines \a upper, and clears \a first. All later letters have to
	match \a upper.
	Returns \c false as soon as a letter with a different case is found,
	\c true otherwise (also for a \c NULL string, or one without letters).
*/
static bool
single_case(const char *string, bool &upper, bool &first)
{
	if (string == NULL)
		return true;

	// The loop condition is the only place the string is advanced from, so
	// that skipping non-letters can never run past the terminating null.
	for (; string[0] != '\0'; string++) {
		// the <ctype.h> functions are only defined for unsigned char values
		const unsigned char c = (unsigned char)string[0];
		if (!isalpha(c))
			continue;

		const bool isUpper = isupper(c) != 0;
		if (first) {
			upper = isUpper;
			first = false;
		} else if (isUpper != upper)
			return false;
	}

	return true;
}
/*!	Rewrites \a string in place, so that the first letter of each word is
	upper case, and all other letters are lower case. Every character that
	is neither a letter nor an apostrophe starts a new word.
	\a string may be \c NULL.
*/
static void
capitalize_string(char *string)
{
	if (string == NULL)
		return;

	// The <ctype.h> functions are only defined for unsigned char values;
	// the strings passed in are UTF-8, and may contain bytes >= 0x80.
	unsigned char c = (unsigned char)string[0];
	bool newWord = isalpha(c) || isspace(c);

	for (; string[0] != '\0'; string++) {
		c = (unsigned char)string[0];

		if (isalpha(c)) {
			string[0] = newWord ? toupper(c) : tolower(c);
			newWord = false;
		} else if (c != '\'') {
			// an apostrophe does not end a word ("Don't")
			newWord = true;
		}
	}
}
284 static void
285 correct_case(cdtext &text)
287 // check if all titles share a single case
288 bool first = true;
289 bool upper;
290 if (!single_case(text.album, upper, first)
291 || !single_case(text.artist, upper, first))
292 return;
294 for (int32 i = 0; i < text.track_count; i++) {
295 if (!single_case(text.titles[i], upper, first)
296 || !single_case(text.artists[i], upper, first))
297 return;
300 // If we get here, everything has a single case; we fix that
301 // and capitalize each word
303 capitalize_string(text.album);
304 capitalize_string(text.artist);
305 for (int32 i = 0; i < text.track_count; i++) {
306 capitalize_string(text.titles[i]);
307 capitalize_string(text.artists[i]);
312 // #pragma mark - CD-Text
315 cdtext::cdtext()
317 artist(NULL),
318 album(NULL),
319 genre(NULL),
320 track_count(0)
322 memset(titles, 0, sizeof(titles));
323 memset(artists, 0, sizeof(artists));
327 cdtext::~cdtext()
329 free(album);
330 free(artist);
331 free(genre);
333 for (uint8 i = 0; i < track_count; i++) {
334 free(titles[i]);
335 free(artists[i]);
340 static bool
341 is_string_id(uint8 id)
343 return id >= kTrackID && id <= kMessageID;
347 /*! Parses a \a pack data into the provided text buffer; the corresponding
348 track number will be left in \a track, and the type of the data in \a id.
349 The pack data is explained in SCSI MMC-3.
351 \a id, \a track, and \a state must stay constant between calls to this
352 function. \a state must be initialized to zero for the first call.
// Extracts the next string from the stream of CD-Text packs.
//
// pack, packLeft: cursor into the pack stream and the number of bytes left
//     in it; both are advanced for every pack that is consumed.
// lastPack: the pack consumed before "pack"; updated while iterating.
// id, track: set to the type and track of the string that is returned.
// state: non-zero when "lastPack" still contains strings of tracks that had
//     no pack of their own; it is then the offset of the next such string
//     within lastPack->text.
// buffer, length: output buffer; on entry "length" is its capacity, on the
//     regular path it is set to the length of the stored string.
//
// Returns false when less than a complete pack is left, or when the pack
// sequence numbers are not consecutive; true when a string was extracted.
//
// NOTE(review): in the "state != 0" path "length" is left at the capacity,
// and nothing explicitly terminates the buffer. "state" can also become 12
// or 13 (index 11 plus 1 or 2), which makes "12 - state" zero or negative
// before it is used as a size - confirm.
// NOTE(review): when the stream ends inside the loop, false is returned
// without handing out the string collected so far, ie. the last string of
// the stream appears to be dropped - confirm that this is intended.
354 static bool
355 parse_pack_data(cdtext_pack_data *&pack, uint32 &packLeft,
356 cdtext_pack_data *&lastPack, uint8 &id, uint8 &track, uint8 &state,
357 char *buffer, size_t &length)
359 if (packLeft < sizeof(cdtext_pack_data))
360 return false;
// sequence number the following packs are checked against
362 uint8 number = pack->number;
// remember the capacity, "length" is reused as the output length below
363 size_t size = length;
365 if (state != 0) {
366 // we had a terminated string and a missing track
367 track++;
// hand out the remainder of the previous pack, starting at "state"
369 memcpy(buffer, lastPack->text + state, 12 - state);
// once the track directly before the current pack has been reached, all
// strings of the previous pack have been handed out
370 if (pack->track - track == 1)
371 state = 0;
372 else
373 state += strnlen(buffer, 12 - state);
374 return true;
377 id = pack->id;
378 track = pack->track;
380 buffer[0] = '\0';
381 length = 0;
// the first "position" characters of this string are stored at the end of
// the previous pack
383 size_t position = pack->character_position;
384 if (position > 0 && lastPack != NULL) {
385 memcpy(buffer, &lastPack->text[12 - position], position);
386 length = position;
// collect all consecutive packs that belong to the same type and track
389 while (id == pack->id && track == pack->track) {
390 #if 0
391 dprintf("%u.%u.%u, %u.%u.%u, ", pack->id, pack->track, pack->number,
392 pack->double_byte, pack->block_number, pack->character_position);
393 for (int32 i = 0; i < 12; i++) {
394 if (isprint(pack->text[i]))
395 dprintf("%c", pack->text[i]);
396 else
397 dprintf("-");
399 dprintf("\n");
400 #endif
401 if (is_string_id(id)) {
402 // TODO: support double byte characters
// text that does not fit into the buffer anymore is silently skipped
403 if (length + 12 < size) {
404 memcpy(buffer + length, pack->text, 12);
405 length += 12;
409 packLeft -= sizeof(cdtext_pack_data);
410 if (packLeft < sizeof(cdtext_pack_data))
411 return false;
413 lastPack = pack;
414 number++;
415 pack++;
// a gap in the sequence numbers ends parsing
417 if (pack->number != number)
418 return false;
// "pack" now points to the first pack of the next string; if it has the
// same type, presumably the characters it reports as already stored in the
// previous pack have to be removed from the end of this string
421 if (id == pack->id) {
422 length -= pack->character_position;
// clamp the length before terminating the string (this also catches the
// wrap-around of the subtraction above)
423 if (length >= size)
424 length = size - 1;
425 buffer[length] = '\0';
427 if (pack->track > lastPack->track + 1) {
428 // there is a missing track
// its string starts right behind the first terminator in the last pack
429 for (int32 i = 0; i < 12; i++) {
430 if (lastPack->text[i] == '\0') {
431 state = i + (lastPack->double_byte ? 2 : 1);
432 break;
438 return true;
442 static void
443 dump_cdtext(cdtext &text)
445 if (text.album)
446 dprintf("Album: \"%s\"\n", text.album);
447 if (text.artist)
448 dprintf("Artist: \"%s\"\n", text.artist);
449 for (uint8 i = 0; i < text.track_count; i++) {
450 dprintf("Track %02u: \"%s\"%s%s%s\n", i + 1, text.titles[i],
451 text.artists[i] ? " (" : "", text.artists[i] ? text.artists[i] : "",
452 text.artists[i] ? ")" : "");
457 static void
458 dump_toc(scsi_toc_toc *toc)
460 int32 numTracks = toc->last_track + 1 - toc->first_track;
462 for (int32 i = 0; i < numTracks; i++) {
463 scsi_toc_track& track = toc->tracks[i];
464 scsi_cd_msf& next = toc->tracks[i + 1].start.time;
465 // the last track is always lead-out
466 scsi_cd_msf& start = toc->tracks[i].start.time;
467 scsi_cd_msf length;
469 uint64 diff = next.minute * kFramesPerMinute
470 + next.second * kFramesPerSecond + next.frame
471 - start.minute * kFramesPerMinute
472 - start.second * kFramesPerSecond - start.frame;
473 length.minute = diff / kFramesPerMinute;
474 length.second = (diff % kFramesPerMinute) / kFramesPerSecond;
475 length.frame = diff % kFramesPerSecond;
477 dprintf("%02u. %02u:%02u.%02u (length %02u:%02u.%02u)\n",
478 track.track_number, start.minute, start.second, start.frame,
479 length.minute, length.second, length.frame);
484 static status_t
485 read_frames(int fd, off_t firstFrame, uint8 *buffer, size_t count)
487 size_t framesLeft = count;
489 while (framesLeft > 0) {
490 // If the initial count was >= 32, and not a multiple of 8, and the
491 // ioctl fails, we switch to reading 8 frames at a time. However the
492 // last read can read between 1 and 7 frames only, to not overflow
493 // the buffer.
494 count = std::min(count, framesLeft);
496 scsi_read_cd read;
497 read.start_m = firstFrame / kFramesPerMinute;
498 read.start_s = (firstFrame / kFramesPerSecond) % 60;
499 read.start_f = firstFrame % kFramesPerSecond;
501 read.length_m = count / kFramesPerMinute;
502 read.length_s = (count / kFramesPerSecond) % 60;
503 read.length_f = count % kFramesPerSecond;
505 read.buffer_length = count * kFrameSize;
506 read.buffer = (char *)buffer;
507 read.play = false;
509 if (ioctl(fd, B_SCSI_READ_CD, &read) < 0) {
510 // drive couldn't read data - try again to read with a smaller block size
511 if (count == 1)
512 return errno;
514 if (count >= 32)
515 count = 8;
516 else
517 count = 1;
519 continue;
522 buffer += count * kFrameSize;
523 framesLeft -= count;
524 firstFrame += count;
527 return B_OK;
531 static status_t
532 read_table_of_contents(int fd, uint32 track, uint8 format, uint8 *buffer,
533 size_t bufferSize)
535 raw_device_command raw;
536 uint8 *senseData = (uint8 *)malloc(kSenseSize);
537 if (senseData == NULL)
538 return B_NO_MEMORY;
540 memset(&raw, 0, sizeof(raw_device_command));
541 memset(senseData, 0, kSenseSize);
542 memset(buffer, 0, bufferSize);
544 scsi_cmd_read_toc &toc = *(scsi_cmd_read_toc*)&raw.command;
545 toc.opcode = SCSI_OP_READ_TOC;
546 toc.time = 1;
547 toc.format = format;
548 toc.track = track;
549 toc.allocation_length = B_HOST_TO_BENDIAN_INT16(bufferSize);
551 raw.command_length = 10;
552 raw.flags = B_RAW_DEVICE_DATA_IN | B_RAW_DEVICE_REPORT_RESIDUAL
553 | B_RAW_DEVICE_SHORT_READ_VALID;
554 raw.scsi_status = 0;
555 raw.cam_status = 0;
556 raw.data = buffer;
557 raw.data_length = bufferSize;
558 raw.timeout = 10000000LL; // 10 secs
559 raw.sense_data = senseData;
560 raw.sense_data_length = sizeof(kSenseSize);
562 if (ioctl(fd, B_RAW_DEVICE_COMMAND, &raw) == 0
563 && raw.scsi_status == 0 && raw.cam_status == 1) {
564 free(senseData);
565 return B_OK;
568 free(senseData);
569 return B_ERROR;
573 // #pragma mark - exported functions
576 status_t
577 read_cdtext(int fd, struct cdtext &cdtext)
579 uint8 *buffer = (uint8 *)malloc(kBufferSize);
580 if (buffer == NULL)
581 return B_NO_MEMORY;
583 // do it twice, just in case...
584 // (at least my CD-ROM sometimes returned broken data on first try)
585 read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
586 kBufferSize);
587 if (read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
588 kBufferSize) != B_OK) {
589 free(buffer);
590 return B_ERROR;
593 scsi_toc_general *header = (scsi_toc_general *)buffer;
595 uint32 packLength = B_BENDIAN_TO_HOST_INT16(header->data_length) - 2;
596 cdtext_pack_data *pack = (cdtext_pack_data *)(header + 1);
597 cdtext_pack_data *lastPack = NULL;
598 uint8 state = 0;
599 uint8 track = 0;
600 uint8 id = 0;
601 char text[256];
603 // TODO: determine encoding!
605 while (true) {
606 size_t length = sizeof(text);
608 if (!parse_pack_data(pack, packLength, lastPack, id, track,
609 state, text, length))
610 break;
612 switch (id) {
613 case kTrackID:
614 if (track == 0) {
615 if (cdtext.album == NULL)
616 cdtext.album = to_utf8(text);
617 } else if (track <= kMaxTracks) {
618 if (cdtext.titles[track - 1] == NULL)
619 cdtext.titles[track - 1] = to_utf8(text);
620 if (track > cdtext.track_count)
621 cdtext.track_count = track;
623 break;
625 case kArtistID:
626 if (track == 0) {
627 if (cdtext.artist == NULL)
628 cdtext.artist = to_utf8(text);
629 } else if (track <= kMaxTracks) {
630 if (cdtext.artists[track - 1] == NULL)
631 cdtext.artists[track - 1] = to_utf8(text);
633 break;
635 default:
636 if (is_string_id(id))
637 dprintf("UNKNOWN %u: \"%s\"\n", id, text);
638 break;
642 free(buffer);
644 if (cdtext.artist == NULL && cdtext.album == NULL)
645 return B_ERROR;
647 for (int i = 0; i < cdtext.track_count; i++) {
648 if (cdtext.titles[i] == NULL)
649 return B_ERROR;
652 sanitize_string(cdtext.artist);
653 sanitize_album(cdtext);
654 sanitize_titles(cdtext);
655 correct_case(cdtext);
657 dump_cdtext(cdtext);
658 return B_OK;
662 status_t
663 read_table_of_contents(int fd, scsi_toc_toc *toc, size_t length)
665 status_t status = read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_TOC,
666 (uint8*)toc, length);
667 if (status < B_OK)
668 return status;
670 // make sure the values in the TOC make sense
672 int32 lastTrack = toc->last_track + 1 - toc->first_track;
673 size_t dataLength = B_BENDIAN_TO_HOST_INT16(toc->data_length) + 2;
674 if (dataLength < sizeof(scsi_toc_toc) || lastTrack <= 0)
675 return B_BAD_DATA;
677 if (length > dataLength)
678 length = dataLength;
680 length -= sizeof(scsi_toc_general);
682 if (lastTrack * sizeof(scsi_toc_track) > length)
683 toc->last_track = length / sizeof(scsi_toc_track) + toc->first_track;
685 dump_toc(toc);
686 return B_OK;
// Copies "length" bytes of audio data, starting at byte "offset", to "data"
// using user_memcpy().
//
// "buffer" ("bufferSize" bytes) is used as scratch space for reading from
// the device "fd". On entry it may still hold the data that starts at byte
// "bufferOffset" (a negative "bufferOffset" disables this); whatever part
// of the request it covers is served from there, the remainder is read
// from the device frame by frame. "endFrame" limits how far is read.
//
// Returns B_OK on success, B_BAD_ADDRESS if copying to "data" failed, or
// the error returned by read_frames().
//
// NOTE(review): the read loop always copies up to "bufferSize - dataOffset"
// bytes, even if "count" was reduced because of "endFrame" - presumably the
// callers never request data beyond the last frame; confirm.
690 status_t
691 read_cdda_data(int fd, off_t endFrame, off_t offset, void *data, size_t length,
692 off_t bufferOffset, void *buffer, size_t bufferSize)
// does the buffer contents overlap with the request at all?
694 if (bufferOffset >= 0 && bufferOffset <= offset + (off_t)length
695 && bufferOffset + (off_t)bufferSize > offset) {
696 if (offset >= bufferOffset) {
697 // buffer reaches into the beginning of the request
698 off_t dataOffset = offset - bufferOffset;
699 size_t bytes = min_c(bufferSize - dataOffset, length);
700 if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
701 return B_BAD_ADDRESS;
// the start of the request is done, continue behind it
703 data = (void *)((uint8 *)data + bytes);
704 length -= bytes;
705 offset += bytes;
706 } else if (offset < bufferOffset
707 && offset + length < bufferOffset + bufferSize) {
708 // buffer overlaps at the end of the request
709 off_t dataOffset = bufferOffset - offset;
710 size_t bytes = length - dataOffset;
711 if (user_memcpy((uint8 *)data + dataOffset, buffer, bytes) < B_OK)
712 return B_BAD_ADDRESS;
// only the end is done: "data" and "offset" stay, the request gets shorter
714 length -= bytes;
716 // we don't handle the case where we would need to split the request
// read the remaining data from the device, one buffer at a time
719 while (length > 0) {
720 off_t frame = offset / kFrameSize;
721 uint32 count = bufferSize / kFrameSize;
// do not read beyond the end frame
722 if (frame + count > endFrame)
723 count = endFrame - frame;
725 status_t status = read_frames(fd, frame, (uint8 *)buffer, count);
726 if (status < B_OK)
727 return status;
// the request does not have to start at a frame boundary
729 off_t dataOffset = offset % kFrameSize;
730 size_t bytes = bufferSize - dataOffset;
731 if (bytes > length)
732 bytes = length;
734 if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
735 return B_BAD_ADDRESS;
737 data = (void *)((uint8 *)data + bytes);
738 length -= bytes;
739 offset += bytes;
742 return B_OK;