strex: added `detectUrl()`
[iv.d.git] / cuefile.d
blob98044e21c1596d924b7a7b84c37b51feb79bd825
1 /* Invisible Vector Library
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 module iv.cuefile /*is aliced*/;
19 import iv.alice;
20 import iv.encoding;
21 import iv.strex;
22 import iv.vfs;
23 import iv.vfs.io;
26 // ////////////////////////////////////////////////////////////////////////// //
27 struct CueFile {
28 public:
/// Checks whether `s[cidx]` starts an apostrophe suffix such as `'s` or `'t`.
///
/// Returns `true` only when all of the following hold:
///   * `s[cidx]` is an apostrophe or a backtick;
///   * the character right before it is an ASCII letter or digit;
///   * the character right after it is one of `s`, `S`, `t`, `T`;
///   * that suffix letter is the last character of `s`, or it is followed
///     by a character that is not greater than a space.
static bool isEs (const(char)[] s, usize cidx) {
  // one character is required before `cidx` and one after it
  if (cidx == 0 || cidx >= s.length || s.length-cidx < 2) return false;

  immutable char quote = s[cidx];
  if (quote != '\'' && quote != '`') return false;

  switch (s[cidx+1]) {
    case 's', 'S', 't', 'T': break;
    default: return false;
  }

  immutable char prev = s[cidx-1];
  immutable bool prevIsAlnum =
    (prev >= '0' && prev <= '9') ||
    (prev >= 'a' && prev <= 'z') ||
    (prev >= 'A' && prev <= 'Z');
  if (!prevIsAlnum) return false;

  // the suffix letter has to end the word
  return (cidx+2 >= s.length || s[cidx+2] <= ' ');
}
/// Builds a lower-case, ASCII-only name out of `s`.
///
/// Bytes in the 0xA3/0xB3/0xC0..0xFF range are replaced with Latin
/// transliterations (presumably KOI8 Cyrillic letters, as the function name
/// and its call site with `recodeToKOI8` suggest -- TODO confirm).
/// ASCII letters are lower-cased, digits are kept, `&` becomes `and`, an
/// apostrophe recognized by `isEs` is dropped, and every other character
/// collapses into a single `_` separator. Leading and trailing separators
/// are never emitted; an empty result is replaced with `"untitled"`.
static string koi2trlocase (const(char)[] s) {
  string outName;

  // appends one `_`, but never at the very start and never twice in a row
  void putSeparator () {
    if (outName.length > 0 && outName[$-1] != '_') outName ~= '_';
  }

  foreach (immutable pos, char ch; s) {
    switch (ch) {
      case '\xe1', '\xc1': outName ~= "a"; break;
      case '\xe2', '\xc2': outName ~= "b"; break;
      case '\xf7', '\xd7': outName ~= "v"; break;
      case '\xe7', '\xc7': outName ~= "g"; break;
      case '\xe4', '\xc4': outName ~= "d"; break;
      case '\xe5', '\xc5': outName ~= "e"; break;
      case '\xb3', '\xa3': outName ~= "yo"; break;
      case '\xf6', '\xd6': outName ~= "zh"; break;
      case '\xfa', '\xda': outName ~= "z"; break;
      case '\xe9', '\xc9': outName ~= "i"; break;
      case '\xea', '\xca': outName ~= "j"; break;
      case '\xeb', '\xcb': outName ~= "k"; break;
      case '\xec', '\xcc': outName ~= "l"; break;
      case '\xed', '\xcd': outName ~= "m"; break;
      case '\xee', '\xce': outName ~= "n"; break;
      case '\xef', '\xcf': outName ~= "o"; break;
      case '\xf0', '\xd0': outName ~= "p"; break;
      case '\xf2', '\xd2': outName ~= "r"; break;
      case '\xf3', '\xd3': outName ~= "s"; break;
      case '\xf4', '\xd4': outName ~= "t"; break;
      case '\xf5', '\xd5': outName ~= "u"; break;
      case '\xe6', '\xc6': outName ~= "f"; break;
      case '\xe8', '\xc8': outName ~= "h"; break;
      case '\xe3', '\xc3': outName ~= "c"; break;
      case '\xfe', '\xde': outName ~= "ch"; break;
      case '\xfb', '\xdb': outName ~= "sh"; break;
      case '\xfd', '\xdd': outName ~= "sch"; break;
      case '\xff', '\xdf': break; // hard sign: produces nothing
      case '\xf9', '\xd9': outName ~= "y"; break;
      case '\xf8', '\xd8': break; // soft sign: produces nothing
      case '\xfc', '\xdc': outName ~= "e"; break;
      case '\xe0', '\xc0': outName ~= "ju"; break;
      case '\xf1', '\xd1': outName ~= "ja"; break;
      case 'A': .. case 'Z':
        outName ~= cast(char)(ch+32);
        break;
      case 'a': .. case 'z':
      case '0': .. case '9':
        outName ~= ch;
        break;
      default:
        // "'s"-like suffix: drop the apostrophe, keep the word glued together
        if (isEs(s, pos)) break;
        if (ch == '&') {
          putSeparator();
          outName ~= "and_";
          break;
        }
        putSeparator();
        break;
    }
  }

  // no trailing separators
  while (outName.length && outName[$-1] == '_') outName = outName[0..$-1];
  if (outName.length == 0) outName = "untitled";
  return outName;
}
103 public:
/// Per-track information collected from a CUE sheet.
static struct Track {
  string artist; // performer
  string title;
  string genre;
  uint year; // 0: unknown
  string filename;
  ulong pregapmsecs; // index 00, or startmsecs
  ulong startmsecs; // index 01

  /// Earliest position of the track, in milliseconds: the pregap position
  /// when it is set (not `ulong.max`) and lies before `startmsecs`,
  /// `startmsecs` otherwise.
  @property ulong start () const pure nothrow @safe @nogc {
    pragma(inline, true);
    if (pregapmsecs == pregapmsecs.max) return startmsecs;
    return (pregapmsecs < startmsecs ? pregapmsecs : startmsecs);
  }
}
116 private:
/// Converts a CUE `mm:ss:ff` index position into milliseconds.
///
/// Params:
///   s = position text; exactly three `:`-separated unsigned decimal numbers
///   fuckedCue = `false`: a second has 75 frames (regular CDDA sheets);
///               `true`: a second has 100 frames
///
/// Returns: the position in milliseconds, rounded down.
///
/// Throws: `Exception` (or a `std.conv` exception derived from it) when the
/// text does not have exactly three fields, a field is not a number,
/// seconds are above 59, or frames are outside of the frame rate.
ulong parseIndex (const(char)[] s, bool fuckedCue) {
  import std.algorithm : splitter;
  import std.conv : to;
  import std.range : enumerate;
  uint[3] msf;
  bool lastHit = false;
  foreach (immutable idx, sv; s.splitter(':').enumerate) {
    if (idx >= msf.length) throw new Exception("invalid index");
    lastHit = (idx == msf.length-1);
    msf[idx] = sv.to!uint;
  }
  if (!lastHit) throw new Exception("invalid index (0)");
  if (msf[1] > 59) throw new Exception("invalid index (seconds)");
  immutable uint fps = (fuckedCue ? 100 : 75);
  if (msf[2] >= fps) throw new Exception("invalid index (frames)");
  // pure 64-bit integer math: exact, and it cannot overflow, because
  // uint.max*60*100*1000 is still far below ulong.max
  immutable ulong totalFrames = (cast(ulong)msf[0]*60+msf[1])*fps+msf[2];
  return totalFrames*1000/fps;
}
141 public:
// disc-level data: everything a sheet sets before its first TRACK
string artist; // PERFORMER
string album; // TITLE
string genre; // REM GENRE
uint year; // REM DATE / REM YEAR; 0: unknown
string filename; // FILE
// one entry per TRACK, in sheet order
Track[] tracks;
149 public:
/// Resets every field of this object to its default value.
void clear () {
  this = typeof(this).init;
}
/// Opens `fname` as a `VFile` and parses it as a CUE sheet.
void load (const(char)[] fname) { load(VFile(fname)); }

/// Parses a CUE sheet from `fl`, replacing the current contents.
///
/// Recognized keywords: `REM` (`DATE`/`YEAR`, `GENRE`, `DUMBFUCK`), `TRACK`,
/// `PERFORMER`, `TITLE`, `FILE` and `INDEX` (00 and 01). `PREGAP`, `POSTGAP`,
/// `ISRC`, `CATALOG`, `FLAGS` and `CDTEXTFILE` are accepted and ignored.
/// Values met before the first `TRACK` go to the disc-level fields, later
/// ones go to the most recent track. After parsing, track values equal to
/// the disc-level ones are blanked, and a leading track number (with an
/// optional dot) is stripped from track titles.
///
/// Throws: `Exception` on an unknown keyword, a bad or out-of-sequence track
/// number, a non-`AUDIO` track, a bad index, an `INDEX` before any `TRACK`,
/// or a line that does not fit into the 4096-byte line buffer. The object
/// is cleared when loading fails.
void load (VFile fl) {
  import std.string : format;

  clear();
  scope(failure) clear();
  char[4096] linebuf;
  char lastSavedChar = 0; // look-ahead character read after a '\r'; 0: none
  char[] line;
  bool firstLine = true;
  bool fuckedCue = false; // set by "REM DUMBFUCK": index frames are 1/100 of a second
  int linenum;

  // reads the next line into `line` (without the terminator)
  // returns `false` on end of file; "\n", "\r\n" and "\r" all end a line
  bool readLine () {
    scope(success) {
      if (firstLine) {
        firstLine = false;
        // drop the UTF-8 byte order mark
        if (line.length >= 3 && line[0..3] == "\xEF\xBB\xBF") line = line[3..$];
      }
    }
    ++linenum;
    uint pos = 0;
    while (pos < linebuf.length) {
      char ch;
      if (lastSavedChar) {
        // the look-ahead character goes through the same checks as a fresh
        // one, so a terminator that directly follows a '\r' ends a line too
        ch = lastSavedChar;
        lastSavedChar = 0;
      } else {
        auto rd = fl.rawRead((&ch)[0..1]);
        if (rd.length == 0) {
          if (pos == 0) { line = null; return false; }
          line = linebuf[0..pos];
          return true;
        }
      }
      if (ch == '\n') {
        line = linebuf[0..pos];
        return true;
      }
      if (ch == '\r') {
        // swallow the '\n' of a "\r\n" pair; keep anything else for the next call
        auto nx = fl.rawRead((&lastSavedChar)[0..1]);
        if (nx.length == 0 || lastSavedChar == '\n') lastSavedChar = 0;
        line = linebuf[0..pos];
        return true;
      }
      linebuf[pos++] = ch;
    }
    throw new Exception(format("line %d too long!", linenum));
  }

  // cuts the next word (bare or double-quoted) off the start of `line`
  // returns `null` at the end of the line
  // words that are not valid UTF-8 are passed through `recode`
  // (presumably cp1251 -> utf-8; verify against iv.encoding)
  // with `doupper`, pure-ASCII words are upper-cased in place
  const(char)[] nextWord(bool doupper) () {
    while (line.length && line[0] <= ' ') line = line[1..$];
    if (line.length == 0) return null;
    char[] res;
    uint epos = 1;
    if (line[0] == '"') {
      // quoted
      while (epos < line.length && line[epos] != '"') {
        // a backslash protects the character that follows it
        if (line[epos] == '\\' && line.length-epos > 1) epos += 2; else ++epos;
      }
      res = line[1..epos];
      if (epos < line.length) {
        assert(line[epos] == '"');
        ++epos;
      }
      line = line[epos..$];
      // surrounding blanks inside of the quotes are not wanted
      while (res.length && res[0] <= ' ') res = res[1..$];
      while (res.length && res[$-1] <= ' ') res = res[0..$-1];
    } else {
      // normal
      while (epos < line.length && line[epos] > ' ') ++epos;
      res = line[0..epos];
      line = line[epos..$];
    }
    // recode
    if (res !is null && !res.utf8Valid) return res.recode("utf-8", "cp1251");
    static if (doupper) {
      if (res !is null) {
        // upcase, but only when the word has no bytes above 127
        bool doconv = false;
        foreach (char ch; res) {
          if (ch >= 128) { doconv = false; break; }
          if (ch >= 'a' && ch <= 'z') doconv = true;
        }
        if (doconv) foreach (ref char ch; res) if (ch >= 'a' && ch <= 'z') ch -= 32;
      }
    }
    return res;
  }

  // returns a cleaned copy of a genre string: '|' and '\' become '/',
  // control characters become spaces, repeated spaces and spaces around
  // '/' are removed
  string normalizeGenre (const(char)[] w) {
    w = w.xstrip;
    if (w.length == 0) return "";
    char[] s = new char[w.length];
    s[] = w[];
    foreach (ref char ch; s) {
      if (ch == '|' || ch == '\\') ch = '/';
      else if (ch < 32) ch = ' ';
    }
    s = s.xstrip;
    // remove extra spaces
    usize pos = 0;
    while (pos < s.length) {
      if (s[pos] == ' ') {
        if (pos+1 >= s.length || s[pos+1] == ' ' || s[pos+1] == '/') {
          s = s[0..pos]~s[pos+1..$];
          continue;
        }
      } else if (s[pos] == '/' && pos+1 < s.length && s[pos+1] == ' ') {
        s = s[0..pos+1]~s[pos+2..$];
        continue;
      }
      ++pos;
    }
    s = s.xstrip;
    return cast(string)s; // safe: `s` is a private copy nobody else refers to
  }

  while (readLine) {
    auto w = nextWord!true();
    if (w is null) continue;
    switch (w) {
      case "REM": // special
        w = nextWord!true();
        switch (w) {
          case "DATE": case "YEAR":
            w = nextWord!false();
            int yr = 0;
            try { import std.conv : to; yr = w.to!ushort(10); } catch (Exception) {}
            if (yr >= 1900 && yr <= 3000) {
              if (tracks.length) tracks[$-1].year = yr; else year = yr;
            }
            break;
          case "GENRE":
            w = nextWord!false();
            if (w.length) {
              string gg = normalizeGenre(w);
              if (tracks.length) tracks[$-1].genre = gg; else genre = gg;
            }
            break;
          case "DUMBFUCK": // switches index parsing to 100 frames per second
            fuckedCue = true;
            break;
          default: break;
        }
        break;
      case "TRACK": // new track
        tracks.length += 1;
        tracks[$-1].pregapmsecs = tracks[$-1].pregapmsecs.max; // "no pregap yet" mark
        w = nextWord!true();
        {
          import std.conv : to;
          ubyte tn;
          try {
            tn = w.to!ubyte(10);
          } catch (Exception) {
            throw new Exception(format("fucked track number at line %d", linenum));
          }
          // the sequence check lives outside of the `try`, so its own message
          // is not replaced by the handler above
          if (tn != tracks.length) throw new Exception(format("invalid track number at line %d", linenum));
        }
        w = nextWord!true();
        if (w != "AUDIO") throw new Exception(format("non-audio track at line %d", linenum));
        break;
      case "PERFORMER":
        w = nextWord!false();
        if (w.length) {
          if (tracks.length) tracks[$-1].artist = w.idup; else artist = w.idup;
        }
        break;
      case "TITLE":
        w = nextWord!false();
        if (w.length) {
          if (tracks.length) tracks[$-1].title = w.idup; else album = w.idup;
        }
        break;
      case "FILE":
        w = nextWord!false();
        if (w.length) {
          if (tracks.length) tracks[$-1].filename = w.idup; else filename = w.idup;
        }
        break;
      case "INDEX":
        // mm:ss:ff (minute-second-frame) format. There are 75 such frames per second of audio
        // 00: pregap, optional
        // 01: song start
        if (tracks.length == 0) throw new Exception(format("index without track at line %d", linenum));
        w = nextWord!false();
        try {
          import std.conv : to;
          auto n = w.to!ubyte(10);
          if (n == 1) {
            w = nextWord!true();
            tracks[$-1].startmsecs = parseIndex(w, fuckedCue);
          } else if (n == 0) {
            w = nextWord!true();
            tracks[$-1].pregapmsecs = parseIndex(w, fuckedCue);
          }
        } catch (Exception e) {
          writeln("ERROR: ", e.msg, " (", w, ")");
          throw new Exception(format("fucked index at line %d", linenum));
        }
        break;
      case "PREGAP": case "POSTGAP": break; // ignore
      case "ISRC": case "CATALOG": case "FLAGS": case "CDTEXTFILE": break;
      // SONGWRITER
      default:
        // `writeln` does no formatting, so the line number is passed as a plain argument
        writeln("unknown CUE keyword: '", w, "' at line ", linenum);
        throw new Exception(format("invalid keyword at line %d", linenum));
    }
  }

  // normalize tracks
  foreach (immutable tidx, ref trk; tracks) {
    if (trk.pregapmsecs == trk.pregapmsecs.max) trk.pregapmsecs = trk.startmsecs;
    // values that merely repeat the disc-level ones are blanked
    if (trk.artist == artist) trk.artist = null;
    if (trk.year == year) trk.year = 0;
    if (trk.genre == genre) trk.genre = null;
    if (trk.filename == filename) trk.filename = null;
    // strip a leading "<track number>" or "<track number>." from the title
    int pidx;
    string t = simpleParseInt(trk.title, pidx);
    if (pidx == tidx+1 && t.length && t.ptr[0] == '.') t = t[1..$].xstrip;
    if (pidx == tidx+1 && t.length) trk.title = t;
  }
}
/// Writes a human-readable listing of the sheet to `fo`.
///
/// Empty disc-level fields are skipped. Text fields are passed through
/// `recodeToKOI8` before they are written. For each track the start position
/// is printed as `minutes:seconds.milliseconds`, followed by the non-empty
/// track fields and by the `koi2trlocase` form of the title.
void dump (VFile fo) {
  fo.writeln("=======================");
  if (artist.length) fo.writeln("ARTIST: <", artist.recodeToKOI8, ">");
  if (album.length) fo.writeln("ALBUM : <", album.recodeToKOI8, ">");
  if (genre.length) fo.writeln("GENRE : <", genre.recodeToKOI8, ">");
  if (year) fo.writeln("YEAR : <", year, ">");
  if (filename.length) fo.writeln("FILE : <", filename.recodeToKOI8, ">");

  if (tracks.length == 0) return;

  fo.writeln("TRACKS: ", tracks.length);
  foreach (immutable number, const ref entry; tracks) {
    immutable ulong msecs = entry.startmsecs;
    immutable ulong wholeSecs = msecs/1000;
    fo.writefln(" TRACK #%02d: start: %d:%02d.%03d", number+1, wholeSecs/60, wholeSecs%60, msecs%1000);
    if (entry.artist.length) fo.writeln(" ARTIST: <", entry.artist.recodeToKOI8, ">");
    if (entry.title.length) fo.writeln(" TITLE : <", entry.title.recodeToKOI8, ">");
    if (entry.genre.length) fo.writeln(" GENRE : <", entry.genre.recodeToKOI8, ">");
    if (entry.year) fo.writeln(" YEAR : <", entry.year, ">");
    if (entry.filename.length) fo.writeln(" FILE : <", entry.filename.recodeToKOI8, ">");
    if (entry.title.length) fo.writeln(" XFILE : <", koi2trlocase(entry.title.recodeToKOI8), ">");
  }
}

/// Writes the listing to `stdout`.
void dump () {
  dump(stdout);
}
398 private:
399 // num<0: no number
400 // return string w/o parsed number
/// Parses a non-negative decimal number at the start of `src`.
///
/// Leading characters not greater than a space are skipped first.
///
/// Params:
///   src = text to parse
///   num = receives the parsed value; `-1` when `src` does not start with a
///         digit or when the value does not fit into `int`
///
/// Returns: `src` without the number and without the blanks around it;
/// the unchanged `src` when `num` is `-1`.
static inout(char)[] simpleParseInt (inout(char)[] src, out int num) nothrow @trusted @nogc {
  usize pos = 0;
  while (pos < src.length && src.ptr[pos] <= ' ') ++pos;
  if (pos >= src.length || src.ptr[pos] < '0' || src.ptr[pos] > '9') {
    num = -1;
    return src;
  }
  num = 0;
  while (pos < src.length) {
    immutable char ch = src.ptr[pos];
    if (ch < '0' || ch > '9') break;
    immutable int digit = ch-'0';
    // test before multiplying: a wrapped product can still be greater than
    // the previous value, so comparing afterwards misses some overflows
    if (num > (int.max-digit)/10) { num = -1; return src; }
    num = num*10+digit;
    ++pos;
  }
  while (pos < src.length && src.ptr[pos] <= ' ') ++pos;
  return src[pos..$];
}