1 // Written in the D programming language.
4 Functions which operate on ASCII characters.
6 All of the functions in std.ascii accept Unicode characters but
7 effectively ignore them if they're not ASCII. All `isX` functions return
8 `false` for non-ASCII characters, and all `toX` functions do nothing
9 to non-ASCII characters.
11 For functions which operate on Unicode characters, see `std.uni`.
14 $(SCRIPT inhibitQuickIndex = 1;)
17 $(TR $(TH Category) $(TH Functions))
18 $(TR $(TD Validation) $(TD
32 $(TR $(TD Conversions) $(TD
36 $(TR $(TD Constants) $(TD
42 $(LREF lowerHexDigits)
54 $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
55 $(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia)
57 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
58 Authors: $(HTTP digitalmars.com, Walter Bright) and
59 $(HTTP jmdavisprog.com, Jonathan M Davis)
60 Source: $(PHOBOSSRC std/ascii.d)
/// All hexadecimal digits in both cases: `0 .. 9`, `A .. F`, `a .. f`.
immutable fullHexDigits  = "0123456789ABCDEFabcdef";
/// Uppercase hexadecimal digits: `0 .. 9`, `A .. F` (leading part of `fullHexDigits`).
immutable hexDigits      = fullHexDigits[0 .. 16];
/// Lowercase hexadecimal digits: `0 .. 9`, `a .. f`.
immutable lowerHexDigits = "0123456789abcdef";
/// Decimal digits: `0 .. 9` (leading part of `hexDigits`).
immutable digits         = hexDigits[0 .. 10];
/// Octal digits: `0 .. 7` (leading part of `digits`).
immutable octalDigits    = digits[0 .. 8];
/// All ASCII letters, uppercase first: `A .. Z`, `a .. z`.
immutable letters        = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
/// Uppercase letters: `A .. Z` (first half of `letters`).
immutable uppercase      = letters[0 .. $ / 2];
/// Lowercase letters: `a .. z` (second half of `letters`).
immutable lowercase      = letters[$ / 2 .. $];
/// ASCII _whitespace: space, tab, vertical tab, carriage return, line feed, form feed.
immutable whitespace     = " \t\v\r\n\f";
/++
    Letter case specifier.
  +/
enum LetterCase : bool
{
    upper = false, /// Upper case letters
    lower = true,  /// Lower case letters
}
88 assert(42.to
!string(16, LetterCase
.upper
) == "2A");
89 assert(42.to
!string(16, LetterCase
.lower
) == "2a");
95 import std
.digest
.hmac
: hmac
;
96 import std
.digest
: toHexString
;
97 import std
.digest
.sha
: SHA1
;
98 import std
.string
: representation
;
100 const sha1HMAC
= "A very long phrase".representation
101 .hmac
!SHA1("secret".representation
)
102 .toHexString
!(LetterCase
.lower
);
103 assert(sha1HMAC
== "49f2073c7bf58577e8c9ae59fe8cfd37c9ab94e5");
/++
    All control characters in the ASCII table ($(HTTPS www.asciitable.com, source)).

    The members cover code points `0x00 .. 0x1F` in ascending order, followed
    by `del` at `0x7F`.
  +/
enum ControlChar : char
{
    nul = '\x00', /// Null
    soh = '\x01', /// Start of heading
    stx = '\x02', /// Start of text
    etx = '\x03', /// End of text
    eot = '\x04', /// End of transmission
    enq = '\x05', /// Enquiry
    ack = '\x06', /// Acknowledge
    bel = '\x07', /// Bell
    bs  = '\x08', /// Backspace
    tab = '\x09', /// Horizontal tab
    lf  = '\x0A', /// NL line feed, new line
    vt  = '\x0B', /// Vertical tab
    ff  = '\x0C', /// NP form feed, new page
    cr  = '\x0D', /// Carriage return
    so  = '\x0E', /// Shift out
    si  = '\x0F', /// Shift in
    dle = '\x10', /// Data link escape
    dc1 = '\x11', /// Device control 1
    dc2 = '\x12', /// Device control 2
    dc3 = '\x13', /// Device control 3
    dc4 = '\x14', /// Device control 4
    nak = '\x15', /// Negative acknowledge
    syn = '\x16', /// Synchronous idle
    etb = '\x17', /// End of transmission block
    can = '\x18', /// Cancel
    em  = '\x19', /// End of medium
    sub = '\x1A', /// Substitute
    esc = '\x1B', /// Escape
    fs  = '\x1C', /// File separator
    gs  = '\x1D', /// Group separator
    rs  = '\x1E', /// Record separator
    us  = '\x1F', /// Unit separator
    del = '\x7F', /// Delete
}
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.searching : until;
    import std.range : iota, only;
    import std.traits : EnumMembers;

    // Every member is a single char, since all ASCII characters fit in char.
    static assert(ControlChar.ack.sizeof == 1);

    // Apart from del, the members are the consecutive code points 0 .. 31.
    static assert(EnumMembers!ControlChar.only.until(ControlChar.del).equal(iota(32)));

    // Members that have a dedicated escape sequence, in code point order.
    static assert(ControlChar.nul == '\0');
    static assert(ControlChar.bel == '\a');
    static assert(ControlChar.bs  == '\b');
    static assert(ControlChar.tab == '\t');
    static assert(ControlChar.lf  == '\n');
    static assert(ControlChar.vt  == '\v');
    static assert(ControlChar.ff  == '\f');
    static assert(ControlChar.cr  == '\r');
}
@safe pure nothrow unittest
{
    // `text` lives in std.conv; import it locally so the example is self-contained.
    import std.conv : text;

    //Control character table can be used in place of hexcodes.
    with (ControlChar)
        assert(text("Phobos", us, "Deimos", us, "Tango", rs) == "Phobos\x1FDeimos\x1FTango\x1E");
}
/// Newline sequence for this system.
version (Windows)
    immutable newline = "\r\n";   // CR LF
else version (Posix)
    immutable newline = "\n";     // LF only
else
    static assert(0, "Unsupported OS");
/++
    Params: c = The character to test.
    Returns: Whether `c` is a letter or a number (0 .. 9, a .. z, A .. Z).
  +/
bool isAlphaNum(dchar c) @safe pure nothrow @nogc
{
    // Setting bit 5 maps 'A' .. 'Z' onto 'a' .. 'z' and leaves 'a' .. 'z'
    // unchanged, so a single range check covers both letter cases.
    const hc = c | 0x20;
    return ('0' <= c && c <= '9') || ('a' <= hc && hc <= 'z');
}
@safe pure nothrow @nogc unittest
{
    // Digits and letters are alphanumeric ...
    assert( isAlphaNum('1'));
    assert( isAlphaNum('A'));

    // ... punctuation is not.
    assert(!isAlphaNum('#'));

    // N.B.: does not return true for non-ASCII Unicode alphanumerics:
    assert(!isAlphaNum('\u00E1')); // á
}
208 foreach (c
; chain(digits
, octalDigits
, fullHexDigits
, letters
, lowercase
, uppercase
))
209 assert(isAlphaNum(c
));
211 foreach (c
; whitespace
)
212 assert(!isAlphaNum(c
));
/++
    Params: c = The character to test.
    Returns: Whether `c` is an ASCII letter (A .. Z, a .. z).
  +/
bool isAlpha(dchar c) @safe pure nothrow @nogc
{
    // Two plain range checks; kept branch-free so the optimizer is free to
    // fold them into a bitmask test.
    const bool inUpper = 'A' <= c && c <= 'Z';
    const bool inLower = 'a' <= c && c <= 'z';
    return inUpper || inLower;
}
@safe pure nothrow @nogc unittest
{
    // Only letters qualify; digits and punctuation do not.
    assert( isAlpha('A'));
    assert(!isAlpha('#'));
    assert(!isAlpha('1'));

    // N.B.: does not return true for non-ASCII Unicode alphabetic characters:
    assert(!isAlpha('\u00E1')); // á
}
240 foreach (c
; chain(letters
, lowercase
, uppercase
))
243 foreach (c
; chain(digits
, octalDigits
, whitespace
))
/++
    Params: c = The character to test.
    Returns: Whether `c` is a lowercase ASCII letter (a .. z).
  +/
bool isLower(dchar c) @safe pure nothrow @nogc
{
    return 'a' <= c && c <= 'z';
}
@safe pure nothrow @nogc unittest
{
    // Only a .. z qualify.
    assert( isLower('a'));
    assert(!isLower('#'));
    assert(!isLower('A'));

    // N.B.: does not return true for non-ASCII Unicode lowercase letters
    assert(!isLower('\u00C1')); // Á
    assert(!isLower('\u00E1')); // á
}
272 foreach (c
; lowercase
)
275 foreach (c
; chain(digits
, uppercase
, whitespace
))
/++
    Params: c = The character to test.
    Returns: Whether `c` is an uppercase ASCII letter (A .. Z).
  +/
bool isUpper(dchar c) @safe pure nothrow @nogc
{
    return c >= 'A' && c <= 'Z';
}
@safe pure nothrow @nogc unittest
{
    // Only A .. Z qualify.
    assert( isUpper('A'));
    assert(!isUpper('#'));
    assert(!isUpper('a'));

    // N.B.: does not return true for non-ASCII Unicode uppercase letters
    assert(!isUpper('\u00C1')); // Á
    assert(!isUpper('\u00E1')); // á
}
304 foreach (c
; uppercase
)
307 foreach (c
; chain(digits
, lowercase
, whitespace
))
/++
    Params: c = The character to test.
    Returns: Whether `c` is a digit (0 .. 9).
  +/
bool isDigit(dchar c) @safe pure nothrow @nogc
{
    return c >= '0' && c <= '9';
}
@safe pure nothrow @nogc unittest
{
    assert( isDigit('3'));
    assert( isDigit('8'));
    assert(!isDigit('B'));
    assert(!isDigit('#'));

    // N.B.: does not return true for non-ASCII Unicode numbers.
    // The full-width digits are written as escapes so that they cannot be
    // confused with (or normalized into) the ASCII digits '0' and '4'.
    assert(!isDigit('\uFF10')); // full-width digit zero (U+FF10)
    assert(!isDigit('\uFF14')); // full-width digit four (U+FF14)
}
340 foreach (c
; chain(letters
, whitespace
))
/++
    Params: c = The character to test.
    Returns: Whether `c` is a digit in base 8 (0 .. 7).
  +/
bool isOctalDigit(dchar c) @safe pure nothrow @nogc
{
    return '0' <= c && c <= '7';
}
@safe pure nothrow @nogc unittest
{
    // Both ends of the octal range are accepted.
    assert( isOctalDigit('0'));
    assert( isOctalDigit('7'));

    // The first decimal digit beyond the range, letters and punctuation are not.
    assert(!isOctalDigit('8'));
    assert(!isOctalDigit('A'));
    assert(!isOctalDigit('#'));
}
367 foreach (c
; octalDigits
)
368 assert(isOctalDigit(c
));
370 foreach (c
; chain(letters
, ['8', '9'], whitespace
))
371 assert(!isOctalDigit(c
));
/++
    Params: c = The character to test.
    Returns: Whether `c` is a digit in base 16 (0 .. 9, A .. F, a .. f).
  +/
bool isHexDigit(dchar c) @safe pure nothrow @nogc
{
    // Setting bit 5 maps 'A' .. 'F' onto 'a' .. 'f' and leaves 'a' .. 'f'
    // unchanged, so a single range check covers both letter cases.
    const hc = c | 0x20;
    return ('0' <= c && c <= '9') || ('a' <= hc && hc <= 'f');
}
@safe pure nothrow @nogc unittest
{
    // Decimal digits and hex letters of either case are accepted.
    assert( isHexDigit('0'));
    assert( isHexDigit('A'));
    assert( isHexDigit('f')); // lowercase hex digits are accepted

    // The first letters past the hex range, and punctuation, are rejected.
    assert(!isHexDigit('G'));
    assert(!isHexDigit('g'));
    assert(!isHexDigit('#'));
}
399 foreach (c
; fullHexDigits
)
400 assert(isHexDigit(c
));
402 foreach (c
; chain(lowercase
[6 .. $], uppercase
[6 .. $], whitespace
))
403 assert(!isHexDigit(c
));
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is a whitespace character. That includes the
    space, tab, vertical tab, form feed, carriage return, and linefeed
    characters.
  +/
bool isWhite(dchar c) @safe pure nothrow @nogc
{
    if (c == ' ')
        return true;

    // 0x09 .. 0x0D is the contiguous run: tab, line feed, vertical tab,
    // form feed, carriage return.
    return 0x09 <= c && c <= 0x0D;
}
@safe pure nothrow @nogc unittest
{
    // ASCII whitespace is recognized ...
    assert( isWhite(' '));
    assert( isWhite('\n'));
    assert( isWhite('\t'));

    // ... digits, letters and punctuation are not.
    assert(!isWhite('1'));
    assert(!isWhite('a'));
    assert(!isWhite('#'));

    // N.B.: Does not return true for non-ASCII Unicode whitespace characters.
    static import std.uni;
    enum dchar noBreakSpace = '\u00A0';
    assert(std.uni.isWhite(noBreakSpace));
    assert(!isWhite(noBreakSpace)); // std.ascii.isWhite
}
437 foreach (c
; whitespace
)
440 foreach (c
; chain(digits
, letters
))
/++
    Params: c = The character to test.
    Returns: Whether `c` is a control character.
  +/
bool isControl(dchar c) @safe pure nothrow @nogc
{
    // Control characters are everything below the space character, plus 0x7F.
    return c == 0x7F || c < 0x20;
}
@safe pure nothrow @nogc unittest
{
    // Characters below the space character are control characters.
    assert( isControl('\0'));
    assert( isControl('\x12'));
    assert( isControl('\n')); // newline is both whitespace and control

    // Printable characters are not.
    assert(!isControl(' '));
    assert(!isControl('#'));
    assert(!isControl('1'));
    assert(!isControl('a'));

    // N.B.: non-ASCII Unicode control characters are not recognized:
    assert(!isControl('\u0080'));
    assert(!isControl('\u2028'));
    assert(!isControl('\u2029'));
}
474 foreach (dchar c
; 0 .. 32)
475 assert(isControl(c
));
476 assert(isControl(127));
478 foreach (c
; chain(digits
, letters
, [' ']))
479 assert(!isControl(c
));
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is a punctuation character. That includes
    all ASCII characters which are not control characters, letters, digits, or
    whitespace.
  +/
bool isPunctuation(dchar c) @safe pure nothrow @nogc
{
    // Anything outside '!' .. '~' is a control character, the space
    // character, or not ASCII at all.
    if (c < '!' || c > '~')
        return false;

    return !isAlphaNum(c);
}
@safe pure nothrow @nogc unittest
{
    assert( isPunctuation('.'));
    assert( isPunctuation(','));
    assert( isPunctuation(':'));
    assert( isPunctuation('!'));
    assert( isPunctuation('#'));
    assert( isPunctuation('~'));
    assert( isPunctuation('+'));
    assert( isPunctuation('_'));

    assert(!isPunctuation('1'));
    assert(!isPunctuation('a'));
    assert(!isPunctuation(' '));
    assert(!isPunctuation('\n'));
    assert(!isPunctuation('\0'));

    // N.B.: Non-ASCII Unicode punctuation characters are not recognized.
    assert(!isPunctuation('\u2012')); // (U+2012 = figure dash)
}
518 foreach (dchar c
; 0 .. 128)
520 if (isControl(c
) ||
isAlphaNum(c
) || c
== ' ')
521 assert(!isPunctuation(c
));
523 assert(isPunctuation(c
));
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is a printable character other than the
    space character.
  +/
bool isGraphical(dchar c) @safe pure nothrow @nogc
{
    return c >= '!' && c <= '~';
}
@safe pure nothrow @nogc unittest
{
    // Visible characters are graphical ...
    assert( isGraphical('#'));
    assert( isGraphical('1'));
    assert( isGraphical('a'));

    // ... whitespace and control characters are not.
    assert(!isGraphical(' ')); // whitespace is not graphical
    assert(!isGraphical('\n'));
    assert(!isGraphical('\0'));

    // N.B.: Unicode graphical characters are not regarded as such.
    assert(!isGraphical('\u00E1')); // á
}
554 foreach (dchar c
; 0 .. 128)
556 if (isControl(c
) || c
== ' ')
557 assert(!isGraphical(c
));
559 assert(isGraphical(c
));
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is a printable character - including the
    space character.
  +/
bool isPrintable(dchar c) @safe pure nothrow @nogc
{
    return ' ' <= c && c <= '~';
}
@safe pure nothrow @nogc unittest
{
    // The space character and every visible character are printable ...
    assert( isPrintable(' ')); // whitespace is printable
    assert( isPrintable('#'));
    assert( isPrintable('1'));
    assert( isPrintable('a'));

    // ... control characters are not.
    assert(!isPrintable('\0')); // control characters are not printable

    // N.B.: Printable non-ASCII Unicode characters are not recognized.
    assert(!isPrintable('\u00E1')); // á
}
589 foreach (dchar c
; 0 .. 128)
592 assert(!isPrintable(c
));
594 assert(isPrintable(c
));
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is in the ASCII character set - i.e. in the
    range 0 .. 0x7F.
  +/
bool isASCII(dchar c) @safe pure nothrow @nogc
{
    // dchar is unsigned, so only the upper bound needs checking.
    return c <= 0x7F;
}
@safe pure nothrow @nogc unittest
{
    // A plain letter is ASCII; an accented one is not.
    assert( isASCII('a'));
    assert(!isASCII('\u00E1')); // á
}
619 foreach (dchar c
; 0 .. 128)
622 assert(!isASCII(128));
/++
    Converts an ASCII letter to lowercase.

    Params: c = A character of any type that implicitly converts to `dchar`.
    In the case where it's a built-in type, or an enum of a built-in type,
    `Unqual!(OriginalType!C)` is returned, whereas if it's a user-defined
    type, `dchar` is returned.

    Returns: The corresponding lowercase letter, if `c` is an uppercase
    ASCII character, otherwise `c` itself.
  +/
auto toLower(C)(C c)
if (is(C : dchar))
{
    import std.traits : OriginalType;

    // R is the result type: dchar for user-defined (non-scalar) types,
    // otherwise the unqualified base type of C.
    static if (!__traits(isScalar, C))
        alias R = dchar;
    else static if (is(immutable OriginalType!C == immutable OC, OC))
        alias R = OC;

    return isUpper(c) ? cast(R)(cast(R) c + 'a' - 'A') : cast(R) c;
}
@safe pure nothrow @nogc unittest
{
    // Uppercase letters are converted; everything else is returned unchanged.
    assert(toLower('A') == 'a');
    assert(toLower('a') == 'a');
    assert(toLower('#') == '#');

    // N.B.: Non-ASCII Unicode uppercase letters are not converted.
    assert(toLower('\u00C1') == '\u00C1'); // Á
}
@safe pure nothrow unittest
{
    import std.meta : AliasSeq;

    static foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        // Every uppercase letter maps onto the lowercase letter at the same index.
        foreach (i, c; uppercase)
            assert(toLower(cast(C) c) == lowercase[i]);

        // Within ASCII, only 'A' .. 'Z' may change.
        foreach (C c; 0 .. 128)
        {
            if (c < 'A' || c > 'Z')
                assert(toLower(c) == c);
            else
                assert(toLower(c) != c);
        }

        // Values above the ASCII range are returned untouched.
        foreach (C c; 128 .. C.max)
            assert(toLower(c) == c);

        //CTFE
        static assert(toLower(cast(C)'a') == 'a');
        static assert(toLower(cast(C)'A') == 'a');
    }
}
/++
    Converts an ASCII letter to uppercase.

    Params: c = Any type which implicitly converts to `dchar`. In the case
    where it's a built-in type, or an enum of a built-in type,
    `Unqual!(OriginalType!C)` is returned, whereas if it's a user-defined
    type, `dchar` is returned.

    Returns: The corresponding uppercase letter, if `c` is a lowercase ASCII
    character, otherwise `c` itself.
  +/
auto toUpper(C)(C c)
if (is(C : dchar))
{
    import std.traits : OriginalType;

    // R is the result type: dchar for user-defined (non-scalar) types,
    // otherwise the unqualified base type of C.
    static if (!__traits(isScalar, C))
        alias R = dchar;
    else static if (is(immutable OriginalType!C == immutable OC, OC))
        alias R = OC;

    return isLower(c) ? cast(R)(cast(R) c - ('a' - 'A')) : cast(R) c;
}
@safe pure nothrow @nogc unittest
{
    // Lowercase letters are converted; everything else is returned unchanged.
    assert(toUpper('A') == 'A');
    assert(toUpper('a') == 'A');
    assert(toUpper('#') == '#');

    // N.B.: Non-ASCII Unicode lowercase letters are not converted.
    assert(toUpper('\u00E1') == '\u00E1'); // á
}
@safe pure nothrow unittest
{
    import std.meta : AliasSeq;

    static foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        // Every lowercase letter maps onto the uppercase letter at the same index.
        foreach (i, c; lowercase)
            assert(toUpper(cast(C) c) == uppercase[i]);

        // Within ASCII, only 'a' .. 'z' may change.
        foreach (C c; 0 .. 128)
        {
            if (c < 'a' || c > 'z')
                assert(toUpper(c) == c);
            else
                assert(toUpper(c) != c);
        }

        // Values above the ASCII range are returned untouched.
        foreach (C c; 128 .. C.max)
            assert(toUpper(c) == c);

        //CTFE
        static assert(toUpper(cast(C)'a') == 'A');
        static assert(toUpper(cast(C)'A') == 'A');
    }
}
@safe unittest //Test both toUpper and toLower with non-builtin
{
    import std.meta : AliasSeq;
    import std.traits : OriginalType;

    //User Defined [Char|Wchar|Dchar]
    static struct UDC { char c; alias c this; }
    static struct UDW { wchar c; alias c this; }
    static struct UDD { dchar c; alias c this; }
    //[Char|Wchar|Dchar] Enum
    enum CE : char {a = 'a', A = 'A'}
    enum WE : wchar {a = 'a', A = 'A'}
    enum DE : dchar {a = 'a', A = 'A'}
    //User Defined [Char|Wchar|Dchar] Enum
    enum UDCE : UDC {a = UDC('a'), A = UDC('A')}
    enum UDWE : UDW {a = UDW('a'), A = UDW('A')}
    enum UDDE : UDD {a = UDD('a'), A = UDD('A')}

    //User defined types with implicit cast to dchar test.
    static foreach (Char; AliasSeq!(UDC, UDW, UDD))
    {
        assert(toLower(Char('a')) == 'a');
        assert(toLower(Char('A')) == 'a');
        static assert(toLower(Char('a')) == 'a');
        static assert(toLower(Char('A')) == 'a');
        static assert(toUpper(Char('a')) == 'A');
        static assert(toUpper(Char('A')) == 'A');
    }

    //Various enum tests.
    static foreach (Enum; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE))
    {
        assert(toLower(Enum.a) == 'a');
        assert(toLower(Enum.A) == 'a');
        assert(toUpper(Enum.a) == 'A');
        assert(toUpper(Enum.A) == 'A');
        static assert(toLower(Enum.a) == 'a');
        static assert(toLower(Enum.A) == 'a');
        static assert(toUpper(Enum.a) == 'A');
        static assert(toUpper(Enum.A) == 'A');
    }

    //Return value type tests for enum of non-UDT. These should be the original type.
    static foreach (T; AliasSeq!(CE, WE, DE))
    {{
        // Double braces open a real scope so the alias C can be re-declared
        // on every expansion of the static foreach.
        alias C = OriginalType!T;
        static assert(is(typeof(toLower(T.init)) == C));
        static assert(is(typeof(toUpper(T.init)) == C));
    }}

    //Return value tests for UDT and enum of UDT. These should be dchar
    static foreach (T; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE))
    {
        static assert(is(typeof(toLower(T.init)) == dchar));
        static assert(is(typeof(toUpper(T.init)) == dchar));
    }
}