1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Handling of format strings in scanf and friends. The structure of format
10 // strings for fscanf() is described in C99 7.19.6.2.
12 //===----------------------------------------------------------------------===//
14 #include "clang/AST/FormatString.h"
15 #include "FormatStringParsing.h"
16 #include "clang/Basic/TargetInfo.h"
18 using clang::analyze_format_string::ArgType
;
19 using clang::analyze_format_string::FormatStringHandler
;
20 using clang::analyze_format_string::LengthModifier
;
21 using clang::analyze_format_string::OptionalAmount
;
22 using clang::analyze_format_string::ConversionSpecifier
;
23 using clang::analyze_scanf::ScanfConversionSpecifier
;
24 using clang::analyze_scanf::ScanfSpecifier
;
25 using clang::UpdateOnReturn
;
26 using namespace clang
;
28 typedef clang::analyze_format_string::SpecifierResult
<ScanfSpecifier
>
// Parses the scan list of a "%[...]" conversion.
//
// H   - handler; HandleIncompleteScanList() is invoked on it on each of the
//       incomplete-scan-list paths visible below.
// CS  - conversion specifier being built (its uses in this function are on
//       lines not visible in this excerpt).
// Beg - cursor into the format string, taken by reference. UpdateBeg is
//       constructed from (Beg, I); presumably it writes I back to Beg when
//       the function returns - confirm against UpdateOnReturn.
// E   - end of the format string.
//
// NOTE(review): the declaration of I, the guarding conditions for most of the
// error paths and the return statements are not visible in this excerpt.
31 static bool ParseScanList(FormatStringHandler
&H
,
32 ScanfConversionSpecifier
&CS
,
33 const char *&Beg
, const char *E
) {
// start is one character before the cursor I; it is what gets reported as
// the beginning of an incomplete scan list.
35 const char *start
= I
- 1;
36 UpdateOnReturn
<const char*> UpdateBeg(Beg
, I
);
38 // No more characters?
40 H
.HandleIncompleteScanList(start
, I
);
44 // Special case: ']' is the first character.
47 H
.HandleIncompleteScanList(start
, I
- 1);
52 // Special case: "^]" are the first characters.
// The I + 1 != E test keeps the I[1] read inside the string.
53 if (I
+ 1 != E
&& I
[0] == '^' && I
[1] == ']') {
56 H
.HandleIncompleteScanList(start
, I
- 1);
61 // Look for a ']' character which denotes the end of the scan list.
64 H
.HandleIncompleteScanList(start
, I
- 1);
73 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74 // We can possibly refactor.
// Parses a single scanf conversion specification and returns it as a
// ScanfSpecifierResult.
//
// The steps visible below, in order: scan forward for the '%' that starts a
// specifier, parse an optional argument position (ParseArgPosition), the '*'
// assignment-suppression flag, an optional constant field width
// (ParseAmount), an optional length modifier (ParseLengthModifier), and
// finally the conversion character. A '[' conversion is handed on to
// ParseScanList(). Specifiers that end prematurely are reported through
// H.HandleIncompleteSpecifier(); conversions left as InvalidSpecifier are
// reported through H.HandleInvalidScanfConversionSpecifier().
//
// H      - handler receiving the diagnostics callbacks named above.
// LO     - language options, forwarded to ParseLengthModifier().
// Target - target info; consulted for the Darwin-only conversions.
//
// NOTE(review): Beg, E and argIndex are used below but their parameter
// declarations, as well as the declarations of I and FS, are on lines not
// visible in this excerpt.
75 static ScanfSpecifierResult
ParseScanfSpecifier(FormatStringHandler
&H
,
79 const LangOptions
&LO
,
80 const TargetInfo
&Target
) {
81 using namespace clang::analyze_format_string
;
82 using namespace clang::analyze_scanf
;
84 const char *Start
= nullptr;
85 UpdateOnReturn
<const char*> UpdateBeg(Beg
, I
);
87 // Look for a '%' character that indicates the start of a format specifier.
88 for ( ; I
!= E
; ++I
) {
91 // Detect spurious null characters, which are likely errors.
96 Start
= I
++; // Record the start of the format specifier.
101 // No format specifier found?
106 // No more characters left?
107 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
112 if (ParseArgPosition(H
, FS
, Start
, I
, E
))
116 // No more characters left?
117 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
121 // Look for '*' flag if it is present.
123 FS
.setSuppressAssignment(I
);
125 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
130 // Look for the field width (if any). Unlike printf, this is either
131 // a fixed integer or isn't present.
132 const OptionalAmount
&Amt
= clang::analyze_format_string::ParseAmount(I
, E
);
133 if (Amt
.getHowSpecified() != OptionalAmount::NotSpecified
) {
134 assert(Amt
.getHowSpecified() == OptionalAmount::Constant
);
135 FS
.setFieldWidth(Amt
);
138 // No more characters left?
139 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
144 // Look for the length modifier.
145 if (ParseLengthModifier(FS
, I
, E
, LO
, /*IsScanf=*/true) && I
== E
) {
146 // No more characters left?
147 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
151 // Detect spurious null characters, which are likely errors.
157 // Finally, look for the conversion specifier.
158 const char *conversionPosition
= I
++;
// k stays InvalidSpecifier unless one of the cases below recognises the
// conversion character.
159 ScanfConversionSpecifier::Kind k
= ScanfConversionSpecifier::InvalidSpecifier
;
160 switch (*conversionPosition
) {
163 case '%': k
= ConversionSpecifier::PercentArg
; break;
164 case 'b': k
= ConversionSpecifier::bArg
; break;
165 case 'A': k
= ConversionSpecifier::AArg
; break;
166 case 'E': k
= ConversionSpecifier::EArg
; break;
167 case 'F': k
= ConversionSpecifier::FArg
; break;
168 case 'G': k
= ConversionSpecifier::GArg
; break;
169 case 'X': k
= ConversionSpecifier::XArg
; break;
170 case 'a': k
= ConversionSpecifier::aArg
; break;
171 case 'd': k
= ConversionSpecifier::dArg
; break;
172 case 'e': k
= ConversionSpecifier::eArg
; break;
173 case 'f': k
= ConversionSpecifier::fArg
; break;
174 case 'g': k
= ConversionSpecifier::gArg
; break;
175 case 'i': k
= ConversionSpecifier::iArg
; break;
176 case 'n': k
= ConversionSpecifier::nArg
; break;
177 case 'c': k
= ConversionSpecifier::cArg
; break;
178 case 'C': k
= ConversionSpecifier::CArg
; break;
179 case 'S': k
= ConversionSpecifier::SArg
; break;
180 case '[': k
= ConversionSpecifier::ScanListArg
; break;
181 case 'u': k
= ConversionSpecifier::uArg
; break;
182 case 'x': k
= ConversionSpecifier::xArg
; break;
183 case 'o': k
= ConversionSpecifier::oArg
; break;
184 case 's': k
= ConversionSpecifier::sArg
; break;
185 case 'p': k
= ConversionSpecifier::pArg
; break;
// The DArg, OArg and UArg kinds below are each guarded by a Darwin target
// check (their case labels are not visible in this excerpt).
189 if (Target
.getTriple().isOSDarwin())
190 k
= ConversionSpecifier::DArg
;
193 if (Target
.getTriple().isOSDarwin())
194 k
= ConversionSpecifier::OArg
;
197 if (Target
.getTriple().isOSDarwin())
198 k
= ConversionSpecifier::UArg
;
201 ScanfConversionSpecifier
CS(conversionPosition
, k
);
202 if (k
== ScanfConversionSpecifier::ScanListArg
) {
203 if (ParseScanList(H
, CS
, I
, E
))
206 FS
.setConversionSpecifier(CS
);
// Only conversions that consume a data argument, are not suppressed with '*'
// and do not use a positional argument take the next sequential index.
207 if (CS
.consumesDataArgument() && !FS
.getSuppressAssignment()
208 && !FS
.usesPositionalArg())
209 FS
.setArgIndex(argIndex
++);
211 // FIXME: '%' and '*' don't make sense. Issue a warning.
212 // FIXME: 'ConsumedSoFar' and '*' don't make sense.
214 if (k
== ScanfConversionSpecifier::InvalidSpecifier
) {
215 unsigned Len
= I
- Beg
;
216 if (ParseUTF8InvalidSpecifier(Beg
, E
, Len
)) {
217 CS
.setEndScanList(Beg
+ Len
);
218 FS
.setConversionSpecifier(CS
);
220 // Assume the conversion takes one argument.
221 return !H
.HandleInvalidScanfConversionSpecifier(FS
, Beg
, Len
);
223 return ScanfSpecifierResult(Start
, FS
);
// Returns the ArgType that the argument matching this scanf specifier is
// expected to have, selected from the conversion specifier kind and the
// length modifier LM.
//
// Most visible paths return ArgType::PtrTo(...) of the selected type.
// ArgType::Invalid() is returned when the conversion does not consume a data
// argument and for the length modifiers grouped under the
// `return ArgType::Invalid()` statements below.
//
// Ctx - AST context supplying the builtin and target-dependent types.
//
// NOTE(review): some case labels, default labels and closing braces of the
// switches are on lines not visible in this excerpt.
226 ArgType
ScanfSpecifier::getArgType(ASTContext
&Ctx
) const {
227 const ScanfConversionSpecifier
&CS
= getConversionSpecifier();
229 if (!CS
.consumesDataArgument())
230 return ArgType::Invalid();
232 switch(CS
.getKind()) {
// Signed integer conversions.
234 case ConversionSpecifier::dArg
:
235 case ConversionSpecifier::DArg
:
236 case ConversionSpecifier::iArg
:
237 switch (LM
.getKind()) {
238 case LengthModifier::None
:
239 return ArgType::PtrTo(Ctx
.IntTy
);
240 case LengthModifier::AsChar
:
241 return ArgType::PtrTo(ArgType::AnyCharTy
);
242 case LengthModifier::AsShort
:
243 return ArgType::PtrTo(Ctx
.ShortTy
);
244 case LengthModifier::AsLong
:
245 return ArgType::PtrTo(Ctx
.LongTy
);
246 case LengthModifier::AsLongLong
:
247 case LengthModifier::AsQuad
:
248 return ArgType::PtrTo(Ctx
.LongLongTy
);
249 case LengthModifier::AsInt64
:
250 return ArgType::PtrTo(ArgType(Ctx
.LongLongTy
, "__int64"));
251 case LengthModifier::AsIntMax
:
252 return ArgType::PtrTo(ArgType(Ctx
.getIntMaxType(), "intmax_t"));
253 case LengthModifier::AsSizeT
:
254 return ArgType::PtrTo(ArgType(Ctx
.getSignedSizeType(), "ssize_t"));
255 case LengthModifier::AsPtrDiff
:
256 return ArgType::PtrTo(ArgType(Ctx
.getPointerDiffType(), "ptrdiff_t"));
// AsLongDouble on a signed integer conversion maps to long long here.
257 case LengthModifier::AsLongDouble
:
259 return ArgType::PtrTo(Ctx
.LongLongTy
);
260 case LengthModifier::AsAllocate
:
261 case LengthModifier::AsMAllocate
:
262 case LengthModifier::AsInt32
:
263 case LengthModifier::AsInt3264
:
264 case LengthModifier::AsWide
:
265 case LengthModifier::AsShortLong
:
266 return ArgType::Invalid();
268 llvm_unreachable("Unsupported LengthModifier Type");
// Unsigned integer conversions.
271 case ConversionSpecifier::bArg
:
272 case ConversionSpecifier::oArg
:
273 case ConversionSpecifier::OArg
:
274 case ConversionSpecifier::uArg
:
275 case ConversionSpecifier::UArg
:
276 case ConversionSpecifier::xArg
:
277 case ConversionSpecifier::XArg
:
278 switch (LM
.getKind()) {
279 case LengthModifier::None
:
280 return ArgType::PtrTo(Ctx
.UnsignedIntTy
);
281 case LengthModifier::AsChar
:
282 return ArgType::PtrTo(Ctx
.UnsignedCharTy
);
283 case LengthModifier::AsShort
:
284 return ArgType::PtrTo(Ctx
.UnsignedShortTy
);
285 case LengthModifier::AsLong
:
286 return ArgType::PtrTo(Ctx
.UnsignedLongTy
);
287 case LengthModifier::AsLongLong
:
288 case LengthModifier::AsQuad
:
289 return ArgType::PtrTo(Ctx
.UnsignedLongLongTy
);
290 case LengthModifier::AsInt64
:
291 return ArgType::PtrTo(ArgType(Ctx
.UnsignedLongLongTy
, "unsigned __int64"));
292 case LengthModifier::AsIntMax
:
293 return ArgType::PtrTo(ArgType(Ctx
.getUIntMaxType(), "uintmax_t"));
294 case LengthModifier::AsSizeT
:
295 return ArgType::PtrTo(ArgType(Ctx
.getSizeType(), "size_t"));
296 case LengthModifier::AsPtrDiff
:
297 return ArgType::PtrTo(
298 ArgType(Ctx
.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
// AsLongDouble on an unsigned integer conversion maps to unsigned long long
// here.
299 case LengthModifier::AsLongDouble
:
301 return ArgType::PtrTo(Ctx
.UnsignedLongLongTy
);
302 case LengthModifier::AsAllocate
:
303 case LengthModifier::AsMAllocate
:
304 case LengthModifier::AsInt32
:
305 case LengthModifier::AsInt3264
:
306 case LengthModifier::AsWide
:
307 case LengthModifier::AsShortLong
:
308 return ArgType::Invalid();
310 llvm_unreachable("Unsupported LengthModifier Type");
// Floating-point conversions.
313 case ConversionSpecifier::aArg
:
314 case ConversionSpecifier::AArg
:
315 case ConversionSpecifier::eArg
:
316 case ConversionSpecifier::EArg
:
317 case ConversionSpecifier::fArg
:
318 case ConversionSpecifier::FArg
:
319 case ConversionSpecifier::gArg
:
320 case ConversionSpecifier::GArg
:
321 switch (LM
.getKind()) {
322 case LengthModifier::None
:
323 return ArgType::PtrTo(Ctx
.FloatTy
);
324 case LengthModifier::AsLong
:
325 return ArgType::PtrTo(Ctx
.DoubleTy
);
326 case LengthModifier::AsLongDouble
:
327 return ArgType::PtrTo(Ctx
.LongDoubleTy
);
329 return ArgType::Invalid();
332 // Char, string and scanlist.
333 case ConversionSpecifier::cArg
:
334 case ConversionSpecifier::sArg
:
335 case ConversionSpecifier::ScanListArg
:
336 switch (LM
.getKind()) {
337 case LengthModifier::None
:
338 return ArgType::PtrTo(ArgType::AnyCharTy
);
339 case LengthModifier::AsLong
:
340 case LengthModifier::AsWide
:
341 return ArgType::PtrTo(ArgType(Ctx
.getWideCharType(), "wchar_t"));
342 case LengthModifier::AsAllocate
:
343 case LengthModifier::AsMAllocate
:
344 return ArgType::PtrTo(ArgType::CStrTy
);
// AsShort yields a narrow-character pointer when the target OS is MSVCRT.
345 case LengthModifier::AsShort
:
346 if (Ctx
.getTargetInfo().getTriple().isOSMSVCRT())
347 return ArgType::PtrTo(ArgType::AnyCharTy
);
350 return ArgType::Invalid();
// Wide char and wide string conversions.
352 case ConversionSpecifier::CArg
:
353 case ConversionSpecifier::SArg
:
354 // FIXME: Mac OS X specific?
355 switch (LM
.getKind()) {
356 case LengthModifier::None
:
357 case LengthModifier::AsWide
:
358 return ArgType::PtrTo(ArgType(Ctx
.getWideCharType(), "wchar_t"));
359 case LengthModifier::AsAllocate
:
360 case LengthModifier::AsMAllocate
:
361 return ArgType::PtrTo(ArgType(ArgType::WCStrTy
, "wchar_t *"));
// AsShort yields a narrow-character pointer when the target OS is MSVCRT.
362 case LengthModifier::AsShort
:
363 if (Ctx
.getTargetInfo().getTriple().isOSMSVCRT())
364 return ArgType::PtrTo(ArgType::AnyCharTy
);
367 return ArgType::Invalid();
371 case ConversionSpecifier::pArg
:
372 return ArgType::PtrTo(ArgType::CPointerTy
);
// %n: the selected type follows the length modifier, as for the signed
// integer conversions, except for AsChar and AsLongDouble.
375 case ConversionSpecifier::nArg
:
376 switch (LM
.getKind()) {
377 case LengthModifier::None
:
378 return ArgType::PtrTo(Ctx
.IntTy
);
379 case LengthModifier::AsChar
:
380 return ArgType::PtrTo(Ctx
.SignedCharTy
);
381 case LengthModifier::AsShort
:
382 return ArgType::PtrTo(Ctx
.ShortTy
);
383 case LengthModifier::AsLong
:
384 return ArgType::PtrTo(Ctx
.LongTy
);
385 case LengthModifier::AsLongLong
:
386 case LengthModifier::AsQuad
:
387 return ArgType::PtrTo(Ctx
.LongLongTy
);
388 case LengthModifier::AsInt64
:
389 return ArgType::PtrTo(ArgType(Ctx
.LongLongTy
, "__int64"));
390 case LengthModifier::AsIntMax
:
391 return ArgType::PtrTo(ArgType(Ctx
.getIntMaxType(), "intmax_t"));
392 case LengthModifier::AsSizeT
:
393 return ArgType::PtrTo(ArgType(Ctx
.getSignedSizeType(), "ssize_t"));
394 case LengthModifier::AsPtrDiff
:
395 return ArgType::PtrTo(ArgType(Ctx
.getPointerDiffType(), "ptrdiff_t"));
396 case LengthModifier::AsLongDouble
:
397 return ArgType(); // FIXME: Is this a known extension?
398 case LengthModifier::AsAllocate
:
399 case LengthModifier::AsMAllocate
:
400 case LengthModifier::AsInt32
:
401 case LengthModifier::AsInt3264
:
402 case LengthModifier::AsWide
:
403 case LengthModifier::AsShortLong
:
404 return ArgType::Invalid();
// Attempts to rewrite this specifier so that it matches an argument of type
// QT: the conversion specifier kind (CS), the length modifier (LM) and, for
// character arrays of known constant size, the field width are updated.
//
// QT      - type of the argument; the fix-up looks at its pointee type.
// RawQT   - passed to Ctx.getAsConstantArrayType() to find a constant array
//           length for the field width.
// LangOpt - language options; named-type length modifiers (size_t, ptrdiff_t,
//           ...) are only considered under C99 or C++11.
//
// NOTE(review): the declaration of Ctx, the bodies of several guards and all
// return statements are on lines not visible in this excerpt.
414 bool ScanfSpecifier::fixType(QualType QT
, QualType RawQT
,
415 const LangOptions
&LangOpt
,
418 // %n is different from other conversion specifiers; don't try to fix it.
419 if (CS
.getKind() == ConversionSpecifier::nArg
)
422 if (!QT
->isPointerType())
425 QualType PT
= QT
->getPointeeType();
427 // If it's an enum, get its underlying type.
428 if (const EnumType
*ETy
= PT
->getAs
<EnumType
>()) {
429 // Don't try to fix incomplete enums.
430 if (!ETy
->getDecl()->isComplete())
432 PT
= ETy
->getDecl()->getIntegerType();
435 const BuiltinType
*BT
= PT
->getAs
<BuiltinType
>();
439 // Pointer to a character.
440 if (PT
->isAnyCharacterType()) {
441 CS
.setKind(ConversionSpecifier::sArg
);
442 if (PT
->isWideCharType())
443 LM
.setKind(LengthModifier::AsWideChar
);
445 LM
.setKind(LengthModifier::None
);
447 // If we know the target array length, we can use it as a field width.
448 if (const ConstantArrayType
*CAT
= Ctx
.getAsConstantArrayType(RawQT
)) {
449 if (CAT
->getSizeModifier() == ArraySizeModifier::Normal
)
// The width is the array size minus one - presumably to leave room for the
// terminating null character.
450 FieldWidth
= OptionalAmount(OptionalAmount::Constant
,
451 CAT
->getSize().getZExtValue() - 1,
458 // Figure out the length modifier.
459 switch (BT
->getKind()) {
461 case BuiltinType::UInt
:
462 case BuiltinType::Int
:
463 case BuiltinType::Float
:
464 LM
.setKind(LengthModifier::None
);
468 case BuiltinType::Char_U
:
469 case BuiltinType::UChar
:
470 case BuiltinType::Char_S
:
471 case BuiltinType::SChar
:
472 LM
.setKind(LengthModifier::AsChar
);
476 case BuiltinType::Short
:
477 case BuiltinType::UShort
:
478 LM
.setKind(LengthModifier::AsShort
);
482 case BuiltinType::Long
:
483 case BuiltinType::ULong
:
484 case BuiltinType::Double
:
485 LM
.setKind(LengthModifier::AsLong
);
489 case BuiltinType::LongLong
:
490 case BuiltinType::ULongLong
:
491 LM
.setKind(LengthModifier::AsLongLong
);
495 case BuiltinType::LongDouble
:
496 LM
.setKind(LengthModifier::AsLongDouble
);
504 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
505 if (LangOpt
.C99
|| LangOpt
.CPlusPlus11
)
506 namedTypeToLengthModifier(PT
, LM
);
508 // If fixing the length modifier was enough, we are done.
509 if (hasValidLengthModifier(Ctx
.getTargetInfo(), LangOpt
)) {
510 const analyze_scanf::ArgType
&AT
= getArgType(Ctx
);
511 if (AT
.isValid() && AT
.matchesType(Ctx
, QT
))
515 // Figure out the conversion specifier.
516 if (PT
->isRealFloatingType())
517 CS
.setKind(ConversionSpecifier::fArg
);
518 else if (PT
->isSignedIntegerType())
519 CS
.setKind(ConversionSpecifier::dArg
);
520 else if (PT
->isUnsignedIntegerType())
521 CS
.setKind(ConversionSpecifier::uArg
);
523 llvm_unreachable("Unexpected type");
// Writes the textual form of this scanf specifier to os.
//
// Visible here: when a positional argument is used, its index followed by
// "$" is written, and the field width is emitted via FieldWidth.toString().
//
// NOTE(review): the remaining output statements (including whatever is
// written when SuppressAssignment is set) are on lines not visible in this
// excerpt.
528 void ScanfSpecifier::toString(raw_ostream
&os
) const {
531 if (usesPositionalArg())
532 os
<< getPositionalArgIndex() << "$";
533 if (SuppressAssignment
)
536 FieldWidth
.toString(os
);
// Walks a scanf format string, parsing one specifier at a time with
// ParseScanfSpecifier() and passing each one to H.HandleScanfSpecifier().
//
// H      - handler that receives each parsed specifier together with its
//          start pointer and length (I - FSR.getStart()).
// LO     - language options.
// Target - target info.
//
// argIndex starts at 0 and is handed to ParseScanfSpecifier() for every
// specifier. After the loop the function asserts that the cursor I reached
// the end E of the string.
//
// NOTE(review): the I and E parameters, the loop header and the return
// statements are on lines not visible in this excerpt.
541 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler
&H
,
544 const LangOptions
&LO
,
545 const TargetInfo
&Target
) {
547 unsigned argIndex
= 0;
549 // Keep looking for a format specifier until we have exhausted the string.
551 const ScanfSpecifierResult
&FSR
= ParseScanfSpecifier(H
, I
, E
, argIndex
,
553 // Did a fail-stop error of any kind occur when parsing the specifier?
554 // If so, don't do any more processing.
555 if (FSR
.shouldStop())
557 // Did we exhaust the string or encounter an error that
558 // we can recover from?
561 // We have a format specifier. Pass it to the callback.
562 if (!H
.HandleScanfSpecifier(FSR
.getValue(), FSR
.getStart(),
563 I
- FSR
.getStart())) {
567 assert(I
== E
&& "Format string not exhausted");