flang/runtime/namelist.cpp

   1 //===-- runtime/namelist.cpp ----------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "namelist.h"
  10 #include "descriptor-io.h"
  11 #include "emit-encoded.h"
  12 #include "io-stmt.h"
  13 #include "flang/Runtime/io-api.h"
  14 #include <algorithm>
  15 #include <cstring>
  16 #include <limits>
  17
  18 namespace Fortran::runtime::io {
  19
  20 // Max size of a group, symbol or component identifier that can appear in
  21 // NAMELIST input, plus a byte for NUL termination.
  22 static constexpr std::size_t nameBufferSize{201};
  23
  24 static inline char32_t GetComma(IoStatementState &io) {
  25   return io.mutableModes().editingFlags & decimalComma ? char32_t{';'}
  26                                                        : char32_t{','};
  27 }
  28
  29 bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) {
  30   IoStatementState &io{*cookie};
  31   io.CheckFormattedStmtType<Direction::Output>("OutputNamelist");
  32   io.mutableModes().inNamelist = true;
  33   char comma{static_cast<char>(GetComma(io))};
  34   ConnectionState &connection{io.GetConnectionState()};
  35   // Internal functions to advance records and convert case
  36   const auto EmitWithAdvance{[&](char ch) -> bool {
  37     return (!connection.NeedAdvance(1) || io.AdvanceRecord()) &&
  38         EmitAscii(io, &ch, 1);
  39   }};
  40   const auto EmitUpperCase{[&](const char *str) -> bool {
  41     if (connection.NeedAdvance(std::strlen(str)) &&
  42         !(io.AdvanceRecord() && EmitAscii(io, " ", 1))) {
  43       return false;
  44     }
  45     for (; *str; ++str) {
  46       char up{*str >= 'a' && *str <= 'z' ? static_cast<char>(*str - 'a' + 'A')
  47                                          : *str};
  48       if (!EmitAscii(io, &up, 1)) {
  49         return false;
  50       }
  51     }
  52     return true;
  53   }};
  54   // &GROUP
  55   if (!(EmitWithAdvance('&') && EmitUpperCase(group.groupName))) {
  56     return false;
  57   }
  58   auto *listOutput{io.get_if<ListDirectedStatementState<Direction::Output>>()};
  59   for (std::size_t j{0}; j < group.items; ++j) {
  60     // [,]ITEM=...
  61     const NamelistGroup::Item &item{group.item[j]};
  62     if (listOutput) {
  63       listOutput->set_lastWasUndelimitedCharacter(false);
  64     }
  65     if (!EmitWithAdvance(j == 0 ? ' ' : comma) || !EmitUpperCase(item.name) ||
  66         !EmitWithAdvance('=')) {
  67       return false;
  68     }
  69     if (const auto *addendum{item.descriptor.Addendum()};
  70         addendum && addendum->derivedType()) {
  71       const NonTbpDefinedIoTable *table{group.nonTbpDefinedIo};
  72       if (!IONAME(OutputDerivedType)(cookie, item.descriptor, table)) {
  73         return false;
  74       }
  75     } else if (!descr::DescriptorIO<Direction::Output>(io, item.descriptor)) {
  76       return false;
  77     }
  78   }
  79   // terminal /
  80   return EmitWithAdvance('/');
  81 }
  82
  83 static constexpr bool IsLegalIdStart(char32_t ch) {
  84   return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' ||
  85       ch == '@' || ch == '$';
  86 }
  87
  88 static constexpr bool IsLegalIdChar(char32_t ch) {
  89   return IsLegalIdStart(ch) || (ch >= '0' && ch <= '9');
  90 }
  91
  92 static constexpr char NormalizeIdChar(char32_t ch) {
  93   return static_cast<char>(ch >= 'A' && ch <= 'Z' ? ch - 'A' + 'a' : ch);
  94 }
  95
  96 static bool GetLowerCaseName(
  97     IoStatementState &io, char buffer[], std::size_t maxLength) {
  98   std::size_t byteLength{0};
  99   if (auto ch{io.GetNextNonBlank(byteLength)}) {
 100     if (IsLegalIdStart(*ch)) {
 101       std::size_t j{0};
 102       do {
 103         buffer[j] = NormalizeIdChar(*ch);
 104         io.HandleRelativePosition(byteLength);
 105         ch = io.GetCurrentChar(byteLength);
 106       } while (++j < maxLength && ch && IsLegalIdChar(*ch));
 107       buffer[j++] = '\0';
 108       if (j <= maxLength) {
 109         return true;
 110       }
 111       io.GetIoErrorHandler().SignalError(
 112           "Identifier '%s...' in NAMELIST input group is too long", buffer);
 113     }
 114   }
 115   return false;
 116 }
 117
 118 static std::optional<SubscriptValue> GetSubscriptValue(IoStatementState &io) {
 119   std::optional<SubscriptValue> value;
 120   std::size_t byteCount{0};
 121   std::optional<char32_t> ch{io.GetCurrentChar(byteCount)};
 122   bool negate{ch && *ch == '-'};
 123   if ((ch && *ch == '+') || negate) {
 124     io.HandleRelativePosition(byteCount);
 125     ch = io.GetCurrentChar(byteCount);
 126   }
 127   bool overflow{false};
 128   while (ch && *ch >= '0' && *ch <= '9') {
 129     SubscriptValue was{value.value_or(0)};
 130     overflow |= was >= std::numeric_limits<SubscriptValue>::max() / 10;
 131     value = 10 * was + *ch - '0';
 132     io.HandleRelativePosition(byteCount);
 133     ch = io.GetCurrentChar(byteCount);
 134   }
 135   if (overflow) {
 136     io.GetIoErrorHandler().SignalError(
 137         "NAMELIST input subscript value overflow");
 138     return std::nullopt;
 139   }
 140   if (negate) {
 141     if (value) {
 142       return -*value;
 143     } else {
 144       io.HandleRelativePosition(-byteCount); // give back '-' with no digits
 145     }
 146   }
 147   return value;
 148 }
 149
 150 static bool HandleSubscripts(IoStatementState &io, Descriptor &desc,
 151     const Descriptor &source, const char *name) {
 152   IoErrorHandler &handler{io.GetIoErrorHandler()};
 153   // Allow for blanks in subscripts; they're nonstandard, but not
 154   // ambiguous within the parentheses.
 155   SubscriptValue lower[maxRank], upper[maxRank], stride[maxRank];
 156   int j{0};
 157   std::size_t contiguousStride{source.ElementBytes()};
 158   bool ok{true};
 159   std::size_t byteCount{0};
 160   std::optional<char32_t> ch{io.GetNextNonBlank(byteCount)};
 161   char32_t comma{GetComma(io)};
 162   for (; ch && *ch != ')'; ++j) {
 163     SubscriptValue dimLower{0}, dimUpper{0}, dimStride{0};
 164     if (j < maxRank && j < source.rank()) {
 165       const Dimension &dim{source.GetDimension(j)};
 166       dimLower = dim.LowerBound();
 167       dimUpper = dim.UpperBound();
 168       dimStride =
 169           dim.ByteStride() / std::max<SubscriptValue>(contiguousStride, 1);
 170       contiguousStride *= dim.Extent();
 171     } else if (ok) {
 172       handler.SignalError(
 173           "Too many subscripts for rank-%d NAMELIST group item '%s'",
 174           source.rank(), name);
 175       ok = false;
 176     }
 177     if (auto low{GetSubscriptValue(io)}) {
 178       if (*low < dimLower || (dimUpper >= dimLower && *low > dimUpper)) {
 179         if (ok) {
 180           handler.SignalError("Subscript %jd out of range %jd..%jd in NAMELIST "
 181                               "group item '%s' dimension %d",
 182               static_cast<std::intmax_t>(*low),
 183               static_cast<std::intmax_t>(dimLower),
 184               static_cast<std::intmax_t>(dimUpper), name, j + 1);
 185           ok = false;
 186         }
 187       } else {
 188         dimLower = *low;
 189       }
 190       ch = io.GetNextNonBlank(byteCount);
 191     }
 192     if (ch && *ch == ':') {
 193       io.HandleRelativePosition(byteCount);
 194       ch = io.GetNextNonBlank(byteCount);
 195       if (auto high{GetSubscriptValue(io)}) {
 196         if (*high > dimUpper) {
 197           if (ok) {
 198             handler.SignalError(
 199                 "Subscript triplet upper bound %jd out of range (>%jd) in "
 200                 "NAMELIST group item '%s' dimension %d",
 201                 static_cast<std::intmax_t>(*high),
 202                 static_cast<std::intmax_t>(dimUpper), name, j + 1);
 203             ok = false;
 204           }
 205         } else {
 206           dimUpper = *high;
 207         }
 208         ch = io.GetNextNonBlank(byteCount);
 209       }
 210       if (ch && *ch == ':') {
 211         io.HandleRelativePosition(byteCount);
 212         ch = io.GetNextNonBlank(byteCount);
 213         if (auto str{GetSubscriptValue(io)}) {
 214           dimStride = *str;
 215           ch = io.GetNextNonBlank(byteCount);
 216         }
 217       }
 218     } else { // scalar
 219       dimUpper = dimLower;
 220       dimStride = 0;
 221     }
 222     if (ch && *ch == comma) {
 223       io.HandleRelativePosition(byteCount);
 224       ch = io.GetNextNonBlank(byteCount);
 225     }
 226     if (ok) {
 227       lower[j] = dimLower;
 228       upper[j] = dimUpper;
 229       stride[j] = dimStride;
 230     }
 231   }
 232   if (ok) {
 233     if (ch && *ch == ')') {
 234       io.HandleRelativePosition(byteCount);
 235       if (desc.EstablishPointerSection(source, lower, upper, stride)) {
 236         return true;
 237       } else {
 238         handler.SignalError(
 239             "Bad subscripts for NAMELIST input group item '%s'", name);
 240       }
 241     } else {
 242       handler.SignalError(
 243           "Bad subscripts (missing ')') for NAMELIST input group item '%s'",
 244           name);
 245     }
 246   }
 247   return false;
 248 }
 249
 250 static bool HandleSubstring(
 251     IoStatementState &io, Descriptor &desc, const char *name) {
 252   IoErrorHandler &handler{io.GetIoErrorHandler()};
 253   auto pair{desc.type().GetCategoryAndKind()};
 254   if (!pair || pair->first != TypeCategory::Character) {
 255     handler.SignalError("Substring reference to non-character item '%s'", name);
 256     return false;
 257   }
 258   int kind{pair->second};
 259   SubscriptValue chars{static_cast<SubscriptValue>(desc.ElementBytes()) / kind};
 260   // Allow for blanks in substring bounds; they're nonstandard, but not
 261   // ambiguous within the parentheses.
 262   std::optional<SubscriptValue> lower, upper;
 263   std::size_t byteCount{0};
 264   std::optional<char32_t> ch{io.GetNextNonBlank(byteCount)};
 265   if (ch) {
 266     if (*ch == ':') {
 267       lower = 1;
 268     } else {
 269       lower = GetSubscriptValue(io);
 270       ch = io.GetNextNonBlank(byteCount);
 271     }
 272   }
 273   if (ch && ch == ':') {
 274     io.HandleRelativePosition(byteCount);
 275     ch = io.GetNextNonBlank(byteCount);
 276     if (ch) {
 277       if (*ch == ')') {
 278         upper = chars;
 279       } else {
 280         upper = GetSubscriptValue(io);
 281         ch = io.GetNextNonBlank(byteCount);
 282       }
 283     }
 284   }
 285   if (ch && *ch == ')') {
 286     io.HandleRelativePosition(byteCount);
 287     if (lower && upper) {
 288       if (*lower > *upper) {
 289         // An empty substring, whatever the values are
 290         desc.raw().elem_len = 0;
 291         return true;
 292       }
 293       if (*lower >= 1 || *upper <= chars) {
 294         // Offset the base address & adjust the element byte length
 295         desc.raw().elem_len = (*upper - *lower + 1) * kind;
 296         desc.set_base_addr(reinterpret_cast<void *>(
 297             reinterpret_cast<char *>(desc.raw().base_addr) +
 298             kind * (*lower - 1)));
 299         return true;
 300       }
 301     }
 302     handler.SignalError(
 303         "Bad substring bounds for NAMELIST input group item '%s'", name);
 304   } else {
 305     handler.SignalError(
 306         "Bad substring (missing ')') for NAMELIST input group item '%s'", name);
 307   }
 308   return false;
 309 }
 310
 311 static bool HandleComponent(IoStatementState &io, Descriptor &desc,
 312     const Descriptor &source, const char *name) {
 313   IoErrorHandler &handler{io.GetIoErrorHandler()};
 314   char compName[nameBufferSize];
 315   if (GetLowerCaseName(io, compName, sizeof compName)) {
 316     const DescriptorAddendum *addendum{source.Addendum()};
 317     if (const typeInfo::DerivedType *
 318         type{addendum ? addendum->derivedType() : nullptr}) {
 319       if (const typeInfo::Component *
 320           comp{type->FindDataComponent(compName, std::strlen(compName))}) {
 321         bool createdDesc{false};
 322         if (comp->rank() > 0 && source.rank() > 0) {
 323           // If base and component are both arrays, the component name
 324           // must be followed by subscripts; process them now.
 325           std::size_t byteCount{0};
 326           if (std::optional<char32_t> next{io.GetNextNonBlank(byteCount)};
 327               next && *next == '(') {
 328             io.HandleRelativePosition(byteCount); // skip over '('
 329             StaticDescriptor<maxRank, true, 16> staticDesc;
 330             Descriptor &tmpDesc{staticDesc.descriptor()};
 331             comp->CreatePointerDescriptor(tmpDesc, source, handler);
 332             if (!HandleSubscripts(io, desc, tmpDesc, compName)) {
 333               return false;
 334             }
 335             createdDesc = true;
 336           }
 337         }
 338         if (!createdDesc) {
 339           comp->CreatePointerDescriptor(desc, source, handler);
 340         }
 341         if (source.rank() > 0) {
 342           if (desc.rank() > 0) {
 343             handler.SignalError(
 344                 "NAMELIST component reference '%%%s' of input group "
 345                 "item %s cannot be an array when its base is not scalar",
 346                 compName, name);
 347             return false;
 348           }
 349           desc.raw().rank = source.rank();
 350           for (int j{0}; j < source.rank(); ++j) {
 351             const auto &srcDim{source.GetDimension(j)};
 352             desc.GetDimension(j)
 353                 .SetBounds(1, srcDim.UpperBound())
 354                 .SetByteStride(srcDim.ByteStride());
 355           }
 356         }
 357         return true;
 358       } else {
 359         handler.SignalError(
 360             "NAMELIST component reference '%%%s' of input group item %s is not "
 361             "a component of its derived type",
 362             compName, name);
 363       }
 364     } else if (source.type().IsDerived()) {
 365       handler.Crash("Derived type object '%s' in NAMELIST is missing its "
 366                     "derived type information!",
 367           name);
 368     } else {
 369       handler.SignalError("NAMELIST component reference '%%%s' of input group "
 370                           "item %s for non-derived type",
 371           compName, name);
 372     }
 373   } else {
 374     handler.SignalError("NAMELIST component reference of input group item %s "
 375                         "has no name after '%%'",
 376         name);
 377   }
 378   return false;
 379 }
 380
 381 // Advance to the terminal '/' of a namelist group.
 382 static void SkipNamelistGroup(IoStatementState &io) {
 383   std::size_t byteCount{0};
 384   while (auto ch{io.GetNextNonBlank(byteCount)}) {
 385     io.HandleRelativePosition(byteCount);
 386     if (*ch == '/') {
 387       break;
 388     } else if (*ch == '\'' || *ch == '"') {
 389       // Skip quoted character literal
 390       char32_t quote{*ch};
 391       while (true) {
 392         if ((ch = io.GetCurrentChar(byteCount))) {
 393           io.HandleRelativePosition(byteCount);
 394           if (*ch == quote) {
 395             break;
 396           }
 397         } else if (!io.AdvanceRecord()) {
 398           return;
 399         }
 400       }
 401     }
 402   }
 403 }
 404
 405 bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
 406   IoStatementState &io{*cookie};
 407   io.CheckFormattedStmtType<Direction::Input>("InputNamelist");
 408   io.mutableModes().inNamelist = true;
 409   IoErrorHandler &handler{io.GetIoErrorHandler()};
 410   auto *listInput{io.get_if<ListDirectedStatementState<Direction::Input>>()};
 411   RUNTIME_CHECK(handler, listInput != nullptr);
 412   // Find this namelist group's header in the input
 413   io.BeginReadingRecord();
 414   std::optional<char32_t> next;
 415   char name[nameBufferSize];
 416   RUNTIME_CHECK(handler, group.groupName != nullptr);
 417   char32_t comma{GetComma(io)};
 418   std::size_t byteCount{0};
 419   while (true) {
 420     next = io.GetNextNonBlank(byteCount);
 421     while (next && *next != '&') {
 422       // Extension: comment lines without ! before namelist groups
 423       if (!io.AdvanceRecord()) {
 424         next.reset();
 425       } else {
 426         next = io.GetNextNonBlank(byteCount);
 427       }
 428     }
 429     if (!next) {
 430       handler.SignalEnd();
 431       return false;
 432     }
 433     if (*next != '&') {
 434       handler.SignalError(
 435           "NAMELIST input group does not begin with '&' (at '%lc')", *next);
 436       return false;
 437     }
 438     io.HandleRelativePosition(byteCount);
 439     if (!GetLowerCaseName(io, name, sizeof name)) {
 440       handler.SignalError("NAMELIST input group has no name");
 441       return false;
 442     }
 443     if (std::strcmp(group.groupName, name) == 0) {
 444       break; // found it
 445     }
 446     SkipNamelistGroup(io);
 447   }
 448   // Read the group's items
 449   while (true) {
 450     next = io.GetNextNonBlank(byteCount);
 451     if (!next || *next == '/') {
 452       break;
 453     }
 454     if (!GetLowerCaseName(io, name, sizeof name)) {
 455       handler.SignalError(
 456           "NAMELIST input group '%s' was not terminated at '%c'",
 457           group.groupName, static_cast<char>(*next));
 458       return false;
 459     }
 460     std::size_t itemIndex{0};
 461     for (; itemIndex < group.items; ++itemIndex) {
 462       if (std::strcmp(name, group.item[itemIndex].name) == 0) {
 463         break;
 464       }
 465     }
 466     if (itemIndex >= group.items) {
 467       handler.SignalError(
 468           "'%s' is not an item in NAMELIST group '%s'", name, group.groupName);
 469       return false;
 470     }
 471     // Handle indexing and components, if any.  No spaces are allowed.
 472     // A copy of the descriptor is made if necessary.
 473     const Descriptor &itemDescriptor{group.item[itemIndex].descriptor};
 474     const Descriptor *useDescriptor{&itemDescriptor};
 475     StaticDescriptor<maxRank, true, 16> staticDesc[2];
 476     int whichStaticDesc{0};
 477     next = io.GetCurrentChar(byteCount);
 478     bool hadSubscripts{false};
 479     bool hadSubstring{false};
 480     if (next && (*next == '(' || *next == '%')) {
 481       do {
 482         Descriptor &mutableDescriptor{staticDesc[whichStaticDesc].descriptor()};
 483         whichStaticDesc ^= 1;
 484         io.HandleRelativePosition(byteCount); // skip over '(' or '%'
 485         if (*next == '(') {
 486           if (!hadSubstring && (hadSubscripts || useDescriptor->rank() == 0)) {
 487             mutableDescriptor = *useDescriptor;
 488             mutableDescriptor.raw().attribute = CFI_attribute_pointer;
 489             if (!HandleSubstring(io, mutableDescriptor, name)) {
 490               return false;
 491             }
 492             hadSubstring = true;
 493           } else if (hadSubscripts) {
 494             handler.SignalError("Multiple sets of subscripts for item '%s' in "
 495                                 "NAMELIST group '%s'",
 496                 name, group.groupName);
 497             return false;
 498           } else {
 499             if (!HandleSubscripts(
 500                     io, mutableDescriptor, *useDescriptor, name)) {
 501               return false;
 502             }
 503           }
 504           hadSubscripts = true;
 505         } else {
 506           if (!HandleComponent(io, mutableDescriptor, *useDescriptor, name)) {
 507             return false;
 508           }
 509           hadSubscripts = false;
 510           hadSubstring = false;
 511         }
 512         useDescriptor = &mutableDescriptor;
 513         next = io.GetCurrentChar(byteCount);
 514       } while (next && (*next == '(' || *next == '%'));
 515     }
 516     // Skip the '='
 517     next = io.GetNextNonBlank(byteCount);
 518     if (!next || *next != '=') {
 519       handler.SignalError("No '=' found after item '%s' in NAMELIST group '%s'",
 520           name, group.groupName);
 521       return false;
 522     }
 523     io.HandleRelativePosition(byteCount);
 524     // Read the values into the descriptor.  An array can be short.
 525     if (const auto *addendum{useDescriptor->Addendum()};
 526         addendum && addendum->derivedType()) {
 527       const NonTbpDefinedIoTable *table{group.nonTbpDefinedIo};
 528       listInput->ResetForNextNamelistItem(/*inNamelistSequence=*/true);
 529       if (!IONAME(InputDerivedType)(cookie, *useDescriptor, table)) {
 530         return false;
 531       }
 532     } else {
 533       listInput->ResetForNextNamelistItem(useDescriptor->rank() > 0);
 534       if (!descr::DescriptorIO<Direction::Input>(io, *useDescriptor)) {
 535         return false;
 536       }
 537     }
 538     next = io.GetNextNonBlank(byteCount);
 539     if (next && *next == comma) {
 540       io.HandleRelativePosition(byteCount);
 541     }
 542   }
 543   if (!next || *next != '/') {
 544     handler.SignalError(
 545         "No '/' found after NAMELIST group '%s'", group.groupName);
 546     return false;
 547   }
 548   io.HandleRelativePosition(byteCount);
 549   return true;
 550 }
 551
 552 bool IsNamelistNameOrSlash(IoStatementState &io) {
 553   if (auto *listInput{
 554           io.get_if<ListDirectedStatementState<Direction::Input>>()}) {
 555     if (listInput->inNamelistSequence()) {
 556       SavedPosition savedPosition{io};
 557       std::size_t byteCount{0};
 558       if (auto ch{io.GetNextNonBlank(byteCount)}) {
 559         if (IsLegalIdStart(*ch)) {
 560           do {
 561             io.HandleRelativePosition(byteCount);
 562             ch = io.GetCurrentChar(byteCount);
 563           } while (ch && IsLegalIdChar(*ch));
 564           ch = io.GetNextNonBlank(byteCount);
 565           // TODO: how to deal with NaN(...) ambiguity?
 566           return ch && (*ch == '=' || *ch == '(' || *ch == '%');
 567         } else {
 568           return *ch == '/';
 569         }
 570       }
 571     }
 572   }
 573   return false;
 574 }
 575
 576 } // namespace Fortran::runtime::io