1 //===- split-file.cpp - Input splitting utility ---------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Split input into multiple parts separated by regex '^(.|//)--- ' and extract
10 // the specified part.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/FileOutputBuffer.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/LineIterator.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 #include "llvm/Support/Path.h"
23 #include "llvm/Support/ToolOutputFile.h"
24 #include "llvm/Support/WithColor.h"
26 #include <system_error>
// Option category for this tool. main() hands its address to
// cl::HideUnrelatedOptions.
30 static cl::OptionCategory
cat("split-file Options");
// Positional argument holding the input file name. main() opens it through
// MemoryBuffer::getFileOrSTDIN.
// NOTE(review): the remaining arguments of this declaration are not visible
// in this excerpt.
32 static cl::opt
<std::string
> input(cl::Positional
, cl::desc("filename"),
// Positional argument holding the output directory. handle() uses it as the
// root under which every part path is built.
35 static cl::opt
<std::string
> output(cl::Positional
, cl::desc("directory"),
36 cl::value_desc("directory"), cl::cat(cat
));
// Boolean flag --leading-lines. handle() reads it to decide whether to record
// how many lines precede each part.
// NOTE(review): the remaining arguments of this declaration are not visible
// in this excerpt.
38 static cl::opt
<bool> leadingLines("leading-lines",
39 cl::desc("Preserve line numbers"),
// Boolean flag --no-leading-lines, described as the default behaviour.
// NOTE(review): no read of this flag is visible in this excerpt; the
// remaining arguments of the declaration are not visible either.
42 static cl::opt
<bool> noLeadingLines("no-leading-lines",
43 cl::desc("Don't preserve line numbers (default)"),
// Tool name used as the prefix of fatal diagnostics; main() sets it to the
// stem of argv[0].
46 static StringRef toolName
;
// NOTE(review): presumably counts the diagnostics issued by error(); no read
// or write of this counter is visible in this excerpt -- confirm.
47 static int errorCount
;
// Reports an unrecoverable error on errs() through WithColor::error, tagged
// with the tool name.
//
// filename: file the problem relates to; main() passes an empty string when
//           no file applies.
// message:  text of the diagnostic.
//
// Two message shapes are visible below: the bare message, and the message
// preceded by the file name and a colon.
// NOTE(review): the condition choosing between the two shapes and the call
// that makes this function [[noreturn]] are not visible in this excerpt.
// Presumably an empty filename selects the bare shape -- confirm against the
// full file.
49 [[noreturn
]] static void fatal(StringRef filename
, const Twine
&message
) {
// Shape 1: message only.
51 WithColor::error(errs(), toolName
) << message
<< '\n';
// Shape 2: file name, colon, message.
53 WithColor::error(errs(), toolName
) << filename
<< ": " << message
<< '\n';
// Reports a diagnostic tied to a specific input line. Unlike fatal(), this
// function is not declared [[noreturn]].
//
// filename: name of the input being processed.
// line:     line number the diagnostic refers to.
// message:  text of the diagnostic.
//
// NOTE(review): the closing lines of this function are not visible in this
// excerpt; any update of errorCount would live there -- confirm.
57 static void error(StringRef filename
, int64_t line
, const Twine
&message
) {
// Location prefix: file name, colon, line number, colon, space.
59 errs() << filename
<< ':' << line
<< ": ";
// The message itself goes through WithColor::error, then a newline.
60 WithColor::error(errs()) << message
<< '\n';
// Fields of the per-part record that handle() stores in its map under the
// type name Part.
// NOTE(review): the header of the enclosing declaration is not visible in
// this excerpt.
//
// Start of the part contents: handle() points it at the line that follows the
// separator line.
65 const char *begin
= nullptr;
// End of the part contents (exclusive): handle() sets it to the start of the
// next separator line, or to the end of the input buffer for the last part.
66 const char *end
= nullptr;
// Set by handle() to the line number of the first content line minus one when
// --leading-lines is given, and to 0 otherwise.
67 int64_t leadingLines
= 0;
// Splits the input buffer into named parts and writes each part to a file
// below the output directory.
//
// inputBuf: buffer holding the whole input text.
// input:    name of the input, used only in diagnostics.
//
// Returns an int that main() returns as the process result.
// NOTE(review): the return statements, several guards and several loop bodies
// of this function are not visible in this excerpt; comments below that
// depend on them are marked as such.
71 static int handle(MemoryBuffer
&inputBuf
, StringRef input
) {
// Part name -> byte range (and leading-line count) of that part.
72 DenseMap
<StringRef
, Part
> partToBegin
;
// lastPart is used below to close the range of the previously opened part;
// separator keeps the marker text of the current separator line for the
// duplicate-name diagnostic.
// NOTE(review): the assignment to lastPart is not visible in this excerpt.
73 StringRef lastPart
, separator
;
// Line ending detected from the input.
// NOTE(review): no use of EOL is visible in this excerpt; presumably it is
// what the leading-lines loop further down writes -- confirm.
74 StringRef EOL
= inputBuf
.getBuffer().detectEOL();
// Walk the input line by line; blank lines are kept (SkipBlanks is false).
75 for (line_iterator
i(inputBuf
, /*SkipBlanks=*/false, '\0'); !i
.is_at_eof();) {
// Capture the line number before the iterator is advanced, so diagnostics
// name the separator line itself.
76 const int64_t lineNo
= i
.line_number();
// Read the current line and advance; from here on i refers to the NEXT line.
77 const StringRef line
= *i
++;
// A marker is a prefix followed by three dashes and a space: the prefix is two
// slashes (marker length 6) or any single character (marker length 5).
78 const size_t markerLen
= line
.starts_with("//") ? 6 : 5;
// Test for lines that are NOT separators (too short, or the dashes and space
// are not at the expected offset).
// NOTE(review): the statement guarded by this test is not visible; presumably
// such lines are skipped.
79 if (!(line
.size() >= markerLen
&&
80 line
.substr(markerLen
- 4).starts_with("--- ")))
// Marker text, kept for the duplicate-name diagnostic below.
82 separator
= line
.substr(0, markerLen
);
// Everything after the marker is the part name.
83 const StringRef partName
= line
.substr(markerLen
);
84 if (partName
.empty()) {
85 error(input
, lineNo
, "empty part name");
// Reject names that begin or end with whitespace.
88 if (isSpace(partName
.front()) || isSpace(partName
.back())) {
89 error(input
, lineNo
, "part name cannot have leading or trailing space");
// Insert a fresh record for this name, or find the existing one.
93 auto res
= partToBegin
.try_emplace(partName
);
// Tail of the diagnostic for a repeated part name.
// NOTE(review): the check on the try_emplace result and the start of this
// call are not visible in this excerpt.
96 "'" + separator
+ partName
+ "' occurs more than once");
// The previous part ends where this separator line starts.
99 if (!lastPart
.empty())
100 partToBegin
[lastPart
].end
= line
.data();
101 Part
&cur
= res
.first
->second
;
// i was already advanced, so this is the start of the line after the
// separator.
103 cur
.begin
= i
->data();
104 // With --leading-lines, record how many lines precede the first content line
105 // of this part (its line number minus one); without the flag record 0.
// NOTE(review): presumably the loop over part.leadingLines further down emits
// that many line endings ahead of the part so the extracted text keeps its
// original line numbers; the loop body is not visible -- confirm.
106 cur
.leadingLines
= leadingLines
? i
.line_number() - 1 : 0;
// An input without any separator is a fatal error.
110 if (lastPart
.empty())
111 fatal(input
, "no part separator was found");
// The final part runs to the end of the buffer.
114 partToBegin
[lastPart
].end
= inputBuf
.getBufferEnd();
// Output files are collected here and visited again by the last loop below.
116 std::vector
<std::unique_ptr
<ToolOutputFile
>> outputFiles
;
117 SmallString
<256> partPath
;
118 for (auto &keyValue
: partToBegin
) {
// Build the destination path from the output directory and the part name.
// NOTE(review): sys::path::append extends partPath; a reset of partPath
// between iterations is not visible in this excerpt -- confirm.
120 sys::path::append(partPath
, output
, keyValue
.first
);
// Make sure the directory that will contain the part file exists.
// NOTE(review): the declaration of ec and the tests guarding the two fatal
// calls below are not visible in this excerpt.
122 sys::fs::create_directories(sys::path::parent_path(partPath
));
124 fatal(input
, ec
.message());
// Open the part file with the OF_Text flag.
125 auto f
= std::make_unique
<ToolOutputFile
>(partPath
.str(), ec
,
126 llvm::sys::fs::OF_Text
);
128 fatal(input
, ec
.message());
130 Part
&part
= keyValue
.second
;
// Runs part.leadingLines times.
// NOTE(review): the loop body is not visible in this excerpt.
131 for (int64_t i
= 0; i
!= part
.leadingLines
; ++i
)
// Copy the bytes of the part, from begin up to (not including) end.
134 (*f
).os().write(part
.begin
, part
.end
- part
.begin
);
135 outputFiles
.push_back(std::move(f
));
// Final pass over every output file.
// NOTE(review): the loop body is not visible in this excerpt.
138 for (std::unique_ptr
<ToolOutputFile
> &outputFile
: outputFiles
)
// Entry point: parses the command line, loads the input, prepares the output
// location and delegates the splitting to handle(), whose result is returned.
// NOTE(review): some statements of this function (including the tests guarding
// the first two fatal calls) are not visible in this excerpt.
143 int main(int argc
, const char **argv
) {
// Diagnostics from fatal() are prefixed with the stem of the program path.
144 toolName
= sys::path::stem(argv
[0]);
// Only options in this tool's category stay visible.
145 cl::HideUnrelatedOptions({&cat
});
// NOTE(review): the leading arguments of this call are not visible in this
// excerpt.
146 cl::ParseCommandLineOptions(
148 "Split input into multiple parts separated by regex '^(.|//)--- ' and "
149 "extract the part specified by '^(.|//)--- <part>'\n",
152 /*LongOptionsUseDoubleDash=*/true);
// Both positional arguments are mandatory. An empty file name is passed to
// fatal() because no file is involved yet.
155 fatal("", "input filename is not specified");
157 fatal("", "output directory is not specified");
// Load the input in text mode, from the named file or from standard input.
158 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> bufferOrErr
=
159 MemoryBuffer::getFileOrSTDIN(input
, /*IsText=*/true);
160 if (std::error_code ec
= bufferOrErr
.getError())
161 fatal(input
, ec
.message());
163 // Delete output if it is a file or an empty directory, so that handle() can
// create the directories it needs under that path. A non-empty directory is
// left in place: the directory_not_empty and file_exists errors reported by
// remove are tolerated below.
165 sys::fs::file_status status
;
// A missing output path is fine; any other status failure is fatal.
166 if (std::error_code ec
= sys::fs::status(output
, status
))
167 if (ec
.value() != static_cast<int>(std::errc::no_such_file_or_directory
))
168 fatal(output
, ec
.message());
// Only a missing path, a directory or a regular file is accepted as output.
169 if (status
.type() != sys::fs::file_type::file_not_found
&&
170 status
.type() != sys::fs::file_type::directory_file
&&
171 status
.type() != sys::fs::file_type::regular_file
)
172 fatal(output
, "output cannot be a special file");
// Remove the existing output; a path that does not exist is ignored.
173 if (std::error_code ec
= sys::fs::remove(output
, /*IgnoreNonExisting=*/true))
174 if (ec
.value() != static_cast<int>(std::errc::directory_not_empty
) &&
175 ec
.value() != static_cast<int>(std::errc::file_exists
))
176 fatal(output
, ec
.message());
// Split the loaded buffer; the input name is passed along for diagnostics.
177 return handle(**bufferOrErr
, input
);