1 //===-- TarWriter.cpp - Tar archive file creator --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
// TarWriter class provides a feature to create a tar archive file.
//
// I put emphasis on simplicity over comprehensiveness when implementing this
// class because we don't need a full-fledged archive file generator in LLVM
// at the moment.
//
// The filename field in the Unix V7 tar header is 100 bytes. Longer filenames
// are stored using the PAX extension. The PAX header is standardized in
// POSIX.1-2001.
//
// The struct definition of UstarHeader is copied from
// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
23 //===----------------------------------------------------------------------===//
25 #include "llvm/Support/TarWriter.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/MathExtras.h"
29 #include "llvm/Support/Path.h"
// Each file in an archive must be aligned to this block size.
static const int BlockSize = 512;

// On-disk layout of a POSIX ustar header block. Field order and widths follow
// the ustar header layout documented in FreeBSD's tar(5). Every member is a
// fixed-width character field, so the struct contains no padding and occupies
// exactly one block.
struct UstarHeader {
  char Name[100];     // File name (or the last part of it, see Prefix).
  char Mode[8];       // Permission bits, octal text.
  char Uid[8];        // Owner user id, octal text.
  char Gid[8];        // Owner group id, octal text.
  char Size[12];      // Payload size in bytes, octal text.
  char Mtime[12];     // Modification time, octal text.
  char Checksum[8];   // Sum of all header bytes, octal text.
  char TypeFlag;      // Entry type; 'x' marks a PAX extended header.
  char Linkname[100]; // Link target for link entries.
  char Magic[6];      // "ustar" followed by a NUL.
  char Version[2];    // "00".
  char Uname[32];     // Owner user name.
  char Gname[32];     // Owner group name.
  char DevMajor[8];   // Device major number, octal text.
  char DevMinor[8];   // Device minor number, octal text.
  char Prefix[155];   // Leading directory part of a long file name.
  char Pad[12];       // Unused; pads the header to BlockSize bytes.
};

// Headers are written to the archive as raw bytes, so the struct must be
// exactly one block long.
static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
57 static UstarHeader
makeUstarHeader() {
59 memcpy(Hdr
.Magic
, "ustar", 5); // Ustar magic
60 memcpy(Hdr
.Version
, "00", 2); // Ustar version
64 // A PAX attribute is in the form of "<length> <key>=<value>\n"
65 // where <length> is the length of the entire string including
66 // the length field itself. An example string is this.
68 // 25 ctime=1084839148.1212\n
70 // This function create such string.
71 static std::string
formatPax(StringRef Key
, StringRef Val
) {
72 int Len
= Key
.size() + Val
.size() + 3; // +3 for " ", "=" and "\n"
74 // We need to compute total size twice because appending
75 // a length field could change total size by one.
76 int Total
= Len
+ Twine(Len
).str().size();
77 Total
= Len
+ Twine(Total
).str().size();
78 return (Twine(Total
) + " " + Key
+ "=" + Val
+ "\n").str();
81 // Headers in tar files must be aligned to 512 byte boundaries.
82 // This function forwards the current file position to the next boundary.
83 static void pad(raw_fd_ostream
&OS
) {
84 uint64_t Pos
= OS
.tell();
85 OS
.seek(alignTo(Pos
, BlockSize
));
88 // Computes a checksum for a tar header.
89 static void computeChecksum(UstarHeader
&Hdr
) {
90 // Before computing a checksum, checksum field must be
91 // filled with space characters.
92 memset(Hdr
.Checksum
, ' ', sizeof(Hdr
.Checksum
));
94 // Compute a checksum and set it to the checksum field.
96 for (size_t I
= 0; I
< sizeof(Hdr
); ++I
)
97 Chksum
+= reinterpret_cast<uint8_t *>(&Hdr
)[I
];
98 snprintf(Hdr
.Checksum
, sizeof(Hdr
.Checksum
), "%06o", Chksum
);
101 // Create a tar header and write it to a given output stream.
102 static void writePaxHeader(raw_fd_ostream
&OS
, StringRef Path
) {
103 // A PAX header consists of a 512-byte header followed
104 // by key-value strings. First, create key-value strings.
105 std::string PaxAttr
= formatPax("path", Path
);
107 // Create a 512-byte header.
108 UstarHeader Hdr
= makeUstarHeader();
109 snprintf(Hdr
.Size
, sizeof(Hdr
.Size
), "%011zo", PaxAttr
.size());
110 Hdr
.TypeFlag
= 'x'; // PAX magic
111 computeChecksum(Hdr
);
114 OS
<< StringRef(reinterpret_cast<char *>(&Hdr
), sizeof(Hdr
));
119 // Path fits in a Ustar header if
121 // - Path is less than 100 characters long, or
122 // - Path is in the form of "<prefix>/<name>" where <prefix> is less
123 // than or equal to 155 characters long and <name> is less than 100
124 // characters long. Both <prefix> and <name> can contain extra '/'.
126 // If Path fits in a Ustar header, updates Prefix and Name and returns true.
127 // Otherwise, returns false.
128 static bool splitUstar(StringRef Path
, StringRef
&Prefix
, StringRef
&Name
) {
129 if (Path
.size() < sizeof(UstarHeader::Name
)) {
135 size_t Sep
= Path
.rfind('/', sizeof(UstarHeader::Prefix
) + 1);
136 if (Sep
== StringRef::npos
)
138 if (Path
.size() - Sep
- 1 >= sizeof(UstarHeader::Name
))
141 Prefix
= Path
.substr(0, Sep
);
142 Name
= Path
.substr(Sep
+ 1);
146 // The PAX header is an extended format, so a PAX header needs
147 // to be followed by a "real" header.
148 static void writeUstarHeader(raw_fd_ostream
&OS
, StringRef Prefix
,
149 StringRef Name
, size_t Size
) {
150 UstarHeader Hdr
= makeUstarHeader();
151 memcpy(Hdr
.Name
, Name
.data(), Name
.size());
152 memcpy(Hdr
.Mode
, "0000664", 8);
153 snprintf(Hdr
.Size
, sizeof(Hdr
.Size
), "%011zo", Size
);
154 memcpy(Hdr
.Prefix
, Prefix
.data(), Prefix
.size());
155 computeChecksum(Hdr
);
156 OS
<< StringRef(reinterpret_cast<char *>(&Hdr
), sizeof(Hdr
));
159 // Creates a TarWriter instance and returns it.
160 Expected
<std::unique_ptr
<TarWriter
>> TarWriter::create(StringRef OutputPath
,
162 using namespace sys::fs
;
164 if (std::error_code EC
=
165 openFileForWrite(OutputPath
, FD
, CD_CreateAlways
, OF_None
))
166 return make_error
<StringError
>("cannot open " + OutputPath
, EC
);
167 return std::unique_ptr
<TarWriter
>(new TarWriter(FD
, BaseDir
));
170 TarWriter::TarWriter(int FD
, StringRef BaseDir
)
171 : OS(FD
, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir
) {}
173 // Append a given file to an archive.
174 void TarWriter::append(StringRef Path
, StringRef Data
) {
175 // Write Path and Data.
176 std::string Fullpath
= BaseDir
+ "/" + sys::path::convert_to_slash(Path
);
178 // We do not want to include the same file more than once.
179 if (!Files
.insert(Fullpath
).second
)
184 if (splitUstar(Fullpath
, Prefix
, Name
)) {
185 writeUstarHeader(OS
, Prefix
, Name
, Data
.size());
187 writePaxHeader(OS
, Fullpath
);
188 writeUstarHeader(OS
, "", "", Data
.size());
194 // POSIX requires tar archives end with two null blocks.
195 // Here, we write the terminator and then seek back, so that
196 // the file being output is terminated correctly at any moment.
197 uint64_t Pos
= OS
.tell();
198 OS
<< std::string(BlockSize
* 2, '\0');