2 #include "sanitizer_common/sanitizer_common.h"
4 #include "sanitizer_common/sanitizer_posix.h"
7 #include "xray_interface_internal.h"
9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
10 #include <sys/types.h>
13 #include <machine/cpu.h>
15 #include <sys/sysctl.h>
16 #elif SANITIZER_FUCHSIA
17 #include <zircon/syscalls.h>
// Reads from file descriptor Fd into the buffer [Begin, End) by calling
// read() repeatedly.
//
// Returns a pair {total bytes read so far, success flag}. The flag is false
// when a read() failure is reported and true when the loop finishes normally.
//
// NOTE(review): this excerpt has gaps - the declaration of BytesRead, some
// statements inside the error branch, any statement following the
// BytesToRead update, and the closing braces are not visible here. Confirm
// the details against the complete file.
32 static std::pair
<ssize_t
, bool>
33 retryingReadSome(int Fd
, char *Begin
, char *End
) XRAY_NEVER_INSTRUMENT
{
// Number of bytes still wanted; starts as the size of the caller's buffer.
34 auto BytesToRead
= std::distance(Begin
, End
);
36 ssize_t TotalBytesRead
= 0;
// Keep going while bytes are still wanted and read() returns non-zero; a
// return value of 0 from read() ends the loop.
37 while (BytesToRead
&& (BytesRead
= read(Fd
, Begin
, BytesToRead
))) {
// read() returned -1: the visible handling reports errno and returns the
// running total together with a false success flag.
38 if (BytesRead
== -1) {
41 Report("Read error; errno = %d\n", errno
);
42 return std::make_pair(TotalBytesRead
, false);
// Successful read: account for the bytes that were just received.
45 TotalBytesRead
+= BytesRead
;
46 BytesToRead
-= BytesRead
;
// Loop finished without a reported error.
49 return std::make_pair(TotalBytesRead
, true);
// Opens Filename read-only (with O_CLOEXEC), reads up to BufSize bytes of it
// through retryingReadSome, and parses the start of the buffer as a base-10
// integer with internal_simple_strtoll.
//
// The last visible check accepts the parse only when the buffer is non-empty
// and parsing stopped at a newline or at a NUL byte.
//
// NOTE(review): the handling of open()/read failures, the store into *Value,
// the closing of Fd and the return statements are not visible in this
// excerpt; presumably Tmp is written to *Value on success - confirm against
// the complete file.
52 static bool readValueFromFile(const char *Filename
,
53 long long *Value
) XRAY_NEVER_INSTRUMENT
{
54 int Fd
= open(Filename
, O_RDONLY
| O_CLOEXEC
);
// Zero-initialised scratch buffer that receives the file contents.
57 static constexpr size_t BufSize
= 256;
58 char Line
[BufSize
] = {};
// Unpack the {bytes read, success} pair returned by retryingReadSome.
61 std::tie(BytesRead
, Success
) = retryingReadSome(Fd
, Line
, Line
+ BufSize
);
// End receives the position at which number parsing stopped.
65 const char *End
= nullptr;
66 long long Tmp
= internal_simple_strtoll(Line
, &End
, 10);
// Only treat the value as valid if something was read and the number is
// followed directly by end-of-line or end-of-buffer.
68 if (Line
[0] != '\0' && (*End
== '\n' || *End
== '\0')) {
// Determines the TSC frequency from sysfs files using readValueFromFile.
//
// The visible code consults cpu0/tsc_freq_khz first and falls back to
// cpu0/cpufreq/cpuinfo_max_freq; a message is reported when the frequency
// cannot be determined. Returns 0 when TSCFrequency still holds its initial
// value of -1, otherwise the value cast to uint64_t.
//
// NOTE(review): the remaining call arguments and whatever post-processing is
// applied to the value that was read are not visible in this excerpt.
75 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT
{
// -1 marks "not determined"; it is mapped to 0 in the return below.
76 long long TSCFrequency
= -1;
77 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
// Fallback source when the first file did not yield a value.
80 } else if (readValueFromFile(
81 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
85 Report("Unable to determine CPU frequency for TSC accounting.\n");
87 return TSCFrequency
== -1 ? 0 : static_cast<uint64_t>(TSCFrequency
);
89 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
// Variant of getTSCFrequency that queries the TSC frequency through sysctl.
//
// Three queries are visible: the {CTL_MACHDEP, CPU_TSCFREQ} MIB via
// internal_sysctl, and the names "machdep.tsc.frequency" and
// "machdep.tsc_freq" via internal_sysctlbyname. A query counts as successful
// when the call does not return -1, in which case TSCFrequency is returned
// cast to uint64_t. Otherwise a message is reported.
//
// NOTE(review): the conditions selecting which of the three queries is
// compiled, and the return after the Report call, are not visible in this
// excerpt.
90 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT
{
91 long long TSCFrequency
= -1;
// In/out size argument for the sysctl calls: the size of the result buffer.
92 size_t tscfreqsz
= sizeof(TSCFrequency
);
// Query by numeric MIB.
94 int Mib
[2] = { CTL_MACHDEP
, CPU_TSCFREQ
};
95 if (internal_sysctl(Mib
, 2, &TSCFrequency
, &tscfreqsz
, NULL
, 0) != -1) {
// Query by name.
97 if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency
,
98 &tscfreqsz
, NULL
, 0) != -1) {
// Query by an alternative name.
101 if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency
, &tscfreqsz
,
104 return static_cast<uint64_t>(TSCFrequency
);
106 Report("Unable to determine CPU frequency for TSC accounting.\n");
111 #elif !SANITIZER_FUCHSIA
// Variant of getTSCFrequency compiled when none of the preceding platform
// branches applies and the target is not Fuchsia.
// NOTE(review): the body of this variant is not visible in this excerpt.
112 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT
{
// Byte patterns written into sleds by the patch* functions below.
//
// The 16-bit constants are stored with a single 2-byte write at the start of
// a sled, so on a little-endian target the low byte of each value is the
// first byte in memory (for example 0x09eb is the byte pair eb 09).

// Opcode byte written at sled offset 6 by the entry and tail-exit patchers,
// followed by a 32-bit relative offset to the trampoline.
118 static constexpr uint8_t CallOpCode
= 0xe8;
// First two bytes of the "mov r10d, <function id>" instruction that begins a
// patched entry/exit/tail-exit sled.
119 static constexpr uint16_t MovR10Seq
= 0xba41;
// Two-byte sequence stored at the start of an entry/tail-exit sled on the
// unpatch path; the second byte is 9.
120 static constexpr uint16_t Jmp9Seq
= 0x09eb;
// Two-byte sequence whose second byte is 20 (0x14); used by the custom- and
// typed-event patchers.
121 static constexpr uint16_t Jmp20Seq
= 0x14eb;
// Two-byte sequence whose second byte is 15 (0x0f); used by the custom-event
// patcher.
122 static constexpr uint16_t Jmp15Seq
= 0x0feb;
// Opcode byte written at sled offset 6 by the exit patcher, followed by a
// 32-bit relative offset to the exit trampoline.
123 static constexpr uint8_t JmpOpCode
= 0xe9;
// Single byte stored at the start of an exit sled on the unpatch path.
124 static constexpr uint8_t RetOpCode
= 0xc3;
// Two-byte 'nopw' sequence stored over the leading jump of an event sled.
125 static constexpr uint16_t NopwSeq
= 0x9066;

// Bounds of a signed 32-bit value, widened to int64_t so that a 64-bit
// trampoline offset can be range-checked before it is written as 32 bits.
127 static constexpr int64_t MinOffset
{std::numeric_limits
<int32_t>::min()};
128 static constexpr int64_t MaxOffset
{std::numeric_limits
<int32_t>::max()};
// Patches or unpatches the function-entry sled located at Sled.Address.
//
// Parameters:
//   Enable     - selects patching vs. unpatching.
//   FuncId     - function id written into the sled as the mov immediate.
//   Sled       - sled descriptor; only Sled.Address is used in the visible code.
//   Trampoline - entry trampoline that the patched sled calls.
//
// NOTE(review): the branch on Enable and the return statements are not
// visible in this excerpt. Presumably the first group of writes below runs
// when Enable is true and the Jmp9Seq store runs otherwise - confirm.
130 bool patchFunctionEntry(const bool Enable
, const uint32_t FuncId
,
131 const XRaySledEntry
&Sled
,
132 void (*Trampoline
)()) XRAY_NEVER_INSTRUMENT
{
133 // Here we do the dance of replacing the following sled:
139 // With the following:
141 // mov r10d, <function id>
142 // call <relative 32bit offset to entry trampoline>
144 // We need to do this in the following order:
146 // 1. Put the function id first, 2 bytes from the start of the sled (just
147 // after the 2-byte jmp instruction).
148 // 2. Put the call opcode 6 bytes from the start of the sled.
149 // 3. Put the relative offset 7 bytes from the start of the sled.
150 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
151 // opcode and first operand.
153 // Prerequisite is to compute the relative offset to the trampoline's address.
// The offset is taken relative to Sled.Address + 11, i.e. the first byte
// after the patched sequence (2 + 4 bytes of mov, 1 opcode byte, 4 offset
// bytes).
154 int64_t TrampolineOffset
= reinterpret_cast<int64_t>(Trampoline
) -
155 (static_cast<int64_t>(Sled
.Address
) + 11);
// The offset is later written as a 32-bit value, so it must fit in int32_t.
156 if (TrampolineOffset
< MinOffset
|| TrampolineOffset
> MaxOffset
) {
157 Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
158 Trampoline
, reinterpret_cast<void *>(Sled
.Address
));
// Steps 1-3: fill in the operand bytes behind the first two sled bytes.
162 *reinterpret_cast<uint32_t *>(Sled
.Address
+ 2) = FuncId
;
163 *reinterpret_cast<uint8_t *>(Sled
.Address
+ 6) = CallOpCode
;
164 *reinterpret_cast<uint32_t *>(Sled
.Address
+ 7) = TrampolineOffset
;
// Step 4: a single release-ordered 2-byte store replaces the leading bytes
// of the sled with the start of the mov instruction.
165 std::atomic_store_explicit(
166 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), MovR10Seq
,
167 std::memory_order_release
);
// Other path: store Jmp9Seq over the first two bytes of the sled.
169 std::atomic_store_explicit(
170 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), Jmp9Seq
,
171 std::memory_order_release
);
172 // FIXME: Write out the nops still?
// Patches or unpatches the function-exit sled located at Sled.Address.
//
// Parameters:
//   Enable - selects patching vs. unpatching.
//   FuncId - function id written into the sled as the mov immediate.
//   Sled   - sled descriptor; only Sled.Address is used in the visible code.
//
// The patched sled jumps to __xray_FunctionExit.
//
// NOTE(review): the branch on Enable and the return statements are not
// visible in this excerpt. Presumably the first group of writes below runs
// when Enable is true and the RetOpCode store runs otherwise - confirm.
177 bool patchFunctionExit(const bool Enable
, const uint32_t FuncId
,
178 const XRaySledEntry
&Sled
) XRAY_NEVER_INSTRUMENT
{
179 // Here we do the dance of replacing the following sled:
185 // With the following:
187 // mov r10d, <function id>
188 // jmp <relative 32bit offset to exit trampoline>
190 // 1. Put the function id first, 2 bytes from the start of the sled (just
191 // after the 1-byte ret instruction).
192 // 2. Put the jmp opcode 6 bytes from the start of the sled.
193 // 3. Put the relative offset 7 bytes from the start of the sled.
194 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
195 // opcode and first operand.
// NOTE(review): step 4 says "jmp instruction", but the other path below
// stores a 1-byte RetOpCode at the sled start, so the unpatched exit sled
// appears to begin with ret rather than jmp - confirm the wording.
197 // Prerequisite is to compute the relative offset to the
198 // __xray_FunctionExit function's address.
// The offset is taken relative to Sled.Address + 11, i.e. the first byte
// after the patched sequence (2 + 4 bytes of mov, 1 opcode byte, 4 offset
// bytes).
199 int64_t TrampolineOffset
= reinterpret_cast<int64_t>(__xray_FunctionExit
) -
200 (static_cast<int64_t>(Sled
.Address
) + 11);
// The offset is later written as a 32-bit value, so it must fit in int32_t.
201 if (TrampolineOffset
< MinOffset
|| TrampolineOffset
> MaxOffset
) {
202 Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
203 __xray_FunctionExit
, reinterpret_cast<void *>(Sled
.Address
));
// Steps 1-3: fill in the operand bytes behind the first two sled bytes.
207 *reinterpret_cast<uint32_t *>(Sled
.Address
+ 2) = FuncId
;
208 *reinterpret_cast<uint8_t *>(Sled
.Address
+ 6) = JmpOpCode
;
209 *reinterpret_cast<uint32_t *>(Sled
.Address
+ 7) = TrampolineOffset
;
// Step 4: a single release-ordered 2-byte store replaces the leading bytes
// of the sled with the start of the mov instruction.
210 std::atomic_store_explicit(
211 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), MovR10Seq
,
212 std::memory_order_release
);
// Other path: store the single RetOpCode byte at the start of the sled.
214 std::atomic_store_explicit(
215 reinterpret_cast<std::atomic
<uint8_t> *>(Sled
.Address
), RetOpCode
,
216 std::memory_order_release
);
217 // FIXME: Write out the nops still?
// Patches or unpatches the tail-call exit sled located at Sled.Address.
//
// Parameters:
//   Enable - selects patching vs. unpatching.
//   FuncId - function id written into the sled as the mov immediate.
//   Sled   - sled descriptor; only Sled.Address is used in the visible code.
//
// The write sequence mirrors patchFunctionEntry, except that the call target
// is __xray_FunctionTailExit.
//
// NOTE(review): the branch on Enable and the return statements are not
// visible in this excerpt. Presumably the first group of writes below runs
// when Enable is true and the Jmp9Seq store runs otherwise - confirm.
222 bool patchFunctionTailExit(const bool Enable
, const uint32_t FuncId
,
223 const XRaySledEntry
&Sled
) XRAY_NEVER_INSTRUMENT
{
224 // Here we do the dance of replacing the tail call sled with a similar
225 // sequence as the entry sled, but calls the tail exit sled instead.
// The offset is taken relative to Sled.Address + 11, i.e. the first byte
// after the patched sequence (2 + 4 bytes of mov, 1 opcode byte, 4 offset
// bytes).
226 int64_t TrampolineOffset
=
227 reinterpret_cast<int64_t>(__xray_FunctionTailExit
) -
228 (static_cast<int64_t>(Sled
.Address
) + 11);
// The offset is later written as a 32-bit value, so it must fit in int32_t.
229 if (TrampolineOffset
< MinOffset
|| TrampolineOffset
> MaxOffset
) {
230 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
231 __xray_FunctionTailExit
, reinterpret_cast<void *>(Sled
.Address
));
// Fill in the function id, the call opcode and the 32-bit offset first.
235 *reinterpret_cast<uint32_t *>(Sled
.Address
+ 2) = FuncId
;
236 *reinterpret_cast<uint8_t *>(Sled
.Address
+ 6) = CallOpCode
;
237 *reinterpret_cast<uint32_t *>(Sled
.Address
+ 7) = TrampolineOffset
;
// Then a single release-ordered 2-byte store replaces the leading bytes of
// the sled with the start of the mov instruction.
238 std::atomic_store_explicit(
239 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), MovR10Seq
,
240 std::memory_order_release
);
// Other path: store Jmp9Seq over the first two bytes of the sled.
242 std::atomic_store_explicit(
243 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), Jmp9Seq
,
244 std::memory_order_release
);
245 // FIXME: Write out the nops still?
// Patches or unpatches the custom-event sled located at Sled.Address.
//
// Parameters:
//   Enable - selects patching vs. unpatching.
//   FuncId - not used in the visible code.
//   Sled   - sled descriptor; Sled.Address and Sled.Version are used.
//
// Only the first two bytes of the sled are rewritten: one path stores
// NopwSeq, the other stores a jump sequence chosen by Sled.Version
// (Jmp15Seq or Jmp20Seq).
//
// NOTE(review): the branch on Enable, the case labels of the switch and the
// return statements are not visible in this excerpt; presumably NopwSeq is
// stored when Enable is true - confirm.
250 bool patchCustomEvent(const bool Enable
, const uint32_t FuncId
,
251 const XRaySledEntry
&Sled
) XRAY_NEVER_INSTRUMENT
{
252 // Here we do the dance of replacing the following sled:
257 // jmp +20 // 2 bytes
260 // With the following:
266 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
272 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
273 // to a jmp, use 15 bytes instead.
// Replace the leading two-byte jump with 'nopw' using one release-ordered
// 2-byte store.
276 std::atomic_store_explicit(
277 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), NopwSeq
,
278 std::memory_order_release
);
// The jump distance to restore depends on the sled version.
280 switch (Sled
.Version
) {
282 std::atomic_store_explicit(
283 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), Jmp15Seq
,
284 std::memory_order_release
);
288 std::atomic_store_explicit(
289 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), Jmp20Seq
,
290 std::memory_order_release
);
// Patches or unpatches the typed-event sled located at Sled.Address.
//
// Parameters:
//   Enable - selects patching vs. unpatching.
//   FuncId - not used in the visible code.
//   Sled   - sled descriptor; only Sled.Address is used in the visible code.
//
// Only the first two bytes of the sled are rewritten: one path stores
// NopwSeq, the other stores Jmp20Seq.
//
// NOTE(review): the branch on Enable and the return statements are not
// visible in this excerpt; presumably NopwSeq is stored when Enable is true
// and Jmp20Seq otherwise - confirm.
297 bool patchTypedEvent(const bool Enable
, const uint32_t FuncId
,
298 const XRaySledEntry
&Sled
) XRAY_NEVER_INSTRUMENT
{
299 // Here we do the dance of replacing the following sled:
302 // jmp +20 // 2 byte instruction
305 // With the following:
311 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
312 // The 20 byte sled stashes three argument registers, calls the trampoline,
313 // unstashes the registers and returns. If the arguments are already in
314 // the correct registers, the stashing and unstashing become equivalently
// Replace the leading two-byte jump with 'nopw' using one release-ordered
// 2-byte store.
317 std::atomic_store_explicit(
318 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), NopwSeq
,
319 std::memory_order_release
);
// Other path: put the two-byte 'jmp +20' back at the start of the sled.
321 std::atomic_store_explicit(
322 reinterpret_cast<std::atomic
<uint16_t> *>(Sled
.Address
), Jmp20Seq
,
323 std::memory_order_release
);
328 #if !SANITIZER_FUCHSIA
329 // We determine whether the CPU we're running on has the correct features we
330 // need. In x86_64 this will be rdtscp support.
//
// Two checks are visible: bit 27 of EDX after executing cpuid, and a
// non-zero result from getTSCFrequency(). Each failed check reports a
// message.
//
// NOTE(review): the input operands of the cpuid asm statement and the return
// statements are not visible in this excerpt; presumably the function
// returns false after either Report call and true otherwise - confirm.
331 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT
{
// Receive the four registers written by the cpuid instruction.
332 unsigned int EAX
, EBX
, ECX
, EDX
;
334 // We check whether rdtscp support is enabled. According to the x86_64 manual,
335 // level should be set at 0x80000001, and we should have a look at bit 27 in
336 // EDX. That's 0x8000000 (or 1u << 27).
337 __asm__
__volatile__("cpuid" : "=a"(EAX
), "=b"(EBX
), "=c"(ECX
), "=d"(EDX
)
// Bit 27 clear means rdtscp is not available.
339 if (!(EDX
& (1u << 27))) {
340 Report("Missing rdtscp support.\n");
343 // Also check whether we can determine the CPU frequency, since if we cannot,
344 // we should use the emulated TSC instead.
// getTSCFrequency() yields 0 when the frequency could not be determined.
345 if (!getTSCFrequency()) {
346 Report("Unable to determine CPU frequency.\n");
353 } // namespace __xray