2 #include "sanitizer_common/sanitizer_common.h"
4 #include "sanitizer_common/sanitizer_posix.h"
7 #include "xray_interface_internal.h"
9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
10 #include <sys/types.h>
11 #include <sys/sysctl.h>
12 #elif SANITIZER_FUCHSIA
13 #include <zircon/syscalls.h>
// Reads from Fd into the buffer [Begin, End) by calling read() in a loop.
// Returns a pair of (total bytes read so far, success flag): the flag is false
// on the error return below and true on the final return.
// NOTE(review): only part of this function is visible in this excerpt (the
// declaration of BytesRead and the closing braces are not shown). The name
// suggests interrupted reads are retried -- confirm against the full source.
28 static std::pair
<ssize_t
, bool>
29 retryingReadSome(int Fd
, char *Begin
, char *End
) XRAY_NEVER_INSTRUMENT
{
// The number of bytes requested is the size of the caller's buffer.
30 auto BytesToRead
= std::distance(Begin
, End
);
32 ssize_t TotalBytesRead
= 0;
// Keep reading while buffer space remains and read() returns non-zero; a
// return of 0 from read() ends the loop.
33 while (BytesToRead
&& (BytesRead
= read(Fd
, Begin
, BytesToRead
))) {
// read() signals failure with -1; the errno value is reported and the bytes
// accumulated so far are returned together with a false success flag.
34 if (BytesRead
== -1) {
37 Report("Read error; errno = %d\n", errno
);
38 return std::make_pair(TotalBytesRead
, false);
// Account for the bytes just read: grow the running total and shrink the
// remaining request by the same amount.
41 TotalBytesRead
+= BytesRead
;
42 BytesToRead
-= BytesRead
;
// Final return: report the total together with a true success flag.
45 return std::make_pair(TotalBytesRead
, true);
// Opens Filename read-only, reads its contents into a fixed-size local buffer
// and parses the text as a base-10 integer.
// NOTE(review): the statements that store into *Value, produce the bool
// result and dispose of Fd are not visible in this excerpt -- confirm against
// the full source.
48 static bool readValueFromFile(const char *Filename
,
49 long long *Value
) XRAY_NEVER_INSTRUMENT
{
// O_CLOEXEC keeps the descriptor from being inherited across exec.
50 int Fd
= open(Filename
, O_RDONLY
| O_CLOEXEC
);
// Zero-initialised so the text is NUL-terminated whenever fewer than BufSize
// bytes are read.
53 static constexpr size_t BufSize
= 256;
54 char Line
[BufSize
] = {};
// NOTE(review): the whole buffer [Line, Line + BufSize) is offered to the
// reader, so if BufSize bytes are actually read there is no terminating NUL
// left for the parse below -- confirm the files read here are always shorter.
57 std::tie(BytesRead
, Success
) = retryingReadSome(Fd
, Line
, Line
+ BufSize
);
// End receives the position where the base-10 parse stopped.
61 const char *End
= nullptr;
62 long long Tmp
= internal_simple_strtoll(Line
, &End
, 10);
// Accept the parse only if the buffer is non-empty and parsing stopped at a
// newline or at the end of the string.
64 if (Line
[0] != '\0' && (*End
== '\n' || *End
== '\0')) {
// Determines the TSC frequency by reading sysfs files via readValueFromFile:
// first cpu0/tsc_freq_khz and, failing that, cpu0/cpufreq/cpuinfo_max_freq.
// Returns 0 when TSCFrequency still holds its initial -1 sentinel, otherwise
// the value converted to uint64_t.
// NOTE(review): the remaining arguments of both readValueFromFile calls and
// the bodies of the branches (including any unit conversion of the kHz
// readings) are not visible in this excerpt -- confirm against the full
// source.
71 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT
{
// -1 marks "not determined yet".
72 long long TSCFrequency
= -1;
73 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
76 } else if (readValueFromFile(
77 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
81 Report("Unable to determine CPU frequency for TSC accounting.\n");
// Map the -1 sentinel to 0 for the caller.
83 return TSCFrequency
== -1 ? 0 : static_cast<uint64_t>(TSCFrequency
);
85 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
// Variant of getTSCFrequency that queries the TSC frequency through
// internal_sysctlbyname, trying the names "machdep.tsc.frequency" and
// "machdep.tsc_freq".
// NOTE(review): the branch bodies, the tail of the second sysctl call and the
// return taken after the failure report are not visible in this excerpt --
// confirm against the full source.
86 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT
{
87 long long TSCFrequency
= -1;
// In/out size argument for the sysctl calls, initialised to the size of the
// destination variable.
88 size_t tscfreqsz
= sizeof(TSCFrequency
);
// A result other than -1 is treated as success.
90 if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency
,
91 &tscfreqsz
, NULL
, 0) != -1) {
94 if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency
, &tscfreqsz
,
97 return static_cast<uint64_t>(TSCFrequency
);
99 Report("Unable to determine CPU frequency for TSC accounting.\n");
104 #elif !SANITIZER_FUCHSIA
// Variant of getTSCFrequency selected by the `#elif !SANITIZER_FUCHSIA`
// branch directly above.
// NOTE(review): the body of this definition is not visible in this excerpt.
105 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT
{
// Instruction encodings written into XRay sleds. The 16-bit sequences are
// stored through uint16_t writes on a little-endian target, so the low byte of
// each value is the first byte in memory (e.g. 0x09eb is the bytes EB 09).
static constexpr uint8_t CallOpCode{0xe8};   // call rel32
static constexpr uint16_t MovR10Seq{0xba41}; // 41 BA: mov r10d, imm32
static constexpr uint16_t Jmp9Seq{0x09eb};   // EB 09: jmp +9
static constexpr uint16_t Jmp20Seq{0x14eb};  // EB 14: jmp +20
static constexpr uint16_t Jmp15Seq{0x0feb};  // EB 0F: jmp +15
static constexpr uint8_t JmpOpCode{0xe9};    // jmp rel32
static constexpr uint8_t RetOpCode{0xc3};    // ret
static constexpr uint16_t NopwSeq{0x9066};   // 66 90: two-byte nop

// Bounds of a signed 32-bit displacement, held as 64-bit values so that a
// 64-bit trampoline offset can be range-checked before it is narrowed.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
// Patches or unpatches the function-entry sled described by Sled.
// The visible code writes FuncId and a call to Trampoline into the sled and
// then atomically replaces the sled's first two bytes; a second store writes
// the original 2-byte jmp back.
// NOTE(review): the control flow choosing between the two stores (presumably
// keyed on Enable) and the return statements are not visible in this excerpt
// -- confirm against the full source.
123 bool patchFunctionEntry(const bool Enable
, const uint32_t FuncId
,
124 const XRaySledEntry
&Sled
,
125 void (*Trampoline
)()) XRAY_NEVER_INSTRUMENT
{
126 // Here we do the dance of replacing the following sled:
132 // With the following:
134 // mov r10d, <function id>
135 // call <relative 32bit offset to entry trampoline>
137 // We need to do this in the following order:
139 // 1. Put the function id first, 2 bytes from the start of the sled (just
140 // after the 2-byte jmp instruction).
141 // 2. Put the call opcode 6 bytes from the start of the sled.
142 // 3. Put the relative offset 7 bytes from the start of the sled.
143 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
144 // opcode and first operand.
146 // Prerequisite is to compute the relative offset to the trampoline's address.
147 const uint64_t Address
= Sled
.address();
// The displacement is measured from Address + 11, the end of the patched
// sequence (2-byte mov opcode + 4-byte id + 1-byte call opcode + 4-byte
// offset).
148 int64_t TrampolineOffset
= reinterpret_cast<int64_t>(Trampoline
) -
149 (static_cast<int64_t>(Address
) + 11);
// The offset is later stored in 32 bits, so it must fit the int32_t range.
150 if (TrampolineOffset
< MinOffset
|| TrampolineOffset
> MaxOffset
) {
151 Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
152 reinterpret_cast<void *>(Trampoline
),
153 reinterpret_cast<void *>(Address
));
// Steps 1-3: plain stores of the function id, the call opcode and the 32-bit
// displacement into the sled body.
157 *reinterpret_cast<uint32_t *>(Address
+ 2) = FuncId
;
158 *reinterpret_cast<uint8_t *>(Address
+ 6) = CallOpCode
;
159 *reinterpret_cast<uint32_t *>(Address
+ 7) = TrampolineOffset
;
// Step 4: a single 16-bit release store replaces the leading jmp with the
// "mov r10d" opcode bytes.
160 std::atomic_store_explicit(
161 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), MovR10Seq
,
162 std::memory_order_release
);
// Unpatch: put the 2-byte "jmp +9" back at the start of the sled.
164 std::atomic_store_explicit(
165 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), Jmp9Seq
,
166 std::memory_order_release
);
167 // FIXME: Write out the nops still?
// Patches or unpatches the function-exit sled described by Sled.
// The visible code writes FuncId and a jmp to __xray_FunctionExit into the
// sled and then atomically replaces the sled's first two bytes; a second
// store writes a single ret opcode byte back at the start of the sled.
// NOTE(review): the control flow choosing between the two stores (presumably
// keyed on Enable) and the return statements are not visible in this excerpt
// -- confirm against the full source.
172 bool patchFunctionExit(const bool Enable
, const uint32_t FuncId
,
173 const XRaySledEntry
&Sled
) XRAY_NEVER_INSTRUMENT
{
174 // Here we do the dance of replacing the following sled:
180 // With the following:
182 // mov r10d, <function id>
183 // jmp <relative 32bit offset to exit trampoline>
185 // 1. Put the function id first, 2 bytes from the start of the sled (just
186 // after the 1-byte ret instruction).
187 // 2. Put the jmp opcode 6 bytes from the start of the sled.
188 // 3. Put the relative offset 7 bytes from the start of the sled.
189 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
190 // opcode and first operand.
// NOTE(review): steps 1 and 4 above mention a 2-byte offset and a "jmp
// instruction", but the unpatch store below restores a single RetOpCode byte
// at Address; the wording looks carried over from the entry sled -- confirm.
192 // Prerequisite is to compute the relative offset to the
193 // __xray_FunctionExit function's address.
194 const uint64_t Address
= Sled
.address();
// The displacement is measured from Address + 11, the end of the patched
// sequence (2-byte mov opcode + 4-byte id + 1-byte jmp opcode + 4-byte
// offset).
195 int64_t TrampolineOffset
= reinterpret_cast<int64_t>(__xray_FunctionExit
) -
196 (static_cast<int64_t>(Address
) + 11);
// The offset is later stored in 32 bits, so it must fit the int32_t range.
197 if (TrampolineOffset
< MinOffset
|| TrampolineOffset
> MaxOffset
) {
198 Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
199 reinterpret_cast<void *>(__xray_FunctionExit
),
200 reinterpret_cast<void *>(Address
));
// Steps 1-3: plain stores of the function id, the jmp opcode and the 32-bit
// displacement into the sled body.
204 *reinterpret_cast<uint32_t *>(Address
+ 2) = FuncId
;
205 *reinterpret_cast<uint8_t *>(Address
+ 6) = JmpOpCode
;
206 *reinterpret_cast<uint32_t *>(Address
+ 7) = TrampolineOffset
;
// Step 4: a single 16-bit release store writes the "mov r10d" opcode bytes
// over the start of the sled.
207 std::atomic_store_explicit(
208 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), MovR10Seq
,
209 std::memory_order_release
);
// Unpatch: an 8-bit release store puts the ret opcode back as the first byte.
211 std::atomic_store_explicit(
212 reinterpret_cast<std::atomic
<uint8_t> *>(Address
), RetOpCode
,
213 std::memory_order_release
);
214 // FIXME: Write out the nops still?
// Patches or unpatches the tail-call exit sled described by Sled.
// The visible code writes FuncId and a call to __xray_FunctionTailExit into
// the sled and then atomically replaces the sled's first two bytes; a second
// store writes the 2-byte "jmp +9" back.
// NOTE(review): the control flow choosing between the two stores (presumably
// keyed on Enable) and the return statements are not visible in this excerpt
// -- confirm against the full source.
219 bool patchFunctionTailExit(const bool Enable
, const uint32_t FuncId
,
220 const XRaySledEntry
&Sled
) XRAY_NEVER_INSTRUMENT
{
221 // Here we do the dance of replacing the tail call sled with a similar
222 // sequence as the entry sled, but calls the tail exit sled instead.
223 const uint64_t Address
= Sled
.address();
// The displacement is measured from Address + 11, the end of the patched
// sequence (2-byte mov opcode + 4-byte id + 1-byte call opcode + 4-byte
// offset).
224 int64_t TrampolineOffset
=
225 reinterpret_cast<int64_t>(__xray_FunctionTailExit
) -
226 (static_cast<int64_t>(Address
) + 11);
// The offset is later stored in 32 bits, so it must fit the int32_t range.
227 if (TrampolineOffset
< MinOffset
|| TrampolineOffset
> MaxOffset
) {
228 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
229 reinterpret_cast<void *>(__xray_FunctionTailExit
),
230 reinterpret_cast<void *>(Address
));
// Fill in the sled body first: function id at +2, call opcode at +6, 32-bit
// displacement at +7.
234 *reinterpret_cast<uint32_t *>(Address
+ 2) = FuncId
;
235 *reinterpret_cast<uint8_t *>(Address
+ 6) = CallOpCode
;
236 *reinterpret_cast<uint32_t *>(Address
+ 7) = TrampolineOffset
;
// Then a single 16-bit release store swaps the first two bytes for the
// "mov r10d" opcode bytes.
237 std::atomic_store_explicit(
238 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), MovR10Seq
,
239 std::memory_order_release
);
// Unpatch: put the 2-byte "jmp +9" back at the start of the sled.
241 std::atomic_store_explicit(
242 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), Jmp9Seq
,
243 std::memory_order_release
);
244 // FIXME: Write out the nops still?
// Patches or unpatches the custom-event sled described by Sled by rewriting
// only its first two bytes: NopwSeq in one store, Jmp15Seq in the other.
// FuncId is not referenced in the visible lines.
// NOTE(review): the control flow choosing between the two stores (presumably
// keyed on Enable) and the return statement are not visible in this excerpt
// -- confirm against the full source.
249 bool patchCustomEvent(const bool Enable
, const uint32_t FuncId
,
250 const XRaySledEntry
&Sled
) XRAY_NEVER_INSTRUMENT
{
251 // Here we do the dance of replacing the following sled:
254 // jmp +15 // 2 bytes
257 // With the following:
263 // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'.
264 const uint64_t Address
= Sled
.address();
// Patch: a 16-bit release store replaces the leading jmp with a 2-byte nop.
266 std::atomic_store_explicit(
267 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), NopwSeq
,
268 std::memory_order_release
);
// Unpatch: a 16-bit release store puts "jmp +15" back.
270 std::atomic_store_explicit(
271 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), Jmp15Seq
,
272 std::memory_order_release
);
// Patches or unpatches the typed-event sled described by Sled by rewriting
// only its first two bytes: NopwSeq in one store, Jmp20Seq in the other.
// FuncId is not referenced in the visible lines.
// NOTE(review): the control flow choosing between the two stores (presumably
// keyed on Enable) and the return statement are not visible in this excerpt
// -- confirm against the full source.
277 bool patchTypedEvent(const bool Enable
, const uint32_t FuncId
,
278 const XRaySledEntry
&Sled
) XRAY_NEVER_INSTRUMENT
{
279 // Here we do the dance of replacing the following sled:
282 // jmp +20 // 2 byte instruction
285 // With the following:
291 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
292 // The 20 byte sled stashes three argument registers, calls the trampoline,
293 // unstashes the registers and returns. If the arguments are already in
294 // the correct registers, the stashing and unstashing become equivalently
296 const uint64_t Address
= Sled
.address();
// Patch: a 16-bit release store replaces the leading jmp with a 2-byte nop.
298 std::atomic_store_explicit(
299 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), NopwSeq
,
300 std::memory_order_release
);
// Unpatch: a 16-bit release store puts "jmp +20" back.
302 std::atomic_store_explicit(
303 reinterpret_cast<std::atomic
<uint16_t> *>(Address
), Jmp20Seq
,
304 std::memory_order_release
);
309 #if !SANITIZER_FUCHSIA
310 // We determine whether the CPU we're running on has the correct features we
311 // need. In x86_64 this will be rdtscp support.
// Two checks are visible: bit 27 of EDX after a cpuid instruction, and a
// non-zero result from getTSCFrequency(). Each failure is reported.
// NOTE(review): the cpuid input operands and the return statements are not
// visible in this excerpt -- confirm against the full source.
312 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT
{
// Receive the four cpuid output registers.
313 unsigned int EAX
, EBX
, ECX
, EDX
;
315 // We check whether rdtscp support is enabled. According to the x86_64 manual,
316 // level should be set at 0x80000001, and we should have a look at bit 27 in
317 // EDX. That's 0x8000000 (or 1u << 27).
318 __asm__
__volatile__("cpuid" : "=a"(EAX
), "=b"(EBX
), "=c"(ECX
), "=d"(EDX
)
// Bit 27 clear means the feature is missing.
320 if (!(EDX
& (1u << 27))) {
321 Report("Missing rdtscp support.\n");
324 // Also check whether we can determine the CPU frequency, since if we cannot,
325 // we should use the emulated TSC instead.
// getTSCFrequency() yielding 0 is treated as "could not be determined".
326 if (!getTSCFrequency()) {
327 Report("Unable to determine CPU frequency.\n");
334 } // namespace __xray