BTRFS: Implement BTree::Path and change _Find.
[haiku.git] / src / apps / text_search / Grepper.cpp
blob025fb5ed8a8079f929133a2a73dac99444dc24c3
1 /*
2 * Copyright (c) 1998-2007 Matthijs Hollemans
3 * Copyright (c) 2008-2017, Haiku Inc.
4 * Distributed under the terms of the MIT license.
6 * Authors:
7 * Matthijs Hollemans
8 * Stephan Aßmus <superstippi@gmx.de>
9 * Philippe Houdoin
12 #include "Grepper.h"
14 #include <errno.h>
15 #include <new>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <sys/time.h>
22 #include <Catalog.h>
23 #include <Directory.h>
24 #include <image.h>
25 #include <List.h>
26 #include <Locale.h>
27 #include <NodeInfo.h>
28 #include <OS.h>
29 #include <Path.h>
30 #include <UTF8.h>
32 #include "FileIterator.h"
33 #include "Model.h"
35 #undef B_TRANSLATION_CONTEXT
36 #define B_TRANSLATION_CONTEXT "Grepper"
// End-of-input marker: handed to xargs through its -E option and written to
// xargs' stdin by the writer thread once every file name has been sent.
39 const char* kEOFTag = "//EOF";
42 using std::nothrow;
44 char*
45 strdup_to_utf8(uint32 encode, const char* src, int32 length)
47 int32 srcLen = length;
48 int32 dstLen = 2 * srcLen;
49 // TODO: stippi: Why the duplicate copy? Why not just return
50 // dst (and allocate with malloc() instead of new)? Is 2 * srcLen
51 // enough space? Check return value of convert_to_utf8 and keep
52 // converting if it didn't fit?
53 char* dst = new (nothrow) char[dstLen + 1];
54 if (dst == NULL)
55 return NULL;
56 int32 cookie = 0;
57 convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
58 dst[dstLen] = '\0';
59 char* dup = strdup(dst);
60 delete[] dst;
61 if (srcLen != length) {
62 fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
63 ") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
65 return dup;
69 char*
70 strdup_from_utf8(uint32 encode, const char* src, int32 length)
72 int32 srcLen = length;
73 int32 dstLen = srcLen;
74 char* dst = new (nothrow) char[dstLen + 1];
75 if (dst == NULL)
76 return NULL;
77 int32 cookie = 0;
78 convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
79 // TODO: See above.
80 dst[dstLen] = '\0';
81 char* dup = strdup(dst);
82 delete[] dst;
83 if (srcLen != length) {
84 fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
85 ") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
87 return dup;
91 Grepper::Grepper(const char* pattern, const Model* model,
92 const BHandler* target, FileIterator* iterator)
93 : fPattern(NULL),
94 fTarget(target),
95 fRegularExpression(model->fRegularExpression),
96 fCaseSensitive(model->fCaseSensitive),
97 fEncoding(model->fEncoding),
99 fIterator(iterator),
100 fRunnerThreadId(-1),
101 fXargsInput(-1),
102 fMustQuit(false)
104 if (fEncoding > 0) {
105 char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
106 _SetPattern(src);
107 free(src);
108 } else
109 _SetPattern(pattern);
113 Grepper::~Grepper()
115 Cancel();
116 free(fPattern);
117 delete fIterator;
121 bool
122 Grepper::IsValid() const
124 if (fIterator == NULL || !fIterator->IsValid())
125 return false;
126 return fPattern != NULL;
130 void
131 Grepper::Start()
133 Cancel();
135 fMustQuit = false;
136 fRunnerThreadId = spawn_thread(
137 _SpawnRunnerThread, "Grep runner", B_NORMAL_PRIORITY, this);
139 resume_thread(fRunnerThreadId);
143 void
144 Grepper::Cancel()
146 if (fRunnerThreadId < 0)
147 return;
149 fMustQuit = true;
150 int32 exitValue;
151 wait_for_thread(fRunnerThreadId, &exitValue);
152 fRunnerThreadId = -1;
156 // #pragma mark - private
159 int32
160 Grepper::_SpawnWriterThread(void* cookie)
162 Grepper* self = static_cast<Grepper*>(cookie);
163 return self->_WriterThread();
167 int32
168 Grepper::_WriterThread()
170 BMessage message;
171 char fileName[B_PATH_NAME_LENGTH*2];
172 int count = 0;
174 printf("paths_writer started.\n");
176 while (!fMustQuit && fIterator->GetNextName(fileName)) {
178 message.MakeEmpty();
179 message.what = MSG_REPORT_FILE_NAME;
180 message.AddString("filename", fileName);
182 BEntry entry(fileName);
183 entry_ref ref;
184 entry.GetRef(&ref);
185 if (!entry.Exists()) {
186 if (fIterator->NotifyNegatives()) {
187 message.what = MSG_REPORT_RESULT;
188 message.AddRef("ref", &ref);
189 fTarget.SendMessage(&message);
191 continue;
194 if (!_EscapeSpecialChars(fileName, sizeof(fileName))) {
195 char tempString[B_PATH_NAME_LENGTH + 32];
196 sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
197 "the filename."), fileName);
198 message.MakeEmpty();
199 message.what = MSG_REPORT_ERROR;
200 message.AddString("error", tempString);
201 fTarget.SendMessage(&message);
202 continue;
205 // file exists, send it to xargs
206 write(fXargsInput, fileName, strlen(fileName));
207 write(fXargsInput, "\n", 1);
208 // printf(">>>>>> %s\n", fileName);
210 fTarget.SendMessage(&message);
212 count++;
215 write(fXargsInput, kEOFTag, strlen(kEOFTag));
216 write(fXargsInput, "\n", 1);
217 close(fXargsInput);
219 printf("paths_writer stopped (%d paths).\n", count);
221 return 0;
225 int32
226 Grepper::_SpawnRunnerThread(void* cookie)
228 Grepper* self = static_cast<Grepper*>(cookie);
229 return self->_RunnerThread();
233 int32
234 Grepper::_RunnerThread()
236 BMessage message;
237 char fileName[B_PATH_NAME_LENGTH];
239 const char* argv[32];
240 int argc = 0;
241 argv[argc++] = "xargs";
243 // can't use yet the --null mode due to pipe issue
244 // the xargs stdin input pipe closure is not detected
245 // by xargs. Instead, we use eof-string mode
247 // argv[argc++] = "--null";
248 argv[argc++] = "-E";
249 argv[argc++] = kEOFTag;
251 // Enable parallel mode
252 // Retrieve cpu count for to parallel xargs via -P argument
253 char cpuCount[8];
254 system_info sys_info;
255 get_system_info(&sys_info);
256 snprintf(cpuCount, sizeof(cpuCount), "%" B_PRIu32, sys_info.cpu_count);
257 argv[argc++] = "-P";
258 argv[argc++] = cpuCount;
260 // grep command driven by xargs dispatcher
261 argv[argc++] = "grep";
262 argv[argc++] = "-n"; // need matching line(s) number(s)
263 argv[argc++] = "-H"; // need filename prefix
264 if (! fCaseSensitive)
265 argv[argc++] = "-i";
266 if (! fRegularExpression)
267 argv[argc++] = "-F"; // no a regexp: force fixed string,
268 argv[argc++] = fPattern;
269 argv[argc] = NULL;
271 // prepare xargs to run with stdin, stdout and stderr pipes
273 int oldStdIn, oldStdOut, oldStdErr;
274 oldStdIn = dup(STDIN_FILENO);
275 oldStdOut = dup(STDOUT_FILENO);
276 oldStdErr = dup(STDERR_FILENO);
278 int fds[2];
279 if (pipe(fds) != 0) {
280 message.MakeEmpty();
281 message.what = MSG_REPORT_ERROR;
282 message.AddString("error",
283 B_TRANSLATE("Failed to open input pipe!"));
284 fTarget.SendMessage(&message);
285 return 0;
287 dup2(fds[0], STDIN_FILENO);
288 close(fds[0]);
289 fXargsInput = fds[1]; // write to in, appears on command's stdin
291 if (pipe(fds) != 0) {
292 close(fXargsInput);
293 message.MakeEmpty();
294 message.what = MSG_REPORT_ERROR;
295 message.AddString("error",
296 B_TRANSLATE("Failed to open output pipe!"));
297 fTarget.SendMessage(&message);
298 return 0;
300 dup2(fds[1], STDOUT_FILENO);
301 close(fds[1]);
302 int out = fds[0]; // read from out, taken from command's stdout
304 if (pipe(fds) != 0) {
305 close(fXargsInput);
306 close(out);
307 message.MakeEmpty();
308 message.what = MSG_REPORT_ERROR;
309 message.AddString("error",
310 B_TRANSLATE("Failed to open errors pipe!"));
311 fTarget.SendMessage(&message);
312 return 0;
314 dup2(fds[1], STDERR_FILENO);
315 close(fds[1]);
316 int err = fds[0]; // read from err, taken from command's stderr
318 // "load" xargs tool
319 thread_id xargsThread = load_image(argc, argv,
320 const_cast<const char**>(environ));
321 // xargsThread is suspended after loading
323 // restore our previous stdin, stdout and stderr
324 close(STDIN_FILENO);
325 dup(oldStdIn);
326 close(oldStdIn);
327 close(STDOUT_FILENO);
328 dup(oldStdOut);
329 close(oldStdOut);
330 close(STDERR_FILENO);
331 dup(oldStdErr);
332 close(oldStdErr);
334 if (xargsThread < B_OK) {
335 close(fXargsInput);
336 close(out);
337 close(err);
338 message.MakeEmpty();
339 message.what = MSG_REPORT_ERROR;
340 message.AddString("error",
341 B_TRANSLATE("Failed to start xargs program!"));
342 fTarget.SendMessage(&message);
343 return 0;
346 // Listen on xargs's stdout and stderr via select()
347 printf("Running: ");
348 for (int i = 0; i < argc; i++) {
349 printf("%s ", argv[i]);
351 printf("\n");
353 int fdl[2] = { out, err };
354 int maxfd = 0;
355 for (int i = 0; i < 2; i++) {
356 if (maxfd < fdl[i])
357 maxfd = fdl[i];
360 fd_set readSet;
361 struct timeval timeout = { 0, 100000 };
362 char line[B_PATH_NAME_LENGTH * 2];
364 FILE* output = fdopen(out, "r");
365 FILE* errors = fdopen(err, "r");
367 char currentFileName[B_PATH_NAME_LENGTH];
368 currentFileName[0] = '\0';
369 bool canReadOutput, canReadErrors;
370 canReadOutput = canReadErrors = true;
372 thread_id writerThread = spawn_thread(_SpawnWriterThread,
373 "Grep writer", B_LOW_PRIORITY, this);
374 set_thread_priority(xargsThread, B_LOW_PRIORITY);
376 // we're ready, let's go!
377 resume_thread(xargsThread);
378 resume_thread(writerThread);
380 while (!fMustQuit && (canReadOutput || canReadErrors)) {
381 FD_ZERO(&readSet);
382 if (canReadOutput) {
383 FD_SET(out, &readSet);
385 if (canReadErrors) {
386 FD_SET(err, &readSet);
389 int result = select(maxfd + 1, &readSet, NULL, NULL, &timeout);
390 if (result == -1 && errno == EINTR)
391 continue;
392 if (result == 0) {
393 // timeout, but meanwhile fMustQuit was changed maybe...
394 continue;
396 if (result < 0) {
397 perror("select():");
398 message.MakeEmpty();
399 message.what = MSG_REPORT_ERROR;
400 message.AddString("error", strerror(errno));
401 fTarget.SendMessage(&message);
402 break;
405 if (canReadOutput && FD_ISSET(out, &readSet)) {
406 if (fgets(line, sizeof(line), output) != NULL) {
407 // parse grep output
408 int lineNumber = -1;
409 int textPos = -1;
410 sscanf(line, "%[^\n:]:%d:%n", fileName, &lineNumber, &textPos);
411 // printf("sscanf(\"%s\") -> %s %d %d\n", line, fileName,
412 // lineNumber, textPos);
413 if (textPos > 0) {
414 if (strcmp(fileName, currentFileName) != 0) {
415 fTarget.SendMessage(&message);
417 strncpy(currentFileName, fileName,
418 sizeof(currentFileName));
420 message.MakeEmpty();
421 message.what = MSG_REPORT_RESULT;
422 message.AddString("filename", fileName);
424 BEntry entry(fileName);
425 entry_ref ref;
426 entry.GetRef(&ref);
427 message.AddRef("ref", &ref);
430 char* text = &line[strlen(fileName)+1];
431 // printf("[%s] %s", fileName, text);
432 if (fEncoding > 0) {
433 char* tempdup = strdup_to_utf8(fEncoding, text,
434 strlen(text));
435 message.AddString("text", tempdup);
436 free(tempdup);
437 } else {
438 message.AddString("text", text);
440 message.AddInt32("line", lineNumber);
442 } else {
443 canReadOutput = false;
446 if (canReadErrors && FD_ISSET(err, &readSet)) {
447 if (fgets(line, sizeof(line), errors) != NULL) {
448 // printf("ERROR: %s", line);
449 if (message.HasString("text"))
450 fTarget.SendMessage(&message);
451 currentFileName[0] = '\0';
453 message.MakeEmpty();
454 message.what = MSG_REPORT_ERROR;
455 message.AddString("error", line);
456 fTarget.SendMessage(&message);
457 } else {
458 canReadErrors = false;
463 // send last pending message, if any
464 if (message.HasString("text"))
465 fTarget.SendMessage(&message);
467 printf("Done.\n");
468 fclose(output);
469 fclose(errors);
471 close(out);
472 close(err);
474 fMustQuit = true;
475 int32 exitValue;
476 wait_for_thread(xargsThread, &exitValue);
477 wait_for_thread(writerThread, &exitValue);
479 message.MakeEmpty();
480 message.what = MSG_SEARCH_FINISHED;
481 fTarget.SendMessage(&message);
483 return 0;
487 void
488 Grepper::_SetPattern(const char* src)
490 if (src == NULL)
491 return;
493 fPattern = strdup(src);
497 bool
498 Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
500 char* copy = strdup(buffer);
501 char* start = buffer;
502 uint32 len = strlen(copy);
503 bool result = true;
504 for (uint32 count = 0; count < len; ++count) {
505 if (copy[count] == '\'' || copy[count] == '\\'
506 || copy[count] == ' ' || copy[count] == '\n'
507 || copy[count] == '"')
508 *buffer++ = '\\';
509 if (buffer - start == bufferSize - 1) {
510 result = false;
511 break;
513 *buffer++ = copy[count];
515 *buffer = '\0';
516 free(copy);
517 return result;