From 97e628746aa120d424b06c5550f5507ee4470a59 Mon Sep 17 00:00:00 2001 From: Olly Betts Date: Mon, 20 Jun 2022 17:11:16 +1200 Subject: [PATCH] Add column info to all index script diagnostics (cherry picked from commit 33b67d6819dfdde510a67c3b8ecb286b21b42bc7) --- xapian-applications/omega/omegatest | 21 ++++++++++++- xapian-applications/omega/scriptindex.cc | 53 +++++++++++++++++++++----------- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/xapian-applications/omega/omegatest b/xapian-applications/omega/omegatest index 18ff547fe..abcadab1d 100755 --- a/xapian-applications/omega/omegatest +++ b/xapian-applications/omega/omegatest @@ -997,6 +997,14 @@ END printf '$field{in,1}/$list{$field{one,1},|}/$list{$field{two,1},|}' > "$TEST_TEMPLATE" testcase 'a,b,c;10,21,32;XY,YZ/a,b,c|10,21,32|XY,YZ/a|b|c|10|21|32|xy|yz' P=text +# Test scriptindex `split` action error cases. +printf 'in : split="" field=one\nin : field' > "$TEST_INDEXSCRIPT" +test_scriptindex_error "'split' error for empty separator" \ + "$TEST_INDEXSCRIPT:1:13: error: Split delimiter can't be empty" +printf 'in : split=|,foo field=one\nin : field' > "$TEST_INDEXSCRIPT" +test_scriptindex_error "'split' error for invalid operation" \ + "$TEST_INDEXSCRIPT:1:14: error: Bad split operation 'foo'" + # Feature tests for scriptindex `hextobin` action. printf 'hex : hextobin value=0' > "$TEST_INDEXSCRIPT" rm -rf "$TEST_DB" @@ -1111,7 +1119,7 @@ testcase '|3|2|1|' P=test # Test bad parameter values to `weight` action. printf 'foo : index weight=-2' > "$TEST_INDEXSCRIPT" test_scriptindex_error "bad 'weight' parameter" \ - "$TEST_INDEXSCRIPT:1: error: Index action 'weight' takes a non-negative integer argument" + "$TEST_INDEXSCRIPT:1:20: error: Index action 'weight' takes a non-negative integer argument" printf 'foo : index weight=1.5' > "$TEST_INDEXSCRIPT" test_scriptindex_error "bad 'weight' parameter" \ "$TEST_INDEXSCRIPT:1:21: error: Index action 'weight' takes an integer argument" @@ -1211,6 +1219,12 @@ testcase '3|' id=3 testcase '4|' id=4 exit 0 +# Test `unique` action warning. +printf '%s\n' 'id : unique=Q boolean=W' 'id f : field' > "$TEST_INDEXSCRIPT" +test_scriptindex_warning 'unique without boolean' \ + "$TEST_INDEXSCRIPT:1:6: warning: Index action 'unique=Q' without 'boolean=Q' +$TEST_INDEXSCRIPT:1:6: note: 'unique' doesn't implicitly add a boolean term" + # Test $subdb and $subid. rm -rf "$TEST_DB" printf 'inmemory' > "$TEST_DB" @@ -1473,6 +1487,11 @@ printf 'date : field parsedate=%%Y%%m%%d' > "$TEST_INDEXSCRIPT" test_scriptindex_warning "useless 'parsedate'" \ "$TEST_INDEXSCRIPT:1:14: warning: Index action 'parsedate' has no effect*" +# Test a `parsedate` format error. +printf 'date : field parsedate="%%Y\t%%m\t%%d\t%%Z"' > "$TEST_INDEXSCRIPT" +test_scriptindex_error "'%Z' in 'parsedate' format" \ + "$TEST_INDEXSCRIPT:1:34: error: Parsing timezone names with %Z is not supported" + # Test scriptindex `gap` action inserts a termpos gap. printf 'text : index gap=5' > "$TEST_INDEXSCRIPT" rm -rf "$TEST_DB" diff --git a/xapian-applications/omega/scriptindex.cc b/xapian-applications/omega/scriptindex.cc index febb821d2..ec6019b43 100644 --- a/xapian-applications/omega/scriptindex.cc +++ b/xapian-applications/omega/scriptindex.cc @@ -222,10 +222,10 @@ report_location(enum diag_type type, cerr << filename; if (line != 0) { cerr << ':' << line; - } - if (pos != string::npos) { - // The first column is numbered 1. - cerr << ':' << pos + 1; + if (pos != string::npos) { + // The first column is numbered 1. + cerr << ':' << pos + 1; + } } switch (type) { case DIAG_ERROR: @@ -268,8 +268,10 @@ parse_index_script(const string &filename) } string line; size_t line_no = 0; - // Line number where we saw a `unique` action, or -1 if we haven't. - int unique_line_no = -1; + // Line number where we saw a `unique` action, or 0 if we haven't. + int unique_line_no = 0; + // Offset into line unique_line_no where the `unique` action was. + size_t unique_pos = 0; while (getline(script, line)) { ++line_no; vector fields; @@ -609,7 +611,8 @@ bad_escaping: if (val != "unix" && val != "unixutc" && val != "yyyymmdd") { - report_location(DIAG_ERROR, filename, line_no); + report_location(DIAG_ERROR, filename, line_no, + j - s.begin()); cerr << "Invalid parameter '" << val << "' for " "action 'date'" << endl; exit(1); @@ -626,7 +629,8 @@ bad_escaping: // store it ready to use in the INDEX and INDEXNOPOS // Actions. if (!parse_unsigned(val.c_str(), weight)) { - report_location(DIAG_ERROR, filename, line_no); + report_location(DIAG_ERROR, filename, line_no, + j - s.begin()); cerr << "Index action 'weight' takes a " "non-negative integer argument" << endl; exit(1); @@ -638,14 +642,18 @@ bad_escaping: useless_weight_pos = action_pos; break; case Action::PARSEDATE: { - if (val.find("%Z") != val.npos) { - report_location(DIAG_ERROR, filename, line_no); + auto bad_code = val.find("%Z"); + if (bad_code != val.npos) { + report_location(DIAG_ERROR, filename, line_no, + j - s.begin() + bad_code); cerr << "Parsing timezone names with %Z is not supported" << endl; exit(1); } #ifndef HAVE_STRUCT_TM_TM_GMTOFF - if (val.find("%z") != val.npos) { - report_location(DIAG_ERROR, filename, line_no); + bad_code = val.find("%z"); + if (bad_code != val.npos) { + report_location(DIAG_ERROR, filename, line_no, + j - s.begin() + bad_code); cerr << "Parsing timezone offsets with %z is not supported on " "this platform" << endl; exit(1); @@ -656,7 +664,8 @@ bad_escaping: } case Action::SPLIT: { if (val.empty()) { - report_location(DIAG_ERROR, filename, line_no); + report_location(DIAG_ERROR, filename, line_no, + j - s.begin()); cerr << "Split delimiter can't be empty" << endl; exit(1); } @@ -671,7 +680,12 @@ bad_escaping: } else if (vals[1] == "prefixes") { operation = Action::SPLIT_PREFIXES; } else { - report_location(DIAG_ERROR, filename, line_no); + // FIXME: Column should be for where the `op` + // parameter starts, which this isn't if the + // value is quoted, contains escape sequences, + // etc. + report_location(DIAG_ERROR, filename, line_no, + i - s.begin() - vals[1].size()); cerr << "Bad split operation '" << vals[1] << "'" << endl; exit(1); @@ -694,17 +708,18 @@ bad_escaping: actions.emplace_back(code, action_pos, val); break; case Action::UNIQUE: - if (unique_line_no >= 0) { + if (unique_line_no) { report_location(DIAG_ERROR, filename, line_no, action_pos); cerr << "Index action 'unique' used more than once" << endl; report_location(DIAG_NOTE, filename, - unique_line_no); + unique_line_no, unique_pos); cerr << "Previously used here" << endl; exit(1); } unique_line_no = line_no; + unique_pos = action_pos; if (boolmap.find(val) == boolmap.end()) boolmap[val] = Action::UNIQUE; actions.emplace_back(code, action_pos, val); @@ -835,13 +850,15 @@ bad_escaping: map::const_iterator boolpfx; for (boolpfx = boolmap.begin(); boolpfx != boolmap.end(); ++boolpfx) { if (boolpfx->second == Action::UNIQUE) { - report_location(DIAG_WARN, filename, line_no); + report_location(DIAG_WARN, filename, unique_line_no, + unique_pos); cerr << "Index action 'unique=" << boolpfx->first << "' without 'boolean=" << boolpfx->first << "'" << endl; static bool given_doesnt_imply_boolean_warning = false; if (!given_doesnt_imply_boolean_warning) { given_doesnt_imply_boolean_warning = true; - report_location(DIAG_NOTE, filename, line_no); + report_location(DIAG_NOTE, filename, unique_line_no, + unique_pos); cerr << "'unique' doesn't implicitly add a boolean term" << endl; } -- 2.11.4.GIT