gen-strtab.awk: Work around IRIX 6.2 nawk bug.
[dxcommon.git] / scripts / join.awk
blob7187b50c3e1d048c9d1bc2066088af6323694e62
1 #!/bin/awk -f
3 # Copyright © 2022 Nick Bowler
5 # Partial implementation of POSIX "join" command: only the "-v" and "-a"
6 # options are implemented.
8 # Not all awk implementations support reading from standard input with the
9 # getline function by specifying a filename of "-". In particular, busybox
10 # awk will read from a file named "-" instead of standard input. Since
11 # busybox-based environments are typically missing "join", this limitation
12 # is problematic. As a workaround, do not use "-" for the second input
13 # filename.
15 # License WTFPL2: Do What The Fuck You Want To Public License, version 2.
16 # This is free software: you are free to do what the fuck you want to.
17 # There is NO WARRANTY, to the extent permitted by law.
# Parse the command line, remember the second input file and read its first
# record.
#
# Supported options (a subset of POSIX join):
#   -a N   in addition to paired lines, print unpairable lines from file N
#   -v N   print only unpairable lines from file N (suppresses paired lines)
# N must be 1 or 2 and may be attached ("-a1") or given as the next argument
# ("-a 1").  Exactly two operands (file1 file2) must follow the options;
# anything else terminates with exit status 1.
#
# Every consumed ARGV entry is blanked so that awk itself only reads file1
# as its main input; file2 is read explicitly via advance_rhs().
BEGIN {
  show_uniq_lhs = 0
  show_uniq_rhs = 0
  show_common = 1

  # Process command-line options
  for (i = 1; i < ARGC; i++) {
    # Stop at the first operand; a lone "-" is an operand, not an option.
    if (substr(ARGV[i], 1, 1) != "-" || ARGV[i] == "-")
      break;

    opt = substr(ARGV[i], 2, 1);
    if (opt == "a" || opt == "v") {
      num = substr(ARGV[i], 3, 1);
      if (num == "") {
        # option argument must be next on command-line
        ARGV[i++] = "";
        num = ARGV[i];
      }

      if (opt == "v") {
        show_common = 0;
      }

      if (num == 1) {
        show_uniq_lhs = 1;
      } else if (num == 2) {
        show_uniq_rhs = 1;
      } else {
        # invalid argument
        exit 1;
      }
    } else {
      # unsupported option
      exit 1;
    }

    ARGV[i] = "";
  }

  # ARGV[i] is file1 and ARGV[i+1] is file2; nothing may follow them.
  if (i+2 != ARGC) {
    # invalid usage
    exit 1;
  }

  file2 = ARGV[i+1];
  ARGV[i+1] = "";

  # Prime the RHS reader; an empty file2 means there is nothing to join.
  rhs_max_nr = rhs_nr = 0;
  if (advance_rhs() == 0)
    finish_rhs();
}
# Main merge rules, run for every record of file1 (the LHS).
#
# Keys are always compared as strings (forced by concatenating "").  awk
# would otherwise compare two numeric-looking fields numerically, which
# disagrees with the collating order join requires of its inputs (for
# example "10" sorts before "9") and makes the merge skip matching lines.
# NOTE(review): this assumes both inputs are sorted in the same order awk
# uses for string comparison (e.g. sorted with LC_ALL=C) -- confirm callers.

# Rebuild $0 so the LHS record is printed with OFS between its fields.
{ $1 = $1 }

($1 "") == lhs_prev {
  # Rewind RHS as we have duplicate common keys in LHS.
  close(file2);

  rhs_nr = 0;
  advance_rhs();
}

# LHS key sorts before the current RHS key: the LHS line is unpairable.
($1 "") < (rhs[1] "") {
  if (show_uniq_lhs) {
    print;
  }
  next;
}

# LHS key sorts after the current RHS key: skip forward in the RHS.
{
  while (($1 "") > (rhs[1] "")) {
    # Report an RHS line as unpairable only the first time it is passed;
    # after a rewind rhs_nr lags behind rhs_max_nr until we catch up.
    if (show_uniq_rhs && rhs_nr == rhs_max_nr)
      print_rhs();

    # RHS exhausted: flush the rest of the LHS and terminate.
    if (advance_rhs() == 0)
      finish_rhs();
  }
  if (show_uniq_lhs && ($1 "") < (rhs[1] ""))
    print;
}

# Keys are equal: emit one joined line per RHS record carrying this key.
!rhs_eof && ($1 "") == (rhs[1] "") {
  # Remember the key (as a string) so a repeated LHS key triggers a rewind.
  lhs_prev = $1 "";
  do {
    if (show_common) print_match();
    advance_rhs();
  } while (!rhs_eof && ($1 "") == (rhs[1] ""));
}
# After the LHS is exhausted, report the current RHS record and every
# remaining one as unpairable (only when -a 2 / -v 2 was requested).
#
# END also runs after an "exit" in BEGIN.  rhs_max_nr is only incremented
# once a read from file2 has succeeded or hit end-of-file, so it is still
# zero when the command line was rejected or file2 could not be read at
# all.  In that case skip the flush: otherwise print_rhs() would emit a
# spurious empty line for an RHS record that was never read.  The exit
# status set by the earlier exit statement is left in place.
END {
  if (show_uniq_rhs && rhs_max_nr > 0) {
    do {
      print_rhs();
    } while (advance_rhs() > 0);
  }
}
# Read the next record of file2 and split it into the global array rhs.
#
# Updates rhs_eof (true once file2 is exhausted), rhs_nr (records read
# since the last rewind) and rhs_max_nr (the furthest position reached so
# far).  Returns the getline status: 1 when a record was read, 0 at end of
# file.  A read error terminates the program with exit status 1.
#
# line and status are local variables, not arguments.
function advance_rhs(line, status)
{
  status = (getline line < file2);
  if (status < 0) {
    exit(1);
  }

  rhs_eof = (status == 0);

  # Push the high-water mark forward only when reading past it.
  if (rhs_nr == rhs_max_nr) {
    rhs_max_nr++;
  }
  rhs_nr++;

  # At end of file line is still empty, which leaves rhs without elements.
  split(line, rhs);
  return status;
}
# Called when file2 has no more records: nothing further can pair up.
#
# If unpairable LHS lines were requested, print the current LHS record
# (when one has been read, i.e. NR > 0) followed by all remaining LHS
# records, each rebuilt with OFS.  Then terminate: exit status 0 on a
# clean end of input, 1 if getline reported an error (-1).
#
# status is a local variable, not an argument.
function finish_rhs(status)
{
  status = 0;
  if (show_uniq_lhs) {
    # No LHS record exists yet when we are called from BEGIN.
    if (NR > 0) {
      $1 = $1;
      print;
    }

    # Every successful getline bumps NR, so no NR test is needed here.
    while ((status = getline) > 0) {
      $1 = $1;
      print;
    }
  }

  exit(-status);
}
# Print the current RHS record and terminate the output line.
#
# Does nothing once file2 is exhausted.  When called without an argument
# (or with a value below 1) the key field rhs[1] is printed first; callers
# that have already printed the key pass 2 so that only the remaining
# fields, each preceded by a space, are appended.
#
# idx is a local loop index, not an argument.
function print_rhs(start, idx)
{
  if (rhs_eof) {
    return;
  }

  if (start < 1) {
    printf "%s", rhs[1];
  }

  idx = 2;
  while (idx in rhs) {
    printf " %s", rhs[idx];
    idx++;
  }

  print "";
}
# Emit one joined line: the whole current LHS record, then the non-key
# fields of the current RHS record (print_rhs(2) skips rhs[1] and ends
# the line).
function print_match(i)
{
  printf("%s", $0);
  print_rhs(2);
}