1 /* join - relational database operator Author: Saeko Hirabayashi */
3 /* Written by Saeko Hirabayashi, 1989.
4 * 1992-01-28 Modified by Kouichi Hirabayashi to add some POSIX1003.2 options.
12 #define MAXFLD 200 /* maximum # of fields to accept */
/* Forward declarations for the functions of this program.  _PROTOTYPE is not
 * defined in the visible source; presumably it selects between an ANSI
 * prototype and a K&R declaration -- confirm against the included headers.
 */
14 _PROTOTYPE(void main
, (int argc
, char **argv
));
15 _PROTOTYPE(void error
, (char *s
, char *t
)); /* s is passed to fprintf() as the format, t as its single argument */
16 _PROTOTYPE(void usage
, (void));
17 _PROTOTYPE(void match
, (void));
18 _PROTOTYPE(void f1_only
, (void));
19 _PROTOTYPE(void f2_only
, (void));
20 _PROTOTYPE(void output
, (int flag
)); /* default output form; installed in outfun at start-up */
21 _PROTOTYPE(void outfld
, (int file
)); /* called as outfld(0) for file1 and outfld(1) for file2 */
22 _PROTOTYPE(void outputf
, (int flag
)); /* the alternative to output() that outfun may point to */
23 _PROTOTYPE(int compare
, (void));
24 _PROTOTYPE(int get1
, (void)); /* result is stored in nfld[0] */
25 _PROTOTYPE(int get2
, (int back
)); /* result is stored in nfld[1]; non-zero back skips the sort check */
26 _PROTOTYPE(int getrec
, (int file
));
27 _PROTOTYPE(int split
, (int file
));
/* NOTE(review): atoi() and exit() are library functions.  <stdlib.h> declares
 * them as int atoi(const char *) and void exit(int), so these local
 * declarations conflict if that header is included -- confirm which headers
 * this file pulls in before keeping them.
 */
28 _PROTOTYPE(int atoi
, (char *str
));
29 _PROTOTYPE(int exit
, (int val
));
30 _PROTOTYPE(FILE * efopen
, (char *file
, char *mode
)); /* fopen() wrapper that reports "can't open %s" through error() */
31 _PROTOTYPE(void (*outfun
), (int file
)); /* output func: output() or outputf().  Although the parameter is named
    * "file", it is invoked with a mask of F1 and/or F2. */
35 #define SEP (sep ? sep : ' ')
37 FILE *fp
[2]; /* input streams: fp[0] is file1 (stdin when the operand is "-"),
      * fp[1] is file2.  fp[1] is always opened by name and is repositioned
      * with ftell()/fseek() while joining. */
38 long head
; /* head of the current (same)key group of the
41 char buf
[2][BUFSIZ
]; /* input buffer for file1 and file2: holds the line most recently
    * read with fgets(), at most BUFSIZ bytes */
42 char *fld
[2][MAXFLD
]; /* field vector for file1 and file2; the slot following the last
    * field is set to a null pointer after splitting */
43 int nfld
[2]; /* # of fields for file1 and file2, as returned by get1()/get2();
      * 0 is treated as end of file */
45 int kpos
[2]; /* key field position for file1 and file2
47 char oldkey
[BUFSIZ
]; /* previous key of the file1: when the current file1 key equals it,
    * file2 is rewound to `head' so the same key group is joined again */
49 struct { /* output list by -o option */
50 int o_file
; /* file #: 0 or 1 */
51 int o_field
; /* field #: 0, 1, 2, .. */
53 int nout
; /* # of output fields, i.e. entries of olist[] filled in by -o */
55 int aflag
; /* bit mask set by '-an': F1, F2, or both when n is neither 1 nor 2;
   * unpairable lines of the selected file(s) are output as well */
56 int vflag
; /* bit mask set by '-vn': F1, F2, or both when n is neither 1 nor 2;
   * when non-zero, paired lines are not output */
57 char *es
; /* s for '-e s': string that replaces an empty field */
58 char sep
; /* c for '-tc': field separator; while it is 0 the SEP macro
   * supplies ' ' on output */
59 char *cmd
; /* name of this program, printed as the prefix of error messages */
69 outfun
= output
; /* default output form */
71 while (--argc
> 0 && (*++argv
)[0] == '-' && (*argv
)[1]) {
72 /* "-" is a file name (stdin) */
74 if ((c
= *s
) == '-' && !s
[1]) {
84 case 'a': /* add unpairable line to output */
87 case '1': aflag
|= F1
; break;
88 case '2': aflag
|= F2
; break;
89 default: aflag
|= (F1
| F2
); break;
93 case 'e': /* replace empty field by es */
97 case 'j': /* key field (obsolete) */
104 case '1': /* key field of file1 */
105 case '2': /* key field of file2 */
109 case '1': kpos
[0] = i
; break;
110 case '2': kpos
[1] = i
; break;
111 default: kpos
[0] = kpos
[1] = i
;
116 case 'o': /* specify output format */
119 sscanf(s
, "%d.%d", &i
, &j
);
120 if (i
< 1 || j
< 1 || i
> 2) usage();
121 olist
[nout
].o_file
= i
- 1;
122 olist
[nout
].o_field
= j
- 1;
124 if ((s
= strchr(s
, ',')) != (char *) 0)
130 } while (argc
> 2 && *s
!= '-');
132 --argv
; /* compensation */
136 case 't': /* tab char */
140 case 'v': /* output unpairable line only */
143 case '1': vflag
|= F1
; break;
144 case '2': vflag
|= F2
; break;
145 default: vflag
|= (F1
| F2
); break;
152 if (argc
!= 2) usage();
154 fp
[0] = strcmp(argv
[0], "-") ? efopen(argv
[0], "r") : stdin
;
155 fp
[1] = efopen(argv
[1], "r");
157 nfld
[0] = get1(); /* read file1 */
158 nfld
[1] = get2(0); /* read file2 */
160 while (nfld
[0] || nfld
[1]) {
161 if ((i
= compare()) == 0)
176 "Usage: %s [-an|-vn] [-e str] [-o list] [-tc] [-1 f] [-2 f] file1 file2\n",
182 { /* compare key field */
185 if (nfld
[1] == 0) /* file2 EOF */
187 else if (nfld
[0] == 0) /* file1 EOF */
190 if (nfld
[0] <= kpos
[0])
191 error("missing key field in file1", (char *) 0);
192 if (nfld
[1] <= kpos
[1])
193 error("missing key field in file2", (char *) 0);
195 r
= strcmp(fld
[0][kpos
[0]], fld
[1][kpos
[1]]);
204 if (!vflag
) (*outfun
) (F1
| F2
);
207 nfld
[1] = get2(0); /* check key order */
208 if (nfld
[1] == 0 || strcmp(fld
[0][kpos
[0]], fld
[1][kpos
[1]])) {
210 if (strcmp(fld
[0][kpos
[0]], oldkey
) == 0) {
211 fseek(fp
[1], head
, 0); /* re-do from head */
212 nfld
[1] = get2(1); /* don't check key order */
214 head
= p
; /* mark here */
220 if ((aflag
& F1
) || (vflag
& F1
)) (*outfun
) (F1
);
226 if ((aflag
& F2
) || (vflag
& F2
)) (*outfun
) (F2
);
227 head
= ftell(fp
[1]); /* mark */
228 nfld
[1] = get2(0); /* check key order */
232 { /* default output form */
234 fputs(fld
[0][kpos
[0]], stdout
);
236 fputs(fld
[1][kpos
[1]], stdout
);
237 if (f
& F1
) outfld(0);
238 if (f
& F2
) outfld(1);
243 { /* output all fields except key_field */
249 for (i
= 0; i
< n
; i
++)
252 fputs(fld
[file
][i
], stdout
);
257 { /* output by '-o list' */
261 for (i
= k
= 0; i
< nout
; i
++) {
263 if ((f
& (j
+ 1)) && (olist
[i
].o_field
< nfld
[j
]))
264 s
= fld
[j
][olist
[i
].o_field
];
268 if (k
++) fputc(SEP
, stdout
);
278 static char oldkey1
[BUFSIZ
];
280 if (fld
[0][kpos
[0]]) {
281 strcpy(oldkey
, fld
[0][kpos
[0]]); /* save previous key for control */
286 if (strcmp(oldkey1
, fld
[0][kpos
[0]]) > 0)
287 error("file1 is not sorted", (char *) 0);
288 strcpy(oldkey1
, fld
[0][kpos
[0]]); /* save prev key for sort check */
295 static char oldkey2
[BUFSIZ
];
301 if (!back
&& strcmp(oldkey2
, fld
[1][kpos
[1]]) > 0)
302 error("file2 is not sorted", (char *) 0);
303 strcpy(oldkey2
, fld
[1][kpos
[1]]); /* save prev key for sort check */
309 { /* read one line to split it */
310 if (fgets(buf
[file
], BUFSIZ
, fp
[file
]) == (char *) 0)
312 else if (*buf
[file
] == '\n' || *buf
[file
] == '\r')
313 error("null line in file%s", file
? "1" : "0");
321 register char *s
, *t
;
323 for (n
= 0, s
= buf
[file
]; *s
&& *s
!= '\n' && *s
!= '\r';) {
325 for (t
= s
; *s
&& *s
!= sep
&& *s
!= '\n' && *s
!= '\r'; s
++);
327 while (*s
== ' ' || *s
== '\t')
328 s
++; /* skip leading white space */
329 for (t
= s
; *s
&& *s
!= ' ' && *s
!= '\t'
330 && *s
!= '\n' && *s
!= '\r'; s
++);
331 /* We will treat trailing white space as NULL field */
335 if (n
== MAXFLD
) error("too many filed in file%s", file
? "1" : "0");
337 fld
[file
][n
] = (char *) 0;
342 FILE *efopen(file
, mode
)
347 if ((fp
= fopen(file
, mode
)) == (FILE *) 0) error("can't open %s", file
);
355 fprintf(stderr
, "%s: ", cmd
);
356 fprintf(stderr
, s
, t
);
357 fprintf(stderr
, "\n");