4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 1998-2003 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
/*
 * Message prefixes used as fprintf() formats by fail_check().  Each prefix is
 * run through gettext() at the point of use.
 *
 * NOTE(review): both messages are defined twice with different text.  The two
 * pairs presumably sit in alternate preprocessor branches whose directives are
 * not visible in this view -- confirm against the full file.  The second pair
 * contains a %llu conversion and therefore needs an unsigned long long
 * argument wherever it is used as a format.
 */
32 #define MSG_DISORDER gettext("sort: disorder: ")
33 #define MSG_NONUNIQUE gettext("sort: non-unique: ")
35 #define MSG_DISORDER gettext("sort: disorder (%llu): ")
36 #define MSG_NONUNIQUE gettext("sort: non-unique (%llu): ")
/*
 * Bit flags OR-ed together into the flags argument of fail_check().
 * fail_check() tests CHECK_FAILURE_DISORDER and CHECK_WIDE; callers pass
 * CHECK_FAILURE_NONUNIQUE to label the other kind of failure.
 */
39 #define CHECK_FAILURE_DISORDER 0x1
40 #define CHECK_FAILURE_NONUNIQUE 0x2
41 #define CHECK_WIDE 0x4
44 fail_check(line_rec_t
*L
, int flags
, u_longlong_t lineno
)
49 if (flags
& CHECK_WIDE
) {
50 if ((length
= (ssize_t
)wcstombs(NULL
, L
->l_data
.wp
, 0)) < 0)
51 die(EMSG_ILLEGAL_CHAR
);
54 * +1 for null character
56 line
= alloca(length
+ 1);
57 (void) wcstombs(line
, L
->l_data
.wp
, L
->l_data_length
);
61 length
= L
->l_data_length
;
64 if (flags
& CHECK_FAILURE_DISORDER
) {
65 (void) fprintf(stderr
, MSG_DISORDER
, lineno
);
66 (void) write(fileno(stderr
), line
, length
);
67 (void) fprintf(stderr
, "\n");
71 (void) fprintf(stderr
, MSG_NONUNIQUE
);
72 (void) write(fileno(stderr
), line
, length
);
73 (void) fprintf(stderr
, "\n");
/*
 * swap_coll_bufs() hands the collation buffer owned by B over to A.
 *
 * As visible here the routine
 *   - remembers B's l_collate.sp pointer and l_collate_bufsize,
 *   - releases B's l_raw_collate.sp through safe_free(),
 *   - stores the remembered pointer and size in A, and
 *   - clears A's l_raw_collate.sp.
 *
 * NOTE(review): nothing visible writes to B after the safe_free(), and A's
 * previous l_collate.sp is overwritten without being saved, so the visible
 * statements alone do not amount to a swap.  A statement between the
 * safe_free() call and the assignments to A may be missing from this view --
 * confirm against the full file.
 */
77 swap_coll_bufs(line_rec_t
*A
, line_rec_t
*B
)
/* Save B's collation buffer and its size before anything is modified. */
79 char *coll_buffer
= B
->l_collate
.sp
;
80 ssize_t coll_bufsize
= B
->l_collate_bufsize
;
/* B's raw collation buffer is released here. */
82 safe_free(B
->l_raw_collate
.sp
);
/* A takes over the buffer that belonged to B; its raw buffer is reset. */
85 A
->l_collate
.sp
= coll_buffer
;
86 A
->l_collate_bufsize
= coll_bufsize
;
87 A
->l_raw_collate
.sp
= NULL
;
91 * check_if_sorted() interacts with a stream in a slightly different way than a
92 * simple sort or a merge operation: the check involves looking at two adjacent
93 * lines of the file and verifying that they are collated according to the key
94 * specifiers given. For files accessed via mmap(), this is simply done as the
95 * entirety of the file is present in the address space. For files accessed via
96 * stdio, regardless of locale, we must be able to guarantee that two lines are
97 * present in memory at once. The basic buffer code for stdio does not make
98 * such a guarantee, so we use stream_swap_buffer() to alternate between two data buffers.
/*
 * check_if_sorted() reads the input stream S->m_input_streams line by line
 * and compares every line with its predecessor, using the field conversion
 * and collation routines selected below.
 *
 * The previous line is kept in the local record last_line.  Two data buffers
 * are alternated with stream_swap_buffer() so that the previous and the
 * current line can be held at the same time.
 *
 * Two failures are reported through fail_check(), each followed by
 * exit(E_FAILED_CHECK): CHECK_FAILURE_DISORDER, and CHECK_FAILURE_NONUNIQUE
 * when two adjacent lines compare equal while S->m_unique_lines is set.
 *
 * NOTE(review): several statements are not visible in this view (some local
 * declarations, the initial values of the two function pointers, the bodies
 * of a few if statements, the final arguments of some calls, and whatever
 * follows the loop).  The comments below describe only what is shown.
 */
102 check_if_sorted(sort_t
*S
)
105 int numerator
, denominator
;
/* Spare data buffer exchanged with the stream by stream_swap_buffer(). */
107 char *data_buffer
= NULL
;
108 size_t data_bufsize
= 0;
/* Record holding the previously fetched line. */
109 line_rec_t last_line
;
/* Line number handed to fail_check(). */
110 u_longlong_t lineno
= 0;
114 stream_t
*cur_streamp
= S
->m_input_streams
;
/*
 * Field conversion and collation routines.  NOTE(review): their initial
 * values are not visible here; only the wide replacements further down are.
 */
116 ssize_t (*conversion_fcn
)(field_t
*, line_rec_t
*, flag_t
, vchar_t
) =
118 int (*collation_fcn
)(line_rec_t
*, line_rec_t
*, ssize_t
, flag_t
) =
121 set_memory_ratio(S
, &numerator
, &denominator
);
/* NOTE(review): the actions taken by the next two tests are not visible. */
123 if (stream_open_for_read(S
, cur_streamp
) > 1)
126 if (SOP_EOS(cur_streamp
))
/* Start from an all-zero previous-line record. */
129 (void) memset(&last_line
, 0, sizeof (line_rec_t
));
132 * We need to swap data buffers for the stream with each fetch, except
133 * on STREAM_MMAP (which are implicitly STREAM_SUSTAIN).
135 swap_required
= !(cur_streamp
->s_status
& STREAM_MMAP
);
137 stream_set(cur_streamp
, STREAM_INSTANT
);
139 * We use one half of the available memory for input, half for
140 * each buffer. (The other half is left unreserved, in case
141 * conversions to collatable form require it.)
/* Size requested for each of the two data buffers. */
143 input_mem
= numerator
* S
->m_memory_available
/ denominator
/ 4;
/*
 * Size the stream's buffer, exchange it with the spare one, then size the
 * stream again so that both buffers receive input_mem.
 */
145 stream_set_size(cur_streamp
, input_mem
);
146 stream_swap_buffer(cur_streamp
, &data_buffer
, &data_bufsize
);
147 stream_set_size(cur_streamp
, input_mem
);
/* Wide streams use the wide conversion and collation routines. */
149 if (cur_streamp
->s_status
& STREAM_WIDE
) {
150 conversion_fcn
= field_convert_wide
;
151 collation_fcn
= collated_wide
;
/* NOTE(review): the action taken when SOP_PRIME() yields more than 1 is not visible. */
155 if (SOP_PRIME(cur_streamp
) > 1)
/* Build the flags later passed to the collation routine. */
158 if (S
->m_field_options
& FIELD_REVERSE_COMPARISONS
)
159 coll_flags
= COLL_REVERSE
;
162 if (S
->m_unique_lines
)
163 coll_flags
|= COLL_UNIQUE
;
/*
 * Give the stream's current record a fresh collation buffer of
 * INITIAL_COLLATION_SIZE elements and no raw collation buffer.
 */
165 cur_streamp
->s_current
.l_collate_bufsize
= INITIAL_COLLATION_SIZE
166 * cur_streamp
->s_element_size
;
167 cur_streamp
->s_current
.l_collate
.sp
= safe_realloc(NULL
,
168 cur_streamp
->s_current
.l_collate_bufsize
);
169 cur_streamp
->s_current
.l_raw_collate
.sp
= NULL
;
/* Same initial collation buffer setup for the previous-line record. */
171 last_line
.l_collate_bufsize
= INITIAL_COLLATION_SIZE
*
172 cur_streamp
->s_element_size
;
173 last_line
.l_collate
.sp
= safe_realloc(NULL
,
174 last_line
.l_collate_bufsize
);
175 last_line
.l_raw_collate
.sp
= NULL
;
/*
 * Convert the stream's current record, pass it to swap_coll_bufs() together
 * with last_line, and exchange the data buffers before the loop fetches.
 */
177 (void) conversion_fcn(S
->m_fields_head
, &cur_streamp
->s_current
,
178 FCV_REALLOC
, S
->m_field_separator
);
180 swap_coll_bufs(&cur_streamp
->s_current
, &last_line
);
182 stream_swap_buffer(cur_streamp
, &data_buffer
, &data_bufsize
);
/* Main loop: fetch, convert and compare until the stream reports end of stream. */
184 while (!SOP_EOS(cur_streamp
)) {
185 (void) SOP_FETCH(cur_streamp
);
188 (void) conversion_fcn(S
->m_fields_head
, &cur_streamp
->s_current
,
189 FCV_REALLOC
, S
->m_field_separator
);
/* Compare the previous line (first argument) with the current one. */
191 r
= collation_fcn(&last_line
, &cur_streamp
->s_current
, 0,
/*
 * Acceptable pair: r is negative, or the lines compare equal and unique
 * lines were not requested.  The current record is passed to swap_coll_bufs()
 * with last_line and the data buffers are exchanged.  NOTE(review): the rest
 * of this branch (presumably a guard on swap_required and a continue) is not
 * visible.
 */
194 if (r
< 0 || (r
== 0 && S
->m_unique_lines
== 0)) {
195 swap_coll_bufs(&cur_streamp
->s_current
, &last_line
);
197 stream_swap_buffer(cur_streamp
, &data_buffer
,
/*
 * Disorder report.  NOTE(review): the condition guarding this call is not
 * visible; presumably r > 0.  CHECK_WIDE is added unless
 * S->m_single_byte_locale is set.
 */
204 fail_check(&cur_streamp
->s_current
,
205 CHECK_FAILURE_DISORDER
|
206 (S
->m_single_byte_locale
? 0 : CHECK_WIDE
),
209 exit(E_FAILED_CHECK
);
/* Equal neighbours while unique lines were requested: report and exit. */
212 if (r
== 0 && S
->m_unique_lines
!= 0) {
214 fail_check(&cur_streamp
->s_current
,
215 CHECK_FAILURE_NONUNIQUE
|
216 (S
->m_single_byte_locale
? 0 : CHECK_WIDE
),
219 exit(E_FAILED_CHECK
);