#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

set -e
function usage {
  echo "Usage: ${0} [options] /path/to/component/bin-install /path/to/hadoop/executable /path/to/hadoop/hadoop-yarn-server-tests-tests.jar /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar /path/to/mapred/executable"
  echo ""
  echo "    --zookeeper-data /path/to/use                                     Where the embedded zookeeper instance should write its data."
  echo "                                                                      defaults to 'zk-data' in the working-dir."
  echo "    --working-dir /path/to/use                                        Path for writing configs and logs. Must exist."
  echo "                                                                      defaults to making a directory via mktemp."
  echo "    --hadoop-client-classpath /path/to/some.jar:/path/to/another.jar  classpath for hadoop jars."
  echo "                                                                      defaults to 'hadoop classpath'"
  echo "    --hbase-client-install /path/to/unpacked/client/tarball           if given, we'll look here for the hbase client jars instead of the bin-install"
  echo "    --force-data-clean                                                Delete all data in HDFS and ZK prior to starting up hbase"
  echo "    --single-process                                                  Run as a single process instead of pseudo-distributed"
  echo ""
  exit 1
}
# if no args specified, show usage
if [ $# -lt 5 ]; then
  usage
fi

# Get arguments
declare component_install
declare hadoop_exec
declare working_dir
declare zk_data_dir
declare clean
declare distributed="true"
declare hadoop_jars
declare hbase_client
while [ $# -gt 0 ]
do
  case "$1" in
    --working-dir) shift; working_dir=$1; shift;;
    --force-data-clean) shift; clean="true";;
    --zookeeper-data) shift; zk_data_dir=$1; shift;;
    --single-process) shift; distributed="false";;
    --hadoop-client-classpath) shift; hadoop_jars="$1"; shift;;
    --hbase-client-install) shift; hbase_client="$1"; shift;;
    --) shift; break;;
    -*) usage ;;
    *) break;;  # terminate while loop
  esac
done

# should still have where component checkout is.
if [ $# -lt 5 ]; then
  usage
fi
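# Resolve each positional argument to an absolute path: cd into its directory,
# print the working directory, and re-append the basename.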
component_install="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
hadoop_exec="$(cd "$(dirname "$2")"; pwd)/$(basename "$2")"
yarn_server_tests_test_jar="$(cd "$(dirname "$3")"; pwd)/$(basename "$3")"
mapred_jobclient_test_jar="$(cd "$(dirname "$4")"; pwd)/$(basename "$4")"
mapred_exec="$(cd "$(dirname "$5")"; pwd)/$(basename "$5")"

if [ ! -x "${hadoop_exec}" ]; then
  echo "hadoop cli does not appear to be executable." >&2
  exit 1
fi

if [ ! -x "${mapred_exec}" ]; then
  echo "mapred cli does not appear to be executable." >&2
  exit 1
fi

if [ ! -d "${component_install}" ]; then
  echo "Path to HBase binary install should be a directory." >&2
  exit 1
fi

if [ ! -f "${yarn_server_tests_test_jar}" ]; then
  echo "Specified YARN server tests test jar is not a file." >&2
  exit 1
fi

if [ ! -f "${mapred_jobclient_test_jar}" ]; then
  echo "Specified MapReduce jobclient test jar is not a file." >&2
  exit 1
fi

if [ -z "${working_dir}" ]; then
  if ! working_dir="$(mktemp -d -t hbase-pseudo-dist-test)" ; then
    echo "Failed to create temporary working directory. Please specify via --working-dir" >&2
    exit 1
  fi
else
  # absolutes please
  working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
  if [ ! -d "${working_dir}" ]; then
    echo "passed working directory '${working_dir}' must already exist." >&2
    exit 1
  fi
fi

if [ -z "${zk_data_dir}" ]; then
  zk_data_dir="${working_dir}/zk-data"
  mkdir "${zk_data_dir}"
else
  # absolutes please
  zk_data_dir="$(cd "$(dirname "${zk_data_dir}")"; pwd)/$(basename "${zk_data_dir}")"
  if [ ! -d "${zk_data_dir}" ]; then
    echo "passed zookeeper data directory '${zk_data_dir}' must already exist." >&2
    exit 1
  fi
fi

if [ -z "${hbase_client}" ]; then
  hbase_client="${component_install}"
else
  echo "Using HBase client-side artifact"
  # absolutes please
  hbase_client="$(cd "$(dirname "${hbase_client}")"; pwd)/$(basename "${hbase_client}")"
  if [ ! -d "${hbase_client}" ]; then
    echo "If given, the hbase client install should be a directory with the contents of the client tarball." >&2
    exit 1
  fi
fi
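
# If an explicit hadoop client classpath was given, make every entry absolute
# and re-join the list with ':' separators.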
if [ -n "${hadoop_jars}" ]; then
  declare -a tmp_jars
  for entry in $(echo "${hadoop_jars}" | tr ':' '\n'); do
    tmp_jars=("${tmp_jars[@]}" "$(cd "$(dirname "${entry}")"; pwd)/$(basename "${entry}")")
  done
  hadoop_jars="$(IFS=:; echo "${tmp_jars[*]}")"
fi

echo "You'll find logs and temp files in ${working_dir}"

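# redirect_and_run logs the command line plus stderr to <log_base>.err and
# stdout to <log_base>.out, so each step leaves its own pair of log files.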
function redirect_and_run {
  log_base=$1
  shift
  echo "$*" >"${log_base}.err"
  "$@" >"${log_base}.out" 2>>"${log_base}.err"
}

(cd "${working_dir}"

echo "Hadoop version information:"
"${hadoop_exec}" version
hadoop_version=$("${hadoop_exec}" version | head -n 1)
hadoop_version="${hadoop_version#Hadoop }"
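# ${hadoop_version%.*.*} strips the trailing ".minor.patch" components, leaving
# just the major version for this comparison and the ones further down.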
if [ "${hadoop_version%.*.*}" -gt 2 ]; then
  "${hadoop_exec}" envvars
else
  echo "JAVA_HOME: ${JAVA_HOME}"
fi

# Ensure that if some other Hadoop install happens to be present in the environment we ignore it.
HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"
export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP

if [ -n "${clean}" ]; then
  echo "Cleaning out ZooKeeper..."
  rm -rf "${zk_data_dir:?}/"*
fi

echo "HBase version information:"
"${component_install}/bin/hbase" version 2>/dev/null
hbase_version=$("${component_install}/bin/hbase" version 2>/dev/null | head -n 1)
hbase_version="${hbase_version#HBase }"

if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" ]; then
  echo "HBase binary install doesn't appear to include a shaded mapreduce artifact." >&2
  exit 1
fi

if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-${hbase_version}.jar" ]; then
  echo "HBase binary install doesn't appear to include a shaded client artifact." >&2
  exit 1
fi

if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar" ]; then
  echo "HBase binary install doesn't appear to include a shaded client (byo-hadoop) artifact." >&2
  exit 1
fi

echo "Writing out configuration for HBase."
rm -rf "${working_dir}/hbase-conf"
mkdir "${working_dir}/hbase-conf"

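# Prefer the log4j configuration that ships with the install; otherwise fall
# back to a minimal console-only configuration.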
if [ -f "${component_install}/conf/log4j.properties" ]; then
  cp "${component_install}/conf/log4j.properties" "${working_dir}/hbase-conf/log4j.properties"
else
  cat >"${working_dir}/hbase-conf/log4j.properties" <<EOF
# Define some default values that can be overridden by system properties
hbase.root.logger=INFO,console

# Define the root logger to the system property "hbase.root.logger".
log4j.rootLogger=\${hbase.root.logger}

# Logging Threshold
log4j.threshold=ALL
# console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n
EOF
fi

cat >"${working_dir}/hbase-conf/hbase-site.xml" <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <!-- We rely on the defaultFS being set in our hadoop confs -->
    <value>/hbase</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>${zk_data_dir}</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>${distributed}</value>
  </property>
</configuration>
EOF
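
# Only pseudo-distributed mode needs a regionservers file; a single local
# region server is enough for this test.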
if [ "true" = "${distributed}" ]; then
  cat >"${working_dir}/hbase-conf/regionservers" <<EOF
localhost
EOF
fi
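
# cleanup runs on normal exit and on SIGQUIT: stop HBase, dump a final HDFS
# listing for debugging, then abort the backgrounded minicluster process
# (kill -6 sends SIGABRT).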
function cleanup {

  echo "Shutting down HBase"
  HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/stop-hbase.sh"

  if [ -f "${working_dir}/hadoop.pid" ]; then
    echo "Shutdown: listing HDFS contents"
    redirect_and_run "${working_dir}/hadoop_listing_at_end" \
        "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R /
    echo "Shutting down Hadoop"
    kill -6 "$(cat "${working_dir}/hadoop.pid")"
  fi
}

trap cleanup EXIT SIGQUIT

echo "Starting up Hadoop"

if [ "${hadoop_version%.*.*}" -gt 2 ]; then
  "${mapred_exec}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
else
  HADOOP_CLASSPATH="${yarn_server_tests_test_jar}" "${hadoop_exec}" jar "${mapred_jobclient_test_jar}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
fi

echo "$!" > "${working_dir}/hadoop.pid"
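
# The minicluster writes out core-site.xml once it is up; poll for the file
# with an exponentially increasing sleep.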
sleep_time=2
until [ -s "${working_dir}/hbase-conf/core-site.xml" ]; do
  printf '\twaiting for Hadoop to finish starting up.\n'
  sleep "${sleep_time}"
  sleep_time="$((sleep_time*2))"
done

if [ "${hadoop_version%.*.*}" -gt 2 ]; then
  echo "Verifying configs"
  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" conftest
fi

if [ -n "${clean}" ]; then
  echo "Cleaning out HDFS..."
  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r /hbase
  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example/
  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example-region-listing.data
fi

echo "Listing HDFS contents"
redirect_and_run "${working_dir}/hadoop_cluster_smoke" \
    "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R /

echo "Starting up HBase"
HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/start-hbase.sh"
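
# Wait for HBase to come up by retrying a trivial shell command against
# hbase:meta, again with an exponentially increasing sleep.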
sleep_time=2
until "${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/waiting_hbase_startup.log" 2>&1 <<EOF
count 'hbase:meta'
EOF
do
  printf '\tretry waiting for hbase to come up.\n'
  sleep "${sleep_time}"
  sleep_time="$((sleep_time*2))"
done
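
# Pre-splitting into 1,000 regions gives the shaded client example further down
# a large region listing to round-trip through HDFS.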
echo "Setting up table 'test:example' with 1,000 regions"
"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/table_create.log" 2>&1 <<EOF
create_namespace 'test'
create 'test:example', 'family1', 'family2', {NUMREGIONS => 1000, SPLITALGO => 'UniformSplit'}
EOF

echo "writing out example TSV to example.tsv"
cat >"${working_dir}/example.tsv" <<EOF
row1	value8	value8
row3	value2
row2	value9
row10	value1
pow1	value8	value8
pow3	value2
pow2	value9
pow10	value1
paw1	value8	value8
paw3	value2
paw2	value9
paw10	value1
raw1	value8	value8
raw3	value2
raw2	value9
raw10	value1
aow1	value8	value8
aow3	value2
aow2	value9
aow10	value1
aaw1	value8	value8
aaw3	value2
aaw2	value9
aaw10	value1
how1	value8	value8
how3	value2
how2	value9
how10	value1
zow1	value8	value8
zow3	value2
zow2	value9
zow10	value1
zaw1	value8	value8
zaw3	value2
zaw2	value9
zaw10	value1
haw1	value8	value8
haw3	value2
haw2	value9
haw10	value1
low1	value8	value8
low3	value2
low2	value9
low10	value1
law1	value8	value8
law3	value2
law2	value9
law10	value1
EOF
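
# The TSV above has 48 data rows; after the upload and import below, the
# row-count check expects exactly that number.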
echo "uploading example.tsv to HDFS"
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -mkdir example
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyFromLocal "${working_dir}/example.tsv" "example/"

echo "Importing TSV via shaded client artifact for HBase - MapReduce integration."
# hbase_thirdparty_jars=("${component_install}"/lib/htrace-core4*.jar \
#     "${component_install}"/lib/slf4j-api-*.jar \
#     "${component_install}"/lib/commons-logging-*.jar \
#     "${component_install}"/lib/slf4j-log4j12-*.jar \
#     "${component_install}"/lib/log4j-1.2.*.jar \
#     "${working_dir}/hbase-conf/log4j.properties")
# hbase_dep_classpath=$(IFS=:; echo "${hbase_thirdparty_jars[*]}")
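# 'hbase mapredcp' prints the dependency classpath HBase MapReduce jobs need;
# it is used locally via HADOOP_CLASSPATH and shipped to the job with -libjars.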
hbase_dep_classpath="$("${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" mapredcp)"
HADOOP_CLASSPATH="${hbase_dep_classpath}" redirect_and_run "${working_dir}/mr-importtsv" \
    "${hadoop_exec}" --config "${working_dir}/hbase-conf/" jar "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" importtsv -Dimporttsv.columns=HBASE_ROW_KEY,family1:column1,family1:column4,family1:column3 test:example example/ -libjars "${hbase_dep_classpath}"
402 "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_import.out" 2>"${working_dir}/scan_import.err" <<EOF
403 scan 'test:example'
echo "Verifying row count from import."
import_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1)
if [ ! "${import_rowcount}" -eq 48 ]; then
  echo "ERROR: Instead of finding 48 rows, we found ${import_rowcount}."
  exit 2
fi

if [ -z "${hadoop_jars}" ]; then
  echo "Hadoop client jars not given; getting them from 'hadoop classpath' for the example."
  hadoop_jars=$("${hadoop_exec}" --config "${working_dir}/hbase-conf/" classpath)
fi

echo "Building shaded client example."
cat >"${working_dir}/HBaseClientReadWriteExample.java" <<EOF
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellBuilder;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.RegionMetrics;
import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.LinkedList;
import java.util.List;
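
/**
 * Shaded-client example: list the cluster's regions via Admin, write the list
 * to a file in HDFS, read it back, and store one row per region (plus a count
 * marker row) in the 'test:example' table.
 */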
public class HBaseClientReadWriteExample {
  private static final byte[] FAMILY_BYTES = Bytes.toBytes("family2");

  public static void main(String[] args) throws Exception {
    Configuration hbase = HBaseConfiguration.create();
    Configuration hadoop = new Configuration();
    try (Connection connection = ConnectionFactory.createConnection(hbase)) {
      System.out.println("Generating list of regions");
      final List<String> regions = new LinkedList<>();
      try (Admin admin = connection.getAdmin()) {
        final ClusterMetrics cluster = admin.getClusterMetrics();
        System.out.println(String.format("\tCluster reports version %s, ave load %f, region count %d", cluster.getHBaseVersion(), cluster.getAverageLoad(), cluster.getRegionCount()));
        for (ServerMetrics server : cluster.getLiveServerMetrics().values()) {
          for (RegionMetrics region : server.getRegionMetrics().values()) {
            regions.add(region.getNameAsString());
          }
        }
      }
      final Path listing = new Path("example-region-listing.data");
      System.out.println("Writing list to HDFS");
      try (FileSystem fs = FileSystem.newInstance(hadoop)) {
        final Path path = fs.makeQualified(listing);
        try (FSDataOutputStream out = fs.create(path)) {
          out.writeInt(regions.size());
          for (String region : regions) {
            out.writeUTF(region);
          }
          out.hsync();
        }
      }
      final List<Put> puts = new LinkedList<>();
      final Put marker = new Put(new byte[] { (byte)0 });
      System.out.println("Reading list from HDFS");
      try (FileSystem fs = FileSystem.newInstance(hadoop)) {
        final Path path = fs.makeQualified(listing);
        final CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
        try (FSDataInputStream in = fs.open(path)) {
          final int count = in.readInt();
          marker.addColumn(FAMILY_BYTES, Bytes.toBytes("count"), Bytes.toBytes(count));
          for (int i = 0; i < count; i++) {
            builder.clear();
            final byte[] row = Bytes.toBytes(in.readUTF());
            final Put put = new Put(row);
            builder.setRow(row);
            builder.setFamily(FAMILY_BYTES);
            builder.setType(Cell.Type.Put);
            put.add(builder.build());
            puts.add(put);
          }
        }
      }
      System.out.println("Writing list into HBase table");
      try (Table table = connection.getTable(TableName.valueOf("test:example"))) {
        table.put(marker);
        table.put(puts);
      }
    }
  }
}
EOF

redirect_and_run "${working_dir}/hbase-shaded-client-compile" \
    javac -cp "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hadoop_jars}" "${working_dir}/HBaseClientReadWriteExample.java"
echo "Running shaded client example. It'll fetch the set of regions, round-trip them to a file in HDFS, then write them one-per-row into the test table."
# The order of classpath entries here is important. If we're using non-shaded Hadoop 3 / 2.9.0 jars, we have to work around YARN-2190.
redirect_and_run "${working_dir}/hbase-shaded-client-example" \
    java -cp "${working_dir}/hbase-conf/:${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hbase_dep_classpath}:${working_dir}:${hadoop_jars}" HBaseClientReadWriteExample

echo "Checking on results of example program."
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyToLocal "example-region-listing.data" "${working_dir}/example-region-listing.data"

"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_example.out" 2>"${working_dir}/scan_example.err" <<EOF
scan 'test:example'
EOF

echo "Verifying row count from example."
example_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1)
if [ "${example_rowcount}" -gt "1049" ]; then
  echo "Found ${example_rowcount} rows, which is enough to cover the 48 imported rows, 1000 for the example's use of the user table regions, 1 for the example's use of the meta region, and 1 for the example's count record"
else
  echo "ERROR: Only found ${example_rowcount} rows."