2 # Licensed to the Apache Software Foundation (ASF) under one
3 # or more contributor license agreements. See the NOTICE file
4 # distributed with this work for additional information
5 # regarding copyright ownership. The ASF licenses this file
6 # to you under the Apache License, Version 2.0 (the
7 # "License"); you may not use this file except in compliance
8 # with the License. You may obtain a copy of the License at
10 # http://www.apache.org/licenses/LICENSE-2.0
12 # Unless required by applicable law or agreed to in writing,
13 # software distributed under the License is distributed on an
14 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 # KIND, either express or implied. See the License for the
16 # specific language governing permissions and limitations
21 echo "Usage: ${0} [options] /path/to/component/bin-install /path/to/hadoop/executable /path/to/share/hadoop/yarn/timelineservice /path/to/hadoop/hadoop-yarn-server-tests-tests.jar /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar /path/to/mapred/executable"
23 echo " --zookeeper-data /path/to/use Where the embedded zookeeper instance should write its data."
24 echo " defaults to 'zk-data' in the working-dir."
25 echo " --working-dir /path/to/use Path for writing configs and logs. must exist."
26 echo " defaults to making a directory via mktemp."
27 echo " --hadoop-client-classpath /path/to/some.jar:/path/to/another.jar classpath for hadoop jars."
28 echo " defaults to 'hadoop classpath'"
29 echo " --hbase-client-install /path/to/unpacked/client/tarball if given we'll look here for hbase client jars instead of the bin-install"
30 echo " --force-data-clean Delete all data in HDFS and ZK prior to starting up hbase"
31 echo " --single-process Run as single process instead of pseudo-distributed"
35 # if no args specified, show usage
41 declare component_install
46 declare distributed
="true"
52 --working-dir) shift; working_dir
=$1; shift;;
53 --force-data-clean) shift; clean
="true";;
54 --zookeeper-data) shift; zk_data_dir
=$1; shift;;
55 --single-process) shift; distributed
="false";;
56 --hadoop-client-classpath) shift; hadoop_jars
="$1"; shift;;
57 --hbase-client-install) shift; hbase_client
="$1"; shift;;
60 *) break;; # terminate while loop
64 # should still have where component checkout is.
# Resolve a possibly-relative path to an absolute one.
# Arguments: $1 - path to resolve; its parent directory must exist, the leaf need not.
# Outputs:   "<absolute parent>/<leaf name>" on stdout.
# Returns:   non-zero when the parent directory cannot be entered (cd reports why on stderr).
function absolute_path {
  local parent
  # '&&' rather than ';' so a failed cd is not papered over by pwd reporting the
  # caller's current directory.
  parent="$(cd "$(dirname "$1")" && pwd)" || return 1
  printf '%s/%s\n' "${parent}" "$(basename "$1")"
}

# Positional arguments, in the order listed by the usage text. Stop immediately when one
# cannot be resolved instead of continuing with a path that points somewhere else.
component_install="$(absolute_path "$1")" || exit 1
hadoop_exec="$(absolute_path "$2")" || exit 1
timeline_service_dir="$(absolute_path "$3")" || exit 1
yarn_server_tests_test_jar="$(absolute_path "$4")" || exit 1
mapred_jobclient_test_jar="$(absolute_path "$5")" || exit 1
mapred_exec="$(absolute_path "$6")" || exit 1
75 if [ ! -x "${hadoop_exec}" ]; then
76 echo "hadoop cli does not appear to be executable." >&2
80 if [ ! -x "${mapred_exec}" ]; then
81 echo "mapred cli does not appear to be executable." >&2
85 if [ ! -d "${component_install}" ]; then
86 echo "Path to HBase binary install should be a directory." >&2
90 if [ ! -f "${yarn_server_tests_test_jar}" ]; then
91 echo "Specified YARN server tests test jar is not a file." >&2
95 if [ ! -f "${mapred_jobclient_test_jar}" ]; then
96 echo "Specified MapReduce jobclient test jar is not a file." >&2
100 if [ -z "${working_dir}" ]; then
101 if ! working_dir
="$(mktemp -d -t hbase-pseudo-dist-test)" ; then
102 echo "Failed to create temporary working directory. Please specify via --working-dir" >&2
107 working_dir
="$(cd "$
(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
108 if [ ! -d "${working_dir}" ]; then
109 echo "passed working directory '${working_dir}' must already exist." >&2
114 if [ -z "${zk_data_dir}" ]; then
# No --zookeeper-data given: keep ZooKeeper's data under the working directory.
zk_data_dir="${working_dir}/zk-data"
# -p so that a working directory reused from an earlier run (the situation
# --force-data-clean is meant for) does not fail on the already-existing directory.
mkdir -p "${zk_data_dir}"
119 zk_data_dir
="$(cd "$
(dirname "${zk_data_dir}")"; pwd)/$(basename "${zk_data_dir}")"
120 if [ ! -d "${zk_data_dir}" ]; then
# Name the option that is actually wrong, and report on stderr like the other argument checks.
echo "passed directory for zookeeper data '${zk_data_dir}' must already exist." >&2
126 if [ -z "${hbase_client}" ]; then
127 hbase_client
="${component_install}"
129 echo "Using HBase client-side artifact"
131 hbase_client
="$(cd "$
(dirname "${hbase_client}")"; pwd)/$(basename "${hbase_client}")"
132 if [ ! -d "${hbase_client}" ]; then
133 echo "If given hbase client install should be a directory with contents of the client tarball." >&2
138 if [ -n "${hadoop_jars}" ]; then
140 for entry
in $
(echo "${hadoop_jars}" |
tr ':' '\n'); do
141 tmp_jars
=("${tmp_jars[@]}" "$(cd "$(dirname "${entry}")"; pwd)/$(basename "${entry}")")
143 hadoop_jars="$
(IFS
=:; echo "${tmp_jars[*]}")"
147 echo "You
'll find logs and temp files in ${working_dir}"
149 function redirect_and_run {
152 echo "$*" >"${log_base}.err"
153 "$@" >"${log_base}.out" 2>>"${log_base}.err"
158 echo "Hadoop version information:"
159 "${hadoop_exec}" version
160 hadoop_version=$("${hadoop_exec}" version | head -n 1)
161 hadoop_version="${hadoop_version#Hadoop }"
162 if [ "${hadoop_version%.*.*}" -gt 2 ]; then
163 "${hadoop_exec}" envvars
165 echo "JAVA_HOME: ${JAVA_HOME}"
# Any other Hadoop install that happens to be present in the environment must be ignored,
# so switch off HBase's Hadoop classpath lookup for every child process.
export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"
172 if [ -n "${clean}" ]; then
echo "Cleaning out ZooKeeper..."
# The glob has to sit outside the quotes: quoted, rm is handed an entry literally named '*'
# and, because of -f, silently removes nothing.
# ':?' aborts rather than expanding to '/*' should zk_data_dir ever be empty.
rm -rf "${zk_data_dir:?}"/*
echo "HBase version information:"
"${component_install}/bin/hbase" version 2>/dev/null
# Silence the hbase launcher itself, as on the line above; attaching the redirect to
# 'head' left hbase's stderr going to the console.
hbase_version=$("${component_install}/bin/hbase" version 2>/dev/null | head -n 1)
# The first line of output is expected to look like "HBase <version>"; keep the version only.
hbase_version="${hbase_version#HBase }"
182 if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" ]; then
183 echo "HBase binary install doesn't appear to include a shaded mapreduce artifact.
" >&2
187 if [ ! -s "${hbase_client}/lib
/shaded-clients
/hbase-shaded-client-
${hbase_version}.jar
" ]; then
188 echo "HBase binary
install doesn
't appear to include a shaded client artifact." >&2
192 if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar" ]; then
# Distinguish this failure from the plain shaded-client check, which used the same text.
echo "HBase binary install doesn't appear to include a shaded client byo-hadoop artifact." >&2
197 echo "Writing out configuration
for HBase.
"
198 rm -rf "${working_dir}/hbase-conf
"
199 mkdir "${working_dir}/hbase-conf
"
201 if [ -f "${component_install}/conf
/log4j.properties
" ]; then
202 cp "${component_install}/conf
/log4j.properties
" "${working_dir}/hbase-conf
/log4j.properties
"
# Fallback log4j configuration for installs that ship none.
# The delimiter is quoted so the log4j placeholder ${hbase.root.logger} in the body is
# written out literally; with an unquoted delimiter bash tries to expand it and stops
# with "bad substitution".
cat >"${working_dir}/hbase-conf/log4j.properties" <<'EOF'
205 # Define some default values that can be overridden by system properties
206 hbase.root.logger=INFO,console
208 # Define the root logger to the system property "hbase.root.logger
".
209 log4j.rootLogger=${hbase.root.logger}
214 log4j.appender.console=org.apache.log4j.ConsoleAppender
215 log4j.appender.console.target=System.err
216 log4j.appender.console.layout=org.apache.log4j.PatternLayout
217 log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n
221 cat >"${working_dir}/hbase-conf
/hbase-site.xml
" <<EOF
222 <?xml version="1.0"?>
223 <?xml-stylesheet type="text
/xsl
" href="configuration.xsl
"?>
227 * Licensed to the Apache Software Foundation (ASF) under one
228 * or more contributor license agreements. See the NOTICE file
229 * distributed with this work for additional information
230 * regarding copyright ownership. The ASF licenses this file
231 * to you under the Apache License, Version 2.0 (the
232 * "License
"); you may not use this file except in compliance
233 * with the License. You may obtain a copy of the License at
235 * http://www.apache.org/licenses/LICENSE-2.0
237 * Unless required by applicable law or agreed to in writing, software
238 * distributed under the License is distributed on an "AS IS
" BASIS,
239 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
240 * See the License for the specific language governing permissions and
241 * limitations under the License.
246 <name>hbase.rootdir</name>
247 <!-- We rely on the defaultFS being set in our hadoop confs -->
248 <value>/hbase</value>
251 <name>hbase.zookeeper.property.dataDir</name>
252 <value>${zk_data_dir}</value>
255 <name>hbase.cluster.distributed</name>
256 <value>${distributed}</value>
261 if [ "true
" = "${distributed}" ]; then
262 cat >"${working_dir}/hbase-conf
/regionservers
" <<EOF
269 echo "Shutting down HBase
"
270 HBASE_CONF_DIR="${working_dir}/hbase-conf
/" "${component_install}/bin
/stop-hbase.sh
"
272 if [ -f "${working_dir}/hadoop.pid
" ]; then
273 echo "Shutdown
: listing HDFS contents
"
274 redirect_and_run "${working_dir}/hadoop_listing_at_end
" \
275 "${hadoop_exec}" --config "${working_dir}/hbase-conf
/" fs -ls -R /
277 echo "Shutting down Hadoop
"
278 kill -6 "$
(cat "${working_dir}/hadoop.pid")"
282 trap cleanup EXIT SIGQUIT
284 echo "Starting up Hadoop
"
286 if [ "${hadoop_version%.*.*}" -gt 2 ]; then
287 "${mapred_exec}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err
" &
289 HADOOP_CLASSPATH="${timeline_service_dir}/*:${timeline_service_dir}/lib/*:${yarn_server_tests_test_jar}" "${hadoop_exec}" jar "${mapred_jobclient_test_jar}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err
" &
292 echo "$
!" > "${working_dir}/hadoop.pid
"
294 # 2 + 4 + 8 + .. + 256 ~= 8.5 minutes.
297 until [[ -s "${working_dir}/hbase-conf/core-site.xml" || "${sleep_time}" -ge "${max_sleep_time}" ]]; do
298 printf '\twaiting for Hadoop to finish starting up.\n'
299 sleep "${sleep_time}"
300 sleep_time="$
((sleep_time
*2))"
303 if [ "${sleep_time}" -ge "${max_sleep_time}" ] ; then
304 echo "time out waiting
for Hadoop to startup
" >&2
308 if [ "${hadoop_version%.*.*}" -gt 2 ]; then
309 echo "Verifying configs
"
310 "${hadoop_exec}" --config "${working_dir}/hbase-conf
/" conftest
313 if [ -n "${clean}" ]; then
314 echo "Cleaning out HDFS...
"
315 "${hadoop_exec}" --config "${working_dir}/hbase-conf
/" fs -rm -r /hbase
316 "${hadoop_exec}" --config "${working_dir}/hbase-conf
/" fs -rm -r example/
317 "${hadoop_exec}" --config "${working_dir}/hbase-conf
/" fs -rm -r example-region-listing.data
320 echo "Listing HDFS contents
"
321 redirect_and_run "${working_dir}/hadoop_cluster_smoke
" \
322 "${hadoop_exec}" --config "${working_dir}/hbase-conf
/" fs -ls -R /
324 echo "Starting up HBase
"
325 HBASE_CONF_DIR="${working_dir}/hbase-conf
/" "${component_install}/bin
/start-hbase.sh
"
328 until "${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/waiting_hbase_startup.log
" 2>&1 <<EOF
332 printf '\tretry waiting for hbase to come up.\n'
333 sleep "${sleep_time}"
334 sleep_time="$
((sleep_time
*2))"
337 echo "Setting up table
'test:example' with
1,000 regions
"
338 "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/table_create.log
" 2>&1 <<EOF
339 create_namespace 'test'
340 create 'test:example', 'family1', 'family2', {NUMREGIONS => 1000, SPLITALGO => 'UniformSplit'}
343 echo "writing out example TSV to example.tsv
"
344 cat >"${working_dir}/example.tsv
" <<EOF
395 echo "uploading example.tsv to HDFS
"
396 "${hadoop_exec}" --config "${working_dir}/hbase-conf
/" fs -mkdir example
397 "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyFromLocal "${working_dir}/example.tsv
" "example
/"
399 echo "Importing TSV via shaded client artifact
for HBase
- MapReduce integration.
"
400 # hbase_thirdparty_jars=("${component_install}"/lib/htrace-core4*.jar \
401 # "${component_install}"/lib/slf4j-api-*.jar \
402 # "${component_install}"/lib/commons-logging-*.jar \
403 # "${component_install}"/lib/slf4j-log4j12-*.jar \
404 # "${component_install}"/lib/log4j-1.2.*.jar \
405 # "${working_dir}/hbase-conf
/log4j.properties
")
406 # hbase_dep_classpath=$(IFS=:; echo "${hbase_thirdparty_jars[*]}")
407 hbase_dep_classpath="$
("${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" mapredcp
)"
408 HADOOP_CLASSPATH="${hbase_dep_classpath}" redirect_and_run "${working_dir}/mr-importtsv
" \
409 "${hadoop_exec}" --config "${working_dir}/hbase-conf/" jar "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" importtsv -Dimporttsv.columns=HBASE_ROW_KEY,family1:column1,family1:column4,family1:column3 test:example example/ -libjars "${hbase_dep_classpath}"
410 "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_import.out" 2>"${working_dir}/scan_import.err
" <<EOF
414 echo "Verifying row count from import.
"
# Ask the HBase shell to count the imported rows; the last line of its output is taken
# as the count.
import_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1)
# An empty or non-numeric value would make the numeric comparison that follows error out,
# and a failed test reads as "false" to 'if', so verification would be skipped without a
# word. Report the problem and substitute a value that can never match the expected count.
if ! [ "${import_rowcount}" -eq "${import_rowcount}" ] 2>/dev/null; then
  echo "ERROR: Could not read a row count from the HBase shell output (last line was '${import_rowcount}')." >&2
  import_rowcount=-1
fi
416 if [ ! "${import_rowcount}" -eq 48 ]; then
417 echo "ERROR
: Instead of finding
48 rows
, we found
${import_rowcount}.
"
421 if [ -z "${hadoop_jars}" ]; then
422 echo "Hadoop client jars not given
; getting them from
'hadoop classpath' for the example.
"
423 hadoop_jars=$("${hadoop_exec}" --config "${working_dir}/hbase-conf
/" classpath)
426 echo "Building shaded client example.
"
427 cat >"${working_dir}/HBaseClientReadWriteExample.java
" <<EOF
428 import org.apache.hadoop.conf.Configuration;
429 import org.apache.hadoop.fs.FileSystem;
430 import org.apache.hadoop.fs.FSDataInputStream;
431 import org.apache.hadoop.fs.FSDataOutputStream;
432 import org.apache.hadoop.fs.Path;
433 import org.apache.hadoop.hbase.Cell;
434 import org.apache.hadoop.hbase.CellBuilder;
435 import org.apache.hadoop.hbase.CellBuilderFactory;
436 import org.apache.hadoop.hbase.CellBuilderType;
437 import org.apache.hadoop.hbase.ClusterMetrics;
438 import org.apache.hadoop.hbase.HBaseConfiguration;
439 import org.apache.hadoop.hbase.RegionMetrics;
440 import org.apache.hadoop.hbase.ServerMetrics;
441 import org.apache.hadoop.hbase.TableName;
442 import org.apache.hadoop.hbase.client.Admin;
443 import org.apache.hadoop.hbase.client.Connection;
444 import org.apache.hadoop.hbase.client.ConnectionFactory;
445 import org.apache.hadoop.hbase.client.Put;
446 import org.apache.hadoop.hbase.client.Table;
447 import org.apache.hadoop.hbase.util.Bytes;
449 import java.util.LinkedList;
450 import java.util.List;
453 public class HBaseClientReadWriteExample {
454 private static final byte[] FAMILY_BYTES = Bytes.toBytes("family2
");
456 public static void main(String[] args) throws Exception {
457 Configuration hbase = HBaseConfiguration.create();
458 Configuration hadoop = new Configuration();
459 try (Connection connection = ConnectionFactory.createConnection(hbase)) {
460 System.out.println("Generating list of regions
");
461 final List<String> regions = new LinkedList<>();
462 try (Admin admin = connection.getAdmin()) {
463 final ClusterMetrics cluster = admin.getClusterMetrics();
464 System.out.println(String.format("\tCluster reports version
%s
, ave load
%f
, region count
%d
", cluster.getHBaseVersion(), cluster.getAverageLoad(), cluster.getRegionCount()));
465 for (ServerMetrics server : cluster.getLiveServerMetrics().values()) {
466 for (RegionMetrics region : server.getRegionMetrics().values()) {
467 regions.add(region.getNameAsString());
471 final Path listing = new Path("example-region-listing.data
");
472 System.out.println("Writing list to HDFS
");
473 try (FileSystem fs = FileSystem.newInstance(hadoop)) {
474 final Path path = fs.makeQualified(listing);
475 try (FSDataOutputStream out = fs.create(path)) {
476 out.writeInt(regions.size());
477 for (String region : regions) {
478 out.writeUTF(region);
483 final List<Put> puts = new LinkedList<>();
484 final Put marker = new Put(new byte[] { (byte)0 });
485 System.out.println("Reading list from HDFS
");
486 try (FileSystem fs = FileSystem.newInstance(hadoop)) {
487 final Path path = fs.makeQualified(listing);
488 final CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
489 try (FSDataInputStream in = fs.open(path)) {
490 final int count = in.readInt();
491 marker.addColumn(FAMILY_BYTES, Bytes.toBytes("count
"), Bytes.toBytes(count));
492 for(int i = 0; i < count; i++) {
494 final byte[] row = Bytes.toBytes(in.readUTF());
495 final Put put = new Put(row);
497 builder.setFamily(FAMILY_BYTES);
498 builder.setType(Cell.Type.Put);
499 put.add(builder.build());
504 System.out.println("Writing list into HBase table
");
505 try (Table table = connection.getTable(TableName.valueOf("test:example
"))) {
513 redirect_and_run "${working_dir}/hbase-shaded-client-compile
" \
514 javac -cp "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hadoop_jars}" "${working_dir}/HBaseClientReadWriteExample.java
"
515 echo "Running shaded client example. It
'll fetch the set of regions, round-trip them to a file in HDFS, then write them one-per-row into the test table."
# The order of classpath entries here is important. If we're using non-shaded Hadoop 3 / 2.9.0 jars,
# we have to work around YARN-2190.
517 redirect_and_run
"${working_dir}/hbase-shaded-client-example" \
518 java
-cp "${working_dir}/hbase-conf/:${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hbase_dep_classpath}:${working_dir}:${hadoop_jars}" HBaseClientReadWriteExample
520 echo "Checking on results of example program."
521 "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyToLocal "example-region-listing.data" "${working_dir}/example-region-listing.data"
523 "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_example.out" 2>"${working_dir}/scan_example.err" <<EOF
527 echo "Verifying row count from example."
528 example_rowcount
=$
(echo 'count "test:example"' |
"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell
--noninteractive 2>/dev
/null |
tail -n 1)
529 if [ "${example_rowcount}" -gt "1049" ]; then
530 echo "Found ${example_rowcount} rows, which is enough to cover 48 for import, 1000 example's use of user table regions, 1 for example's use of meta region, and 1 for example's count record"
532 echo "ERROR: Only found ${example_rowcount} rows."