scripts/migrate/example.sh

   1 #!/bin/bash
   2 # This is an example of how regenerating the query cache
   3 # works. You will need to configure several things first:
   4 #
   5 # * configure postgres environment, see run-query.sh
   6 # * prepare pig:
   7 #     * make sure the environment is set up for pig
   8 #     * rearrange the values in TypeIDs.java to match your
   9 #       instance's IDs
  10 #     * compile the pig UDFs (ant build in udfs/)
  11 # * ensure the Cassandra JARs are on the CLASSPATH
  12
  13 # dump the relevant postgres tables to input/
  14 ./dump-all.sh
  15
  16 # process the postgres dumps in hadoop and generate
  17 # output data suitable for cassandra
  18 pig regenerate-query-cache.py
  19
  20 # for each column family we'll be writing to, generate
  21 # sstables from the map-reduce output
  22 for $cf in $(ls output/); do
  23     jython tuples_to_sstables.py $cf output/$cf/*/part*
  24 done
  25
  26 # bulk-load the sstables into cassandra
  27 sstableloader reddit/