Update README for archival
[reddit.git] / scripts / migrate / example.sh
blob9fc6e4e27cb91a9777c197afbe46ce9d7ed009c0
#!/bin/bash
# This is an example of how regenerating the query cache
# works. You will need to configure several things first:
#
# * configure postgres environment, see run-query.sh
# * prepare pig:
#   * make sure the environment is set up for pig
#   * rearrange the values in TypeIDs.java to match your
#     instance's IDs
#   * compile the pig UDFs (ant build in udfs/)
#   * ensure the Cassandra JARs are on the CLASSPATH

# Stop at the first failing step so that a broken dump or pig run is
# never converted to sstables and bulk-loaded.
set -euo pipefail

# dump the relevant postgres tables to input/
./dump-all.sh

# process the postgres dumps in hadoop and generate
# output data suitable for cassandra
pig regenerate-query-cache.py

# for each column family we'll be writing to, generate
# sstables from the map-reduce output
for cf_dir in output/*/; do
  # An unmatched glob is left as the literal pattern; skip it rather
  # than handing a bogus name to jython.
  [[ -d "$cf_dir" ]] || continue

  cf=${cf_dir%/}  # drop the trailing slash added by the glob
  cf=${cf##*/}    # keep only the directory name (the column family)

  # The */part* glob is intentionally unquoted so each matching
  # part file is passed as a separate argument.
  jython tuples_to_sstables.py "$cf" "output/$cf"/*/part*
done

# bulk-load the sstables into cassandra
sstableloader reddit/