hadoop - How can I select a part of rows and create a new table in HBase?
I have a large table in HBase and want to split it into several smaller tables, which would be easier for me to use. (The original table should be kept.) How can I do that?
For example, I have a table called all with the following rowkeys:
animal-1, ... plant-1, ... animal-2, ... plant-2, ... human-1, ... human-2, ...
I want to split it into 3 tables: animal, plant, and human, one for each type of living being. How can I do it?
You can use MapReduce with MultiTableOutputFormat, as in the example below.
However, the example below reads from a file, i.e. with TextInputFormat;
instead, you have to read your HBase table 'all' using TableInputFormat,
and instead of Table1, Table2, ... you have to use 'animal', 'plant', and 'human'.
As per your requirement: if you scan the HBase table and pass the rows to the mapper using TableInputFormat, the rowkey is available in the mapper's map method. You need to compare it there to decide which table the row should be inserted into.
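For example, the comparison can be as simple as taking the rowkey prefix before the '-'. Here is a self-contained illustration in plain Java (no HBase dependencies; the class and method names are invented, and a '-' is assumed to be present in every rowkey):

public class RowkeyRouting {
    // Returns the target table for a rowkey like "animal-1": everything before the '-'.
    static String targetTableFor(String rowKey) {
        return rowKey.substring(0, rowKey.indexOf('-'));
    }

    public static void main(String[] args) {
        for (String key : new String[] {"animal-1", "plant-2", "human-1"}) {
            System.out.println(key + " -> " + targetTableFor(key)); // animal, plant, human
        }
    }
}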
Please also see 7.2.2. HBase MapReduce Read/Write Example in the HBase Reference Guide.
package mapred;

import java.io.IOException;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.hbase.client.Put;

public class MultiTableMapper {

    static class InnerMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        public void map(LongWritable offset, Text value, Context context) throws IOException {
            // 'value' contains one line of tab-separated data (needs to be parsed out).
            // byte[] lineBytes = value.getBytes();
            String[] valueString = value.toString().split("\t");
            String rowId = /* HbaseManager.generateID(); */ "12345"; // the HBase rowkey generated from the line

            // Write to the first table.
            Put put = new Put(rowId.getBytes());
            put.add(Bytes.toBytes("UserInfo"), Bytes.toBytes("StudentName"), Bytes.toBytes(valueString[0]));
            try {
                context.write(new ImmutableBytesWritable(Bytes.toBytes("Table1")), put);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }

            // Write to the second table.
            Put put1 = new Put(rowId.getBytes());
            put1.add(Bytes.toBytes("MarksInfo"), Bytes.toBytes("Marks"), Bytes.toBytes(valueString[1]));
            // Create your KeyValue object
            // put.add(kv);
            try {
                context.write(new ImmutableBytesWritable(Bytes.toBytes("Table2")), put1);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    public static void createSubmittableJob() throws IOException, ClassNotFoundException, InterruptedException {
        Path inputDir = new Path("in");
        Configuration conf = /* HbaseManager.getHBConnection(); */ new Configuration();
        Job job = new Job(conf, "my_custom_job");
        job.setJarByClass(InnerMapper.class);
        FileInputFormat.setInputPaths(job, inputDir);
        job.setMapperClass(InnerMapper.class);
        job.setInputFormatClass(TextInputFormat.class);

        // This is the key to writing to multiple tables in HBase:
        job.setOutputFormatClass(MultiTableOutputFormat.class);
        // job.setNumReduceTasks(0);
        // TableMapReduceUtil.addDependencyJars(job);
        // TableMapReduceUtil.addDependencyJars(job.getConfiguration());

        System.out.println(job.waitForCompletion(true));
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        MultiTableMapper.createSubmittableJob();
        System.out.println();
    }
}
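Adapted to this question, the whole job could look like the sketch below. This is a minimal sketch, not a tested implementation: it assumes the same pre-1.0 HBase API as the template above, the class names SplitAllTable and RoutingMapper are invented for illustration, and every rowkey is assumed to contain a '-'.

package mapred;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;

public class SplitAllTable {

    // Reads each row of 'all' and routes it, unchanged, to the table named
    // by the rowkey prefix ("animal-1" goes to 'animal', and so on).
    static class RoutingMapper extends TableMapper<ImmutableBytesWritable, Put> {
        @Override
        public void map(ImmutableBytesWritable rowKey, Result columns, Context context)
                throws IOException, InterruptedException {
            String row = Bytes.toString(rowKey.get());
            String targetTable = row.substring(0, row.indexOf('-'));

            // Copy every cell of the source row into a Put for the target table.
            Put put = new Put(rowKey.get());
            for (KeyValue kv : columns.raw()) {
                put.add(kv);
            }
            context.write(new ImmutableBytesWritable(Bytes.toBytes(targetTable)), put);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = new Job(conf, "split-all-table");
        job.setJarByClass(SplitAllTable.class);

        Scan scan = new Scan();
        scan.setCaching(500);       // recommended for MapReduce scans
        scan.setCacheBlocks(false); // don't fill the block cache from a full scan

        // Read the source table with TableInputFormat (wired up by this helper) ...
        TableMapReduceUtil.initTableMapperJob("all", scan, RoutingMapper.class,
                ImmutableBytesWritable.class, Put.class, job);
        // ... and let each context.write() pick its own output table.
        job.setOutputFormatClass(MultiTableOutputFormat.class);
        job.setNumReduceTasks(0); // map-only: no sorting or reducing needed

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that MultiTableOutputFormat writes to existing tables only; create 'animal', 'plant', and 'human' (with the same column families as 'all') before running the job. The job is map-only because each row is routed independently, so no shuffle or reduce phase is needed.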