This project has been retired. For details, please refer to its Attic page.
HFileUtils (Apache Crunch 0.9.0 API)

org.apache.crunch.io.hbase
Class HFileUtils

java.lang.Object
  extended by org.apache.crunch.io.hbase.HFileUtils

public final class HFileUtils
extends Object


Nested Class Summary
static class HFileUtils.KeyValueComparator
           
 
Constructor Summary
HFileUtils()
           
 
Method Summary
static PCollection<org.apache.hadoop.hbase.client.Result> combineIntoRow(PCollection<org.apache.hadoop.hbase.KeyValue> kvs)
           
static PCollection<org.apache.hadoop.hbase.client.Result> combineIntoRow(PCollection<org.apache.hadoop.hbase.KeyValue> kvs, org.apache.hadoop.hbase.client.Scan scan)
          Converts a collection of KeyValues into Results.
static PCollection<org.apache.hadoop.hbase.client.Result> scanHFiles(Pipeline pipeline, List<org.apache.hadoop.fs.Path> paths, org.apache.hadoop.hbase.client.Scan scan)
           
static PCollection<org.apache.hadoop.hbase.client.Result> scanHFiles(Pipeline pipeline, org.apache.hadoop.fs.Path path)
           
static PCollection<org.apache.hadoop.hbase.client.Result> scanHFiles(Pipeline pipeline, org.apache.hadoop.fs.Path path, org.apache.hadoop.hbase.client.Scan scan)
          Scans HFiles with filter conditions.
static PCollection<org.apache.hadoop.hbase.KeyValue> sortAndPartition(PCollection<org.apache.hadoop.hbase.KeyValue> kvs, org.apache.hadoop.hbase.client.HTable table)
           
static void writePutsToHFilesForIncrementalLoad(PCollection<org.apache.hadoop.hbase.client.Put> puts, org.apache.hadoop.hbase.client.HTable table, org.apache.hadoop.fs.Path outputPath)
           
static void writeToHFilesForIncrementalLoad(PCollection<org.apache.hadoop.hbase.KeyValue> kvs, org.apache.hadoop.hbase.client.HTable table, org.apache.hadoop.fs.Path outputPath)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

HFileUtils

public HFileUtils()
Method Detail

scanHFiles

public static PCollection<org.apache.hadoop.hbase.client.Result> scanHFiles(Pipeline pipeline,
                                                                            org.apache.hadoop.fs.Path path)

scanHFiles

public static PCollection<org.apache.hadoop.hbase.client.Result> scanHFiles(Pipeline pipeline,
                                                                            org.apache.hadoop.fs.Path path,
                                                                            org.apache.hadoop.hbase.client.Scan scan)
Scans HFiles with filter conditions.

Parameters:
pipeline - the pipeline
path - path to HFiles
scan - filtering conditions
Returns:
Results
See Also:
combineIntoRow(org.apache.crunch.PCollection, org.apache.hadoop.hbase.client.Scan)

scanHFiles

public static PCollection<org.apache.hadoop.hbase.client.Result> scanHFiles(Pipeline pipeline,
                                                                            List<org.apache.hadoop.fs.Path> paths,
                                                                            org.apache.hadoop.hbase.client.Scan scan)

combineIntoRow

public static PCollection<org.apache.hadoop.hbase.client.Result> combineIntoRow(PCollection<org.apache.hadoop.hbase.KeyValue> kvs)

combineIntoRow

public static PCollection<org.apache.hadoop.hbase.client.Result> combineIntoRow(PCollection<org.apache.hadoop.hbase.KeyValue> kvs,
                                                                                org.apache.hadoop.hbase.client.Scan scan)
Converts a collection of KeyValues into Results. All KeyValues that belong to the same row are combined. Users may provide filter conditions (specified by scan). Deletes are dropped and only a specified number of versions are kept.

Parameters:
kvs - the input KeyValues
scan - filter conditions, currently we support start row, stop row and family map
Returns:
Results

writeToHFilesForIncrementalLoad

public static void writeToHFilesForIncrementalLoad(PCollection<org.apache.hadoop.hbase.KeyValue> kvs,
                                                   org.apache.hadoop.hbase.client.HTable table,
                                                   org.apache.hadoop.fs.Path outputPath)
                                            throws IOException
Throws:
IOException

writePutsToHFilesForIncrementalLoad

public static void writePutsToHFilesForIncrementalLoad(PCollection<org.apache.hadoop.hbase.client.Put> puts,
                                                       org.apache.hadoop.hbase.client.HTable table,
                                                       org.apache.hadoop.fs.Path outputPath)
                                                throws IOException
Throws:
IOException

sortAndPartition

public static PCollection<org.apache.hadoop.hbase.KeyValue> sortAndPartition(PCollection<org.apache.hadoop.hbase.KeyValue> kvs,
                                                                             org.apache.hadoop.hbase.client.HTable table)
                                                                      throws IOException
Throws:
IOException


Copyright © 2014 The Apache Software Foundation. All Rights Reserved.