python连接HBase

发布时间:2019-09-12 07:58:26 | 编辑:auto | 阅读(2279)

    环境

    hadoop 2.7.0
    hbase 1.2.1
    Thrift 0.9.0

    启动hbase的Thrift RPC

    ./hbase-daemon.sh start thrift

    生成python的Thrift模块

    cd hbase-1.2.1/hbase-thrift/src/main/resources/org/apache/hadoop/hbase/thrift
    thrift --gen py Hbase.thrift
    
    #生成gen-py文件
    .
    ├── gen-py
    │   ├── hbase
    │   │   ├── constants.py
    │   │   ├── Hbase.py
    │   │   ├── Hbase-remote
    │   │   ├── __init__.py
    │   │   └── ttypes.py
    │   └── __init__.py
    └── Hbase.thrift
    #把gen-py/hbase复制到项目下
    .
    ├── hbase
    │   ├── constants.py
    │   ├── Hbase.py
    │   ├── Hbase.pyc
    │   ├── Hbase-remote
    │   ├── __init__.py
    │   ├── __init__.pyc
    │   ├── ttypes.py
    │   └── ttypes.pyc
    └── hbase_client.py
    

    hbase_client.py

    # -*- coding: utf-8 -*-
    
    from thrift.transport import TSocket
    from thrift.transport import TTransport
    from thrift.protocol import TBinaryProtocol
    
    from hbase import Hbase
    from hbase.ttypes import ColumnDescriptor, Mutation
    
    
    class HbaseClient(object):
        """Small convenience wrapper around the Thrift-generated ``Hbase.Client``.

        The connection is opened by the constructor.  Call :meth:`close` (or use
        the instance in a ``with`` statement) to release the underlying socket.
        """

        def __init__(self, host='localhost', port=9090):
            """Open a buffered, binary-protocol Thrift connection.

            :param host: host name or IP address passed to ``TSocket``.
            :param port: port passed to ``TSocket``.
            """
            # Keep a reference to the transport so the connection can be closed
            # later.
            self.transport = TTransport.TBufferedTransport(
                TSocket.TSocket(host, port))
            protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
            self.client = Hbase.Client(protocol)
            self.transport.open()

        def close(self):
            """Close the Thrift transport opened by the constructor."""
            self.transport.close()

        def __enter__(self):
            """Return the client itself so it can be used in a ``with`` block."""
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            """Close the transport on leaving the ``with`` block."""
            self.close()

        def get_tables(self):
            """Return the table names reported by ``getTableNames``."""
            return self.client.getTableNames()

        def create_table(self, table, *columns):
            """Create *table* with one ``ColumnDescriptor`` per given column name.

            :param table: name of the table to create.
            :param columns: column names, each wrapped in a ``ColumnDescriptor``.
            """
            # A real list (not a lazy ``map`` object) so the Thrift serializer
            # can take its length on Python 3 as well as Python 2.
            descriptors = [ColumnDescriptor(column) for column in columns]
            self.client.createTable(table, descriptors)

        def put(self, table, row, columns, attributes=None):
            """Write the given cells to one row via ``mutateRow``.

            :param table: table name.
            :param row: row key.
            :param columns: mapping of column to value, e.g. ``{"k:1": "11"}``.
            :param attributes: passed through unchanged to ``mutateRow``.
            """
            # Plain unpacking in a comprehension replaces the tuple-parameter
            # lambda, which is a syntax error on Python 3.
            mutations = [
                Mutation(column=name, value=value)
                for name, value in columns.items()
            ]
            self.client.mutateRow(table, row, mutations, attributes)

        def scan(self, table, start_row="", columns=None, attributes=None):
            """Yield one ``{column: value}`` dict per fetch from a scanner.

            The scanner is opened when iteration starts and is always closed,
            whether the scan is exhausted, abandoned, or fails part-way.

            :param table: table name.
            :param start_row: row key passed to ``scannerOpen``.
            :param columns: column list passed to ``scannerOpen``.
            :param attributes: passed through unchanged to ``scannerOpen``.
            """
            scanner = self.client.scannerOpen(table, start_row, columns, attributes)
            try:
                while True:
                    rows = self.client.scannerGet(scanner)
                    # An empty result ends the scan.
                    if not rows:
                        break
                    # Only the first element of each fetch is used.
                    yield {
                        name: cell.value
                        for name, cell in rows[0].columns.items()
                    }
            finally:
                # Release the scanner instead of leaking it.
                self.client.scannerClose(scanner)
    
    if __name__ == "__main__":
        # Demo run: create a table, list the tables, insert three rows, then
        # scan once restricted to the "name" column and once unrestricted.
        hbase = HbaseClient("192.168.19.74", 9090)
        hbase.create_table("student", "name", "coruse")
        print(hbase.get_tables())

        sample_rows = (
            ("1", {"name:": "zhangsan", "coruse:art": "88", "coruse:math": "12"}),
            ("2", {"name:": "lisi", "coruse:art": "90", "coruse:math": "100"}),
            ("3", {"name:": "lisi2"}),
        )
        for row_key, cells in sample_rows:
            hbase.put("student", row_key, cells)

        # ``None`` is scan()'s default for ``columns``: no column restriction.
        for wanted in (["name"], None):
            for record in hbase.scan("student", columns=wanted):
                print(record)
    

关键字