“林子雨大數據” 實驗3 HBase操作與介面編程 環境搭建 VM虛擬機和Ubuntu系統的安裝 在Windows中使用VirtualBox安裝Ubuntu虛擬機(2020年7月版本)_廈大資料庫實驗室博客 (xmu.edu.cn) Hadoop安裝(偽分散式) Hadoop3.1.3安裝教程_單機 ...
“林子雨大數據” 實驗3 HBase操作與介面編程
環境搭建
VM虛擬機和Ubuntu系統的安裝
在Windows中使用VirtualBox安裝Ubuntu虛擬機(2020年7月版本)_廈大資料庫實驗室博客 (xmu.edu.cn)
Hadoop安裝(偽分散式)
Hadoop3.1.3安裝教程_單機/偽分散式配置_Hadoop3.1.3/Ubuntu18.04(16.04)_廈大資料庫實驗室博客 (xmu.edu.cn)
HBase安裝(偽分散式)
HBase2.2.2安裝和編程實踐指南_廈大資料庫實驗室博客 (xmu.edu.cn)
通過NAT轉發使本地主機連接虛擬機
PowerShell SSH 連接 VirtualBox Ubuntu 虛擬機的具體步驟 - 小能日記 - 博客園 (cnblogs.com)
文件傳輸工具
FlashFXP - Secure FTP Client Software for Windows. Upload, Download, and Synchronize your files.
在VSCODE中使用SSH進行遠程開發
在VScode中使用SSH進行遠程開發_vscode ssh_Shipmaster_23的博客-CSDN博客
Linux中安裝go環境並運行
【golang】linux中安裝go環境並運行_linux 運行golang_沉默小管的博客-CSDN博客
理解Hbase
我終於看懂了HBase,太不容易了... - 知乎 (zhihu.com)
第三方軟體包
tsuna/gohbase: Pure-Go HBase client (github.com)
hrpc package - github.com/tsuna/gohbase/hrpc - Go Packages
gin框架 · Go語言中文文檔 (topgoer.com)
遇到過的問題
Linux 查看埠占用情況 | 菜鳥教程 (runoob.com)
介面編程
// Router registers every HTTP endpoint of the service on r.
//
// The trailing numbers refer to the lab exercise each endpoint answers.
// Exercise 3.4 (modify data) reuses the 1.3 insert handler.
func Router(r *gin.Engine) {
	endpoints := []struct {
		method  string
		path    string
		handler gin.HandlerFunc
	}{
		{"GET", "/", controller.HelloWorld},
		{"GET", "/TableList", controller.ShowTableList},               // 1.1
		{"GET", "/TableRows", controller.ShowTableRows},               // 1.2
		{"POST", "/TableInsertRowCol", controller.TableInsertRowCol},  // 1.3
		{"POST", "/TableDeleteRowCol", controller.TableDeleteRowCol},  // 1.3
		{"GET", "/TableTruncate", controller.TableTruncate},           // 1.4
		{"GET", "/TableRowsCount", controller.ShowTableRowsCount},     // 1.5
		{"POST", "/TableCreate", controller.TableCreate},              // 3.1
		{"POST", "/TableInsertRow", controller.TableInsertRow},        // 3.2
		{"GET", "/TableColumnScan", controller.TableColumnScan},       // 3.3
		{"POST", "/TableModifyData", controller.TableInsertRowCol},    // 3.4 same as 1.3
		{"POST", "/TableDeleteRow", controller.TableDeleteRow},        // 3.5
	}
	for _, ep := range endpoints {
		r.Handle(ep.method, ep.path, ep.handler)
	}
}
實驗過程
啟動服務
ssh <用戶名>@127.0.0.1 -p 10022
# Start HDFS from the Hadoop installation directory.
cd /usr/local/hadoop
./sbin/start-dfs.sh
# Start HBase from the HBase installation directory.
cd /usr/local/hbase
bin/start-hbase.sh
# Open the interactive HBase shell used for the table commands that follow.
cd /usr/local/hbase
bin/hbase shell
創建三個表
對應第2題答案
# student table: every attribute is its own column family; each put writes
# the value under that family with an empty column qualifier.
create 'student','S_No','S_Name','S_Sex','S_Age'
# Row s001
put 'student','s001','S_No','2015001'
put 'student','s001','S_Name','zhangsan'
put 'student','s001','S_Sex','male'
put 'student','s001','S_Age','23'
# Row s002
put 'student','s002','S_No','2015002'
put 'student','s002','S_Name','Mary'
put 'student','s002','S_Sex','female'
put 'student','s002','S_Age','22'
# Row s003
put 'student','s003','S_No','2015003'
put 'student','s003','S_Name','Lisi'
put 'student','s003','S_Sex','male'
put 'student','s003','S_Age','24'
# course table: course number, name and credit, one column family each.
create 'course','C_No','C_Name','C_Credit'
# Row c001
put 'course','c001','C_No','123001'
put 'course','c001','C_Name','Math'
put 'course','c001','C_Credit','2.0'
# Row c002
put 'course','c002','C_No','123002'
put 'course','c002','C_Name','Computer'
put 'course','c002','C_Credit','5.0'
# Row c003
put 'course','c003','C_No','123003'
put 'course','c003','C_Name','English'
put 'course','c003','C_Credit','3.0'
# SC table: one row per (student number, course number) pair with its score.
create 'SC','SC_Sno','SC_Cno','SC_Score'
# Row sc001
put 'SC','sc001','SC_Sno','2015001'
put 'SC','sc001','SC_Cno','123001'
put 'SC','sc001','SC_Score','86'
# Row sc002
put 'SC','sc002','SC_Sno','2015001'
put 'SC','sc002','SC_Cno','123002'
put 'SC','sc002','SC_Score','77'
# Row sc003
put 'SC','sc003','SC_Sno','2015002'
put 'SC','sc003','SC_Cno','123002'
put 'SC','sc003','SC_Score','77'
# Row sc004
put 'SC','sc004','SC_Sno','2015002'
put 'SC','sc004','SC_Cno','123003'
put 'SC','sc004','SC_Score','99'
# Row sc005
put 'SC','sc005','SC_Sno','2015003'
put 'SC','sc005','SC_Cno','123001'
put 'SC','sc005','SC_Score','98'
# Row sc006
put 'SC','sc006','SC_Sno','2015003'
put 'SC','sc006','SC_Cno','123002'
put 'SC','sc006','SC_Score','95'
後端編程
後端在啟動時分別與本地HBase建立管理員客戶端、普通客戶端的連接。管理員客戶端管理所有表,普通客戶端管理表數據的增刪改查。並啟動HTTP服務提供一系列API介面。
package variable
import "github.com/tsuna/gohbase"
// Shared HBase client handles. Both are assigned by package main's init
// before the HTTP server starts.
var (
	// AdminClient issues table-level requests: listing, creating,
	// disabling and deleting tables.
	AdminClient gohbase.AdminClient

	// Client issues data-level requests: scans, puts and deletes.
	Client gohbase.Client
)
package main
import (
"github.com/gin-gonic/gin"
"github.com/tsuna/gohbase"
"wolflong.com/hbase_gin/router"
"wolflong.com/hbase_gin/variable"
)
// init builds the two shared HBase clients (admin and data) against the
// local HBase instance and stores them in package variable.
func init() {
	const localHBase = "127.0.0.1"

	variable.AdminClient = gohbase.NewAdminClient(localHBase)
	variable.Client = gohbase.NewClient(localHBase)
}
// main builds the gin engine, registers all routes and serves HTTP on
// port 1313.
func main() {
	r := gin.Default()
	router.Router(r)
	// Run only returns when the server could not start or stopped with an
	// error (for example, the port is already in use). Surface that error
	// instead of exiting silently with status 0.
	if err := r.Run(":1313"); err != nil {
		panic(err)
	}
}
給定一個處理錯誤的通用方法
package controller
import "github.com/gin-gonic/gin"
// checkError is the shared failure path for all handlers.
//
// When err is nil it does nothing. Otherwise it writes a 500 JSON response
// carrying err's message and then panics with err, so no statement after
// the call site runs. The handlers parameter is accepted but not used.
func checkError(err error, c *gin.Context, handlers ...gin.HandlerFunc) {
	if err == nil {
		return
	}
	body := gin.H{"error": "致命錯誤", "back": err.Error()}
	c.JSON(500, body)
	panic(err)
}
1.1 列出所有表的相關信息
HBase Shell 對應代碼
list
從管理客戶端發出請求,獲取所有表,遍歷表將表的命名空間與表名存儲至切片中以json格式返回
// table is the JSON shape of one entry returned by ShowTableList.
type table struct {
	// Namespace is the table's namespace, e.g. "default".
	Namespace string `json:"namespace"`
	// Qualifier is the table name inside its namespace.
	Qualifier string `json:"qualifier"`
}
// ShowTableList answers exercise 1.1: it asks the admin client for all table
// names and responds with a JSON array of {namespace, qualifier} objects.
// Any HBase error is reported through checkError (500 + panic).
func ShowTableList(c *gin.Context) {
	req, err := hrpc.NewListTableNames(context.Background())
	checkError(err, c)

	names, err := variable.AdminClient.ListTableNames(req)
	checkError(err, c)

	var tables []table
	for i := range names {
		tables = append(tables, table{
			Namespace: string(names[i].GetNamespace()),
			Qualifier: string(names[i].GetQualifier()),
		})
	}

	fmt.Println(tables)
	c.JSON(200, tables)
}
測試結果
[
{
"namespace": "default",
"qualifier": "SC"
},
{
"namespace": "default",
"qualifier": "course"
},
{
"namespace": "default",
"qualifier": "student"
},
{
"namespace": "default",
"qualifier": "test"
}
]
1.2 列印指定表的所有記錄數據
HBase Shell 對應代碼
scan "course"
獲取請求參數table,從普通客戶端發出請求,獲取scanner,遍歷scanner直到io.EOF。請注意,當查詢的表沒數據時,第一條拿到的是nil數據,需要進行判斷。
// ShowTableRows answers exercise 1.2: it scans the table named by the
// "table" query parameter and responds with a JSON array holding one cell
// list per row.
//
// The scan ends on io.EOF. Any other scan error is reported through
// checkError instead of being treated as end-of-data, so a failed scan can
// no longer be returned as a successful (but truncated) result.
func ShowTableRows(c *gin.Context) {
	var cells [][]*hrpc.Cell

	scan, err := hrpc.NewScan(context.Background(), []byte(c.Query("table")))
	checkError(err, c)

	scanner := variable.Client.Scan(scan)
	// Stop the scan if we leave early (nil row or a panic from checkError).
	// The returned error is irrelevant once the response has been decided.
	defer scanner.Close()

	for {
		row, err := scanner.Next()
		if err == io.EOF {
			break
		}
		checkError(err, c)
		// An empty table may yield a nil row; treat it as end-of-data.
		if row == nil {
			break
		}
		cells = append(cells, row.Cells)
		fmt.Println(row.Cells)
	}

	c.JSON(200, cells)
}
localhost:1313/TableRows?table=course 測試結果。
請注意,Gin框架在轉換二進位數據時使用BASE64編碼,使得控制字元在網路上正常傳輸。
[
[
{
"row": "YzAwMQ==",
"family": "Q19DcmVkaXQ=",
"timestamp": 1680431640294,
"cell_type": 4,
"value": "Mi4w"
},
{
"row": "YzAwMQ==",
"family": "Q19DcmVkaXQ=",
"qualifier": "bmV3",
"timestamp": 1680432352886,
"cell_type": 4,
"value": "NS4w"
},
{
"row": "YzAwMQ==",
"family": "Q19OYW1l",
"timestamp": 1680431640279,
"cell_type": 4,
"value": "TWF0aA=="
},
{
"row": "YzAwMQ==",
"family": "Q19Obw==",
"timestamp": 1680431640250,
"cell_type": 4,
"value": "MTIzMDAx"
}
],
[
{
"row": "YzAwMg==",
"family": "Q19DcmVkaXQ=",
"timestamp": 1680431640328,
"cell_type": 4,
"value": "NS4w"
},
{
"row": "YzAwMg==",
"family": "Q19OYW1l",
"timestamp": 1680431640318,
"cell_type": 4,
"value": "Q29tcHV0ZXI="
},
{
"row": "YzAwMg==",
"family": "Q19Obw==",
"timestamp": 1680431640305,
"cell_type": 4,
"value": "MTIzMDAy"
}
],
[
{
"row": "YzAwMw==",
"family": "Q19DcmVkaXQ=",
"timestamp": 1680431640363,
"cell_type": 4,
"value": "My4w"
},
{
"row": "YzAwMw==",
"family": "Q19OYW1l",
"timestamp": 1680431640352,
"cell_type": 4,
"value": "RW5nbGlzaA=="
},
{
"row": "YzAwMw==",
"family": "Q19Obw==",
"timestamp": 1680431640343,
"cell_type": 4,
"value": "MTIzMDAz"
}
]
]
1.3 向已創建好的表添加和刪除指定的列族或列
HBase Shell 對應代碼
put 'course','c001','C_Credit:new','5.0'
delete 'course','c001','C_Credit:new'
使用普通客戶端進行put操作,需要準備一個item數據包含當前操作的列族或列以及對應的值。支持覆蓋重寫與新增。
// TableInsertRowCol answers exercises 1.3 and 3.4: it writes one value into
// a single cell, creating it or overwriting the existing value.
//
// Form parameters: table, rowKey, colFamily, col (qualifier, may be empty)
// and val. The HBase put result is returned as JSON; failures go through
// checkError.
func TableInsertRowCol(c *gin.Context) {
	var (
		tableName = c.PostForm("table")
		key       = c.PostForm("rowKey")
		family    = c.PostForm("colFamily")
		qualifier = c.PostForm("col")
		value     = c.PostForm("val")
	)

	// family -> qualifier -> value, the shape expected by hrpc.NewPutStr.
	item := map[string]map[string][]byte{
		family: {qualifier: []byte(value)},
	}
	fmt.Println(item)

	put, err := hrpc.NewPutStr(context.Background(), tableName, key, item)
	checkError(err, c)

	result, err := variable.Client.Put(put)
	checkError(err, c)

	c.JSON(200, result)
}
測試結果
{
"Cells": null,
"Stale": false,
"Partial": false,
"Exists": null
}
刪除列族或列,不需要val數據。列可以為空字元串,僅刪除列族,不為空時則刪除指定的列
// TableDeleteRowCol answers exercise 1.3 (delete): it removes either a whole
// column family or a single column from one row.
//
// Form parameters: table, rowKey, colFamily and col.
//   - col == "": the entire column family is deleted from the row.
//   - col != "": only the column colFamily:col is deleted.
//
// Previously an empty col was sent as a delete of the empty qualifier, which
// left other qualifiers of the family (e.g. "C_Credit:new") in place and
// contradicted the documented behaviour.
func TableDeleteRowCol(c *gin.Context) {
	table := c.PostForm("table")
	rowKey := c.PostForm("rowKey")
	colFamily := c.PostForm("colFamily")
	col := c.PostForm("col")

	item := make(map[string]map[string][]byte, 1)
	if col == "" {
		// A family key with a nil qualifier map asks HBase to delete the
		// whole family for this row.
		item[colFamily] = nil
	} else {
		item[colFamily] = map[string][]byte{col: {}}
	}
	fmt.Println(item)

	t, err := hrpc.NewDelStr(context.Background(), table, rowKey, item)
	checkError(err, c)
	res, err := variable.Client.Delete(t)
	checkError(err, c)
	c.JSON(200, res)
}
1.4 清空指定表的所有數據
HBase Shell 對應代碼
truncate 'course'
沒找到清空所有數據的hrpc函數,只能先拿到scanner再執行rowKey的每條刪除。
如果是delete表操作需要先disable表,再刪除表。
// TableTruncate answers exercise 1.4: it empties the table named by the
// "table" query parameter by scanning it and deleting every row key found.
//
// Dropping the table instead would require hrpc.NewDisableTable followed by
// hrpc.NewDeleteTable on the admin client; that is deliberately not done
// here so the table and its column families survive.
//
// Scan errors other than io.EOF are reported through checkError, so a
// partially completed truncate is no longer answered with "刪除成功".
func TableTruncate(c *gin.Context) {
	table := c.Query("table")

	scan, err := hrpc.NewScan(context.Background(), []byte(table))
	checkError(err, c)

	scanner := variable.Client.Scan(scan)
	// Stop the scan if we leave early (nil row or a panic from checkError).
	defer scanner.Close()

	for {
		row, err := scanner.Next()
		if err == io.EOF {
			break
		}
		checkError(err, c)
		if row == nil {
			break
		}
		// The row key is read from the first cell; skip a result without
		// cells rather than indexing out of range.
		if len(row.Cells) == 0 {
			continue
		}

		// nil values => delete the entire row.
		del, err := hrpc.NewDelStr(context.Background(), table, string(row.Cells[0].Row), nil)
		checkError(err, c)
		_, err = variable.Client.Delete(del)
		checkError(err, c)
	}

	c.JSON(200, gin.H{"result": "刪除成功"})
}
1.5 統計表的行數
HBase Shell 對應代碼
count 'course'
修改1.2的代碼
// ShowTableRowsCount answers exercise 1.5: it scans the table named by the
// "table" query parameter and responds with the number of rows as a JSON
// number.
//
// Scan errors other than io.EOF are reported through checkError, so a
// failed scan no longer produces a too-small count with status 200.
func ShowTableRowsCount(c *gin.Context) {
	var count int

	scan, err := hrpc.NewScan(context.Background(), []byte(c.Query("table")))
	checkError(err, c)

	scanner := variable.Client.Scan(scan)
	// Stop the scan if we leave early (nil row or a panic from checkError).
	defer scanner.Close()

	for {
		row, err := scanner.Next()
		if err == io.EOF {
			break
		}
		checkError(err, c)
		if row == nil {
			break
		}
		count++
	}

	c.JSON(200, count)
}
localhost:1313/TableRowsCount?table=course 測試結果
3
3.1 創建表
// TableCreate answers exercise 3.1: it (re)creates a table with the given
// column families.
//
// Form parameters:
//   - table:  table name (created in the default namespace).
//   - fields: JSON array of column family names, e.g. ["f1","f2"].
//
// If a table with that name already exists in the default namespace it is
// disabled and deleted first. The request is rejected with 400 before
// anything is dropped when table or fields is empty, so an invalid request
// cannot destroy an existing table and then fail to recreate it.
func TableCreate(c *gin.Context) {
	table := c.PostForm("table")
	fs := c.PostForm("fields")
	var fields []string
	err := json.Unmarshal([]byte(fs), &fields)
	checkError(err, c)

	if table == "" || len(fields) == 0 {
		c.JSON(400, gin.H{"error": "參數錯誤", "back": "table 與 fields 不可為空"})
		return
	}

	// Check whether the table already exists.
	listReq, err := hrpc.NewListTableNames(context.Background())
	checkError(err, c)
	names, err := variable.AdminClient.ListTableNames(listReq)
	checkError(err, c)
	exists := false
	for _, name := range names {
		// Disable/Delete/Create below all act on the default namespace, so
		// a same-named table in another namespace must not count as a match.
		if string(name.GetNamespace()) == "default" && string(name.GetQualifier()) == table {
			exists = true
			break
		}
	}

	// Drop the existing table; HBase requires it to be disabled first.
	if exists {
		disable := hrpc.NewDisableTable(context.Background(), []byte(table))
		err := variable.AdminClient.DisableTable(disable)
		checkError(err, c)
		drop := hrpc.NewDeleteTable(context.Background(), []byte(table))
		err = variable.AdminClient.DeleteTable(drop)
		checkError(err, c)
	}

	// Create the table: one column family per field, default attributes.
	families := make(map[string]map[string]string, len(fields))
	for _, f := range fields {
		families[f] = make(map[string]string)
	}
	create := hrpc.NewCreateTable(context.Background(), []byte(table), families)
	err = variable.AdminClient.CreateTable(create)
	checkError(err, c)
	c.JSON(200, gin.H{"result": "創建成功"})
}
通過1.1函數介面我們可知test表已經存在,現在我們使用3.1函數介面重新創建該表,並為接下來的3.2函數介面調用做準備。
{
"result": "創建成功"
}
3.2 新增記錄
為了增強健壯性,我們需要判斷傳入的fields、values參數個數是否一致,否則應當主動報錯。
// TableInsertRow answers exercise 3.2: it writes several cells of one row in
// a single put.
//
// Form parameters:
//   - table, rowKey: target table and row.
//   - fields: JSON array of column names, each "family" or "family:qualifier".
//   - values: JSON array of values, positionally matching fields.
//
// A length mismatch between fields and values is rejected through
// checkError. The HBase put result is returned as JSON.
func TableInsertRow(c *gin.Context) {
	table := c.PostForm("table")
	rowKey := c.PostForm("rowKey")
	fs := c.PostForm("fields")
	vs := c.PostForm("values")
	var fields []string
	var values []string
	err := json.Unmarshal([]byte(fs), &fields)
	checkError(err, c)
	err = json.Unmarshal([]byte(vs), &values)
	checkError(err, c)
	if len(fields) != len(values) {
		checkError(fmt.Errorf("數量不一致"), c)
	}

	item := make(map[string]map[string][]byte, len(fields))
	for i, field := range fields {
		// Split on the first ':' only, so a qualifier that itself contains
		// ':' is kept intact.
		parts := strings.SplitN(field, ":", 2)
		family, qualifier := parts[0], ""
		if len(parts) == 2 {
			qualifier = parts[1]
		}
		// Create the family map once; re-creating it per field would drop
		// qualifiers already collected for the same family.
		if item[family] == nil {
			item[family] = make(map[string][]byte)
		}
		item[family][qualifier] = []byte(values[i])
	}
	fmt.Println(item)

	t, err := hrpc.NewPutStr(context.Background(), table, rowKey, item)
	checkError(err, c)
	res, err := variable.Client.Put(t)
	checkError(err, c)
	c.JSON(200, res)
}
3.3 通過列過濾數據
使用scanner實現,也可以通過過濾器實現。
// TODO USE FILTER
// item is the JSON shape of one cell returned by TableColumnScan. The byte
// fields of the scanned cell are converted to strings so they appear as
// readable text in the response instead of base64.
type item struct {
	// Row is the row key of the cell.
	Row string `json:"row"`
	// Family is the column family of the cell.
	Family string `json:"family"`
	// Qualifier is the column qualifier; empty when the cell was written
	// with the family only.
	Qualifier string `json:"qualifier"`
	// Timestamp is copied unchanged from the scanned cell.
	Timestamp *uint64 `json:"timestamp"`
	// Cell_type is copied unchanged from the scanned cell.
	Cell_type *pb.CellType `json:"cell_type"`
	// Value is the cell content.
	Value string `json:"value"`
}
// TableColumnScan answers exercise 3.3: it scans the table named by the
// "table" query parameter and returns only the cells matching "column".
//
// column is either "family" (every cell of that family) or
// "family:qualifier" (only that exact column). Matching cells are returned
// as a JSON array of item objects.
//
// Scan errors other than io.EOF are reported through checkError instead of
// silently ending the scan with a partial result.
func TableColumnScan(c *gin.Context) {
	table := c.Query("table")
	column := c.Query("column")
	// Split on the first ':' only, so a qualifier containing ':' still
	// compares against the full qualifier.
	parts := strings.SplitN(column, ":", 2)
	family := parts[0]
	wantQualifier := len(parts) == 2

	var items []item

	scan, err := hrpc.NewScan(context.Background(), []byte(table))
	checkError(err, c)

	scanner := variable.Client.Scan(scan)
	// Stop the scan if we leave early (nil row or a panic from checkError).
	defer scanner.Close()

	for {
		row, err := scanner.Next()
		if err == io.EOF {
			break
		}
		checkError(err, c)
		if row == nil {
			break
		}
		for _, cell := range row.Cells {
			if string(cell.Family) != family {
				continue
			}
			if wantQualifier && string(cell.Qualifier) != parts[1] {
				continue
			}
			fmt.Println(row.Cells)
			items = append(items, item{
				Row:       string(cell.Row),
				Family:    string(cell.Family),
				Qualifier: string(cell.Qualifier),
				Timestamp: cell.Timestamp,
				Cell_type: cell.CellType,
				Value:     string(cell.Value),
			})
		}
	}

	c.JSON(200, items)
}
再執行一遍1.3添加列的函數,調用介面,執行結果如下。
localhost:1313/TableColumnScan?table=course&column=C_Credit
[
{
"row": "c001",
"family": "C_Credit",
"qualifier": "",
"timestamp": 1680431640294,
"cell_type": 4,
"value": "2.0"
},
{
"row": "c001",
"family": "C_Credit",
"qualifier": "new",
"timestamp": 1680434951646,
"cell_type": 4,
"value": "5.0"
},
{
"row": "c002",
"family": "C_Credit",
"qualifier": "",
"timestamp": 1680431640328,
"cell_type": 4,
"value": "5.0"
},
{
"row": "c003",
"family": "C_Credit",
"qualifier": "",
"timestamp": 1680431640363,
"cell_type": 4,
"value": "3.0"
}
]
localhost:1313/TableColumnScan?table=course&column=C_Credit:new
[
{
"row": "c001",
"family": "C_Credit",
"qualifier": "new",
"timestamp": 1680434951646,
"cell_type": 4,
"value": "5.0"
}
]
3.4 修改行數據
與 1.3 函數代碼一致
3.5 刪除表指定記錄
package controller
import (
"context"
"github.com/gin-gonic/gin"
"github.com/tsuna/gohbase/hrpc"
"wolflong.com/hbase_gin/variable"
)
// TableDeleteRow answers exercise 3.5: it deletes one row.
//
// Form parameters: table and rowKey. The HBase delete result is returned as
// JSON; failures go through checkError.
func TableDeleteRow(c *gin.Context) {
	var (
		tableName = c.PostForm("table")
		key       = c.PostForm("rowKey")
	)

	// No family/qualifier map is given, so the delete targets the whole row.
	del, err := hrpc.NewDelStr(context.Background(), tableName, key, nil)
	checkError(err, c)

	result, err := variable.Client.Delete(del)
	checkError(err, c)

	c.JSON(200, result)
}
{
"Cells": null,
"Stale": false,
"Partial": false,
"Exists": null
}
再次調用1.5函數介面,執行結果符合預期。
2
總結
裝環境2小時,代碼和文檔編寫4.5小時,代碼編寫過程中查閱官方文檔和解決問題的時間為3小時。
總共花費6.5個小時,共編寫333行代碼,56行表數據。
代碼編寫能力得到了提升。提高了自己對HBase的理解,作為一個典型的NoSQL資料庫,其一大優點是可在廉價PC伺服器上搭建起大規模結構化存儲集群,並提供易使用的HBase Shell操作數據集,水平擴展方便。