【1、创建table_env文件,增加如下内容】
执行命令:vi table_env
内容:
点击(此处)折叠或打开
- city_info=default.city_info
- product_info=default.product_info
- user_click=default.tmp_user_click
注:等号前为RDBMS表,等号后为Hive表
二、从MySQL将数据导入到Hive表中,并进行数据处理
【1、编写导入脚本:创建mysql_to_hive.sh文件,增加如下内容】
执行命令:vi mysql_to_hive.sh
内容:
点击(此处)折叠或打开
- #!/usr/bin/env bash
- # Import one RDBMS table into the matching Hive table via Sqoop.
- # Usage: mysql_to_hive.sh <mysql_table> <hive_table>
-
- # Provides DB_VALUES, DB_USER and INPUT_FIELDS_TERMINATED.
- source /app/works/user_env
-
- # Use the numeric test -ne for an integer comparison.
- if [ $# -ne 2 ] ; then
- echo "USAGE: $0 数据库表 Hive表"
- echo " e.g.: $0 'city_info' 'default.city_info'"
- echo " e.g.: $0 'product_info' 'default.product_info'"
- echo " e.g.: $0 'user_click' 'default.tmp_user_click'"
- exit 1;
- fi
- ### Import: --hive-overwrite replaces the Hive table contents each run;
- ### -m 1 uses a single mapper (small tables, no --split-by key needed).
- sqoop import --connect "${DB_VALUES}" \
- --username "${DB_USER}" \
- --password-file /input/sqoop.pwd \
- --table "$1" \
- --delete-target-dir \
- --fields-terminated-by "${INPUT_FIELDS_TERMINATED}" \
- --hive-import \
- --hive-overwrite \
- --hive-table "$2" \
- -m 1
执行命令:chmod 775 mysql_to_hive.sh
【2、编写处理数据脚本:创建hive_user_click.hql文件,增加如下内容】
执行命令:vi hive_user_click.hql
内容:
点击(此处)折叠或打开
- -- Load the partitioned user_click table from the raw import; the partition
- -- value (yyyy-MM-dd) is derived from the first 10 chars of action_time.
- set hive.exec.dynamic.partition.mode=nonstrict;
- insert overwrite table user_click partition(action_time)
- select user_id,
-        session_id,
-        city_id,
-        product_id,
-        substring(action_time,1,10) action_time
- from tmp_user_click
- -- route each partition's rows to one reducer to avoid many small files
- distribute by action_time;
【3、编写循环组装处理数据脚本:创建import_sqoop_while.sh文件,增加如下内容】
执行命令:vi import_sqoop_while.sh
内容:
点击(此处)折叠或打开
- #!/usr/bin/env bash
- # Read table_env ("mysql_table=hive_table" per line), import each pair via
- # mysql_to_hive.sh, then load the partitioned user_click table.
- source /app/works/user_env
- FILENAME=/app/works/table_env
- # Redirect the file into the loop: no useless cat, no subshell.
- while read -r LINE
- do
- db_table_name=$(echo "$LINE" | cut -d '=' -f 1)
- hive_table_name=$(echo "$LINE" | cut -d '=' -f 2)
- echo "***************************************************"
- echo "导入${hive_table_name}信息开始"
- echo "***************************************************"
- # The script has a bash shebang, so run it with bash (not sh).
- bash /app/works/mysql_to_hive.sh "${db_table_name}" "${hive_table_name}"
- done < "${FILENAME}"
- # Absolute path so the script works from any working directory.
- hive -f /app/works/hive_user_click.hql
执行命令:chmod 775 import_sqoop_while.sh
【4、执行脚本】
./import_sqoop_while.sh
三、从Hive将数据导入到MySQL表中,并进行数据处理
【1、创建配置文件:创建user_env文件,增加如下内容】(注:第二部分的脚本已通过source引用此文件,需在执行第二部分前先创建)
执行命令:vi user_env
内容:
点击(此处)折叠或打开
- # Shared connection settings sourced by the import/export scripts.
- export DB_IP_ADD=192.168.137.130
- # NOTE(review): DB_PROT/DB_DIRV look like typos for DB_PORT/DB_DRIV; names are
- # kept unchanged in case other scripts outside this file reference them.
- export DB_PROT=3306
- export DB_DIRV=jdbc:mysql
- export DB_NAME=works
- export DB_USER=root
- # Compose the JDBC URL from the parts above instead of duplicating them,
- # so host/port/db changes only need to be made once.
- export DB_VALUES="${DB_DIRV}://${DB_IP_ADD}:${DB_PROT}/${DB_NAME}?useUnicode=true&characterEncoding=utf-8"
- export INPUT_FIELDS_TERMINATED='\t'
- export HDFS_PATH='/works/tmp/'
【2、创建HQL脚本文件:创建user_click.hql文件,增加如下内容】
执行命令:vi user_click.hql
内容:
点击(此处)折叠或打开
- -- Top-3 most-visited products per area for one day (${hivevar:action_time}).
- -- The result is written under ${hivevar:hdfs_path} and later sqoop-exported
- -- into the MySQL table user_click_rs.
- insert overwrite directory '${hivevar:hdfs_path}'
- ROW FORMAT DELIMITED FIELDS TERMINATED BY '${hivevar:field_term}'
- select area, product_name, visit_num, rn, action_time
- from (
-   select c.area,
-          p.product_name,
-          sum(rn) visit_num,
-          row_number() over(partition by c.area order by sum(rn) desc) rn,
-          '${hivevar:action_time}' action_time
-   from city_info c
-   join (
-     select product_id, city_id, count(1) rn
-     from user_click
-     where action_time='${hivevar:action_time}'
-     group by product_id, city_id
-   ) u
-   join product_info p
-     -- rand() on NULL join keys scatters skewed NULL rows across reducers
-     -- instead of piling them onto a single reducer
-     on c.city_id = case when u.city_id is null then concat('cityid',rand()) else u.city_id end
-    and p.product_id = case when u.product_id is null then concat('prodid',rand()) else u.product_id end
-   group by c.area, p.product_name
- ) a
- where a.rn <= 3;
【3、创建从Hive导出脚本:创建hive_to_mysql.sh文件,增加如下内容】
执行命令:vi hive_to_mysql.sh
内容:
点击(此处)折叠或打开
- #!/usr/bin/env bash
- source /app/works/user_env
- if [ $# != 1 ] ; then
- echo "USAGE: $0 日期参数"
- echo " e.g.: $0 '2018-01-01'"
- exit 1;
- fi
- ### 删除数据库中特定数据
- sqoop eval \
- --connect "${DB_VALUES}" \
- --username ${DB_USER} \
- --password-file /input/sqoop.pwd \
- --query "delete from user_click_rs where sdate='$1'"
- ### 导出hive表中数据到HDFS
- hive -hivevar action_time=$1 -hivevar field_term=${INPUT_FIELDS_TERMINATED} -hivevar hdfs_path=${HDFS_PATH} -f /app/works/user_click.hql
- ### 导出HDFS数据到数据库中
- sqoop export --connect "${DB_VALUES}" \
- --username ${DB_USER} \
- --password-file /input/sqoop.pwd \
- --table user_click_rs \
- --export-dir ${HDFS_PATH} \
- --input-fields-terminated-by "${INPUT_FIELDS_TERMINATED}"
执行命令:chmod 775 hive_to_mysql.sh
【4、执行脚本】
./hive_to_mysql.sh '2016-05-05'
四、测试需要信息
【1、MySQL信息】
1.1、创建MySQL数据库
点击(此处)折叠或打开
- -- UTF-8 database holding the exported result tables
- CREATE DATABASE works DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
点击(此处)折叠或打开
- -- City dimension table
- -- (MySQL requires whitespace after "--", so comments use "-- text")
- create table city_info(
-   city_id int,
-   city_name varchar(255),
-   area varchar(255)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-
- -- Product dimension table
- create table product_info(
-   product_id int,
-   product_name varchar(50),
-   extend_info varchar(100)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-
- -- User click fact table (action_time kept as a string to match the Hive side)
- create table user_click(
-   user_id int,
-   session_id varchar(50),
-   action_time varchar(30),
-   city_id int,
-   product_id int
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-
- -- Statistics result table: top-3 products per area per day,
- -- loaded by hive_to_mysql.sh (sdate = the day parameter)
- create table user_click_rs(
-   area varchar(50),
-   product_name varchar(50),
-   visit_num int,
-   rn int,
-   sdate varchar(10)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
创建Hive表
点击(此处)折叠或打开
- -- Final user click table: external, partitioned by day (action_time)
- create external table user_click(
- user_id string,
- session_id string,
- city_id string,
- product_id string
- )
- partitioned by (action_time string)
- ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
- location '/works/user_click';
- -- Staging table for the raw sqoop import of user clicks
- create external table tmp_user_click(
- user_id string,
- session_id string,
- action_time string,
- city_id string,
- product_id string
- )
- ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
- location '/works/tmp_user_click';
- -- City dimension table
- create external table city_info(
- city_id int,
- city_name string,
- area string
- )
- ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
- location '/works/city_info';
- -- Product dimension table
- create external table product_info(
- product_id int,
- product_name string,
- extend_info string
- )
- ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
- location '/works/product_info';
若泽大数据交流群:671914634