1. python连接数据库
from pyhive import hive
conn = hive.Connection(
host= 'xxx xxx',
port= xxx,
auth='CUSTOM',
username='your user name',
password='your password',
database='default')
cursor = conn.cursor()
2. SQL代码封装
设计为可传参方式
def get_sql(d):
d = "'"+ d + "'"
add_sql_horizon = """
CREATE table tmp.horizon_feature as
with
tmp_dlr_cstm as(
select dealer_id,
sum(
case when DateDiff("""+d+""",created_time)<=30 and date_from not in (0,3) then 1
else 0
end
) as pcust_acc_1m_dlr_off
from dcs.nt_tp_p_customers
group by dealer_id
)
select
nvl(cast(d1.dealer_id as string),'') as dealer_id,
nvl(cast(d1.dealer_code as string),'') as dealer_code,
nvl(cast(d1.company_id as string),'') as company_id,
nvl(cast(tmp_dlr_cstm.pcust_acc_1m_dlr_off as string),'') as pcust_acc_1m_dlr_off
from dcs.nt_BB_DEALERS d1
left join tmp_dlr_cstm on d1.dealer_id = tmp_dlr_cstm.dealer_id
where d1.biz_status=10310005"""
add_sql_verticle ="""
insert into table tmp.dwb_dealer_feature_dd
select dealer_id,dealer_code,company_id,t.feat_name,t.feat_value,"""+d+""" as prod_time
from tmp.horizon_feature
lateral view explode(
map(
'pcust_acc_1m_dlr_off', pcust_acc_1m_dlr_off
)
)t as feat_name, feat_value
"""
return add_sql_horizon,add_sql_verticle
以上操作完成了新建一个横表,然后转化为纵表的操作。
3. 获取日期参数
import datetime
def getEveryDay(begin_date,end_date):
date_list = []
begin_date = datetime.datetime.strptime(begin_date, "%Y-%m-%d")
end_date = datetime.datetime.strptime(end_date,"%Y-%m-%d")
while begin_date <= end_date:
date_str = begin_date.strftime("%Y-%m-%d")
date_list.append(date_str)
begin_date += datetime.timedelta(days=1)
return date_list
4. 循环执行SQL脚本
for d in getEveryDay('2017-01-01','2019-08-07'):
cursor.execute("""drop table if exists tmp.horizon_feature""")
conn.commit()
horizon_sql,verticle_sql = get_sql(d)
cursor.execute(horizon_sql)
conn.commit()
cursor = conn.cursor()
cursor.execute(verticle_sql)
conn.commit()
cursor = conn.cursor()
cursor.close()
成功插入数据!
棒!
欢迎关注微信公众号“数据分析师手记”,一起进步!