insert overwrite table ods.customer partition (part_date="20211020") select `(part_date)?+.+` from ods.customer where part_date="20211021";
时间: 2024-05-29 16:12:40 浏览: 58
This query will overwrite the data in the partition "part_date=20211020" of the table "ods.customer" with the data from the partition "part_date=20211021" of the same table. The query uses dynamic partitioning to select the partition based on the value of the "part_date" column.
相关问题
insert overwrite table discountdw.dwd_sd_adds_order_bill_inc partition(dt = '2023-06-06') select t1.order_bill_id, t1.counterfoil_no, t1.acceptor, date_format(to_utc_timestamp(cast(t1.expiry_date as bigint) ,'GMT-8'),'YYYY-MM-dd'), t2.company_id, t1.cert_no, t1.company_name, t1.third_order_id, t1.counterfoil_amt/10000, t1.transaction_amt/10000, t1.rate, '3bp' as service_tariffing, ((DATEDIFF(to_utc_timestamp(t1.expiry_date ,'GMT-8'),to_utc_timestamp(t1.transaction_date ,'GMT-8') ) + adjust_days)* 0.0003 *(counterfoil_amt))/ 360 as service_fee, 360 as total_days, DATEDIFF(to_utc_timestamp(t1.expiry_date ,'GMT-8'),to_utc_timestamp(t1.transaction_date ,'GMT-8') ) + adjust_days as modulation_date, t3.channel_type, t3.bank_name, date_format(to_utc_timestamp(cast(t1.transaction_date as bigint) ,'GMT-8'),'YYYY-MM-dd'), t1.order_status_code, t1.order_status_msg, t4.fee_amt, t4.status, t1.tenant_id, t5.revenue, to_utc_timestamp(cast(t1.create_date as bigint) ,'GMT-8'), to_utc_timestamp(cast(t1.update_date as bigint) ,'GMT-8') from (select * from discountdw.ods_adds_order_bill_inc where dt ='2023-06-06' and channel_id=101110004 )t1 left join (select * from mecdw.ods_company_full where platform_id='sdpjw')t2 on t1.cert_no=t2.cert_no and t1.tenant_id=t2.tenant_id left join discountdw.dim_adds_product_full t3 on t1.partner_id=t3.partner_id and t1.product_id=t3.product_id left join (select * from mecdw.dwd_sc_fee_record_full where dt='2023-06-06' and biz_type=2 ) t4 on t1.order_bill_id=t4.third_id left join (select * from discountdw.ods_sd_order_ext_inc where dt='2023-06-06') t5 on t1.order_bill_id=t5.order_bill_id left join sdpjwdw.dim_holiday_info_full t6 on date_format(to_utc_timestamp(t1.expiry_date ,'GMT-8'),'YYYY-MM-dd') = t6.civic_holiday ;
这是一条 SQL 语句,看起来比较长。以下是一些可以考虑优化的建议:
1. 将表名、字段名等缩短,以减少字符数和提高可读性。
2. 将复杂的表连接操作拆分成多个子查询,以降低查询复杂度。
3. 对于一些计算操作,可以考虑使用内置函数或自定义函数,以简化代码并提高可维护性。
4. 对于一些重复的操作,可以将其提取成变量或常量,以减少重复代码。
综上所述,优化后的 SQL 可能类似于以下代码(仅供参考):
```
-- 定义常量
SET adjust_days = 1;
-- 查询订单表
WITH order_data AS (
SELECT
o.order_bill_id,
o.counterfoil_no,
o.acceptor,
DATE_FORMAT(TO_UTC_TIMESTAMP(CAST(o.expiry_date AS BIGINT), 'GMT-8'), 'YYYY-MM-dd') AS expiry_date,
o.cert_no,
o.company_name,
o.third_order_id,
o.counterfoil_amt / 10000 AS counterfoil_amt,
o.transaction_amt / 10000 AS transaction_amt,
o.rate,
'3bp' AS service_tariffing,
((DATEDIFF(TO_UTC_TIMESTAMP(o.expiry_date, 'GMT-8'), TO_UTC_TIMESTAMP(o.transaction_date, 'GMT-8')) + adjust_days) * 0.0003 * (o.counterfoil_amt)) / 360 AS service_fee,
360 AS total_days,
DATEDIFF(TO_UTC_TIMESTAMP(o.expiry_date, 'GMT-8'), TO_UTC_TIMESTAMP(o.transaction_date, 'GMT-8')) + adjust_days AS modulation_date,
o.order_status_code,
o.order_status_msg,
o.tenant_id,
TO_UTC_TIMESTAMP(CAST(o.create_date AS BIGINT), 'GMT-8') AS create_date,
TO_UTC_TIMESTAMP(CAST(o.update_date AS BIGINT), 'GMT-8') AS update_date
FROM discountdw.ods_adds_order_bill_inc o
WHERE o.dt = '2023-06-06' AND o.channel_id = 101110004
),
-- 查询公司表
company_data AS (
SELECT
c.company_id,
c.cert_no,
c.tenant_id
FROM mecdw.ods_company_full c
WHERE c.platform_id = 'sdpjw'
),
-- 查询产品表
product_data AS (
SELECT
p.partner_id,
p.product_id,
p.channel_type,
p.bank_name
FROM discountdw.dim_adds_product_full p
),
-- 查询费用记录表
fee_data AS (
SELECT
f.third_id,
f.fee_amt,
f.status
FROM mecdw.dwd_sc_fee_record_full f
WHERE f.dt = '2023-06-06' AND f.biz_type = 2
),
-- 查询订单扩展信息表
order_ext_data AS (
SELECT
e.order_bill_id,
e.revenue
FROM discountdw.ods_sd_order_ext_inc e
WHERE e.dt = '2023-06-06'
),
-- 查询节假日表
holiday_data AS (
SELECT
h.civic_holiday
FROM sdpjwdw.dim_holiday_info_full h
)
-- 合并数据
INSERT OVERWRITE TABLE discountdw.dwd_sd_adds_order_bill_inc PARTITION(dt = '2023-06-06')
SELECT
o.order_bill_id,
o.counterfoil_no,
o.acceptor,
o.expiry_date,
c.company_id,
o.cert_no,
o.company_name,
o.third_order_id,
o.counterfoil_amt,
o.transaction_amt,
o.rate,
o.service_tariffing,
o.service_fee,
o.total_days,
o.modulation_date,
p.channel_type,
p.bank_name,
DATE_FORMAT(TO_UTC_TIMESTAMP(CAST(o.transaction_date AS BIGINT), 'GMT-8'), 'YYYY-MM-dd'),
o.order_status_code,
o.order_status_msg,
f.fee_amt,
f.status,
o.tenant_id,
e.revenue,
o.create_date,
o.update_date
FROM order_data o
LEFT JOIN company_data c ON o.cert_no = c.cert_no AND o.tenant_id = c.tenant_id
LEFT JOIN product_data p ON o.partner_id = p.partner_id AND o.product_id = p.product_id
LEFT JOIN fee_data f ON o.order_bill_id = f.third_id
LEFT JOIN order_ext_data e ON o.order_bill_id = e.order_bill_id
LEFT JOIN holiday_data h ON o.expiry_date = DATE_FORMAT(TO_UTC_TIMESTAMP(h.civic_holiday, 'GMT-8'), 'YYYY-MM-dd');
```
set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.parallel=true; set hive.exec.parallel.thread.number=30; --drop table if exists dm_finrisk.dm_event_walletSettleSuccessEvent_di; create table if not exists dm_finrisk.dm_event_cashLoanEvent_di( event_uuid string ,event_code string ,dt string ,event_time string ,rawSnapshot string ) COMMENT 'cashLoanEvent' PARTITIONED BY ( day bigint)stored as parquet; drop table if exists tmp.tmp_dm_event_cashLoanEvent_di_01_${zdt.addDay(-1).format("yyyyMMdd")}; create table if not exists tmp.tmp_dm_event_cashLoanEvent_di_01_${zdt.addDay(-1).format("yyyyMMdd")} as select uuid as event_uuid ,event as event_code ,from_unixtime(unix_timestamp(cast(day as string),'yyyymmdd'),'yyyy-mm-dd') as dt ,from_unixtime(cast(cast(`time` AS BIGINT)/1000 AS BIGINT),'yyyy-MM-dd HH:mm:ss') as event_time ,get_json_object(data,'$.hotPublish.rawSnapshot') as rawSnapshot from dm_finrisk.ods_event_hdfs_snapshot_di where code = 'shield@cashLoanEvent' and day=${zdt.addDay(-1).format("yyyyMMdd")} drop table if exists tmp.tmp_dm_event_cashLoanEvent_di_02_${zdt.addDay(-1).format("yyyyMMdd")}; create table if not exists tmp.tmp_dm_event_cashLoanEvent_di_02_${zdt.addDay(-1).format("yyyyMMdd")} as select event_uuid ,event_code ,dt ,event_time ,rawSnapshot from tmp.tmp_dm_event_cashLoanEvent_di_01_${zdt.addDay(-1).format("yyyyMMdd")}; alter table dm_finrisk.dm_event_cashLoanEvent_di drop if exists partition (day = ${zdt.addDay(-1).format("yyyyMMdd")}); insert overwrite table dm_finrisk.dm_event_cashLoanEvent_di partition (day = ${zdt.addDay(-1).format("yyyyMMdd")}) select event_uuid ,event_code ,dt ,event_time ,rawSnapshot from tmp.tmp_dm_event_cashLoanEvent_di_02_${zdt.addDay(-1).format("yyyyMMdd")}分析下上面代码
上面的代码主要是一个HiveQL脚本,用于从`dm_finrisk.ods_event_hdfs_snapshot_di`表中提取`code`为`shield@cashLoanEvent`且`day`为昨天日期的数据,并将其存储到`dm_finrisk.dm_event_cashLoanEvent_di`表中的对应分区中。
具体来说,该脚本的执行步骤如下:
1. 第1行:设置Hive执行参数,包括动态分区模式、并行执行和并行执行线程数等。
2. 第3-8行:创建`dm_finrisk.dm_event_cashLoanEvent_di`表,该表包含5个列,分别为`event_uuid`、`event_code`、`dt`、`event_time`和`rawSnapshot`,其中`dt`为分区列。
3. 第10-15行:创建临时表`tmp.tmp_dm_event_cashLoanEvent_di_01_${zdt.addDay(-1).format("yyyyMMdd")}`,该表从`dm_finrisk.ods_event_hdfs_snapshot_di`表中选择`code`为`shield@cashLoanEvent`且`day`为昨天日期的记录,并将其中的`uuid`、`event`、`day`、`time`和`data`字段分别映射到`event_uuid`、`event_code`、`dt`、`event_time`和`rawSnapshot`列中。
4. 第17-22行:创建临时表`tmp.tmp_dm_event_cashLoanEvent_di_02_${zdt.addDay(-1).format("yyyyMMdd")}`,该表从临时表`tmp.tmp_dm_event_cashLoanEvent_di_01_${zdt.addDay(-1).format("yyyyMMdd")}`中选择所有列,用于后续的数据处理。
5. 第24行:如果`dm_finrisk.dm_event_cashLoanEvent_di`表中昨天的分区已经存在,则删除该分区。
6. 第26-27行:将临时表`tmp.tmp_dm_event_cashLoanEvent_di_02_${zdt.addDay(-1).format("yyyyMMdd")}`中的数据插入到`dm_finrisk.dm_event_cashLoanEvent_di`表的昨天分区中。
整个脚本的作用是将来自`dm_finrisk.ods_event_hdfs_snapshot_di`表的特定数据提取出来,并存储到`dm_finrisk.dm_event_cashLoanEvent_di`表的对应分区中,以便后续的分析和查询。
阅读全文