SELECT DISTINCT qianniuuser_id,DATE(client_date) AS login_date FROM ( SELECT pt, type, event_id, cast(get_json_object(params, '$.qianniuUserId') AS BIGINT) AS qianniuuser_id, FROM_UNIXTIME(CAST(IF(LENGTH(client_timestamp) = 10,client_timestamp,client_timestamp / 1000) AS BIGINT)) AS client_date FROM ( SELECT pt, type, event_id, params,client_timestamp -- 这里暂时还是用旧表,数据周期长一些,后续补数合并后统一用小时表: FROM mogic.ods_log_burial_point_full -- 这里需要确认下是否为自然月:【按照每周的区间分别统计】 WHERE pt >= '20230428' AND pt <= '20230531' ) a ) temp WHERE type = 'behavior' AND event_id = 'show_compose_result'. 用户留存数和留存率只要在3日有第二次登陆都算留存,7日内有第二次登陆也算7日留存,14日内有第二次登陆也算14日留存,30日内有第二次登陆也算30日留存,现在要求不同的日期,用户数和他的3日留存数,3留存率,7日留存数,7日留存率,14日留存数,14日留存率,30日留存数,30日留存率,这个sql要怎么写用户id就是qianniuuser_id,日期就是login_date
时间: 2024-03-01 09:55:30 浏览: 21
感谢您的问题,以下是我给出的 SQL 语句:
```
SELECT
login_date AS 日期,
COUNT(DISTINCT qianniuuser_id) AS 用户数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 3 THEN qianniuuser_id END) AS 3日留存数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 3 THEN qianniuuser_id END) / COUNT(DISTINCT qianniuuser_id) AS 3日留存率,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 7 THEN qianniuuser_id END) AS 7日留存数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 7 THEN qianniuuser_id END) / COUNT(DISTINCT qianniuuser_id) AS 7日留存率,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 14 THEN qianniuuser_id END) AS 14日留存数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 14 THEN qianniuuser_id END) / COUNT(DISTINCT qianniuuser_id) AS 14日留存率,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 30 THEN qianniuuser_id END) AS 30日留存数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 30 THEN qianniuuser_id END) / COUNT(DISTINCT qianniuuser_id) AS 30日留存率
FROM (
SELECT DISTINCT
qianniuuser_id,
DATE(client_date) AS login_date,
MIN(CASE WHEN DATEDIFF(DATE(client_date), second_login_date) >= 0 THEN second_login_date ELSE NULL END) AS second_login_date
FROM (
SELECT
pt,
type,
event_id,
CAST(GET_JSON_OBJECT(params, '$.qianniuUserId') AS BIGINT) AS qianniuuser_id,
FROM_UNIXTIME(CAST(IF(LENGTH(client_timestamp) = 10, client_timestamp, client_timestamp / 1000) AS BIGINT)) AS client_date,
LAG(FROM_UNIXTIME(CAST(IF(LENGTH(client_timestamp) = 10, client_timestamp, client_timestamp / 1000) AS BIGINT))) OVER
(PARTITION BY CAST(GET_JSON_OBJECT(params, '$.qianniuUserId') AS BIGINT) ORDER BY client_date) AS second_login_date
FROM (
SELECT
pt,
type,
event_id,
params,
client_timestamp
FROM mogic.ods_log_burial_point_full
WHERE pt >= '20230428' AND pt <= '20230531'
) a
WHERE type = 'behavior' AND event_id = 'show_compose_result'
) temp
WHERE second_login_date IS NOT NULL
GROUP BY qianniuuser_id, DATE(client_date)
) t
GROUP BY login_date;
```
解释一下这个 SQL 语句的思路:
首先,我们需要找到每个用户的第二次登录时间,所以我们使用了一个包含了 LAG 函数的子查询:
```
SELECT
pt,
type,
event_id,
CAST(GET_JSON_OBJECT(params, '$.qianniuUserId') AS BIGINT) AS qianniuuser_id,
FROM_UNIXTIME(CAST(IF(LENGTH(client_timestamp) = 10, client_timestamp, client_timestamp / 1000) AS BIGINT)) AS client_date,
LAG(FROM_UNIXTIME(CAST(IF(LENGTH(client_timestamp) = 10, client_timestamp, client_timestamp / 1000) AS BIGINT))) OVER
(PARTITION BY CAST(GET_JSON_OBJECT(params, '$.qianniuUserId') AS BIGINT) ORDER BY client_date) AS second_login_date
FROM (
SELECT
pt,
type,
event_id,
params,
client_timestamp
FROM mogic.ods_log_burial_point_full
WHERE pt >= '20230428' AND pt <= '20230531'
) a
WHERE type = 'behavior' AND event_id = 'show_compose_result'
```
然后,我们需要找到每个用户的每次登录时间,并和第二次登录时间进行比较,筛选出符合条件的用户,最后按照日期进行统计:
```
SELECT
login_date AS 日期,
COUNT(DISTINCT qianniuuser_id) AS 用户数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 3 THEN qianniuuser_id END) AS 3日留存数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 3 THEN qianniuuser_id END) / COUNT(DISTINCT qianniuuser_id) AS 3日留存率,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 7 THEN qianniuuser_id END) AS 7日留存数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 7 THEN qianniuuser_id END) / COUNT(DISTINCT qianniuuser_id) AS 7日留存率,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 14 THEN qianniuuser_id END) AS 14日留存数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 14 THEN qianniuuser_id END) / COUNT(DISTINCT qianniuuser_id) AS 14日留存率,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 30 THEN qianniuuser_id END) AS 30日留存数,
COUNT(DISTINCT CASE WHEN DATEDIFF(login_date, second_login_date) <= 30 THEN qianniuuser_id END) / COUNT(DISTINCT qianniuuser_id) AS 30日留存率
FROM (
SELECT DISTINCT
qianniuuser_id,
DATE(client_date) AS login_date,
MIN(CASE WHEN DATEDIFF(DATE(client_date), second_login_date) >= 0 THEN second_login_date ELSE NULL END) AS second_login_date
FROM (
-- 上面的子查询
) temp
WHERE second_login_date IS NOT NULL
GROUP BY qianniuuser_id, DATE(client_date)
) t
GROUP BY login_date;
```
希望这个 SQL 语句能够帮到您,如果您有任何问题,请随时问我。