你正在搭建一个用户活跃度的画像,其中一个与活跃度相关的特征是“最长连续登录天数”, 请用SQL实现“2023年1月1日-2023年1月31日用户最长的连续登录天数”
登陆表 tb_dau:
| fdate | user_id |
| 2023-01-01 | 10000 |
| 2023-01-02 | 10000 |
| 2023-01-04 | 10000 |
输出:
| user_id | max_consec_days |
| 10000 | 2 |
| fdate | user_id |
| 2023-01-01 | 10000 |
| 2023-01-02 | 10000 |
| 2023-01-04 | 10000 |
| user_id | max_consec_days |
| 10000 | 2 |
drop table if exists tb_dau;
create table `tb_dau` (
`fdate` date,
`user_id` int
);
insert into tb_dau(fdate, user_id)
values
('2023-01-01', 10000),
('2023-01-02', 10000),
('2023-01-04', 10000);user_id|max_consec_days 10000|2
id为10000的用户在1月1日及1月2日连续登录2日,1月4日登录1日,故最长连续登录天数为2日
MySQL中日期加减的函数日期增加 DATE_ADD,例:date_add('2023-01-01', interval 1 day) 输出 '2023-01-02'
日期减少 DATE_SUB,例:date_sub('2023-01-01', interval 1 day) 输出 '2022-12-31'日期差 DATEDIFF,例:datediff('2023-02-01', '2023-01-01') 输出31
WITH
TB as (
select distinct
fdate,
user_id
from
tb_dau
where
fdate between '2023-01-01' and '2023-01-31'
),
TB_group as (
select
fdate,
user_id,
date_sub(
fdate,
INTERVAL row_number() over (
partition by user_id
order by
fdate
) DAY
) as ranking
from
TB
),
TB_table as (
select
ranking,
user_id,
count(*) as consecutive_days
from
TB_group
GROUP BY
user_id,
ranking
)SELECT
user_id, MAX(consecutive_days) AS max_consec_days
FROM
TB_table
GROUP BY
user_id;
WITH result AS ( SELECT *, ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY fdate) AS rk FROM tb_dau ), result1 AS ( SELECT user_id, DAY(fdate) - rk, COUNT(*) AS consec_days FROM result GROUP BY user_id,DAY(fdate) - rk ) SELECT user_id, MAX(consec_days) AS max_consec_days FROM result1 GROUP BY user_id
WITH date_groups AS ( SELECT user_id, fdate, -- 计算连续日期分组标识 DATE_SUB(fdate, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY fdate) DAY) AS grp FROM tb_dau WHERE fdate BETWEEN '2023-01-01' AND '2023-01-31' ) SELECT user_id, MAX(consec_days) AS max_consec_days FROM ( SELECT user_id, grp, COUNT(*) AS consec_days -- 计算每组连续天数 FROM date_groups GROUP BY user_id, grp ) t GROUP BY user_id;
with tmp as (
select
IFNULL(DATEDIFF(next,fdate)=1,0) as diff,
row_number() over(partition by user_id order by fdate) as eday,
user_id
from(
select
fdate,
user_id,
lead(fdate,1,NULL) over(partition by user_id order by fdate) as next
from tb_dau) t1
)
select
user_id,
MAX(eday-last) as max_consec_days
from(
select
lag(eday,1,0) over(partition by user_id order by eday rows 1 preceding) as last,
user_id,
eday
from tmp
where diff=0
)t1
group by user_id
# Write your MySQL query statement below WITH t0 as ( select distinct fdate as login_date, user_id as id from tb_dau where fdate between '2023-01-01' and '2023-01-31' ) , t1 AS ( SELECT #------------只需要修改里面的逻辑就行。注意语句是“顺序执行”的------------------------------- login_date, id, (case when login_date = @prev_date + INTERVAL 1 DAY AND id = @prev_author_id then @consec_days := @consec_days + 1 when login_date = @prev_date AND id = @prev_author_id then @consec_days := @consec_days else @consec_days := 1 end) AS consec_days, @prev_date := login_date, @prev_author_id := id #-------------只需要修改里面的逻辑就行。注意语句是“顺序执行”的------------------------------ FROM (SELECT @prev_date := NULL, @prev_author_id := NULL, @consec_days := 1) vars, (SELECT login_date, id FROM t0 ORDER BY id , login_date) ordered_dates ) SELECT id as user_id, max((CAST(consec_days AS UNSIGNED))) as max_consec_days from t1 group by id
with v as ( select * ,row_number() over(partition by user_id order by fdate) as ranking from tb_dau where fdate between '2023-01-01' and '2023-01-31' ), vv as ( select subdate(fdate, ranking) as date ,count(*) as max_consec_days ,user_id from v group by date,user_id ) select user_id ,max(max_consec_days) as max_consec_days from vv group by user_id order by user_id
select user_id,max(date_cnt) as max_consec_days from( select user_id,base_date,count(1) as date_cnt from( select user_id,(fdate-rk+1) as base_date from( select user_id,fdate,row_number() over(partition by user_id order by fdate) as rk from( select user_id,fdate from tb_dau where year(fdate)='2023' and month(fdate)='01' group by user_id,fdate ) m ) n ) a group by user_id,base_date ) b group by user_id