你正在搭建一个用户活跃度的画像,其中一个与活跃度相关的特征是“最长连续登录天数”, 请用SQL实现“2023年1月1日-2023年1月31日用户最长的连续登录天数”
登陆表 tb_dau:
| fdate | user_id |
| 2023-01-01 | 10000 |
| 2023-01-02 | 10000 |
| 2023-01-04 | 10000 |
输出:
| user_id | max_consec_days |
| 10000 | 2 |
| fdate | user_id |
| 2023-01-01 | 10000 |
| 2023-01-02 | 10000 |
| 2023-01-04 | 10000 |
| user_id | max_consec_days |
| 10000 | 2 |
drop table if exists tb_dau;
create table `tb_dau` (
`fdate` date,
`user_id` int
);
insert into tb_dau(fdate, user_id)
values
('2023-01-01', 10000),
('2023-01-02', 10000),
('2023-01-04', 10000);user_id|max_consec_days 10000|2
id为10000的用户在1月1日及1月2日连续登录2日,1月4日登录1日,故最长连续登录天数为2日
MySQL中日期加减的函数日期增加 DATE_ADD,例:date_add('2023-01-01', interval 1 day) 输出 '2023-01-02'
日期减少 DATE_SUB,例:date_sub('2023-01-01', interval 1 day) 输出 '2022-12-31'日期差 DATEDIFF,例:datediff('2023-02-01', '2023-01-01') 输出31
select user_id, max(consec_days) max_consec_days from (select user_id, count(startday) consec_days from (select user_id, date_sub(fdate, interval (dense_rank() over (partition by user_id order by fdate)) day) startday from tb_dau where year(fdate) = 2023 and month(fdate) = 1 ) t1 group by user_id, startday ) t2 group by user_id
select user_id ,max(num) max_consec_days from (select user_id ,date_sub(fdate, interval r day) datesub ,count(1) num from (select user_id ,fdate ,row_number()over(partition by user_id order by fdate) r from tb_dau where fdate between '2023-01-01' and '2023-1-31') a group by 1,2 ) b group by 1
#抄的评论区大佬的,看了好几遍才看懂 with t1 as( select user_id, count(continday) as consec_days from ( select user_id, # data_sub函数 # 用登陆日期-连续递增的数字(排序的位次), # 如果日期连续,就会产生相同的结果 date_sub(fdate, interval ( dense_rank() over (partition by user_id order by fdate) ) day ) continday from tb_dau where year(fdate) = 2023 and month(fdate) = 1 ) t2 # 对计算结果进行分组,相同的在一组,表示连续登录 group by user_id, continday ) select user_id, max(consec_days) as max_consec_days from t1 group by user_id
select user_id, max(co) max_consec_days from ( select user_id, count(rn) co from ( select fdate, user_id, row_number()over(partition by user_id order by fdate) rn, date_sub(fdate,interval row_number()over(partition by user_id order by fdate) day) ds from tb_dau ) a group by user_id,ds ) b group by user_id;
with t1 as ( select distinct user_id, fdate from tb_dau where fdate >= '2023-01-01' and fdate <= '2023-01-31' ), t2 as ( select user_id, fdate, rank() over ( partition by user_id order by fdate ) as rn from t1 ), t3 as ( select user_id, fdate, date_sub (fdate, interval rn day) flag_date from t2 ), t4 as ( select user_id, count(*) total from t3 group by user_id, flag_date ) select user_id, max(total) as max_consec_days from t4 group by user_id
# Write your MySQL query statement below WITH t0 as ( select distinct fdate as login_date, user_id as id from tb_dau where fdate between '2023-01-01' and '2023-01-31' ) , t1 AS ( SELECT #------------只需要修改里面的逻辑就行。注意语句是“顺序执行”的------------------------------- login_date, id, (case when login_date = @prev_date + INTERVAL 1 DAY AND id = @prev_author_id then @consec_days := @consec_days + 1 when login_date = @prev_date AND id = @prev_author_id then @consec_days := @consec_days else @consec_days := 1 end) AS consec_days, @prev_date := login_date, @prev_author_id := id #-------------只需要修改里面的逻辑就行。注意语句是“顺序执行”的------------------------------ FROM (SELECT @prev_date := NULL, @prev_author_id := NULL, @consec_days := 1) vars, (SELECT login_date, id FROM t0 ORDER BY id , login_date) ordered_dates ) SELECT id as user_id, max((CAST(consec_days AS UNSIGNED))) as max_consec_days from t1 group by id
with tmp0 as(select a.fdate,a.user_id,b.fdate as hdate,b.user_id as huser_id,row_number() over(partition by a.fdate,a.user_id order by b.fdate asc) as xianzaiduoshaoge,datediff(b.fdate,a.fdate) as date_diff from tb_dau a left join tb_dau b on b.fdate>=a.fdate and b.user_id=a.user_id where a.fdate between '2023-01-01' and '2023-01-31'), tmp1 as (select * from tmp0 where date_diff+1=xianzaiduoshaoge) select a.user_id,max(xianzaiduoshaoge) as max_consec_days from tmp1 a group by a.user_id order by a.user_id asc
with tmp as (
select
IFNULL(DATEDIFF(next,fdate)=1,0) as diff,
row_number() over(partition by user_id order by fdate) as eday,
user_id
from(
select
fdate,
user_id,
lead(fdate,1,NULL) over(partition by user_id order by fdate) as next
from tb_dau) t1
)
select
user_id,
MAX(eday-last) as max_consec_days
from(
select
lag(eday,1,0) over(partition by user_id order by eday rows 1 preceding) as last,
user_id,
eday
from tmp
where diff=0
)t1
group by user_id
WITH t1 as ( select user_id,fdate,DATE_SUB(fdate,interval ROW_NUMBER() over(PARTITION by user_id ORDER BY fdate) day) as date_sub from tb_dau ), t2 as ( SELECT user_id,count(*) day_sum from t1 GROUP BY user_id,date_sub ), t3 as( select user_id,day_sum,ROW_NUMBER() over(PARTITION by user_id ORDER BY day_sum desc) day_rank from t2 ) SELECT user_id,day_sum as max_consec_days from t3 where day_rank =1欢迎大家指正批评!
select user_id,consecutive_days as max_consec_days from (select user_id,consecutive_days,dense_rank()over(partition by user_id order by consecutive_days desc) as rk from (select user_id,min(fdate),max(fdate),count(*) as consecutive_days from (select user_id,fdate,date_sub(fdate,interval rn day) as grp,rn from (SELECT user_id,fdate,ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY fdate) AS rn FROM tb_dau) as t1 ) as t2 group by user_id,grp) as t3 ) as t4 where rk =1 group by user_id,consecutive_days