首页 > 试题广场 >

计算用户的平均次日留存率

[编程题]计算用户的平均次日留存率
  • 热度指数:397009 时间限制:C/C++ 1秒,其他语言2秒 空间限制:C/C++ 256M,其他语言512M
  • 算法知识视频讲解
题目:现在运营想要查看用户在某天刷题后第二天还会再来刷题的留存率。请你取出相应数据。

示例:question_practice_detail
id device_id question_id result date
1 2138 111 wrong 2021-05-03
2 3214 112 wrong 2021-05-09
3 3214 113 wrong 2021-06-15
4 6543 111 right 2021-08-13
5 2315 115 right 2021-08-13
6 2315 116 right 2021-08-14
7 2315 117 wrong 2021-08-15
8 3214 112 wrong 2021-05-09
9 3214 113 wrong 2021-08-15
10 6543 111 right 2021-08-13
11 2315 115 right 2021-08-13
12 2315 116 right 2021-08-14
13 2315 117 wrong 2021-08-15
14 3214 112 wrong 2021-08-16
15 3214 113 wrong 2021-08-18
16 6543 111 right 2021-08-13

根据示例,你的查询应返回以下结果:
avg_ret
0.3000
示例1

输入

drop table if  exists `question_practice_detail`;
CREATE TABLE `question_practice_detail` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`question_id`int NOT NULL,
`result` varchar(32) NOT NULL,
`date` date NOT NULL
);

INSERT INTO question_practice_detail VALUES(1,2138,111,'wrong','2021-05-03');
INSERT INTO question_practice_detail VALUES(2,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(3,3214,113,'wrong','2021-06-15');
INSERT INTO question_practice_detail VALUES(4,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(5,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(6,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(7,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(8,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(9,3214,113,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(10,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(11,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(12,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(13,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(14,3214,112,'wrong','2021-08-16');
INSERT INTO question_practice_detail VALUES(15,3214,113,'wrong','2021-08-18');
INSERT INTO question_practice_detail VALUES(16,6543,111,'right','2021-08-13');

输出

avg_ret
0.3000
select
    count(t2.date) / count(t1.date) as avg_ret
from
    (
        select distinct
            device_id,
            date
        from
            question_practice_detail
    ) t1
    left join (
        select distinct
            device_id,
            date
        from
            question_practice_detail
    ) t2 on t1.device_id = t2.device_id
    and t2.date = date_add(t1.date, interval 1 day);

select
    count(distinct t2.device_id, t2.date) / count(distinct t1.device_id, t1.date) as avg_ret
from
    question_practice_detail t1
    left join question_practice_detail t2 on t1.device_id = t2.device_id
    # and t2.date = date_add(t1.date, interval 1 day);
    # and datediff(t2.date, t1.date) = 1;
    and t1.date = t2.date -1;
学到了
发表于 2025-12-24 11:57:41 回复(0)
很笨但很直接的一种方法
SELECT(
(SELECT COUNT(date1) FROM
(SELECT
    *
FROM
    ( SELECT DISTINCT device_id, date FROM question_practice_detail ORDER BY device_id ) AS temp1
    LEFT JOIN ( SELECT DISTINCT device_id as d, DATE_ADD( date, INTERVAL 1 DAY ) AS date1 FROM question_practice_detail ) AS temp2 ON temp1.device_id = temp2.d
    AND temp1.date = temp2.date1 ) as temp3) /
(SELECT COUNT(*) FROM
(SELECT
    *
FROM
    ( SELECT DISTINCT device_id, date FROM question_practice_detail ORDER BY device_id ) AS temp1
    LEFT JOIN ( SELECT DISTINCT device_id as d, DATE_ADD( date, INTERVAL 1 DAY ) AS date1 FROM question_practice_detail ) AS temp2 ON temp1.device_id = temp2.d
    AND temp1.date = temp2.date1 ) as temp3)
    ) as avg_ret;

发表于 2025-12-22 22:39:15 回复(0)
with k AS (
    SELECT COUNT(DISTINCT q2.device_id,q2.date) AS liucun
    FROM question_practice_detail q1
    inner JOIN question_practice_detail q2
        ON q1.device_id = q2.device_id
       AND DATEDIFF(q2.date , q1.date) = 1
),
total AS (
SELECT COUNT(DISTINCT device_id,date) AS total_device
FROM question_practice_detail
)
SELECT k.liucun/ total.total_device AS avg_ret
FROM k, total;
发表于 2025-12-09 13:41:44 回复(0)
感觉还是弄得有点绕了,这运营毛病真多
select
    COUNT(output.dayplus)/COUNT(output.day) as avg_ret
from
(select distinct t1.device_id,date(t1.date)as day,date_add(date(t2.date),interval 1 day)as dayplus
        from question_practice_detail t1
        left join question_practice_detail t2
        on t1.device_id=t2.device_id  and date(t1.date)=date_add(t2.date,interval 1 day)
) as output
发表于 2025-12-05 18:02:54 回复(0)
select count(date2)/count(date1) as avg_ret
from(
select
distinct device_id,
date as date1,
lead(date,1) over(partition by device_id order by date) as date2
from(
    select distinct device_id,date
    from question_practice_detail
) t
) a
请问大佬们,为啥其实两种方法的子查询得到的表是一样的,但是第二种方法一定要用datediff函数呢,直接按照第一个方法count(date2)/count(date1) as avg_ret算出来为啥不对
发表于 2025-12-01 18:18:52 回复(0)
让ai解释完代码都理解不了,有没有通俗易懂的
SELECT
    COUNT(date2) / COUNT(date) AS avg_ret
FROM
    (
        SELECT
            a.*,
            b.date AS date2
        FROM
            (
                SELECT
                    device_id,
                    date
                FROM
                    question_practice_detail
                GROUP BY
                    device_id,
                    date
            ) a
            LEFT JOIN (
                SELECT
                    device_id,
                    date
                FROM
                    question_practice_detail
                GROUP BY
                    device_id,
                    date
            ) b ON a.device_id = b.device_id
            AND b.date = DATE_ADD(a.date, INTERVAL 1 DAY)
    ) t;

发表于 2025-11-29 13:42:22 回复(1)
select
    sum(dl = 1) / count(1) as avg_ret
from
    (
      select
          device_id,
          date,
          datediff(
              lead(date, 1, date) over (partition by device_id order by date),
              date
          ) dl
      from
          question_practice_detail
      group by
          device_id,
          date
  ) a

发表于 2025-11-23 20:03:33 回复(0)
with a as(
select *, lag(date) over(partition by device_id)  date1,
         if (date - lag(date) over(partition by device_id)=1,1,0)  diff
from(
select distinct device_id,`date`
from question_practice_detail
order by device_id asc,`date` asc) as t)

select round(sum(diff)/count(1),4) avg_ret
from a
发表于 2025-11-22 14:49:08 回复(0)
select 
sum(if(datediff(date2, date1)=1, 1, 0))/count(*) as avg_ret
from
(select device_id, date as date1,
lead(date)over(partition by device_id order by date) as date2
from
(select distinct device_id, date
from question_practice_detail) as uniq_id_date) as last_date
看题解前没想到要用avg(),通过自测输入但是通不过提交,改成avg就可以了,有人可以解一下惑吗
发表于 2025-11-07 20:18:08 回复(0)
-- 留存率;次日留存率;n日留存率
-- 某天刷题后第n天再来刷题 n=2;
-- 用户每天来了x次,去重
-- 本题要求是:求出所有数据的留存率

with base as (
    -- 用户每天来了x次,去重后,保证用户在某天只有一条数据
    select distinct device_id, date from question_practice_detail
)
select round(count(next_day.device_id) / count(current_day.device_id), 4) as avg_ret
from
    base as current_day
    left join
        base as next_day -- 次日
        on current_day.device_id = next_day.device_id
        -- 7日,30日留存率只需要改这里
        -- and next_day.date = date_add(current_day.date, interval 1 day)
        and datediff(next_day.date, current_day.date) = 1
;

-- (A, 10-2) → NULL      ✗ 未留存
-- (A, 10-1) → (A, 10-2) ✓ 留存
-- (A, 10-4) → NULL      ✗ 未留存
-- (B, 10-1) → NULL      ✗ 未留存
-- (B, 10-3) → NULL      ✗ 未留存
-- (C, 10-2) → (C, 10-3) ✓ 留存
-- 结果: 2/6 = 0.3333 = 33.33%
发表于 2025-10-29 12:46:09 回复(0)
select
round(count(distinct y.device_id,y.date)/count(distinct x.device_id,x.date),4) as avg_ret
from question_practice_detail x 
left join question_practice_detail y 
on x.device_id=y.device_id and datediff(y.date,x.date)=1

发表于 2025-10-27 22:29:50 回复(0)
先获取每个用户的答题日期,需要进行去重,即获得每天有哪些用户答题
with user_active as
(
    select
        distinct device_id
        , date
    from question_practice_detail
)
然后将这个用户答题日期表进行自连接,目的是用来匹配每个用户他这个日期登录了,然后还在哪些日期登录
from user_active t1
    left join user_active t2 on t1.device_id = t2.device_id
这里要求次日留存率,那也就是说希望找到的数据是用户每个登录答题日期的第二天(次日)有没有再登录答题,也就是我们自连接匹配的表的左右日期之差是1天,所以自连接要再添加一个条件datediff()计算日期之差
tips:我们是通过datediff来控制我们也计算的是n日的留存率,如果题目问的是用户某天刷题之后7天之内又再次刷题的留存率,那这里就是datediff(左表日期, 右表日期)<=7
from user_active t1
    left join user_active t2 on t1.device_id = t2.device_id and datediff(t2.date, t1.date) = 1
接下来就是计算次日留存率: 
分子是有多少用户第二天登录答题了,也就是右表的用户数量
分母是全部用户数量,也就是左表的用户数量
count(t2.device_id) / count(t1.device_id) as avg_ret
完整代码:
with user_active as
(
    select
        distinct device_id
        , date
    from question_practice_detail
)
select
    count(t2.device_id) / count(t1.device_id) as avg_ret
from user_active t1
    left join user_active t2 on t1.device_id = t2.device_id and datediff(t2.date, t1.date) = 1
;





发表于 2025-10-10 10:03:01 回复(0)
select avg(if(date-last_date=1,1,0)) avg_ret
from
(select device_id,date,lag(date,1)over(partition by device_id order by date) last_date
from
(select distinct device_id,date
from question_practice_detail qpd
order by device_id,date) new) new2

拼尽全力两层子查询才搞定
发表于 2025-10-07 18:41:32 回复(0)
方法倒是不难,主要是这个“留存率”比较抽象。分子是连续两天都有数据的用户记录(但是同一用户同一天只算1条,算出来是3),分母是所有用户数据(但是同一用户同一天只算1条,算出来是10)
select count(distinct q1.device_id,q1.date)/(select count(distinct device_id,date) from question_practice_detail) avg_ret from question_practice_detail q1 join question_practice_detail q2 on q1.device_id = q2.device_id and TIMESTAMPDIFF(day,q1.date,q2.date) = 1

发表于 2025-10-04 11:17:19 回复(0)
SELECT
COUNT(q2.device_id) / COUNT(q1.device_id) AS avg_ret
FROM
(SELECT DISTINCT device_id, date FROM question_practice_detail) q1
LEFT JOIN
(SELECT DISTINCT device_id, date FROM question_practice_detail) q2
ON
q1.device_id = q2.device_id
AND q2.date = DATE_ADD(q1.date, INTERVAL 1 DAY)
发表于 2025-09-27 14:32:18 回复(0)
with qpd
as(
    select device_id,date
    from question_practice_detail
    group by 1,2
)
select count(q2.date)/count(q1.date) avg_ret
from qpd q1
left join qpd q2
on q1.device_id=q2.device_id
and q1.date=date_add(q2.date,interval 1 day) 

发表于 2025-09-25 11:46:37 回复(0)