u_room_log.zip附件是实验的初始化表.另外需要先创建数字辅助表
- create table nums(id int not null primary key);
- delimiter $$
- create procedure pCreateNums(cnt int)
- begin
- declare s int default 1;
- truncate table nums;
- while s<=cnt do
- insert into nums select s;
- set s=s+1;
- end while;
- end $$
- delimiter ;
- call pCreateNums(1000);
- commit;
数字辅助表基本上是复杂SQL的标配.
以roomid为1 的数据了解需求

求每天每个房间同时两个用户在线的时长和最高在线人数
需要解决的几个问题
1.用户数据跨天。
比如 logid=634715的记录,该用户登录房间是
2018-01-15 15:29:55 至 2018-01-16 15:30:00
那么,这条数据需要拆分为两个记录
2018-01-15 15:29:55 至 2018-01-15 23:59:59
和
2018-01-16 00:00:00 至 2018-01-16 15:30:00
2.同一天同一房间同一用户的时间重叠
比如某个用户有两个记录
一个是 3点-5点登录;另外一个记录是4点-7点登录.
这两个记录需要合并为一个记录 就是该用户 3点-7点在线.
3.每天每个房间同时有两个用户在线,才算房间的活跃时间.
(后续可能扩展,比如四个在线用户才算活跃时间,这就是产品随口一说的事儿。不能因为她拍脑袋就得改程序改死自己。)
最后统计每天每个房间的活跃时间和最大在线人数.
我处理这个问题分了两个步骤.
第一个步骤,先合并同一房间同一用户登录时间的重叠部分.然后拆分跨天的记录.最后输出到一个结果表中.(表名 t1)
第二个步骤,通过一个自定义函数,处理第一步的结果表(t1).
首先,把某天某个房间,任意两个用户时间范围存在交叉的记录找出来.
(因为需求说明,两个用户同时在线才算活跃时间,所以时间范围有交叉是必要条件.如果一个用户在线就算房间的在线时间的话,就更容易一点,按照步骤一的算法,再次合并相同房间的时间范围即是)
然后把这些记录的时间点生成一个排序的列(固定行数行转列),

生成每个时间点到下个时间点的范围

这个范围是算法的最核心部分,他是某天某个房间所有记录的最小范围间隔.
也就是说,在某天某个房间,任何用户的时间范围不会和最小范围有重叠的部分.
最后用最小范围关联 t1的结果表,如果用户的在线时间和最小范围重合,就将重叠的最小范围和用户ID,房间ID写入最终结果表t2.
t2这个结果表,最终包含了某个房间某个用户一个或者多个的最小范围.
最终的查询就是聚合房间和最小范围,找到超过两个用户的记录,然后对用户求max得到最大在线人数.对最小范围的间隔时间求和得到房间的活跃时间.
步骤一:
- drop table t1;
- create table t1
- select distinct
- roomid,
- userid,
- if(date(s)!=date(e) and id>1,date(s+interval id-1 day),s) s,
- if(date(s+interval id-1 day)=date(e) ,e,date_format(s+interval id-1 day,'%Y-%m-%d 23:59:59')) e
- from (
- SELECT DISTINCT s.roomid, s.userid, s.s, (
- SELECT MIN(e)
- FROM (SELECT DISTINCT roomid, userid, roomend AS e
- FROM u_room_log a
- WHERE NOT EXISTS (SELECT *
- FROM u_room_log b
- WHERE a.roomid = b.roomid
- AND a.userid = b.userid
- AND a.roomend >= b.roomstart
- AND a.roomend < b.roomend)
- ) s2
- WHERE s2.e > s.s
- AND s.roomid = s2.roomid
- AND s.userid = s2.userid
- ) AS e
- FROM (SELECT DISTINCT roomid, userid, roomstart AS s
- FROM u_room_log a
- WHERE NOT EXISTS (SELECT *
- FROM u_room_log b
- WHERE a.roomid = b.roomid
- AND a.userid = b.userid
- AND a.roomstart > b.roomstart
- AND a.roomstart <= b.roomend)
- ) s, (SELECT DISTINCT roomid, userid, roomend AS e
- FROM u_room_log a
- WHERE NOT EXISTS (SELECT *
- FROM u_room_log b
- WHERE a.roomid = b.roomid
- AND a.userid = b.userid
- AND a.roomend >= b.roomstart
- AND a.roomend < b.roomend)
- ) e
- WHERE s.roomid = e.roomid
- AND s.userid = e.userid
- ) t1 ,
- nums
- where nums.id<=datediff(e,s)+1
- ;
- alter table t1 add key(roomid,s,e),add primary key(roomid,userid,s,e);
步骤二:
- DELIMITER $$
- CREATE DEFINER=`root`@`localhost` FUNCTION `f`(pRoomId bigint,pTime timestamp) RETURNS int(11)
- BEGIN
- declare pResult bigint;
- insert into t2
- select v6.roomid,v6.userid,greatest(s,starttime) s,least(e,endtime) e
- from (
- select roomid,starttime,endtime from (
- select @d as starttime,@d:=d,v3.roomid,v3.d endtime from (
- select distinct roomid,
- case
- when nums.id=1 then v1s
- when nums.id=2 then v1e
- when nums.id=3 then v2s
- when nums.id=4 then v2e
- end d from (
- select v1.roomid, v1.s v1s,v1.e v1e,v2.s v2s,v2.e v2e
- from t1 v1
- inner join t1 v2 on ((v1.s between v2.s and v2.e or v1.e between v2.s and v2.e ) and v1.roomid=v2.roomid)
- where v2.roomid=pRoomId and v2.s>=pTime and v2.s<(pTime+interval '1' day) and (v2.roomid,v2.userid,v2.s,v2.e)!= (v1.roomid,v1.userid,v1.s,v1.e)
- ) a,nums where nums.id<=4
- order by roomid,d
- ) v3,(select @d:='') vars
- ) v4 where starttime!=''
- ) v5 inner join t1 v6 on(v5.starttime between v6.s and v6.e and v5.endtime between v6.s and v6.e and v5.roomid=v6.roomid)
- ;
- select row_count() into pResult;
- RETURN pResult;
- END $$
执行函数
select f(roomid,s) from (
select distinct roomid,date(s) s from t1
) a;
最终查询得到结果
- select roomid,date(s) dt,round(sum(timestampdiff(second,s,e))/60) ts,max(c) c from (
- select roomid,s,e ,count(distinct userid) c from t2 where roomid=660026 group by roomid,s,e having count(distinct userid)>1
- ) a group by roomid,date(s);