-- Common MySQL query operations, suitable for practice
-- (数据库mysql常用查询操作,适合练习)
--
-- Reads: 93
-- 2022-01-07

-- List the databases that exist on the server
show databases;

-- Create the test database
create database test;

-- Select (enter) the database
use test;

-- Drop a database (use with caution)
drop database test;

-- Re-create and re-select the database: dropping the current database leaves
-- the session without a default schema, so later statements would fail with
-- "No database selected".
create database test;
use test;

-- Create a table
create table department (
    deptno int,
    dname varchar(15),
    loc varchar(10)
);

-- List the tables in the current database
show tables;

-- Show the table structure
desc department;

-- Drop a table (use with caution)
drop table department;

-- Create tables with constraints. The two tables are linked by a
-- primary-key/foreign-key pair, so the table holding the primary key (dept)
-- must be created before the table holding the foreign key (employee).
create table dept (
    deptno int primary key,
    dname varchar(15),
    loc varchar(10)
);
create table employee (
    empid int primary key auto_increment,
    ename varchar(15) unique,
    job varchar(10) not null,
    mgr int,
    hiredate date,
    sal float default 0,
    comm float,
    deptno int,
    foreign key (deptno) references dept(deptno)
);

-- Rename the table
alter table employee rename emp;

-- Rename a column
alter table emp change empid empno int auto_increment;
desc emp;

-- Change a column's type. A bare DECIMAL means DECIMAL(10,0) in MySQL, which
-- would round salaries to whole numbers, so precision and scale are explicit.
alter table emp modify sal decimal(10,2) default 6000;

-- Add a column
alter table emp add address varchar(50) not null default '-';

-- Change a column's position:
alter table emp modify address varchar(100) first;
alter table emp modify address varchar(100) after job;

-- Drop a column
alter table emp drop address;

-- Insert rows: column names and values must correspond one-to-one in data
-- type, count and order
insert into dept(deptno,dname,loc) values (10,'accounting','new york'),(20,'research','dallas');
select * from dept; -- inspect the rows in the table
-- Column list omitted: the values must follow the table's column order
insert into dept values (30,'sales','chicago'),(40,'operations','boston');

-- Bulk import (the path must not contain Chinese characters; '\' is an escape
-- character, so write every '\' in the path as '\\' or '/')
-- Departments must exist before their employees can be stored, so dept is
-- populated first and the emp rows are imported afterwards
show variables like '%secure%'; -- show the secure path allowed for imports

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/employee.csv'
into table emp
fields terminated by ','
ignore 1 lines;

select * from emp; -- check the imported content
select count(*) from emp; -- check the imported row count

-- Update data
-- Safe-update mode rejects UPDATE/DELETE statements that have neither a
-- key-based WHERE clause nor a LIMIT, so it is switched off for the
-- statements below and switched back on afterwards.
set sql_safe_updates=0; -- relax the safe-update restriction
update emp set sal=8000 where deptno=10;
update emp set sal=8000; -- no WHERE clause: deliberately updates every row

-- Delete data
delete from emp where deptno=10;
set sql_safe_updates=1; -- restore the safe-update restriction

-- Empty the table
truncate emp;
desc emp;
insert into emp(empno,ename,job,deptno) values(1001,'abc','CEO',10);

-- Single-table queries (the result is a virtual result set)
-- The select list may contain column names, constants, formulas, functions
-- and expressions
-- Select specific columns: ename, job and sal from emp
select ename,job,sal from emp;

-- Column alias: each employee's adjusted salary (base salary + 1000)
select *,sal+1000 as 调薪 from emp;

-- Exercise: each employee's annual salary (base salary * 12): empno, ename, annual salary
select empno,ename,sal*12 as 年薪 from emp;

-- Distinct values: which departments appear in emp
select distinct deptno from emp;

-- Conditional queries
-- Employees in departments 10 and 20 whose sal is below 2000
select * from emp
where deptno in (10,20) and sal<2000;

-- Exercise: employees whose base salary is >= 2000 and <= 3000
select * from emp
where sal>=2000 and sal<=3000;

-- NULL checks
-- Rows whose mgr is NULL
select * from emp where mgr is null;

-- Exercise: rows whose comm is not NULL
select * from emp where comm is not null;

-- Pattern matching
-- Employees whose name starts with a
select * from emp where ename like 'a%';

-- Employees whose name contains a
select * from emp where ename like '%a%';

-- Employees whose name has a as its second character
select * from emp where ename like '_a%';

-- Exercise: employees whose name does not contain s
select * from emp where ename not like '%s%';

-- Sorting query results
-- Single-column sort: all employees by sal, descending
select * from emp order by sal desc;

-- Multi-column sort: all employees by deptno ascending, then sal descending
select * from emp order by deptno, sal desc;

-- Limiting the number of rows returned
-- The 5 employees with the highest base salary
select * from emp order by sal desc limit 5;

-- Employees ranked 6th to 10th by base salary (skip 5 rows, return 5)
select * from emp order by sal desc limit 5,5;

-- Exercise: the 5 most recently hired employees
select * from emp order by hiredate desc limit 5;

-- Aggregation
-- Head count, highest, lowest and average salary, and salary total for emp
select count(*) as 员工总数,max(sal) as 最高工资,min(sal) as 最低工资,avg(sal) as 平均工资,sum(sal) as 工资总和 from emp;

-- Grouped queries
-- Average salary per department
select deptno,avg(sal) as 平均工资 from emp group by deptno;

-- Average salary per department and job
select deptno,job,avg(sal) as 平均工资 from emp group by deptno,job;

-- Exercise: head count per department
select deptno,count(*) as 员工数 from emp group by deptno;

-- Exercise: head count per department and job
select deptno,job,count(*) as 员工数 from emp group by deptno,job;

-- Filtering with grouping
-- Average clerk salary per department: once by filtering rows before
-- grouping (WHERE), once by filtering groups afterwards (HAVING)
select deptno,avg(sal) from emp where job='clerk' group by deptno;
select deptno,avg(sal) from emp group by deptno,job having job='clerk';

-- Departments whose average salary is above 2000
select deptno,avg(sal) from emp group by deptno having avg(sal)>2000;

-- Multi-table joins: two small demo tables
create table t1(key1 char,v1 int);

create table t2(key2 char,v2 int);

insert into t1 values('a',1),('a',2),('b',3),('c',4),('a',13);

insert into t2 values('b',10),('b',11),('a',12),('a',13),('e',14);

select * from t1;
select * from t2;

-- Inner join
select * from t1 inner join t2 on key1=key2;

-- Left join
select * from t1 left join t2 on key1=key2;

-- Right join
select * from t1 right join t2 on key1=key2;

-- Combining result sets
-- UNION removes duplicate rows
select * from t1 union select * from t2;

-- UNION ALL keeps duplicate rows
select * from t1 union all select * from t2;

-- Full join: the left join united with the right join, duplicates removed
select * from t1 left join t2 on key1=key2
union
select * from t1 right join t2 on key1=key2;

-- Left anti join: left-join rows that have no match in the right table
select * from t1 left join t2 on key1=key2
where key2 is null;

-- Right anti join: right-join rows that have no match in the left table
select * from t1 right join t2 on key1=key2
where key1 is null;

-- Multi-table query practice
create table salgrade(grade int,losal int,hisal int);
insert into salgrade values(1,700,1200),
(2,1201,1400),
(3,1401,2000),
(4,2001,3000),
(5,3001,9999);

select * from salgrade; -- 5 rows
select * from emp; -- 14 rows
select * from dept; -- 4 rows

-- ename, dname and sal for every employee
select ename,dname,sal
from emp left join dept on emp.deptno=dept.deptno;

-- Head count per location (every location is listed; one with no employees counts as 0)
select loc,count(ename)
from emp right join dept on emp.deptno=dept.deptno
group by loc;

-- Name, department name and hire date of managers: ename, dname, job, hiredate
-- (written once as an inner join and once as a filtered Cartesian product)
select ename,dname,job,hiredate
from emp join dept on emp.deptno=dept.deptno
where job='manager';
select ename,dname,job,hiredate
from emp, dept
where emp.deptno=dept.deptno
and job='manager';

-- Every employee's name with their direct manager's name
-- (self join: aliases let the same table be treated as two tables)
select a.ename as 员工姓名,b.ename as 领导姓名
from emp a left join emp b on a.mgr=b.empno;

-- Employees hired earlier than their direct manager, with their department:
-- empno, ename, dname (a join across more than two tables)
select a.empno,a.ename,dname,a.hiredate as 员工入职日期,b.ename as 领导姓名,b.hiredate as 领导入职日期
from emp a left join emp b on a.mgr=b.empno
left join dept on a.deptno=dept.deptno
where a.hiredate<b.hiredate;

-- Salary grade of every employee: empno, ename, sal, grade (non-equi join).
-- The grade ranges are inclusive (700-1200, 1201-1400, ...), so BETWEEN is
-- used; strict > and < would drop anyone whose salary equals a range bound.
select empno,ename,sal,grade
from emp a join salgrade b
on a.sal between b.losal and b.hisal;

-- Subqueries
-- Scalar subquery:
-- Employees whose base salary is above the company average
select * from emp where sal>(select avg(sal) from emp);
select avg(sal) from emp;

-- Exercise: employees with the same manager as allen: empno, ename, job, mgr
select empno,ename,job,mgr from emp
where mgr=(select mgr from emp where ename='allen') and ename!='allen';

-- Row subquery
-- Employees in the same department and job as smith: empno, ename, job, deptno
-- Variant 1: two scalar subqueries
select empno,ename,job,deptno
from emp
where job=(select job from emp where ename='smith')
and deptno=(select deptno from emp where ename='smith')
and ename!='smith';

-- Variant 2: one row subquery compared as a tuple
select empno,ename,job,deptno
from emp
where (job,deptno)=(select job,deptno from emp where ename='smith')
and ename!='smith';

-- Variant 3: self join written as a filtered Cartesian product
select b.empno,b.ename,b.job,b.deptno
from emp a,emp b
where a.empno!=b.empno and a.job=b.job and a.deptno=b.deptno and a.ename='smith';

-- Variant 4: self join written with JOIN ... ON
select b.empno,b.ename,b.job,b.deptno
from emp a join emp b on a.empno!=b.empno and a.job=b.job and a.deptno=b.deptno
where a.ename='smith';

-- Column subquery:
-- Salary grade of non-manager employees: empno, ename, sal, grade
-- (the subquery filters out NULL mgr values, so NOT IN behaves as expected)
select empno,ename,sal,grade
from emp a join salgrade b
on a.sal between b.losal and b.hisal
where empno not in (select distinct mgr from emp where mgr is not null);

-- Exercise: average salary of departments with at least 5 employees
select deptno,avg(sal)
from emp
group by deptno
having count(empno)>=5;

-- deptno is an INT column, so it is compared with the number 30 rather than
-- the string '30' to avoid implicit type conversion.

-- Employees whose base salary is higher than that of ANY employee in department 30
select * from emp
where sal>any(select sal from emp where deptno=30) and deptno!=30;

select * from emp
where sal>(select min(sal) from emp where deptno=30) and deptno!=30;

-- Employees whose base salary is higher than that of ALL employees in department 30
select * from emp
where sal>all(select sal from emp where deptno=30);

select * from emp
where sal>(select max(sal) from emp where deptno=30);

-- Subquery in FROM
-- The highest-paid employee(s) of each department: empno, ename, sal, deptno
select empno,ename,sal,emp.deptno
from emp join (select deptno,max(sal) as hsal from emp group by deptno) a
on emp.deptno=a.deptno where sal=hsal;

select empno,ename,sal,deptno from emp
where (deptno,sal) in (select deptno,max(sal) from emp group by deptno);

-- Subquery in SELECT
-- Each department's share of the total head count
select deptno,count(*)/(select count(*) from emp) as 人数占比
from emp group by deptno;

-- Commonly used functions
select floor(1.23);                    -- 1
select ceiling(1.23);                  -- 2
select ceiling(-1.23);                 -- -1
select round(1.58,1);                  -- 1.6
select concat('CDA','数据', '分析');
select concat('CDA',null, '分析');     -- NULL: CONCAT returns NULL if any argument is NULL
select instr('CDA', 'A');              -- 3 (1-based position)
select instr('CDA', 'B');              -- 0 (not found)
select ltrim('  CDA数据分析');         -- leading spaces removed

-- Math functions
-- Exercise: each department's share of the head count (rounded to two decimals)
select deptno,round(count(*)/(select count(*) from emp),2) as 人数占比
from emp group by deptno;

-- String functions
-- Exercise: each department's share of the head count (shown as a percentage)
select deptno,concat(round((count(*)/(select count(*) from emp))*100,2),'%') as 人数占比
from emp group by deptno;

-- Date functions
select * from emp;
select curdate();
select curtime();
select now();
select timestampdiff(month,'2018-01-01','2019-03-22'); -- whole months between the two dates
select unix_timestamp();
select unix_timestamp('2018-01-01');
select from_unixtime(1577808000);
-- Exercise: each employee's length of service in years: ename, hiredate, years of service
select ename,hiredate,timestampdiff(year,hiredate,curdate()) as 工龄
from emp;

-- Group concatenation
-- Exercise: the employee names of each department
select deptno,group_concat(ename) from emp group by deptno;
select deptno,group_concat(distinct ename order by ename separator '/')
from emp
group by deptno;

-- Logic functions
-- IFNULL: each employee's take-home pay (base salary + commission; a missing commission counts as 0)
select *,sal+ifnull(comm,0) as 实发工资 from emp;

-- IF: each employee's salary level: 3000 and above is 高 (high),
-- above 1500 and below 3000 is 中 (medium), 1500 and below is 低 (low)
select ename,sal,if(sal>=3000,'高',if(sal>1500,'中','低')) as 工资级别 from emp;

-- The same rule as a CASE WHEN ... THEN ... ELSE ... END expression.
-- The middle branch uses > 1500 (not >= 1500) so that a salary of exactly
-- 1500 is classified 低, matching the rule above and the IF() version.
select ename,sal,
case when sal>=3000 then '高'
when sal>1500 then '中'
else '低'
end as 工资级别
from emp;

-- Window functions
-- Aggregate functions used as window functions
-- Average salary across all employees
select *,avg(sal) over() as 平均工资 from emp;
-- With no partition, ordering or frame in OVER, the whole table is one
-- partition and every row of it is included in the calculation

-- Average salary per department
select *,avg(sal) over(partition by deptno) as 平均工资 from emp;
-- With a partition but no ordering or frame, every row of the partition is
-- included in the calculation

-- Running salary total per department in hire-date order
select *,sum(sal) over(partition by deptno order by hiredate) as 累计工资 from emp;
-- With a partition and an ordering but no frame, the default frame is
-- RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: from the first row of the
-- partition through the current row, including rows tied with it on hiredate

-- Moving average salary per department over the previous, current and next
-- employee in hire-date order
select *,avg(sal)
over(partition by deptno order by hiredate rows between 1 preceding and 1 following)
as 移动平均工资 from emp;
-- With partition, ordering and frame all given, only the rows inside the
-- frame are included in the calculation

-- Ranking functions
-- Salary rank of employees within each department
select *,row_number() over(partition by deptno order by sal desc) as 排名 from emp;
select *,dense_rank() over(partition by deptno order by sal desc) as 排名 from emp;
select *,rank() over(partition by deptno order by sal desc) as 排名 from emp;

-- The three ranking functions side by side
select *,
row_number() over(partition by deptno order by sal desc) as 排名1,
dense_rank() over(partition by deptno order by sal desc) as 排名2,
rank() over(partition by deptno order by sal desc) as 排名3
from emp;

-- Salary rank of employees within each department
select *,dense_rank() over(partition by deptno order by sal desc) as pm from emp;

-- Keep only the rows ranked 2nd; a window result cannot be filtered in the
-- same query level's WHERE, so the ranking is wrapped in a derived table
select * from
(select *,dense_rank() over(partition by deptno order by sal desc) as pm from emp) a
where pm=2;

-- Lag/lead functions
-- Days between consecutive hires within each department
-- (qygrq holds the hire date of the previous employee in the department)
select *,lag(hiredate,1) over(partition by deptno order by hiredate) as qygrq from emp;
select *,lag(hiredate,1) over(partition by deptno order by hiredate) as qygrq,
timestampdiff(day,lag(hiredate,1) over(partition by deptno order by hiredate),hiredate) as 间隔天数
from emp;

-- Data preparation
create database taobao;

use taobao;

create table UserBehavior(
    user_id int,
    item_id int,
    item_category int,
    behavior_type varchar(10),
    user_geohash varchar(10),
    times datetime,
    amount decimal(5,2)
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/UserBehavior.csv'
into table UserBehavior
fields terminated by ','
ignore 1 lines;

select * from UserBehavior limit 100;

-- Data cleaning
-- Missing values: COUNT(col) counts only non-NULL values, so comparing the
-- per-column counts reveals which columns have gaps
select count(user_id),count(item_id),count(item_category),count(behavior_type),count(user_geohash),count(times),count(amount)
from UserBehavior;
-- NOTE(review): presumably one of the counts above divided by the total row count — confirm
select 330790/1048575;

-- Outlier check
select max(times),min(times),max(amount),min(amount)
from UserBehavior;

-- Duplicate rows
select distinct * from UserBehavior;

-- Column preparation: derive the behavior date, weekday and hour from times,
-- leave out user_geohash (not needed for the later analysis) and store the
-- filtered result in a new table
/* Alternative: save the filtered result to the secure path as t.csv,
   comma-separated, one row per CRLF-terminated line
select user_id,item_id,item_category,behavior_type,times,date(times) 日期,date_format(times,'%e'),hour(times) 小时,amount
from (select distinct * from UserBehavior) as t
into outfile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/t.csv'
fields terminated by ','
lines terminated by '\r\n';
*/
create table UserBehavior_new as
select user_id,item_id,item_category,behavior_type,times,date(times) as 日期,date_format(times,'%a') as 周,hour(times) as 小时,amount
from (select distinct * from UserBehavior) as t;

show tables;
select count(*) from UserBehavior_new;

-- Data preview
select * from UserBehavior_new;
select count(distinct user_id),count(distinct item_id),count(distinct item_category)
from UserBehavior_new;
-- The table name is spelled exactly as created: table names are
-- case-sensitive on servers running with lower_case_table_names=0
select behavior_type,count(*) from UserBehavior_new group by behavior_type;

-- Data analysis
-- 1. Traffic metrics
-- A comparison such as behavior_type='pv' evaluates to 1 or 0 in MySQL, so
-- SUM(condition) counts matching rows and SUM(condition*amount) totals the
-- amount of matching rows.
-- Daily PV (page views), UV (visitors), views per visitor, orders and revenue
select 日期,
sum(behavior_type='pv') as 浏览量,
count(distinct user_id) as 访客数,
sum(behavior_type='pv')/count(distinct user_id) as 人均浏览量,
sum(behavior_type='buy') as 成交量,
sum((behavior_type='buy')*amount) as 销售额
from UserBehavior_new group by 日期;

-- PV, UV, views per visitor, orders and revenue per weekday.
-- The sort key is wrapped in MIN(): times is not part of the GROUP BY, so a
-- bare date_format(times,'%w') is rejected under ONLY_FULL_GROUP_BY. All
-- rows of one weekday share the same %w value, so MIN() does not change it.
select 周,
sum(behavior_type='pv') as 浏览量,
count(distinct user_id) as 访客数,
sum(behavior_type='pv')/count(distinct user_id) as 人均浏览量,
sum(behavior_type='buy') as 成交量,
sum((behavior_type='buy')*amount) as 销售额
from UserBehavior_new group by 周
order by min(date_format(times,'%w'));

-- Hourly PV, UV, views per visitor, orders and revenue
select 小时,
sum(behavior_type='pv') as 浏览量,
count(distinct user_id) as 访客数,
sum(behavior_type='pv')/count(distinct user_id) as 人均浏览量,
sum(behavior_type='buy') as 成交量,
sum((behavior_type='buy')*amount) as 销售额
from UserBehavior_new group by 小时;

-- 2. Behavior conversion (conversion rate = users at this step / users at the previous step)
-- LAG() over behavior_type in descending order supplies the previous step's
-- user count; the first step has no predecessor, so its rate defaults to 1
select behavior_type,count(distinct user_id) as 用户数,
lag(count(distinct user_id),1) over(order by behavior_type desc) as 上一行为用户数,
ifnull(count(distinct user_id)/lag(count(distinct user_id),1) over(order by behavior_type desc),1) as 转化率
from UserBehavior_new
group by behavior_type;

-- Conversion rate for view -> add to cart -> buy
select
behavior_type,
count(distinct user_id) as 用户数,
lag(count(distinct user_id),1) over(order by behavior_type desc) as 上一行为用户数,
ifnull(count(distinct user_id)/lag(count(distinct user_id),1) over(order by behavior_type desc),1) as 转化率
from UserBehavior_new
where behavior_type in ('pv','cart','buy')
group by behavior_type;

-- Daily view -> add to cart -> buy conversion: first the long form,
-- one row per (date, behavior)
select 日期,behavior_type,count(distinct user_id) as 用户数
from UserBehavior_new
where behavior_type in ('pv','cart','buy')
group by 日期,behavior_type;

-- Pivot the long form into one row per date with one column per behavior
select 日期,
sum(if(behavior_type='pv',用户数,0)) as 浏览用户数,
sum(if(behavior_type='cart',用户数,0)) as 加购用户数,
sum(if(behavior_type='buy',用户数,0)) as 购买用户数,
sum(if(behavior_type='cart',用户数,0))/sum(if(behavior_type='pv',用户数,0)) as 浏览—加购转化率,
sum(if(behavior_type='buy',用户数,0))/sum(if(behavior_type='cart',用户数,0)) as 加购—购买转化率
from
(select 日期,behavior_type,count(distinct user_id) as 用户数
from UserBehavior_new
where behavior_type in ('pv','cart','buy')
group by 日期,behavior_type) t
group by 日期;

-- 3. Purchase preference analysis
-- 3.1 Best-selling items: top 10 by views, then top 10 by orders
select item_id,sum(behavior_type='pv') as 浏览量,sum(behavior_type='buy') as 成交量
from UserBehavior_new
group by item_id
order by 浏览量 desc limit 10;
select item_id,sum(behavior_type='pv') as 浏览量,sum(behavior_type='buy') as 成交量
from UserBehavior_new
group by item_id
order by 成交量 desc limit 10;

-- 3.2 Pareto analysis
select sum(amount) from UserBehavior_new where behavior_type='buy'; -- total purchase revenue

-- Revenue per category with the running total (largest category first) and
-- the running total's share of all purchase revenue
select item_category,sum(amount) as 销售额,
sum(sum(amount)) over(order by sum(amount) desc) as 累计销售额,
sum(sum(amount)) over(order by sum(amount) desc)/(select sum(amount) from UserBehavior_new where behavior_type='buy') as 累计销售额占比
from UserBehavior_new
where behavior_type='buy'
group by item_category;

-- Number of categories that make up the first 80% of revenue
select count(*) from
(select item_category,sum(amount) as 销售额,
sum(sum(amount)) over(order by sum(amount) desc) as 累计销售额,
sum(sum(amount)) over(order by sum(amount) desc)/(select sum(amount) from UserBehavior_new where behavior_type='buy') as 累计销售额占比
from UserBehavior_new
where behavior_type='buy'
group by item_category) t
where 累计销售额占比<=0.8;

-- 4. User value analysis
-- Per user: R = days from the last purchase to 2014-12-19, F = number of
-- purchases, M = total amount spent
select user_id,timestampdiff(day,max(日期),'2014-12-19') as R,count(*) as F,sum(amount) as M
from UserBehavior_new
where behavior_type='buy'
group by user_id;

-- RFM scores (1-5 each): R in 6-day bands, F capped at 5, M in bands of 100
select *,
if(RR<=6,5,if(RR<=12,4,if(RR<=18,3,if(RR<=24,2,1)))) as Rscore,
if(FF=1,1,if(FF=2,2,if(FF=3,3,if(FF=4,4,5)))) as Fscore,
if(MM<100,1,if(MM<200,2,if(MM<300,3,if(MM<400,4,5)))) as Mscore
from
(select user_id,timestampdiff(day,max(日期),'2014-12-19') as RR,count(*) as FF,sum(amount) as MM
from UserBehavior_new
where behavior_type='buy'
group by user_id) t;

-- Same scores, with Fscore written more compactly as "FF, capped at 5"
select *,
if(RR<=6,5,if(RR<=12,4,if(RR<=18,3,if(RR<=24,2,1)))) as Rscore,
if(FF<5,FF,5) as Fscore,
if(MM<100,1,if(MM<200,2,if(MM<300,3,if(MM<400,4,5)))) as Mscore
from
(select user_id,timestampdiff(day,max(日期),'2014-12-19') as RR,count(*) as FF,sum(amount) as MM
from UserBehavior_new
where behavior_type='buy'
group by user_id) t;

-- RFM score averages
-- NOTE(review): 3.5984 2.1039 2.2051 — presumably the result the author
-- obtained on the tutorial data set; confirm before reusing as thresholds
select avg(Rscore),avg(Fscore),avg(Mscore)
from
(select *,
if(RR<=6,5,if(RR<=12,4,if(RR<=18,3,if(RR<=24,2,1)))) as Rscore,
if(FF<5,FF,5) as Fscore,
if(MM<100,1,if(MM<200,2,if(MM<300,3,if(MM<400,4,5)))) as Mscore
from
(select user_id,timestampdiff(day,max(日期),'2014-12-19') as RR,count(*) as FF,sum(amount) as MM
from UserBehavior_new
where behavior_type='buy'
group by user_id) t1) t2;

-- RFM importance: each score is labelled 高 (high) or 低 (low) by comparing
-- it with a hard-coded threshold (3.5984 / 2.1039 / 2.2051).
-- NOTE(review): the thresholds look like score averages computed on the
-- tutorial data set — recompute them if the data changes.
select
*,
if(RR<=6,5,if(RR<=12,4,if(RR<=18,3,if(RR<=24,2,1)))) as Rscore,
if(FF<5,FF,5) as Fscore,
if(MM<100,1,if(MM<200,2,if(MM<300,3,if(MM<400,4,5)))) as Mscore,
if(if(RR<=6,5,if(RR<=12,4,if(RR<=18,3,if(RR<=24,2,1))))>3.5984,'高','低') as R,
if(if(FF<5,FF,5)>2.1039,'高','低') as F,
if(if(MM<100,1,if(MM<200,2,if(MM<300,3,if(MM<400,4,5))))>2.2051,'高','低') as M
from
(select
user_id,
timestampdiff(day,max(日期),'2014-12-19') as RR,
count(*) as FF,
sum(amount) as MM
from UserBehavior_new
where behavior_type='buy'
group by user_id) as t;

-- RFM user value: map the eight high/low combinations of R, F and M to a
-- customer segment name

select
*,
case when R='高' and F='高' and M='高' then '重要价值客户'
when R='高' and F='低' and M='高' then '重要发展客户'
when R='低' and F='高' and M='高' then '重要保持客户'
when R='低' and F='低' and M='高' then '重要挽留客户'
when R='高' and F='高' and M='低' then '一般价值客户'
when R='高' and F='低' and M='低' then '一般发展客户'
when R='低' and F='高' and M='低' then '一般保持客户'
else '一般挽留客户'
end as 用户价值
from
(select
*,
if(RR<=6,5,if(RR<=12,4,if(RR<=18,3,if(RR<=24,2,1)))) as Rscore,
if(FF<5,FF,5) as Fscore,
if(MM<100,1,if(MM<200,2,if(MM<300,3,if(MM<400,4,5)))) as Mscore,
if(if(RR<=6,5,if(RR<=12,4,if(RR<=18,3,if(RR<=24,2,1))))>3.5984,'高','低') as R,
if(if(FF<5,FF,5)>2.1039,'高','低') as F,
if(if(MM<100,1,if(MM<200,2,if(MM<300,3,if(MM<400,4,5))))>2.2051,'高','低') as M
from
(select
user_id,
timestampdiff(day,max(日期),'2014-12-19') as RR,
count(*) as FF,
sum(amount) as MM
from UserBehavior_new
where behavior_type='buy'
group by user_id) as t1) as t2;

create database onlineshop;

use onlineshop;

-- Registered users table: customers ------------------------------------------
create table customers(
    id varchar(15) primary key,
    full_name varchar(30),
    created_at int -- converted later with from_unixtime(), i.e. treated as a Unix timestamp
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/customers.csv'
into table customers
fields terminated by ','
ignore 1 lines;

select * from customers limit 10;
select count(*) from customers; -- 44661

-- drop table customers;

-- Order header table: orders --------------------------------------------------
create table orders(
    id varchar(15) primary key,
    created_at date,
    closed_at date,
    cancelled_at date,
    customer_id varchar(15),
    country char,
    province varchar(4),
    city varchar(4),
    district varchar(4),
    address varchar(100),
    financial_status varchar(20),
    fulfillment_status varchar(10),
    processed_at date,
    total_price decimal(6,2),
    shipping_rate decimal(6,2),
    subtotal_price decimal(6,2),
    total_discounts decimal(6,2),
    total_line_items_price decimal(6,2)
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/orders.csv'
into table orders
fields terminated by ','
ignore 1 lines;

select * from orders limit 10;
select count(*) from orders; -- 21358

-- drop table orders;

-- Order line-item table: orders_items ------------------------------------------
create table orders_items(
    id varchar(15) primary key,
    order_id varchar(15),
    product_id varchar(15),
    product_style varchar(50),
    variant_id varchar(15),
    sku varchar(50),
    product_title varchar(50),
    fulfillment_status varchar(10),
    price decimal(6,2),
    quantity int
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/orders_items.csv'
into table orders_items
fields terminated by ','
ignore 1 lines;

select * from orders_items limit 10;
select count(*) from orders_items; -- 36826

-- drop table orders_items;

-- Product master table: products -----------------------------------------------
create table products(
    id varchar(15) primary key,
    title varchar(50),
    product_type varchar(15),
    created_at date,
    published_at date
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/products.csv'
into table products
fields terminated by ','
ignore 1 lines;

select * from products limit 10;
select count(*) from products; -- 247

-- drop table products;

-- Product detail (SKU) table: products_skus -------------------------------------
create table products_skus(
    id varchar(15) primary key,
    product_id varchar(15),
    product_style varchar(50),
    sku varchar(50),
    created_at int, -- converted later with from_unixtime(), i.e. treated as a Unix timestamp
    price decimal(6,2)
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/products_skus.csv'
into table products_skus
fields terminated by ','
ignore 1 lines;

select * from products_skus limit 10;
select count(*) from products_skus; -- 1356

-- drop table products_skus;

-- Region table: regioninfo ------------------------------------------------------
create table regioninfo(
    regionid varchar(4) primary key,
    parentid varchar(4),
    regionname varchar(20),
    regiontype char
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/regioninfo.csv'
into table regioninfo
fields terminated by ','
ignore 1 lines;

select * from regioninfo limit 10;
select count(*) from regioninfo; -- 3415

-- drop table regioninfo;

-- Preview every table
select * from customers;
select * from orders;
select * from orders_items;
select * from products;
select * from products_skus;
select * from regioninfo;

-- Relax safe-update mode so the unfiltered UPDATE statements below are accepted
set sql_safe_updates=0;
-- Converting in place raises an error, because the converted value does not
-- fit the column's data type (created_at is an INT column). The statement is
-- kept as a comment for illustration so the script keeps running:
-- update customers set created_at=from_unixtime(created_at);
-- Add a new DATE column instead
alter table customers add created_new date;
-- Convert the timestamp to a date and store it in the new column (all rows, intentionally)
update customers set created_new=date(from_unixtime(created_at));

-- Handle the listing date of the product detail table the same way
-- Add a new DATE column
alter table products_skus add created_new date;
-- Convert the timestamp to a date and store it in the new column (all rows, intentionally)
update products_skus set created_new=date(from_unixtime(created_at));

-- For each day of March 2018: new users, orders placed, and users who ordered
-- Step 1: every date on which a user registered or an order was created
select created_new as 日期 from customers where created_new between '2018-03-01' and '2018-03-31'
union
select created_at from orders where created_at between '2018-03-01' and '2018-03-31';

-- Step 2: join both fact tables to that date list. The two joins multiply
-- rows for each date, so every measure uses COUNT(DISTINCT ...).
select t.日期,
count(distinct customers.id) as 新增用户数,
count(distinct orders.id) as 订单数,
count(distinct orders.customer_id) as 下单用户数
from
(select created_new as 日期 from customers where created_new between '2018-03-01' and '2018-03-31'
union
select created_at from orders where created_at between '2018-03-01' and '2018-03-31') t
left join customers on t.日期=customers.created_new
left join orders on t.日期=orders.created_at
group by t.日期;

-- Amount spent per province and city
-- regiontype is a CHAR column, so it is compared with string literals to
-- avoid implicit type conversion. The queries below pair type '1' rows
-- (parents) with type '2' rows (children) as province and city.
select * from regioninfo where regiontype='1';
select * from regioninfo where regiontype='2';

select * from regioninfo t1 join regioninfo t2
on t1.regionid=t2.parentid and t1.regiontype='1' and t2.regiontype='2';

-- Variant 1: start from the region hierarchy, so cities without orders are
-- listed too. Grouping includes the region ids: ORDER BY t1.regionid is
-- rejected under ONLY_FULL_GROUP_BY when only the names are grouped, and
-- the ids also keep same-named regions apart.
select t1.regionname as 省,
t2.regionname as 市,
sum(total_price) as 消费金额
from regioninfo t1 join regioninfo t2
on t1.regionid=t2.parentid and t1.regiontype='1' and t2.regiontype='2'
left join orders on t1.regionid=orders.province and t2.regionid=orders.city
group by t1.regionid,t1.regionname,t2.regionid,t2.regionname
order by t1.regionid,sum(total_price) desc;

-- Variant 2: start from orders and look up the city, then its parent province
select r2.regionname as 省份,r1.regionname as 城市,sum(total_price) as 订单总金额
from orders
left join regioninfo as r1 on orders.city=r1.regionid
left join regioninfo as r2 on r1.parentid=r2.regionid
group by r2.regionid,r1.regionid
order by r2.regionid,sum(total_price) desc;

-- How much revenue do the top 20% of products (by revenue) contribute?
-- Per variant: revenue, revenue rank (pm), rank as a share of the number of
-- variants, running revenue total, and the running total's share of all revenue
select variant_id,sum(price*quantity) as 销售额,
rank() over(order by sum(price*quantity) desc) as pm,
rank() over(order by sum(price*quantity) desc)/(select count(distinct variant_id) from orders_items) as pm占比,
sum(sum(price*quantity)) over(order by sum(price*quantity) desc) as 累计金额,
sum(sum(price*quantity)) over(order by sum(price*quantity) desc)/(select sum(price*quantity) from orders_items) as 累计金额占比
from orders_items group by variant_id;

-- Keep only the variants whose rank falls within the top 20%
select * from
(select variant_id,sum(price*quantity) as 销售额,
rank() over(order by sum(price*quantity) desc) as pm,
rank() over(order by sum(price*quantity) desc)/(select count(distinct variant_id) from orders_items) as pm占比,
sum(sum(price*quantity)) over(order by sum(price*quantity) desc) as 累计金额,
sum(sum(price*quantity)) over(order by sum(price*quantity) desc)/(select sum(price*quantity) from orders_items) as 累计金额占比
from orders_items group by variant_id) t
where pm占比<=0.2;

-- Monthly repeat-purchase rate for 2017: share of users who ordered more
-- than once within the month
-- Orders per user per month
select month(created_at) as 月份,customer_id,count(customer_id) as 下单次数
from orders where year(created_at)=2017
group by month(created_at),customer_id;

-- (下单次数>1) is 1 or 0 per user: SUM counts the repeat buyers, COUNT
-- counts all users, AVG is the ratio of the two
select 月份,sum(下单次数>1) as 复购人数,count(下单次数>1) as 总人数,avg(下单次数>1) as 复购率 from
(select month(created_at) as 月份,customer_id,count(customer_id) as 下单次数
from orders where year(created_at)=2017
group by month(created_at),customer_id) t
group by 月份;

-- Monthly return rate for 2017: share of a month's buyers who buy again in
-- the following month
-- Distinct (month, user) pairs
select month(created_at) as 月份,customer_id
from orders
where year(created_at)=2017
group by month(created_at),customer_id;
-- Return rate = returning users / total users. t2 is the same user-month set
-- matched one month later; COUNT(t2.customer_id) skips the NULLs of users
-- without a match. December has no following month within the 2017 filter.
select t1.月份,count(t2.customer_id) as 回购人数,count(t1.customer_id) as 总人数,
count(t2.customer_id)/count(t1.customer_id) as 回购率
from
(select month(created_at) as 月份,customer_id
from orders
where year(created_at)=2017
group by month(created_at),customer_id) t1 left join
(select month(created_at) as 月份,customer_id
from orders
where year(created_at)=2017
group by month(created_at),customer_id) t2
on t1.月份+1=t2.月份 and t1.customer_id=t2.customer_id
group by t1.月份;

-- Consecutive order days per user in March 2018
-- Per user: each order date, its sequence number, and a grouping date
-- (order date minus sequence number). Consecutive dates share the same
-- grouping date, because date and sequence number advance together.
select customer_id,created_at,
row_number() over(partition by customer_id order by created_at) as xh,
date_sub(created_at,interval row_number() over(partition by customer_id order by created_at) day) as 分组
from orders where created_at between '2018-03-01' and '2018-03-31'
group by customer_id,created_at;

-- Rows per (user, grouping date) = length of that run of consecutive days
select customer_id,分组,count(*) as 连续下单天数
from
(select customer_id,created_at,
row_number() over(partition by customer_id order by created_at) as xh, -- sequence number
date_sub(created_at,interval row_number() over(partition by customer_id order by created_at) day) as 分组
from orders where created_at between '2018-03-01' and '2018-03-31'
group by customer_id,created_at) t
group by customer_id,分组
order by 连续下单天数 desc;

create database cda;

use cda;

-- Small order table used by the window-frame examples
create table order_tab(
    order_id int,
    user_no varchar(3),
    amount int,
    create_date date
);

insert into order_tab values
(1,'001',100,'2019-01-01'),
(2,'001',300,'2019-01-02'),
(3,'001',500,'2019-01-02'),
(4,'001',800,'2019-01-03'),
(5,'001',900,'2019-01-04'),
(6,'002',500,'2019-01-03'),
(7,'002',600,'2019-01-04'),
(8,'002',300,'2019-01-10'),
(9,'002',800,'2019-01-16'),
(10,'002',800,'2019-01-22');

-- Each pair below runs the same calculation with a RANGE frame (bounds are
-- offsets on the ORDER BY value) and then with a ROWS frame (bounds are
-- physical row positions).

-- Per user in order-id order: average amount over the previous order through the next order
select *,avg(amount) over(partition by user_no order by order_id range between 1 preceding and 1 following) as 平均订单金额
from order_tab;

select *,avg(amount) over(partition by user_no order by order_id rows between 1 preceding and 1 following) as 平均订单金额
from order_tab;

-- Per user in order-date order: average amount from the current date through the following day
select *,
avg(amount) over(partition by user_no order by create_date range between current row and interval 1 day following) as 平均订单金额
from order_tab;

select *,
avg(amount) over(partition by user_no order by create_date rows between current row and 1 following) as 平均订单金额
from order_tab;

-- Per user in amount order: average amount of orders whose amount lies
-- within [current amount - 1, current amount + 1]
select *,avg(amount) over(partition by user_no order by amount range between 1 preceding and 1 following) as 平均订单金额 from order_tab;

select *,avg(amount) over(partition by user_no order by amount rows between 1 preceding and 1 following) as 平均订单金额 from order_tab;
-- Share of the user's other orders whose amount is larger than the current order's
select
*,
rank() over(partition by user_no order by amount desc) as 序号,
percent_rank() over(partition by user_no order by amount desc) as 比例
from order_tab;

-- Split each user's orders into 3 groups by amount
select *,ntile(3) over(partition by user_no order by amount desc) as 组号 from order_tab;

-- Up to the current order, in date order: the amount of the first and of the
-- last order. With ORDER BY and no explicit frame, the frame ends at the
-- current row (and its peers), so last_value is "last so far", not the last
-- row of the partition.
select
*,
first_value(amount) over(partition by user_no order by create_date) as first_amount,
last_value(amount) over(partition by user_no order by create_date) as last_amount
from order_tab;

-- For each user's orders, show the second-largest order amount. With the
-- default frame the value is NULL on rows that come before the second row.
select
*,
rank() over(partition by user_no order by amount desc) as 序号,
nth_value(amount,2) over(partition by user_no order by amount desc) as 排名第二的订单金额
from order_tab;

create database lianjia;
use lianjia;

-- Deal table loaded from beike.csv; agreement_no is the primary key
create table beike(
    sign_corp_code text,
    sign_corp_name text,
    city_code text,
    city_name text,
    brand_code text,
    brand_name text,
    house_district_code text,
    house_district_name text,
    agreement_no int primary key,
    del_type_code text,
    del_type text,
    status_code text,
    status text,
    contract_date datetime,
    agreement_amt decimal(15,2),
    house_floor_area decimal(10,2),
    is_mortgage text,
    user_code text,
    user_name text,
    shop_code text,
    shop_name text,
    region_code text,
    region_name text,
    position_uc_levelname text,
    housedel_id text,
    grade text,
    housedel_deal_period text,
    house_id text,
    house_bizcircle_id text,
    house_bizcircle_name text,
    resblock_id text,
    resblock_name text,
    building_id text,
    building_name text,
    floor_area decimal(10,2),
    physical_floor int,
    jushi int,
    face text,
    fitment_status text,
    property_right_years int,
    first_del_source_name text,
    second_del_source_name text,
    sale_reason text,
    typing_time datetime,
    is_express_sale_housedel text,
    is_prospecting_housedel text,
    is_key_housedel text,
    showing_cnt int,
    is_focus_housedel text,
    id text,
    seller_age int,
    custdel_id text,
    is_online_deal text,
    custdel_showing_cnt int,
    buyer_age int,
    custdel_deal_period int,
    telephone_showing_cnt int,
    is_new text,
    sign_time datetime,
    area_code text,
    area_name text,
    marketing_code text,
    marketing_name text,
    cust_typing_time datetime,
    cust_typing_ucid text,
    contract_hold_user_name text,
    contract_hold_user_no text,
    contract_hold_shop_name text,
    contract_hold_area_name text,
    contract_hold_marketing_name text,
    sign_brand_code text,
    sign_brand_name text,
    revoke_time datetime,
    agreement_style text,
    last_price decimal(15,2),
    yingshou decimal(10,2),
    stat_function_name text,
    agreement_id text
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/beike.csv'
into table beike
fields terminated by ','
ignore 1 lines;

select * from beike;

-- Month-to-date deals (first day of the current month through yesterday)
-- with an online-deal flag, the reward payable, and each shop's online share.
-- The online-source list is evaluated once in the CTE instead of being
-- repeated in three expressions.
with mtd_deals as (
    select
        sign_time,
        agreement_id,
        custdel_id,
        user_code,
        user_name,
        stat_function_name,
        agreement_amt,
        shop_code,
        -- 1 when the deal came from one of the listed online sources, else 0
        is_online_deal in ('贝壳400-客源角色人','贝壳IM-客源角色人','链家网400-客源角色人','链家IM-客源角色人','BOD') as online_flag
    from beike
    where date(sign_time) between date_sub(curdate(),interval day(curdate())-1 day) and date_sub(curdate(),interval 1 day)
)
select
    online_flag as 是否线上,
    date(sign_time) as 签约日期,
    agreement_id as 协议id,
    -- 200 is paid only for an online deal in a shop whose online share is at least 50%
    online_flag*(avg(online_flag) over(partition by shop_code)>=0.5)*200 as 应发贝壳币,
    custdel_id as 成交客源id,
    user_code as 签约经纪人系统号,
    user_name as 签约经纪人姓名,
    stat_function_name as 房屋用途,
    agreement_amt as 成交价,
    avg(online_flag) over(partition by shop_code) as 经纪人所在门店的线上成交占比
from mtd_deals;

create database didi;

use didi;

-- Order base table
create table dw_v_order_base(
    order_id int primary key,
    pid int,
    driver_id int,
    finish_time datetime,
    product_id int,
    order_status int,
    city_name varchar(10),
    distance float
);

-- Scene label per order
create table gulf_order_scene(
    order_id int,
    scene_l2 varchar(10)
);

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/dw_v_order_base.csv'
into table dw_v_order_base
fields terminated by ','
lines terminated by '\r\n'
ignore 1 lines;

select * from dw_v_order_base limit 10;

load data infile 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/gulf_order_scene.csv'
into table gulf_order_scene
fields terminated by ','
lines terminated by '\r\n'
ignore 1 lines;

select * from gulf_order_scene limit 10;

-- Passenger distribution by completed express orders and city for the
-- railway-station scene over the past week
-- (i.e. for each number of completed express orders (1, 2, ...) and each
-- city, how many passengers fall into that bucket)
-- The queries in this section filter express, completed orders with
-- product_id in (3,4) and order_status=5.
select 完单数,city_name,count(*) as 乘客数
from
(select pid,city_name,count(*) as 完单数
from dw_v_order_base
inner join gulf_order_scene
on gulf_order_scene.order_id=dw_v_order_base.order_id
where date(finish_time)>date_sub(curdate(),interval 7 day) and scene_l2 like '%火车站%' and product_id in (3,4) and order_status=5
group by pid,city_name) as t
group by 完单数,city_name;

-- Completed express orders over the past month by city, distance band and time band
-- (GROUP BY uses the select-list aliases instead of repeating both IF expressions)
select
city_name,
if(distance<=8,'0-8公里',if(distance<=15,'8-15公里',if(distance<=40,'15-40公里','40公里以上'))) as 里程,
if(hour(finish_time)>=21 or hour(finish_time)<6,'特殊时段','普通时段') as 时段,
count(*) as 快车完单量
from dw_v_order_base
where date(finish_time)>date_sub(curdate(),interval 1 month) and product_id in (3,4) and order_status=5
group by city_name,里程,时段;

-- Daily retention over the following week for users who completed an
-- express order on 2020-05-01
-- Total users who completed an express order on 2020-05-01
select count(distinct pid)
from dw_v_order_base
where date(finish_time)='2020-05-01' and product_id in (3,4) and order_status=5;

-- Per day offset (1-7): cohort users seen again that day, divided by the
-- cohort size computed above
select
timestampdiff(day,'2020-05-01',date(finish_time)) as 留存日期,
count(distinct pid) as 留存用户数,
count(distinct pid)/(select count(distinct pid) from dw_v_order_base where date(finish_time)='2020-05-01' and product_id in (3,4) and order_status=5) as 留存率
from dw_v_order_base
where date(finish_time) between '2020-05-02' and '2020-05-08' and pid in (select distinct pid from dw_v_order_base where date(finish_time)='2020-05-01' and product_id in (3,4) and order_status=5)
group by timestampdiff(day,'2020-05-01',date(finish_time));

-- For each city, the express driver(s) with the longest run of consecutive
-- days with a completed order.
-- t1: one row per (city, driver, day) plus a grouping date (day minus its
--     sequence number); consecutive days share the same grouping date.
-- t2: run length per (city, driver, grouping date) and the city's longest run.
-- The innermost query is restricted to express, completed orders
-- (product_id in (3,4) and order_status=5), the same filter the other
-- express-order queries in this section use.
select city_name,
driver_id,
司机连续完单天数
from
(select
city_name,
driver_id,
分组日期,
count(*) as 司机连续完单天数,
max(count(*)) over(partition by city_name) as 城市最长连续完单天数
from
(select
city_name,
driver_id,
date(finish_time),
row_number() over(partition by city_name,driver_id order by date(finish_time)) as 序号,
date_sub(date(finish_time),interval row_number() over(partition by city_name,driver_id order by date(finish_time)) day) as 分组日期
from dw_v_order_base
where product_id in (3,4) and order_status=5
group by city_name,driver_id,date(finish_time)) as t1
group by city_name,driver_id,分组日期) as t2
where 司机连续完单天数=城市最长连续完单天数;

-- Comments (0) (精彩评论)
--
-- 0 0 Report (举报)