1. 示例数据库

# Rebuild the sample table from scratch so this script can be re-run safely.
DROP TABLE IF EXISTS user_profile;
CREATE TABLE `user_profile` (
    `id` int NOT NULL,
    `device_id` int NOT NULL,
    `gender` varchar(14) NOT NULL,
    `age` int NULL,  # the only nullable column: row 2 below has no age
    `university` varchar(32) NOT NULL,
    `province` varchar(32) NOT NULL,
    PRIMARY KEY (`id`)  # id identifies a row; later examples order by it
);

# Name the target columns explicitly so the inserts keep working if the
# column order changes or a column is added later.
INSERT INTO user_profile (id, device_id, gender, age, university, province)
VALUES
    (1, 2138, 'male', 21, '北京大学', 'BeiJing'),
    (2, 3214, 'male', NULL, '复旦大学', 'Shanghai'),
    (3, 6543, 'female', 20, '北京大学', 'BeiJing'),
    (4, 2315, 'female', 23, '浙江大学', 'ZheJiang'),
    (5, 5432, 'male', 25, '山东大学', 'Shandong');

示例：

id	device_id	gender	age	university	province
1	2138	male	21	北京大学	Beijing
2	3214	male		复旦大学	Shanghai
3	6543	female	20	北京大学	Beijing
4	2315	female	23	浙江大学	ZheJiang
5	5432	male	25	山东大学	Shandong

2. 基础查询

2.1 查询结果限制返回行数

LIMIT 子句接受一个或两个数字参数，参数必须是非负整数常量
如果只给定一个参数，它表示返回最大的记录行数目
如果给定两个参数，第一个参数指定第一个返回记录行的偏移量 offset，第二个参数指定返回记录行的最大数目
为了检索从某一个偏移量到记录集的结束所有的记录行，可以把第二个参数指定为一个足够大的数（例如 18446744073709551615）；MySQL 不接受 -1 作为行数，写 -1 会报语法错误
初始记录行的偏移量是 0(而不是 1)

题目：现在运营只需要查看前2个用户明细设备ID数据，请你从用户信息表 user_profile 中取出相应结果。

根据输入，你的查询应返回以下结果：

device_id
2138
3214

# Device ids of the first two users; ordering by id fixes which rows count as "first".
select
    device_id
from user_profile
order by id
limit 2;

# Rows 6-10: skip the first 5 rows, then return at most 5.
select * from user_profile limit 5, 5;

# Row 11 through the last row. MySQL rejects a negative row count
# (limit 10, -1 is a syntax error); the documented idiom is a very large row count.
select * from user_profile limit 10, 18446744073709551615;

# First 5 rows.
select * from user_profile limit 5;

limit 优化

当 offset 特别大的时候，查询效率就会特别低，优化：

# Anti-pattern: with a large offset the server still has to read and discard
# the first 10000 rows before it can return 10.
select id, name, age from employee limit 10000, 10;

# Option 1: remember the largest id returned by the previous page and seek past it
# (the order by makes "the next 10" well defined).
select id, name from employee where id > 10000 order by id limit 10;

# Option 2: order by + index, i.e. sort on an indexed column before applying the offset.
select id, name from employee order by id limit 10000, 10;

# Option 3: when the id range of the page is known, fetch it with between ... and.
# Both bounds are inclusive, so this range covers 11 ids.
select * from employee where id between 90000 and 90010;

2.2 将查询后的列重新命名

题目：现在你需要查看前2个用户明细设备ID数据，并将列名改为 'user_infos_example'，请你从用户信息表取出相应结果。

根据输入，你的查询应返回以下结果：

根据示例，你的查询应返回以下结果：

user_infos_example
2138
3214

# Same first-two-users query, with the output column renamed through an alias.
select
    device_id as user_infos_example
from user_profile
order by id
limit 2;

3. 条件查询

3.1 基础排序

3.1.1 查找后排序

题目：现在运营想要取出用户信息表中的用户年龄，请取出相应数据，并按照年龄升序排序。

根据示例，你的查询应返回以下结果：

device_id	age
6543	20
2138	21
3214	23
2315	23
5432	25
2131	28

# Ages in ascending order (ascending is the default sort direction).
select device_id, age from user_profile order by age;

# order by <column> asc | desc sorts by that column ascending / descending.
# Note: MySQL comments start with # or "-- "; a trailing // is not a comment.
select device_id, age from user_profile order by age asc;

3.1.2 查找后多列排序

题目：现在运营想要取出用户信息表中的年龄和gpa数据，并先按照gpa升序排序，再按照年龄升序排序输出，请取出相应数据。

用户信息表：user_profile

id	device_id	gender	age	university	gpa
1	2138	male	21	北京大学	3.4
2	3214	male	23	复旦大学	4
3	6543	female	20	北京大学	3.2
4	2315	female	23	浙江大学	3.6
5	5432	male	25	山东大学	3.8
6	2131	male	28	北京师范大学	3.3

device_id	gpa	age
6543	3.2	20
2131	3.3	28
2138	3.4	21
2315	3.6	23
5432	3.8	25
3214	4	23

# Sort by gpa first, then by age among rows with the same gpa.
# Only age carries an explicit asc; gpa uses the default direction, which is also ascending.
select
    device_id,
    gpa,
    age
from user_profile
order by gpa, age asc;

# Equivalent form with the direction spelled out on both sort keys
select
    device_id,
    gpa,
    age
from user_profile
order by gpa asc, age asc;

# Both sort keys descending
select
    device_id,
    gpa,
    age
from user_profile
order by gpa desc, age desc;

3.2 基础操作符

3.2.1 21年8月份练题总数

2021年8月份所有练习过题目的总用户数和练习过题目的总次数

like

# The same device_id (user id) appears on many rows, so users are counted with distinct;
# every answer record counts towards question_cnt.
select count(distinct device_id) as did_cnt,
       count(question_id) as question_cnt
from question_practice_detail
where date like "2021-08%";

# like and indexes
# A leading % leaves the pattern without a fixed prefix, so the index is not used.
select userId, name from user where userId like '%123';

# Preferred: the pattern starts with a fixed prefix.
select userId, name from user where userId like '123%';

year、month 内置函数

# Same August-2021 filter using the built-in year() / month() functions.
# Both return integers, so they are compared with numeric literals; the string
# forms '2021' / '08' only match through implicit string-to-number conversion.
select
    count(distinct device_id) as did_cnt,
    count(question_id) as question_cnt
from question_practice_detail
where year(date) = 2021
  and month(date) = 8;

date_format 函数

# Same August-2021 filter: render the date as 'YYYY-MM' and compare that string.
select count(distinct device_id) as did_cnt,
       count(question_id) as question_cnt
from question_practice_detail
where date_format(date, '%Y-%m') = '2021-08';

3.2.2 查找学校是北大的学生信息

题目：现在运营想要筛选出所有北京大学的学生进行用户调研，请你从用户信息表中取出满足条件的数据，结果返回设备id和学校。

示例：user_profile

id	device_id	gender	age	university	province
1	2138	male	21	北京大学	Beijing
2	3214	male		复旦大学	Shanghai
3	6543	female	20	北京大学	Beijing
4	2315	female	23	浙江大学	ZheJiang
5	5432	male	25	山东大学	Shandong

根据示例，你的查询应返回以下结果：

device_id	university
2138	北京大学
6543	北京大学

# Students of 北京大学
select
    device_id,
    university
from user_profile
where university = "北京大学";

# Users older than 24
select
    device_id,
    gender,
    age,
    university
from user_profile
where age > 24;

# Users aged 20 to 23, both bounds included
select
    device_id,
    gender,
    age
from user_profile
where age between 20 and 23;

# Everyone except 复旦大学 students, written two equivalent ways
select
    device_id,
    gender,
    age,
    university
from user_profile
where university <> "复旦大学";

select
    device_id,
    gender,
    age,
    university
from user_profile
where university not in ("复旦大学");

# Drop the students whose age is NULL.
# is not null is the only predicate that tests for NULL itself, so it comes first.
select device_id, gender, age, university from user_profile where age is not null;

# These two also drop the NULL rows, but only as a side effect: a comparison with
# NULL is never true. They additionally drop any row whose age is 0.
# (age <> 0 is what age != "" evaluates to once MySQL converts the empty string
# to a number, written without the implicit conversion.)
select device_id, gender, age, university from user_profile where age > 0;
select device_id, gender, age, university from user_profile where age <> 0;

3.3 高级操作符

示例：user_profile

id	device_id	gender	age	university	gpa
1	2138	male	21	北京大学	3.4
2	3214	male		复旦大学	4.0
3	6543	female	20	北京大学	3.2
4	2315	female	23	浙江大学	3.6
5	5432	male	25	山东大学	3.8
6	2131	male	28	北京师范大学	3.3

# and: male users with a gpa strictly above 3.5
select
    device_id,
    gender,
    age,
    university,
    gpa
from user_profile
where gender = "male"
  and gpa > 3.5;

# or: users from 北京大学, or with a gpa strictly above 3.7
select
    device_id,
    gender,
    age,
    university,
    gpa
from user_profile
where university = "北京大学"
   or gpa > 3.7;

# in: users from any of 北京大学, 复旦大学 or 山东大学
select
    device_id,
    gender,
    age,
    university,
    gpa
from user_profile
where university in ("北京大学", "复旦大学", "山东大学");

# Mixing operators: 山东大学 users with gpa strictly above 3.5,
# or 复旦大学 users with gpa strictly above 3.8.
# and binds tighter than or; the parentheses only make that grouping visible.
select device_id, gender, age, university, gpa
from user_profile
where (gpa > 3.5 and university = "山东大学")
   or (gpa > 3.8 and university = "复旦大学");

# Users whose university name contains 北京 anywhere
select
    device_id,
    age,
    university
from user_profile
where university like "%北京%";

# Prefix-only variant: matches just the names that start with 北京.
# On the sample table above both queries return the same rows.
select
    device_id,
    age,
    university
from user_profile
where university like "北京%";

like 模糊匹配

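匹配串中可包含如下四种通配符（其中 _ 和 % 是标准 SQL 通配符，MySQL 也支持；[ ] 和 [^ ] 只有 SQL Server 的 LIKE 支持，在 MySQL 中需要改用 REGEXP 或多个 LIKE 条件）：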

_：匹配任意一个字符
%：匹配0个或多个字符
[ ]：匹配 [ ] 中的任意一个字符(若要比较的字符是连续的，则可以用连字符“-”表达 )
[^ ]：不匹配[ ]中的任意一个字符

# _ stands for exactly one character: 北京 followed by one more character
select
    university
from user_profile
where university like "北京_";

# % stands for zero or more characters: anything that starts with 北京
select
    university
from user_profile
where university like "北京%";

# NOTE: the [ ] and [^ ] character classes inside LIKE are SQL Server (T-SQL) syntax.
# MySQL's LIKE only understands _ and %, so a MySQL equivalent follows each such example.

# Students whose surname is 张, 李 or 刘
SELECT * FROM 学生表 WHERE 姓名 LIKE '[张李刘]%';
# MySQL equivalent
SELECT * FROM 学生表 WHERE 姓名 LIKE '张%' OR 姓名 LIKE '李%' OR 姓名 LIKE '刘%';

# Students whose surname is not 刘 (plain % wildcard, valid in both dialects)
SELECT 姓名 FROM 学生表 WHERE 姓名 NOT LIKE '刘%';

# Students whose student number does not end in 2, 3 or 5
SELECT * FROM 学生表 WHERE 学号 LIKE '%[^235]';
# MySQL equivalent
SELECT * FROM 学生表 WHERE 学号 REGEXP '[^235]$';

4. 高级查询

4.1 计算函数

示例：某user_profile表如下:

id	device_id	gender	age	university	gpa
1	2234	male	21	北京大学	3.2
2	2235	male	NULL	复旦大学	3.8
3	2236	female	20	复旦大学	3.5
4	2237	female	23	浙江大学	3.3
5	2238	male	25	复旦大学	3.1
6	2239	male	25	北京大学	3.6
7	2240	male	NULL	清华大学	3.3
8	2241	female	NULL	北京大学	3.7

SQL 提供的统计函数有：

COUNT(*)：统计表中元组（行）的个数；
COUNT([DISTINCT] <列名>)：统计本列非 NULL 列值的个数（加 DISTINCT 时重复值只计一次）；
SUM( <列名> )：计算列值总和；
AVG( <列名> )：计算列值平均值；
MAX( <列名> )：求列值最大值；
MIN( <列名> )：求列值最小值。

注意：上述函数中除 COUNT(*) 外，其他函数在计算过程中均忽略 NULL 值；统计函数不能出现在 WHERE 子句中

例如，查询成绩最高的学生的学号，如下写法是错误的：

# Intentionally INVALID example: an aggregate function such as MAX() may not appear in a WHERE clause.
SELECT 学号 FROM 成绩表
WHERE 成绩 = MAX(成绩)

# Valid uses of the aggregate functions.
# The examples query 成绩表, because the placeholder name `table` is a reserved
# word and cannot be used as an unquoted table name.
select count(*) from 成绩表;
select count(distinct 学号) from 成绩表;
select sum(成绩) from 成绩表;
select avg(成绩) from 成绩表;
select max(成绩) as max_score, min(成绩) as min_score from 成绩表;

# Valid way to find the student(s) with the highest score:
# compute the maximum in a subquery and compare against its result.
select 学号 from 成绩表 where 成绩 = (select max(成绩) from 成绩表);

题解：

# Highest gpa among 复旦大学 students (the expected result is a single row).
# An aggregate without group by always yields exactly one row, so no limit is needed,
# and ordering by the non-aggregated id column is rejected when ONLY_FULL_GROUP_BY is enabled.
select max(gpa) as gpa from user_profile where university = '复旦大学';

# Number of male users and their average gpa
select
    count(*) as male_num,
    avg(gpa) as avg_gpa
from user_profile
where gender = "male";

# Same figures, with round() keeping 1 decimal place of the average
select
    count(gender) as male_num,
    round(avg(gpa), 1) as avg_gpa
from user_profile
where gender = "male";

4.2 分组查询

4.2.1 分组计算

示例：某user_profile表如下：

id	device_id	gender	age	university	gpa	active_days_within_30	question_cnt	answer_cnt
1	2138	male	21	北京大学	3.4	7	2	12
2	3214	male		复旦大学	4.0	15	5	25
3	6543	female	20	北京大学	3.2	12	3	30
4	2315	female	23	浙江大学	3.6	5	1	2
5	5432	male	25	山东大学	3.8	20	15	70
6	2131	male	28	山东大学	3.3	15	7	13
7	4321	male	26	复旦大学	3.6	9	6	52

30天内活跃天数字段（active_days_within_30）
发帖数量字段（question_cnt）
回答数量字段（answer_cnt）

你的查询返回结果需要对性别和学校分组，示例如下，结果保留1位小数，1位小数之后的四舍五入：

gender	university	user_num	avg_active_day	avg_question_cnt
male	北京大学	1	7.0	2.0
male	复旦大学	2	12.0	5.5
female	北京大学	1	12.0	3.0
female	浙江大学	1	5.0	1.0
male	山东大学	2	17.5	11.0

解释:

第一行表示：北京大学的男性用户个数为1，平均活跃天数为7天，平均发帖量为2

。。。

最后一行表示：山东大学的男性用户个数为2，平均活跃天数为17.5天，平均发帖量为11

题解：

# For every (gender, university) pair: number of users, average active days
# in the last 30 days and average number of posts, both rounded to 1 decimal.
select gender,
       university,
       count(gender) as user_num,
       round(avg(active_days_within_30), 1) as avg_active_day,
       round(avg(question_cnt), 1) as avg_question_cnt
from user_profile
group by gender, university;

4.2.2 分组过滤

group by 与 where、having

where 和 having 之后都是筛选条件，但是有区别的：

group by 子句必须在 where 之后，order by 之前
having 子句必须在 group by 之后，order by 之前
where 先执行，再 group by 分组；分组之后 having 再执行
聚合函数（avg、sum、max、min、count），不能作为条件放在where之后，但可以放在having之后

根据示例，你的查询应返回以下结果，请你保留3位小数(系统后台也会自动校正)，3位之后四舍五入：

university	avg_question_cnt	avg_answer_cnt
北京大学	2.500	21.000
浙江大学	1.000	2.000

解释: 平均发贴数低于5的学校或平均回帖数小于20的学校有2个

属于北京大学的用户的平均发帖量为2.500，平均回答数量为21.000

属于浙江大学的用户的平均发帖量为1.000，平均回答数量为2.000

# Average posts and answers per university, keeping only the low-activity schools:
# average posts below 5, or average answers below 20.
# having tests the raw averages rather than the rounded display aliases: rounding
# first could lift a value such as 4.9996 to 5.000 and wrongly drop the school,
# and referring to select aliases in having is a MySQL extension.
select
    university,
    round(avg(question_cnt), 3) as avg_question_cnt,
    round(avg(answer_cnt), 3) as avg_answer_cnt
from user_profile
group by university
having avg(question_cnt) < 5
    or avg(answer_cnt) < 20;

注意：聚合函数结果作为筛选条件时，不能用where，而是用having语法

4.2.3 分组排序

根据示例，你的查询应返回以下结果：

university	avg_question_cnt
浙江大学	1.0000
北京大学	2.5000
复旦大学	5.5000
山东大学	11.0000

题解：

# Average number of posts per university, listed from the lowest average to the highest.
select university,
       round(avg(question_cnt), 4) as avg_question_cnt
from user_profile
group by university
order by avg_question_cnt;

5. 多表查询

5.1 子查询

题目：现在运营想要查看所有来自浙江大学的用户题目回答明细情况，请你取出相应数据

示例：question_practice_detail

id	device_id	question_id	result
1	2138	111	wrong
2	3214	112	wrong
3	3214	113	wrong
4	6543	114	right
5	2315	115	right
6	2315	116	right
7	2315	117	wrong

第一行表示:id为1的用户的常用信息为使用的设备id为2138，在question_id为111的题目上，回答错误

....

最后一行表示:id为7的用户的常用信息为使用的设备id为2315，在question_id为117的题目上，回答错误

示例：user_profile

id	device_id	gender	age	university	gpa	active_days_within_30	question_cnt	answer_cnt
1	2138	male	21	北京大学	3.4	7	2	12
2	3214	male		复旦大学	4.0	15	5	25
3	6543	female	20	北京大学	3.2	12	3	30
4	2315	female	23	浙江大学	3.6	5	1	2
5	5432	male	25	山东大学	3.8	20	15	70
6	2131	male	28	山东大学	3.3	15	7	13
7	4321	female	26	复旦大学	3.6	9	6	52

第一行表示:id为1的用户的常用信息为使用的设备id为2138，性别为男，年龄21岁，北京大学，gpa为3.4在过去的30天里面活跃了7天，发帖数量为2，回答数量为12
。。。
最后一行表示:id为7的用户的常用信息为使用的设备id为4321，性别为女，年龄26岁，复旦大学，gpa为3.6在过去的30天里面活跃了9天，发帖数量为6，回答数量为52

根据示例，你的查询应返回以下结果，查询结果根据question_id升序排序：

device_id	question_id	result
2315	115	right
2315	116	right
2315	117	wrong

解释:

根据题目的数据只有1个浙江大学的用户，那么把浙江大学这个用户所有答题数据查询出来就行，题解：

1、方法一（子查询）：

# Collect the device_id of every 浙江大学 user from user_profile, then keep the
# rows of question_practice_detail whose device_id is in that set.
# in (rather than =) is required: a scalar comparison fails as soon as the
# subquery returns more than one device_id.
# The task asks for the result in ascending question_id order.
select
    device_id,
    question_id,
    result
from question_practice_detail
where device_id in (
    select device_id
    from user_profile
    where university = "浙江大学"
)
order by question_id;

2、方法二（连接查询）：

# Join every answer record to its user and keep the 浙江大学 ones.
# inner join states the intent directly: the where filter on t2.university
# discards unmatched rows anyway, so a left join would only be misleading.
# Every column is qualified with its table alias; the task asks for
# ascending question_id order.
select
    t1.device_id,
    t1.question_id,
    t1.result
from question_practice_detail as t1
inner join user_profile as t2
    on t1.device_id = t2.device_id
where t2.university = "浙江大学"
order by t1.question_id;

5.2 连接查询

5.2.1 统计每个学校的答过题的用户的平均答题数

运营想要了解每个学校答过题的用户平均答题数量情况，请你取出数据。

用户信息表 user_profile，其中device_id指终端编号（认为每个用户有唯一的一个终端），gender指性别，age指年龄，university指用户所在的学校，gpa是该用户平均学分绩点，active_days_within_30是30天内的活跃天数。

device_id	gender	age	university	gpa	active_days_within_30
2138	male	21	北京大学	3.4	7
3214	male	NULL	复旦大学	4	15
6543	female	20	北京大学	3.2	12
2315	female	23	浙江大学	3.6	5
5432	male	25	山东大学	3.8	20
2131	male	28	山东大学	3.3	15
4321	male	28	复旦大学	3.6	9

第一行表示:用户的常用信息为使用的设备id为2138，性别为男，年龄21岁，北京大学，gpa为3.4，在过去的30天里面活跃了7天

最后一行表示:用户的常用信息为使用的设备id为4321，性别为男，年龄28岁，复旦大学，gpa为3.6，在过去的30天里面活跃了9天

答题情况明细表 question_practice_detail，其中question_id是题目编号，result是答题结果。

device_id	question_id	result
2138	111	wrong
3214	112	wrong
3214	113	wrong
6543	111	right
2315	115	right
2315	116	right
2315	117	wrong
5432	118	wrong
5432	112	wrong
2131	114	right
5432	113	wrong

第一行表示用户的常用信息为使用的设备id为2138，在question_id为111的题目上，回答错误

....

最后一行表示用户的常用信息为使用的设备id为5432，在question_id为113的题目上，回答错误

请你写SQL查找每个学校用户的平均答题数目(说明：某学校用户平均答题数量计算方式为该学校用户答题总次数除以答过题的不同用户个数)根据示例，你的查询应返回以下结果（结果保留4位小数），注意：结果按照university升序排序！！！

university	avg_answer_cnt
北京大学	1.0000
复旦大学	2.0000
山东大学	2.0000
浙江大学	3.0000

解释:

第一行：北京大学总共有2个用户，2138和6543，2个用户在question_practice_detail里面答了2题，平均答题数目为2/2=1.0000

....

最后一行:浙江大学总共有1个用户，2315，这个用户在question_practice_detail里面答了3题，平均答题数目为3/1=3.0000

# Distinct users per university who have answered at least one question.
# Counting from question_practice_detail (not user_profile alone) is what
# restricts the count to users who actually answered.
select
    t2.university,
    count(distinct t1.device_id) as user_cnt
from question_practice_detail as t1
inner join user_profile as t2
    on t1.device_id = t2.device_id
group by t2.university
order by t2.university;

# Total number of answer records (one row per answered question)
select count(question_id) as answer_cnt from question_practice_detail;

# Solution: per university, total answers divided by the number of distinct
# users who answered, rounded to 4 decimals and sorted by university.
# inner join keeps only answer records that belong to a known user; a left join
# would gather unmatched records into an extra group with a NULL university.
select
    t2.university,
    round(count(t1.question_id) / count(distinct t1.device_id), 4) as avg_answer_cnt
from question_practice_detail as t1
inner join user_profile as t2
    on t1.device_id = t2.device_id
group by t2.university
order by t2.university;

5.2.2 统计每个学校各难度的用户平均刷题数

统计每个学校各难度的用户平均刷题数

题目：运营想要计算一些参加了答题的不同学校、不同难度的用户平均答题量，请你写SQL取出相应数据

表：question_detail

id	question_id	difficult_level
1	111	hard
2	112	medium
3	113	easy
4	115	easy
5	116	medium
6	117	easy

第一行表示: 题目id为111的难度为hard

....

最后一行表示: 题目id为117的难度为easy

请你写一个SQL查询，计算不同学校、不同难度的用户平均答题量，根据示例，你的查询应返回以下结果(结果在小数点位数保留4位，4位之后四舍五入)：

university	difficult_level	avg_answer_cnt
北京大学	hard	1.0000
复旦大学	easy	1.0000
复旦大学	medium	1.0000
山东大学	easy	4.5000
山东大学	medium	3.0000
浙江大学	easy	5.0000
浙江大学	medium	2.0000

解释：

第一行：北京大学有设备id为2138，6543这2个用户，这2个用户在question_practice_detail表下都只有一条答题记录，且答题题目是111，从question_detail可以知道这个题目是hard，故北京大学的用户答题为hard的题目平均答题为2/2=1.0000

第二行，第三行：复旦大学有设备id为3214，4321这2个用户，但是在question_practice_detail表只有1个用户(device_id=3214有答题，device_id=4321没有答题，不计入后续计算)有2条答题记录，且答题题目是112，113各1个，从question_detail可以知道题目难度分别是medium和easy，故复旦大学的用户答题为easy, medium的题目平均答题量都为1(easy=1或medium=1) /1 (device_id=3214)=1.0000

第四行，第五行：山东大学有设备id为5432和2131这2个用户，这2个用户总共在question_practice_detail表下有12条答题记录，且答题题目是112，113，117，且数目分别为3，6，3，从question_detail可以知道题目难度分别为medium,easy,easy，所以，easy共有9个，故easy的题目平均答题量= 9(easy=9)/2 (device_id=2131 or device_id=5432) =4.5000，medium共有3个，medium的答题只有device_id=5432的用户，故medium的题目平均答题量= 3(medium=3)/1 ( device_id=5432) =3.0000

.....

# Three-table join: question_practice_detail sits in the middle and shares a key
# with each of the other two tables.
# Average answers = count(t1.question_id) / count(distinct t1.device_id),
# i.e. total answers divided by the number of distinct users who answered.
# inner joins drop answer records whose question or user is unknown; with left
# joins they would surface as groups with a NULL difficult_level or university.
# The order by matches the expected output, which is sorted by university and difficulty.
select
    t3.university,
    t2.difficult_level,
    round(count(t1.question_id) / count(distinct t1.device_id), 4) as avg_answer_cnt
from question_practice_detail as t1
inner join question_detail as t2
    on t1.question_id = t2.question_id
inner join user_profile as t3
    on t1.device_id = t3.device_id
group by t3.university, t2.difficult_level
order by t3.university, t2.difficult_level;

5.2.3 统计每个用户的平均刷题数

统计每个用户的平均刷题数

题目：运营想要查看参加了答题的山东大学的用户在不同难度下的平均答题题目数，请取出相应数据

请你写一个SQL查询，计算山东大学不同难度的用户平均答题量，根据示例，你的查询应返回以下结果(结果在小数点位数保留4位，4位之后四舍五入)：

university	difficult_level	avg_answer_cnt
山东大学	easy	4.5000
山东大学	medium	3.0000

山东大学有设备id为5432和2131这2个用户，这2个用户总共在question_practice_detail表下有12条答题记录，且答题题目是112，113，117，且数目分别为3，6，3，从question_detail可以知道题目难度分别为medium,easy,easy，所以，easy共有9个，故easy的题目平均答题量= 9(easy=9)/2 (device_id=2131 or device_id=5432) =4.5000，medium共有3个，medium的答题只有device_id=5432的用户，故medium的题目平均答题量= 3(medium=3)/1 ( device_id=5432) =3.0000

1、方法一：

# Average answers per difficulty level for 山东大学 users who answered questions.
# The university test is a row filter, so it belongs in where (applied before
# grouping) rather than in having (applied after every group has been built).
# inner joins keep only answer records with a known question and a known user.
select
    t3.university,
    t2.difficult_level,
    round(count(t1.question_id) / count(distinct t1.device_id), 4) as avg_answer_cnt
from question_practice_detail as t1
inner join question_detail as t2
    on t1.question_id = t2.question_id
inner join user_profile as t3
    on t1.device_id = t3.device_id
where t3.university = "山东大学"
group by t3.university, t2.difficult_level;

2、方法二：

# Same result, starting from user_profile.
# Explicit inner joins replace the comma-separated table list, so each join
# condition sits next to the table it connects.
# Every non-aggregated select column appears in group by, and the average is
# rounded to 4 decimals as the task requires.
select
    t1.university,
    t3.difficult_level,
    round(count(t2.question_id) / count(distinct t2.device_id), 4) as avg_answer_cnt
from user_profile as t1
inner join question_practice_detail as t2
    on t1.device_id = t2.device_id
inner join question_detail as t3
    on t2.question_id = t3.question_id
where t1.university = '山东大学'
group by t1.university, t3.difficult_level;

5.3 组合查询

union：
- 对两个结果集进行并集操作, 不包括重复行, 相当于对合并后的结果做 distinct
- 去重需要额外的排序或哈希开销, 但结果的顺序并没有保证; 需要特定顺序时必须显式加 order by
union all:
- 对两个结果集进行并集操作, 包括重复行, 即所有的结果全部显示, 不管是不是重复
- 不会对查询结果排序

注意：union all只是合并查询结果，并不会进行去重和排序操作，在没有去重的前提下，使用union all的执行效率要比union高

查找山东大学或者性别为男生的信息

根据示例，你的查询应返回以下结果（注意输出的顺序，先输出学校为山东大学再输出性别为男生的信息）：

device_id	gender	age	gpa
5432	male	25	3.8
2131	male	28	3.3
2138	male	21	3.4
3214	male	None	4
5432	male	25	3.8
2131	male	28	3.3
4321	male	28	3.6

# A plain or will not do here: or cannot make the 山东大学 rows come out
# before the male rows.
select device_id, gender, age, gpa
from user_profile
where university = "山东大学"

union all

select device_id, gender, age, gpa
from user_profile
where gender = "male";