SQL 入门与进阶:从零基础到实战精通
前言
SQL(Structured Query Language,结构化查询语言)是用于管理和操作关系型数据库的标准语言。无论是数据分析、后端开发还是运维管理,SQL 都是必备技能。本文将从基础语法出发,逐步深入到高级特性,帮助读者建立完整的 SQL 知识体系。
第一部分:SQL 基础
1.1 什么是 SQL
SQL 是一种声明式语言,用于与关系型数据库(如 MySQL、PostgreSQL、SQLite、Oracle、SQL Server 等)进行交互。它主要完成以下任务:
数据查询:从数据库中检索数据
数据操作:插入、更新、删除数据
数据定义:创建、修改、删除表结构
数据控制:管理访问权限
1.2 SQL 的四大分类
1.3 创建表与数据类型
-- 创建用户表
CREATE TABLE users (
id INT PRIMARY KEY AUTO_INCREMENT,
username VARCHAR(50) NOT NULL UNIQUE,
email VARCHAR(100) NOT NULL,
age INT,
balance DECIMAL(10, 2) DEFAULT 0.00,
status TINYINT DEFAULT 1,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);
-- 创建订单表
CREATE TABLE orders (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
product_name VARCHAR(100) NOT NULL,
amount DECIMAL(10, 2) NOT NULL,
status VARCHAR(20) DEFAULT 'pending',
order_date DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
);
常用数据类型:
INT— 整数VARCHAR(n)— 可变长度字符串DECIMAL(p, s)— 精确小数(p 为总位数,s 为小数位数)DATETIME/TIMESTAMP— 日期时间TINYINT— 极小整数,常用于布尔值或状态码TEXT— 长文本
1.4 插入数据
-- 插入单条记录
INSERT INTO users (username, email, age, balance)
VALUES ('Alice', 'alice@example.com', 28, 1000.00);
-- 插入多条记录
INSERT INTO users (username, email, age, balance) VALUES
('Bob', 'bob@example.com', 32, 2500.00),
('Carol', 'carol@example.com', 24, 800.00),
('David', 'david@example.com', 45, 5000.00);
-- 插入订单数据
INSERT INTO orders (user_id, product_name, amount, status) VALUES
(1, 'MacBook Pro', 12999.00, 'completed'),
(1, 'AirPods Pro', 1999.00, 'completed'),
(2, 'iPhone 15', 6999.00, 'pending'),
(3, 'iPad Air', 4799.00, 'completed'),
(4, 'Apple Watch', 3199.00, 'cancelled');
1.5 基础查询(SELECT)
-- 查询所有列
SELECT * FROM users;
-- 查询指定列
SELECT username, email FROM users;
-- 条件查询
SELECT * FROM users WHERE age > 25;
-- 多条件查询(AND / OR)
SELECT * FROM users WHERE age >= 25 AND balance > 2000;
-- 范围查询(BETWEEN)
SELECT * FROM users WHERE age BETWEEN 25 AND 40;
-- 集合查询(IN)
SELECT * FROM users WHERE id IN (1, 3, 5);
-- 模糊查询(LIKE)
SELECT * FROM users WHERE email LIKE '%@example.com';
SELECT * FROM users WHERE username LIKE 'A%'; -- 以 A 开头
-- 空值判断(IS NULL / IS NOT NULL)
SELECT * FROM users WHERE age IS NULL;
-- 去重(DISTINCT)
SELECT DISTINCT status FROM orders;
-- 排序(ORDER BY)
SELECT * FROM users ORDER BY balance DESC; -- 降序
SELECT * FROM users ORDER BY age ASC, balance DESC; -- 多字段排序
-- 限制结果(LIMIT)
SELECT * FROM users ORDER BY balance DESC LIMIT 3;
-- 分页(LIMIT ... OFFSET)
SELECT * FROM users ORDER BY id LIMIT 5 OFFSET 10; -- 第三页,每页 5 条
1.6 聚合函数
-- 计数
SELECT COUNT(*) FROM users;
-- 求和
SELECT SUM(balance) AS total_balance FROM users;
-- 平均值
SELECT AVG(age) AS avg_age FROM users;
-- 最大值 / 最小值
SELECT MAX(balance) AS max_balance, MIN(balance) AS min_balance FROM users;
-- 分组统计(GROUP BY)
SELECT status, COUNT(*) AS cnt, SUM(amount) AS total
FROM orders
GROUP BY status;
-- 分组过滤(HAVING)
SELECT user_id, COUNT(*) AS order_count
FROM orders
GROUP BY user_id
HAVING COUNT(*) >= 2;
WHERE 与 HAVING 的区别: WHERE 在分组前过滤行,HAVING 在分组后过滤组。
1.7 更新与删除
-- 更新数据
UPDATE users SET balance = balance + 500 WHERE id = 1;
-- 多字段更新
UPDATE users SET age = 29, updated_at = NOW() WHERE username = 'Alice';
-- 删除数据
DELETE FROM orders WHERE status = 'cancelled';
-- 清空整张表(不可回滚)
TRUNCATE TABLE orders;
第二部分:SQL 进阶
2.1 JOIN 连接查询
JOIN 是 SQL 中最强大的功能之一,用于将多张表的数据组合在一起。
-- INNER JOIN(内连接:只返回匹配的行)
SELECT u.username, o.product_name, o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- LEFT JOIN(左连接:返回左表全部 + 右表匹配行)
SELECT u.username, o.product_name, o.amount
FROM users u
LEFT JOIN orders o ON u.id = o.user_id;
-- RIGHT JOIN(右连接:返回右表全部 + 左表匹配行)
SELECT u.username, o.product_name, o.amount
FROM users u
RIGHT JOIN orders o ON u.id = o.user_id;
-- 多表连接
SELECT u.username, o.product_name, o.amount, o.status
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.status = 'completed'
ORDER BY o.amount DESC;
JOIN 类型对比:
2.2 子查询(Subquery)
子查询是嵌套在另一个查询中的 SELECT 语句。
-- WHERE 子句中的子查询
SELECT username, balance
FROM users
WHERE balance > (SELECT AVG(balance) FROM users);
-- IN 子查询
SELECT * FROM users
WHERE id IN (SELECT DISTINCT user_id FROM orders WHERE status = 'completed');
-- FROM 子查询(派生表)
SELECT user_id, total_amount
FROM (
SELECT user_id, SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
) AS order_summary
WHERE total_amount > 5000;
-- EXISTS 子查询(判断是否存在)
SELECT username FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.status = 'pending'
);
2.3 窗口函数(Window Functions)
窗口函数在不改变结果集行数的前提下,对一组行进行计算。这是 SQL 进阶的核心技能。
-- ROW_NUMBER():行号
SELECT username, balance,
ROW_NUMBER() OVER (ORDER BY balance DESC) AS rank
FROM users;
-- RANK() / DENSE_RANK():排名(处理并列情况不同)
SELECT username, balance,
RANK() OVER (ORDER BY balance DESC) AS rank,
DENSE_RANK() OVER (ORDER BY balance DESC) AS dense_rank
FROM users;
-- SUM() OVER():累计求和
SELECT username, balance,
SUM(balance) OVER (ORDER BY id) AS running_total
FROM users;
-- AVG() OVER():移动平均
SELECT username, balance,
AVG(balance) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS moving_avg
FROM users;
-- PARTITION BY:分组窗口
SELECT user_id, product_name, amount,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY amount DESC) AS rn
FROM orders;
-- 上述查询可配合外层 WHERE rn = 1 来获取每个用户的最大订单
-- NTILE():分桶
SELECT username, balance,
NTILE(3) OVER (ORDER BY balance DESC) AS bucket
FROM users;
-- 将用户按余额分为 3 组(高/中/低)
常用窗口函数一览:
2.4 CTE(公用表表达式)与递归查询
CTE(Common Table Expression)使用 WITH 子句定义临时结果集,让复杂查询更易读。
-- 基础 CTE
WITH active_users AS (
SELECT id, username, balance
FROM users
WHERE status = 1
),
completed_orders AS (
SELECT user_id, SUM(amount) AS total_spent
FROM orders
WHERE status = 'completed'
GROUP BY user_id
)
SELECT au.username, au.balance, co.total_spent
FROM active_users au
INNER JOIN completed_orders co ON au.id = co.user_id
ORDER BY co.total_spent DESC;
-- 递归 CTE(查询层级结构,如部门树)
WITH RECURSIVE dept_tree AS (
-- 锚点:根节点
SELECT id, name, parent_id, 1 AS level
FROM departments
WHERE parent_id IS NULL
UNION ALL
-- 递归:子节点
SELECT d.id, d.name, d.parent_id, dt.level + 1
FROM departments d
INNER JOIN dept_tree dt ON d.parent_id = dt.id
)
SELECT * FROM dept_tree ORDER BY level, name;
2.5 CASE 条件表达式
-- 简单 CASE
SELECT username,
CASE status
WHEN 1 THEN '活跃'
WHEN 0 THEN '禁用'
ELSE '未知'
END AS status_label
FROM users;
-- 搜索 CASE
SELECT username, balance,
CASE
WHEN balance >= 5000 THEN '高净值'
WHEN balance >= 2000 THEN '中等'
ELSE '普通'
END AS customer_level
FROM users
ORDER BY balance DESC;
-- 配合聚合使用
SELECT
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) AS completed_count,
SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) AS pending_count,
SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled_count
FROM orders;
2.6 索引与性能优化
-- 创建索引
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_user_date ON orders(user_id, order_date);
-- 查看查询执行计划(MySQL)
EXPLAIN SELECT * FROM users WHERE email = 'alice@example.com';
-- 查看慢查询日志
SHOW VARIABLES LIKE 'slow_query%';
优化原则:
避免
SELECT *:只查询需要的列善用索引:在 WHERE、JOIN、ORDER BY 的列上建索引
避免函数包裹索引列:
WHERE YEAR(created_at) = 2024会导致索引失效,应改为范围查询LIMIT 分页:深度分页时用
WHERE id > last_id ORDER BY id LIMIT n代替OFFSETEXPLAIN 分析:定期检查慢查询的执行计划
避免 N+1 查询:用 JOIN 或批量查询替代循环单条查询
2.7 事务(Transaction)
事务确保一组操作要么全部成功,要么全部失败。
-- 开始事务
START TRANSACTION;
-- 转账操作
UPDATE users SET balance = balance - 500 WHERE id = 1;
UPDATE users SET balance = balance + 500 WHERE id = 2;
-- 提交(或回滚)
COMMIT; -- ROLLBACK;
-- 事务的 ACID 特性:
-- A - Atomicity(原子性):全部成功或全部失败
-- C - Consistency(一致性):事务前后数据保持一致
-- I - Isolation(隔离性):事务间互不干扰
-- D - Durability(持久性):提交后永久保存
-- 设置隔离级别
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
2.8 视图(View)
视图是存储的查询,像虚拟表一样使用。
-- 创建视图
CREATE VIEW user_order_summary AS
SELECT u.id, u.username,
COUNT(o.id) AS order_count,
SUM(o.amount) AS total_amount,
MAX(o.order_date) AS last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username;
-- 使用视图
SELECT * FROM user_order_summary WHERE order_count > 1;
-- 更新视图定义
CREATE OR REPLACE VIEW user_order_summary AS ...;
-- 删除视图
DROP VIEW IF EXISTS user_order_summary;
2.9 存储过程与函数
-- 存储过程
DELIMITER //
CREATE PROCEDURE GetUserOrders(IN p_user_id INT)
BEGIN
SELECT o.* FROM orders o
WHERE o.user_id = p_user_id
ORDER BY o.order_date DESC;
END //
DELIMITER ;
-- 调用存储过程
CALL GetUserOrders(1);
-- 自定义函数
DELIMITER //
CREATE FUNCTION GetCustomerLevel(p_balance DECIMAL(10,2))
RETURNS VARCHAR(10)
DETERMINISTIC
BEGIN
DECLARE level VARCHAR(10);
IF p_balance >= 5000 THEN
SET level = '高净值';
ELSEIF p_balance >= 2000 THEN
SET level = '中等';
ELSE
SET level = '普通';
END IF;
RETURN level;
END //
DELIMITER ;
-- 使用函数
SELECT username, balance, GetCustomerLevel(balance) AS level FROM users;
第三部分:实战演练
3.1 经典查询场景
-- 场景 1:查询每个用户的订单总金额,按金额降序
SELECT u.username, SUM(o.amount) AS total_spent
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.status = 'completed'
GROUP BY u.id, u.username
ORDER BY total_spent DESC;
-- 场景 2:查询没有下过订单的用户(LEFT JOIN 法)
SELECT u.username
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;
-- 场景 3:查询每个用户最近一笔订单
WITH ranked_orders AS (
SELECT user_id, product_name, amount, order_date,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY order_date DESC) AS rn
FROM orders
)
SELECT user_id, product_name, amount, order_date
FROM ranked_orders
WHERE rn = 1;
-- 场景 4:连续登录用户(相邻日期差值为 1)
SELECT user_id, COUNT(*) AS streak_days
FROM (
SELECT user_id, login_date,
DATE_SUB(login_date, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY) AS grp
FROM user_logins
) t
GROUP BY user_id, grp
HAVING COUNT(*) >= 3;
-- 场景 5:订单金额环比增长
SELECT
order_date,
daily_total,
LAG(daily_total) OVER (ORDER BY order_date) AS prev_total,
ROUND((daily_total - LAG(daily_total) OVER (ORDER BY order_date))
/ LAG(daily_total) OVER (ORDER BY order_date) * 100, 2) AS growth_pct
FROM (
SELECT DATE(order_date) AS order_date, SUM(amount) AS daily_total
FROM orders
GROUP BY DATE(order_date)
) t;
3.2 数据迁移与备份
-- 复制表结构
CREATE TABLE users_backup LIKE users;
-- 复制表数据
INSERT INTO users_backup SELECT * FROM users;
-- 从查询结果创建新表
CREATE TABLE vip_users AS
SELECT * FROM users WHERE balance >= 5000;
-- 条件更新(从一个表更新到另一个表)
UPDATE users u
INNER JOIN (
SELECT user_id, SUM(amount) AS total_spent
FROM orders
WHERE status = 'completed'
GROUP BY user_id
) o ON u.id = o.user_id
SET u.balance = u.balance + o.total_spent * 0.01; -- 消费返现 1%
第四部分:SQL 最佳实践
4.1 命名规范
表名和列名使用 snake_case(如
user_id、created_at)关键字使用 大写(如
SELECT、WHERE)别名使用有意义的名称(如
AS total_amount而非AS t)
4.2 安全实践
-- 防止 SQL 注入:使用参数化查询
-- Python 示例:cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
-- 最小权限原则
CREATE USER 'app_user'@'localhost' IDENTIFIED BY 'password';
GRANT SELECT, INSERT, UPDATE, DELETE ON mydb.* TO 'app_user'@'localhost';
-- 不要授予 DROP、ALTER 等 DDL 权限给应用账户
4.3 性能检查清单
[ ] 只
SELECT需要的列[ ] WHERE 条件中的列有索引
[ ] 避免在索引列上使用函数或运算
[ ] 使用
EXPLAIN检查执行计划[ ] 大表 JOIN 时确保连接列有索引
[ ] 定期
ANALYZE TABLE更新统计信息[ ] 批量操作替代逐条操作
附录:常见数据库方言差异
结语
SQL 的学习路径可以概括为:
入门:掌握 SELECT、INSERT、UPDATE、DELETE 和基本的 WHERE 条件
进阶:熟练使用 JOIN、子查询、GROUP BY 和聚合函数
高级:掌握窗口函数、CTE、递归查询和性能优化
精通:理解执行计划、索引策略、事务隔离级别和数据库设计范式
实践是最好的老师。建议在自己的电脑上安装一个 MySQL 或 PostgreSQL,导入示例数据,动手练习上述每一个查询。随着经验的积累,你会发现 SQL 不仅是一门语言,更是一种思维方式 — 用集合和关系的视角来理解和操作数据。