**SQL(Structured Query Language)**은 관계형 데이터베이스를 관리하고 조작하기 위한 표준 언어입니다. 이 글에서는 SQL의 기본 문법부터 고급 쿼리 기법까지 체계적으로 다루겠습니다.
SQL 기본 문법
SELECT 문
데이터를 조회하는 가장 기본적인 명령어입니다.
SELECT column1, column2
FROM table_name;
SELECT *
FROM employees;
SELECT
first_name,
last_name,
salary
FROM employees;
WHERE 절
조건에 맞는 데이터만 필터링합니다.
SELECT *
FROM employees
WHERE department = 'Engineering';
SELECT *
FROM products
WHERE price > 100 AND stock > 0;
SELECT *
FROM users
WHERE email LIKE '%@gmail.com';
SELECT *
FROM orders
WHERE order_date BETWEEN '2024-01-01' AND '2024-12-31';
SELECT *
FROM products
WHERE category IN ('Electronics', 'Books', 'Clothing');
ORDER BY와 LIMIT
결과를 정렬하고 개수를 제한합니다.
SELECT *
FROM products
ORDER BY price DESC;
SELECT *
FROM employees
ORDER BY department ASC, salary DESC;
SELECT *
FROM products
ORDER BY created_at DESC
LIMIT 10;
SELECT *
FROM products
ORDER BY price
LIMIT 10 OFFSET 20;
SQL 쿼리 실행 순서
SQL 쿼리는 작성 순서와 다르게 실행됩니다. 이를 이해하면 더 효율적인 쿼리를 작성할 수 있습니다.
WHERE 절에서 SELECT에서 정의한 별칭(alias)을 사용할 수 없는 이유가 바로 실행 순서 때문입니다. WHERE는 SELECT보다 먼저 실행됩니다.
JOIN - 테이블 결합
여러 테이블의 데이터를 결합하여 조회합니다.
INNER JOIN
두 테이블에서 일치하는 행만 반환합니다.
SELECT
e.first_name,
e.last_name,
d.department_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.id;
SELECT
o.order_id,
c.customer_name,
p.product_name,
oi.quantity
FROM orders o
INNER JOIN customers c ON o.customer_id = c.id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id;
LEFT JOIN
왼쪽 테이블의 모든 행과 일치하는 오른쪽 행을 반환합니다.
SELECT
c.customer_name,
o.order_id,
o.order_date
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id;
SELECT
c.customer_name,
COUNT(o.id) as order_count
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.customer_name;
RIGHT JOIN과 FULL OUTER JOIN
SELECT
e.first_name,
d.department_name
FROM employees e
RIGHT JOIN departments d ON e.department_id = d.id;
SELECT
e.first_name,
d.department_name
FROM employees e
FULL OUTER JOIN departments d ON e.department_id = d.id;
대부분의 경우 LEFT JOIN으로 충분합니다. RIGHT JOIN은 LEFT JOIN으로 테이블 순서를 바꿔 표현할 수 있습니다.
집계 함수와 GROUP BY
기본 집계 함수
SELECT
COUNT(*) as total_employees,
AVG(salary) as avg_salary,
MAX(salary) as max_salary,
MIN(salary) as min_salary,
SUM(salary) as total_salary
FROM employees;
SELECT
department,
COUNT(*) as employee_count,
AVG(salary) as avg_salary
FROM employees
GROUP BY department;
HAVING 절
GROUP BY 결과를 필터링합니다.
SELECT
department,
COUNT(*) as employee_count,
AVG(salary) as avg_salary
FROM employees
GROUP BY department
HAVING COUNT(*) >= 5;
SELECT
category,
SUM(quantity * price) as total_revenue
FROM order_items oi
JOIN products p ON oi.product_id = p.id
GROUP BY category
HAVING SUM(quantity * price) > 10000
ORDER BY total_revenue DESC;
WHERE는 GROUP BY 전에 개별 행을 필터링하고, HAVING은 GROUP BY 후에 그룹을 필터링합니다. 목적에 맞게 사용하세요.
서브쿼리
쿼리 안에 포함된 또 다른 쿼리입니다.
스칼라 서브쿼리
SELECT
first_name,
salary,
(SELECT AVG(salary) FROM employees) as avg_salary
FROM employees;
SELECT *
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
IN을 사용한 서브쿼리
SELECT *
FROM products
WHERE category_id IN (
SELECT id
FROM categories
WHERE name LIKE '%Electronics%'
);
SELECT *
FROM customers
WHERE id NOT IN (
SELECT DISTINCT customer_id
FROM orders
WHERE order_date > '2024-01-01'
);
EXISTS 서브쿼리
SELECT *
FROM customers c
WHERE EXISTS (
SELECT 1
FROM orders o
WHERE o.customer_id = c.id
);
SELECT *
FROM products p
WHERE NOT EXISTS (
SELECT 1
FROM order_items oi
WHERE oi.product_id = p.id
);
FROM 절의 서브쿼리
SELECT
department,
avg_salary
FROM (
SELECT
department,
AVG(salary) as avg_salary
FROM employees
GROUP BY department
) as dept_salaries
WHERE avg_salary > 50000;
윈도우 함수
집계 함수와 달리 행을 그룹화하지 않고 계산합니다.
ROW_NUMBER, RANK, DENSE_RANK
SELECT
first_name,
department,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num,
RANK() OVER (ORDER BY salary DESC) as rank,
DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank
FROM employees;
SELECT
first_name,
department,
salary,
ROW_NUMBER() OVER (
PARTITION BY department
ORDER BY salary DESC
) as dept_rank
FROM employees;
LAG와 LEAD
이전/다음 행의 값에 접근합니다.
SELECT
order_date,
total_amount,
LAG(total_amount, 1) OVER (ORDER BY order_date) as prev_amount,
LEAD(total_amount, 1) OVER (ORDER BY order_date) as next_amount
FROM daily_sales;
SELECT
order_date,
total_amount,
total_amount - LAG(total_amount, 1, 0) OVER (ORDER BY order_date) as daily_change
FROM daily_sales;
누적 합계와 이동 평균
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) as running_total
FROM orders;
SELECT
order_date,
amount,
AVG(amount) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as moving_avg_7days
FROM daily_sales;
데이터베이스 정규화
데이터 중복을 줄이고 무결성을 보장하기 위한 설계 원칙입니다.
정규화 단계
| 정규형 | 조건 |
|---|---|
| 1NF | 모든 속성이 원자값을 가짐 |
| 2NF | 1NF + 부분 함수 종속 제거 |
| 3NF | 2NF + 이행 함수 종속 제거 |
| BCNF | 모든 결정자가 후보키 |
인덱스와 성능 최적화
인덱스 생성
CREATE INDEX idx_employee_department
ON employees(department);
CREATE INDEX idx_order_customer_date
ON orders(customer_id, order_date);
CREATE UNIQUE INDEX idx_user_email
ON users(email);
실행 계획 분석
EXPLAIN SELECT *
FROM employees
WHERE department = 'Engineering';
EXPLAIN ANALYZE
SELECT e.*, d.department_name
FROM employees e
JOIN departments d ON e.department_id = d.id
WHERE e.salary > 50000;
쿼리 최적화 팁
SELECT id, name
FROM products
WHERE category = 'Electronics';
SELECT *
FROM products
WHERE YEAR(created_at) = 2024;
SELECT *
FROM products
WHERE created_at >= '2024-01-01'
AND created_at < '2025-01-01';
SELECT *
FROM orders o
WHERE EXISTS (
SELECT 1 FROM customers c
WHERE c.id = o.customer_id
AND c.status = 'active'
);
인덱스는 조회 성능을 높이지만 INSERT, UPDATE, DELETE 성능에는 영향을 줄 수 있습니다. 자주 조회되는 컬럼에만 인덱스를 생성하세요.
실전 예제
월별 매출 분석
SELECT
DATE_FORMAT(order_date, '%Y-%m') as month,
COUNT(*) as order_count,
SUM(total_amount) as monthly_revenue,
AVG(total_amount) as avg_order_value
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
ORDER BY month;
고객 등급 분류
WITH customer_stats AS (
SELECT
c.id,
c.name,
COUNT(o.id) as order_count,
SUM(o.total_amount) as total_spent
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.name
)
SELECT
name,
order_count,
total_spent,
CASE
WHEN total_spent >= 100000 THEN 'VIP'
WHEN total_spent >= 50000 THEN 'Gold'
WHEN total_spent >= 10000 THEN 'Silver'
ELSE 'Bronze'
END as customer_tier
FROM customer_stats
ORDER BY total_spent DESC;
제품 카테고리별 순위
WITH product_sales AS (
SELECT
p.id,
p.name,
p.category,
SUM(oi.quantity) as total_sold,
SUM(oi.quantity * oi.price) as revenue
FROM products p
JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id, p.name, p.category
)
SELECT
name,
category,
total_sold,
revenue,
RANK() OVER (PARTITION BY category ORDER BY revenue DESC) as category_rank
FROM product_sales
ORDER BY category, category_rank;
마무리
SQL은 데이터를 다루는 모든 개발자에게 필수적인 기술입니다. 핵심 포인트를 정리하면 다음과 같습니다.
- SELECT, WHERE, JOIN은 가장 기본적이고 자주 사용되는 명령어
- GROUP BY와 집계 함수로 데이터를 요약
- 서브쿼리로 복잡한 조건을 표현
- 윈도우 함수로 고급 분석 쿼리 작성
- 인덱스를 활용한 성능 최적화
다음 글에서는 데이터베이스 설계와 NoSQL과의 비교에 대해 다루겠습니다.