Kwangmin Kim - Python 필수 내장 함수·자료구조 레퍼런스 (Level 3)

1 개요

Level 3 코딩 테스트는 단순 구현을 넘어 탐색 최적화와 동적 프로그래밍 기초를 요구한다. DS 트랙은 데이터 분석 패턴(누적합, 빈도 분석)을, AIE 트랙은 그래프·DP 패턴을 우선시한다. 아래 도구 목록을 트랙별 우선순위에 따라 학습하면 Level 3를 효율적으로 대비할 수 있다.

2 DS vs AIE 트랙 우선순위

도구/개념	DS 우선순위	AIE 우선순위	이유
`bisect`	★★★	★★★	정렬 배열 탐색
`heapq`	★★☆	★★★	우선순위 큐, Dijkstra
`functools.lru_cache`	★★★	★★★	DP 메모이제이션
`itertools.accumulate`	★★★	★★☆	누적합
BFS/DFS 기본 패턴	★★☆	★★★	그래프 탐색
`re` (정규식)	★★★	★☆☆	문자열 패턴 매칭
`collections.OrderedDict`	★★☆	★☆☆	순서 있는 dict
`sys.stdin` 빠른 입력	★☆☆	★★★	대용량 입력 처리

3 bisect — 이진 탐색 (정렬 배열에서 삽입 위치 찾기)

from bisect import bisect_left, bisect_right, insort

arr = [1, 3, 3, 5, 7]

bisect_left(arr, 3)        # → 1  (3이 처음 나오는 위치)
bisect_right(arr, 3)       # → 3  (3이 마지막으로 나오는 위치 + 1)
bisect_left(arr, 4)        # → 3  (4를 삽입할 위치 → arr[3]=5 앞)

# 특정 값의 개수
count = bisect_right(arr, 3) - bisect_left(arr, 3)  # → 2  (3이 2개)

# 특정 범위 [lo, hi]의 원소 수
def count_range(arr, lo, hi):
    return bisect_right(arr, hi) - bisect_left(arr, lo)

count_range(arr, 2, 5)    # → 3  (3,3,5가 해당)

# insort: 정렬 상태를 유지하며 삽입
insort(arr, 4)             # arr → [1, 3, 3, 4, 5, 7]  — O(n) 삽입이지만 O(log n) 탐색

DS 관점: 누적 빈도 계산, 분위수 조회에 활용한다. AIE 관점: 매개변수 탐색(parametric search), Upper/Lower bound 패턴에 활용한다.

4 heapq — 우선순위 큐

import heapq

# 최소 힙 (기본)
heap = []
heapq.heappush(heap, 5)    # heap → [5]
heapq.heappush(heap, 2)    # heap → [2, 5]
heapq.heappush(heap, 8)    # heap → [2, 5, 8]
heapq.heappop(heap)        # → 2, heap → [5, 8]
heap[0]                    # → 5  (최솟값 조회, 추출 안 함)

# heapify: 기존 리스트를 힙으로 변환 — O(n)
arr = [5, 3, 1, 4, 2]
heapq.heapify(arr)         # arr → [1, 3, 2, 4, 5]  (힙 구조)

# 최대 힙: 음수 부호로 구현
nums = [3, 1, 4, 1, 5]
max_heap = []
for x in nums:
    heapq.heappush(max_heap, -x)
-heapq.heappop(max_heap)   # → 5  (최댓값)

# Top-K 패턴
arr = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.nlargest(3, arr)     # → [9, 6, 5]
heapq.nsmallest(3, arr)    # → [1, 1, 2]

# (우선순위, 데이터) 튜플로 사용 — Level 3의 핵심 패턴
tasks = [(3, 'low'), (1, 'high'), (2, 'medium')]
heapq.heapify(tasks)
heapq.heappop(tasks)       # → (1, 'high')  — 우선순위 1이 먼저 나옴

DS 관점: 상위 K개 결과 추출(Top-K), 스트리밍 데이터 최솟값 유지에 활용한다. AIE 관점: Dijkstra 최단 경로, A* 탐색, K-way 병합에 필수다.

5 functools.lru_cache — DP 메모이제이션

from functools import lru_cache

# 피보나치 — 메모이제이션 없이 O(2^n), 있으면 O(n)
@lru_cache(maxsize=None)
def fib(n):
    if n <= 1:
        return n
    return fib(n-1) + fib(n-2)

fib(10)    # → 55
fib(50)    # → 12586269025  (메모이제이션으로 빠르게 계산)
fib.cache_info()  # CacheInfo(hits=48, misses=51, maxsize=None, currsize=51)

# 주의: 인수가 해시 가능해야 한다 (list → tuple로 변환)
@lru_cache(maxsize=None)
def dp(items, capacity):   # items는 tuple이어야 한다
    pass

# 캐시 초기화
fib.cache_clear()

DS 관점: 조합 최적화, 통계적 재귀 계산(파스칼 삼각형 등)에 활용한다. AIE 관점: DP 문제(최장 공통 부분 수열, 배낭 문제 등) 구현에 필수다.

6 itertools 심화 — accumulate, groupby, chain

from itertools import accumulate, groupby, chain
import operator

# accumulate: 누적 연산 — 구간 합에 사용
arr = [1, 2, 3, 4, 5]
list(accumulate(arr))                  # → [1, 3, 6, 10, 15]  (누적합)
list(accumulate(arr, operator.mul))   # → [1, 2, 6, 24, 120]  (누적곱)
list(accumulate(arr, max))             # → [1, 2, 3, 4, 5]  (누적 최댓값)

# 구간 합 쿼리 O(1) — 전처리 O(n)
prefix = [0] + list(accumulate(arr))  # → [0, 1, 3, 6, 10, 15]
# 인덱스 i~j 합: prefix[j+1] - prefix[i]
prefix[4] - prefix[1]                 # → 9  (인덱스 1~3: 2+3+4)

# groupby: 연속된 같은 값 그루핑 (정렬 선행 필요)
data = [('a', 1), ('a', 2), ('b', 1), ('b', 3), ('a', 4)]
data.sort(key=lambda x: x[0])         # 반드시 정렬 먼저
for key, group in groupby(data, key=lambda x: x[0]):
    print(key, list(group))
# → a [('a', 1), ('a', 2), ('a', 4)]
# → b [('b', 1), ('b', 3)]

# chain: 여러 이터러블을 하나로 연결
a = [1, 2]
b = [3, 4]
c = [5]
list(chain(a, b, c))                  # → [1, 2, 3, 4, 5]
list(chain.from_iterable([[1,2],[3,4]]))  # → [1, 2, 3, 4]

7 BFS/DFS 기본 패턴 — Level 3 그래프 필수

from collections import deque

# BFS — 최단 거리, 레벨 순회
def bfs(graph, start):
    visited = set([start])
    queue = deque([start])
    order = []
    while queue:
        node = queue.popleft()    # FIFO: 먼저 넣은 것 먼저 처리
        order.append(node)
        for neighbor in graph[node]:
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append(neighbor)
    return order

graph = {1: [2, 3], 2: [4], 3: [4], 4: []}
bfs(graph, 1)    # → [1, 2, 3, 4]  (레벨 순)

# DFS — 경로 탐색, 백트래킹
def dfs(graph, node, visited=None):
    if visited is None:
        visited = set()
    visited.add(node)
    order = [node]
    for neighbor in graph[node]:
        if neighbor not in visited:
            order += dfs(graph, neighbor, visited)
    return order

dfs(graph, 1)    # → [1, 2, 4, 3]  (깊이 우선)

# 2D 그리드 BFS (미로 탐색) — Level 3 자주 나오는 패턴
def grid_bfs(grid, start, end):
    rows, cols = len(grid), len(grid[0])
    visited = set([start])
    queue = deque([(start, 0)])   # (좌표, 거리)
    while queue:
        (r, c), dist = queue.popleft()
        if (r, c) == end:
            return dist
        for dr, dc in [(-1,0),(1,0),(0,-1),(0,1)]:   # 상하좌우
            nr, nc = r+dr, c+dc
            if 0 <= nr < rows and 0 <= nc < cols and grid[nr][nc] != '#' and (nr,nc) not in visited:
                visited.add((nr, nc))
                queue.append(((nr, nc), dist+1))
    return -1

DS 관점: 연결 성분 파악, 계층 구조 탐색에 활용한다. AIE 관점: 최단 경로, 미로 탐색, 상태 공간 탐색의 핵심이다.

8 re — 정규식 (DS 우선)

import re

# 핵심 메서드
re.search(r'\d+', 'abc123def')    # → Match 객체 (123), 없으면 None
re.findall(r'\d+', 'a1b22c333')   # → ['1', '22', '333']
re.sub(r'\s+', '_', 'hello world')  # → 'hello_world'
re.split(r'[,\s]+', 'a, b  c')    # → ['a', 'b', 'c']

# 주요 패턴
r'\d'      # 숫자 [0-9]
r'\w'      # 단어 문자 [a-zA-Z0-9_]
r'\s'      # 공백
r'.'       # 임의의 한 문자
r'^'       # 문자열 시작
r'$'       # 문자열 끝
r'+'       # 1개 이상
r'*'       # 0개 이상
r'?'       # 0개 또는 1개

# Level 3 자주 쓰는 패턴
text = "2024-01-15"
re.findall(r'(\d{4})-(\d{2})-(\d{2})', text)
# → [('2024', '01', '15')]

# 숫자만 추출
re.findall(r'-?\d+\.?\d*', "온도: -3.5, 습도: 78")
# → ['-3.5', '78']

9 sys — 빠른 입력 (AIE 우선)

import sys
input = sys.stdin.readline    # 빠른 입력으로 교체

# 기본 입력
n = int(input())
arr = list(map(int, input().split()))

# 여러 줄 입력
data = sys.stdin.read().split()    # 전체를 한 번에 읽어 분리

# 재귀 한도 늘리기 (DFS 깊이 문제 해결)
sys.setrecursionlimit(100000)

10 유형별 사용 도구 매핑 (Level 3)

문제 유형	DS 도구	AIE 도구	핵심 패턴
구간 합/최댓값	`accumulate`	`accumulate`	prefix sum
정렬 배열 탐색	`bisect`	`bisect`	lower/upper bound
DP 재귀	`lru_cache`	`lru_cache`	top-down memo
최단 경로 (가중치)	-	`heapq`	Dijkstra
Top-K 추출	`heapq.nlargest`	`heapq`	nlargest(k, arr)
BFS 탐색	`deque`	`deque`	최단 거리
문자열 패턴	`re`	-	findall, sub
그루핑/연속구간	`groupby`	`groupby`	정렬 후 groupby
이터러블 합치기	`chain`	`chain`	평탄화