数据结构

Python 提供了多种内置数据结构，包括列表、元组、字典、集合等。

列表 (List)

创建列表

python

# Create lists from literals
fruits = ["苹果", "香蕉", "橙子"]
numbers = [1, 2, 3, 4, 5]
mixed = [1, "hello", 3.14, True]  # elements may have different types
empty = []

# Build a list from any iterable with list()
chars = list("hello")  # ['h', 'e', 'l', 'l', 'o']
range_list = list(range(5))  # [0, 1, 2, 3, 4]

# List comprehension
squares = [x ** 2 for x in range(1, 6)]
# [1, 4, 9, 16, 25]

访问元素

python

fruits = ["苹果", "香蕉", "橙子", "葡萄", "西瓜"]

# Index access (0-based)
print(fruits[0])   # 苹果
print(fruits[-1])  # 西瓜 (negative indices count from the end)
print(fruits[-2])  # 葡萄

# Slicing: [start:stop:step], stop is exclusive
print(fruits[1:3])   # ['香蕉', '橙子']
print(fruits[:3])    # ['苹果', '香蕉', '橙子']
print(fruits[2:])    # ['橙子', '葡萄', '西瓜']
print(fruits[::2])   # ['苹果', '橙子', '西瓜'] (step of 2)
print(fruits[::-1])  # ['西瓜', '葡萄', '橙子', '香蕉', '苹果'] (reversed copy)

# Iterate over the elements
for fruit in fruits:
    print(fruit)

# Iterate with the index
for index, fruit in enumerate(fruits):
    print(f"{index}: {fruit}")

修改列表

python

fruits = ["苹果", "香蕉", "橙子"]

# Replace an element by index
fruits[0] = "草莓"
print(fruits)  # ['草莓', '香蕉', '橙子']

# Add elements
fruits.append("葡萄")      # append at the end
fruits.insert(1, "西瓜")   # insert before index 1
print(fruits)  # ['草莓', '西瓜', '香蕉', '橙子', '葡萄']

# Extend with every item of another iterable
fruits.extend(["芒果", "荔枝"])
print(fruits)  # ['草莓', '西瓜', '香蕉', '橙子', '葡萄', '芒果', '荔枝']

# Remove elements
fruits.remove("香蕉")      # remove the first matching value
del fruits[0]              # delete by index
popped = fruits.pop()      # pop the last element ('荔枝')
popped = fruits.pop(0)     # pop the element at index 0 ('西瓜')
fruits.clear()             # remove everything

列表操作

python

numbers = [3, 1, 4, 1, 5, 9, 2, 6]

# Length
print(len(numbers))  # 8

# Count occurrences of a value
print(numbers.count(1))  # 2

# Index of the first occurrence
print(numbers.index(5))  # 4

# Sorting (all three calls sort in place and return None)
numbers.sort()           # ascending: [1, 1, 2, 3, 4, 5, 6, 9]
numbers.sort(reverse=True)  # descending: [9, 6, 5, 4, 3, 2, 1, 1]
numbers.sort(key=lambda x: -x)  # custom key; negating also yields descending order

# Reverse
numbers.reverse()  # in place; the list is now ascending again

# Shallow copies
new_list = numbers.copy()
new_list = numbers[:]

# Membership
print(3 in numbers)      # True
print(10 in numbers)     # False

# Extremes and sum
print(max(numbers))  # 9
print(min(numbers))  # 1
print(sum(numbers))  # 31

列表嵌套

python

# Two-dimensional list (a list of rows)
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]

# Element access: matrix[row][column]
print(matrix[0][0])  # 1
print(matrix[1][2])  # 6

# Iterate row by row, printing each row on its own line
for row in matrix:
    for item in row:
        print(item, end=" ")
    print()

# Flatten with a nested list comprehension
flat = [item for row in matrix for item in row]
print(flat)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Transpose (range(3) is hard-coded to the 3 columns of this matrix)
transposed = [[row[i] for row in matrix] for i in range(3)]
print(transposed)  # [[1, 4, 7], [2, 5, 8], [3, 6, 9]]

元组 (Tuple)

创建元组

python

# Create tuples
point = (3, 4)
single = (1,)          # a one-element tuple needs the trailing comma
empty = ()
coordinates = 1, 2, 3  # the parentheses are optional

# Build a tuple from any iterable with tuple()
numbers = tuple([1, 2, 3])
chars = tuple("hello")  # ('h', 'e', 'l', 'l', 'o')

# Named tuple: fields are accessible by name
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(3, 4)
print(p.x, p.y)  # 3 4

元组操作

python

point = (3, 4)

# Index access
print(point[0])   # 3
print(point[-1])  # 4

# Slicing returns a new tuple
print(point[:1])  # (3,)

# Unpacking
x, y = point
print(x, y)  # 3 4

# Extended unpacking: the starred name collects the middle items as a list
a, *b, c = (1, 2, 3, 4, 5)
print(a)  # 1
print(b)  # [2, 3, 4]
print(c)  # 5

# Tuples are immutable
# point[0] = 5  # TypeError

# The name can still be rebound to a new tuple
point = (5, 6)

# A tuple may hold mutable objects
data = ([1, 2], [3, 4])
data[0].append(3)  # mutates the inner list, not the tuple itself
print(data)  # ([1, 2, 3], [3, 4])

元组方法

python

numbers = (1, 2, 3, 2, 2, 4)

# Count occurrences of a value
print(numbers.count(2))  # 3

# Index of the first occurrence
print(numbers.index(3))  # 2

# Length
print(len(numbers))  # 6

# Extremes
print(max(numbers))  # 4
print(min(numbers))  # 1

# Sum
print(sum(numbers))  # 14

元组 vs 列表

python

# Tuple: immutable, so it can be used as a dict key
locations = {(0, 0): "原点", (1, 0): "x轴"}

# List: mutable, so it cannot be used as a dict key
# locations = {[0, 0]: "原点"}  # TypeError

# A tuple usually takes less memory than a list with the same items
# (exact sizes depend on the interpreter; getsizeof is shallow)
import sys
lst = [1, 2, 3, 4, 5]
tpl = (1, 2, 3, 4, 5)
print(sys.getsizeof(lst))  # larger
print(sys.getsizeof(tpl))  # smaller

字典 (Dict)

创建字典

python

# Create dicts from literals
person = {"name": "张三", "age": 25, "city": "北京"}
empty = {}

# Build a dict with dict(): keyword arguments or an iterable of pairs
person2 = dict(name="李四", age=30)
person3 = dict([("name", "王五"), ("age", 28)])

# Dict comprehension
squares = {x: x ** 2 for x in range(1, 6)}
# {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# fromkeys: every key gets the same initial value
keys = ["a", "b", "c"]
d = dict.fromkeys(keys, 0)
# {'a': 0, 'b': 0, 'c': 0}

访问字典

python

person = {"name": "张三", "age": 25, "city": "北京"}

# Access by key
print(person["name"])  # 张三
# print(person["gender"])  # KeyError

# get() returns a default instead of raising when the key is missing
print(person.get("name"))      # 张三
print(person.get("gender"))    # None
print(person.get("gender", "未知"))  # 未知

# Check whether a key exists
print("name" in person)    # True
print("gender" in person)  # False

# Views of the keys, values and key/value pairs
print(person.keys())    # dict_keys(['name', 'age', 'city'])
print(person.values())  # dict_values(['张三', 25, '北京'])
print(person.items())   # dict_items([('name', '张三'), ...])

# Iterating a dict yields its keys
for key in person:
    print(key, person[key])

# Iterate over key/value pairs
for key, value in person.items():
    print(f"{key}: {value}")

修改字典

python

person = {"name": "张三", "age": 25}

# Assignment adds a new key or overwrites an existing one
person["city"] = "北京"    # add
person["age"] = 26         # overwrite

# update() merges another mapping in place
person.update({"gender": "男", "job": "工程师"})

# setdefault: set the value only if the key is missing
person.setdefault("hobby", "阅读")
print(person["hobby"])  # 阅读

# Delete
del person["city"]           # delete a key/value pair
age = person.pop("age")      # remove the key and return its value (26)
job = person.pop("job", None)  # the second argument is returned if the key is missing

# popitem: remove and return the last inserted pair (Python 3.7+)
item = person.popitem()  # ('hobby', '阅读')

# Remove everything
person.clear()

字典操作

python

# Merge into a new dict (Python 3.9+)
dict1 = {"a": 1, "b": 2}
dict2 = {"c": 3, "d": 4}
merged = dict1 | dict2
print(merged)  # {'a': 1, 'b': 2, 'c': 3, 'd': 4}

# Merge in place (Python 3.9+); dict1 now also holds 'c' and 'd'
dict1 |= dict2

# Merge with ** unpacking; on duplicate keys the later mapping wins
merged = {**dict1, **dict2}

# Nested dicts
users = {
    "user1": {"name": "张三", "age": 25},
    "user2": {"name": "李四", "age": 30}
}
print(users["user1"]["name"])  # 张三

默认字典

python

from collections import defaultdict

# Missing keys start as an empty list (group words by length)
word_lengths = defaultdict(list)
words = ["apple", "banana", "cherry", "date"]
for word in words:
    word_lengths[len(word)].append(word)
print(word_lengths)
# defaultdict(<class 'list'>, {5: ['apple'], 6: ['banana', 'cherry'], 4: ['date']})

# Missing keys start as 0 (character counter)
counter = defaultdict(int)
for char in "hello":
    counter[char] += 1
print(counter)  # defaultdict(<class 'int'>, {'h': 1, 'e': 1, 'l': 2, 'o': 1})

# Missing keys start as an empty set
groups = defaultdict(set)
groups["A"].add("张三")
groups["A"].add("李四")
print(groups)  # defaultdict(<class 'set'>, {'A': {'张三', '李四'}}) (set order may vary)

有序字典

python

from collections import OrderedDict

# Since Python 3.7 a plain dict already preserves insertion order;
# OrderedDict adds extra order-related operations

od = OrderedDict()
od["a"] = 1
od["b"] = 2
od["c"] = 3

# Move a key to the end; the order becomes b, c, a
od.move_to_end("a")

# Pop from the end / from the front
last = od.popitem()  # ('a', 1)
first = od.popitem(last=False)  # ('b', 2)

计数器

python

from collections import Counter

# Count the characters of a string
text = "hello world"
counter = Counter(text)
print(counter)  # Counter({'l': 3, 'o': 2, 'h': 1, 'e': 1, ' ': 1, 'w': 1, 'r': 1, 'd': 1})

# Count the items of a list
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
word_count = Counter(words)
print(word_count)  # Counter({'apple': 3, 'banana': 2, 'cherry': 1})

# Methods
print(word_count.most_common(2))  # [('apple', 3), ('banana', 2)]
print(word_count.elements())      # an iterator, not a list
print(list(word_count.elements()))  # ['apple', 'apple', 'apple', 'banana', 'banana', 'cherry']

# update() adds counts instead of replacing them
word_count.update(["apple", "date"])
print(word_count)  # Counter({'apple': 4, 'banana': 2, 'cherry': 1, 'date': 1})

# Arithmetic between counters
c1 = Counter("aabbcc")
c2 = Counter("bbccdd")
print(c1 + c2)  # add counts: Counter({'b': 4, 'c': 4, 'a': 2, 'd': 2})
print(c1 - c2)  # subtract, keeping only positive counts: Counter({'a': 2})
print(c1 & c2)  # intersection (minimum of counts): Counter({'b': 2, 'c': 2})
print(c1 | c2)  # union (maximum of counts): Counter({'a': 2, 'b': 2, 'c': 2, 'd': 2})

集合 (Set)

创建集合

python

# Create sets
fruits = {"苹果", "香蕉", "橙子"}
numbers = set([1, 2, 3, 2, 1])  # {1, 2, 3} (duplicates are dropped)
empty = set()  # empty set; {} would create an empty dict instead

# Set comprehension
squares = {x ** 2 for x in range(1, 6)}
# {1, 4, 9, 16, 25}

集合操作

python

fruits = {"苹果", "香蕉", "橙子"}

# Add an element
fruits.add("葡萄")

# Remove by value
fruits.remove("香蕉")     # raises KeyError if the value is missing
fruits.discard("西瓜")    # does nothing if the value is missing

# Membership is checked before pop()/clear(): afterwards the set is empty
# and every membership test would be False
has_apple = "苹果" in fruits
has_watermelon = "西瓜" in fruits
print(has_apple)          # True
print(has_watermelon)     # False

# Size
size = len(fruits)
print(size)               # 3

# Destructive operations come last
item = fruits.pop()       # remove and return an arbitrary element
fruits.clear()            # remove everything

集合运算

python

a = {1, 2, 3, 4, 5}
b = {4, 5, 6, 7, 8}

# Union
print(a | b)              # {1, 2, 3, 4, 5, 6, 7, 8}
print(a.union(b))

# Intersection
print(a & b)              # {4, 5}
print(a.intersection(b))

# Difference
print(a - b)              # {1, 2, 3}
print(a.difference(b))

# Symmetric difference (union minus intersection)
print(a ^ b)              # {1, 2, 3, 6, 7, 8}
print(a.symmetric_difference(b))

# Subset / superset
c = {1, 2}
print(c.issubset(a))      # True
print(a.issuperset(c))    # True

# Disjoint: the two sets share no element
d = {6, 7}
print(a.isdisjoint(d))    # True

不可变集合

python

# frozenset: an immutable set
fs = frozenset([1, 2, 3])
# fs.add(4)  # AttributeError: frozenset has no add()

# It is hashable, so it can be used as a dict key
d = {frozenset([1, 2]): "value"}

字符串方法

字符串操作

python

s = "Hello, World!"

# Case conversion
print(s.lower())          # "hello, world!"
print(s.upper())          # "HELLO, WORLD!"
print(s.title())          # "Hello, World!"
print(s.capitalize())     # "Hello, world!"
print(s.swapcase())       # "hELLO, wORLD!"

# Searching
print(s.find("World"))    # 7
print(s.rfind("o"))       # 8
print(s.index("World"))   # 7 (raises ValueError if not found)
print(s.count("l"))       # 3

# Replacing
print(s.replace("World", "Python"))  # "Hello, Python!"

# Splitting and joining
print(s.split(", "))      # ['Hello', 'World!']
print(s.rsplit(" ", 1))   # ['Hello,', 'World!'] (splits from the right)
print("a\nb\nc".splitlines())  # ['a', 'b', 'c']
print(",".join(["a", "b", "c"]))  # "a,b,c"

# Stripping whitespace. A separate variable is used so that `s` keeps its
# original value for the prefix/suffix checks further down.
padded = "  hello  "
print(padded.strip())     # "hello"
print(padded.lstrip())    # "hello  "
print(padded.rstrip())    # "  hello"

# Predicates
print("123".isdigit())    # True
print("abc".isalpha())    # True
print("abc123".isalnum()) # True
print("hello".islower())  # True
print("HELLO".isupper())  # True
print("Hello World".istitle())  # True

# Prefix / suffix
print(s.startswith("Hello"))  # True
print(s.endswith("!"))        # True

# Padding
print("hello".center(10))     # "  hello   "
print("hello".ljust(10))      # "hello     "
print("hello".rjust(10))      # "     hello"
print("5".zfill(3))           # "005"

字符串格式化

python

name = "张三"
age = 25

# f-strings (recommended)
print(f"姓名: {name}, 年龄: {age}")  # 姓名: 张三, 年龄: 25
print(f"明年: {age + 1}岁")  # 明年: 26岁 (any expression is allowed)
print(f"浮点数: {3.14159:.2f}")  # 浮点数: 3.14
print(f"百分比: {0.85:.1%}")  # 百分比: 85.0%

# str.format(): positional, indexed and named fields
print("姓名: {}, 年龄: {}".format(name, age))
print("姓名: {0}, 年龄: {1}".format(name, age))
print("姓名: {name}, 年龄: {age}".format(name=name, age=age))

# Format specifiers
print("{:<10}".format("左对齐"))   # left-aligned in a field of width 10
print("{:>10}".format("右对齐"))   # right-aligned
print("{:^10}".format("居中"))     # centered
print("{:05d}".format(42))         # 00042
print("{:.2f}".format(3.14159))    # 3.14
print("{:,}".format(1234567))      # 1,234,567

队列和栈

列表实现

python

# Stack (last in, first out)
stack = []
stack.append(1)
stack.append(2)
stack.append(3)
print(stack.pop())  # 3

# Queue (first in, first out) - inefficient: list.pop(0) is O(n)
queue = []
queue.append(1)
queue.append(2)
queue.append(3)
print(queue.pop(0))  # 1

collections.deque

python

from collections import deque

# Double-ended queue
dq = deque([1, 2, 3])

# Right-end operations
dq.append(4)       # add on the right
dq.pop()           # pop from the right

# Left-end operations
dq.appendleft(0)   # add on the left
dq.popleft()       # pop from the left

# Rotation
dq.rotate(1)       # rotate one step to the right: deque([3, 1, 2])
dq.rotate(-1)      # rotate one step to the left: deque([1, 2, 3])

# Extending
dq.extend([4, 5])
dq.extendleft([0, -1])  # items are added one by one, so they end up reversed: -1, 0, 1, ...

# Used as a stack
stack = deque()
stack.append(1)
stack.append(2)
stack.pop()  # 2

# Used as a queue
queue = deque()
queue.append(1)
queue.append(2)
queue.popleft()  # 1

queue 模块

python

import queue

# FIFO queue
q = queue.Queue()
q.put(1)
q.put(2)
print(q.get())  # 1

# LIFO queue (stack)
lq = queue.LifoQueue()
lq.put(1)
lq.put(2)
print(lq.get())  # 2

# Priority queue: the smallest entry comes out first
pq = queue.PriorityQueue()
pq.put((2, "低优先级"))
pq.put((1, "高优先级"))
print(pq.get())  # (1, '高优先级')

堆

python

import heapq

# Build a heap (heapq implements a min-heap on a plain list)
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.heapify(numbers)  # rearranges the list into heap order in place
print(numbers)  # [1, 1, 2, 3, 5, 9, 4, 6]

# Push an element
heapq.heappush(numbers, 0)
print(numbers)  # [0, 1, 2, 1, 5, 9, 4, 6, 3]

# Pop the smallest element
print(heapq.heappop(numbers))  # 0

# Pop the smallest element, then push the new one
print(heapq.heapreplace(numbers, 10))  # 1

# The n smallest / largest elements
nums = [3, 1, 4, 1, 5, 9, 2, 6]
print(heapq.nsmallest(3, nums))  # [1, 1, 2]
print(heapq.nlargest(3, nums))   # [9, 6, 5]

# Merge already-sorted sequences into one sorted iterator
a = [1, 3, 5]
b = [2, 4, 6]
print(list(heapq.merge(a, b)))  # [1, 2, 3, 4, 5, 6]

实践示例

数据分析

python

from collections import Counter, defaultdict

def analyze_text(text: str) -> dict:
    """Return word statistics for *text*.

    The text is lower-cased and split on whitespace; punctuation is not
    stripped, so "dog." and "dog" count as different words.

    Args:
        text: The text to analyze.

    Returns:
        A dict with four keys:
        "word_count" (total number of words),
        "unique_words" (number of distinct words),
        "most_common" (up to five (word, count) pairs, most frequent first),
        "by_length" (dict mapping word length to the words of that length,
        in order of appearance, duplicates included).
    """
    words = text.lower().split()
    word_count = Counter(words)

    # Group the words by their length, preserving order of appearance
    by_length = defaultdict(list)
    for word in words:
        by_length[len(word)].append(word)

    return {
        "word_count": len(words),
        "unique_words": len(word_count),
        "most_common": word_count.most_common(5),
        # Convert to a plain dict so callers do not get auto-creating lookups
        "by_length": dict(by_length),
    }

# Run analyze_text on a sample sentence and print the resulting dict
text = "the quick brown fox jumps over the lazy dog the fox is quick"
result = analyze_text(text)
print(result)

数据处理管道

python

from collections import defaultdict


def process_data(data):
    """Run a small filter -> transform -> group -> summarize pipeline.

    Args:
        data: An iterable of numbers.

    Returns:
        A dict with two keys:
        "grouped" maps ``value % 3`` to the list of transformed values with
        that remainder;
        "stats" holds "count", "sum", "avg", "max" and "min" of the
        transformed values. With no positive input, "avg" is 0 and
        "max"/"min" are None.
    """
    # Filter: keep only the positive values
    filtered = [x for x in data if x > 0]

    # Transform: double every remaining value
    transformed = [x * 2 for x in filtered]

    # Group by remainder modulo 3
    grouped = defaultdict(list)
    for x in transformed:
        grouped[x % 3].append(x)

    # Summarize; the sum is computed once and reused for the average
    count = len(transformed)
    total = sum(transformed)
    stats = {
        "count": count,
        "sum": total,
        "avg": total / count if count else 0,
        "max": max(transformed) if transformed else None,
        "min": min(transformed) if transformed else None,
    }

    return {"grouped": dict(grouped), "stats": stats}

# Run process_data on a sample list that mixes positive and negative values
data = [1, -2, 3, 4, -5, 6, 7, 8, -9, 10]
result = process_data(data)
print(result)

数据结构

列表 (List)

创建列表

访问元素

修改列表

列表操作

列表嵌套

元组 (Tuple)

创建元组

元组操作

元组方法

元组 vs 列表

字典 (Dict)

创建字典

访问字典

修改字典

字典操作

默认字典

有序字典

计数器

集合 (Set)

创建集合

集合操作

集合运算

不可变集合

字符串方法

字符串操作

字符串格式化

队列和栈

列表实现

collections.deque

queue 模块

堆

实践示例

数据分析

数据处理管道

数据结构

列表 (List)

创建列表

访问元素

修改列表

列表操作

列表嵌套

元组 (Tuple)

创建元组

元组操作

元组方法

元组 vs 列表

字典 (Dict)

创建字典

访问字典

修改字典

字典操作

默认字典

有序字典

计数器

集合 (Set)

创建集合

集合操作

集合运算

不可变集合

字符串方法

字符串操作

字符串格式化

队列和栈

列表实现

collections.deque

queue 模块

堆

实践示例

数据分析

数据处理管道