Skip to content

数据结构

Python 提供了多种内置数据结构,包括列表、元组、字典、集合等。

列表 (List)

创建列表

python
# 创建列表
fruits = ["苹果", "香蕉", "橙子"]
numbers = [1, 2, 3, 4, 5]
mixed = [1, "hello", 3.14, True]
empty = []

# 使用 list() 函数
chars = list("hello")  # ['h', 'e', 'l', 'l', 'o']
range_list = list(range(5))  # [0, 1, 2, 3, 4]

# 列表推导式
squares = [x ** 2 for x in range(1, 6)]
# [1, 4, 9, 16, 25]

访问元素

python
fruits = ["苹果", "香蕉", "橙子", "葡萄", "西瓜"]

# 索引访问
print(fruits[0])   # 苹果
print(fruits[-1])  # 西瓜(负索引)
print(fruits[-2])  # 葡萄

# 切片
print(fruits[1:3])   # ['香蕉', '橙子']
print(fruits[:3])    # ['苹果', '香蕉', '橙子']
print(fruits[2:])    # ['橙子', '葡萄', '西瓜']
print(fruits[::2])   # ['苹果', '橙子', '西瓜'] 步长
print(fruits[::-1])  # ['西瓜', '葡萄', '橙子', '香蕉', '苹果'] 反转

# 遍历
for fruit in fruits:
    print(fruit)

# 带索引遍历
for index, fruit in enumerate(fruits):
    print(f"{index}: {fruit}")

修改列表

python
fruits = ["苹果", "香蕉", "橙子"]

# 修改元素
fruits[0] = "草莓"
print(fruits)  # ['草莓', '香蕉', '橙子']

# 添加元素
fruits.append("葡萄")      # 末尾添加
fruits.insert(1, "西瓜")   # 指定位置插入
print(fruits)

# 扩展列表
fruits.extend(["芒果", "荔枝"])
print(fruits)

# 删除元素
fruits.remove("香蕉")      # 按值删除
del fruits[0]              # 按索引删除
popped = fruits.pop()      # 弹出最后一个
popped = fruits.pop(0)     # 弹出指定位置
fruits.clear()             # 清空列表

列表操作

python
numbers = [3, 1, 4, 1, 5, 9, 2, 6]

# 长度
print(len(numbers))  # 8

# 计数
print(numbers.count(1))  # 2

# 查找索引
print(numbers.index(5))  # 4

# 排序
numbers.sort()           # 原地排序
numbers.sort(reverse=True)  # 降序
numbers.sort(key=lambda x: -x)  # 自定义排序

# 反转
numbers.reverse()  # 原地反转

# 复制
new_list = numbers.copy()
new_list = numbers[:]

# 判断
print(3 in numbers)      # True
print(10 in numbers)     # False

# 最值与求和
print(max(numbers))
print(min(numbers))
print(sum(numbers))

列表嵌套

python
# 二维列表
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]

# 访问元素
print(matrix[0][0])  # 1
print(matrix[1][2])  # 6

# 遍历
for row in matrix:
    for item in row:
        print(item, end=" ")
    print()

# 列表推导式
flat = [item for row in matrix for item in row]
print(flat)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# 转置
transposed = [[row[i] for row in matrix] for i in range(3)]
print(transposed)  # [[1, 4, 7], [2, 5, 8], [3, 6, 9]]

元组 (Tuple)

创建元组

python
# 创建元组
point = (3, 4)
single = (1,)          # 单元素元组需要逗号
empty = ()
coordinates = 1, 2, 3  # 省略括号

# 使用 tuple() 函数
numbers = tuple([1, 2, 3])
chars = tuple("hello")  # ('h', 'e', 'l', 'l', 'o')

# 命名元组
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(3, 4)
print(p.x, p.y)  # 3 4

元组操作

python
point = (3, 4)

# 访问
print(point[0])   # 3
print(point[-1])  # 4

# 切片
print(point[:1])  # (3,)

# 解包
x, y = point
print(x, y)  # 3 4

# 扩展解包
a, *b, c = (1, 2, 3, 4, 5)
print(a)  # 1
print(b)  # [2, 3, 4]
print(c)  # 5

# 元组不可修改
# point[0] = 5  # TypeError

# 但可以重新赋值
point = (5, 6)

# 元组可以包含可变对象
data = ([1, 2], [3, 4])
data[0].append(3)  # 可以修改列表元素
print(data)  # ([1, 2, 3], [3, 4])

元组方法

python
numbers = (1, 2, 3, 2, 2, 4)

# 计数
print(numbers.count(2))  # 3

# 索引
print(numbers.index(3))  # 2

# 长度
print(len(numbers))  # 6

# 最值
print(max(numbers))
print(min(numbers))

# 求和
print(sum(numbers))

元组 vs 列表

python
# 元组:不可变,可以作为字典键
locations = {(0, 0): "原点", (1, 0): "x轴"}

# 列表:可变,不能作为字典键
# locations = {[0, 0]: "原点"}  # TypeError

# 元组更节省内存
import sys
lst = [1, 2, 3, 4, 5]
tpl = (1, 2, 3, 4, 5)
print(sys.getsizeof(lst))  # 更大
print(sys.getsizeof(tpl))  # 更小

字典 (Dict)

创建字典

python
# 创建字典
person = {"name": "张三", "age": 25, "city": "北京"}
empty = {}

# 使用 dict() 函数
person2 = dict(name="李四", age=30)
person3 = dict([("name", "王五"), ("age", 28)])

# 字典推导式
squares = {x: x ** 2 for x in range(1, 6)}
# {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# fromkeys
keys = ["a", "b", "c"]
d = dict.fromkeys(keys, 0)
# {'a': 0, 'b': 0, 'c': 0}

访问字典

python
person = {"name": "张三", "age": 25, "city": "北京"}

# 通过键访问
print(person["name"])  # 张三
# print(person["gender"])  # KeyError

# 使用 get 方法
print(person.get("name"))      # 张三
print(person.get("gender"))    # None
print(person.get("gender", "未知"))  # 未知

# 检查键是否存在
print("name" in person)    # True
print("gender" in person)  # False

# 获取所有键、值、键值对
print(person.keys())    # dict_keys(['name', 'age', 'city'])
print(person.values())  # dict_values(['张三', 25, '北京'])
print(person.items())   # dict_items([('name', '张三'), ...])

# 遍历
for key in person:
    print(key, person[key])

for key, value in person.items():
    print(f"{key}: {value}")

修改字典

python
person = {"name": "张三", "age": 25}

# 添加/修改
person["city"] = "北京"    # 添加
person["age"] = 26         # 修改

# update 方法
person.update({"gender": "男", "job": "工程师"})

# setdefault - 键不存在时设置默认值
person.setdefault("hobby", "阅读")
print(person["hobby"])  # 阅读

# 删除
del person["city"]           # 删除键值对
age = person.pop("age")      # 弹出并返回值
job = person.pop("job", None)  # 键不存在时返回默认值

# popitem - 弹出最后一个键值对 (Python 3.7+)
item = person.popitem()

# 清空
person.clear()

字典操作

python
# 合并字典 (Python 3.9+)
dict1 = {"a": 1, "b": 2}
dict2 = {"c": 3, "d": 4}
merged = dict1 | dict2
print(merged)  # {'a': 1, 'b': 2, 'c': 3, 'd': 4}

# 更新字典 (Python 3.9+)
dict1 |= dict2

# 使用 ** 解包
merged = {**dict1, **dict2}

# 嵌套字典
users = {
    "user1": {"name": "张三", "age": 25},
    "user2": {"name": "李四", "age": 30}
}
print(users["user1"]["name"])  # 张三

默认字典

python
from collections import defaultdict

# Missing keys default to an empty list (handy for grouping values)
word_lengths = defaultdict(list)
words = ["apple", "banana", "cherry", "date"]
for word in words:
    word_lengths[len(word)].append(word)
print(word_lengths)
# defaultdict(<class 'list'>, {5: ['apple'], 6: ['banana', 'cherry'], 4: ['date']})

# Missing keys default to int() == 0 (handy for counting)
counter = defaultdict(int)
for char in "hello":
    counter[char] += 1
print(counter)  # defaultdict(<class 'int'>, {'h': 1, 'e': 1, 'l': 2, 'o': 1})

# Missing keys default to an empty set
groups = defaultdict(set)
groups["A"].add("张三")
groups["A"].add("李四")
print(groups)  # defaultdict(<class 'set'>, {'A': {'张三', '李四'}}) - set element order may vary

有序字典

python
from collections import OrderedDict

# Python 3.7+ 普通 dict 已经有序
# OrderedDict 提供额外功能

od = OrderedDict()
od["a"] = 1
od["b"] = 2
od["c"] = 3

# 移动到末尾
od.move_to_end("a")

# 弹出最后/最前
last = od.popitem()
first = od.popitem(last=False)

计数器

python
from collections import Counter

# 创建计数器
text = "hello world"
counter = Counter(text)
print(counter)  # Counter({'l': 3, 'o': 2, 'h': 1, 'e': 1, ' ': 1, 'w': 1, 'r': 1, 'd': 1})

# 从列表创建
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
word_count = Counter(words)
print(word_count)  # Counter({'apple': 3, 'banana': 2, 'cherry': 1})

# 方法
print(word_count.most_common(2))  # [('apple', 3), ('banana', 2)]
print(word_count.elements())      # 迭代器
print(list(word_count.elements()))  # ['apple', 'apple', 'apple', 'banana', 'banana', 'cherry']

# 更新
word_count.update(["apple", "date"])
print(word_count)

# 运算
c1 = Counter("aabbcc")
c2 = Counter("bbccdd")
print(c1 + c2)  # 加法
print(c1 - c2)  # 减法
print(c1 & c2)  # 交集
print(c1 | c2)  # 并集

集合 (Set)

创建集合

python
# 创建集合
fruits = {"苹果", "香蕉", "橙子"}
numbers = set([1, 2, 3, 2, 1])  # {1, 2, 3}
empty = set()  # 空集合(不能用 {})

# 集合推导式
squares = {x ** 2 for x in range(1, 6)}
# {1, 4, 9, 16, 25}

集合操作

python
fruits = {"苹果", "香蕉", "橙子"}

# Add an element
fruits.add("葡萄")

# Membership tests (done before the destructive operations below,
# otherwise the set would already be empty and both would print False)
print("苹果" in fruits)   # True
print("西瓜" in fruits)   # False

# Size
print(len(fruits))        # 4

# Remove elements
fruits.remove("香蕉")     # raises KeyError if the element is missing
fruits.discard("西瓜")    # does nothing if the element is missing
item = fruits.pop()       # removes and returns an arbitrary element
fruits.clear()            # empties the set
print(len(fruits))        # 0

集合运算

python
a = {1, 2, 3, 4, 5}
b = {4, 5, 6, 7, 8}

# 并集
print(a | b)              # {1, 2, 3, 4, 5, 6, 7, 8}
print(a.union(b))

# 交集
print(a & b)              # {4, 5}
print(a.intersection(b))

# 差集
print(a - b)              # {1, 2, 3}
print(a.difference(b))

# 对称差集(并集减交集)
print(a ^ b)              # {1, 2, 3, 6, 7, 8}
print(a.symmetric_difference(b))

# 子集/超集
c = {1, 2}
print(c.issubset(a))      # True
print(a.issuperset(c))    # True

# 不相交
d = {6, 7}
print(a.isdisjoint(d))    # True

不可变集合

python
# frozenset - 不可变集合
fs = frozenset([1, 2, 3])
# fs.add(4)  # AttributeError

# 可以作为字典键
d = {frozenset([1, 2]): "value"}

字符串方法

字符串操作

python
s = "Hello, World!"

# Case conversion
print(s.lower())          # "hello, world!"
print(s.upper())          # "HELLO, WORLD!"
print(s.title())          # "Hello, World!"
print(s.capitalize())     # "Hello, world!"
print(s.swapcase())       # "hELLO, wORLD!"

# Searching
print(s.find("World"))    # 7
print(s.rfind("o"))       # 8
print(s.index("World"))   # 7 (raises ValueError if not found)
print(s.count("l"))       # 3

# Replacing
print(s.replace("World", "Python"))  # "Hello, Python!"

# Splitting and joining
print(s.split(", "))      # ['Hello', 'World!']
print(s.rsplit(" ", 1))   # ['Hello,', 'World!'] (splits starting from the right)
print("a\nb\nc".splitlines())  # ['a', 'b', 'c']
print(",".join(["a", "b", "c"]))  # "a,b,c"

# Stripping whitespace
# A separate name is used so that `s` keeps its original value for the
# prefix/suffix checks further down.
padded = "  hello  "
print(padded.strip())     # "hello"
print(padded.lstrip())    # "hello  "
print(padded.rstrip())    # "  hello"

# Predicates
print("123".isdigit())    # True
print("abc".isalpha())    # True
print("abc123".isalnum()) # True
print("hello".islower())  # True
print("HELLO".isupper())  # True
print("Hello World".istitle())  # True

# Prefix / suffix
print(s.startswith("Hello"))  # True
print(s.endswith("!"))        # True

# Padding
print("hello".center(10))     # "  hello   "
print("hello".ljust(10))      # "hello     "
print("hello".rjust(10))      # "     hello"
print("5".zfill(3))           # "005"

字符串格式化

python
name = "张三"
age = 25

# f-string (推荐)
print(f"姓名: {name}, 年龄: {age}")
print(f"明年: {age + 1}岁")
print(f"浮点数: {3.14159:.2f}")
print(f"百分比: {0.85:.1%}")

# format 方法
print("姓名: {}, 年龄: {}".format(name, age))
print("姓名: {0}, 年龄: {1}".format(name, age))
print("姓名: {name}, 年龄: {age}".format(name=name, age=age))

# 格式化选项
print("{:<10}".format("左对齐"))   # 左对齐
print("{:>10}".format("右对齐"))   # 右对齐
print("{:^10}".format("居中"))     # 居中
print("{:05d}".format(42))         # 00042
print("{:.2f}".format(3.14159))    # 3.14
print("{:,}".format(1234567))      # 1,234,567

队列和栈

列表实现

python
# 栈(后进先出)
stack = []
stack.append(1)
stack.append(2)
stack.append(3)
print(stack.pop())  # 3

# 队列(先进先出)- 效率低
queue = []
queue.append(1)
queue.append(2)
queue.append(3)
print(queue.pop(0))  # 1

collections.deque

python
from collections import deque

# 双端队列
dq = deque([1, 2, 3])

# 右端操作
dq.append(4)       # 右端添加
dq.pop()           # 右端弹出

# 左端操作
dq.appendleft(0)   # 左端添加
dq.popleft()       # 左端弹出

# 旋转
dq.rotate(1)       # 向右旋转
dq.rotate(-1)      # 向左旋转

# 扩展
dq.extend([4, 5])
dq.extendleft([0, -1])

# 栈
stack = deque()
stack.append(1)
stack.append(2)
stack.pop()  # 2

# 队列
queue = deque()
queue.append(1)
queue.append(2)
queue.popleft()  # 1

queue 模块

python
import queue

# FIFO 队列
q = queue.Queue()
q.put(1)
q.put(2)
print(q.get())  # 1

# LIFO 队列(栈)
lq = queue.LifoQueue()
lq.put(1)
lq.put(2)
print(lq.get())  # 2

# 优先队列
pq = queue.PriorityQueue()
pq.put((2, "低优先级"))
pq.put((1, "高优先级"))
print(pq.get())  # (1, '高优先级')

heapq 模块

python
import heapq

# 创建堆
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.heapify(numbers)  # 原地转换为堆
print(numbers)  # [1, 1, 2, 3, 5, 9, 4, 6]

# 添加元素
heapq.heappush(numbers, 0)
print(numbers)  # [0, 1, 2, 1, 5, 9, 4, 6, 3]

# 弹出最小元素
print(heapq.heappop(numbers))  # 0

# 弹出并添加
print(heapq.heapreplace(numbers, 10))  # 弹出最小,添加新元素

# 查找最小/最大的 n 个元素
nums = [3, 1, 4, 1, 5, 9, 2, 6]
print(heapq.nsmallest(3, nums))  # [1, 1, 2]
print(heapq.nlargest(3, nums))   # [9, 6, 5]

# 合并有序序列
a = [1, 3, 5]
b = [2, 4, 6]
print(list(heapq.merge(a, b)))  # [1, 2, 3, 4, 5, 6]

实践示例

数据分析

python
from collections import Counter, defaultdict

def analyze_text(text):
    """Compute simple word statistics for a piece of text.

    The text is lower-cased and split on whitespace; no punctuation
    handling is performed.

    Args:
        text: The string to analyze.

    Returns:
        A dict with the keys:
            "word_count":   total number of words,
            "unique_words": number of distinct words,
            "most_common":  up to five (word, count) pairs, most frequent first,
            "by_length":    dict mapping word length to the list of words of
                            that length (duplicates kept, in input order).
    """
    # Word frequencies
    words = text.lower().split()
    word_count = Counter(words)

    # Group words by their length
    by_length = defaultdict(list)
    for word in words:
        by_length[len(word)].append(word)

    return {
        "word_count": len(words),
        "unique_words": len(word_count),
        "most_common": word_count.most_common(5),
        # Convert to a plain dict so the result prints without the defaultdict wrapper
        "by_length": dict(by_length)
    }

text = "the quick brown fox jumps over the lazy dog the fox is quick"
result = analyze_text(text)
print(result)

数据处理管道

python
def process_data(data):
    """Run a small filter -> transform -> group -> summarize pipeline.

    Args:
        data: An iterable of numbers.

    Returns:
        A dict with two keys:
            "grouped": dict mapping ``value % 3`` to the list of doubled
                       positive values with that remainder,
            "stats":   dict with "count", "sum", "avg", "max" and "min" of the
                       doubled positive values. When no value survives the
                       filter, "avg" is 0 and "max"/"min" are None.
    """
    # Imported locally so this snippet runs on its own, without relying on
    # an import made by an earlier example.
    from collections import defaultdict

    # Filter: keep strictly positive values
    filtered = [x for x in data if x > 0]

    # Transform: double each remaining value
    transformed = [x * 2 for x in filtered]

    # Group by remainder modulo 3
    grouped = defaultdict(list)
    for x in transformed:
        grouped[x % 3].append(x)

    # Summarize; the total is computed once and reused for the average
    count = len(transformed)
    total = sum(transformed)
    stats = {
        "count": count,
        "sum": total,
        "avg": total / count if transformed else 0,
        "max": max(transformed) if transformed else None,
        "min": min(transformed) if transformed else None
    }

    return {"grouped": dict(grouped), "stats": stats}

data = [1, -2, 3, 4, -5, 6, 7, 8, -9, 10]
result = process_data(data)
print(result)