Appearance
数据结构
Python 提供了多种内置数据结构,包括列表、元组、字典、集合等。
列表 (List)
创建列表
python
# 创建列表
fruits = ["苹果", "香蕉", "橙子"]
numbers = [1, 2, 3, 4, 5]
mixed = [1, "hello", 3.14, True]
empty = []
# 使用 list() 函数
chars = list("hello") # ['h', 'e', 'l', 'l', 'o']
range_list = list(range(5)) # [0, 1, 2, 3, 4]
# 列表推导式
squares = [x ** 2 for x in range(1, 6)]
# [1, 4, 9, 16, 25]
访问元素
python
fruits = ["苹果", "香蕉", "橙子", "葡萄", "西瓜"]
# 索引访问
print(fruits[0]) # 苹果
print(fruits[-1]) # 西瓜(负索引)
print(fruits[-2]) # 葡萄
# 切片
print(fruits[1:3]) # ['香蕉', '橙子']
print(fruits[:3]) # ['苹果', '香蕉', '橙子']
print(fruits[2:]) # ['橙子', '葡萄', '西瓜']
print(fruits[::2]) # ['苹果', '橙子', '西瓜'] 步长
print(fruits[::-1]) # ['西瓜', '葡萄', '橙子', '香蕉', '苹果'] 反转
# 遍历
for fruit in fruits:
print(fruit)
# 带索引遍历
for index, fruit in enumerate(fruits):
print(f"{index}: {fruit}")
修改列表
python
fruits = ["苹果", "香蕉", "橙子"]
# 修改元素
fruits[0] = "草莓"
print(fruits) # ['草莓', '香蕉', '橙子']
# 添加元素
fruits.append("葡萄") # 末尾添加
fruits.insert(1, "西瓜") # 指定位置插入
print(fruits)
# 扩展列表
fruits.extend(["芒果", "荔枝"])
print(fruits)
# 删除元素
fruits.remove("香蕉") # 按值删除
del fruits[0] # 按索引删除
popped = fruits.pop() # 弹出最后一个
popped = fruits.pop(0) # 弹出指定位置
fruits.clear() # 清空列表
列表操作
python
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
# 长度
print(len(numbers)) # 8
# 计数
print(numbers.count(1)) # 2
# 查找索引
print(numbers.index(5)) # 4
# 排序
numbers.sort() # 原地排序
numbers.sort(reverse=True) # 降序
numbers.sort(key=lambda x: -x) # 自定义排序
# 反转
numbers.reverse() # 原地反转
# 复制
new_list = numbers.copy()
new_list = numbers[:]
# 判断
print(3 in numbers) # True
print(10 in numbers) # False
# 最值与求和
print(max(numbers))
print(min(numbers))
print(sum(numbers))
列表嵌套
python
# 二维列表
matrix = [
[1, 2, 3],
[4, 5, 6],
[7, 8, 9]
]
# 访问元素
print(matrix[0][0]) # 1
print(matrix[1][2]) # 6
# 遍历
for row in matrix:
for item in row:
print(item, end=" ")
print()
# 列表推导式
flat = [item for row in matrix for item in row]
print(flat) # [1, 2, 3, 4, 5, 6, 7, 8, 9]
# 转置
transposed = [[row[i] for row in matrix] for i in range(3)]
print(transposed) # [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
元组 (Tuple)
创建元组
python
# 创建元组
point = (3, 4)
single = (1,) # 单元素元组需要逗号
empty = ()
coordinates = 1, 2, 3 # 省略括号
# 使用 tuple() 函数
numbers = tuple([1, 2, 3])
chars = tuple("hello") # ('h', 'e', 'l', 'l', 'o')
# 命名元组
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(3, 4)
print(p.x, p.y) # 3 4
元组操作
python
point = (3, 4)
# 访问
print(point[0]) # 3
print(point[-1]) # 4
# 切片
print(point[:1]) # (3,)
# 解包
x, y = point
print(x, y) # 3 4
# 扩展解包
a, *b, c = (1, 2, 3, 4, 5)
print(a) # 1
print(b) # [2, 3, 4]
print(c) # 5
# 元组不可修改
# point[0] = 5 # TypeError
# 但可以重新赋值
point = (5, 6)
# 元组可以包含可变对象
data = ([1, 2], [3, 4])
data[0].append(3) # 可以修改列表元素
print(data) # ([1, 2, 3], [3, 4])
元组方法
python
numbers = (1, 2, 3, 2, 2, 4)
# 计数
print(numbers.count(2)) # 3
# 索引
print(numbers.index(3)) # 2
# 长度
print(len(numbers)) # 6
# 最值
print(max(numbers))
print(min(numbers))
# 求和
print(sum(numbers))
元组 vs 列表
python
# 元组:不可变,可以作为字典键
locations = {(0, 0): "原点", (1, 0): "x轴"}
# 列表:可变,不能作为字典键
# locations = {[0, 0]: "原点"} # TypeError
# 元组更节省内存
import sys
lst = [1, 2, 3, 4, 5]
tpl = (1, 2, 3, 4, 5)
print(sys.getsizeof(lst)) # 更大
print(sys.getsizeof(tpl)) # 更小
字典 (Dict)
创建字典
python
# 创建字典
person = {"name": "张三", "age": 25, "city": "北京"}
empty = {}
# 使用 dict() 函数
person2 = dict(name="李四", age=30)
person3 = dict([("name", "王五"), ("age", 28)])
# 字典推导式
squares = {x: x ** 2 for x in range(1, 6)}
# {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
# fromkeys
keys = ["a", "b", "c"]
d = dict.fromkeys(keys, 0)
# {'a': 0, 'b': 0, 'c': 0}
访问字典
python
person = {"name": "张三", "age": 25, "city": "北京"}
# 通过键访问
print(person["name"]) # 张三
# print(person["gender"]) # KeyError
# 使用 get 方法
print(person.get("name")) # 张三
print(person.get("gender")) # None
print(person.get("gender", "未知")) # 未知
# 检查键是否存在
print("name" in person) # True
print("gender" in person) # False
# 获取所有键、值、键值对
print(person.keys()) # dict_keys(['name', 'age', 'city'])
print(person.values()) # dict_values(['张三', 25, '北京'])
print(person.items()) # dict_items([('name', '张三'), ...])
# 遍历
for key in person:
print(key, person[key])
for key, value in person.items():
print(f"{key}: {value}")
修改字典
python
person = {"name": "张三", "age": 25}
# 添加/修改
person["city"] = "北京" # 添加
person["age"] = 26 # 修改
# update 方法
person.update({"gender": "男", "job": "工程师"})
# setdefault - 键不存在时设置默认值
person.setdefault("hobby", "阅读")
print(person["hobby"]) # 阅读
# 删除
del person["city"] # 删除键值对
age = person.pop("age") # 弹出并返回值
job = person.pop("job", None) # 键不存在时返回默认值
# popitem - 弹出最后一个键值对 (Python 3.7+)
item = person.popitem()
# 清空
person.clear()
字典操作
python
# 合并字典 (Python 3.9+)
dict1 = {"a": 1, "b": 2}
dict2 = {"c": 3, "d": 4}
merged = dict1 | dict2
print(merged) # {'a': 1, 'b': 2, 'c': 3, 'd': 4}
# 更新字典 (Python 3.9+)
dict1 |= dict2
# 使用 ** 解包
merged = {**dict1, **dict2}
# 嵌套字典
users = {
"user1": {"name": "张三", "age": 25},
"user2": {"name": "李四", "age": 30}
}
print(users["user1"]["name"]) # 张三
默认字典
python
from collections import defaultdict
# 默认值为列表
word_lengths = defaultdict(list)
words = ["apple", "banana", "cherry", "date"]
for word in words:
word_lengths[len(word)].append(word)
print(word_lengths)
# {5: ['apple'], 6: ['banana', 'cherry'], 4: ['date']}
# 默认值为整数(计数器)
counter = defaultdict(int)
for char in "hello":
counter[char] += 1
print(counter) # {'h': 1, 'e': 1, 'l': 2, 'o': 1}
# 默认值为集合
groups = defaultdict(set)
groups["A"].add("张三")
groups["A"].add("李四")
print(groups) # {'A': {'张三', '李四'}}
有序字典
python
from collections import OrderedDict
# Python 3.7+ 普通 dict 已经有序
# OrderedDict 提供额外功能
od = OrderedDict()
od["a"] = 1
od["b"] = 2
od["c"] = 3
# 移动到末尾
od.move_to_end("a")
# 弹出最后/最前
last = od.popitem()
first = od.popitem(last=False)
计数器
python
from collections import Counter
# 创建计数器
text = "hello world"
counter = Counter(text)
print(counter) # Counter({'l': 3, 'o': 2, 'h': 1, 'e': 1, ' ': 1, 'w': 1, 'r': 1, 'd': 1})
# 从列表创建
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
word_count = Counter(words)
print(word_count) # Counter({'apple': 3, 'banana': 2, 'cherry': 1})
# 方法
print(word_count.most_common(2)) # [('apple', 3), ('banana', 2)]
print(word_count.elements()) # 迭代器
print(list(word_count.elements())) # ['apple', 'apple', 'apple', 'banana', 'banana', 'cherry']
# 更新
word_count.update(["apple", "date"])
print(word_count)
# 运算
c1 = Counter("aabbcc")
c2 = Counter("bbccdd")
print(c1 + c2) # 加法
print(c1 - c2) # 减法
print(c1 & c2) # 交集
print(c1 | c2) # 并集
集合 (Set)
创建集合
python
# 创建集合
fruits = {"苹果", "香蕉", "橙子"}
numbers = set([1, 2, 3, 2, 1]) # {1, 2, 3}
empty = set() # 空集合(不能用 {})
# 集合推导式
squares = {x ** 2 for x in range(1, 6)}
# {1, 4, 9, 16, 25}
集合操作
python
# Set operations demo: add an element, test membership, then remove elements.
fruits = {"苹果", "香蕉", "橙子"}
# Add an element
fruits.add("葡萄")
# Membership tests -- performed before the removals below, because once
# clear() has run the set is empty and both checks would print False
print("苹果" in fruits)  # True
print("西瓜" in fruits)  # False
# Remove elements
fruits.remove("香蕉")  # raises KeyError if the element is missing
fruits.discard("西瓜")  # no error if the element is missing
item = fruits.pop()  # removes and returns an arbitrary element
fruits.clear()  # empties the set
# 长度
print(len(fruits))
集合运算
python
a = {1, 2, 3, 4, 5}
b = {4, 5, 6, 7, 8}
# 并集
print(a | b) # {1, 2, 3, 4, 5, 6, 7, 8}
print(a.union(b))
# 交集
print(a & b) # {4, 5}
print(a.intersection(b))
# 差集
print(a - b) # {1, 2, 3}
print(a.difference(b))
# 对称差集(并集减交集)
print(a ^ b) # {1, 2, 3, 6, 7, 8}
print(a.symmetric_difference(b))
# 子集/超集
c = {1, 2}
print(c.issubset(a)) # True
print(a.issuperset(c)) # True
# 不相交
d = {6, 7}
print(a.isdisjoint(d)) # True
不可变集合
python
# frozenset - 不可变集合
fs = frozenset([1, 2, 3])
# fs.add(4) # AttributeError
# 可以作为字典键
d = {frozenset([1, 2]): "value"}
字符串方法
字符串操作
python
s = "Hello, World!"
# 大小写
print(s.lower()) # "hello, world!"
print(s.upper()) # "HELLO, WORLD!"
print(s.title()) # "Hello, World!"
print(s.capitalize()) # "Hello, world!"
print(s.swapcase()) # "hELLO, wORLD!"
# 查找
print(s.find("World")) # 7
print(s.rfind("o")) # 8
print(s.index("World")) # 7(找不到会报错)
print(s.count("l")) # 3
# 替换
print(s.replace("World", "Python"))
# 分割和连接
print(s.split(", ")) # ['Hello', 'World!']
print(s.rsplit(" ", 1)) # 从右边分割
print("a\nb\nc".splitlines()) # ['a', 'b', 'c']
print(",".join(["a", "b", "c"])) # "a,b,c"
# Strip whitespace. A separate variable is used so that `s` keeps its
# original value "Hello, World!" for the later startswith/endswith checks.
padded = " hello "
print(padded.strip())  # "hello"
print(padded.lstrip())  # "hello "
print(padded.rstrip())  # " hello"
# 判断
print("123".isdigit()) # True
print("abc".isalpha()) # True
print("abc123".isalnum()) # True
print("hello".islower()) # True
print("HELLO".isupper()) # True
print("Hello World".istitle()) # True
# 前后缀
print(s.startswith("Hello")) # True
print(s.endswith("!")) # True
# 填充
print("hello".center(10)) # " hello "
print("hello".ljust(10)) # "hello "
print("hello".rjust(10)) # " hello"
print("5".zfill(3)) # "005"
字符串格式化
python
name = "张三"
age = 25
# f-string (推荐)
print(f"姓名: {name}, 年龄: {age}")
print(f"明年: {age + 1}岁")
print(f"浮点数: {3.14159:.2f}")
print(f"百分比: {0.85:.1%}")
# format 方法
print("姓名: {}, 年龄: {}".format(name, age))
print("姓名: {0}, 年龄: {1}".format(name, age))
print("姓名: {name}, 年龄: {age}".format(name=name, age=age))
# 格式化选项
print("{:<10}".format("左对齐")) # 左对齐
print("{:>10}".format("右对齐")) # 右对齐
print("{:^10}".format("居中")) # 居中
print("{:05d}".format(42)) # 00042
print("{:.2f}".format(3.14159)) # 3.14
print("{:,}".format(1234567)) # 1,234,567
队列和栈
列表实现
python
# 栈(后进先出)
stack = []
stack.append(1)
stack.append(2)
stack.append(3)
print(stack.pop()) # 3
# 队列(先进先出)- 效率低
queue = []
queue.append(1)
queue.append(2)
queue.append(3)
print(queue.pop(0)) # 1
collections.deque
python
from collections import deque
# 双端队列
dq = deque([1, 2, 3])
# 右端操作
dq.append(4) # 右端添加
dq.pop() # 右端弹出
# 左端操作
dq.appendleft(0) # 左端添加
dq.popleft() # 左端弹出
# 旋转
dq.rotate(1) # 向右旋转
dq.rotate(-1) # 向左旋转
# 扩展
dq.extend([4, 5])
dq.extendleft([0, -1])
# 栈
stack = deque()
stack.append(1)
stack.append(2)
stack.pop() # 2
# 队列
queue = deque()
queue.append(1)
queue.append(2)
queue.popleft() # 1
queue 模块
python
import queue
# FIFO 队列
q = queue.Queue()
q.put(1)
q.put(2)
print(q.get()) # 1
# LIFO 队列(栈)
lq = queue.LifoQueue()
lq.put(1)
lq.put(2)
print(lq.get()) # 2
# 优先队列
pq = queue.PriorityQueue()
pq.put((2, "低优先级"))
pq.put((1, "高优先级"))
print(pq.get()) # (1, '高优先级')
堆
python
import heapq
# 创建堆
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.heapify(numbers) # 原地转换为堆
print(numbers) # [1, 1, 2, 3, 5, 9, 4, 6]
# 添加元素
heapq.heappush(numbers, 0)
print(numbers) # [0, 1, 2, 1, 5, 9, 4, 6, 3]
# 弹出最小元素
print(heapq.heappop(numbers)) # 0
# 弹出并添加
print(heapq.heapreplace(numbers, 10)) # 弹出最小,添加新元素
# 查找最小/最大的 n 个元素
nums = [3, 1, 4, 1, 5, 9, 2, 6]
print(heapq.nsmallest(3, nums)) # [1, 1, 2]
print(heapq.nlargest(3, nums)) # [9, 6, 5]
# 合并有序序列
a = [1, 3, 5]
b = [2, 4, 6]
print(list(heapq.merge(a, b))) # [1, 2, 3, 4, 5, 6]
实践示例
数据分析
python
from collections import Counter, defaultdict
def analyze_text(text):
    """Compute simple word statistics for a piece of text.

    The text is lower-cased and split on whitespace, so counting is
    case-insensitive and punctuation stays attached to its word.

    Args:
        text: The string to analyze.

    Returns:
        A dict with four keys: "word_count" (total number of words),
        "unique_words" (number of distinct words), "most_common" (up to
        five ``(word, count)`` pairs, most frequent first) and
        "by_length" (a dict mapping each word length to the list of words
        of that length, in order of appearance, duplicates included).
    """
    # Word statistics
    words = text.lower().split()
    word_count = Counter(words)
    # Group the words by their length
    by_length = defaultdict(list)
    for word in words:
        by_length[len(word)].append(word)
    return {
        "word_count": len(words),
        "unique_words": len(word_count),
        "most_common": word_count.most_common(5),
        # Convert to a plain dict so callers do not get auto-creating lookups
        "by_length": dict(by_length),
    }
text = "the quick brown fox jumps over the lazy dog the fox is quick"
result = analyze_text(text)
print(result)
数据处理管道
python
def process_data(data):
    """Run a small filter -> transform -> group -> summarize pipeline.

    Args:
        data: Iterable of numbers.

    Returns:
        A dict with two keys: "grouped" maps ``value % 3`` to the list of
        doubled positive values with that remainder; "stats" holds the
        count, sum, avg, max and min of the doubled values (avg is 0 and
        max/min are None when no input value is positive).
    """
    # Imported locally so this example runs on its own, without relying
    # on an import made by an earlier snippet.
    from collections import defaultdict

    # Filter: keep only the positive values
    filtered = [x for x in data if x > 0]
    # Transform: double each remaining value
    transformed = [x * 2 for x in filtered]
    # Group by remainder modulo 3
    grouped = defaultdict(list)
    for x in transformed:
        grouped[x % 3].append(x)
    # Summary statistics; the guards avoid ZeroDivisionError and
    # max()/min() raising ValueError on an empty sequence
    total = sum(transformed)
    stats = {
        "count": len(transformed),
        "sum": total,
        "avg": total / len(transformed) if transformed else 0,
        "max": max(transformed) if transformed else None,
        "min": min(transformed) if transformed else None,
    }
    return {"grouped": dict(grouped), "stats": stats}
data = [1, -2, 3, 4, -5, 6, 7, 8, -9, 10]
result = process_data(data)
print(result)