brain tumor dataset

姜智浩 Lv5

数据来源

https://www.kaggle.com/datasets/masoudnickparvar/brain-tumor-mri-dataset

导入库

1
2
3
4
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications.densenet import preprocess_input

检查是否启用了GPU

1
print(tf.config.list_physical_devices("GPU"))

[PhysicalDevice(name=’/physical_device:GPU:0’, device_type=’GPU’)]

构造数据集

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# 参数
BATCH_SIZE = 32
IMG_SIZE = (224, 224)
DATA_DIR = "/Volumes/HIKSEMI/ipynb/Kaggle/Brain Tumor Dataset"

train_ds_raw = tf.keras.preprocessing.image_dataset_from_directory(
DATA_DIR,
validation_split=0.2,
subset="training",
seed=123,
color_mode="rgb",
image_size=IMG_SIZE,
batch_size=BATCH_SIZE,
)
val_ds_raw = tf.keras.preprocessing.image_dataset_from_directory(
DATA_DIR,
validation_split=0.2,
subset="validation",
seed=123,
color_mode="rgb",
image_size=IMG_SIZE,
batch_size=BATCH_SIZE,
)

Found 10560 files belonging to 4 classes.
Using 8448 files for training.
Found 10560 files belonging to 4 classes.
Using 2112 files for validation.

1
2
3
4
5
6
7
class_names = train_ds_raw.class_names
num_classes = len(class_names)
print("类别:", class_names)

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds_raw.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds_raw.cache().prefetch(buffer_size=AUTOTUNE)

查看不同标签图片的数量

1
2
3
4
5
6
7
import numpy as np
import matplotlib.pyplot as plt

counts = np.bincount([y for x, y in train_ds.unbatch()])
plt.bar(range(len(class_names)), counts)
plt.xticks(range(len(class_names)), class_names)
plt.show()

photo

数据增强

1
2
3
4
5
6
7
8
9
IMG_SIZE = (224, 224)

data_augmentation = tf.keras.Sequential([
layers.RandomFlip("horizontal_and_vertical"),
layers.RandomRotation(0.15), # 随机旋转
layers.RandomZoom(0.2), # 随机缩放
layers.RandomContrast(0.2), # 随机对比度
layers.RandomBrightness(0.2) # 随机亮度
])

模型选择

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# DenseNet121
base_model = tf.keras.applications.DenseNet121(
include_top=False,
weights='imagenet',
input_shape=(224, 224, 3)
)
base_model.trainable = False

model = models.Sequential([
data_augmentation,
layers.Lambda(preprocess_input),
base_model,
layers.GlobalAveragePooling2D(),
layers.Dropout(0.3),
layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
layers.Dense(4, activation='softmax')
])

迁移学习

第一阶段

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
model.compile(
optimizer=tf.keras.optimizers.Adam(1e-3),
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)

print("第一阶段")

history1 = model.fit(
train_ds,
validation_data=val_ds,
epochs=10,
callbacks=[
tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
]
)

Epoch 1/10
2025-11-16 07:24:40.974143: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
264/264 ━━━━━━━━━━━━━━━━━━━━ 183s 645ms/step - accuracy: 0.6757 - loss: 0.8693 - val_accuracy: 0.8177 - val_loss: 0.4993
Epoch 2/10
264/264 ━━━━━━━━━━━━━━━━━━━━ 135s 511ms/step - accuracy: 0.7649 - loss: 0.6108 - val_accuracy: 0.8191 - val_loss: 0.5096
Epoch 3/10
264/264 ━━━━━━━━━━━━━━━━━━━━ 143s 540ms/step - accuracy: 0.7749 - loss: 0.5895 - val_accuracy: 0.8329 - val_loss: 0.4379
Epoch 4/10
264/264 ━━━━━━━━━━━━━━━━━━━━ 140s 531ms/step - accuracy: 0.7840 - loss: 0.5671 - val_accuracy: 0.7779 - val_loss: 0.5830
Epoch 5/10
264/264 ━━━━━━━━━━━━━━━━━━━━ 135s 511ms/step - accuracy: 0.7770 - loss: 0.5739 - val_accuracy: 0.8030 - val_loss: 0.4965
Epoch 6/10
264/264 ━━━━━━━━━━━━━━━━━━━━ 142s 539ms/step - accuracy: 0.7874 - loss: 0.5575 - val_accuracy: 0.8376 - val_loss: 0.4554

第二阶段

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
base_model.trainable = True

for layer in base_model.layers[:-80]:
layer.trainable = False

print(f"第二阶段")
print(f"可训练参数: {sum([tf.size(v).numpy() for v in model.trainable_variables])}")

model.compile(
optimizer=tf.keras.optimizers.Adam(1e-5),
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)

callbacks = [
tf.keras.callbacks.EarlyStopping(
monitor='val_loss',
patience=5,
restore_best_weights=True
),
tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=3,
min_lr=1e-7
)
]

history2 = model.fit(
train_ds,
validation_data=val_ds,
epochs=40,
callbacks=callbacks
)


Epoch 39/40
264/264 ━━━━━━━━━━━━━━━━━━━━ 273s 1s/step - accuracy: 0.9297 - loss: 0.2094 - val_accuracy: 0.9081 - val_loss: 0.2820 - learning_rate: 5.0000e-06
Epoch 40/40
264/264 ━━━━━━━━━━━━━━━━━━━━ 324s 1s/step - accuracy: 0.9315 - loss: 0.1987 - val_accuracy: 0.9058 - val_loss: 0.2970 - learning_rate: 5.0000e-06

验证

1
2
val_loss, val_acc = model.evaluate(val_ds_raw)
print(f"验证集准确率: {val_acc:.3f}")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 5))

# 准确率
plt.subplot(1, 2, 1)
plt.plot(history2.history["accuracy"], label="Train Accuracy")
plt.plot(history2.history["val_accuracy"], label="Val Accuracy")
plt.legend()
plt.title("Accuracy over Epochs")

# 损失
plt.subplot(1, 2, 2)
plt.plot(history2.history["loss"], label="Train Loss")
plt.plot(history2.history["val_loss"], label="Val Loss")
plt.legend()
plt.title("Loss over Epochs")

plt.show()

photo

保存模型

1
2
model.save("brain_tumor_model_3.keras")
print("模型已保存为 brain_tumor_model_3.keras")

加载模型

1
2
3
4
model = tf.keras.models.load_model(
"brain_tumor_model_3.keras",
custom_objects={"preprocess_input": preprocess_input}
)

测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import numpy as np
import matplotlib.pyplot as plt

class_names = train_ds_raw.class_names

for images, labels in val_ds_raw.take(1):
preds = model.predict(images)
pred_labels = np.argmax(preds, axis=1)

plt.figure(figsize=(12, 12))
for i in range(9):
plt.subplot(3, 3, i + 1)
plt.imshow(images[i].numpy().astype("uint8"))
true_label = class_names[labels[i]]
pred_label = class_names[pred_labels[i]]
color = "green" if true_label == pred_label else "red"
plt.title(f"Pred: {pred_label}\nTrue: {true_label}", color=color)
plt.axis("off")
plt.show()

photo

分类报告

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

y_true = []
y_pred = []

for images, labels in val_ds:
preds = model.predict(images, verbose=0)
preds_class = np.argmax(preds, axis=1)
y_true.extend(labels.numpy())
y_pred.extend(preds_class)

y_true = np.array(y_true)
y_pred = np.array(y_pred)

print("Classification Report:")
print(classification_report(y_true, y_pred, digits=4))

cm = confusion_matrix(y_true, y_pred)
cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

plt.figure(figsize=(8,6))
sns.heatmap(cm_norm, annot=True, fmt=".2f", cmap="Blues", cbar=True)
plt.title("Classification Report")
plt.show()

val_acc = np.mean(y_true == y_pred)
print(f"验证集准确率: {val_acc:.4f}")

photo

  • Title: brain tumor dataset
  • Author: 姜智浩
  • Created at : 2025-12-02 11:45:14
  • Updated at : 2025-12-02 10:36:41
  • Link: https://super-213.github.io/zhihaojiang.github.io/2025/12/02/20251202brain tumor dataset/
  • License: This work is licensed under CC BY-NC-SA 4.0.