# Repository: faiss_dog_cat_question-main1 (branch: master)
# File: data_utils.py

# data_utils.py
import os
import logging
import pickle
import numpy as np
from imutils import paths
from tqdm import tqdm
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image
import cv2

def _label_from_path(image_path, root):
    """Return 1 if the image is labelled as a dog, otherwise 0.

    Only the portion of ``image_path`` below ``root`` is inspected, so a
    'dog' substring in a parent directory (for example a project folder named
    ``faiss_dog_cat_question``) cannot turn every label into 1.
    """
    return 1 if 'dog' in os.path.relpath(image_path, root) else 0


def createXY(train_folder, dest_folder, method='flat'):
    """Build, or load from cache, the feature matrix X and label vector y.

    If ``X.pkl`` and ``y.pkl`` already exist in ``dest_folder`` they are
    loaded and returned without touching ``train_folder``. Otherwise every
    image found under ``train_folder`` is converted to a feature vector, the
    results are pickled into ``dest_folder`` and returned.

    Args:
        train_folder: Directory that is searched for images.
        dest_folder: Directory holding the ``X.pkl`` / ``y.pkl`` cache files;
            created if it does not exist.
        method: ``'flat'`` reads each image as grayscale, resizes it to
            32x32 and flattens it (1024 values per image). ``'vgg'`` runs
            each image, resized to 224x224, through VGG16 without its top
            layers and flattens the output.

    Returns:
        A tuple ``(X, y)``: ``X`` is a C-contiguous float32 array with one
        row per image, ``y`` an array of labels (1 = dog, 0 = anything else).

    Raises:
        ValueError: If ``method`` is not supported, if no image is found
            under ``train_folder``, or if none of the images could be read.
    """
    # exist_ok covers the case where the folder appears between the check
    # and the call; the check itself is kept so the log line is only emitted
    # when the folder was actually missing.
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder, exist_ok=True)
        logging.info(f"创建目标文件夹: {dest_folder}")

    x_file_path = os.path.join(dest_folder, "X.pkl")
    y_file_path = os.path.join(dest_folder, "y.pkl")

    # NOTE: the cache is keyed by dest_folder only, not by `method` or
    # `train_folder`; use a separate dest_folder per configuration.
    # NOTE(security): pickle.load can execute arbitrary code, so dest_folder
    # must only contain files written by this function.
    if os.path.exists(x_file_path) and os.path.exists(y_file_path):
        with open(x_file_path, 'rb') as f:
            X = pickle.load(f)
        with open(y_file_path, 'rb') as f:
            y = pickle.load(f)
        return X, y

    # Reject unknown methods before doing any work. Previously they fell
    # through both branches and an empty dataset was written to the cache.
    if method not in ('vgg', 'flat'):
        raise ValueError(
            f"不支持的特征提取方法: {method!r}，可选值为 'vgg' 或 'flat'"
        )

    logging.info(f"正在检查路径: {train_folder}")
    image_paths = list(paths.list_images(train_folder))
    logging.info(f"找到的图像路径: {image_paths}")
    logging.info(f"找到 {len(image_paths)} 张图像")

    if not image_paths:
        raise ValueError(f"路径 {train_folder} 中没有找到图像")

    X = []
    y = []

    if method == 'vgg':
        # Load the network once; it is reused for every image.
        model = VGG16(weights='imagenet', include_top=False)
        for imagePath in tqdm(image_paths, desc="提取特征"):
            img = image.load_img(imagePath, target_size=(224, 224))
            img = image.img_to_array(img)
            img = np.expand_dims(img, axis=0)  # add the batch dimension
            img = preprocess_input(img)
            features = model.predict(img)
            X.append(features.flatten())
            y.append(_label_from_path(imagePath, train_folder))
    else:  # method == 'flat'
        for imagePath in tqdm(image_paths, desc="提取特征"):
            img = cv2.imread(imagePath, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an unreadable file by returning None
                # instead of raising; skip it rather than crash in resize.
                logging.warning("无法读取图像，已跳过: %s", imagePath)
                continue
            img = cv2.resize(img, (32, 32))
            X.append(img.flatten())
            y.append(_label_from_path(imagePath, train_folder))

    if not X:
        raise ValueError(f"路径 {train_folder} 中没有可读取的图像")

    # float32 and C-contiguous in a single conversion.
    X = np.ascontiguousarray(X, dtype=np.float32)
    y = np.array(y)

    with open(x_file_path, 'wb') as f:
        pickle.dump(X, f)
    with open(y_file_path, 'wb') as f:
        pickle.dump(y, f)

    return X, y