Source code for sharpedge.pooling_image

import numpy as np
from sharpedge._utils.utility import Utility

def pooling_image(img, window_size, pooling_method=np.mean):
    """
    Perform pooling on an image using a specified window size and pooling function.

    The image is cut into non-overlapping ``window_size`` x ``window_size``
    tiles and every tile is reduced to a single value by ``pooling_method``.
    For 3D input each channel of a tile is reduced independently.

    Parameters
    ----------
    img : numpy.ndarray
        The input image as a 2D numpy array (grayscale) or 3D numpy array (RGB).
        It is first passed to ``Utility._input_checker`` for validation.
    window_size : int
        The size of the pooling window (e.g., 10 for 10x10 windows).
        Must be a positive integer that divides both image dimensions.
    pooling_method : callable, optional
        The pooling function to apply to each window. It is called with a
        2D ``float32`` array and must return a scalar. Common options include
        `numpy.mean`, `numpy.median`, `numpy.max`, and `numpy.min`.
        Default is `numpy.mean`.

    Returns
    -------
    numpy.ndarray
        The resized image, reduced by the pooling operation based on the
        specified window size and pooling function. For grayscale images,
        the result is a 2D array of shape
        ``(rows // window_size, cols // window_size)``. For RGB images, the
        result is a 3D array with the channel axis preserved and every value
        divided by 255.0.

    Raises
    ------
    TypeError
        If `window_size` is not an integer or `pooling_method` is not callable.
    ValueError
        If `window_size` is not positive, or if the image dimensions are not
        divisible by the window size.

    Any exception raised by ``Utility._input_checker`` propagates unchanged.

    Notes
    -----
    The division by 255.0 is applied unconditionally to 3D input, so the
    output only lands in [0.0, 1.0] when the input uses a 0-255 scale. A 3D
    input that is already in [0.0, 1.0] comes out scaled down by 255.
    2D input is never rescaled.

    Examples
    --------
    >>> img = np.random.rand(100, 100)
    >>> pooled_img = pooling_image(img, window_size=10, pooling_method=np.mean)

    For an RGB image:

    >>> img_rgb = np.random.rand(100, 100, 3)
    >>> pooled_img = pooling_image(img_rgb, window_size=20, pooling_method=np.max)
    """
    # Input validation
    Utility._input_checker(img)

    if not isinstance(window_size, int):
        raise TypeError("window_size must be an integer.")

    if not callable(pooling_method):
        raise TypeError("pooling_method must be callable.")

    # Reject zero/negative sizes explicitly: zero would otherwise surface as a
    # ZeroDivisionError from the modulo below, and a negative size would pass
    # the divisibility test and then fail inside np.zeros with an unrelated
    # "negative dimensions" message.
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer.")

    img_rows, img_cols = img.shape[:2]

    # Check if dimensions are divisible by window size
    if img_rows % window_size != 0 or img_cols % window_size != 0:
        raise ValueError("Image dimensions are not divisible by the window size.")

    # Ensure image is in float32 format for calculations
    img = img.astype(np.float32)

    # Number of tiles along each axis == shape of the pooled output
    result_rows = img_rows // window_size
    result_cols = img_cols // window_size

    if img.ndim == 2:  # Grayscale image
        pooled_image = np.zeros((result_rows, result_cols))
        for i in range(result_rows):
            row_span = slice(i * window_size, (i + 1) * window_size)
            for j in range(result_cols):
                col_span = slice(j * window_size, (j + 1) * window_size)
                pooled_image[i, j] = pooling_method(img[row_span, col_span])
    else:  # RGB image
        n_channels = img.shape[2]
        pooled_image = np.zeros((result_rows, result_cols, n_channels))
        for i in range(result_rows):
            row_span = slice(i * window_size, (i + 1) * window_size)
            for j in range(result_cols):
                col_span = slice(j * window_size, (j + 1) * window_size)
                window = img[row_span, col_span, :]
                # Pool each channel on its own so pooling_method always
                # receives a 2D window, same as in the grayscale path.
                for c in range(n_channels):
                    pooled_image[i, j, c] = pooling_method(window[:, :, c])

        # Normalize RGB image to [0.0, 1.0] (assumes a 0-255 input scale)
        pooled_image /= 255.0

    return pooled_image