Source code for pyveg.src.subgraph_centrality

"""
Python version of mao_pollen.m matlab code to look at connectedness of
pixels on a binary image, using "Subgraph Centrality" as described in:

Mander et.al. "A morphometric analysis of vegetation patterns in dryland ecosystems",
R. Soc. open sci. (2017)
https://royalsocietypublishing.org/doi/10.1098/rsos.160443

Mander et.al. "Classification of grass pollen through the quantitative
analysis of surface ornamentation and texture", Proc R Soc B 280: 20131905.
https://royalsocietypublishing.org/doi/pdf/10.1098/rspb.2013.1905

Estrada et.al. "Subgraph Centrality in Complex Networks"
https://arxiv.org/pdf/cond-mat/0504730.pdf
"""

import numpy as np
from PIL import Image
from scipy import spatial
import argparse
import igraph

from .image_utils import image_from_array, image_file_to_array


[docs]def make_graph(adj_matrix):
    """
    Use igraph to create a graph from our adjacency matrix
    """
    graph = igraph.Graph.Adjacency((adj_matrix > 0).tolist())
    return graph


[docs]def calc_euler_characteristic(pix_indices, graph):
    """
    Find the edges where both ends are within the pix_indices list
    """
    V = len(pix_indices)
    edges = []
    for edge in graph.get_edgelist():
        if edge[0] in pix_indices and edge[1] in pix_indices:
            edges.append(edge)
    E = len(edges) / 2
    return V - E


[docs]def write_csv(feature_vec, output_filename):
    """
    Write the feature vector to a 1-line csv
    """
    output_string = ""
    for feat in feature_vec:
        output_string += str(feat) + ","
    output_string = output_string[:-1] + "\n"
    with open(output_filename, "w") as outfile:
        outfile.write(output_string)
    return True


[docs]def write_dict_to_csv(metrics_dict, output_filename):
    with open(output_filename, "w") as f:
        for key in metrics_dict.keys():
            f.write("%s,%s\n" % (key, metrics_dict[key]))
    return True


[docs]def text_file_to_array(input_filename):
    """
    Read a csv-like representation of an image, where each row (representing a row
    of pixels in the image) is a comma-separated list of pixel values 0 (for black)
    or 255 (for white).
    """
    input_image = np.loadtxt(input_filename, dtype="i", delimiter=",")
    return input_image


[docs]def crop_image_array(input_image, x_range, y_range):
    """
    return a new image from specified pixel range of input image
    """
    return input_image[x_range[0] : x_range[1], y_range[0] : y_range[1]]


[docs]def fill_sc_pixels(sel_pixels, orig_image, val=200):
    """
    Given an original 2D array where all the elements are 0 (background)
    or 255 (signal), fill in a selected subset of signal pixels as 123 (grey).
    """
    new_image = np.copy(orig_image)
    for pix in sel_pixels:
        new_image[pix] = 200
    return new_image


[docs]def generate_sc_images(sel_pixels, orig_image, val=200):
    """
    Return a dict of images with the selected subsets of signal
    pixels filled in in cyan.
    """
    image_dict = {}
    for k, v in sel_pixels.items():
        new_image_array = fill_sc_pixels(v, orig_image, val)
        new_image = image_from_array(new_image_array, 400)
        image_dict[k] = new_image
    return image_dict


[docs]def save_sc_images(image_dict, file_prefix):
    """
    Saves images from dictionary.
    """
    for key in image_dict:
        im = image_dict[key]
        im.save(file_prefix + "_" + str(key), "PNG")


[docs]def get_signal_pixels(input_array, threshold=255, lower_threshold=True, invert_y=False):
    """
    Find coordinates of all pixels within the image that are > or <
    the threshold ( require < threshold if lower_threshold==True)
    NOTE - if invert_y is set, we make the second coordinate negative, for reasons.
    """
    y_sign = -1 if invert_y else 1
    # find all the "white" pixels
    signal_x_y = np.where(input_array >= threshold)

    # convert ([x1, x2, ...], [y1,y2,...]) to ([x1,y1],...)
    signal_coords = [
        (signal_x_y[0][i], y_sign * signal_x_y[1][i]) for i in range(len(signal_x_y[0]))
    ]
    return signal_coords


[docs]def invert_y_coord(coord_list):
    """
    Convert [(x1,y1),(x2,y2),...] to [(x1,-y1),(x2,-y2),...]
    """
    return [(x, -1 * y) for (x, y) in coord_list]


[docs]def calc_distance_matrix(signal_coords):
    """
    calculate the distances between all signal pixels in the original image
    """
    # find all the distances between pairs of white pixels
    # (if there are N white pixels, this will be a
    # vector of length N(N-1)/2 )
    distances = spatial.distance.pdist(signal_coords)
    # convert this into an NxN distance matrix, with each element ij
    # as the distance between pixel i and pixel j
    dist_square = spatial.distance.squareform(distances)
    return distances, dist_square


[docs]def feature_vector_metrics(feature_vector, output_csv=None):
    """
    Calculate different metrics for the feature vector
    """
    feature_vec_metrics = {}

    if len(feature_vector) == 0:
        raise RuntimeError("Empty feature vector")

    # slope of the vector
    feature_vec_metrics["slope"] = (feature_vector[-1] - feature_vector[0]) / len(
        feature_vector
    )

    # difference between last and first indexes
    feature_vec_metrics["offset"] = feature_vector[-1] - feature_vector[0]

    # difference between last and middle indexes
    feature_vec_metrics["offset50"] = (
        feature_vector[-1] - feature_vector[len(feature_vector) // 2]
    )

    # mean value on the feature_vector
    feature_vec_metrics["mean"] = np.mean(feature_vector)

    # std value on the feature_vector
    feature_vec_metrics["std"] = np.std(feature_vector)

    if output_csv:
        output_csv = output_csv[:-4] + "_metrics.csv"
        # write the feature vector to a csv file
        write_dict_to_csv(feature_vec_metrics, output_csv)

    return feature_vec_metrics


[docs]def calc_adjacency_matrix(distance_matrix, include_diagonal_neighbours=False):
    """
    Return a symmetric matrix of
    (n-pixels-over-threshold)x(n-pixel-over-threshold)
    where each element ij is 0 or 1 depending on whether the distance between
    pixel i and pixel j is < or > neighbour_threshold.
    """

    # prepare empty array
    adj_matrix = np.zeros(distance_matrix.shape)
    # get 2xM array where M is the number of elements of
    # the distance matrix that satisfy the "neighbour threshold" criterion.
    # i.e. the x,y coordinates of all "neighbours" in the distance matrix.
    if not include_diagonal_neighbours:
        neighbour_x_y = np.where(distance_matrix == 1)
    else:
        neighbour_x_y = np.where((distance_matrix > 0) & (distance_matrix < 2))

    # loop over coordinates of neighbours in distance matrix
    for i in range(len(neighbour_x_y[0])):
        adj_matrix[neighbour_x_y[0][i]][neighbour_x_y[1][i]] = 1

    return adj_matrix


[docs]def calc_and_sort_sc_indices(adjacency_matrix):
    """
    Given an input adjacency matrix, calculate eigenvalues and eigenvectors,
    calculate the subgraph centrality (ref: <== ADD REF), then sort.
    """

    # get eigenvalues
    am_lambda, am_phi = np.linalg.eigh(adjacency_matrix)

    # calculate the subgraph centrality (SC)
    phi2_explambda = np.dot(am_phi * am_phi, np.exp(am_lambda))

    # order the pixels by subgraph centrality, then find their indices
    # (corresponding to their position in the 1D list of white pixels)
    indices = np.argsort(phi2_explambda)[::-1]
    return indices


[docs]def fill_feature_vector(pix_indices, coords, adj_matrix, num_quantiles=20):
    """
    Given indices and coordinates of signal pixels ordered by SC value, put them into
    quantiles and calculate an element of a feature vector for each quantile.
    by using the Euler Characteristic.

    Will return:
        selected_pixels, feature_vector
    where selected_pixels is a vector of the pixel coordinates in each quantile,
    and a feature_vector is either num-connected-components or Euler characteristic,
    for each quantile.
    """
    # adj_matrix will be square - take the length of a side
    n = max(adj_matrix.shape)

    graph = make_graph(adj_matrix)
    # set the diagonals of the adjacency matrix to 1 (previously
    # zero by definition because a pixel can't be adjacent to itself)
    for j in range(n):
        adj_matrix[j][j] = 1

    # find the different quantiles
    start = 0
    end = 100
    step = (end - start) / num_quantiles
    x = [i for i in range(start, end + 1, int(step))]
    # create feature vector of size num_quantiles
    feature_vector = np.zeros(num_quantiles)
    # create a dictionary of selected pixels for each quantile.
    selected_pixels = {}
    # Loop through the quantiles to fill the feature vector
    for i in range(1, len(feature_vector)):
        # print("calculating subregion {} of {}".format(i, num_quantiles))
        # how many pixels in this sub-region?
        n_pix = round(x[i] * n / 100)
        sub_region = pix_indices[0:n_pix]
        sel_pix = [coords[j] for j in sub_region]
        selected_pixels[x[i]] = sel_pix
        # now calculate the feature vector element using the selected method
        feature_vector[i] = calc_euler_characteristic(sub_region, graph)

    # fill in the last quantile (100%) of selected pixels
    selected_pixels[100] = coords

    return feature_vector, selected_pixels


[docs]def subgraph_centrality(
    image,
    use_diagonal_neighbours=False,
    num_quantiles=20,
    threshold=255,  # what counts as a signal pixel?
    lower_threshold=True,
    output_csv=None,
):
    """
    Go through the whole calculation, from input image to output vector of
    pixels in each SC quantile, and feature vector (either connected-components
    or Euler characteristic).
    """

    feature_vec = [
        0 for i in range(0, num_quantiles)
    ]  # need the "+1" to include 100% quantile
    sel_pixels = {}

    # making sure that there image is not empty (all black)
    if np.array(np.where(image != 0)).size != 0:

        # get the coordinates of all the signal pixels
        signal_coords = get_signal_pixels(image, threshold, lower_threshold)
        # get the distance matrix
        dist_vec, dist_matrix = calc_distance_matrix(signal_coords)

        # will use to fill our feature vector
        adj_matrix = calc_adjacency_matrix(dist_matrix, use_diagonal_neighbours)
        # calculate the subgraph centrality and order our signal pixels accordingly
        sorted_pix_indices = calc_and_sort_sc_indices(adj_matrix)
        # calculate the feature vector and get the subsets of pixels in each quantile
        feature_vec, sel_pixels = fill_feature_vector(
            sorted_pix_indices, signal_coords, adj_matrix, num_quantiles
        )
    # write the feature vector to a csv file
    if output_csv:
        write_csv(feature_vec, output_csv)

    return feature_vec, sel_pixels