Commit 67bce534 authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

KMeansRSImage: Added get_random_spectra_from_each_cluster() and _im2spectra().

Former-commit-id: 25aea6ba
parent c98938a4
......@@ -9,6 +9,7 @@ import scipy as sp
import matplotlib.pyplot as plt
from logging import Logger
from sklearn.cluster import KMeans, k_means_
from pandas import DataFrame
from geoarray import GeoArray # noqa F401 # flake8 issue
......@@ -169,6 +170,7 @@ class SpectralResampler(object):
class KMeansRSImage(object):
_clusters = None
_im_clust = None
_spectra = None
def __init__(self, im, n_clusters):
# type: (GeoArray, int) -> None
......@@ -193,21 +195,19 @@ class KMeansRSImage(object):
return self._im_clust
def compute_clusters(self):
    """Fit a k-means model to the spectra of the input image and store it.

    The estimator is created with ``self.n_clusters`` clusters and ``random_state=0``,
    so the random seeding is fixed.

    :return:    the object returned by ``KMeans.fit`` (also assigned to ``self.clusters``)
    """
    kmeans = KMeans(n_clusters=self.n_clusters, random_state=0)

    # NOTE(review): the argument of fit() was lost in this view of the file; it is reconstructed here as the
    # input image flattened by _im2spectra() (mirroring apply_clusters) - confirm against the repository.
    self.clusters = kmeans.fit(self._im2spectra(self.im))

    return self.clusters
def apply_clusters(self, image):
    """Assign the pixels of the given image to the already computed clusters.

    :param image:   input image; it is wrapped into a GeoArray before being flattened to spectra
    :return:        the labels returned by ``self.clusters.predict`` for the flattened image
    """
    # flatten the image to (rows * cols, bands) once and run a single prediction on it
    labels = self.clusters.predict(self._im2spectra(GeoArray(image)))

    return labels
@staticmethod
def _im2spectra(geoArr):
    """Flatten an image into a 2D array holding one spectrum per row.

    Declared as a static method: it takes no ``self`` but is invoked as ``self._im2spectra(...)``, which would
    otherwise bind the instance to ``geoArr`` and fail with a TypeError for the extra positional argument.

    :param geoArr:  image object providing ``rows``, ``cols``, ``bands`` and ``reshape()``
                    (presumably a 3D rows x cols x bands GeoArray - confirm against callers)
    :return:        result of ``geoArr.reshape((rows * cols, bands))``
    """
    return geoArr.reshape((geoArr.rows * geoArr.cols, geoArr.bands))
def plot_cluster_centers(self, figsize=(15, 5)):
# type: (tuple) -> None
"""Show a plot of the cluster center signatures.
......@@ -263,9 +263,30 @@ class KMeansRSImage(object):
def get_random_spectra_from_each_cluster(self, samplesize=50):
    # type: (int) -> dict
    """Return a given number of spectra randomly selected within each cluster.

    E.g., 50 spectra belonging to cluster 1, 50 spectra belonging to cluster 2 and so on.

    :param samplesize:  number of spectra to be randomly selected from each cluster
    :return:            dictionary like {cluster_label: array of the spectra sampled from that cluster}
    """
    # NOTE(review): the references to the input image (self.im) were lost in this view of the file and are
    # reconstructed here - confirm against the repository.

    # get DataFrame with columns [cluster_label, B1, B2, B3, ...]
    band_names = ['B%s' % band for band in range(1, self.im.bands + 1)]
    df = DataFrame(self._im2spectra(self.im), columns=band_names)
    df.insert(0, 'cluster_label', self.clusters.labels_)

    # get random sample from each cluster and generate a dict like {cluster_label: random_sample}
    random_samples = dict()
    for label in range(self.n_clusters):
        # keep only the band columns of the spectra belonging to the current cluster
        cluster_subset = df[df.cluster_label == label].loc[:, 'B1':]

        # sample with replacement so that clusters with fewer than `samplesize` spectra are filled up with
        # duplicates instead of raising an error
        random_samples[label] = np.array(cluster_subset.sample(samplesize, replace=True))

    return random_samples
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment