Skip to content

earthlab-education/clustering-byandell

 
 

Repository files navigation

Assignment ea-02-clustering

This repository contains the notebooks for this week's assignment. See 2. Clustering: Land cover classification at the Mississippi Delta.

Complete the homework in each .ipynb notebook, commit your work, and push the changes to github. Your instructor will pull the completed assignment after the deadline.

Once grading is complete, your instructor will push the results to a file called feedback.html. You will need to pull the changes to your local copy and open this file in a browser to view (because GitHub will not render html files).

Notes for next assignment on classes. A class is a function with output of an object that has new methods, which are in turn functions defined in the class. In addition, the @property decorator defines attributes for the object.

  • apppeears.py
  • add functionality to class
  • diff functions with same parameter--streamline, keep track of metadata
import **stuff**

class ArrayDataFrame(pd.DataFrame): # inherits pd.DataFrame class

    def set_array_column(self, arrays):
        self['arrays'] = arrays
        return self

    def __repr__(self):
        for_printing = self.copy()
        for_printing.arrays = [arr.min() for arr in self.arrays]
        return for_printing.__repr__()
        

ArrayDataFrame({'url': ['https://...']}).set_array_column([xr.DataArray()])

# Slow example where class would help.
import random
import numpy as np

def gen_data_array(size=10):
    data = (
        np.array([random.gauss(0,1) for _ in range(size**2)]).reshape(size, size))
    data = xr.DataArray(
        data = data,
        coords = (
            'x': [i * random.uniform(0,1) for i in range(size)],
            'y': [i * random.uniform(0,1) for i in range(size)],
        )
    )
    return da

df_len = 10
md_df = pd.DataFrame([
    'id': list(range(df_len)),
    'array': [gen_data_array(10) for _ in range(df_len)]])
my_df
class FunDataFrame(pd.DataFrame):

    def __repr__(self):
    return 'stuff!'

md_df = FunDataFrame([
    'id': list(range(df_len)),
    'array': [gen_data_array(10) for _ in range(df_len)]])
my_df

# Add ipython method (under the hood)

class FunDataFrame(pd.DataFrame):

    def __repr__(self):
    return 'stuff!'

    def _repr_html_(self):
        return 'more stuff!!!'

md_df = FunDataFrame([
    'id': list(range(df_len)),
    'array': [gen_data_array(10) for _ in range(df_len)]])
my_df

# Set up my dataframe class to show what I want

class FunDataFrame(pd.DataFrame):

    # tell it what I define
    array_types = [xr.DataArray]

    # make array as a property
    @property
    def array_cols(self):
        array_cols = []
        for col in self:
            if type(self[col][0]) == xr.DataArray:
                array_cols.append(col)
                return array_cols
            

    @property
    def _df_for_repr_(self):
        df = self.drop(columns = self.array_cols).copy()
        for array_col in self.array_cols:
            arr_str_list = []
            for arr in self[array_col]:
                arr_min = round(float(arr.x.min()), 2)
                arr_max = round(float(arr.x.max()), 2)
                arr_str_list.append(
                    f'DataArray(x ({arr_min}, {arr_max}))'
                )
            df[array_col] = arr_str_list
            #df[array_col] = ['DataArray' for _ in range(len(df))]
        return df
    
    # represent
    def __repr__(self):
        return self._df_for_repr_.__repr__()'

    # ipython method
    def _repr_html_(self):
        return self._df_for_repr_._repr_html_()

md_df = FunDataFrame([
    'id': list(range(df_len)),
    'array': [gen_data_array(10) for _ in range(df_len)]],
    'array2': [gen_data_array(10) for _ in range(df_len)]])
my_df

About

earth-analytics-2025-clustering-ea-02-clustering-template created by GitHub Classroom

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages

  • Jupyter Notebook 100.0%