Skip to content

Commit

Permalink
Merged to CIF-247
Browse files Browse the repository at this point in the history
  • Loading branch information
kcartier-wri committed Sep 13, 2024
2 parents 103b762 + e95d189 commit 1d5ddb2
Show file tree
Hide file tree
Showing 14 changed files with 526 additions and 326 deletions.
28 changes: 28 additions & 0 deletions .github/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Conda environment used by the GitHub Actions CI workflow
# (referenced as `environment-file: .github/environment.yml` by the workflow).
# NOTE(review): versions appear to mirror the repository-root environment.yml —
# keep the two files in sync when bumping pins.
name: cities-cif
channels:
- conda-forge
dependencies:
- python=3.10
- earthengine-api=0.1.411
- geocube=0.4.2
- geopandas=0.14.4
- rioxarray=0.15.0
- odc-stac=0.3.8
- pystac-client=0.7.5
- pytest=7.4.3
- xarray-spatial=0.3.7
- xee=0.0.15
- utm=0.7.0
- osmnx=1.9.0
- dask[complete]=2023.11.0
- matplotlib=3.8.2
- jupyterlab=4.0.10
- s3fs=2024.5.0
- geemap=0.32.0
- pip=23.3.1
- boto3=1.34.124
- scikit-learn=1.5.1
- scikit-image=0.24.0
- exactextract=0.2.0
- pip:
- overturemaps==0.6.0
21 changes: 0 additions & 21 deletions .github/requirements.txt

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Dev CIF API CI/CD
name: Dev CIF API CI/CD Conda

on:
pull_request:
Expand All @@ -9,29 +9,31 @@ permissions:
jobs:
build:
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}
strategy:
max-parallel: 4
matrix:
python-version: ["3.10"]

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
- uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: cities-cif
environment-file: .github/environment.yml
python-version: ${{ matrix.python-version }}
auto-update-conda: true
- name: Install Linux dependencies
run: |
sudo apt update
sudo apt install -y gdal-bin libgdal-dev
- name: Install Packages
- name: Install other packages
run: |
python -m pip install --upgrade pip
pip install -r .github/requirements.txt
pip install GDAL==`gdal-config --version`
conda install pytest --yes
- name: Run Tests
env:
GOOGLE_APPLICATION_USER: ${{ secrets.GOOGLE_APPLICATION_USER }}
GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
run: |
pytest tests
pytest tests
10 changes: 8 additions & 2 deletions city_metrix/layers/impervious_surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@


class ImperviousSurface(Layer):
def __init__(self, **kwargs):
"""
Attributes:
spatial_resolution: raster resolution in meters (see https://github.com/stac-extensions/raster)
"""

def __init__(self, spatial_resolution=100, **kwargs):
super().__init__(**kwargs)
self.spatial_resolution = spatial_resolution

def get_data(self, bbox):
# load impervious_surface
Expand All @@ -19,5 +25,5 @@ def get_data(self, bbox):
.sum()
)

data = get_image_collection(imperv_surf, bbox, 100, "imperv surf")
data = get_image_collection(imperv_surf, bbox, self.spatial_resolution, "imperv surf")
return data.change_year_index
26 changes: 15 additions & 11 deletions city_metrix/layers/open_street_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ class OpenStreetMapClass(Enum):
'amenity': ['school', 'kindergarten']}
HIGHER_EDUCATION = {'amenity': ['college', 'university'],
'building': ['college', 'university']}
TRANSIT_STOP = {'amenity':['ferry_terminal'],
'railway':['stop', 'platform', 'halt', 'tram_stop', 'subway_entrance', 'station'],
'highway':['bus_stop', 'platform'],
'public_transport': ['platform', 'stop_position', 'stop_area'],
'station':['subway'],
'aerialway':['station']}


class OpenStreetMap(Layer):
Expand All @@ -44,11 +50,16 @@ def get_data(self, bbox):
osm_feature = gpd.GeoDataFrame(pd.DataFrame(columns=['osmid', 'geometry']+list(self.osm_class.value.keys())), geometry='geometry')
osm_feature.crs = "EPSG:4326"

# Filter out Point and LineString (if the feature is not ROAD)
if self.osm_class != OpenStreetMapClass.ROAD:
osm_feature = osm_feature[osm_feature.geom_type.isin(['Polygon', 'MultiPolygon'])]
else:
# Filter by geo_type
if self.osm_class == OpenStreetMapClass.ROAD:
# Filter out Point
osm_feature = osm_feature[osm_feature.geom_type != 'Point']
elif self.osm_class == OpenStreetMapClass.TRANSIT_STOP:
# Keep Point
osm_feature = osm_feature[osm_feature.geom_type == 'Point']
else:
# Filter out Point and LineString
osm_feature = osm_feature[osm_feature.geom_type.isin(['Polygon', 'MultiPolygon'])]

# keep only columns desired to reduce file size
keep_col = ['osmid', 'geometry']
Expand All @@ -61,10 +72,3 @@ def get_data(self, bbox):
osm_feature = osm_feature.reset_index()[keep_col]

return osm_feature

def write(self, output_path):
self.data['bbox'] = str(self.data.total_bounds)
self.data['osm_class'] = str(self.osm_class.value)

# Write to a GeoJSON file
self.data.to_file(output_path, driver='GeoJSON')
6 changes: 3 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- python=3.10
- earthengine-api=0.1.379
- earthengine-api=0.1.411
- geocube=0.4.2
- geopandas=0.14.4
- rioxarray=0.15.0
Expand All @@ -22,7 +22,7 @@ dependencies:
- pip=23.3.1
- boto3=1.34.124
- scikit-learn=1.5.1
- scikit-image==0.24.0
- exactextract=0.2.0.dev252
- scikit-image=0.24.0
- exactextract=0.2.0
- pip:
- overturemaps==0.6.0
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
"s3fs",
"dask>=2023.11.0",
"boto3",
"exactextract<=0.2.0.dev252",
"overturemaps",
"scikit-learn>=1.5.1",
"scikit-image>=0.24.0"
"scikit-image>=0.24.0",
"exactextract>=0.2.0"
],
)
15 changes: 8 additions & 7 deletions tests/resources/layer_dumps_for_br_lauro_de_freitas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@
def pytest_configure(config):
qgis_project_file = 'layers_for_br_lauro_de_freitas2.qgz'

source_folder = os.path.dirname(__file__)
target_folder = get_target_folder_path()
create_target_folder(target_folder, True)
if RUN_DUMPS is True:
source_folder = os.path.dirname(__file__)
target_folder = get_target_folder_path()
create_target_folder(target_folder, True)

source_qgis_file = os.path.join(source_folder, qgis_project_file)
target_qgis_file = os.path.join(target_folder, qgis_project_file)
shutil.copyfile(source_qgis_file, target_qgis_file)
source_qgis_file = os.path.join(source_folder, qgis_project_file)
target_qgis_file = os.path.join(target_folder, qgis_project_file)
shutil.copyfile(source_qgis_file, target_qgis_file)

print("\n\033[93m QGIS project file and layer files written to folder %s.\033[0m\n" % target_folder)
print("\n\033[93m QGIS project file and layer files written to folder %s.\033[0m\n" % target_folder)

@pytest.fixture
def target_folder():
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ def test_write_high_land_surface_temperature(self, target_folder, bbox_info, tar
HighLandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
assert verify_file_is_populated(file_path)

# @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
# def test_write_impervious_surface(self, target_folder, bbox_info, target_spatial_resolution_multiplier):
# file_path = prep_output_path(target_folder, 'impervious_surface.tif')
# target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(ImperviousSurface())
# LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
# assert verify_file_is_populated(file_path)
@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_impervious_surface(self, target_folder, bbox_info, target_spatial_resolution_multiplier):
    """Write the ImperviousSurface layer for the test bbox and verify output is non-empty."""
    file_path = prep_output_path(target_folder, 'impervious_surface.tif')
    target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(ImperviousSurface())
    # Bug fix: previously wrote LandSurfaceTemperature (copy-paste from the sibling
    # test) even though the resolution and file name target ImperviousSurface.
    ImperviousSurface(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
    assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_land_surface_temperature(self, target_folder, bbox_info, target_spatial_resolution_multiplier):
Expand Down
117 changes: 113 additions & 4 deletions tests/test_layer_dimensions.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,128 @@
from city_metrix.layers import NdviSentinel2
import ee
import pytest

from city_metrix.layers import NdviSentinel2, TreeCover, Albedo, AlosDSM
from tests.resources.bbox_constants import BBOX_BRA_LAURO_DE_FREITAS_1
from city_metrix.layers.layer import get_image_collection
from tests.tools.general_tools import post_process_layer

EE_IMAGE_DIMENSION_TOLERANCE = 1 # Tolerance compensates for variable results from GEE service
COUNTRY_CODE_FOR_BBOX = 'BRA'
BBOX = BBOX_BRA_LAURO_DE_FREITAS_1

def test_read_image_collection():
    """Verify CRS and raster grid size when reading ESA WorldCover at 10 m."""
    ic = ee.ImageCollection("ESA/WorldCover/v100")
    data = get_image_collection(ic, BBOX, 10, "test")

    expected_crs = 32724
    expected_x_dimension = 187
    expected_y_dimension = 199

    assert data.rio.crs == expected_crs
    # Bug fix: the original asserted a non-empty tuple (always truthy) and compared
    # pytest.approx(...) against the literal strings "x"/"y", so the dimension check
    # could never fail. Compare the actual dimension sizes instead.
    assert data.dims["x"] == pytest.approx(expected_x_dimension, rel=EE_IMAGE_DIMENSION_TOLERANCE)
    assert data.dims["y"] == pytest.approx(expected_y_dimension, rel=EE_IMAGE_DIMENSION_TOLERANCE)

def test_read_image_collection_scale():
    """Reading the same collection at 100 m should shrink the grid ~10x per axis."""
    world_cover = ee.ImageCollection("ESA/WorldCover/v100")
    result = get_image_collection(world_cover, BBOX, 100, "test")
    assert result.dims == {"x": 19, "y": 20}

def test_albedo_dimensions():
    """Check value range and peak-value frequency of the processed Albedo layer."""
    raw = Albedo().get_data(BBOX)
    processed = post_process_layer(raw, value_threshold=0.1, convert_to_percentage=True)

    # Value range: processed percentages should span exactly 0..34 for this AOI
    assert processed.values.min() == 0
    assert processed.values.max() == 34

    # Peak frequency: cells with value 15 should make up 21% of all cells
    peak_pct = get_value_percent(processed, 15, processed.size, 0)
    assert peak_pct == 21

def test_alos_dsm_dimensions():
    """Check value range and peak-value frequency of the AlosDSM layer."""
    analysis_data = AlosDSM().get_data(BBOX)

    expected_min = 16
    expected_max = 86
    expected_peak_value = 56
    # NOTE: removed an unused `peak_value, peak_count = get_count_by_value(...)`
    # call — its result was never read, and the sibling albedo/NDVI tests keep the
    # equivalent call commented out.

    # Bounding values
    actual_min = analysis_data.values.min()
    actual_max = analysis_data.values.max()

    # Peak frequency
    full_count = analysis_data.size
    mid_count_pct = get_value_percent(analysis_data, expected_peak_value, full_count, 0)

    # Value range
    assert actual_min == expected_min
    assert actual_max == expected_max
    # Peak frequency
    assert mid_count_pct == 3

def test_ndvi_dimensions():
    """Check value range and peak-value frequency of the 2023 NDVI layer."""
    data = NdviSentinel2(year=2023).get_data(BBOX)
    # Cleanup: a leftover duplicate `data_for_map = post_process_layer(...)` and its
    # min/max reads (immediately shadowed below) were removed — dead duplicate work.
    analysis_data = post_process_layer(data, value_threshold=0.4, convert_to_percentage=True)

    expected_min = 0
    expected_max = 85
    expected_peak_value = 78

    # Bounding values
    actual_min = analysis_data.values.min()
    actual_max = analysis_data.values.max()

    # Peak frequency
    full_count = analysis_data.size
    mid_count_pct = get_value_percent(analysis_data, expected_peak_value, full_count, 0)

    # Value range
    assert actual_min == expected_min
    assert actual_max == expected_max
    # Peak frequency
    assert mid_count_pct == 11


def test_tree_cover():
    """Mean tree-cover percentage for the AOI should be ~54 within 10% tolerance."""
    mean_cover = TreeCover().get_data(BBOX).mean()
    assert pytest.approx(54.0, rel=0.1) == mean_cover

def get_value_percent(data, value, full_count, precision):
    """Percentage of cells in *data* equal to *value*, rounded to *precision* digits."""
    hits = data.values[data.values == value].size
    return get_rounded_pct(full_count, hits, precision)

def get_rounded_pct(full_count, this_count, precision):
    """Express *this_count* as a rounded percentage of *full_count*."""
    pct = (this_count / full_count) * 100
    return round(pct, precision)

def get_count_by_value(data, min_value, max_value):
    """Return (value, count) for the most frequent value in [min_value, max_value].

    Returns (None, 0) when no value in the range occurs in *data*.
    """
    peak_value = None
    peak_count = 0
    # Bug fix: range() previously excluded max_value, so a peak at the upper bound
    # was missed; callers pass the data's actual max, which must be scanned too.
    for candidate in range(min_value, max_value + 1):
        count = data.values[data.values == candidate].size
        if count > peak_count:
            peak_count = count
            peak_value = candidate

    return peak_value, peak_count
Loading

0 comments on commit 1d5ddb2

Please sign in to comment.