From cc5733b1df857559e7977010faf571b4726ba6da Mon Sep 17 00:00:00 2001 From: Jonathan Maynard Date: Tue, 20 Jan 2026 15:27:12 -0800 Subject: [PATCH 1/4] Update CEC infill method and rename column - Fixed CEC duplicate column issue in process_horizon_data by filling cec7_r in-place before rename - CEC values now return as numeric instead of strings --- soil_id/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/soil_id/utils.py b/soil_id/utils.py index 54a95a1..f5fb997 100644 --- a/soil_id/utils.py +++ b/soil_id/utils.py @@ -1611,10 +1611,9 @@ def process_horizon_data(muhorzdata_pd): ["cokey", "chkey", "hzname"] ].astype(str) - # Infill missing CEC values with ECEC - muhorzdata_pd["CEC"] = muhorzdata_pd["cec7_r"].fillna(muhorzdata_pd["ecec_r"]) + # Infill missing CEC values with ECEC and rename columns for better clarity + muhorzdata_pd["cec7_r"] = muhorzdata_pd["cec7_r"].fillna(muhorzdata_pd["ecec_r"]) - # Rename columns for better clarity muhorzdata_pd = muhorzdata_pd.rename( columns={"cec7_r": "CEC", "ph1to1h2o_r": "pH", "ec_r": "EC"} ) From b2ff97780362cc9b0be499010bdc04e42fa65d1e Mon Sep 17 00:00:00 2001 From: Jonathan Maynard Date: Tue, 20 Jan 2026 15:34:41 -0800 Subject: [PATCH 2/4] fix/OSD data aggregation - Fixed array length inconsistencies in OSD infilling by using stored horizon depths from hzb_lyrs instead of muhorzdata_pd_group - Applied fix to sand/clay/texture aggregation (lines 919-942) - Applied fix to LAB/Munsell aggregation (lines 775-806) --- soil_id/us_soil.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/soil_id/us_soil.py b/soil_id/us_soil.py index 51a268c..ec99359 100644 --- a/soil_id/us_soil.py +++ b/soil_id/us_soil.py @@ -766,20 +766,29 @@ def list_soils(lon, lat): lab_lyrs.append(["", "", ""]) munsell_lyrs.append("") else: + # Use the horizon bottom depths that match the stored horizon structure + # Convert string values to float, filtering out empty 
strings + horizon_bottom_depths = [ + float(v) if v != "" else np.nan + for v in hzb_lyrs[index].values() + ] + # Filter out NaN values + horizon_bottom_depths = [d for d in horizon_bottom_depths if not np.isnan(d)] + # Aggregate data for each color dimension l_d = aggregate_data( data=lab_intpl["l"], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, sd=2, ).fillna("") a_d = aggregate_data( data=lab_intpl["a"], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, sd=2, ).fillna("") b_d = aggregate_data( data=lab_intpl["b"], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, sd=2, ).fillna("") @@ -909,16 +918,25 @@ def list_soils(lon, lat): getProfile_cokey[index] = getProfile_mod + # Use the horizon bottom depths that match the stored horizon structure + # Convert string values to float, filtering out empty strings + horizon_bottom_depths = [ + float(v) if v != "" else np.nan + for v in hzb_lyrs[index].values() + ] + # Filter out NaN values + horizon_bottom_depths = [d for d in horizon_bottom_depths if not np.isnan(d)] + # Aggregate sand data snd_d_osd = aggregate_data( data=OSD_sand_intpl.iloc[:, 0], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, ) # Aggregate clay data cly_d_osd = aggregate_data( data=OSD_clay_intpl.iloc[:, 1], - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, ) # Calculate texture data based on sand and clay data @@ -931,7 +949,7 @@ def list_soils(lon, lat): # Aggregate rock fragment data rf_d_osd = aggregate_data( data=OSD_rfv_intpl.c_cfpct_intpl, - bottom_depths=muhorzdata_pd_group["hzdepb_r"].tolist(), + bottom_depths=horizon_bottom_depths, ) # Fill NaN values @@ -952,9 +970,9 @@ def list_soils(lon, lat): # Update cec, ph, and ec layers if they contain only a single # empty string for lyr in [cec_lyrs, ph_lyrs, 
ec_lyrs]: - if len(lyr[index]) == 1 and lyr[index][0] == "": + if len(lyr[index]) == 1 and list(lyr[index].values())[0] == "": empty_values = [""] * len(hzb_lyrs[index]) - lyr[index] = dict(zip(hzb_lyrs[index], empty_values)) + lyr[index] = dict(zip(hzb_lyrs[index].keys(), empty_values)) else: OSDhorzdata_group_cokey[index] = group_sorted From 5c2840d00303fc9e1fa61ed4cbbeb836329446b8 Mon Sep 17 00:00:00 2001 From: garo Date: Tue, 3 Mar 2026 17:26:04 -0800 Subject: [PATCH 3/4] chore: fix lint error --- soil_id/us_soil.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/soil_id/us_soil.py b/soil_id/us_soil.py index ec99359..f376798 100644 --- a/soil_id/us_soil.py +++ b/soil_id/us_soil.py @@ -725,9 +725,6 @@ def list_soils(lon, lat): comp_max_depths_group = comp_max_depths[ comp_max_depths["cokey"].isin(group_sorted["cokey"]) ] - muhorzdata_pd_group = muhorzdata_pd[ - muhorzdata_pd["cokey"].isin(group_sorted["cokey"]) - ] # Check if OSD depth adjustment is needed if OSD_max_bottom < comp_max_depths_group["comp_max_bottom"].iloc[0]: @@ -769,11 +766,12 @@ def list_soils(lon, lat): # Use the horizon bottom depths that match the stored horizon structure # Convert string values to float, filtering out empty strings horizon_bottom_depths = [ - float(v) if v != "" else np.nan - for v in hzb_lyrs[index].values() + float(v) if v != "" else np.nan for v in hzb_lyrs[index].values() ] # Filter out NaN values - horizon_bottom_depths = [d for d in horizon_bottom_depths if not np.isnan(d)] + horizon_bottom_depths = [ + d for d in horizon_bottom_depths if not np.isnan(d) + ] # Aggregate data for each color dimension l_d = aggregate_data( @@ -921,11 +919,12 @@ def list_soils(lon, lat): # Use the horizon bottom depths that match the stored horizon structure # Convert string values to float, filtering out empty strings horizon_bottom_depths = [ - float(v) if v != "" else np.nan - for v in hzb_lyrs[index].values() + float(v) if v != "" else np.nan for v 
in hzb_lyrs[index].values() ] # Filter out NaN values - horizon_bottom_depths = [d for d in horizon_bottom_depths if not np.isnan(d)] + horizon_bottom_depths = [ + d for d in horizon_bottom_depths if not np.isnan(d) + ] # Aggregate sand data snd_d_osd = aggregate_data( From 9a43b334b95c0a273ee5893d4af0316045d858af Mon Sep 17 00:00:00 2001 From: garo Date: Tue, 3 Mar 2026 17:27:49 -0800 Subject: [PATCH 4/4] chore: run test even if lint fails --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 13d9487..192d584 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,6 @@ jobs: test: runs-on: ubuntu-latest - needs: lint steps: - name: Checkout