Skip to content

Commit

Permalink
Add Quarto quick fixes (#334)
Browse files Browse the repository at this point in the history
* Remove multicard sales

* Fix to work with duplicate sales

* Drop any observations used for mapping that are missing lat/lon

* Move NA check to outcomes

* Fix filling checking

* Update run ID

* Move NA tables

* Add plot tweaks

---------

Co-authored-by: Damonamajor <damon.major@cookcountyil.gov>
  • Loading branch information
dfsnow and Damonamajor authored Jan 27, 2025
1 parent f823872 commit ca6baaf
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 45 deletions.
3 changes: 2 additions & 1 deletion reports/performance/_comp.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,9 @@ target_df <- comp_df %>%
by = c("pin" = "meta_pin", "card" = "meta_card_num"),
relationship = "many-to-many"
) %>%
left_join(
inner_join(
assessment_card %>%
filter(!is.na(loc_latitude) & !is.na(loc_longitude)) %>%
select(
meta_pin, meta_card_num, pred_card_initial_fmv,
loc_latitude, loc_longitude,
Expand Down
47 changes: 44 additions & 3 deletions reports/performance/_input.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ input_char_filling_df <- training_data %>%
),
.by = meta_pin
) %>%
filter(n() > 6, .by = meta_pin) %>%
filter(n() > 5, .by = meta_pin) %>%
select(
PIN = meta_pin,
Year = meta_year,
Expand All @@ -172,7 +172,7 @@ input_char_filling_df <- training_data %>%
`Walkability (2017)` = loc_access_cmap_walk_total_score,
`GS Rating (2021)` = prox_avg_school_rating_in_half_mile
) %>%
arrange(PIN, Year)
arrange(PIN, Year, `Sale Date`)
input_char_filling_df %>%
datatable(rownames = FALSE)
Expand Down Expand Up @@ -243,7 +243,8 @@ input_complex_id_map <- input_complex_id_targets %>%
char_yrblt
),
by = "meta_pin"
)
) %>%
filter(!is.na(loc_latitude) & !is.na(loc_longitude))
input_complex_id_palette <- colorFactor(
palette = "Set1",
Expand Down Expand Up @@ -432,3 +433,43 @@ assessment_pin %>%
```

:::


## Potential Output Issues

::: panel-tabset

### Card Values

```{r _outcomes_card_values}
# One-row summary of potential card-level output problems: counts of NA and
# negative values in the initial and final FMV predictions, shown as a
# datatable without row names.
datatable(
  summarize(
    assessment_card,
    `NA Initial FMV` = sum(is.na(pred_card_initial_fmv)),
    `Negative Initial FMV` = sum(pred_card_initial_fmv < 0, na.rm = TRUE),
    `NA Final FMV` = sum(is.na(pred_card_final_fmv)),
    `Negative Final FMV` = sum(pred_card_final_fmv < 0, na.rm = TRUE)
  ),
  rownames = FALSE
)
```

### PIN Values

```{r _outcomes_pin_values}
# One-row summary of potential PIN-level output problems: for the initial,
# final, final land, and final building FMV predictions, count the NA values
# and the negative values, then show the counts as a datatable without row
# names.
datatable(
  summarize(
    assessment_pin,
    `NA Initial FMV` = sum(is.na(pred_pin_initial_fmv)),
    `Negative Initial FMV` = sum(pred_pin_initial_fmv < 0, na.rm = TRUE),
    `NA Final FMV` = sum(is.na(pred_pin_final_fmv)),
    `Negative Final FMV` = sum(pred_pin_final_fmv < 0, na.rm = TRUE),
    `NA Final Land FMV` = sum(is.na(pred_pin_final_fmv_land)),
    `Negative Final Land FMV` = sum(
      pred_pin_final_fmv_land < 0,
      na.rm = TRUE
    ),
    `NA Final Building FMV` = sum(is.na(pred_pin_final_fmv_bldg)),
    `Negative Final Building FMV` = sum(
      pred_pin_final_fmv_bldg < 0,
      na.rm = TRUE
    )
  ),
  rownames = FALSE
)
```

:::
3 changes: 3 additions & 0 deletions reports/performance/_model.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -1148,6 +1148,7 @@ Tables of the largest misses (in absolute terms) from the **test set**, **assess
```{r _model_big_misses_test}
model_big_misses_test <- test_card %>%
mutate(township_name = ccao::town_convert(meta_township_code)) %>%
filter(n_distinct(meta_card_num) == 1, .by = meta_pin) %>%
filter(meta_triad_code == run_triad_code) %>%
select(
Town = township_name, PIN = meta_pin, Class = meta_class,
Expand Down Expand Up @@ -1204,6 +1205,7 @@ model_big_misses_assessment <- assessment_pin %>%
filter(
meta_triad_code == run_triad_code,
!is.na(sale_recent_1_price),
meta_pin_num_cards == 1,
!is.na(pred_pin_final_fmv_round),
year(sale_recent_1_date) == max(year(sale_recent_1_date), na.rm = TRUE)
) %>%
Expand Down Expand Up @@ -1264,6 +1266,7 @@ model_big_misses_assessment %>%
```{r _model_big_misses_training}
model_big_misses_training <- training_data_pred %>%
filter(!sv_is_outlier) %>%
filter(n_distinct(meta_card_num) == 1, .by = meta_pin) %>%
mutate(township_name = ccao::town_convert(meta_township_code)) %>%
filter(meta_triad_code == run_triad_code, !is.na(meta_sale_price)) %>%
select(
Expand Down
1 change: 1 addition & 0 deletions reports/performance/_outcomes.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Map of the 10 largest year-over-year increases in market value by neighborhood.

```{r _outcomes_map_of_largest_change}
outcomes_map_big_changes <- assessment_pin %>%
filter(!is.na(loc_latitude) & !is.na(loc_longitude)) %>%
group_by(meta_nbhd_code) %>%
left_join(ccao::nbhd_shp, by = c("meta_nbhd_code" = "town_nbhd")) %>%
distinct(meta_pin, .keep_all = TRUE) %>%
Expand Down
45 changes: 6 additions & 39 deletions reports/performance/_outliers.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -504,10 +504,14 @@ outlier_decile_breakout <- function(data, dec) {
x = "Outlier Types"
) +
geom_bar(stat = "identity") +
geom_text(aes(label = comma(count)), vjust = -0.1) +
geom_text(
aes(label = comma(count)),
hjust = 0,
nudge_y = 20
) +
scale_y_continuous(
labels = comma,
limits = c(0, 1.05 * outlier_decile_y_axis_lim)
limits = c(0, 1.10 * outlier_decile_y_axis_lim)
) +
theme_minimal() +
theme(
Expand All @@ -528,40 +532,3 @@ outlier_decile_breakout(training_data, 1)
```

:::

## Potential Output Issues

::: panel-tabset

### Card Values

```{r _outliers_card_values}
# One-row summary counting NA values and negative values in the card-level
# initial and final FMV predictions.
assessment_card %>%
summarize(
"NA Initial FMV" = sum(is.na(pred_card_initial_fmv)),
"Negative Initial FMV" = sum(pred_card_initial_fmv < 0, na.rm = TRUE),
"NA Final FMV" = sum(is.na(pred_card_final_fmv)),
"Negative Final FMV" = sum(pred_card_final_fmv < 0, na.rm = TRUE)
)
```

### Pin Values

```{r _outliers_pin_values}
# One-row summary counting NA values and negative values in the PIN-level
# initial, final, final land, and final building FMV predictions.
assessment_pin %>%
summarize(
"NA Initial FMV" = sum(is.na(pred_pin_initial_fmv)),
"Negative Initial FMV" = sum(pred_pin_initial_fmv < 0, na.rm = TRUE),
"NA Final FMV" = sum(is.na(pred_pin_final_fmv)),
"Negative Final FMV" = sum(pred_pin_final_fmv < 0, na.rm = TRUE),
"NA Final Land FMV" = sum(is.na(pred_pin_final_fmv_land)),
"Negative Final Land FMV" = sum(pred_pin_final_fmv_land < 0, na.rm = TRUE),
"NA Final Building FMV" = sum(is.na(pred_pin_final_fmv_bldg)),
"Negative Final Building FMV" = sum(
pred_pin_final_fmv_bldg < 0,
na.rm = TRUE
)
)
```

:::
3 changes: 2 additions & 1 deletion reports/performance/_shap.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,9 @@ The following map shows the impact of 20 key predictors for neighborhood-level h

```{r _shap_map}
shap_df_imp_by_nbhd <- shap_df_filtered %>%
left_join(
inner_join(
assessment_card %>%
filter(!is.na(loc_latitude) & !is.na(loc_longitude)) %>%
select(meta_year, meta_pin, meta_card_num, nbhd_code = meta_nbhd_code),
by = c("meta_pin", "meta_card_num", "meta_year")
) %>%
Expand Down
2 changes: 1 addition & 1 deletion reports/performance/performance.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ knitr:
out.width: "100%"
editor: source
params:
run_id: "2025-01-10-serene-boni"
run_id: "2025-01-23-dreamy-ida"
year: "2025"
---

Expand Down

0 comments on commit ca6baaf

Please sign in to comment.