Skip to content

Commit

Permalink
Merge pull request #20 from ss77995ss/feature/baserun-stealing
Browse files Browse the repository at this point in the history
[Feature][Statcast] Runner Basestealing
  • Loading branch information
ss77995ss authored Dec 26, 2024
2 parents 2b1862e + d2dfdfa commit e559f6e
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 6 deletions.
34 changes: 34 additions & 0 deletions docs/runner_basestealing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Statcast Runner Basestealing

## `runner_basestealing`

Function to get a baserunner's basestealing data for each stolen base attempt. Attempts include successful stolen bases (`SB`), advances via balk (`BK`), caught stealing (`CS`), and pickoffs (`PK`). Unsuccessful pickoff attempts beyond three are also included (`FB`). Based on Baseball Savant's [Runner Basestealing](https://baseballsavant.mlb.com/leaderboard/basestealing-run-value).

**Examples**

```python
from baseball_stats_python import runner_basestealing

# Get Shohei Ohtani's runner basestealing data
runner_basestealing('660271')

# Get Shohei Ohtani's runner basestealing data in 2023
runner_basestealing('660271', season='2023')

# Get Shohei Ohtani's runner basestealing data in playoffs
runner_basestealing('660271', game_type=GameType.PLAYOFFS)
```

**Arguments**

| Argument | Data Type | Description |
| -------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| runner_id (Required) | `str` | The MLBAM ID of the runner. |
| game_type | `str` or `GameType` | The game type to filter by. Can be `Regular` for regular season, `Playoff` for playoffs, or `All` for all games. Default is `Regular`. Check enum [GameType](../enums/statcast_leaderboard.py) |
| season | `str` | The season to filter by. The earliest season available is 2016. |
| pitch_hand | `str` or `Hand` | The pitch hand to filter by. Default is "all". Check enum [Hand](../enums/statcast_leaderboard.py) |
| prior_pk | `str` | The number of prior pick-off attempts from the pitcher before the stolen base opportunity. Default is "all". Can be "all", "1", "2", or "3". "3" also includes all prior pick-off attempt counts over 3. |

**Return**

A DataFrame with columns related to the [Runner Basestealing](https://baseballsavant.mlb.com/leaderboard/basestealing-run-value) leaderboard. Each row of the DataFrame represents one stolen base attempt for the specified runner and contains data such as `r_primary_lead`, `r_secondary_lead`, `run_value`, `r_sec_minus_prim_lead`, `runner_moved_cd`, etc.
4 changes: 0 additions & 4 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
)
from src.baseball_stats_python.enums.minor import MinorGameType
from src.baseball_stats_python.enums.statcast import GameType, MlbTeam, Month
from src.baseball_stats_python.statcast.catcher_throwing import catcher_throwing


def example():
Expand Down Expand Up @@ -36,6 +35,3 @@ def mlbam_id_example():
# example()
# minor_example()
# mlbam_id_example()

df = catcher_throwing('669257', game_type=123)
print(df)
10 changes: 8 additions & 2 deletions src/baseball_stats_python/enums/statcast_leaderboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@


class GameType(EnumBase):
    """Game types accepted by the Statcast leaderboard ``game_type`` filter.

    Each member name is defined exactly once: assigning the same name twice
    in an Enum body raises ``TypeError`` when the class is created.
    """

    REGULAR_SEASON = 'Regular'
    PLAYOFFS = 'Playoff'
    ALL = 'All'


class Hand(EnumBase):
    """Handedness values accepted by the leaderboard ``pitch_hand`` filter."""

    RIGHT = 'R'
    LEFT = 'L'
    ALL = 'all'
84 changes: 84 additions & 0 deletions src/baseball_stats_python/statcast/runner_basestealing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import pandas as pd
import requests

from ..constants import DEFAULT_SEASON
from ..enums.statcast_leaderboard import GameType, Hand
from ..utils.statcast_leaderboard import get_hand_param_str, get_prior_pk_param_str

# Module-level HTTP session; runner_basestealing issues its GET request
# through it.
session = requests.Session()

# Base URL of the basestealing service. runner_basestealing appends the
# runner's ID as the final path segment when building the request URL.
API_URL = (
'https://baseballsavant.mlb.com/leaderboard/services/basestealing-running-game'
)


def get_run_value(df: pd.DataFrame) -> float:
    """Return the run value assigned to a single stolen base attempt.

    ``runner_basestealing`` passes this function to
    ``DataFrame.apply(..., axis=1)``, so despite the parameter name and
    annotation, ``df`` is one row that is indexed by column name.

    The outcome flags are checked in a fixed order and the first truthy one
    decides the result:

    * ``is_runner_cs`` -> -0.45
    * ``is_runner_sb`` -> 0.2
    * ``is_runner_pk`` -> -0.45
    * ``is_runner_bk`` -> 0.2
    * ``is_runner_fb`` -> 0.2

    Args:
        df: Row-like mapping holding the ``is_runner_*`` outcome flags.

    Returns:
        float: The run value of the first truthy outcome flag.

    Raises:
        ValueError: If none of the outcome flags is truthy.
    """
    # Order matters: an earlier flag wins when several are set.
    outcome_values = (
        ('is_runner_cs', -0.45),
        ('is_runner_sb', 0.2),
        ('is_runner_pk', -0.45),
        ('is_runner_bk', 0.2),
        ('is_runner_fb', 0.2),
    )

    for flag, value in outcome_values:
        if df[flag]:
            return value

    raise ValueError(f'Invalid DataFrame: {df}')


def runner_basestealing(
    runner_id: str,
    game_type: str | GameType = GameType.REGULAR_SEASON,
    season: str = str(DEFAULT_SEASON),
    pitch_hand: str | Hand = Hand.ALL,
    prior_pk: str = 'all',
) -> pd.DataFrame:
    """
    Get basestealing data from each stolen base opportunity for a specific runner.

    ref: https://baseballsavant.mlb.com/leaderboard/basestealing-run-value

    Args:
        runner_id (str): The MLBAM ID of the runner. (Required)
        game_type (str | GameType): The game type to filter by. Default is "Regular".
        season (str): The season to filter by. The earliest season available is 2016.
        pitch_hand (str | Hand): The pitch hand to filter by. Default is "all".
        prior_pk (str): The number of prior pick-off attempts from pitcher before
            the stolen base opportunity. Default is "all". Can be "all", "1", "2",
            or "3". "3" also includes all prior pick-off attempt counts over 3.

    Returns:
        pd.DataFrame: One row per entry of the response's ``data`` list, plus a
            ``run_value`` column computed by ``get_run_value``. When the
            response holds no rows, the frame is empty and ``run_value`` is an
            empty float column.

    Raises:
        ValueError: If ``runner_id`` is empty, ``game_type`` has the wrong type
            or is rejected by ``GameType.has_value``, ``season`` is earlier
            than 2016 (or is not an integer string), or ``pitch_hand`` /
            ``prior_pk`` fail their own validation.
        Exception: If the service responds with a status code other than 200.
    """
    if not runner_id:
        raise ValueError('runner_id is required')

    if not isinstance(game_type, (str, GameType)):
        raise ValueError(f'Invalid type for game_type: {type(game_type)}')

    if not GameType.has_value(game_type):
        raise ValueError(f'Invalid game type: {game_type}')

    if int(season) < 2016:
        raise ValueError(
            f'Invalid season: {season}, The earliest season available is 2016'
        )

    params = {
        'game_type': game_type,
        'season': season,
        'n': 0,
        'pitch_hand': get_hand_param_str(pitch_hand),
        'prior_pk': get_prior_pk_param_str(prior_pk),
    }

    response = session.get(f'{API_URL}/{runner_id}', params=params)

    if response.status_code != 200:
        raise Exception(
            f'Failed to fetch data: {response.status_code} - {response.text}'
        )

    result = response.json()
    df = pd.DataFrame(result['data'])

    if len(df.index) == 0:
        # With no rows, DataFrame.apply(axis=1) hands back an empty DataFrame
        # rather than a Series, which cannot be assigned to a single column.
        # Add the column explicitly so callers always get the same schema.
        df['run_value'] = pd.Series(dtype='float64')
        return df

    df['run_value'] = df.apply(get_run_value, axis=1)
    return df
21 changes: 21 additions & 0 deletions src/baseball_stats_python/utils/statcast_leaderboard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from ..enums.statcast_leaderboard import Hand


def get_hand_param_str(hand: str | Hand) -> str:
    """Validate ``hand`` and return it formatted as a string.

    Args:
        hand: A plain string or a ``Hand`` member.

    Returns:
        str: ``hand`` rendered through an f-string.

    Raises:
        ValueError: If ``hand`` is neither a ``str`` nor a ``Hand``, or if
            ``Hand.has_value`` rejects it.
    """
    if not isinstance(hand, (str, Hand)):
        raise ValueError(f'Invalid type for hand: {type(hand)}')

    is_known_hand = Hand.has_value(hand)
    if not is_known_hand:
        raise ValueError(f'Invalid hand: {hand}')

    return f'{hand}'


def get_prior_pk_param_str(prior_pk: str) -> str:
    """Validate ``prior_pk`` and return it unchanged.

    Args:
        prior_pk: One of ``'all'``, ``'1'``, ``'2'`` or ``'3'``.

    Returns:
        str: The validated ``prior_pk`` value.

    Raises:
        ValueError: If ``prior_pk`` is not a ``str`` or is not one of the
            accepted values.
    """
    if not isinstance(prior_pk, str):
        raise ValueError(f'Invalid type for prior_pk: {type(prior_pk)}')

    accepted_values = ('all', '1', '2', '3')
    if prior_pk in accepted_values:
        return prior_pk

    raise ValueError(f'Invalid prior_pk: {prior_pk}')

0 comments on commit e559f6e

Please sign in to comment.