-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b640145
commit da49e55
Showing
11 changed files
with
673 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
""" | ||
Functions to apply rbscore | ||
""" | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import torch | ||
from torch import nn | ||
from winterrb.utils import make_triplet | ||
|
||
|
||
def apply_rb_to_table(model: nn.Module, table: pd.DataFrame) -> pd.DataFrame:
    """
    Score every source in a table with a realbogus model.

    Each row is turned into a triplet via ``make_triplet(row, normalize=True)``,
    fed through the model as a one-item batch with gradients disabled, and the
    first model output is kept as a python float. The scores are written to an
    ``rb`` column on the table that was passed in, which is then returned.

    :param model: Pytorch model
    :param table: Table of sources
    :return: The same table object, with the ``rb`` column set
    """

    def _score_row(source) -> float:
        cutouts = make_triplet(source, normalize=True)
        # Prepend a batch axis, then move the last axis to position 1
        batched = np.expand_dims(cutouts, axis=0)
        reordered = np.transpose(batched, (0, 3, 1, 2))
        with torch.no_grad():
            prediction = model(torch.from_numpy(reordered))
        return float(prediction[0])

    table["rb"] = [_score_row(source) for _, source in table.iterrows()]

    return table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
""" | ||
Module for machine learning models | ||
""" | ||
|
||
from mirar.processors.sources.machine_learning.pytorch import Pytorch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
""" | ||
Module with classes to apply an ML score from pytorch | ||
""" | ||
|
||
import logging | ||
from pathlib import Path | ||
from typing import Callable | ||
|
||
import pandas as pd | ||
import requests | ||
import torch | ||
from torch import nn | ||
|
||
from mirar.data import SourceBatch | ||
from mirar.paths import ml_models_dir | ||
from mirar.processors.base_processor import BaseSourceProcessor | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Pytorch(BaseSourceProcessor):
    """
    Class to apply a pytorch model to a source table

    The weights file named by ``model_weights_url`` is downloaded into
    ``ml_models_dir`` the first time the model is needed, loaded into the
    supplied module, and the module is then handed to ``apply_to_table``
    together with each source table of a batch.
    """

    base_key = "pytorch"

    def __init__(
        self,
        model: nn.Module,
        model_weights_url: str,
        apply_to_table: Callable[[nn.Module, pd.DataFrame], pd.DataFrame],
    ):
        """
        :param model: Pytorch module the downloaded state dict is loaded into
        :param model_weights_url: URL of the weights file; its last path
            component is used as the local file name
        :param apply_to_table: Function taking the loaded model and a table
            of sources, and returning the updated table
        """
        super().__init__()
        self._model = model
        self.model_weights_url = model_weights_url
        self.model_name = Path(self.model_weights_url).name
        self.apply_to_table = apply_to_table

        # Filled in lazily by get_model(), so constructing the processor
        # neither downloads nor loads anything.
        self.model = None

    def __str__(self) -> str:
        return f"Processor to use Pytorch model {self.model_name} to score sources"

    def get_ml_path(self) -> Path:
        """
        Get the path to the ML model

        :return: Path to the ML model
        """
        return ml_models_dir.joinpath(self.model_name)

    def download_model(self):
        """
        Download the ML model weights to the local model path.

        The response is streamed into a temporary ``.part`` file which is
        only renamed to the final path once the transfer has completed. An
        interrupted or failed download therefore never leaves a truncated
        weights file behind that get_model() would treat as already cached.

        :raises requests.HTTPError: If the server answers with an error status
        :raises FileNotFoundError: If the file is missing after the download
        """

        url = self.model_weights_url
        local_path = self.get_ml_path()
        partial_path = local_path.with_name(f"{local_path.name}.part")

        logger.info(
            f"Downloading model {self.model_name} " f"from {url} to {local_path}"
        )

        # The cache directory may not have been created yet.
        local_path.parent.mkdir(parents=True, exist_ok=True)

        try:
            with requests.get(url, stream=True, timeout=120.0) as r:
                r.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            # Publish the file only after every chunk has been written.
            partial_path.replace(local_path)
        finally:
            # No-op after a successful rename; removes leftovers on failure.
            partial_path.unlink(missing_ok=True)

        if not local_path.exists():
            err = f"Model {self.model_name} not downloaded"
            logger.error(err)
            raise FileNotFoundError(err)

    @staticmethod
    def load_model(path):
        """
        Function to load a pytorch model dict from a path

        :param path: Path to the model
        :return: Pytorch model dict
        :raises FileNotFoundError: If the path does not exist
        :raises ValueError: If the file suffix is neither ``.pth`` nor ``.pt``
        """
        if not path.exists():
            err = f"Model {path} not found"
            logger.error(err)
            raise FileNotFoundError(err)

        if path.suffix in (".pth", ".pt"):
            # NOTE(security): torch.load unpickles the file. Only use this on
            # trusted files, or pass weights_only=True where the installed
            # torch version supports it.
            return torch.load(path)

        raise ValueError(f"Unknown model type {path.suffix}")

    def get_model(self):
        """
        Load the ML model weights. Download it if it doesn't exist.

        The loaded model is switched to eval mode and cached on the
        instance, so the download and load happen at most once.

        :return: ML model
        """

        if self.model is None:
            model = self._model

            local_path = self.get_ml_path()

            if not local_path.exists():
                self.download_model()

            # Deserialise onto the CPU so that weights saved from a GPU also
            # load on CPU-only hosts; load_state_dict copies the values into
            # the model's own parameters.
            # NOTE(security): the weights come from a remote URL and
            # torch.load unpickles them; consider weights_only=True where the
            # installed torch version supports it.
            state_dict = torch.load(local_path, map_location="cpu")
            model.load_state_dict(state_dict)
            model.eval()

            self.model = model

        return self.model

    def _apply_to_sources(
        self,
        batch: SourceBatch,
    ) -> SourceBatch:
        """
        Score every source table of the batch with the model.

        :param batch: Batch of source tables
        :return: The same batch, with each table's data replaced by the
            output of ``apply_to_table``
        """

        model = self.get_model()

        for source_table in batch:
            sources = source_table.get_data()
            new = self.apply_to_table(model, sources)
            source_table.set_data(new)

        return batch
Oops, something went wrong.