Skip to content

Commit

Permalink
cleanup to rainforest competition
Browse files Browse the repository at this point in the history
  • Loading branch information
mnpinto committed Feb 17, 2021
1 parent d3bab0f commit ff7e742
Show file tree
Hide file tree
Showing 47 changed files with 8,428 additions and 174 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
nbs/data
nbs/models
nbs/preds
*.bak
.gitattributes
.last_checked
Expand Down
48 changes: 0 additions & 48 deletions 00_core.ipynb

This file was deleted.

35 changes: 35 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# nbdev project Makefile: builds the Python library, the docs site and
# release artifacts from the notebooks in nbs/.
.ONESHELL:
SHELL := /bin/bash
# All source notebooks; targets below rebuild when any of them changes.
SRC = $(wildcard nbs/*.ipynb)

all: dl_pipeline docs

# Export notebook cells into the dl_pipeline Python package.
dl_pipeline: $(SRC)
	nbdev_build_lib
	touch dl_pipeline

# Propagate edits made directly in the library source back into the notebooks.
sync:
	nbdev_update_lib

# Serve the generated docs locally with Jekyll.
docs_serve: docs
	cd docs && bundle exec jekyll serve

docs: $(SRC)
	nbdev_build_docs
	touch docs

test:
	nbdev_test_nbs

# Build conda package and bump the version after a PyPI upload.
release: pypi
	nbdev_conda_package
	nbdev_bump_version

pypi: dist
	twine upload --repository pypi dist/*

dist: clean
	python setup.py sdist bdist_wheel

clean:
	rm -rf dist
24 changes: 22 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
# nbdev template
# Project name here
> Summary description here.

This file will become your README and also the index of your documentation.

## Install

`pip install your_project_name`

## How to use

Fill me in please! Don't forget code examples:

```
1+1
```




2

Use this template to more easily create your nbdev project.

1 change: 1 addition & 0 deletions dl_pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.0.1"
122 changes: 122 additions & 0 deletions dl_pipeline/_nbdev.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# AUTOGENERATED BY NBDEV! DO NOT EDIT!
# (Regenerated by `nbdev_build_lib`; maps exported symbols to the notebook
# that defines them so the docs builder can link names to their source.)

__all__ = ["index", "modules", "custom_doc_links", "git_url"]

# Symbol name -> source notebook filename.
index = {"seed_everything": "00_core.ipynb",
         "after_loss": "00vision_losses.ipynb",
         "cross_entropy": "00vision_losses.ipynb",
         "cross_entropy_mixup": "00vision_losses.ipynb",
         "binary_cross_entropy": "00vision_losses.ipynb",
         "binary_cross_entropy_mixup": "00vision_losses.ipynb",
         "binary_cross_entropy_scaled_mixup": "00vision_losses.ipynb",
         "focal_loss": "00vision_losses.ipynb",
         "get_loss": "00vision_losses.ipynb",
         "gem": "00vision_models.ipynb",
         "GeM": "00vision_models.ipynb",
         "AdaptiveConcatPool2d_GeM": "00vision_models.ipynb",
         "MobileNetV2": "00vision_models.ipynb",
         "ResNet_": "00vision_models.ipynb",
         "ResNet18_swsl": "00vision_models.ipynb",
         "ResNet50_swsl": "00vision_models.ipynb",
         "ResNet50_32x4d_swsl": "00vision_models.ipynb",
         "xResNet_": "00vision_models.ipynb",
         "xResNet50_ssa": "00vision_models.ipynb",
         "ResNeSt_": "00vision_models.ipynb",
         "ResNeSt50": "00vision_models.ipynb",
         "ResNeSt101": "00vision_models.ipynb",
         "ResNeSt200": "00vision_models.ipynb",
         "ResNeSt269": "00vision_models.ipynb",
         "ResNeSt50_fast_1s1x64d": "00vision_models.ipynb",
         "ResNeSt50_fast_1s2x40d": "00vision_models.ipynb",
         "ResNeSt50_fast_1s4x24d": "00vision_models.ipynb",
         "ResNeSt50_fast_2s1x64d": "00vision_models.ipynb",
         "ResNeSt50_fast_2s2x40d": "00vision_models.ipynb",
         "ResNeSt50_fast_4s1x64d": "00vision_models.ipynb",
         "ResNeSt50_fast_4s2x40d": "00vision_models.ipynb",
         "DenseNet_": "00vision_models.ipynb",
         "DenseNet121": "00vision_models.ipynb",
         "DenseNet169": "00vision_models.ipynb",
         "DenseNet201": "00vision_models.ipynb",
         "DenseNet161": "00vision_models.ipynb",
         "DenseNetBlur121": "00vision_models.ipynb",
         "EfficientNet_": "00vision_models.ipynb",
         "EfficientNetB0": "00vision_models.ipynb",
         "EfficientNetB1": "00vision_models.ipynb",
         "EfficientNetB2": "00vision_models.ipynb",
         "EfficientNetB3": "00vision_models.ipynb",
         "EfficientNetB4": "00vision_models.ipynb",
         "EfficientNetB5": "00vision_models.ipynb",
         "EfficientNetB6": "00vision_models.ipynb",
         "EfficientNetB7": "00vision_models.ipynb",
         "Head": "00vision_models.ipynb",
         "EmbResNeSt_": "00vision_models.ipynb",
         "EmbResNeSt50": "00vision_models.ipynb",
         "get_model": "00vision_models.ipynb",
         "SampleEpisode": "00vision_triplet.ipynb",
         "compute_distance_matrix": "00vision_triplet.ipynb",
         "EpisodeDataLoader": "00vision_triplet.ipynb",
         "get_preds": "kaggle_rfcx-species-audio-detection.ipynb",
         "distance": "00vision_triplet.ipynb",
         "remove_duplicates": "00vision_triplet.ipynb",
         "map5": "00vision_triplet.ipynb",
         "accuracy": "01audio_util.ipynb",
         "AddGaussianSNR": "01audio_augmentations.ipynb",
         "ClippingDistortion": "01audio_augmentations.ipynb",
         "FrequencyMask": "01audio_augmentations.ipynb",
         "TimeMask": "01audio_augmentations.ipynb",
         "Gain": "01audio_augmentations.ipynb",
         "PitchShift": "01audio_augmentations.ipynb",
         "Shift": "01audio_augmentations.ipynb",
         "TimeStretch": "01audio_augmentations.ipynb",
         "MelSpectrogram": "01audio_augmentations.ipynb",
         "SAMPLE_FILE": "01audio_core.ipynb",
         "TensorAudio": "01audio_core.ipynb",
         "TensorAudioLabel": "01audio_core.ipynb",
         "load_npy": "01audio_core.ipynb",
         "sample_file": "01audio_core.ipynb",
         "melspectrogram": "01audio_core.ipynb",
         "show_sample": "01audio_core.ipynb",
         "audio2npy": "01audio_core.ipynb",
         "Datasets": "01audio_dataset.ipynb",
         "DataLoader": "01audio_dataset.ipynb",
         "DataLoaders": "01audio_dataset.ipynb",
         "RenameColumns": "01audio_dataset.ipynb",
         "load_dataframe": "01audio_dataset.ipynb",
         "group_labels": "01audio_dataset.ipynb",
         "time2pix_image": "01audio_dataset.ipynb",
         "time2pix_wave": "01audio_dataset.ipynb",
         "pix2time": "01audio_dataset.ipynb",
         "pix2pix_image": "01audio_dataset.ipynb",
         "time_labels": "01audio_dataset.ipynb",
         "audio_crop": "01audio_dataset.ipynb",
         "TilesTransform": "01audio_dataset.ipynb",
         "reorganize_batch": "01audio_dataset.ipynb",
         "create_dataset_item": "01audio_dataset.ipynb",
         "apply_augmentations": "01audio_dataset.ipynb",
         "audio_augment": "kaggle_rfcx-species-audio-detection.ipynb",
         "show_augmentations": "01audio_dataset.ipynb",
         "mask2category": "01audio_util.ipynb",
         "lrap": "01audio_util.ipynb",
         "kfold_dataframes": "01audio_util.ipynb",
         "OneHot": "01audio_util.ipynb",
         "MixUp": "01audio_util.ipynb",
         "LabelSED": "01audio_util.ipynb",
         "train": "kaggle_rfcx-species-audio-detection.ipynb",
         "test": "kaggle_rfcx-species-audio-detection.ipynb",
         "main": "kaggle_rfcx-species-audio-detection.ipynb"}

# Library modules generated from the notebooks, in build order.
modules = ["core.py",
           "vision/losses.py",
           "vision/models.py",
           "vision/triplet.py",
           "audio/augmentations.py",
           "audio/core.py",
           "audio/dataset.py",
           "audio/util.py",
           "kaggle/rfcx_species_audio_detection.py"]

# Base URLs used by nbdev when rendering documentation links.
doc_url = "https://mnpinto.github.io/dl_pipeline/"

git_url = "https://github.com/mnpinto/dl_pipeline/tree/master/"

def custom_doc_links(name): return None
Empty file added dl_pipeline/audio/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions dl_pipeline/audio/all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/01audio_all.ipynb (unless otherwise specified).

# The source notebook exports no symbols of its own (it only re-imports the
# audio submodules), so the public API of this module is intentionally empty.
__all__ = []
138 changes: 138 additions & 0 deletions dl_pipeline/audio/augmentations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/01audio_augmentations.ipynb (unless otherwise specified).

__all__ = ['AddGaussianSNR', 'ClippingDistortion', 'FrequencyMask', 'TimeMask', 'Gain', 'PitchShift', 'Shift',
'TimeStretch', 'MelSpectrogram']

# Cell
import matplotlib.pyplot as plt
import audiomentations as aug
from nnAudio import Spectrogram
from fastcore.all import *
from fastai.vision.augment import RandTransform
from fastai.vision.all import *
from .core import *

# Cell
class AddGaussianSNR(Transform):
    "Add Gaussian noise at a random signal-to-noise ratio between `min_SNR` and `max_SNR`"
    def __init__(self, sample_rate, min_SNR=0.001, max_SNR=1.0, p=0.5, **kwargs):
        store_attr('min_SNR')
        store_attr('max_SNR')
        store_attr('p')
        super().__init__(**kwargs)
        # Pre-bind the sample rate so the stored callable only needs samples.
        noiser = aug.AddGaussianSNR(min_SNR=min_SNR, max_SNR=max_SNR, p=p)
        self.tfm = partial(noiser, sample_rate=sample_rate)

    def encodes(self, wav:TensorAudio):
        # audiomentations works on float numpy arrays; re-wrap the result.
        samples = wav.float().numpy()
        return TensorAudio(self.tfm(samples))

# Cell
class ClippingDistortion(Transform):
    "Apply clipping distortion"
    def __init__(self, sample_rate, min_percentile_threshold=0,
                 max_percentile_threshold=40, p=0.5, **kwargs):
        store_attr('min_percentile_threshold')
        store_attr('max_percentile_threshold')
        store_attr('p')
        super().__init__(**kwargs)
        # Pre-bind the sample rate so the stored callable only needs samples.
        clipper = aug.ClippingDistortion(
            min_percentile_threshold=min_percentile_threshold,
            max_percentile_threshold=max_percentile_threshold, p=p)
        self.tfm = partial(clipper, sample_rate=sample_rate)

    def encodes(self, wav:TensorAudio):
        # audiomentations works on float numpy arrays; re-wrap the result.
        samples = wav.float().numpy()
        return TensorAudio(self.tfm(samples))

# Cell
class FrequencyMask(Transform):
    "Applies a frequency mask to a range of frequencies"
    def __init__(self, sample_rate,min_frequency_band=0.0, max_frequency_band=0.5,
                 p=0.5, **kwargs):
        store_attr('min_frequency_band')
        store_attr('max_frequency_band')
        store_attr('p')
        super().__init__(**kwargs)
        # Pre-bind the sample rate so the stored callable only needs samples.
        masker = aug.FrequencyMask(min_frequency_band=min_frequency_band,
                                   max_frequency_band=max_frequency_band, p=p)
        self.tfm = partial(masker, sample_rate=sample_rate)

    def encodes(self, wav:TensorAudio):
        # audiomentations works on float numpy arrays; re-wrap the result.
        samples = wav.float().numpy()
        return TensorAudio(self.tfm(samples))


# Cell
class TimeMask(Transform):
    "Applies a mask to a section of the audio clip"
    def __init__(self, sample_rate, min_band_part=0.0, max_band_part=0.5, p=0.5, **kwargs):
        store_attr('min_band_part')
        store_attr('max_band_part')
        store_attr('p')
        super().__init__(**kwargs)
        # Pre-bind the sample rate so the stored callable only needs samples.
        masker = aug.TimeMask(min_band_part=min_band_part,
                              max_band_part=max_band_part, p=p)
        self.tfm = partial(masker, sample_rate=sample_rate)

    def encodes(self, wav:TensorAudio):
        # audiomentations works on float numpy arrays; re-wrap the result.
        samples = wav.float().numpy()
        return TensorAudio(self.tfm(samples))

# Cell
class Gain(Transform):
    "Apply a random gain between 'min_gain_in_db' and 'max_gain_in_db'"
    def __init__(self, sample_rate, min_gain_in_db=-12, max_gain_in_db=12, p=0.5, **kwargs):
        store_attr('min_gain_in_db')
        store_attr('max_gain_in_db')
        store_attr('p')
        super().__init__(**kwargs)
        # Pre-bind the sample rate so the stored callable only needs samples.
        gainer = aug.Gain(min_gain_in_db=min_gain_in_db,
                          max_gain_in_db=max_gain_in_db, p=p)
        self.tfm = partial(gainer, sample_rate=sample_rate)

    def encodes(self, wav:TensorAudio):
        # audiomentations works on float numpy arrays; re-wrap the result.
        samples = wav.float().numpy()
        return TensorAudio(self.tfm(samples))

# Cell
class PitchShift(Transform):
    "Shift pitch by a random value of semitones between 'min_semitones' and 'max_semitones'"
    def __init__(self, sample_rate, min_semitones=-4, max_semitones=4, p=0.5, **kwargs):
        store_attr('min_semitones')
        store_attr('max_semitones')
        store_attr('p')
        super().__init__(**kwargs)
        # Pre-bind the sample rate so the stored callable only needs samples.
        shifter = aug.PitchShift(min_semitones=min_semitones,
                                 max_semitones=max_semitones, p=p)
        self.tfm = partial(shifter, sample_rate=sample_rate)

    def encodes(self, wav:TensorAudio):
        # audiomentations works on float numpy arrays; re-wrap the result.
        samples = wav.float().numpy()
        return TensorAudio(self.tfm(samples))

# Cell
class Shift(Transform):
    # Fixed docstring: the original was copy-pasted from PitchShift. This
    # transform translates the waveform in TIME, not in pitch.
    "Shift the audio in time by a random fraction of its length between 'min_fraction' and 'max_fraction'"
    def __init__(self, sample_rate, min_fraction=-0.5, max_fraction=0.5,
                 rollover=True, p=0.5, **kwargs):
        # Parameters:
        #   sample_rate: sampling rate passed to audiomentations at call time.
        #   min_fraction/max_fraction: shift range as a fraction of total length
        #     (negative = shift earlier, positive = shift later).
        #   rollover: if True, samples shifted past the edge wrap around.
        #   p: probability of applying the transform.
        store_attr('min_fraction'), store_attr('max_fraction')
        store_attr('rollover'), store_attr('p')
        super().__init__(**kwargs)
        # Pre-bind sample_rate so self.tfm can be called with just the samples.
        self.tfm = partial(aug.Shift(min_fraction=min_fraction, max_fraction=max_fraction,
                                     rollover=rollover, p=p), sample_rate=sample_rate)
    def encodes(self, wav:TensorAudio):
        # audiomentations operates on float numpy arrays; re-wrap the output.
        return TensorAudio(self.tfm(wav.float().numpy()))


# Cell
class TimeStretch(Transform):
    # Fixed docstring: the original was copy-pasted from PitchShift. This
    # transform changes playback SPEED (tempo), not pitch.
    "Time-stretch the audio by a random rate between 'min_rate' and 'max_rate'"
    def __init__(self, sample_rate, min_rate=0.8, max_rate=1.25,
                 leave_length_unchanged=True, p=0.5, **kwargs):
        # Parameters:
        #   sample_rate: sampling rate passed to audiomentations at call time.
        #   min_rate/max_rate: stretch factor range (<1 slows down, >1 speeds up).
        #   leave_length_unchanged: if True, output is padded/cropped back to
        #     the input length so downstream batch shapes stay fixed.
        #   p: probability of applying the transform.
        store_attr('min_rate'), store_attr('max_rate')
        store_attr('leave_length_unchanged'), store_attr('p')
        super().__init__(**kwargs)
        # Pre-bind sample_rate so self.tfm can be called with just the samples.
        self.tfm = partial(aug.TimeStretch(min_rate=min_rate, max_rate=max_rate,
                                           leave_length_unchanged=leave_length_unchanged, p=p), sample_rate=sample_rate)
    def encodes(self, wav:TensorAudio):
        # audiomentations operates on float numpy arrays; re-wrap the output.
        return TensorAudio(self.tfm(wav.float().numpy()))

# Cell
class MelSpectrogram(Transform):
    # Fixed docstring: the original was copy-pasted from PitchShift; this
    # transform converts a waveform to a log-mel spectrogram image.
    "Convert a waveform batch to a (normalized) log-mel spectrogram image"
    def __init__(self, sample_rate, n_mels=128, hop_length=512, eps=1e-6,
                 normalize_spectro=True, device=torch.device("cuda:0"), **kwargs):
        # NOTE(review): the "cuda:0" default will fail on CPU-only machines;
        # callers must pass device=torch.device("cpu") there — confirm intent.
        store_attr('sample_rate'), store_attr('n_mels'), store_attr('hop_length')
        store_attr('eps')
        super().__init__(**kwargs)
        # NOTE(review): **kwargs is forwarded to BOTH Transform.__init__ and
        # Spectrogram.MelSpectrogram below; an extra keyword accepted by one
        # but not the other will raise — verify this double-forwarding is intended.
        self.spectro = Spectrogram.MelSpectrogram(
            sr=sample_rate, n_mels=n_mels, hop_length=hop_length,
            verbose=False, **kwargs).to(device)
        self.relu = nn.ReLU(inplace=True)
        self.normalize_spectro = normalize_spectro
        # eps added before log() to avoid log(0) on silent frames.
        self.eps = eps
        self.device = device

    def encodes(self, x:TensorAudio):
        with torch.no_grad():
            # Remember the caller's device so the output goes back to it.
            d = x.device
            x = x.to(self.device)
            # ReLU clamps tiny negative numerical artifacts before the log;
            # unsqueeze(1) adds a channel axis for image-style models.
            x = self.relu(self.spectro(x)).unsqueeze(1)
            x = x.add(self.eps).log()
            if self.normalize_spectro:
                # Per-sample standardization over the freq/time axes (2,3).
                x = (x - x.mean((2,3))[...,None,None])/x.std((2,3))[...,None,None]
            # Guard against NaNs (e.g. zero std on constant spectrograms).
            assert np.sum(np.isnan(x.detach().cpu().numpy())) == 0
            return TensorImage(x.to(d))
Loading

0 comments on commit ff7e742

Please sign in to comment.