-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathseparation.html
1837 lines (1656 loc) · 125 KB
/
separation.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Separation algorithms — nussl 1.0.0 documentation</title>
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/language_data.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/theme_overrides.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Citing nussl" href="citation.html" />
<link rel="prev" title="Machine Learning" href="ml.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> nussl
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul>
<li class="toctree-l1"><a class="reference internal" href="getting_started.html">Getting Started</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="tutorials.html">Tutorials</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="examples/examples.html">Examples</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="recipes/recipes.html">Recipes</a></li>
</ul>
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="api.html">API Documentation</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="core.html">Core</a></li>
<li class="toctree-l2"><a class="reference internal" href="datasets.html">Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="evaluation.html">Evaluation</a></li>
<li class="toctree-l2"><a class="reference internal" href="ml.html">Machine Learning</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Separation algorithms</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#base-classes">Base classes</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#base-for-all-methods">Base for all methods</a></li>
<li class="toctree-l4"><a class="reference internal" href="#base-for-masking-based-methods">Base for masking-based methods</a></li>
<li class="toctree-l4"><a class="reference internal" href="#base-for-clustering-based-methods">Base for clustering-based methods</a></li>
<li class="toctree-l4"><a class="reference internal" href="#mix-in-for-nmf-based-methods">Mix-in for NMF-based methods</a></li>
<li class="toctree-l4"><a class="reference internal" href="#mix-in-for-deep-methods">Mix-in for deep methods</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#benchmark-methods">Benchmark methods</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#high-pass-filter">High pass filter</a></li>
<li class="toctree-l4"><a class="reference internal" href="#ideal-binary-mask">Ideal binary mask</a></li>
<li class="toctree-l4"><a class="reference internal" href="#ideal-ratio-mask">Ideal ratio mask</a></li>
<li class="toctree-l4"><a class="reference internal" href="#wiener-filter">Wiener filter</a></li>
<li class="toctree-l4"><a class="reference internal" href="#mix-as-estimate">Mix as estimate</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#deep-methods">Deep methods</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#deep-clustering">Deep clustering</a></li>
<li class="toctree-l4"><a class="reference internal" href="#deep-mask-estimation">Deep mask estimation</a></li>
<li class="toctree-l4"><a class="reference internal" href="#deep-audio-estimation">Deep audio estimation</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#composite-methods">Composite methods</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#ensemble-clustering">Ensemble clustering</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#factorization-based-methods">Factorization-based methods</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#robust-principle-component-analysis">Robust principal component analysis</a></li>
<li class="toctree-l4"><a class="reference internal" href="#independent-component-analysis">Independent component analysis</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#primitive-methods">Primitive methods</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cluster-sources-by-timbre">Cluster sources by timbre</a></li>
<li class="toctree-l4"><a class="reference internal" href="#foreground-background-via-2dft">Foreground/background via 2DFT</a></li>
<li class="toctree-l4"><a class="reference internal" href="#harmonic-percussive-separation">Harmonic/percussive separation</a></li>
<li class="toctree-l4"><a class="reference internal" href="#foreground-background-via-repet">Foreground/background via REPET</a></li>
<li class="toctree-l4"><a class="reference internal" href="#foreground-background-via-repet-sim">Foreground/background via REPET-SIM</a></li>
<li class="toctree-l4"><a class="reference internal" href="#vocal-melody-extraction-via-melodia">Vocal melody extraction via Melodia</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#spatial-methods">Spatial methods</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cluster-by-inter-phase-and-inter-level-difference">Cluster by inter-phase and inter-level difference</a></li>
<li class="toctree-l4"><a class="reference internal" href="#projet-separate-via-spatial-projections">PROJET: Separate via spatial projections</a></li>
<li class="toctree-l4"><a class="reference internal" href="#duet">DUET</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="citation.html">Citing nussl</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="contributing.html">Contribution Guide</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="changelog.html">Changelog</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">nussl</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> »</li>
<li><a href="api.html">API Documentation</a> »</li>
<li>Separation algorithms</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/separation.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<style>
/* CSS overrides for sphinx_rtd_theme */
/* 24px margin */
.nbinput.nblast.container,
.nboutput.nblast.container {
margin-bottom: 19px; /* padding has already 5px */
}
/* ... except between code cells! */
.nblast.container + .nbinput.container {
margin-top: -19px;
}
.admonition > p:before {
margin-right: 4px; /* make room for the exclamation icon */
}
/* Fix math alignment, see https://github.com/rtfd/sphinx_rtd_theme/pull/686 */
.math {
text-align: unset;
}
</style>
<span class="target" id="module-nussl.separation"></span><div class="section" id="separation-algorithms">
<h1>Separation algorithms<a class="headerlink" href="#separation-algorithms" title="Permalink to this headline">¶</a></h1>
<div class="section" id="base-classes">
<h2>Base classes<a class="headerlink" href="#base-classes" title="Permalink to this headline">¶</a></h2>
<p>These classes are used to build every type of source separation
algorithm currently in nussl. They provide helpful utilities
and make it such that the end-user only has to implement
one or two functions to create a new separation algorithm,
depending on what sort of algorithm they are trying to
implement.</p>
<span class="target" id="module-nussl.separation.base"></span><div class="section" id="base-for-all-methods">
<h3>Base for all methods<a class="headerlink" href="#base-for-all-methods" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.SeparationBase">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.</code><code class="sig-name descname">SeparationBase</code><span class="sig-paren">(</span><em class="sig-param">input_audio_signal</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/separation_base.html#SeparationBase"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.SeparationBase" title="Permalink to this definition">¶</a></dt>
<dd><p>Base class for all separation algorithms in nussl.</p>
<p>Do not call this. It will not do anything.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>input_audio_signal</strong> (<a class="reference internal" href="core.html#nussl.core.AudioSignal" title="nussl.core.AudioSignal"><em>AudioSignal</em></a>) – This will always be a copy of the provided AudioSignal object.</p>
</dd>
</dl>
<p><strong>Attributes</strong></p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.SeparationBase.audio_signal" title="nussl.separation.SeparationBase.audio_signal"><code class="xref py py-obj docutils literal notranslate"><span class="pre">audio_signal</span></code></a></p></td>
<td><p>Copy of AudioSignal that is made on initialization.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#nussl.separation.SeparationBase.sample_rate" title="nussl.separation.SeparationBase.sample_rate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sample_rate</span></code></a></p></td>
<td><p>Sample rate of <code class="xref py py-attr docutils literal notranslate"><span class="pre">audio_signal</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.SeparationBase.stft_params" title="nussl.separation.SeparationBase.stft_params"><code class="xref py py-obj docutils literal notranslate"><span class="pre">stft_params</span></code></a></p></td>
<td><p>STFTParams object containing the STFT parameters of the copied AudioSignal.</p></td>
</tr>
</tbody>
</table>
<p><strong>Methods</strong></p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.SeparationBase.make_audio_signals" title="nussl.separation.SeparationBase.make_audio_signals"><code class="xref py py-obj docutils literal notranslate"><span class="pre">make_audio_signals</span></code></a>()</p></td>
<td><p>Makes <code class="xref py py-class docutils literal notranslate"><span class="pre">audio_signal.AudioSignal</span></code> objects after separation algorithm is run</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#nussl.separation.SeparationBase.run" title="nussl.separation.SeparationBase.run"><code class="xref py py-obj docutils literal notranslate"><span class="pre">run</span></code></a>()</p></td>
<td><p>Runs separation algorithm.</p></td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="nussl.separation.SeparationBase.audio_signal">
<em class="property">property </em><code class="sig-name descname">audio_signal</code><a class="headerlink" href="#nussl.separation.SeparationBase.audio_signal" title="Permalink to this definition">¶</a></dt>
<dd><p>Copy of AudioSignal that is made on initialization.</p>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.SeparationBase.make_audio_signals">
<code class="sig-name descname">make_audio_signals</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/separation_base.html#SeparationBase.make_audio_signals"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.SeparationBase.make_audio_signals" title="Permalink to this definition">¶</a></dt>
<dd><p>Makes <code class="xref py py-class docutils literal notranslate"><span class="pre">audio_signal.AudioSignal</span></code> objects after separation algorithm is run</p>
<dl class="field-list simple">
<dt class="field-odd">Raises</dt>
<dd class="field-odd"><p><strong>NotImplementedError</strong> – Cannot call base class</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.SeparationBase.run">
<code class="sig-name descname">run</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/separation_base.html#SeparationBase.run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.SeparationBase.run" title="Permalink to this definition">¶</a></dt>
<dd><p>Runs separation algorithm.</p>
<dl class="field-list simple">
<dt class="field-odd">Raises</dt>
<dd class="field-odd"><p><strong>NotImplementedError</strong> – Cannot call base class</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.SeparationBase.sample_rate">
<em class="property">property </em><code class="sig-name descname">sample_rate</code><a class="headerlink" href="#nussl.separation.SeparationBase.sample_rate" title="Permalink to this definition">¶</a></dt>
<dd><p>Sample rate of <a class="reference internal" href="#nussl.separation.SeparationBase.audio_signal" title="nussl.separation.SeparationBase.audio_signal"><code class="xref py py-attr docutils literal notranslate"><span class="pre">audio_signal</span></code></a>.
Literally <code class="xref py py-attr docutils literal notranslate"><span class="pre">audio_signal.sample_rate</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Type</dt>
<dd class="field-odd"><p>(int)</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.SeparationBase.stft_params">
<em class="property">property </em><code class="sig-name descname">stft_params</code><a class="headerlink" href="#nussl.separation.SeparationBase.stft_params" title="Permalink to this definition">¶</a></dt>
<dd><p>STFTParams object containing the STFT parameters of the copied AudioSignal.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="base-for-masking-based-methods">
<h3>Base for masking-based methods<a class="headerlink" href="#base-for-masking-based-methods" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.MaskSeparationBase">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.</code><code class="sig-name descname">MaskSeparationBase</code><span class="sig-paren">(</span><em class="sig-param">input_audio_signal</em>, <em class="sig-param">mask_type='soft'</em>, <em class="sig-param">mask_threshold=0.5</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/mask_separation_base.html#MaskSeparationBase"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.MaskSeparationBase" title="Permalink to this definition">¶</a></dt>
<dd><p>Base class for separation algorithms that create a mask (binary or soft) to do
their separation. Most algorithms in nussl are derived from
<a class="reference internal" href="#nussl.separation.MaskSeparationBase" title="nussl.separation.MaskSeparationBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">MaskSeparationBase</span></code></a>.</p>
<p>Although this class will do nothing if you instantiate and run it by itself,
algorithms that are derived from this class are expected to return a list of
<code class="xref py py-class docutils literal notranslate"><span class="pre">separation.masks.mask_base.MaskBase</span></code> -derived objects
(i.e., either a <code class="xref py py-class docutils literal notranslate"><span class="pre">separation.masks.binary_mask.BinaryMask</span></code> or
<code class="xref py py-class docutils literal notranslate"><span class="pre">separation.masks.soft_mask.SoftMask</span></code> object) by their <a class="reference internal" href="#nussl.separation.MaskSeparationBase.run" title="nussl.separation.MaskSeparationBase.run"><code class="xref py py-func docutils literal notranslate"><span class="pre">run()</span></code></a>
method. Being a subclass of <a class="reference internal" href="#nussl.separation.MaskSeparationBase" title="nussl.separation.MaskSeparationBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">MaskSeparationBase</span></code></a> is an implicit contract
assuring this. Returning a <code class="xref py py-class docutils literal notranslate"><span class="pre">separation.masks.mask_base.MaskBase</span></code>-derived
object standardizes algorithm return types for
<code class="xref py py-class docutils literal notranslate"><span class="pre">evaluation.evaluation_base.EvaluationBase</span></code>-derived objects.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input_audio_signal</strong> – (<code class="xref py py-class docutils literal notranslate"><span class="pre">audio_signal.AudioSignal</span></code>) An
<code class="xref py py-class docutils literal notranslate"><span class="pre">audio_signal.AudioSignal</span></code> object containing the mixture to be
separated.</p></li>
<li><p><strong>mask_type</strong> – (str, BinaryMask, or SoftMask) Indicates whether to make
binary or soft masks. See <a class="reference internal" href="#nussl.separation.MaskSeparationBase.mask_type" title="nussl.separation.MaskSeparationBase.mask_type"><code class="xref py py-attr docutils literal notranslate"><span class="pre">mask_type</span></code></a> property for details.</p></li>
<li><p><strong>mask_threshold</strong> – (float) Value between [0.0, 1.0] to convert a soft mask
to a binary mask. See <a class="reference internal" href="#nussl.separation.MaskSeparationBase.mask_threshold" title="nussl.separation.MaskSeparationBase.mask_threshold"><code class="xref py py-attr docutils literal notranslate"><span class="pre">mask_threshold</span></code></a> property for details.</p></li>
</ul>
</dd>
</dl>
<p><strong>Methods</strong></p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.MaskSeparationBase.make_audio_signals" title="nussl.separation.MaskSeparationBase.make_audio_signals"><code class="xref py py-obj docutils literal notranslate"><span class="pre">make_audio_signals</span></code></a>()</p></td>
<td><p>Makes <code class="xref py py-class docutils literal notranslate"><span class="pre">audio_signal.AudioSignal</span></code> objects after mask-based separation algorithm is run.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#nussl.separation.MaskSeparationBase.ones_mask" title="nussl.separation.MaskSeparationBase.ones_mask"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ones_mask</span></code></a>(shape)</p></td>
<td><p>Creates a new ones mask with this object’s type.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.MaskSeparationBase.run" title="nussl.separation.MaskSeparationBase.run"><code class="xref py py-obj docutils literal notranslate"><span class="pre">run</span></code></a>()</p></td>
<td><p>Runs mask-based separation algorithm.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#nussl.separation.MaskSeparationBase.zeros_mask" title="nussl.separation.MaskSeparationBase.zeros_mask"><code class="xref py py-obj docutils literal notranslate"><span class="pre">zeros_mask</span></code></a>(shape)</p></td>
<td><p>Creates a new zeros mask with this object’s type.</p></td>
</tr>
</tbody>
</table>
<p><strong>Attributes</strong></p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.MaskSeparationBase.mask_threshold" title="nussl.separation.MaskSeparationBase.mask_threshold"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mask_threshold</span></code></a></p></td>
<td><p>Threshold for determining True/False if <code class="xref py py-attr docutils literal notranslate"><span class="pre">mask_type</span></code> is <code class="xref py py-attr docutils literal notranslate"><span class="pre">BINARY_MASK</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#nussl.separation.MaskSeparationBase.mask_type" title="nussl.separation.MaskSeparationBase.mask_type"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mask_type</span></code></a></p></td>
<td><p>This property indicates what type of mask the derived algorithm will create and return from <code class="xref py py-func docutils literal notranslate"><span class="pre">run()</span></code>.</p></td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="nussl.separation.MaskSeparationBase.make_audio_signals">
<code class="sig-name descname">make_audio_signals</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/mask_separation_base.html#MaskSeparationBase.make_audio_signals"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.MaskSeparationBase.make_audio_signals" title="Permalink to this definition">¶</a></dt>
<dd><p>Makes <code class="xref py py-class docutils literal notranslate"><span class="pre">audio_signal.AudioSignal</span></code> objects after mask-based
separation algorithm is run. This looks in <code class="docutils literal notranslate"><span class="pre">self.result_masks</span></code>
which must be filled by <code class="docutils literal notranslate"><span class="pre">run</span></code> in the algorithm that
subclasses this. It applies each mask to the mixture audio
signal and returns a list of the estimates, which are each
AudioSignal objects.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>List of AudioSignal objects corresponding to the
separated estimates.</p>
</dd>
<dt class="field-even">Return type</dt>
<dd class="field-even"><p>list</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.MaskSeparationBase.mask_threshold">
<em class="property">property </em><code class="sig-name descname">mask_threshold</code><a class="headerlink" href="#nussl.separation.MaskSeparationBase.mask_threshold" title="Permalink to this definition">¶</a></dt>
<dd><p>Threshold for determining True/False if <a class="reference internal" href="#nussl.separation.MaskSeparationBase.mask_type" title="nussl.separation.MaskSeparationBase.mask_type"><code class="xref py py-attr docutils literal notranslate"><span class="pre">mask_type</span></code></a> is
<code class="xref py py-attr docutils literal notranslate"><span class="pre">BINARY_MASK</span></code>. Some algorithms will first make a soft mask and then
convert that to a binary mask using this threshold parameter. All
values of the soft mask are between <code class="docutils literal notranslate"><span class="pre">[0.0,</span> <span class="pre">1.0]</span></code> and as such
<a class="reference internal" href="#nussl.separation.MaskSeparationBase.mask_threshold" title="nussl.separation.MaskSeparationBase.mask_threshold"><code class="xref py py-func docutils literal notranslate"><span class="pre">mask_threshold()</span></code></a> is expected to be a float between
<code class="docutils literal notranslate"><span class="pre">[0.0,</span> <span class="pre">1.0]</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>Value between <code class="docutils literal notranslate"><span class="pre">[0.0,</span> <span class="pre">1.0]</span></code> that indicates
the True/False cutoff when converting a soft mask to a binary mask.</p>
</dd>
<dt class="field-even">Return type</dt>
<dd class="field-even"><p>float</p>
</dd>
<dt class="field-odd">Raises</dt>
<dd class="field-odd"><p><strong>ValueError</strong> – If not a float or if set outside <code class="docutils literal notranslate"><span class="pre">[0.0,</span> <span class="pre">1.0]</span></code>.</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.MaskSeparationBase.mask_type">
<em class="property">property </em><code class="sig-name descname">mask_type</code><a class="headerlink" href="#nussl.separation.MaskSeparationBase.mask_type" title="Permalink to this definition">¶</a></dt>
<dd><p>This property indicates what type of mask the derived algorithm will create
and return from <a class="reference internal" href="#nussl.separation.MaskSeparationBase.run" title="nussl.separation.MaskSeparationBase.run"><code class="xref py py-func docutils literal notranslate"><span class="pre">run()</span></code></a>. Options are either ‘soft’ or ‘binary’.
<a class="reference internal" href="#nussl.separation.MaskSeparationBase.mask_type" title="nussl.separation.MaskSeparationBase.mask_type"><code class="xref py py-attr docutils literal notranslate"><span class="pre">mask_type</span></code></a> is usually set when initializing a
<a class="reference internal" href="#nussl.separation.MaskSeparationBase" title="nussl.separation.MaskSeparationBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">MaskSeparationBase</span></code></a>-derived class and defaults to ‘soft’.</p>
<p>This property, though stored as a string, can be set in two ways when
initializing:</p>
<ul>
<li><p>First, it is possible to set this property with a string. Only <code class="docutils literal notranslate"><span class="pre">'soft'</span></code>
and <code class="docutils literal notranslate"><span class="pre">'binary'</span></code> are accepted (case insensitive), every other value will
raise an error. When initializing with a string, two helper
attributes are provided: <code class="xref py py-attr docutils literal notranslate"><span class="pre">BINARY_MASK</span></code> and <code class="xref py py-attr docutils literal notranslate"><span class="pre">SOFT_MASK</span></code>.</p>
<p>It is <strong>HIGHLY</strong> encouraged to use these, as the API may change and code
that uses bare strings (e.g. <code class="docutils literal notranslate"><span class="pre">mask_type</span> <span class="pre">=</span> <span class="pre">'soft'</span></code> or
<code class="docutils literal notranslate"><span class="pre">mask_type</span> <span class="pre">=</span> <span class="pre">'binary'</span></code>) for assignment might not be future-proof.
<code class="xref py py-attr docutils literal notranslate"><span class="pre">BINARY_MASK</span></code> and <code class="xref py py-attr docutils literal notranslate"><span class="pre">SOFT_MASK</span></code> are safe aliases in case these
underlying types change.</p>
</li>
<li><p>The second way to set this property is by using a class prototype of
either the <code class="xref py py-class docutils literal notranslate"><span class="pre">separation.masks.binary_mask.BinaryMask</span></code> or
<code class="xref py py-class docutils literal notranslate"><span class="pre">separation.masks.soft_mask.SoftMask</span></code> class. This is
probably the most stable way to set this, and it’s fairly succinct.
For example, <code class="docutils literal notranslate"><span class="pre">mask_type</span> <span class="pre">=</span> <span class="pre">nussl.BinaryMask</span></code> or
<code class="docutils literal notranslate"><span class="pre">mask_type</span> <span class="pre">=</span> <span class="pre">nussl.SoftMask</span></code> are both perfectly valid.</p></li>
</ul>
<p>Though uncommon, this can be set outside of <code class="xref py py-func docutils literal notranslate"><span class="pre">__init__()</span></code>.</p>
<p>Examples of both methods are shown below.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>Either <code class="docutils literal notranslate"><span class="pre">'soft'</span></code> or <code class="docutils literal notranslate"><span class="pre">'binary'</span></code>.</p>
</dd>
<dt class="field-even">Return type</dt>
<dd class="field-even"><p>mask_type (str)</p>
</dd>
<dt class="field-odd">Raises</dt>
<dd class="field-odd"><p><strong>ValueError</strong> – If set invalidly.</p>
</dd>
</dl>
<p>Example:</p>
<div class="highlight-python notranslate"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre> 1
2
3
4
5
6
7
8
9
10
11
12
13
14</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">nussl</span>
<span class="n">mixture_signal</span> <span class="o">=</span> <span class="n">nussl</span><span class="o">.</span><span class="n">AudioSignal</span><span class="p">()</span>
<span class="c1"># Two options for determining mask upon init...</span>
<span class="c1"># Option 1: Init with a string (BINARY_MASK is a string 'constant')</span>
<span class="n">repet_sim</span> <span class="o">=</span> <span class="n">nussl</span><span class="o">.</span><span class="n">RepetSim</span><span class="p">(</span><span class="n">mixture_signal</span><span class="p">,</span> <span class="n">mask_type</span><span class="o">=</span><span class="n">nussl</span><span class="o">.</span><span class="n">MaskSeparationBase</span><span class="o">.</span><span class="n">BINARY_MASK</span><span class="p">)</span>
<span class="c1"># Option 2: Init with a class type</span>
<span class="n">ola</span> <span class="o">=</span> <span class="n">nussl</span><span class="o">.</span><span class="n">OverlapAdd</span><span class="p">(</span><span class="n">mixture_signal</span><span class="p">,</span> <span class="n">mask_type</span><span class="o">=</span><span class="n">nussl</span><span class="o">.</span><span class="n">SoftMask</span><span class="p">)</span>
<span class="c1"># It's also possible to change these values after init by changing the `mask_type` property...</span>
<span class="n">repet_sim</span><span class="o">.</span><span class="n">mask_type</span> <span class="o">=</span> <span class="n">nussl</span><span class="o">.</span><span class="n">MaskSeparationBase</span><span class="o">.</span><span class="n">SOFT_MASK</span> <span class="c1"># using a string</span>
<span class="n">ola</span><span class="o">.</span><span class="n">mask_type</span> <span class="o">=</span> <span class="n">nussl</span><span class="o">.</span><span class="n">BinaryMask</span> <span class="c1"># or using a class type</span>
</pre></div>
</td></tr></table></div>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.MaskSeparationBase.ones_mask">
<code class="sig-name descname">ones_mask</code><span class="sig-paren">(</span><em class="sig-param">shape</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/mask_separation_base.html#MaskSeparationBase.ones_mask"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.MaskSeparationBase.ones_mask" title="Permalink to this definition">¶</a></dt>
<dd><p>Creates a new ones mask with this object’s type.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>shape</strong> (<em>tuple</em>) – tuple with shape of mask</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>A subclass of <cite>MaskBase</cite> containing 1s.</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.MaskSeparationBase.run">
<code class="sig-name descname">run</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/mask_separation_base.html#MaskSeparationBase.run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.MaskSeparationBase.run" title="Permalink to this definition">¶</a></dt>
<dd><p>Runs mask-based separation algorithm. Base class: Do not call directly!</p>
<dl class="field-list simple">
<dt class="field-odd">Raises</dt>
<dd class="field-odd"><p><strong>NotImplementedError</strong> – Cannot call base class!</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.MaskSeparationBase.zeros_mask">
<code class="sig-name descname">zeros_mask</code><span class="sig-paren">(</span><em class="sig-param">shape</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/mask_separation_base.html#MaskSeparationBase.zeros_mask"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.MaskSeparationBase.zeros_mask" title="Permalink to this definition">¶</a></dt>
<dd><p>Creates a new zeros mask with this object’s type.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>shape</strong> (<em>tuple</em>) – tuple with shape of mask</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>A subclass of <cite>MaskBase</cite> containing 0s.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="base-for-clustering-based-methods">
<h3>Base for clustering-based methods<a class="headerlink" href="#base-for-clustering-based-methods" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.ClusteringSeparationBase">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.</code><code class="sig-name descname">ClusteringSeparationBase</code><span class="sig-paren">(</span><em class="sig-param">input_audio_signal</em>, <em class="sig-param">num_sources</em>, <em class="sig-param">clustering_type='KMeans'</em>, <em class="sig-param">fit_clusterer=True</em>, <em class="sig-param">percentile=90</em>, <em class="sig-param">beta=5.0</em>, <em class="sig-param">mask_type='soft'</em>, <em class="sig-param">mask_threshold=0.5</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/clustering_separation_base.html#ClusteringSeparationBase"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.ClusteringSeparationBase" title="Permalink to this definition">¶</a></dt>
<dd><p>A base class for any clustering-based separation approach. Subclasses
of this class must implement just one function to use it: <cite>extract_features</cite>.
This function should use the internal variables of the class to
extract the appropriate time-frequency features of the signal. These
time-frequency features will then be clustered by <cite>cluster_features</cite>.
Masks will then be produced by the run function and applied to the
audio signal to produce separated estimates.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input_audio_signal</strong> – (<cite>AudioSignal</cite>) An AudioSignal object containing the
mixture to be separated.</p></li>
<li><p><strong>num_sources</strong> (<em>int</em>) – Number of sources to cluster the features of and separate
the mixture.</p></li>
<li><p><strong>clustering_type</strong> (<em>str</em>) – One of ‘KMeans’, ‘GaussianMixture’, and ‘MiniBatchKMeans’.
The clustering approach to use on the features. Defaults to ‘KMeans’.</p></li>
<li><p><strong>fit_clusterer</strong> (<em>bool</em><em>, </em><em>optional</em>) – Whether or not to call fit on the clusterer.
If False, then the clusterer should already be fit for this to work. Defaults
to True.</p></li>
<li><p><strong>percentile</strong> (<em>int</em><em>, </em><em>optional</em>) – Percentile of time-frequency points to consider by loudness.
Audio spectrograms are very high dimensional, and louder points tend to
matter more than quieter points. By setting the percentile high, one can more
efficiently cluster an auditory scene by considering only points above
that threshold. Defaults to 90 (which means the top 10 percent of
time-frequency points will be used for clustering).</p></li>
<li><p><strong>beta</strong> (<em>float</em><em>, </em><em>optional</em>) – When using KMeans, we use soft KMeans, which has an additional
parameter <cite>beta</cite>. <cite>beta</cite> controls how soft the assignments are. As beta
increases, the assignments become more binary (either 0 or 1). Defaults to
5.0, a value discovered through cross-validation.</p></li>
<li><p><strong>mask_type</strong> (<em>str</em><em>, </em><em>optional</em>) – Masking approach to use. Passed up to MaskSeparationBase.</p></li>
<li><p><strong>mask_threshold</strong> (<em>float</em><em>, </em><em>optional</em>) – Threshold for masking. Passed up to MaskSeparationBase.</p></li>
<li><p><strong>**kwargs</strong> (<em>dict</em><em>, </em><em>optional</em>) – Additional keyword arguments that are passed to the clustering
object (one of KMeans, GaussianMixture, or MiniBatchKMeans).</p></li>
</ul>
</dd>
<dt class="field-even">Raises</dt>
<dd class="field-even"><p><strong>SeparationException</strong> – If clustering type is not one of the allowed ones, or if
the output of <cite>extract_features</cite> has the wrong shape according to the STFT
shape of the AudioSignal.</p>
</dd>
</dl>
<p><strong>Methods</strong></p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.ClusteringSeparationBase.cluster_features" title="nussl.separation.ClusteringSeparationBase.cluster_features"><code class="xref py py-obj docutils literal notranslate"><span class="pre">cluster_features</span></code></a>(features, clusterer)</p></td>
<td><p>Clusters each time-frequency point according to features for each time-frequency point.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#nussl.separation.ClusteringSeparationBase.confidence" title="nussl.separation.ClusteringSeparationBase.confidence"><code class="xref py py-obj docutils literal notranslate"><span class="pre">confidence</span></code></a>([approach])</p></td>
<td><p>In clustering-based separation algorithms, we can compute a confidence measure based on the clusterability of the feature space.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.ClusteringSeparationBase.extract_features" title="nussl.separation.ClusteringSeparationBase.extract_features"><code class="xref py py-obj docutils literal notranslate"><span class="pre">extract_features</span></code></a>()</p></td>
<td><p>This function should be implemented by the subclass.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#nussl.separation.ClusteringSeparationBase.run" title="nussl.separation.ClusteringSeparationBase.run"><code class="xref py py-obj docutils literal notranslate"><span class="pre">run</span></code></a>([features])</p></td>
<td><p>Clusters the features using the chosen clustering algorithm.</p></td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="nussl.separation.ClusteringSeparationBase.cluster_features">
<code class="sig-name descname">cluster_features</code><span class="sig-paren">(</span><em class="sig-param">features</em>, <em class="sig-param">clusterer</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/clustering_separation_base.html#ClusteringSeparationBase.cluster_features"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.ClusteringSeparationBase.cluster_features" title="Permalink to this definition">¶</a></dt>
<dd><p>Clusters each time-frequency point according to features for each
time-frequency point. Features should be on the last axis.</p>
<dl class="simple">
<dt>Features should come in in the shape:</dt><dd><p><cite>(…, n_features)</cite></p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>features</strong> (<em>np.ndarray</em>) – Features to cluster, for each time-frequency point.</p></li>
<li><p><strong>clusterer</strong> (<em>object</em>) – Clustering object to use.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Responsibilities for each cluster for each time-frequency point.</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p>np.ndarray</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.ClusteringSeparationBase.confidence">
<code class="sig-name descname">confidence</code><span class="sig-paren">(</span><em class="sig-param">approach='silhouette_confidence'</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/clustering_separation_base.html#ClusteringSeparationBase.confidence"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.ClusteringSeparationBase.confidence" title="Permalink to this definition">¶</a></dt>
<dd><p>In clustering-based separation algorithms, we can compute a confidence
measure based on the clusterability of the feature space. This can be computed
only after the features have been extracted by <code class="docutils literal notranslate"><span class="pre">extract_features</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>approach</strong> (<em>str</em><em>, </em><em>optional</em>) – What approach to use for getting the confidence
measure. Options are ‘jensen_shannon_confidence’, ‘posterior_confidence’,
‘silhouette_confidence’, ‘loudness_confidence’, ‘whitened_kmeans_confidence’,
‘dpcl_classic_confidence’. Defaults to ‘silhouette_confidence’.</p></li>
<li><p><strong>kwargs</strong> – Keyword arguments to the function being used to compute the confidence.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.ClusteringSeparationBase.extract_features">
<code class="sig-name descname">extract_features</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/clustering_separation_base.html#ClusteringSeparationBase.extract_features"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.ClusteringSeparationBase.extract_features" title="Permalink to this definition">¶</a></dt>
<dd><p>This function should be implemented by the subclass. It should extract
features. If the STFT shape is <cite>(n_freq, n_time, n_chan)</cite>, the output of this
function should be <cite>(n_freq, n_time, n_chan, n_features)</cite>.</p>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.ClusteringSeparationBase.run">
<code class="sig-name descname">run</code><span class="sig-paren">(</span><em class="sig-param">features=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/clustering_separation_base.html#ClusteringSeparationBase.run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.ClusteringSeparationBase.run" title="Permalink to this definition">¶</a></dt>
<dd><p>Clusters the features using the chosen clustering algorithm.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>features</strong> (<em>np.ndarray</em><em>, </em><em>optional</em>) – If features are given, then the
<cite>extract_features</cite> step will be skipped. Defaults to None (so
<cite>extract_features</cite> will be run).</p>
</dd>
<dt class="field-even">Raises</dt>
<dd class="field-even"><p><strong>SeparationException</strong> – If features.shape doesn’t match what is expected
in the STFT of the audio signal, an exception is raised.</p>
</dd>
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>List of Mask objects in self.result_masks.</p>
</dd>
<dt class="field-even">Return type</dt>
<dd class="field-even"><p>list</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="mix-in-for-nmf-based-methods">
<h3>Mix-in for NMF-based methods<a class="headerlink" href="#mix-in-for-nmf-based-methods" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.NMFMixin">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.</code><code class="sig-name descname">NMFMixin</code><a class="reference internal" href="_modules/nussl/separation/base/nmf_mixin.html#NMFMixin"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.NMFMixin" title="Permalink to this definition">¶</a></dt>
<dd><p><strong>Methods</strong></p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.NMFMixin.fit" title="nussl.separation.NMFMixin.fit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">fit</span></code></a>(audio_signals, n_components[, …])</p></td>
<td><p>Fits an NMF model to the magnitude spectrograms of each audio signal.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#nussl.separation.NMFMixin.inverse_transform" title="nussl.separation.NMFMixin.inverse_transform"><code class="xref py py-obj docutils literal notranslate"><span class="pre">inverse_transform</span></code></a>(components, activations)</p></td>
<td><p>Reconstructs the magnitude spectrogram by matrix multiplying the components with the activations.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.NMFMixin.transform" title="nussl.separation.NMFMixin.transform"><code class="xref py py-obj docutils literal notranslate"><span class="pre">transform</span></code></a>(audio_signal, model)</p></td>
<td><p>Use an already fit model to transform the magnitude spectrogram of an audio signal into components and activations.</p></td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="nussl.separation.NMFMixin.fit">
<em class="property">static </em><code class="sig-name descname">fit</code><span class="sig-paren">(</span><em class="sig-param">audio_signals</em>, <em class="sig-param">n_components</em>, <em class="sig-param">beta_loss='frobenius'</em>, <em class="sig-param">l1_ratio=0.5</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/nmf_mixin.html#NMFMixin.fit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.NMFMixin.fit" title="Permalink to this definition">¶</a></dt>
<dd><p>Fits an NMF model to the magnitude spectrograms of each
audio signal. If <cite>audio_signals</cite> is a list, the magnitude
spectrograms of each signal are concatenated into a single
data matrix to which NMF is fit. If <cite>audio_signals</cite>
is a single audio signal, then NMF is fit only to the
magnitude spectrogram for that audio signal. If any of
the audio signals are multichannel, the channels are
concatenated into a single (longer) data matrix.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>audio_signals</strong> (<em>list</em><em> or </em><a class="reference internal" href="core.html#nussl.core.AudioSignal" title="nussl.core.AudioSignal"><em>AudioSignal</em></a>) – AudioSignal object(s) that
NMF will be fit to.</p></li>
<li><p><strong>n_components</strong> (<em>int</em>) – Number of components to use in the NMF
module. Corresponds to number of spectral templates.</p></li>
<li><p><strong>beta_loss</strong> (<em>float</em><em> or </em><em>string</em>) – String must be in
{‘frobenius’, ‘kullback-leibler’, ‘itakura-saito’}.
Beta divergence to be minimized, measuring the distance between X
and the dot product WH. Note that values different from ‘frobenius’
(or 2) and ‘kullback-leibler’ (or 1) lead to significantly slower
fits. Note that for beta_loss &lt;= 0 (or ‘itakura-saito’), the input
matrix X cannot contain zeros. Used only in ‘mu’ solver. Defaults to
‘frobenius’.</p></li>
<li><p><strong>l1_ratio</strong> (<em>float</em>) – The regularization mixing parameter, with 0 &lt;= l1_ratio &lt;= 1.
For l1_ratio = 0 the penalty is an elementwise L2 penalty (aka Frobenius Norm).
For l1_ratio = 1 it is an elementwise L1 penalty.
For 0 &lt; l1_ratio &lt; 1, the penalty is a combination of L1 and L2.
Defaults to 0.5 (an even mix of the L1 and L2 penalties).</p></li>
<li><p><strong>kwargs</strong> (<em>dict</em>) – Additional keyword arguments to initialization of the NMF
decomposition method.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Fitted NMF model to the audio signal(s).
components (np.ndarray): Spectral templates (n_components, n_features)
activations (np.ndarray): Activations (n_components, n_time, n_channels)</p>
<blockquote>
<div><p>The shape here is as if it was like an STFT but with components as the
features rather than frequencies of the STFT.</p>
</div></blockquote>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p>model (NMF)</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.NMFMixin.inverse_transform">
<em class="property">static </em><code class="sig-name descname">inverse_transform</code><span class="sig-paren">(</span><em class="sig-param">components</em>, <em class="sig-param">activations</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/nmf_mixin.html#NMFMixin.inverse_transform"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.NMFMixin.inverse_transform" title="Permalink to this definition">¶</a></dt>
<dd><p>Reconstructs the magnitude spectrogram by matrix multiplying the components
with the activations. Components and activations are considered to be 2D matrices,
but if they are more, then the first dimension is interpreted to be the batch
dimension.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>components</strong> (<em>np.ndarray</em>) – Spectral templates (n_components, n_features)</p></li>
<li><p><strong>activations</strong> (<em>np.ndarray</em>) – Activations (n_components, n_time, n_channels)
The shape here is as if it was like an STFT but with components as the
features rather than frequencies of the STFT.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="nussl.separation.NMFMixin.transform">
<em class="property">static </em><code class="sig-name descname">transform</code><span class="sig-paren">(</span><em class="sig-param">audio_signal</em>, <em class="sig-param">model</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/nmf_mixin.html#NMFMixin.transform"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.NMFMixin.transform" title="Permalink to this definition">¶</a></dt>
<dd><p>Use an already fit model to transform the magnitude spectrogram of an
audio signal into components and activations. These can be multiplied to
reconstruct the original matrix, or used to separate out sounds that correspond
to components in the model.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>audio_signal</strong> (<a class="reference internal" href="core.html#nussl.core.AudioSignal" title="nussl.core.AudioSignal"><em>AudioSignal</em></a>) – AudioSignal object to transform with model.</p></li>
<li><p><strong>model</strong> (<em>NMF</em>) – NMF model to separate with. Must be fitted prior to this call.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Spectral templates (n_components, n_features)
activations (np.ndarray): Activations (n_components, n_time, n_channels)</p>
<blockquote>
<div><p>The shape here is as if it was like an STFT but with components as the
features rather than frequencies of the STFT.</p>
</div></blockquote>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p>components (np.ndarray)</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="mix-in-for-deep-methods">
<h3>Mix-in for deep methods<a class="headerlink" href="#mix-in-for-deep-methods" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.DeepMixin">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.</code><code class="sig-name descname">DeepMixin</code><a class="reference internal" href="_modules/nussl/separation/base/deep_mixin.html#DeepMixin"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.DeepMixin" title="Permalink to this definition">¶</a></dt>
<dd><p><strong>Methods</strong></p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#nussl.separation.DeepMixin.load_model" title="nussl.separation.DeepMixin.load_model"><code class="xref py py-obj docutils literal notranslate"><span class="pre">load_model</span></code></a>(model_path[, device])</p></td>
<td><p>Loads the model at specified path <cite>model_path</cite>.</p></td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="nussl.separation.DeepMixin.load_model">
<code class="sig-name descname">load_model</code><span class="sig-paren">(</span><em class="sig-param">model_path</em>, <em class="sig-param">device='cpu'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/base/deep_mixin.html#DeepMixin.load_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.DeepMixin.load_model" title="Permalink to this definition">¶</a></dt>
<dd><p>Loads the model at specified path <cite>model_path</cite>. Uses GPU if
available.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model_path</strong> (<em>str</em>) – path to model saved as SeparationModel.</p></li>
<li><p><strong>device</strong> (<em>str</em><em> or </em><em>torch.device</em>) – loads model on CPU or GPU. Defaults to
‘cpu’.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Loaded model, nn.Module
metadata (dict): metadata associated with model, used for making
the input data into the model.</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p>model (<a class="reference internal" href="ml.html#nussl.ml.SeparationModel" title="nussl.ml.SeparationModel">SeparationModel</a>)</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="benchmark-methods">
<h2>Benchmark methods<a class="headerlink" href="#benchmark-methods" title="Permalink to this headline">¶</a></h2>
<p>These methods are used for obtaining upper and lower baselines
for source separation algorithms.</p>
<span class="target" id="module-nussl.separation.benchmark"></span><div class="section" id="high-pass-filter">
<h3>High pass filter<a class="headerlink" href="#high-pass-filter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.benchmark.HighLowPassFilter">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.benchmark.</code><code class="sig-name descname">HighLowPassFilter</code><span class="sig-paren">(</span><em class="sig-param">input_audio_signal</em>, <em class="sig-param">high_pass_cutoff_hz</em>, <em class="sig-param">mask_type='binary'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/benchmark/high_low_pass_filter.html#HighLowPassFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.benchmark.HighLowPassFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Implements a super simple separation algorithm that just masks everything below
the specified hz. It does this by zeroing out the associated FFT bins via a mask to
produce the “high” source, and the residual is the “low” source.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input_audio_signal</strong> (<a class="reference internal" href="core.html#nussl.core.AudioSignal" title="nussl.core.AudioSignal"><em>AudioSignal</em></a>) – Signal to separate.</p></li>
<li><p><strong>high_pass_cutoff_hz</strong> (<em>float</em>) – Cutoff in Hz. Will be rounded off.</p></li>
<li><p><strong>mask_type</strong> (<em>str</em><em>, </em><em>optional</em>) – Mask type. Defaults to ‘binary’.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="ideal-binary-mask">
<h3>Ideal binary mask<a class="headerlink" href="#ideal-binary-mask" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.benchmark.IdealBinaryMask">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.benchmark.</code><code class="sig-name descname">IdealBinaryMask</code><span class="sig-paren">(</span><em class="sig-param">input_audio_signal</em>, <em class="sig-param">sources</em>, <em class="sig-param">mask_type='binary'</em>, <em class="sig-param">mask_threshold=0.5</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/benchmark/ideal_binary_mask.html#IdealBinaryMask"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.benchmark.IdealBinaryMask" title="Permalink to this definition">¶</a></dt>
<dd><p>Implements an ideal binary mask (IBM) that is computed by using the known
ground truth sources. This is one of the upper baselines.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input_audio_signal</strong> (<a class="reference internal" href="core.html#nussl.core.AudioSignal" title="nussl.core.AudioSignal"><em>AudioSignal</em></a>) – Signal to separate.</p></li>
<li><p><strong>sources</strong> (<em>list</em>) – List of audio signal objects that correspond to the sources.</p></li>
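<li><p><strong>mask_type</strong> (<em>str</em><em>, </em><em>optional</em>) – Mask type. Defaults to ‘binary’.</p></li>
<li><p><strong>mask_threshold</strong> (<em>float</em><em>, </em><em>optional</em>) – Masking threshold. Defaults to 0.5.</p></li>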
</ul>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="ideal-ratio-mask">
<h3>Ideal ratio mask<a class="headerlink" href="#ideal-ratio-mask" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.benchmark.IdealRatioMask">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.benchmark.</code><code class="sig-name descname">IdealRatioMask</code><span class="sig-paren">(</span><em class="sig-param">input_audio_signal</em>, <em class="sig-param">sources</em>, <em class="sig-param">approach='psa'</em>, <em class="sig-param">mask_type='soft'</em>, <em class="sig-param">mask_threshold=0.5</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/benchmark/ideal_ratio_mask.html#IdealRatioMask"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.benchmark.IdealRatioMask" title="Permalink to this definition">¶</a></dt>
<dd><p>Implements an ideal ratio mask (IRM) that is computed by using the known
ground truth sources. This is one of the upper baselines.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input_audio_signal</strong> (<a class="reference internal" href="core.html#nussl.core.AudioSignal" title="nussl.core.AudioSignal"><em>AudioSignal</em></a>) – Signal to separate.</p></li>
<li><p><strong>sources</strong> (<em>list</em>) – List of audio signal objects that correspond to the sources.</p></li>
<li><p><strong>approach</strong> (<em>str</em>) – Either ‘psa’ (phase-sensitive spectrum approximation) or ‘msa’
(magnitude spectrum approximation). Generally ‘psa’ does better. Defaults to ‘psa’.</p></li>
<li><p><strong>mask_type</strong> (<em>str</em><em>, </em><em>optional</em>) – Mask type. Defaults to ‘soft’.</p></li>
<li><p><strong>mask_threshold</strong> (<em>float</em><em>, </em><em>optional</em>) – Masking threshold. Defaults to 0.5.</p></li>
<li><p><strong>kwargs</strong> (<em>dict</em>) – Extra keyword arguments are passed to the transform classes at
initialization.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="wiener-filter">
<h3>Wiener filter<a class="headerlink" href="#wiener-filter" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.benchmark.WienerFilter">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.benchmark.</code><code class="sig-name descname">WienerFilter</code><span class="sig-paren">(</span><em class="sig-param">input_audio_signal</em>, <em class="sig-param">estimates</em>, <em class="sig-param">iterations=1</em>, <em class="sig-param">mask_type='soft'</em>, <em class="sig-param">mask_threshold=0.5</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/benchmark/wiener_filter.html#WienerFilter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.benchmark.WienerFilter" title="Permalink to this definition">¶</a></dt>
<dd><p>Implements a multichannel Wiener filter that is computed by using some
source estimates. When using the estimates produced by IdealRatioMask or
IdealBinaryMask, this is one of the upper baselines.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input_audio_signal</strong> (<a class="reference internal" href="core.html#nussl.core.AudioSignal" title="nussl.core.AudioSignal"><em>AudioSignal</em></a>) – Signal to separate.</p></li>
<li><p><strong>estimates</strong> (<em>list</em>) – List of audio signal objects that correspond to the estimates.</p></li>
<li><p><strong>iterations</strong> (<em>int</em>) – Number of iterations for expectation-maximization in the Wiener
filter. Defaults to 1.</p></li>
<li><p><strong>mask_type</strong> (<em>str</em><em>, </em><em>optional</em>) – Mask type. Defaults to ‘soft’.</p></li>
<li><p><strong>mask_threshold</strong> (<em>float</em><em>, </em><em>optional</em>) – Threshold for binary masking. Defaults to 0.5.</p></li>
<li><p><strong>kwargs</strong> (<em>dict</em>) – Additional keyword arguments to <cite>norbert.wiener</cite>.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="mix-as-estimate">
<h3>Mix as estimate<a class="headerlink" href="#mix-as-estimate" title="Permalink to this headline">¶</a></h3>
<dl class="class">
<dt id="nussl.separation.benchmark.MixAsEstimate">
<em class="property">class </em><code class="sig-prename descclassname">nussl.separation.benchmark.</code><code class="sig-name descname">MixAsEstimate</code><span class="sig-paren">(</span><em class="sig-param">input_audio_signal</em>, <em class="sig-param">num_sources</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/nussl/separation/benchmark/mix_as_estimate.html#MixAsEstimate"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#nussl.separation.benchmark.MixAsEstimate" title="Permalink to this definition">¶</a></dt>
<dd><p>This algorithm does nothing but scale the mix by the number of sources. This can
be used to compute the improvement metrics (e.g. improvement in SDR over using the
mixture as the estimate).</p>
<dl class="field-list simple">