-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathProgram.cs
106 lines (79 loc) · 3 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
using CoenM.ImageHash;
using CoenM.ImageHash.HashAlgorithms;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Text.Json;
// ---- Configuration ---------------------------------------------------------

// Search pattern handed to Directory.GetFiles: only *.jpg files are picked up.
const string ImageFilesMask = "*.jpg";

// Name of the folder, located under the user's "My Pictures" directory,
// that holds the sample images.
const string SamplesFolderName = "Samples2";

// Threshold passed to ImageHashComparer: two hashes are treated as equal
// when their similarity (in percent) is strictly greater than this value.
const double ImageSimilarityThreshold = 90.0;

// ---- Derived paths ---------------------------------------------------------

string myPicturesPath = Environment.GetFolderPath( Environment.SpecialFolder.MyPictures );
string samplesFolderPath = Path.Combine( myPicturesPath, SamplesFolderName );

// JSON cache of previously computed hashes; it lives inside the samples folder.
string precomputedHashesFileName = Path.Combine( samplesFolderPath, "hashs.json" );

// ---- Working state ---------------------------------------------------------

IImageHash hashMethod = new PerceptualHash();
var pictures = new List<ImageInfo>();
ImageHashComparer imageHashComparer = new( ImageSimilarityThreshold );

// Started here so that the reported total covers hashing/restoring and grouping.
Stopwatch sw = Stopwatch.StartNew();
// Fill `pictures` either from the JSON cache (when it exists) or by hashing
// every matching image under the samples folder and then writing the cache.
if (File.Exists( precomputedHashesFileName )) {
    Console.WriteLine( "\n+++ Restoring hashes..." );

    using var fs = File.OpenRead( precomputedHashesFileName );

    // Deserialize returns null when the file contains the JSON literal `null`;
    // fall back to an empty list so the grouping step below cannot hit a
    // NullReferenceException.
    pictures = JsonSerializer.Deserialize<List<ImageInfo>>( fs ) ?? [];
}
else {
    Console.WriteLine( "\n+++ Computing hashes..." );

    var _pictureFileNames =
        Directory.GetFiles(
            samplesFolderPath, ImageFilesMask,
            SearchOption.AllDirectories );

    foreach (var picturePath in _pictureFileNames) {
        Console.WriteLine( $"+ {picturePath}" );

        // The stream is disposed at the end of each iteration.
        using var stream = File.OpenRead( picturePath );
        ulong hash = hashMethod.Hash( stream );

        pictures.Add(
            new ImageInfo( picturePath, hash ) );
    }

    // File.Create truncates an existing file, whereas File.OpenWrite would
    // overwrite in place and leave stale trailing bytes (invalid JSON) if a
    // longer cache file appeared after the existence check above.
    using var fs = File.Create( precomputedHashesFileName );
    JsonSerializer.Serialize( fs, pictures );
}
// Group pictures whose hashes the comparer considers equal and print every
// group that contains more than one picture, followed by the total run time.
Console.WriteLine( "\n+++ Chasing duplicates..." );

// The trailing ToList() forces the grouping to run here, so the comparer's
// MinSim/MaxSim statistics are populated before they are printed below.
var _groups =
    pictures
        .GroupBy( x => x.Hash, imageHashComparer )
        .Where( grp => grp.Count() > 1 ) // keep only groups with duplicates
        .Select( grp => grp.ToList() )
        .ToList();

Console.WriteLine( $"\n+++ Similarity: max= {imageHashComparer.MaxSim}% / min= {imageHashComparer.MinSim}%" );
Console.WriteLine( $"\n+++ Duplicate Groups ({_groups.Count}):" );

for (var groupIx = 0; groupIx < _groups.Count; groupIx++) {
    Console.WriteLine( $"Group #{groupIx}" );

    foreach (var iinfo in _groups[groupIx]) {
        Console.WriteLine( $"\t{Path.GetFileName( iinfo.Path )}" );
    }
}

// Stop the stopwatch before reading it; previously Stop() ran only after the
// elapsed time had already been printed, which made the call pointless.
sw.Stop();
Console.WriteLine( $"\n+++ TOTAL: {sw.Elapsed}" );
/// <summary>
/// Pairs the path of an image file with the 64-bit hash value stored for it.
/// Instances are serialized to and restored from the JSON hash cache.
/// </summary>
file class ImageInfo
{
    /// <summary>Creates an entry for one image.</summary>
    /// <param name="path">Path of the image file.</param>
    /// <param name="hash">Hash value associated with the image.</param>
    public ImageInfo( string path, ulong hash )
    {
        Path = path;
        Hash = hash;
    }

    /// <summary>Path of the image file.</summary>
    public string Path { get; init; }

    /// <summary>Hash value associated with the image.</summary>
    public ulong Hash { get; init; }
}
/// <summary>
/// Equality comparer for 64-bit image hashes that treats two hashes as equal
/// when <c>CompareHash.Similarity</c> reports a value strictly greater than
/// the configured threshold. It also records the smallest and largest
/// similarity value seen across all comparisons.
/// </summary>
/// <param name="threshold">Similarity value that must be exceeded for two hashes to be considered equal.</param>
file class ImageHashComparer( double threshold ) : IEqualityComparer<ulong>
{
    private readonly double _threshold = threshold;

    /// <summary>
    /// Smallest similarity observed so far. Starts at 200.0 and keeps that
    /// value until <see cref="Equals(ulong, ulong)"/> has been called.
    /// </summary>
    public double MinSim { get; private set; } = 200.0;

    /// <summary>Largest similarity observed so far; starts at 0.</summary>
    public double MaxSim { get; private set; }

    /// <summary>
    /// Compares two hashes and updates <see cref="MinSim"/> and <see cref="MaxSim"/>.
    /// </summary>
    /// <returns><c>true</c> when the similarity is strictly above the threshold.</returns>
    public bool Equals( ulong x, ulong y )
    {
        var score = CompareHash.Similarity( x, y );

        MinSim = Math.Min( MinSim, score );
        MaxSim = Math.Max( MaxSim, score );

        return score > _threshold;
    }

    /// <summary>
    /// Returns the same hash code for every value so that hash-based
    /// collections always fall through to <see cref="Equals(ulong, ulong)"/>.
    /// </summary>
    public int GetHashCode( [DisallowNull] ulong obj )
    {
        return 0;
    }
}