Skip to content

Commit

Permalink
Don't highlight ignored fragments in compare view
Browse files Browse the repository at this point in the history
  • Loading branch information
rien committed May 27, 2024
1 parent 3e00bc2 commit 1104f12
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 25 deletions.
9 changes: 5 additions & 4 deletions cli/src/cli/views/fileView.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ import {
function writeCSVto<T>(
out: Writable,
data: T[],
extractor: {[field: string]: (obj: T) => string | number | boolean | null}
extractor: {[field: string]: (obj: T) => string | number | null}
): void {

const csv = stringify();
csv.pipe(out);

const keys: string[] = [];
const extractors: Array<(obj: T) => string | number | boolean | null> = [];
const extractors: Array<(obj: T) => string | number | null> = [];
for (const [key, extract] of Object.entries(extractor)) {
keys.push(key);
extractors.push(extract);
Expand Down Expand Up @@ -77,7 +77,7 @@ export class FileView extends View {
{
"id": s => s.id,
"hash": s => s.hash,
"ignored": s => s.ignored,
"ignored": s => s.ignored ? "true" : "false",
"data": s => s.kgram?.join(" ") || null,
"files": s => JSON.stringify(s.files().map(f => f.id))
});
Expand All @@ -86,9 +86,10 @@ export class FileView extends View {
public writeFiles(out: Writable): void {
writeCSVto<FileEntry>(
out,
this.report.entries(),
this.report.entries().concat(this.report.ignoredEntries()),
{
"id": f => f.file.id,
"ignored": f => f.isIgnored ? "true" : "false",
"path": f => f.file.path,
"content": f => f.file.content,
"amountOfKgrams": f => f.kgrams.length,
Expand Down
41 changes: 31 additions & 10 deletions core/src/algorithm/fingerprintIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ export type Hash = number;

/**
 * Per-file bookkeeping record kept by the fingerprint index.
 *
 * Every member is declared exactly once; declaring `kgrams` or `shared`
 * a second time is a duplicate-identifier error.
 */
export interface FileEntry {
  /** The tokenized file this entry belongs to. */
  file: TokenizedFile;
  /** Ranges recorded for the k-grams of this file. */
  kgrams: Array<Range>;
  /** Shared fingerprints of this file that take part in comparisons. */
  shared: Set<SharedFingerprint>;
  /** Shared fingerprints of this file that are ignored (presumably excluded from similarity; confirm against Pair). */
  ignored: Set<SharedFingerprint>;
  /** True when the file itself was added as an ignored file (e.g. template code). */
  isIgnored: boolean;
}

export interface Occurrence {
Expand All @@ -28,11 +29,16 @@ export interface FingerprintIndexOptions {
}

export class FingerprintIndex {
// HashFilter transforms tokens into (a selection of) hashes
private readonly hashFilter: HashFilter;
// A map of file id to FileEntry object that has been analysed
private readonly files: Map<number, FileEntry>;
private readonly ignored: Map<number, FileEntry>;
// A map of file id to FileEntry object that is ignored (e.g. template code)
private readonly ignoredFiles: Map<number, FileEntry>;
// A map of hashes to their Shared Fingerprints (which keep track of the files they are in)
private readonly index: Map<Hash, SharedFingerprint>;
private readonly ignoredFileFingerprints: Set<SharedFingerprint>;
// A set of ignored hashes (either manually added, or through the ignored files, NOT because of maxFileCount)
private readonly ignoredHashes: Set<number>;

/**
* Creates a Fingerprint Index which is able to compare files with each other
Expand All @@ -47,13 +53,13 @@ export class FingerprintIndex {
) {
this.hashFilter = new WinnowFilter(this.kgramLength, this.kgramsInWindow, kgramData);
this.files = new Map<number, FileEntry>();
this.ignored = new Map<number, FileEntry>();
this.ignoredFiles = new Map<number, FileEntry>();
this.index = new Map<Hash, SharedFingerprint>();
this.ignoredFileFingerprints = new Set<SharedFingerprint>();
this.ignoredHashes = new Set<number>();
}

/**
 * Adds a file to the index as an ignored file (e.g. template code).
 *
 * @param tokenizedFile the file to ignore
 * @throws when a file with the same id is already present in `ignoredFiles`
 */
public addIgnoredFile(tokenizedFile: TokenizedFile): void {
  // One guard against `ignoredFiles`, the map of ignored file entries by id;
  // a second, identical-message check against another map would be redundant.
  assert(!this.ignoredFiles.has(tokenizedFile.id), `This file has already been ignored: ${tokenizedFile.file.path}`);
  // The second argument marks the file (and thereby its fingerprints) as ignored.
  this.addFileToIndex(tokenizedFile, true);
}

Expand All @@ -67,7 +73,7 @@ export class FingerprintIndex {
}
this.maxFingerprintFileCount = maxFingerprintFileCount || Number.MAX_SAFE_INTEGER;
for (const shared of this.index.values()) {
if (!this.ignoredFileFingerprints.has(shared)) {
if (!this.ignoredHashes.has(shared.hash)) {
if (shared.fileCount() > this.maxFingerprintFileCount && !shared.ignored) {
this.ignoreSharedFingerprint(shared);
} else if (shared.fileCount() <= this.maxFingerprintFileCount && shared.ignored) {
Expand All @@ -90,10 +96,11 @@ export class FingerprintIndex {
return this.index;
}

private addFileToIndex(file: TokenizedFile, template = false): void {
private addFileToIndex(file: TokenizedFile, ignored = false): void {
const entry: FileEntry = {
file,
kgrams: [],
isIgnored: ignored,
shared: new Set<SharedFingerprint>(),
ignored: new Set<SharedFingerprint>()
};
Expand Down Expand Up @@ -147,7 +154,7 @@ export class FingerprintIndex {
}

shared.add(part);
if (template || shared.fileCount() > this.maxFingerprintFileCount) {
if (ignored || shared.fileCount() > this.maxFingerprintFileCount || this.ignoredHashes.has(hash)) {
this.ignoreSharedFingerprint(shared);
} else {
entry.shared.add(shared);
Expand All @@ -157,6 +164,16 @@ export class FingerprintIndex {
}
}

/**
 * Registers the given hashes as ignored.
 *
 * Each hash is remembered in `ignoredHashes`; when the index already holds a
 * shared fingerprint for that hash, the fingerprint is marked as ignored
 * right away.
 *
 * @param hashes the hashes to ignore
 */
public addIgnoredHashes(hashes: Array<Hash>): void {
  for (let i = 0; i < hashes.length; i += 1) {
    const ignoredHash = hashes[i];
    this.ignoredHashes.add(ignoredHash);

    const fingerprint = this.index.get(ignoredHash);
    if (!fingerprint) {
      // Nothing indexed under this hash (yet): only the hash itself is recorded.
      continue;
    }
    this.ignoreSharedFingerprint(fingerprint);
  }
}

private ignoreSharedFingerprint(shared: SharedFingerprint): void {
shared.ignored = true;
for (const other of shared.files()) {
Expand All @@ -183,6 +200,10 @@ export class FingerprintIndex {
return Array.from(this.files.values());
}

/**
 * Returns a fresh array holding the entries of all ignored files.
 */
public ignoredEntries(): Array<FileEntry> {
  return [...this.ignoredFiles.values()];
}

public getPair(file1: TokenizedFile, file2: TokenizedFile): Pair {
const entry1 = this.files.get(file1.id);
const entry2 = this.files.get(file2.id);
Expand Down
5 changes: 3 additions & 2 deletions core/src/algorithm/pair.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ export class Pair extends Identifiable {
// The right-hand ignored count must come from the right entry: it is
// subtracted from the similarity denominator together with rightTotal,
// so reading it from leftEntry skews the similarity.
this.rightIgnored = rightEntry.ignored.size;
this.leftTotal = leftEntry.kgrams.length;
this.rightTotal = rightEntry.kgrams.length;
if (this.leftTotal + this.rightTotal > 0) {
this.similarity = (this.leftCovered + this.rightCovered) / (this.leftTotal + this.rightTotal - this.leftIgnored - this.rightIgnored);
const denominator = this.leftTotal + this.rightTotal - this.leftIgnored - this.rightIgnored;
if (denominator > 0) {
this.similarity = (this.leftCovered + this.rightCovered) / denominator;
} else {
this.similarity = 0;
}
Expand Down
7 changes: 4 additions & 3 deletions core/src/test/pair.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,16 @@ test("paired occurrence merging & squashing", t => {
kgrams: new Array<Range>(),
shared: new Set<SharedFingerprint>(),
ignored: new Set<SharedFingerprint>(),

file: leftFile
file: leftFile,
isIgnored: false,
};

const right = {
kgrams: new Array<Range>(),
shared: new Set<SharedFingerprint>(),
ignored: new Set<SharedFingerprint>(),
file: rightFile
file: rightFile,
isIgnored: false,
};


Expand Down
4 changes: 4 additions & 0 deletions lib/src/lib/report.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ export class Report {
return this.index.entries();
}

/**
 * Returns the entries of the ignored files, as reported by the underlying index.
 */
public ignoredEntries(): Array<FileEntry> {
  const ignored = this.index.ignoredEntries();
  return ignored;
}

public metadata(): Metadata {
return {
...this.options.asObject(),
Expand Down
1 change: 1 addition & 0 deletions web/src/api/models/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export interface FileIndeterminate {
ast: string[] | string;
mapping: Selection[] | string;
amountOfKgrams: number;
ignored: boolean;
label: Label;
extra: {
timestamp?: Date;
Expand Down
1 change: 1 addition & 0 deletions web/src/api/models/kgram.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Hash, File } from "@/api/models";

export interface Kgram {
id: number;
ignored: boolean;
hash: Hash;
data: string;
files: File[];
Expand Down
5 changes: 4 additions & 1 deletion web/src/api/stores/file.store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ export const useFileStore = defineStore("file", () => {
const extra = JSON.parse(row.extra || "{}");
extra.timestamp = extra.createdAt && new Date(extra.createdAt);
hasTimestamps = hasTimestamps || !!extra.timestamp;
file.ignored = row.ignored == "true"
file.extra = extra;
file.ast = JSON.parse(row.ast);
file.mapping = JSON.parse(row.mapping);
Expand Down Expand Up @@ -203,7 +204,9 @@ export const useFileStore = defineStore("file", () => {
labels: extra.labels,
};

files[file.id] = file;
if (!file.ignored) {
files[file.id] = file;
}
}

// Find the common path in the files.
Expand Down
1 change: 1 addition & 0 deletions web/src/api/stores/kgram.store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export const useKgramStore = defineStore("kgrams", () => {
kgrams[id] = {
id,
hash: parseInt(row.hash),
ignored: row.ignored == "true",
data: row.data,
files,
};
Expand Down
1 change: 1 addition & 0 deletions web/src/api/workers/data.worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export function populateFragments(
const leftFile = fileToTokenizedFile(pair.leftFile);
const rightFile = fileToTokenizedFile(pair.rightFile);
index.addFiles([leftFile, rightFile]);
index.addIgnoredHashes(kgrams.filter(k => k.ignored).map(k => k.hash));
const reportPair = index.getPair(leftFile, rightFile);

const kmersMap: Map<Hash, Kgram> = new Map();
Expand Down
7 changes: 2 additions & 5 deletions web/src/composables/useMonacoEditorWorkers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ import EditorWorker from 'monaco-editor/esm/vs/editor/editor.worker?worker'

/**
 * Installs the Monaco editor worker factory from within an `onMounted` hook.
 *
 * The global `MonacoEnvironment` object is created only when it does not
 * exist yet, so any properties already set on it are preserved; only
 * `getWorker` is (re)assigned.
 */
export function useMonacoEditorWorkers() {
  onMounted(() => {
    // Do not replace an existing environment object: that would drop whatever
    // configuration is already on it.
    self.MonacoEnvironment ||= {};
    self.MonacoEnvironment.getWorker = () => new EditorWorker();
  });
}

0 comments on commit 1104f12

Please sign in to comment.