Remove fuzzy matching

I didn't realize fuzzysort requires (almost?) the whole search string to be present in the string being matched against. That means it wasn't adding a lot of value in this particular use case. Anyway, I should have started simpler. Matching on title, with some simple normalization rules, is probably good enough most of the time; in the future I could add a way for the user to provide an explicit mapping of Kindle titles to citation IDs in the front matter.
brokensandals · Feb 5, 2025 · 7eb3d47 · 7eb3d47
1 parent 25c6fe9
commit 7eb3d47
Show file tree

Hide file tree

Showing 6 changed files with 43 additions and 97 deletions.
diff --git a/README.md b/README.md
@@ -38,10 +38,8 @@ The `Paste quote` command will change the citation format when pasting this. The
 
 ![A sample document after pasting, which includes the quote and a citation in the format (*The War of Art*, p. 40-41)](docs/cite-no-refs-post.png)
 
-If your document _does_ have a references section in its frontmatter, then the plugin will try to find the correct reference for the quote based on the title and authors, and generate a Pandoc-style citation using the corresponding `id`. Example:
+If your document _does_ have a references section in its frontmatter, then the plugin will try to find the correct reference for the quote based on the title, and generate a Pandoc-style citation using the corresponding `id`. Example:
 
 ![A sample document containing a references section with a reference with id "pressfield2002"](docs/cite-refs-pre.png)
 
 ![A sample document after pasting, which includes the quote and a citation in the format \[@pressfield2002, p. 40-41\]](docs/cite-refs-post.png)
-
-Note: the match between the title/authors on the clipboard and the title/authors of the reference entry doesn't have to be exact. Since the plugin uses fuzzy matching, it is possible for it to choose the wrong reference.
diff --git a/main.ts b/main.ts
@@ -1,5 +1,5 @@
 import { Editor, MarkdownView, Plugin, TFile } from 'obsidian';
-import { CslReference, parseQuote, Quote, replaceDoubleQuotes, scoreRefMatches } from 'src/quotes';
+import { parseQuote, Quote, replaceDoubleQuotes, guessCiteId } from 'src/quotes';
 
 export default class PasteQuotePlugin extends Plugin {
 	async onload() {
@@ -30,18 +30,28 @@ export default class PasteQuotePlugin extends Plugin {
   }
 
 	citationForQuote(quote: Quote, file: TFile | null): string {
-		if (!(quote.title || quote.authors || quote.page)) {
+		if (!(quote.title || quote.page)) {
 			return "";
 		}
 
+		if (!quote.title) {
+			// TODO make this configurable
+			return `(p. ${quote.page})`;
+		}
+
 		const fileCache = file == null ? null : this.app.metadataCache.getFileCache(file);
 		const refs = fileCache?.frontmatter?.references || [];
 		if (refs.length == 0) {
 			// TODO make this configurable
-			return ` (*${quote.title}*, p. ${quote.page})`;
+			let citation = ` (*${quote.title}*`;
+			if (quote.page) {
+				citation += `, p. ${quote.page}`;
+			}
+			citation += ')';
+			return citation;
 		}
 
-		const id = this.citeIdForQuote(quote, refs) || `TODO ${quote.title}`;
+		const id = guessCiteId(quote.title, refs) || `TODO ${quote.title}`;
 		let citation = `[@${id}`;
 		if (quote.page) {
 			citation += ', p. ' + quote.page;
@@ -50,15 +60,6 @@ export default class PasteQuotePlugin extends Plugin {
 		return citation;
 	}
 
-	citeIdForQuote(quote: Quote, refs: CslReference[]): string | null {
-		const results = scoreRefMatches(quote, refs);
-		if (results.length == 0) {
-			return null;
-		}
-
-		return results[0].id;
-	}
-
 	onunload() {
 
 	}

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -25,8 +25,5 @@
 		"ts-jest": "^29.2.5",
 		"tslib": "2.4.0",
 		"typescript": "4.7.4"
-	},
-	"dependencies": {
-		"fuzzysort": "^3.1.0"
 	}
 }
diff --git a/src/quotes.test.ts b/src/quotes.test.ts
@@ -1,4 +1,4 @@
-import { parseQuote, replaceDoubleQuotes, scoreRefMatches } from './quotes';
+import { parseQuote, replaceDoubleQuotes, guessCiteId } from './quotes';
 
 describe('parseQuote', () => {
   it('should fall back to using the raw clipboard contents if it cannot parse the quote', () => {
@@ -83,7 +83,7 @@ describe('replaceDoubleQuotes', () => {
   });
 });
 
-describe('scoreRefMatches', () => {
+describe('guessCiteId', () => {
   const references = [
     {
       "id": "sidgwick1981",
@@ -119,51 +119,31 @@ describe('scoreRefMatches', () => {
         }
       ],
       "title": "Guilty, free, and wise: Determinism and psychopathy diminish learning from negative emotions",
+    },
+    {
+      "id": "hari2018",
+      "author": [
+        {
+          "family": "Hari",
+          "given": "Johann"
+        }
+      ],
+      "title": "Lost connections: uncovering the real causes of depression-- and the unexpected solutions",
     }
   ];
 
-  it('should find a close match', () => {
-    let quote = {raw: "irrelevant", body: "irrelevant", title: "Parfit", authors: ["Edmonds, David"]};
-    let results = scoreRefMatches(quote, references);
-    expect(results[0].id).toBe("edmondsParfitPhilosopherHis2023");
-    expect(results[0].score).toBeGreaterThan(0.5);
-
-    quote = {raw: "irrelevant", body: "irrelevant", title: "Methods of Ethics", authors: ["sidgwick, henry"]};
-    results = scoreRefMatches(quote, references);
-    expect(results[0].id).toBe("sidgwick1981");
-    expect(results[0].score).toBeGreaterThan(0.5);
-  });
-
-  it('should find a close title-only match', () => {
-    const quote = {raw: "irrelevant", body: "irrelevant", title: "Methods of Ethics"};
-    const results = scoreRefMatches(quote, references);
-    expect(results[0].id).toBe("sidgwick1981");
-    expect(results[0].score).toBeGreaterThan(0.5);
-  });
-
-  it('sometimes matches stuff it probably should not', () => {
-    // I'm including this test to illustrate how this sort of fuzzy matching isn't perfect for this use case—but I still think it's probably good enough
-    const quote = {raw: "irrelevant", body: "irrelevant", title: "Parfit"};
-    const results = scoreRefMatches(quote, references);
-    expect(results[0].id).toBe("edmondsParfitPhilosopherHis2023");
-  });
-
-  it('should find a partial title and author match', () => {
-    const quote = {raw: "irrelevant", body: "irrelevant", title: "guilty free wise", authors: ["baumeister"]};
-    const results = scoreRefMatches(quote, references);
-    expect(results[0].id).toBe("stillman2010");
-    expect(results[0].score).toBeGreaterThan(0.5);
+  it('should find an exact match', () => {
+    // Each full title below must resolve to the id of its reference.
+    expect(guessCiteId("Parfit: a philosopher and his mission to save morality", references)).toBe("edmondsParfitPhilosopherHis2023");
+    expect(guessCiteId("The Methods of Ethics", references)).toBe("sidgwick1981");
   });
 
-  it('should not find a match based solely on authors', () => {
-    const quote = {raw: "irrelevant", body: "irrelevant", authors: ["Baumeister, Roy F.", "Stillman, Tyler F."]};
-    const results = scoreRefMatches(quote, references);
-    expect(results.length).toBe(0);
+  it('should ignore some punctuation', () => {
+    expect(guessCiteId("guilty free and wise determinism and psychopathy diminish learning from negative emotions",  references)).toBe("stillman2010");
+    // note the mismatch between – and --
+    expect(guessCiteId('Lost Connections: Uncovering the Real Causes of Depression – and the Unexpected Solutions', references)).toBe("hari2018");
   });
 
-  it('should not find a very loose match', () => {
-    const quote = {raw: "irrelevant", body: "irrelevant", title: "psychopathy", authors: ["Baumeister, Franklin"]};
-    const results = scoreRefMatches(quote, references);
-    expect(results.length).toBe(0);
+  it('does not find approximate matches', () => {
+    expect(guessCiteId('The Methodses of Ethicses', references)).toBeNull();
   });
 });
diff --git a/src/quotes.ts b/src/quotes.ts
@@ -1,5 +1,3 @@
-import * as fuzzysort from "fuzzysort";
-
 export interface Quote {
   raw: string,
   body: string,
@@ -72,30 +70,11 @@ export function replaceDoubleQuotes(original: string): string {
     .replace(/”/g, "’");
 }
 
-export interface ReferenceMatchScore {
-  score: number,
-  id: string,
+/**
+ * Normalizes a title for comparison: lower-cases it, deletes the characters
+ * `.`, `:`, `,`, `(`, `)` and the listed hyphen/dash code points, collapses
+ * each run of whitespace into a single space, and trims both ends.
+ *
+ * @param title - The title to normalize.
+ * @returns The normalized form of `title`.
+ */
+function searchableTitle(title: string): string {
+  // The ASCII hyphen is written only as \u002d. A literal `-` placed right after `)`
+  // would form the range U+0029–U+002D and also delete `*` and `+`.
+  return title.toLowerCase().replace(/[.:,()\u002d\u2010\u2011\u2012\u2013\u2014\u2015\ufe58\ufe63\uff0d]/gu, '').replace(/\s+/g, ' ').trim();
 }
 
-export function scoreRefMatches(quote: Quote, refs: CslReference[]): ReferenceMatchScore[] {
-  function refSearchStringForQuote(quote: Quote): string {
-    const authors = (quote.authors || []).map(s => s.toLowerCase().replace(/\.,/g, ''));
-    authors.sort();
-    return `${quote.title} ${authors.join(' ')}`.toLowerCase();
-  }
-
-  function refSearchStringForRef(ref: CslReference): string {
-    function authorString(author: CslAuthor): string {
-      return `${author.family}, ${author.given}`.toLowerCase().replace(/\.,/g, '');
-    }
-
-    const authors = (ref.author || []).map(authorString);
-    authors.sort();
-    return `${ref.title} ${authors.join(' ')}`.toLowerCase();
-  }
-
-  const search = refSearchStringForQuote(quote);
-  const options = refs.map(ref => ({id: ref.id, searchString: refSearchStringForRef(ref)}));
-  const results = fuzzysort.go(search, options, {key: 'searchString'});
-  return results.map(result => ({score: result.score, id: result.obj.id}));
+/**
+ * Finds the reference whose title equals the given title after both have
+ * been passed through `searchableTitle`.
+ *
+ * @param title - Title to look up.
+ * @param refs - Candidate references; an entry without a title never matches.
+ * @returns The `id` of the first matching reference, or `null` when no
+ * reference matches, when `title` normalizes to an empty string, or when the
+ * matching reference has a falsy `id`.
+ */
+export function guessCiteId(title: string, refs: CslReference[]): string | null {
+  const st = searchableTitle(title);
+  if (st === '') {
+    // An empty key would otherwise equal the "" fallback used for references without a title.
+    return null;
+  }
+  return refs.find((ref) => searchableTitle(ref.title || "") === st)?.id || null;
 }