Skip to content

Commit

Permalink
Remove fuzzy matching
Browse files Browse the repository at this point in the history
I didn't realize fuzzysort requires (almost?) the whole search string to be present in the string being matched against. That means it wasn't adding a lot of value in this particular use case. Anyway, I should have started simpler. Matching on title, with some simple normalization rules, is probably good enough most of the time; in the future I could add a way for the user to provide an explicit mapping of Kindle titles to citation IDs in the front matter.
  • Loading branch information
brokensandals committed Feb 5, 2025
1 parent 25c6fe9 commit 7eb3d47
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 97 deletions.
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,8 @@ The `Paste quote` command will change the citation format when pasting this. The

![A sample document after pasting, which includes the quote and a citation in the format (*The War of Art*, p. 40-41)](docs/cite-no-refs-post.png)

If your document _does_ have a references section in its frontmatter, then the plugin will try to find the correct reference for the quote based on the title and authors, and generate a Pandoc-style citation using the corresponding `id`. Example:
If your document _does_ have a references section in its frontmatter, then the plugin will try to find the correct reference for the quote based on the title, and generate a Pandoc-style citation using the corresponding `id`. Example:

![A sample document containing a references section with a reference with id "pressfield2002"](docs/cite-refs-pre.png)

![A sample document after pasting, which includes the quote and a citation in the format \[@pressfield2002, p. 40-41\]](docs/cite-refs-post.png)

Note: the match between the title/authors on the clipboard and the title/authors of the reference entry doesn't have to be exact. Since the plugin uses fuzzy matching, it is possible for it to choose the wrong reference.
27 changes: 14 additions & 13 deletions main.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Editor, MarkdownView, Plugin, TFile } from 'obsidian';
import { CslReference, parseQuote, Quote, replaceDoubleQuotes, scoreRefMatches } from 'src/quotes';
import { parseQuote, Quote, replaceDoubleQuotes, guessCiteId } from 'src/quotes';

export default class PasteQuotePlugin extends Plugin {
async onload() {
Expand Down Expand Up @@ -30,18 +30,28 @@ export default class PasteQuotePlugin extends Plugin {
}

citationForQuote(quote: Quote, file: TFile | null): string {
if (!(quote.title || quote.authors || quote.page)) {
if (!(quote.title || quote.page)) {
return "";
}

if (!quote.title) {
// TODO make this configurable
return `(p. ${quote.page})`;
}

const fileCache = file == null ? null : this.app.metadataCache.getFileCache(file);
const refs = fileCache?.frontmatter?.references || [];
if (refs.length == 0) {
// TODO make this configurable
return ` (*${quote.title}*, p. ${quote.page})`;
let citation = ` (*${quote.title}*`;
if (quote.page) {
citation += `, p. ${quote.page}`;
}
citation += ')';
return citation;
}

const id = this.citeIdForQuote(quote, refs) || `TODO ${quote.title}`;
const id = guessCiteId(quote.title, refs) || `TODO ${quote.title}`;
let citation = `[@${id}`;
if (quote.page) {
citation += ', p. ' + quote.page;
Expand All @@ -50,15 +60,6 @@ export default class PasteQuotePlugin extends Plugin {
return citation;
}

/**
 * Picks a citation id for a quote from the candidate references.
 *
 * Delegates to `scoreRefMatches` and takes the first entry it returns
 * (presumably the highest-scoring one — confirm against `scoreRefMatches`).
 *
 * @param quote - The parsed quote to find a reference for.
 * @param refs - Candidate references to match against.
 * @returns The id of the first match, or `null` when there are no matches.
 */
citeIdForQuote(quote: Quote, refs: CslReference[]): string | null {
	const [firstMatch] = scoreRefMatches(quote, refs);
	return firstMatch ? firstMatch.id : null;
}

/** Unload hook; intentionally a no-op because this method performs no teardown. */
onunload() {}
Expand Down
13 changes: 2 additions & 11 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,5 @@
"ts-jest": "^29.2.5",
"tslib": "2.4.0",
"typescript": "4.7.4"
},
"dependencies": {
"fuzzysort": "^3.1.0"
}
}
62 changes: 21 additions & 41 deletions src/quotes.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { parseQuote, replaceDoubleQuotes, scoreRefMatches } from './quotes';
import { parseQuote, replaceDoubleQuotes, guessCiteId } from './quotes';

describe('parseQuote', () => {
it('should fall back to using the raw clipboard contents if it cannot parse the quote', () => {
Expand Down Expand Up @@ -83,7 +83,7 @@ describe('replaceDoubleQuotes', () => {
});
});

describe('scoreRefMatches', () => {
describe('guessCiteId', () => {
const references = [
{
"id": "sidgwick1981",
Expand Down Expand Up @@ -119,51 +119,31 @@ describe('scoreRefMatches', () => {
}
],
"title": "Guilty, free, and wise: Determinism and psychopathy diminish learning from negative emotions",
},
{
"id": "hari2018",
"author": [
{
"family": "Hari",
"given": "Johann"
}
],
"title": "Lost connections: uncovering the real causes of depression-- and the unexpected solutions",
}
];

it('should find a close match', () => {
let quote = {raw: "irrelevant", body: "irrelevant", title: "Parfit", authors: ["Edmonds, David"]};
let results = scoreRefMatches(quote, references);
expect(results[0].id).toBe("edmondsParfitPhilosopherHis2023");
expect(results[0].score).toBeGreaterThan(0.5);

quote = {raw: "irrelevant", body: "irrelevant", title: "Methods of Ethics", authors: ["sidgwick, henry"]};
results = scoreRefMatches(quote, references);
expect(results[0].id).toBe("sidgwick1981");
expect(results[0].score).toBeGreaterThan(0.5);
});

it('should find a close title-only match', () => {
const quote = {raw: "irrelevant", body: "irrelevant", title: "Methods of Ethics"};
const results = scoreRefMatches(quote, references);
expect(results[0].id).toBe("sidgwick1981");
expect(results[0].score).toBeGreaterThan(0.5);
});

it('sometimes matches stuff it probably should not', () => {
// I'm including this test to illustrate how this sort of fuzzy matching isn't perfect for this use case—but I still think it's probably good enough
const quote = {raw: "irrelevant", body: "irrelevant", title: "Parfit"};
const results = scoreRefMatches(quote, references);
expect(results[0].id).toBe("edmondsParfitPhilosopherHis2023");
});

it('should find a partial title and author match', () => {
const quote = {raw: "irrelevant", body: "irrelevant", title: "guilty free wise", authors: ["baumeister"]};
const results = scoreRefMatches(quote, references);
expect(results[0].id).toBe("stillman2010");
expect(results[0].score).toBeGreaterThan(0.5);
it('should find an exact match match', () => {
expect(guessCiteId("Parfit: a philosopher and his mission to save morality", references)).toBe("edmondsParfitPhilosopherHis2023");
expect(guessCiteId("The Methods of Ethics", references)).toBe("sidgwick1981");
});

it('should not find a match based solely on authors', () => {
const quote = {raw: "irrelevant", body: "irrelevant", authors: ["Baumeister, Roy F.", "Stillman, Tyler F."]};
const results = scoreRefMatches(quote, references);
expect(results.length).toBe(0);
it('should ignore some punctuation', () => {
expect(guessCiteId("guilty free and wise determinism and psychopathy diminish learning from negative emotions", references)).toBe("stillman2010");
// note the mismatch between – and --
expect(guessCiteId('Lost Connections: Uncovering the Real Causes of Depression – and the Unexpected Solutions', references)).toBe("hari2018");
});

it('should not find a very loose match', () => {
const quote = {raw: "irrelevant", body: "irrelevant", title: "psychopathy", authors: ["Baumeister, Franklin"]};
const results = scoreRefMatches(quote, references);
expect(results.length).toBe(0);
it('does not find approximate matches', () => {
expect(guessCiteId('The Methodses of Ethicses', references)).toBeNull();
});
});
31 changes: 5 additions & 26 deletions src/quotes.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import * as fuzzysort from "fuzzysort";

export interface Quote {
raw: string,
body: string,
Expand Down Expand Up @@ -72,30 +70,11 @@ export function replaceDoubleQuotes(original: string): string {
.replace(//g, "’");
}

export interface ReferenceMatchScore {
score: number,
id: string,
/**
 * Normalizes a title so that two titles can be compared for equality while
 * ignoring case, a small set of punctuation, and whitespace differences.
 *
 * Steps, in order:
 *  1. lower-case the title;
 *  2. remove `.`, `:`, `,`, `(`, `)`, the ASCII hyphen-minus, and the Unicode
 *     hyphen/dash variants U+2010–U+2015, U+FE58, U+FE63 and U+FF0D;
 *  3. collapse each run of whitespace to a single space;
 *  4. trim leading and trailing whitespace.
 *
 * @param title - The raw title text.
 * @returns The normalized, comparison-ready form of the title.
 */
function searchableTitle(title: string): string {
	return title
		.toLowerCase()
		// The hyphen is escaped on purpose: an unescaped `-` after `)` would form
		// the range U+0029–U+002D and also strip `*` and `+` (e.g. "C++" -> "c").
		.replace(/[.:,()\-\u2010\u2011\u2012\u2013\u2014\u2015\ufe58\ufe63\uff0d]/gu, '')
		.replace(/\s+/g, ' ')
		.trim();
}

export function scoreRefMatches(quote: Quote, refs: CslReference[]): ReferenceMatchScore[] {
function refSearchStringForQuote(quote: Quote): string {
const authors = (quote.authors || []).map(s => s.toLowerCase().replace(/\.,/g, ''));
authors.sort();
return `${quote.title} ${authors.join(' ')}`.toLowerCase();
}

function refSearchStringForRef(ref: CslReference): string {
function authorString(author: CslAuthor): string {
return `${author.family}, ${author.given}`.toLowerCase().replace(/\.,/g, '');
}

const authors = (ref.author || []).map(authorString);
authors.sort();
return `${ref.title} ${authors.join(' ')}`.toLowerCase();
}

const search = refSearchStringForQuote(quote);
const options = refs.map(ref => ({id: ref.id, searchString: refSearchStringForRef(ref)}));
const results = fuzzysort.go(search, options, {key: 'searchString'});
return results.map(result => ({score: result.score, id: result.obj.id}));
/**
 * Finds the citation id of the reference whose title equals the given title
 * after both have been passed through `searchableTitle`.
 *
 * Only exact matches on the normalized titles count; there is no approximate
 * matching. When several references match, the first one in `refs` wins.
 *
 * @param title - The title to look up (for example, the title from a pasted quote).
 * @param refs - Candidate references; entries without a title are never matched.
 * @returns The matching reference's id, or `null` when nothing matches, when
 *          the title normalizes to an empty string, or when the matching
 *          reference has an empty id.
 */
export function guessCiteId(title: string, refs: CslReference[]): string | null {
	const st = searchableTitle(title);
	if (st === '') {
		// A title made up only of stripped punctuation/whitespace would otherwise
		// compare equal to every reference with a missing or punctuation-only
		// title and yield an unrelated id.
		return null;
	}
	return refs.find((ref) => searchableTitle(ref.title || "") === st)?.id || null;
}

0 comments on commit 7eb3d47

Please sign in to comment.