Skip to content

Commit

Permalink
improve index cache storage
Browse files: browse the repository at this point in the history
  • Loading branch information
ker0olos committed Mar 21, 2024
1 parent 58f5466 commit 73fd44f
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/search-index.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: search-index

on:
# push:
push:
workflow_dispatch:
# schedule:
# # Once a week "At 00:00 on Sunday" (see https://crontab.guru/once-a-week)
Expand Down
1 change: 1 addition & 0 deletions deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"mongodb": "npm:mongodb",
"mongodb-memory-server": "npm:mongodb-memory-server-core",
"orama": "npm:@orama/orama@2.0.11",
"orama-persist": "npm:@orama/plugin-data-persistence/server",
"levenshtein": "https://deno.land/x/fastest_levenshtein@1.0.10/mod.ts"
}
}
46 changes: 17 additions & 29 deletions search-index/cache.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
import { create, insert, Orama } from 'orama';

import { join } from '$std/path/mod.ts';
import { persistToFile } from 'orama-persist';

import utils from '~/src/utils.ts';

import { gql, request } from '~/packs/anilist/graphql.ts';

import { AniListCharacter, AniListMedia } from '~/packs/anilist/types.ts';

import { characterSchema, mediaSchema } from '~/search-index/mod.ts';
import {
characterSchema,
charactersIndexCachePath,
mediaIndexCachePath,
mediaSchema,
} from '~/search-index/mod.ts';

const anilistAPI = 'https://graphql.anilist.co';

const dirname = new URL('.', import.meta.url).pathname;

const charactersIndexCachePath = join(dirname, './characters.json');
const mediaIndexCachePath = join(dirname, './media.json');
// Orama index for media documents, typed by mediaSchema. Created at module
// load through top-level await, with only the schema supplied.
const mediaIndex: Orama<typeof mediaSchema> = await create({
schema: mediaSchema,
});

// Orama index for character documents, typed by characterSchema; created the
// same way as the media index.
const charactersIndex: Orama<typeof characterSchema> = await create({
schema: characterSchema,
});

const mediaIndex: Orama<typeof mediaSchema> = await create({
schema: mediaSchema,
});

type PageInfo = {
hasNextPage: boolean;
};
Expand Down Expand Up @@ -160,6 +160,10 @@ let mediaPage = 1;

while (true) {
try {
// NOTE(review): hard stop once mediaPage reaches 5, before that page is
// queried. This looks like a temporary debugging cap; if it ships, the index
// will presumably only contain the first few pages. Confirm it is intentional.
if (mediaPage === 5) {
break;
}

const { pageInfo, media } = await queryMedia(mediaPage);

for (const { id, characters: firstPage } of media) {
Expand Down Expand Up @@ -208,8 +212,6 @@ while (true) {
title: mediaTitle,
popularity: media?.node.popularity,
});

console.log(`indexed 1 media with id:${mediaId}`);
}

if (
Expand All @@ -225,8 +227,6 @@ while (true) {
popularity: media?.node.popularity,
role: media.characterRole,
});

console.log(`indexed 1 character with id:${id}`);
}
}
}
Expand All @@ -242,7 +242,7 @@ while (true) {
// handle the rate limit
// (see https://anilist.gitbook.io/anilist-apiv2-docs/overview/rate-limiting)
if (e.message?.includes('Too Many Requests')) {
console.log('sleeping for a minute...');
// console.log('sleeping for a minute...');
await utils.sleep(60);
continue;
}
Expand Down Expand Up @@ -272,20 +272,8 @@ while (true) {
}

// Write the caches for both indexes concurrently; the await rejects if any
// single write fails.
await Promise.all([
// NOTE(review): the two Deno.writeTextFile entries look like the pre-change
// JSON serialization shown next to its replacement in this diff view. They
// also join dirname onto a cache path — TODO confirm which side is live.
Deno.writeTextFile(
join(dirname, charactersIndexCachePath),
JSON.stringify({
data: charactersIndex.data,
internalDocumentIDStore: charactersIndex.internalDocumentIDStore,
}),
),
Deno.writeTextFile(
join(dirname, mediaIndexCachePath),
JSON.stringify({
data: mediaIndex.data,
internalDocumentIDStore: mediaIndex.internalDocumentIDStore,
}),
),
// Persist each index in the plugin's 'binary' format at its cache path.
persistToFile(charactersIndex, 'binary', charactersIndexCachePath),
persistToFile(mediaIndex, 'binary', mediaIndexCachePath),
]);

console.log('\n\nwritten caches to disk');
25 changes: 21 additions & 4 deletions search-index/mod.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
import { join } from '$std/path/mod.ts';

import { restoreFromFile } from 'orama-persist';

import type { Orama } from 'orama';

import type { CharacterRole } from '~/src/types.ts';

// Directory of this module, taken from the pathname of its URL.
// NOTE(review): URL pathnames are percent-encoded and are not OS-native paths
// (e.g. on Windows) — confirm this only runs where that is acceptable.
const dirname = new URL('.', import.meta.url).pathname;

// Paths of the persisted media and characters index files, resolved against
// this module's directory.
export const mediaIndexCachePath = join(dirname, './media.msp');
export const charactersIndexCachePath = join(dirname, './characters.msp');

export type IndexedCharacter = Orama<{
id: string;
name: string[];
Expand Down Expand Up @@ -34,8 +43,16 @@ export const mediaSchema = {
// term: 'luka',
// };

// const characterDB: Orama<typeof characterSchema> = await create({
// schema: characterSchema,
// });

// const result: Results<TypedDocument<IndexedCharacter>> = await search(characterDB, searchParams);

/**
 * Restores the media search index from the binary cache file located at
 * `mediaIndexCachePath`.
 *
 * @returns the result of `restoreFromFile` for that file, typed as an Orama
 * index over `mediaSchema`.
 */
export const loadMediaIndex = () => {
  type MediaIndex = Orama<typeof mediaSchema>;

  return restoreFromFile<MediaIndex>('binary', mediaIndexCachePath);
};

/**
 * Restores the characters search index from the binary cache file located at
 * `charactersIndexCachePath`.
 *
 * @returns the result of `restoreFromFile` for that file, typed as an Orama
 * index over `characterSchema`.
 */
export const loadCharactersIndex = () => {
  type CharactersIndex = Orama<typeof characterSchema>;

  return restoreFromFile<CharactersIndex>('binary', charactersIndexCachePath);
};

0 comments on commit 73fd44f

Please sign in to comment.