Skip to content

Commit

Permalink
all-tweets.jsonl
Browse files Browse the repository at this point in the history
  • Loading branch information
catdevnull committed Feb 25, 2025
1 parent 81b5e48 commit a4b7f46
Showing 1 changed file with 48 additions and 0 deletions.
48 changes: 48 additions & 0 deletions sitio/src/routes/api/datasets/all-tweets.jsonl/+server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { db } from "$lib/db";
import { and, desc, lt, sql } from "drizzle-orm";
import { retweets, tweets } from "../../../../schema";
import type { RequestHandler } from "@sveltejs/kit";

/**
 * GET handler that exports every row of the `tweets` table as a JSON Lines
 * download.
 *
 * Rows are fetched in descending `id` order, `BATCH_SIZE` at a time, using the
 * last `id` of each page as the cursor for the next one. Every output line is
 * the row's `twitterScraperJson` payload with `capturedAt` (as an ISO string)
 * merged in.
 *
 * @returns A `Response` whose body is newline-separated JSON, served as an
 *   attachment with permissive CORS headers.
 */
export const GET: RequestHandler = async () => {
  console.time("all-tweets-jsonl");

  const BATCH_SIZE = 1000;
  // Only the serialized lines are retained; each row is stringified as soon as
  // its page arrives so the full dataset is not held in several copies.
  const lines: string[] = [];

  try {
    let lastId: string | null = null;
    // Page through the table: fetching everything in a single query fails with
    // "LibsqlError: RESPONSE_TOO_LARGE: Response is too large".
    while (true) {
      const batch: (typeof tweets.$inferSelect)[] =
        await db.query.tweets.findMany({
          orderBy: desc(tweets.id),
          limit: BATCH_SIZE,
          // No cursor on the first page; afterwards only ids strictly below
          // the last one already seen.
          where: and(lastId !== null ? lt(tweets.id, lastId) : undefined),
        });

      for (const row of batch) {
        lines.push(
          JSON.stringify({
            ...(row.twitterScraperJson as any),
            capturedAt: row.capturedAt.toISOString(),
          }),
        );
      }

      // A short (or empty) page means the table is exhausted.
      if (batch.length < BATCH_SIZE) break;
      lastId = batch[batch.length - 1].id;
    }
  } finally {
    // Always close the timer, even when a query throws, so the label is not
    // left dangling for the next request.
    console.timeEnd("all-tweets-jsonl");
  }

  const exportedAt = new Date().toISOString();

  return new Response(lines.join("\n"), {
    headers: {
      "Content-Type": "application/jsonl",
      // The timestamp contains ":" which is not allowed in an unquoted
      // header token, so the filename is sent as a quoted-string.
      "Content-Disposition": `attachment; filename="all-tweets-milei.nulo.lol-${exportedAt}.jsonl"`,
      "Access-Control-Allow-Origin": "*",
      "Access-Control-Allow-Methods": "GET",
      "Access-Control-Allow-Headers": "Content-Type",
    },
  });
};

0 comments on commit a4b7f46

Please sign in to comment.