Skip to content

Commit

Permalink
tweets.jsonl
Browse files Browse the repository at this point in the history
  • Loading branch information
catdevnull committed Feb 17, 2025
1 parent 2ecd899 commit 1c00b19
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 1 deletion.
52 changes: 52 additions & 0 deletions sitio/src/routes/api/datasets/tweets.jsonl/+server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { db } from "$lib/db";
import { and, desc, lt, sql } from "drizzle-orm";
import { retweets, tweets } from "../../../../schema";
import { stringify } from "csv-stringify/sync";
import type { RequestHandler } from "@sveltejs/kit";

/**
 * GET handler that exports the stored tweets as a JSON Lines download.
 *
 * Rows whose scraper JSON has `isRetweet` equal to false are read from the
 * `tweets` table in descending `id` order. Each output line is the row's
 * `twitterScraperJson` object with a `capturedAt` ISO-8601 string added.
 *
 * @returns A `Response` whose body is one JSON document per line, served as an
 * attachment with permissive CORS headers for GET requests.
 */
export const GET: RequestHandler = async () => {
  console.time("tweets-jsonl");

  // Rows are fetched in pages because a single large query makes libsql fail
  // with "LibsqlError: RESPONSE_TOO_LARGE: Response is too large".
  const BATCH_SIZE = 1000;
  const records: Record<string, unknown>[] = [];

  try {
    // `id` of the last row of the previous page; the next page only contains
    // rows with a strictly smaller `id`.
    let lastId: string | null = null;

    while (true) {
      // Explicit annotation: `batch` feeds `lastId`, which feeds the query,
      // so the type cannot be inferred from the initializer alone.
      const batch: (typeof tweets.$inferSelect)[] =
        await db.query.tweets.findMany({
          orderBy: desc(tweets.id),
          limit: BATCH_SIZE,
          where: and(
            sql`json_extract(twitter_scraper_json, '$.isRetweet') = false`,
            lastId ? lt(tweets.id, lastId) : undefined,
          ),
        });

      if (batch.length === 0) break;

      // Map and append in place instead of re-copying the accumulated array
      // on every page (the previous `concat` was quadratic in total rows).
      for (const row of batch) {
        records.push({
          // NOTE(review): the shape of `twitterScraperJson` is not visible
          // here; it is spread as-is, so it is assumed to be a JSON object.
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          ...(row.twitterScraperJson as any),
          capturedAt: row.capturedAt.toISOString(),
        });
      }
      lastId = batch[batch.length - 1].id;

      // A short page means there is nothing left to fetch.
      if (batch.length < BATCH_SIZE) break;
    }
  } finally {
    // Always close the timer, even when a query or the mapping throws.
    console.timeEnd("tweets-jsonl");
  }

  const body = records.map((record) => JSON.stringify(record)).join("\n");

  // The ISO timestamp contains ":" which is not allowed in an unquoted header
  // token, so the filename is sent as a quoted-string.
  const filename = `tweets-milei.nulo.lol-${new Date().toISOString()}.jsonl`;

  return new Response(body, {
    headers: {
      "Content-Type": "application/jsonl",
      "Content-Disposition": `attachment; filename="${filename}"`,
      "Access-Control-Allow-Origin": "*",
      "Access-Control-Allow-Methods": "GET",
      "Access-Control-Allow-Headers": "Content-Type",
    },
  });
};
5 changes: 4 additions & 1 deletion sitio/src/routes/info/faq/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,11 @@ Podés descargar los últimos datos de likes y retweets en formato CSV:
<a href="/api/datasets/likes.csv" class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-neutral-800 px-4 py-2 mr-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 dark:hover:bg-neutral-800/50 hover:bg-neutral-900/80 focus:outline-none focus:ring-2 focus:ring-neutral-900 focus:ring-offset-2">
likes.csv
</a>
<a href="/api/datasets/retweets.csv" class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-neutral-800 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 dark:hover:bg-neutral-800/50 hover:bg-neutral-900/80 focus:outline-none focus:ring-2 focus:ring-neutral-900 focus:ring-offset-2">
<a href="/api/datasets/retweets.csv" class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-neutral-800 px-4 py-2 mr-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 dark:hover:bg-neutral-800/50 hover:bg-neutral-900/80 focus:outline-none focus:ring-2 focus:ring-neutral-900 focus:ring-offset-2">
retweets.csv
</a>
<a href="/api/datasets/tweets.jsonl" class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-neutral-800 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 dark:hover:bg-neutral-800/50 hover:bg-neutral-900/80 focus:outline-none focus:ring-2 focus:ring-neutral-900 focus:ring-offset-2">
tweets.jsonl
</a>
Tené en cuenta que solo contempla los datos que pudimos capturar.
Expand Down

0 comments on commit 1c00b19

Please sign in to comment.