diff --git a/README.md b/README.md index 6484fef..6fe23b7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
-# [DQL](https://deno.land/x/dql) +# [🦕 DQL](https://deno.land/x/dql) ### _**Web Scraping with Deno  –  DOM + GraphQL**_ @@ -8,148 +8,163 @@ --- -**`DQL`** lets you use GraphQL queries to extract data from the DOM of a web page or HTML fragment (for sandboxing or use cases without network access). It accepts [**GraphQL Queries**](https://graphql.org/learn/queries) as input, and returns formatted JSON data as output. - -> - [**Try out a real-world example of `useQuery` in the `Deno Playground`**](https://dash.deno.com/playground/dql) -> - [**View the example's JSON endpoint at `dql.deno.dev`**](https://dql.deno.dev) - -## Summary - -This is a fork of [**DenoQL**](https://deno.land/x/denoql) with some heavy refactoring and some additional features: +**`DQL`** is a web scraping module for Deno and Deno Deploy that integrates the power of [**GraphQL Queries**](https://graphql.org/learn/queries) with the DOM tree of a remote webpage or HTML document fragment. This is a fork of [**DenoQL**](https://deno.land/x/denoql) with some heavy refactoring and some additional features: - [x] Compatibility with the [**Deno Deploy**](https://deno.com/deploy) architecture - [x] Ability to pass variables alongside all queries - [x] New state-management class with additional methods - [x] Modular project structure (as opposed to a mostly single-file design) - [x] Improved types and schema structure -- [ ] **This is a work-in-progress and there is still much to be done.** * -## Usage +> **Note**: _This is a work-in-progress and there is still a lot to be done._ -The primary function exported by the module is the workhorse named `useQuery`: +### 🛝  [**`GraphQL Playground`**](https://dql.deno.dev) -```ts -import { useQuery } from "https://deno.land/x/dql/mod.ts"; +### 📝  [**`HackerNews Scraper`**](https://dash.deno.com/playground/dql-hn) -const data = await useQuery(`query { ... 
}`); -``` +### 🚛  [**`Junkyard Scraper`**](https://dash.deno.com/playground/dirty-sparrow-69) -### Query Options +--- -You can also provide an options object for the second argument of `useQuery`: +## `useQuery` + +The primary function exported by the module is the workhorse named `useQuery`: ```ts -const data = await useQuery(`query { ... }`, { - concurrency: 8, - fetch_options: { - // passed as the second param to fetch() - }, - variables: { - // any variables used in your queries go here - }, -}); +import { useQuery } from "https://deno.land/x/dql/mod.ts"; + +const data = await useQuery(`query { ... }`); ``` -### Authenticated Requests +### `QueryOptions` -To authenticate your requests, you can add an `Authorization` header like so: +You can also provide a `QueryOptions` object as the second argument of `useQuery`, to further control the behavior of your query requests. All properties are optional. ```ts const data = await useQuery(`query { ... }`, { - fetch_options: { + concurrency: 8, // passed directly to PQueue initializer + fetch_options: { // passed directly to Fetch API requests headers: { "Authorization": "Bearer ghp_a5025a80a24defd0a7d06b4fc215bb5635a167c6", }, }, + variables: {}, // variables defined in your queries + operationName: "", // when using multiple queries }); ``` -## GraphQL Playground - -### Deno Deploy +## `createServer` -With [**Deno Deploy**](https://dash.deno.com/new), you can deploy **`DQL`** with a GraphQL Playground in **only 2 LOC**: +With [**Deno Deploy**](https://dash.deno.com/new), you can deploy **`DQL`** with a GraphQL Playground in **only 2 lines of code**: ```ts import { createServer } from "https://deno.land/x/dql/mod.ts"; -// change the endpoint to your unique URL ([...].deno.dev) -createServer(80, { endpoint: "https://dirty-sparrow-69.deno.dev" }); +createServer(80, { endpoint: "https://dql.deno.dev" }); ``` -> - [**Try it out at `dirty-sparrow-69.deno.dev`**](https://dirty-sparrow-69.deno.dev) -> - [**View the public 
code in the `Deno Playground`**](https://dash.deno.com/playground/dirty-sparrow-69) +`🛝` [Try the **GraphQL Playground** at **`dql.deno.dev`**](https://dql.deno.dev)\ +`🦕` [View the source code in the **`Deno Playground`**](https://dash.deno.com/playground/dql) -### Command Line Usage (CLI) +## Command Line Usage (CLI) ```bash -# spin up a playground on port 8080 deno run -A --unstable https://deno.land/x/dql/serve.ts ``` +#### Custom port (default is `8080`) + ```bash -# ... or using a custom port -deno run -A --unstable https://deno.land/x/dql/serve.ts --port 3000 +deno run -A https://deno.land/x/dql/serve.ts --port 3000 ``` -> **Note**: you need to have the [**Deno CLI**](https://deno.land) installed for CLI usage. +> **Warning**: you need to have the [**Deno CLI**](https://deno.land) installed first. -### Programmatic Usage +--- -```ts -import { createServer } from "https://deno.land/x/dql/mod.ts"; +## 💻 Examples -// start a playground on port 8080 -createServer(); +### `🚛` Junkyard Scraper · [**`Deno Playground 🦕`**](https://dash.deno.com/playground/dirty-sparrow-69) -// or using a custom port -createServer(3000); +```ts +import { useQuery } from "https://deno.land/x/dql/mod.ts"; +import { serve } from "https://deno.land/std@0.147.0/http/server.ts"; + +serve(async (res: Request) => + await useQuery( + ` + query Junkyard ( + $url: String + $itemSelector: String = "table > tbody > tr" + ) { + vehicles: page(url: $url) { + totalCount: count(selector: $itemSelector) + nodes: queryAll(selector: $itemSelector) { + id: index + vin: text(selector: "td:nth-child(7)", trim: true) + sku: text(selector: "td:nth-child(6)", trim: true) + year: text(selector: "td:nth-child(1)", trim: true) + model: text(selector: "td:nth-child(2) > .notranslate", trim: true) + aisle: text(selector: "td:nth-child(3)", trim: true) + store: text(selector: "td:nth-child(4)", trim: true) + color: text(selector: "td:nth-child(5)", trim: true) + date: attr(selector: 
"td:nth-child(8)", name: "data-value") + image: src(selector: "td > a > img") + } + } + }`, + { + variables: { + "url": "http://nvpap.deno.dev/action=getVehicles&makes=BMW", + }, + }, + ) + .then((data) => JSON.stringify(data, null, 2)) + .then((json) => + new Response(json, { + headers: { "content-type": "application/json;charset=utf-8" }, + }) + ) +); ``` -## Examples - -### Junkyard Inventory Scraper - -> - [**Try it for yourself in the `Deno Playground`**](https://dash.deno.com/playground/dql) -> - [**View the JSON endpoint at `dql.deno.dev`**](https://dql.deno.dev) +### 📝 HackerNews Scraper · [**`Deno Playground 🦕`**](https://dash.deno.com/playground/dql-hn) ```ts import { useQuery } from "https://deno.land/x/dql/mod.ts"; - -const query = `query Junkyard ($url: String, $itemSelector: String) { - vehicles: page(url: $url) { - totalCount: count(selector: $itemSelector) - items: queryAll(selector: $itemSelector) { - id: index - vin: text(selector: "td:nth-child(7)", trim: true) - sku: text(selector: "td:nth-child(6)", trim: true) - year: text(selector: "td:nth-child(1)", trim: true) - model: text(selector: "td:nth-child(2) > .notranslate", trim: true) - aisle: text(selector: "td:nth-child(3)", trim: true) - store: text(selector: "td:nth-child(4)", trim: true) - color: text(selector: "td:nth-child(5)", trim: true) - date: attr(selector: "td:nth-child(8)", name: "data-value") - image: src(selector: "td > a > img") +import { serve } from "https://deno.land/std@0.147.0/http/server.ts"; + +serve(async (res: Request) => + await useQuery(` + query HackerNews ( + $url: String = "http://news.ycombinator.com" + $rowSelector: String = "tr.athing" + ) { + page(url: $url) { + title + totalCount: count(selector: $rowSelector) + nodes: queryAll(selector: $rowSelector) { + rank: text(selector: "td span.rank", trim: true) + title: text(selector: "td.title a", trim: true) + site: text(selector: "span.sitestr", trim: true) + url: href(selector: "td.title a") + attrs: next { 
+ score: text(selector: "span.score", trim: true) + user: text(selector: "a.hnuser", trim: true) + date: attr(selector: "span.age", name: "title") + } + } } - } -}`; - -// pass any variables using the 'variables' key -const response = await useQuery(query, { - variables: { - "url": "http://nvpap.deno.dev/action=getVehicles&makes=BMW", - "itemSelector": "table > tbody > tr", - }, -}); - -// do something with response (Object) -console.log(response); + }`) + .then((data) => JSON.stringify(data, null, 2)) + .then((json) => + new Response(json, { + headers: { "content-type": "application/json;charset=utf-8" }, + }) + ) +); ``` ---- +## License -
- -MIT © [Nicholas Berlette](https://github.com/nberlette) • based on [DenoQL](https://deno.land/x/denoql) by [nyancodeid](https://github.com/nyancodeid) - -
+MIT © [**Nicholas Berlette**](https://github.com/nberlette), based on [DenoQL](https://deno.land/x/denoql).