-
-
Notifications
You must be signed in to change notification settings - Fork 471
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2237 from sudoAlireza/colly
add colly scraping and storing to db example
- Loading branch information
Showing
17 changed files
with
1,007 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= | ||
github.com/aws/aws-lambda-go v1.46.0/go.mod h1:dpMpZgvWx5vuQJfBt0zqBha60q7Dd7RfgJv23DymV8A= | ||
github.com/awslabs/aws-lambda-go-api-proxy v0.16.1/go.mod h1:31WDgvTzVyra022CWzO6uEZFel9/y7QKaZpUQEqYLr0= | ||
github.com/gofiber/fiber/v2 v2.52.1/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= | ||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= | ||
github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= | ||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= | ||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= | ||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= | ||
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= | ||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= | ||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= | ||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= | ||
github.com/valyala/fasthttp v1.52.0/go.mod h1:hf5C4QnVMkNXMspnsUlfM3WitlgYflyhHYoKol/szxQ= | ||
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= | ||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
## Simple Web Scraping Colly App with Fiber | ||
|
||
This is a basic Go application using the Fiber framework to create scraping tasks in colly. | ||
|
||
### How to Run | ||
|
||
1. Clone the repository. | ||
2. Navigate to the project directory. | ||
3. Run `docker compose up --build`. | ||
4. Visit `http://127.0.0.1:3000/api/healthchecker` in a web browser or use a tool like `curl` to test it. | ||
5. Send a `GET` request to `http://127.0.0.1:3000/scrape/coursera` to start scraping Coursera courses, or to `http://127.0.0.1:3000/scrape/quotes` to scrape `quotes.toscrape.com`. | ||
|
||
|
||
### What It Does | ||
|
||
- Scrapes data from websites and stores it in a PostgreSQL database. |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
FROM golang:1.20 | ||
|
||
RUN apt update && apt upgrade -y &&\ | ||
apt install -y git | ||
|
||
WORKDIR /go/src/app | ||
|
||
COPY . ./ | ||
|
||
RUN go mod tidy && go mod verify | ||
|
||
ENTRYPOINT [ "go", "run", "./cmd/api" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
POSTGRES_HOST=colly_db | ||
POSTGRES_PORT=5432 | ||
POSTGRES_USER=postgres | ||
POSTGRES_PASSWORD=postgres | ||
POSTGRES_DB=colly | ||
|
||
DATABASE_URL=postgres://postgres:postgres@colly_db:5432/colly?schema=public |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
POSTGRES_HOST=colly_db | ||
POSTGRES_PORT=5432 | ||
POSTGRES_USER=postgres | ||
POSTGRES_PASSWORD=postgres | ||
POSTGRES_DB=colly | ||
|
||
DATABASE_URL=postgres://postgres:postgres@colly_db:5432/colly?schema=public |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package main | ||
|
||
import ( | ||
"log" | ||
|
||
"fiber-colly-gorm/internals/consts" | ||
"fiber-colly-gorm/internals/services/database" | ||
"fiber-colly-gorm/internals/services/scrapers" | ||
|
||
"github.com/gofiber/fiber/v2" | ||
"github.com/gofiber/fiber/v2/middleware/cors" | ||
"github.com/gofiber/fiber/v2/middleware/logger" | ||
) | ||
|
||
func main() { | ||
|
||
config, err := consts.LoadConfig(".") | ||
if err != nil { | ||
log.Fatalln("Failed to load environment variables!\n", err.Error()) | ||
} | ||
database.ConnectDb(&config) | ||
|
||
app := fiber.New() | ||
micro := fiber.New() | ||
scrape := fiber.New() | ||
|
||
app.Mount("/api", micro) | ||
app.Mount("/scrape", scrape) | ||
app.Use(logger.New()) | ||
app.Use(cors.New(cors.Config{ | ||
AllowOrigins: "http://localhost:3000", | ||
AllowHeaders: "Origin, Content-Type, Accept", | ||
AllowMethods: "GET", | ||
AllowCredentials: true, | ||
})) | ||
|
||
micro.Get("/healthchecker", func(c *fiber.Ctx) error { | ||
return c.Status(200).JSON(fiber.Map{ | ||
"status": "success", | ||
"message": "Welcome to Golang, Fiber, and Colly", | ||
}) | ||
}) | ||
|
||
scrape.Get("quotes", func(c *fiber.Ctx) error { | ||
go scrapers.Quotes() | ||
return c.Status(200).JSON(fiber.Map{ | ||
"status": "success", | ||
"message": "Start scraping quotes.toscrape.com ...", | ||
}) | ||
}) | ||
|
||
scrape.Get("coursera", func(c *fiber.Ctx) error { | ||
go scrapers.CourseraCourses() | ||
return c.Status(200).JSON(fiber.Map{ | ||
"status": "success", | ||
"message": "Start scraping courses details from coursera.org...", | ||
}) | ||
}) | ||
|
||
log.Fatal(app.Listen(":3000")) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
module fiber-colly-gorm | ||
|
||
go 1.20 | ||
|
||
require ( | ||
github.com/gocolly/colly v1.2.0 | ||
github.com/gofiber/fiber/v2 v2.52.1 | ||
github.com/spf13/viper v1.16.0 | ||
gorm.io/driver/postgres v1.5.2 | ||
gorm.io/gorm v1.25.3 | ||
) | ||
|
||
require ( | ||
github.com/PuerkitoBio/goquery v1.8.1 // indirect | ||
github.com/andybalholm/brotli v1.0.5 // indirect | ||
github.com/andybalholm/cascadia v1.3.1 // indirect | ||
github.com/antchfx/htmlquery v1.3.0 // indirect | ||
github.com/antchfx/xmlquery v1.3.17 // indirect | ||
github.com/antchfx/xpath v1.2.4 // indirect | ||
github.com/fsnotify/fsnotify v1.6.0 // indirect | ||
github.com/gobwas/glob v0.2.3 // indirect | ||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect | ||
github.com/golang/protobuf v1.5.3 // indirect | ||
github.com/google/uuid v1.5.0 // indirect | ||
github.com/hashicorp/hcl v1.0.0 // indirect | ||
github.com/jackc/pgpassfile v1.0.0 // indirect | ||
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect | ||
github.com/jackc/pgx/v5 v5.3.1 // indirect | ||
github.com/jinzhu/inflection v1.0.0 // indirect | ||
github.com/jinzhu/now v1.1.5 // indirect | ||
github.com/kennygrant/sanitize v1.2.4 // indirect | ||
github.com/klauspost/compress v1.17.0 // indirect | ||
github.com/magiconair/properties v1.8.7 // indirect | ||
github.com/mattn/go-colorable v0.1.13 // indirect | ||
github.com/mattn/go-isatty v0.0.20 // indirect | ||
github.com/mattn/go-runewidth v0.0.15 // indirect | ||
github.com/mitchellh/mapstructure v1.5.0 // indirect | ||
github.com/pelletier/go-toml/v2 v2.0.8 // indirect | ||
github.com/rivo/uniseg v0.2.0 // indirect | ||
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect | ||
github.com/spf13/afero v1.9.5 // indirect | ||
github.com/spf13/cast v1.5.1 // indirect | ||
github.com/spf13/jwalterweatherman v1.1.0 // indirect | ||
github.com/spf13/pflag v1.0.5 // indirect | ||
github.com/subosito/gotenv v1.4.2 // indirect | ||
github.com/temoto/robotstxt v1.1.2 // indirect | ||
github.com/valyala/bytebufferpool v1.0.0 // indirect | ||
github.com/valyala/fasthttp v1.51.0 // indirect | ||
github.com/valyala/tcplisten v1.0.0 // indirect | ||
golang.org/x/crypto v0.14.0 // indirect | ||
golang.org/x/net v0.17.0 // indirect | ||
golang.org/x/sys v0.15.0 // indirect | ||
golang.org/x/text v0.13.0 // indirect | ||
google.golang.org/appengine v1.6.7 // indirect | ||
google.golang.org/protobuf v1.30.0 // indirect | ||
gopkg.in/ini.v1 v1.67.0 // indirect | ||
gopkg.in/yaml.v3 v3.0.1 // indirect | ||
) |
Oops, something went wrong.