Skip to content

Commit

Permalink
URL check service and healthcheck
Browse files Browse the repository at this point in the history
  • Loading branch information
goulven authored and goulven committed Jan 29, 2025
1 parent e40120c commit 9a6fc54
Show file tree
Hide file tree
Showing 11 changed files with 820 additions and 28 deletions.
6 changes: 6 additions & 0 deletions ui/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@
<optional>true</optional>
</dependency>

<!-- crawler-commons: introduced together with the URL check feature; presumably used to parse sitemaps (TODO confirm against UrlCheckService) -->
<dependency>
<groupId>com.github.crawler-commons</groupId>
<artifactId>crawler-commons</artifactId>
<version>1.4</version>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
Expand Down
3 changes: 2 additions & 1 deletion ui/src/main/java/org/open4goods/ui/Ui.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.open4goods.commons.store.repository.elastic.ElasticProductRepository;
import org.open4goods.commons.store.repository.elastic.ElasticTextRepository;
import org.open4goods.commons.store.repository.elastic.VerticalPagesRepository;
import org.open4goods.ui.repository.CheckedUrlRepository;
import org.open4goods.ui.repository.ContributionVoteRepository;
import org.open4goods.ui.repository.UserSearchRepository;
import org.slf4j.Logger;
Expand All @@ -30,7 +31,7 @@
@EnableScheduling
@EnableCaching
@Configuration
@EnableElasticsearchRepositories(basePackageClasses = {VerticalPagesRepository.class, ContributionVoteRepository.class, UserSearchRepository.class, ElasticProductRepository.class, BrandScoresRepository.class, ElasticTextRepository.class})
@EnableElasticsearchRepositories(basePackageClasses = {VerticalPagesRepository.class, CheckedUrlRepository.class, ContributionVoteRepository.class, UserSearchRepository.class, ElasticProductRepository.class, BrandScoresRepository.class, ElasticTextRepository.class})
//@EnableRedisRepositories(basePackageClasses = RedisProductRepository.class)
public class Ui {

Expand Down
15 changes: 15 additions & 0 deletions ui/src/main/java/org/open4goods/ui/config/yml/UiConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ public class UiConfig {
//
private WebConfig webConfig = new WebConfig();


/**
 * Config for url checking.
 * Initialised with a default instance (same approach as webConfig) so that
 * getUrlcheck() does not return null by default when no url check section
 * is bound from the configuration.
 */
private UrlCheckConfig urlcheck = new UrlCheckConfig();

/***
* Config for IP and UA banChecking
Expand Down Expand Up @@ -508,6 +513,16 @@ public void setAllowedImagesSizeSuffixes(Set<String> allowedImagesSizeSuffixes)
}


/**
 * @return the url checking configuration currently held by this object
 */
public UrlCheckConfig getUrlcheck() {
return urlcheck;
}


/**
 * Replaces the url checking configuration.
 *
 * @param urlcheck the new url checking configuration; stored as-is, without any null check
 */
public void setUrlcheck(UrlCheckConfig urlcheck) {
this.urlcheck = urlcheck;
}


/** @return the value of the amazonConfig field */
public AmazonConfig getAmazonConfig() { return amazonConfig; }

/** @param amazon the value to store in the amazonConfig field (stored as-is) */
public void setAmazonConfig(AmazonConfig amazon) { this.amazonConfig = amazon; }
Expand Down
66 changes: 66 additions & 0 deletions ui/src/main/java/org/open4goods/ui/config/yml/UrlCheckConfig.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package org.open4goods.ui.config.yml;

import java.util.ArrayList;
import java.util.List;

import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;

/**
* Holds external configuration for the URL check service,
* including the list of "bad patterns" to detect,
* the size of the thread pool,
* and the main sitemap URL to fetch.
*/
@Configuration
@ConfigurationProperties(prefix = "urlcheck")
public class UrlCheckConfig {

    /**
     * Main sitemap URL to read from. E.g.:
     * urlcheck.sitemap-url: "https://www.example.com/sitemap_index.xml"
     * Has no default: stays null until it is set.
     */
    private String sitemapUrl;

    /**
     * A list of bad patterns, read from application.yml
     * Example:
     * urlcheck.bad-patterns:
     * - "Internal Server Error"
     * - "database error"
     * - "Fatal error"
     * Never null: defaults to an empty list, and the setter maps null to an empty list.
     */
    private List<String> badPatterns = new ArrayList<>();

    /**
     * The size of the thread pool used by the service. Defaults to 5.
     */
    private int threadPoolSize = 5;


    // ---- Getters & Setters ----

    /**
     * @return the configured sitemap URL, or null when none has been set
     */
    public String getSitemapUrl() {
        return sitemapUrl;
    }

    /**
     * @param sitemapUrl the sitemap (or sitemap index) URL to read from
     */
    public void setSitemapUrl(String sitemapUrl) {
        this.sitemapUrl = sitemapUrl;
    }

    /**
     * @return the configured bad patterns; empty when none are configured, never null
     */
    public List<String> getBadPatterns() {
        return badPatterns;
    }

    /**
     * Replaces the list of bad patterns.
     *
     * @param badPatterns the new patterns; null is treated as "no patterns"
     */
    public void setBadPatterns(List<String> badPatterns) {
        // Normalise null to an empty list so that callers iterating over
        // getBadPatterns() never have to deal with a null collection.
        this.badPatterns = (badPatterns != null) ? badPatterns : new ArrayList<>();
    }

    /**
     * @return the configured thread pool size
     */
    public int getThreadPoolSize() {
        return threadPoolSize;
    }

    /**
     * @param threadPoolSize the thread pool size to use; stored without validation
     */
    public void setThreadPoolSize(int threadPoolSize) {
        this.threadPoolSize = threadPoolSize;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package org.open4goods.ui.controllers.ui;

import org.open4goods.commons.model.constants.RolesConstants;
import org.open4goods.ui.config.yml.UiConfig;
import org.open4goods.ui.services.UrlCheckService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

/**
 * REST controller to trigger reading a sitemap and checking URLs for health.
 * Access to every endpoint is restricted by the class-level {@code @PreAuthorize}
 * expression to callers holding {@code RolesConstants.ROLE_ADMIN}.
 */
@RestController
@RequestMapping("/urlcheck")
@PreAuthorize("hasAuthority('" + RolesConstants.ROLE_ADMIN + "')")
public class UrlCheckController {

    private final UrlCheckService urlCheckService;

    // Field-injected (unlike urlCheckService) to keep the existing
    // single-argument constructor unchanged for current callers.
    @Autowired UiConfig uiConfig;

    /**
     * @param urlCheckService the service performing sitemap reading and URL checks
     */
    public UrlCheckController(UrlCheckService urlCheckService) {
        this.urlCheckService = urlCheckService;
    }

    /**
     * Endpoint to read the sitemap whose URL is taken from the url check configuration
     * and hand it to the service via {@code readSitemapAndStore}.
     * The URL is not a request parameter.
     *
     * @return a short message: success, or a message starting with
     *         "Error reading sitemap: " when no URL is configured or the service throws
     */
    @GetMapping("/read-sitemap")
    public String readSitemap() {
        String sitemapUrl = configuredSitemapUrl();
        if (sitemapUrl == null) {
            // Fail with an explicit message instead of relying on a
            // NullPointerException being caught below.
            return "Error reading sitemap: no sitemap URL is configured";
        }
        try {
            urlCheckService.readSitemapAndStore(sitemapUrl);
            return "Sitemap read successfully";
        } catch (Exception e) {
            // Boundary catch: the failure is reported to the caller as plain text.
            return "Error reading sitemap: " + e.getMessage();
        }
    }

    /**
     * Resolves the sitemap URL from the injected configuration.
     *
     * @return the configured URL, or null when the configuration, its url check
     *         section, or the URL itself is missing or blank
     */
    private String configuredSitemapUrl() {
        if (uiConfig == null || uiConfig.getUrlcheck() == null) {
            return null;
        }
        String url = uiConfig.getUrlcheck().getSitemapUrl();
        if (url == null || url.trim().isEmpty()) {
            return null;
        }
        return url;
    }

    /**
     * Endpoint to perform a check of all stored URLs.
     * Calls {@code checkAllUrls()} on the service and then formats the counters
     * exposed by the service getters.
     * NOTE(review): the counters are read from the service after the call returns;
     * whether they are scoped to this run is not visible here - confirm in UrlCheckService.
     *
     * @return a short status message
     */
    @GetMapping("/check-all")
    public String checkAllUrls() {
        urlCheckService.checkAllUrls();
        // Return summary from counters
        return String.format("Check completed. \n" +
                "Total tested: %d\n" +
                "HTTP 500: %d\n" +
                "Bad patterns: %d\n" +
                "Redirects (30x): %d\n" +
                "Other statuses: %d\n",
                urlCheckService.getTotalUrlsTested(),
                urlCheckService.getTotal500Errors(),
                urlCheckService.getTotalBadPatternHits(),
                urlCheckService.getTotalRedirects(),
                urlCheckService.getTotalOtherStatus()
        );
    }
}
151 changes: 151 additions & 0 deletions ui/src/main/java/org/open4goods/ui/model/CheckedUrl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package org.open4goods.ui.model;

import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.DateFormat;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;

import java.util.HashSet;
import java.util.Set;

/**
 * Elasticsearch document describing one URL discovered in a sitemap,
 * together with the outcome of its most recent check
 * (status code, timings, health flag and bad patterns found).
 */
@Document(indexName = "checked-urls") // index name can be changed here if required
public class CheckedUrl {

    /** The URL itself; it doubles as the document ID and is mapped as a keyword. */
    @Id
    @Field(type = FieldType.Keyword)
    private String url;

    /** Creation timestamp (first seen in sitemap), epoch milliseconds, mapped as a date. */
    @Field(type = FieldType.Date, format = DateFormat.epoch_millis)
    private long created;

    /** Timestamp of the last check, epoch milliseconds, mapped as a date. */
    @Field(type = FieldType.Date, format = DateFormat.epoch_millis)
    private long updated;

    /** HTTP status code seen on the last check (e.g. 200, 404, 500). */
    private int lastStatus;

    /** Total duration of the last check request, in milliseconds. */
    private long durationMillis;

    /** Time spent establishing the HTTP connection on the last check, in milliseconds. */
    private long connectTimeMillis;

    /** True when the last check satisfied the health criteria. */
    private boolean healthCheckOk;

    /** The "bad patterns" found during the last check; starts out empty. */
    private Set<String> badPatternsEncountered = new HashSet<>();

    /** No-argument constructor; leaves every field at its default value. */
    public CheckedUrl() {
    }

    /**
     * Creates an entry for the given URL, stamping both the creation
     * and the update time with the current system time.
     *
     * @param url the URL this document describes
     */
    public CheckedUrl(String url) {
        final long now = System.currentTimeMillis();
        this.url = url;
        this.created = now;
        this.updated = now;
    }

    // --- Accessors ---

    /** @return the URL (document ID) */
    public String getUrl() {
        return this.url;
    }

    /** @param url the URL (document ID) */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the creation time in epoch milliseconds */
    public long getCreated() {
        return this.created;
    }

    /** @param created the creation time in epoch milliseconds */
    public void setCreated(long created) {
        this.created = created;
    }

    /** @return the time of the last check in epoch milliseconds */
    public long getUpdated() {
        return this.updated;
    }

    /** @param updated the time of the last check in epoch milliseconds */
    public void setUpdated(long updated) {
        this.updated = updated;
    }

    /** @return the last observed HTTP status code */
    public int getLastStatus() {
        return this.lastStatus;
    }

    /** @param lastStatus the last observed HTTP status code */
    public void setLastStatus(int lastStatus) {
        this.lastStatus = lastStatus;
    }

    /** @return the total duration of the last check, in milliseconds */
    public long getDurationMillis() {
        return this.durationMillis;
    }

    /** @param durationMillis the total duration of the last check, in milliseconds */
    public void setDurationMillis(long durationMillis) {
        this.durationMillis = durationMillis;
    }

    /** @return the connection time of the last check, in milliseconds */
    public long getConnectTimeMillis() {
        return this.connectTimeMillis;
    }

    /** @param connectTimeMillis the connection time of the last check, in milliseconds */
    public void setConnectTimeMillis(long connectTimeMillis) {
        this.connectTimeMillis = connectTimeMillis;
    }

    /** @return whether the last check passed the health criteria */
    public boolean isHealthCheckOk() {
        return this.healthCheckOk;
    }

    /** @param healthCheckOk whether the last check passed the health criteria */
    public void setHealthCheckOk(boolean healthCheckOk) {
        this.healthCheckOk = healthCheckOk;
    }

    /** @return the bad patterns found during the last check */
    public Set<String> getBadPatternsEncountered() {
        return this.badPatternsEncountered;
    }

    /** @param badPatternsEncountered the bad patterns found during the last check; stored as-is */
    public void setBadPatternsEncountered(Set<String> badPatternsEncountered) {
        this.badPatternsEncountered = badPatternsEncountered;
    }

    /** Renders every field in a single-line {@code CheckedUrl{...}} form. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("CheckedUrl{");
        text.append("url='").append(url).append('\'');
        text.append(", created=").append(created);
        text.append(", updated=").append(updated);
        text.append(", lastStatus=").append(lastStatus);
        text.append(", durationMillis=").append(durationMillis);
        text.append(", connectTimeMillis=").append(connectTimeMillis);
        text.append(", healthCheckOk=").append(healthCheckOk);
        text.append(", badPatternsEncountered=").append(badPatternsEncountered);
        text.append('}');
        return text.toString();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.open4goods.ui.repository;

import java.util.List;

import org.open4goods.ui.model.CheckedUrl;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Repository;

/**
 * Elasticsearch repository for CheckedUrl documents, keyed by a String ID.
 * Besides the operations inherited from ElasticsearchRepository, it declares
 * one finder by last HTTP status.
 */
@Repository
public interface CheckedUrlRepository extends ElasticsearchRepository<CheckedUrl, String> {

/**
* Retrieves all URLs that have a specific lastStatus code.
* Example usage: repository.getByLastStatus(500)
* No query is declared here; presumably Spring Data derives it from the
* method name against the lastStatus property (TODO confirm).
*
* @param lastStatus The HTTP status code
* @return A list of CheckedUrl documents matching the given status
*/
List<CheckedUrl> getByLastStatus(int lastStatus);




}
Loading

0 comments on commit 9a6fc54

Please sign in to comment.