From afebbb313ba1acc127beb06c6f15cacc2b01696c Mon Sep 17 00:00:00 2001 From: goulven Date: Sat, 30 Dec 2023 03:42:35 +0100 Subject: [PATCH] GoogleTaxonomy + Batch layout --- .../org/open4goods/api/config/ApiConfig.java | 25 +- .../api/controller/api/BatchController.java | 16 ++ .../open4goods/api/services/BatchService.java | 67 ++---- .../services/RealtimeAggregationService.java | 16 +- .../AttributeRealtimeAggregationService.java | 7 +- ...> TaxonomyRealTimeAggregationService.java} | 61 ++++- .../org/open4goods/config/TestConfig.java | 22 ++ .../org/open4goods/dao/ProductRepository.java | 2 +- .../open4goods/model/data/DataFragment.java | 2 +- .../org/open4goods/model/product/Product.java | 16 ++ .../services/GoogleTaxonomyService.java | 215 ++++++++++++++++++ .../commons/GoogleTaxonomyServiceTest.java | 76 +++++++ 12 files changed, 461 insertions(+), 64 deletions(-) rename api/src/main/java/org/open4goods/api/services/aggregation/services/realtime/{VerticalRealTimeAggregationService.java => TaxonomyRealTimeAggregationService.java} (52%) create mode 100644 commons/src/main/java/org/open4goods/config/TestConfig.java create mode 100644 commons/src/main/java/org/open4goods/services/GoogleTaxonomyService.java create mode 100644 commons/src/test/java/org/open4goods/commons/GoogleTaxonomyServiceTest.java diff --git a/api/src/main/java/org/open4goods/api/config/ApiConfig.java b/api/src/main/java/org/open4goods/api/config/ApiConfig.java index 6f371e75d..0303213d8 100644 --- a/api/src/main/java/org/open4goods/api/config/ApiConfig.java +++ b/api/src/main/java/org/open4goods/api/config/ApiConfig.java @@ -23,6 +23,7 @@ import org.open4goods.crawler.services.fetching.CsvDatasourceFetchingService; import org.open4goods.crawler.services.fetching.WebDatasourceFetchingService; import org.open4goods.dao.ProductRepository; +import org.open4goods.exceptions.InvalidParameterException; import org.open4goods.model.constants.CacheConstants; import org.open4goods.model.constants.Currency; import org.open4goods.model.constants.TimeConstants; @@ -34,6 +35,7 @@ import org.open4goods.services.BrandService; import org.open4goods.services.DataSourceConfigService; import org.open4goods.services.EvaluationService; +import org.open4goods.services.GoogleTaxonomyService; import org.open4goods.services.Gs1PrefixService; import org.open4goods.services.ImageMagickService; import org.open4goods.services.RemoteFileCachingService; @@ -149,6 +151,23 @@ AiService aiService (AiAgent nudgerAgent, VerticalsConfigService verticalService return new AiService(nudgerAgent, verticalService, spelEvaluationService); } + @Bean + public GoogleTaxonomyService googleTaxonomyService(@Autowired RemoteFileCachingService remoteFileCachingService) { + GoogleTaxonomyService gts = new GoogleTaxonomyService(remoteFileCachingService); + + // TODO : From conf + // TODO : Add others + try { + gts.loadGoogleTaxonUrl("https://www.google.com/basepages/producttype/taxonomy-with-ids.fr-FR.txt", "fr"); + gts.loadGoogleTaxonUrl("https://www.google.com/basepages/producttype/taxonomy-with-ids.en-US.txt", "fr"); + } catch (Exception e) { + logger.error("Error loading google taxonomy", e); + } + + + return gts; + } + @Bean AiAgent nudgerAgent(@Autowired ChatLanguageModel chatLanguageModel) { return AiServices.builder(AiAgent.class) @@ -178,8 +197,10 @@ RealtimeAggregationService realtimeAggregationService( @Autowired EvaluationServ @Autowired DataSourceConfigService dataSourceConfigService, @Autowired VerticalsConfigService configService, @Autowired BarcodeValidationService barcodeValidationService, - @Autowired BrandService brandservice) { - return new RealtimeAggregationService(evaluationService, referentielService, standardiserService, autowireBeanFactory, aggregatedDataRepository, apiProperties, gs1prefixService, dataSourceConfigService, configService, barcodeValidationService,brandservice); + @Autowired BrandService brandservice, + @Autowired GoogleTaxonomyService gts + ) { + return new RealtimeAggregationService(evaluationService, referentielService, standardiserService, autowireBeanFactory, aggregatedDataRepository, apiProperties, gs1prefixService, dataSourceConfigService, configService, barcodeValidationService,brandservice, gts); } @Bean diff --git a/api/src/main/java/org/open4goods/api/controller/api/BatchController.java b/api/src/main/java/org/open4goods/api/controller/api/BatchController.java index ea1ac2868..7c698ed73 100644 --- a/api/src/main/java/org/open4goods/api/controller/api/BatchController.java +++ b/api/src/main/java/org/open4goods/api/controller/api/BatchController.java @@ -6,12 +6,16 @@ import org.open4goods.api.services.BatchService; import org.open4goods.config.yml.ui.VerticalConfig; +import org.open4goods.dao.ProductRepository; import org.open4goods.exceptions.InvalidParameterException; +import org.open4goods.exceptions.ResourceNotFoundException; import org.open4goods.model.constants.RolesConstants; import org.open4goods.services.SerialisationService; import org.open4goods.services.VerticalsConfigService; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.PutMapping; import org.springframework.web.bind.annotation.RequestBody; @@ -39,6 +43,11 @@ public class BatchController { private final BatchService batchService; + + @Autowired + private ProductRepository repository; + + public BatchController(BatchService batchService, SerialisationService serialisationService, VerticalsConfigService verticalsConfigService) { this.serialisationService = serialisationService; this.service = verticalsConfigService; @@ -85,5 +94,12 @@ public void scoreVerticals() throws InvalidParameterException, IOException { public void sanitize() throws InvalidParameterException, IOException { batchService.sanitize(); } + + @GetMapping("/sanitisation/{gtin}") + @Operation(summary="Launch sanitisation of all products") + public void sanitizeOne(@PathVariable String gtin ) throws InvalidParameterException, IOException, ResourceNotFoundException { + batchService.sanitizeOne(repository.getById(gtin)); + } + } diff --git a/api/src/main/java/org/open4goods/api/services/BatchService.java b/api/src/main/java/org/open4goods/api/services/BatchService.java index 7412e1d1f..91351a9f6 100644 --- a/api/src/main/java/org/open4goods/api/services/BatchService.java +++ b/api/src/main/java/org/open4goods/api/services/BatchService.java @@ -66,8 +66,6 @@ public BatchService( this.realtimeAggregationService = realtimeAggregationService; } - - /** * update all verticals. Scheduled @@ -82,53 +80,6 @@ public void scoreAll() { -// /** -// * Update a vertical -// * @param verticalId -// */ -// public void fullUpdate(String verticalId) { -// VerticalConfig vertical = verticalsService.getConfigById(verticalId).orElseThrow(); -// fullUpdate(vertical); -// } - -// /** -// * Update verticals with the batch Aggragator -// * @throws AggregationSkipException -// */ -// public void fullUpdate(VerticalConfig vertical) { -// -// logger.info("Full update for {}", vertical.getId()); -// ScoringBatchedAggregator batchAgg = batchAggregationService.getAggregator(vertical); -// RealTimeAggregator rtAgg = realtimeAggregationService.getAggregator(vertical); -// -// Stream productsStream = dataRepository.getProductsMatchingVertical(vertical); -// -// List productBag = new ArrayList<>(); -// logger.info("Starting realtime aggregation"); -// // Realtime aggregation -// productsStream.forEach(data -> { -// try { -// dedicatedLogger.debug("Realtime aggregation for {}", data); -// //TODO : Bad design -// productBag.add( rtAgg.build(data.getFragment(), data)); -// } catch (AggregationSkipException e) { -// dedicatedLogger.warn("Error on realtimeaggregation aggregation for {}", data, e); -// } -// }); -// -// dedicatedLogger.info("Starting batch aggregation"); -// // Batched (scoring) aggregation -// batchAgg.update(productBag); -// -// // TODO : Bulk size from conf -// Lists.partition(productBag, 200).forEach(p -> { -// dedicatedLogger.info("Indexing {} products", p.size()); -// dataRepository.index(p); -// }); -// -// } -// - /** * Score verticals with the batch Aggragator @@ -140,7 +91,7 @@ public void batchScore(VerticalConfig vertical) { ScoringBatchedAggregator batchAgg = batchAggregationService.getScoringAggregator(vertical); - List productBag = dataRepository.getProductsMatchingVertical(vertical).toList(); + List productBag = dataRepository.getProductsMatchingCategories(vertical).toList(); // Batched (scoring) aggregation batchAgg.update(productBag); logger.info("Score batching : indexing {} products", productBag.size()); @@ -164,8 +115,20 @@ public void sanitize() { batchAgg.update(p); dataRepository.index(p); }); - logger.info("started : Sanitisation batching for all items"); - + logger.info("done: Sanitisation batching for all items"); + } + + /** + * Launch the sanitisation of one product + * @param product + */ + public void sanitizeOne(Product product) { + logger.info("started : Sanitisation batching for {}",product); + SanitisationBatchedAggregator batchAgg = batchAggregationService.getFullSanitisationAggregator(); + + batchAgg.update(product); + dataRepository.index(product); + logger.info("done : Sanitisation batching for {}", product); } diff --git a/api/src/main/java/org/open4goods/api/services/RealtimeAggregationService.java b/api/src/main/java/org/open4goods/api/services/RealtimeAggregationService.java index c488d1ce6..49fbdbd15 100644 --- a/api/src/main/java/org/open4goods/api/services/RealtimeAggregationService.java +++ b/api/src/main/java/org/open4goods/api/services/RealtimeAggregationService.java @@ -14,7 +14,7 @@ import org.open4goods.api.services.aggregation.services.realtime.MediaAggregationService; import org.open4goods.api.services.aggregation.services.realtime.NamesAggregationService; import org.open4goods.api.services.aggregation.services.realtime.PriceAggregationService; -import org.open4goods.api.services.aggregation.services.realtime.VerticalRealTimeAggregationService; +import org.open4goods.api.services.aggregation.services.realtime.TaxonomyRealTimeAggregationService; import org.open4goods.config.yml.ui.VerticalConfig; import org.open4goods.config.yml.ui.VerticalProperties; import org.open4goods.dao.ProductRepository; @@ -25,6 +25,7 @@ import org.open4goods.services.BrandService; import org.open4goods.services.DataSourceConfigService; import org.open4goods.services.EvaluationService; +import org.open4goods.services.GoogleTaxonomyService; import org.open4goods.services.Gs1PrefixService; import org.open4goods.services.StandardiserService; import org.open4goods.services.VerticalsConfigService; @@ -67,6 +68,8 @@ public class RealtimeAggregationService { private BarcodeValidationService barcodeValidationService; private BrandService brandService; + + private GoogleTaxonomyService taxonomyService; public RealtimeAggregationService(EvaluationService evaluationService, ReferentielService referentielService, StandardiserService standardiserService, @@ -74,7 +77,9 @@ public RealtimeAggregationService(EvaluationService evaluationService, ApiProperties apiProperties, Gs1PrefixService gs1prefixService, DataSourceConfigService dataSourceConfigService, VerticalsConfigService configService, BarcodeValidationService barcodeValidationService, - BrandService brandService) { + BrandService brandService, + GoogleTaxonomyService taxonomyService + ) { super(); this.evaluationService = evaluationService; this.referentielService = referentielService; @@ -87,7 +92,7 @@ public RealtimeAggregationService(EvaluationService evaluationService, verticalConfigService = configService; this.brandService=brandService; this.barcodeValidationService = barcodeValidationService; - + this.taxonomyService = taxonomyService; aggregator = getAggregator(configService.getConfigById(VerticalsConfigService.MAIN_VERTICAL_NAME).get()); @@ -131,7 +136,7 @@ RealTimeAggregator getAggregator(VerticalConfig config) { services.add(new BarCodeAggregationService(apiProperties.logsFolder(), gs1prefixService,barcodeValidationService, apiProperties.isDedicatedLoggerToConsole())); - services.add(new VerticalRealTimeAggregationService( apiProperties.logsFolder(), verticalConfigService, apiProperties.isDedicatedLoggerToConsole())); + services.add(new TaxonomyRealTimeAggregationService( apiProperties.logsFolder(), verticalConfigService, taxonomyService, apiProperties.isDedicatedLoggerToConsole())); services.add(new AttributeRealtimeAggregationService(verticalConfigService, brandService, apiProperties.logsFolder(), apiProperties.isDedicatedLoggerToConsole())); @@ -159,6 +164,9 @@ RealTimeAggregator getAggregator(VerticalConfig config) { services.add(new MediaAggregationService(config, apiProperties.logsFolder(), apiProperties.isDedicatedLoggerToConsole())); + + + final RealTimeAggregator ret = new RealTimeAggregator(services); autowireBeanFactory.autowireBean(ret); diff --git a/api/src/main/java/org/open4goods/api/services/aggregation/services/AttributeRealtimeAggregationService.java b/api/src/main/java/org/open4goods/api/services/aggregation/services/AttributeRealtimeAggregationService.java index 31d270203..af7901408 100644 --- a/api/src/main/java/org/open4goods/api/services/aggregation/services/AttributeRealtimeAggregationService.java +++ b/api/src/main/java/org/open4goods/api/services/aggregation/services/AttributeRealtimeAggregationService.java @@ -171,7 +171,12 @@ public void onDataFragment(final DataFragment dataFragment, final Product produc // Removing - product.getAttributes().setUnmapedAttributes(product.getAttributes().getUnmapedAttributes().stream().filter(e -> !toRemoveFromUnmatched.contains(e.getName())) .collect(Collectors.toSet())); + product.getAttributes().setUnmapedAttributes(product.getAttributes().getUnmapedAttributes().stream() + // TODO : Should be from path + // TODO : apply from sanitisation + .filter(e -> !e.getName().contains("CATEGORY")) + .filter(e -> !toRemoveFromUnmatched.contains(e.getName())) + .collect(Collectors.toSet())); diff --git a/api/src/main/java/org/open4goods/api/services/aggregation/services/realtime/VerticalRealTimeAggregationService.java b/api/src/main/java/org/open4goods/api/services/aggregation/services/realtime/TaxonomyRealTimeAggregationService.java similarity index 52% rename from api/src/main/java/org/open4goods/api/services/aggregation/services/realtime/VerticalRealTimeAggregationService.java rename to api/src/main/java/org/open4goods/api/services/aggregation/services/realtime/TaxonomyRealTimeAggregationService.java index e45d72d90..f28c175e5 100644 --- a/api/src/main/java/org/open4goods/api/services/aggregation/services/realtime/VerticalRealTimeAggregationService.java +++ b/api/src/main/java/org/open4goods/api/services/aggregation/services/realtime/TaxonomyRealTimeAggregationService.java @@ -1,11 +1,15 @@ package org.open4goods.api.services.aggregation.services.realtime; +import java.util.ArrayList; +import java.util.List; + import org.apache.commons.lang3.StringUtils; import org.open4goods.api.services.aggregation.AbstractRealTimeAggregationService; import org.open4goods.config.yml.ui.VerticalConfig; import org.open4goods.model.data.DataFragment; import org.open4goods.model.data.UnindexedKeyVal; import org.open4goods.model.product.Product; +import org.open4goods.services.GoogleTaxonomyService; import org.open4goods.services.VerticalsConfigService; /** @@ -13,19 +17,71 @@ * @author goulven * */ -public class VerticalRealTimeAggregationService extends AbstractRealTimeAggregationService { +public class TaxonomyRealTimeAggregationService extends AbstractRealTimeAggregationService { private VerticalsConfigService verticalService; + private GoogleTaxonomyService taxonomyService; - public VerticalRealTimeAggregationService( final String logsFolder, final VerticalsConfigService verticalService,boolean toConsole) { + public TaxonomyRealTimeAggregationService( final String logsFolder, final VerticalsConfigService verticalService,GoogleTaxonomyService taxonomyService, boolean toConsole) { super(logsFolder, toConsole); this.verticalService = verticalService; + this.taxonomyService = taxonomyService; } @Override public void onDataFragment(final DataFragment input, final Product output) { + setVerticalFromCategories(input, output); + + Integer taxonomy = googleTaxonomy(input); + + if (null != taxonomy) { + output.setGoogleTaxonomyId(taxonomy); + } + } + + + + /** + * Try to detect the google taxonomy id + * @param input + * @return + */ + private Integer googleTaxonomy(final DataFragment input) { + Integer taxonomyId = null; + + List taxons =new ArrayList<>(); + + //TODO : equivalent in a batch service, for stock processing + input.getAttributes().forEach(a -> { + String i = a.getName(); + + if (i.contains("CATEGORY")) { + Integer t = taxonomyService.resolve(a.getValue()); + if (null != t) { + taxons.add(t); + } + } + }); + + if (taxons.size() == 1) { + taxonomyId = taxons.stream().findAny().orElse(null); + } else if (taxons.size() > 1) { + // TODO : The language (should not be needed), will bug when other languages + taxonomyId = taxonomyService.selectDeepest("fr", taxons); + } + + return taxonomyId; + } + + + /** + * Defines a vertical and a taxonomy id from the config based matching + * @param input + * @param output + */ + private void setVerticalFromCategories(final DataFragment input, final Product output) { String category = input.getCategory(); @@ -55,7 +111,6 @@ public void onDataFragment(final DataFragment input, final Product output) { dedicatedLogger.info("No category in {}, removing vertical", output); output.setVertical(null); } - } } diff --git a/commons/src/main/java/org/open4goods/config/TestConfig.java b/commons/src/main/java/org/open4goods/config/TestConfig.java new file mode 100644 index 000000000..ac69cac82 --- /dev/null +++ b/commons/src/main/java/org/open4goods/config/TestConfig.java @@ -0,0 +1,22 @@ +package org.open4goods.config; + +import org.open4goods.services.GoogleTaxonomyService; +import org.open4goods.services.RemoteFileCachingService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class TestConfig { + + @Bean + public RemoteFileCachingService remoteFileCachingService() { + // TODO : from env variable + return new RemoteFileCachingService("/tmp"); + } + + @Bean + public GoogleTaxonomyService googleTaxonomyService(@Autowired RemoteFileCachingService remoteFileCachingService) { + return new GoogleTaxonomyService(remoteFileCachingService); + } +} \ No newline at end of file diff --git a/commons/src/main/java/org/open4goods/dao/ProductRepository.java b/commons/src/main/java/org/open4goods/dao/ProductRepository.java index 2361f9165..36c26ee57 100644 --- a/commons/src/main/java/org/open4goods/dao/ProductRepository.java +++ b/commons/src/main/java/org/open4goods/dao/ProductRepository.java @@ -90,7 +90,7 @@ public ProductRepository() { * @param v * @return */ - public Stream getProductsMatchingVertical(VerticalConfig v) { + public Stream getProductsMatchingCategories(VerticalConfig v) { Criteria c = new Criteria("datasourceCategories").in(v.getMatchingCategories()) // TODO : Add exclusion // .and(new Criteria("datasourceCategories").notIn(v.getMatchingCategories())) diff --git a/commons/src/main/java/org/open4goods/model/data/DataFragment.java b/commons/src/main/java/org/open4goods/model/data/DataFragment.java index ce9833c1f..0b554ba11 100644 --- a/commons/src/main/java/org/open4goods/model/data/DataFragment.java +++ b/commons/src/main/java/org/open4goods/model/data/DataFragment.java @@ -801,7 +801,7 @@ public void addProductTag(final String category) { return; } - this.category = IdHelper.getCategoryName(category.trim().toUpperCase()); + this.category = IdHelper.getCategoryName(category); } diff --git a/commons/src/main/java/org/open4goods/model/product/Product.java b/commons/src/main/java/org/open4goods/model/product/Product.java index 5190eb69b..91e9e5dde 100644 --- a/commons/src/main/java/org/open4goods/model/product/Product.java +++ b/commons/src/main/java/org/open4goods/model/product/Product.java @@ -134,6 +134,14 @@ public class Product implements Standardisable { @Field(index = false, store = false, type = FieldType.Object) private GtinInfo gtinInfos = new GtinInfo(); + + /** + * The google taxonomy id + */ + @Field(index = true, store = false, type = FieldType.Integer) + private Integer googleTaxonomyId; + + /** * The set of participating "productCategories", on datasources that build this * aggregatedData @@ -766,6 +774,14 @@ public void setAiDescriptions(Map aiDescriptions) { this.aiDescriptions = aiDescriptions; } + public Integer getGoogleTaxonomyId() { + return googleTaxonomyId; + } + + public void setGoogleTaxonomyId(Integer googleTaxonomyId) { + this.googleTaxonomyId = googleTaxonomyId; + } + diff --git a/commons/src/main/java/org/open4goods/services/GoogleTaxonomyService.java b/commons/src/main/java/org/open4goods/services/GoogleTaxonomyService.java new file mode 100644 index 000000000..cd46f34d5 --- /dev/null +++ b/commons/src/main/java/org/open4goods/services/GoogleTaxonomyService.java @@ -0,0 +1,215 @@ +package org.open4goods.services; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang3.StringUtils; +import org.open4goods.exceptions.InvalidParameterException; +import org.open4goods.helper.IdHelper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; + +/** + * This service is in charge to google taxonomy id, from a product category + * + * @author goulven + * + * + */ +public class GoogleTaxonomyService { + + protected static final Logger logger = LoggerFactory.getLogger(GoogleTaxonomyService.class); + + /** + * A mat associating last categories path with taxonomy id + */ + Map lastCategoriesId = new HashMap<>(); + + /** + * A mat associating last categories path with taxonomy id + */ + Map fullCategoriesId = new HashMap<>(); + + /** + * The full taxonomy indexed by language + */ + + Map>> localizedTaxonomy = new HashMap<>(); + + private RemoteFileCachingService fileCachingService; + + public GoogleTaxonomyService(RemoteFileCachingService fileCachingService) { + super(); + this.fileCachingService = fileCachingService; + } + + /** + * Load a localized taxonomy file + * + * @param url + * @param language + * @throws IOException + * @throws InvalidParameterException + */ + public void loadGoogleTaxonUrl(String url, String language) throws IOException, InvalidParameterException { + + localizedTaxonomy.put(language, new HashMap<>()); + + File taxonFile = fileCachingService.getResource(url); + + List lines = Files.readAllLines(taxonFile.toPath()); + + for (String line : lines) { + + // Ignorer les commentaires + if (line.startsWith("#")) { + continue; + } + + int pos = line.indexOf("-"); + + // Retrieving the id + Integer id = Integer.valueOf(line.substring(0, pos - 1)); + + // Adding in the full category id + fullCategoriesId.put(IdHelper.azCharAndDigits(line.substring(pos + 2)).toLowerCase(), id); + + // The number + List fragments = Arrays.asList(line.substring(pos+1).split(">")).stream().map(e -> e.trim()).toList(); + +// // Utilisation d'une variable pour stocker la catégorie trouvée + String foundCategory = null; + + // Recherche de la dernière catégorie non vide + for (int i = fragments.size() - 1; i >= 0; i--) { + String val = fragments.get(i); + + if (!StringUtils.isEmpty(val)) { + foundCategory = val; + break; + } + } + + // Traitement de la catégorie trouvée + if (foundCategory != null) { + String fcc = IdHelper.azCharAndDigits(foundCategory).toLowerCase(); + if (lastCategoriesId.containsKey(fcc)) { + // TODO : logger.error +// System.err.println("Category exists : " + foundCategory); + } else { + List cats = new ArrayList<>(); +// cats.add(foundCategory); + // Utilisation d'une boucle améliorée pour la récupération des catégories + for (int j = 0; j < fragments.size(); j++) { + String catVal = fragments.get(j); + if (StringUtils.isEmpty(catVal)) { + break; + } else { + cats.add(catVal); + } + } + + lastCategoriesId.put(fcc, id); + localizedTaxonomy.get(language).put(id, cats); + } + } + } + + } + + /** + * Resolve a category to a taxonomy id + * @param category + * @return + */ + public Integer resolve (String category) { + + if (StringUtils.isEmpty(category)) { + return null; + } + + String token = IdHelper.azCharAndDigits( category).toLowerCase(); + + // First resolving with full path + Integer ret = fullCategoriesId.get(token); + + if (null == ret ) { + // Fail, resolving with last path id + ret = lastCategoriesId.get(token); + } + + return ret; + + } + + + /** + * Resolve the deepest category if from several one + * @param taxonomyIds + * @return + */ + public int selectDeepest( String language, List taxonomyIds) { + + int deepest = -1; + int deepestSize = -1; + + + for (int i = 0; i < taxonomyIds.size(); i++) { + + int size = localizedTaxonomy.get(language).get(taxonomyIds.get(i)).size(); + if ( size > deepestSize) { + deepest = taxonomyIds.get(i); + deepestSize = size; + } + } + + return deepest; + + } + + /** + * Resolve the deepest category if from several one + * @param language + * @param taxonomyIds + * @return + */ + public int selectDeepest(String language, Integer... taxonomyIds) { + return selectDeepest(language, Arrays.asList(taxonomyIds)); + } + + + public Map getLastCategoriesId() { + return lastCategoriesId; + } + + public void setLastCategoriesId(Map lastCategoriesId) { + this.lastCategoriesId = lastCategoriesId; + } + + public Map>> getLocalizedTaxonomy() { + return localizedTaxonomy; + } + + public void setLocalizedTaxonomy(Map>> localizedTaxonomy) { + this.localizedTaxonomy = localizedTaxonomy; + } + + public Map getFullCategoriesId() { + return fullCategoriesId; + } + + public void setFullCategoriesId(Map fullCategoriesId) { + this.fullCategoriesId = fullCategoriesId; + } + +} diff --git a/commons/src/test/java/org/open4goods/commons/GoogleTaxonomyServiceTest.java b/commons/src/test/java/org/open4goods/commons/GoogleTaxonomyServiceTest.java new file mode 100644 index 000000000..a60e1604e --- /dev/null +++ b/commons/src/test/java/org/open4goods/commons/GoogleTaxonomyServiceTest.java @@ -0,0 +1,76 @@ +package org.open4goods.commons; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; + +import org.junit.jupiter.api.Test; +import org.open4goods.config.TestConfig; +import org.open4goods.exceptions.InvalidParameterException; +import org.open4goods.services.GoogleTaxonomyService; +import org.open4goods.services.RemoteFileCachingService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; + +@SpringBootTest(classes = TestConfig.class) +public class GoogleTaxonomyServiceTest { + + @Autowired + private GoogleTaxonomyService gts; + + + @Test + public void testLoadFile() throws IOException, InvalidParameterException { + + // Call the loadFile method + gts.loadGoogleTaxonUrl("https://www.google.com/basepages/producttype/taxonomy-with-ids.fr-FR.txt", "fr"); + gts.loadGoogleTaxonUrl("https://www.google.com/basepages/producttype/taxonomy-with-ids.fr-CH.txt", "fr"); + + + // Primary resolution + int id = gts.resolve("Appareils électroniques > Réseaux"); + assertTrue(id == 342); + + // Null / empty check + assertTrue(gts.resolve("") == null); + assertTrue(gts.resolve(null) == null); + assertTrue(gts.resolve("sc<;w,x; C?ML>W") == null); + + + // Checking the deep resolution + + gts.getLocalizedTaxonomy(); + + // 505767 + int deep4 = gts.resolve("Appareils électroniques > Accessoires électroniques > Composants d'ordinateur > Périphériques de stockage > Accessoires pour disques durs > Boîtiers et fixations pour disques durs"); + assertTrue(deep4 == 505767); + + // 276 + int deep3 = gts.resolve("Appareils électroniques > Accessoires électroniques > Alimentation > Piles"); + assertTrue(deep3 == 276); + + + // 3895 + int deep2 = gts.resolve("Appareils électroniques > Accessoires pour GPS"); + assertTrue(deep2 == 3895); + + + // 222 + int deep1 = gts.resolve("Appareils électroniques"); + assertTrue(deep1 == 222); + + assertTrue(gts.selectDeepest("fr", 505767, 276, 3895, 222)== 505767); + + + // With a buggy id + assertTrue(gts.selectDeepest("fr", 279, 222, 276)== 276); + + + assertTrue(gts.selectDeepest("fr", 3895, 222)== 3895); + + assertTrue(gts.selectDeepest("fr", 222)== 222); + + + + + } +} \ No newline at end of file