Skip to content

Commit

Permalink
feat: duplicate_geography_id, duplicate_geo_json_key and `forbidden_geography_id` notices (#1953)
Browse files Browse the repository at this point in the history
  • Loading branch information
cka-y authored Jan 31, 2025
1 parent e84a9aa commit 6e2c75e
Show file tree
Hide file tree
Showing 10 changed files with 425 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2025 MobilityData LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.mobilitydata.gtfsvalidator.notice;

import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.ERROR;

import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice;

/**
 * The same key is used by more than one feature in `locations.geojson`.
 *
 * <p>Every feature in the GeoJSON file must carry its own unique key.
 */
@GtfsValidationNotice(severity = ERROR)
public class DuplicateGeoJsonKeyNotice extends ValidationNotice {

  /** The key that occurs more than once. */
  private final String featureId;

  /** The index of the first feature that uses this key. */
  private final int firstIndex;

  /** The index of the other feature that uses this key. */
  private final int secondIndex;

  /**
   * Creates the notice for a pair of features sharing one key.
   *
   * @param featureId the duplicated key
   * @param firstIndex the index of the first feature with that key
   * @param secondIndex the index of the other feature with that key
   */
  public DuplicateGeoJsonKeyNotice(String featureId, int firstIndex, int secondIndex) {
    this.secondIndex = secondIndex;
    this.firstIndex = firstIndex;
    this.featureId = featureId;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ public GtfsGeoJsonFeature extractFeature(
gtfsGeoJsonFeature = new GtfsGeoJsonFeature();
gtfsGeoJsonFeature.setFeatureId(
featureObject.get(GtfsGeoJsonFeature.FEATURE_ID_FIELD_NAME).getAsString());
gtfsGeoJsonFeature.setFeatureIndex(featureIndex);

String type = geometry.get(GtfsGeoJsonFeature.GEOMETRY_TYPE_FIELD_NAME).getAsString();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public final class GtfsGeoJsonFeature implements GtfsEntity {
private Geometry geometryDefinition; // The geometry of the feature.
private String stopName; // The name of the location as displayed to the riders.
private String stopDesc; // A description of the location.
private int featureIndex;

public GtfsGeoJsonFeature() {}

Expand All @@ -45,6 +46,10 @@ public int csvRowNumber() {
return 0;
}

/** Returns the feature index currently stored on this entity. */
public int featureIndex() {
  return this.featureIndex;
}

@Nonnull
public String featureId() {
return featureId;
Expand Down Expand Up @@ -113,6 +118,10 @@ public void setStopDesc(@Nullable String stopDesc) {
this.stopDesc = stopDesc;
}

/**
 * Stores the feature index on this entity.
 *
 * @param featureIndex the value subsequently returned by {@link #featureIndex()}
 */
public void setFeatureIndex(int featureIndex) {
this.featureIndex = featureIndex;
}

/** Builder class for GtfsGeoJsonFeature. */
public static class Builder {
private String featureId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.mobilitydata.gtfsvalidator.notice.DuplicateGeoJsonKeyNotice;
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;

/**
Expand Down Expand Up @@ -76,14 +77,11 @@ private void setupIndices(NoticeContainer noticeContainer) {
GtfsGeoJsonFeature oldEntity = byLocationIdMap.getOrDefault(newEntity.featureId(), null);
if (oldEntity == null) {
byLocationIdMap.put(newEntity.featureId(), newEntity);
} else {
noticeContainer.addValidationNotice(
new DuplicateGeoJsonKeyNotice(
oldEntity.featureId(), oldEntity.featureIndex(), newEntity.featureIndex()));
}
// TODO: Removed that code until the notice is supported.
// else {
// noticeContainer.addValidationNotice(
// new JsonDuplicateKeyNotice(
// gtfsFilename(), GtfsGeoJsonFeature.FEATURE_ID_FIELD_NAME,
// newEntity.featureId()));
// }
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import org.mobilitydata.gtfsvalidator.table.GtfsStopTime;

/**
* Validates that only one of stop_id, location_group_id or location_id is defined in a given record
* of stop_times.txt
* Validates that only one of `stop_id`, `location_group_id` or `location_id` is defined in a given
* record of stop_times.txt
*
* <p>Generated notice: {@link MissingRequiredFieldNotice}.
*
Expand Down Expand Up @@ -51,26 +51,21 @@ public void validate(GtfsStopTime stopTime, NoticeContainer noticeContainer) {
noticeContainer.addValidationNotice(
new MissingRequiredFieldNotice(
GtfsStopTime.FILENAME, stopTime.csvRowNumber(), GtfsStopTime.STOP_ID_FIELD_NAME));
} else if (presenceCount > 1) {
// More than one geography ID is present, but only one is allowed
noticeContainer.addValidationNotice(
new ForbiddenGeographyIdNotice(
stopTime.csvRowNumber(),
stopTime.hasStopId() ? stopTime.stopId() : null,
stopTime.hasLocationGroupId() ? stopTime.locationGroupId() : null,
stopTime.hasLocationId() ? stopTime.locationId() : null));
}
// TODO: Put this back once we are ready to publish this notice.
// else if (presenceCount > 1) {
// // More than one geography ID is present, but only one is allowed
// noticeContainer.addValidationNotice(
// new ForbiddenGeographyIdNotice(
// stopTime.csvRowNumber(),
// stopTime.stopId(),
// stopTime.locationGroupId(),
// stopTime.locationId()));
// }
}

@Override
public boolean shouldCallValidate(ColumnInspector header) {
if (header.hasColumn(GtfsStopTime.STOP_ID_FIELD_NAME)
return header.hasColumn(GtfsStopTime.STOP_ID_FIELD_NAME)
|| header.hasColumn(GtfsStopTime.LOCATION_GROUP_ID_FIELD_NAME)
|| header.hasColumn(GtfsStopTime.LOCATION_ID_FIELD_NAME)) {
return true;
}
return false;
|| header.hasColumn(GtfsStopTime.LOCATION_ID_FIELD_NAME);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/*
* Copyright 2025 MobilityData
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.mobilitydata.gtfsvalidator.validator;

import static org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice.SectionRef.FILE_REQUIREMENTS;
import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.ERROR;

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.inject.Inject;
import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice;
import org.mobilitydata.gtfsvalidator.annotation.GtfsValidator;
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
import org.mobilitydata.gtfsvalidator.notice.ValidationNotice;
import org.mobilitydata.gtfsvalidator.table.*;

/**
 * Validates that a geography id is not shared between the features of "locations.geojson", the
 * stops of "stops.txt" and the location groups table.
 *
 * <p>Only ids that appear in at least two different sources are reported; an id that merely
 * repeats inside a single source is ignored here.
 *
 * <p>Generated notice: {@link DuplicateGeographyIdNotice}.
 */
@GtfsValidator
public class UniqueGeographyIdValidator extends FileValidator {
  private final GtfsStopTableContainer stopTable;
  private final GtfsLocationGroupsTableContainer locationGroupsTable;
  private final GtfsGeoJsonFeaturesContainer geoJsonFeatures;

  @Inject
  UniqueGeographyIdValidator(
      GtfsGeoJsonFeaturesContainer geoJsonFeatures,
      GtfsStopTableContainer stopTable,
      GtfsLocationGroupsTableContainer locationGroupTable) {
    this.geoJsonFeatures = geoJsonFeatures;
    this.stopTable = stopTable;
    this.locationGroupsTable = locationGroupTable;
  }

  /**
   * Emits one {@link DuplicateGeographyIdNotice} per id that is used by more than one source.
   *
   * @param noticeContainer the container that receives the generated notices
   */
  @Override
  public void validate(NoticeContainer noticeContainer) {
    // Collect one entry per entity, tagged with the source it was read from.
    Stream<IdEntry> allEntries =
        Stream.concat(
            geoJsonFeatures.getEntities().stream()
                .map(f -> new IdEntry(f.featureId(), IdSource.GEOJSON, f.featureIndex())),
            Stream.concat(
                stopTable.getEntities().stream()
                    .map(s -> new IdEntry(s.stopId(), IdSource.STOPS, s.csvRowNumber())),
                locationGroupsTable.getEntities().stream()
                    .map(
                        g ->
                            new IdEntry(
                                g.locationGroupId(),
                                IdSource.LOCATION_GROUPS,
                                g.csvRowNumber()))));

    // Null ids would make groupingBy throw, and blank ids are not real identifiers, so both are
    // left out. A LinkedHashMap keeps the notices in first-seen order instead of hash order.
    Map<String, List<IdEntry>> entriesById =
        allEntries
            .filter(entry -> entry.id() != null && !entry.id().isEmpty())
            .collect(
                Collectors.groupingBy(IdEntry::id, LinkedHashMap::new, Collectors.toList()));

    for (Map.Entry<String, List<IdEntry>> group : entriesById.entrySet()) {
      List<IdEntry> entries = group.getValue();
      Set<IdSource> sources = EnumSet.noneOf(IdSource.class);
      for (IdEntry entry : entries) {
        sources.add(entry.source());
      }
      if (sources.size() < 2) {
        // The id lives in a single source only: not a cross-file duplicate.
        continue;
      }
      noticeContainer.addValidationNotice(
          new DuplicateGeographyIdNotice(
              group.getKey(),
              getRowNumber(entries, IdSource.STOPS),
              getRowNumber(entries, IdSource.LOCATION_GROUPS),
              getRowNumber(entries, IdSource.GEOJSON)));
    }
  }

  /**
   * Returns the index of the first entry that comes from {@code source}, or {@code null} when none
   * of the entries comes from it.
   */
  private static Integer getRowNumber(List<IdEntry> entries, IdSource source) {
    return entries.stream()
        .filter(e -> e.source() == source)
        .map(IdEntry::instanceIndex)
        .findFirst()
        .orElse(null);
  }

  /** The places a geography id can be read from. */
  private enum IdSource {
    STOPS,
    LOCATION_GROUPS,
    GEOJSON
  }

  /** Immutable holder pairing an id with its source and its index within that source. */
  private static final class IdEntry {
    private final String id;
    private final IdSource source;
    private final int instanceIndex;

    IdEntry(String id, IdSource source, int instanceIndex) {
      this.id = id;
      this.source = source;
      this.instanceIndex = instanceIndex;
    }

    int instanceIndex() {
      return instanceIndex;
    }

    String id() {
      return id;
    }

    IdSource source() {
      return source;
    }
  }

  /**
   * Geography id is duplicated across multiple files.
   *
   * <p>ID must be unique across all `stops.stop_id`, `locations.geojson` `id`, and
   * `location_groups.location_group_id` values.
   */
  @GtfsValidationNotice(
      severity = ERROR,
      // NOTE(review): GtfsLocationGroupsSchema used to be listed twice; the duplicate was dropped.
      // GtfsStopTimeSchema looks like it was meant to reference the stops file (the ids checked
      // here come from the stops table, not from stop_times) - confirm and adjust.
      files =
          @GtfsValidationNotice.FileRefs({
            GtfsLocationGroupsSchema.class,
            GtfsStopTimeSchema.class
          }),
      sections = @GtfsValidationNotice.SectionRefs(FILE_REQUIREMENTS))
  public static class DuplicateGeographyIdNotice extends ValidationNotice {

    /** The geography id that is duplicated. */
    private final String geographyId;

    /** The csv row number in stops.txt, or null when the id is not used there. */
    private final Integer csvRowNumberA;

    // NOTE(review): this value is taken from the GtfsLocationGroupsTableContainer entities, so it
    // presumably refers to location_groups.txt rather than location_group_stops.txt - confirm.
    /** The csv row number of the location group, or null when the id is not used there. */
    private final Integer csvRowNumberB;

    /** The feature index in locations.geojson, or null when the id is not used there. */
    private final Integer featureIndex;

    public DuplicateGeographyIdNotice(
        String geographyId, Integer csvRowNumberA, Integer csvRowNumberB, Integer featureIndex) {
      this.geographyId = geographyId;
      this.csvRowNumberA = csvRowNumberA;
      this.csvRowNumberB = csvRowNumberB;
      this.featureIndex = featureIndex;
    }
  }
}
Loading

0 comments on commit 6e2c75e

Please sign in to comment.