Skip to content

Commit

Permalink
[WIP]
Browse files Browse the repository at this point in the history
  • Loading branch information
vkhrystiuk-ks committed Dec 21, 2024
1 parent b184c83 commit 7e66c66
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 122 deletions.
12 changes: 2 additions & 10 deletions src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package liqp.filters.date.fuzzy;

import static liqp.filters.date.fuzzy.extractors.Extractors.ISO8601YMDPatternExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.englishDateExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.allYMDPatternExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.fullMonthExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.fullWeekdaysExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.plainYearExtractor;
Expand Down Expand Up @@ -55,14 +54,7 @@ List<Part> recognizePart(List<Part> parts, DatePatternRecognizingContext ctx) {
ctx.hasTime = false;
}
if (notSet(ctx.hasYear)) {
LookupResult result = lookup(parts, ISO8601YMDPatternExtractor.get(ctx.locale));
if (result.found) {
ctx.hasYear = true;
ctx.hasMonth = true;
ctx.hasDay = true;
return result.parts;
}
result = lookup(parts, englishDateExtractor.get(ctx.locale));
LookupResult result = lookup(parts, allYMDPatternExtractor.get(ctx.locale));
if (result.found) {
ctx.hasYear = true;
ctx.hasMonth = true;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package liqp.filters.date.fuzzy.extractors;

import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pD;
import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pM;
import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pY2;
import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pY4;
import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pp;

import java.util.ArrayList;
import java.util.List;
import liqp.filters.date.fuzzy.PartExtractor;

/**
 * Composite {@link PartExtractor} that tries a fixed, ordered list of numeric
 * year/month/day patterns and returns the result of the first one that matches.
 *
 * <p>Order matters: patterns with a four-digit year are tried before their
 * two-digit-year counterparts, and within each group the month-first
 * ({@code MM/dd/...}) form is tried before the day-first ({@code d/M/...})
 * form, so an ambiguous input such as {@code 01/02/2024} is read month-first.
 *
 * <p>Several entries produce the same pattern as an earlier entry (for example
 * the "cold Europe" form equals ISO 8601). They are kept so that every
 * documented locale format is listed explicitly; because the first match wins,
 * such duplicates never change the outcome.
 */
public class AllYMDPatternExtractor implements PartExtractor {

    /** Delegates, in the priority order in which they are tried. */
    private final List<AnyYMDPatternExtractor> extractors = new ArrayList<>();

    /** Registers all supported patterns: four-digit-year forms first, then two-digit-year forms. */
    public AllYMDPatternExtractor() {
        // ---- four-digit year ----
        // ISO 8601: yyyy-MM-dd
        extractors.add(new AnyYMDPatternExtractor(pY4(), pp("-"), pM(), pp("-"), pD()));
        // American: MM/dd/yyyy
        extractors.add(new AnyYMDPatternExtractor(pM(), pp("/"), pD(), pp("/"), pY4()));
        // next are top-rated locale formats, according to gpt
        // Indian: d-M-yyyy
        extractors.add(new AnyYMDPatternExtractor(pD(), pp("-"), pM(), pp("-"), pY4()));
        // Chinese: yyyy/M/d
        extractors.add(new AnyYMDPatternExtractor(pY4(), pp("/"), pM(), pp("/"), pD()));
        // English: d/M/yyyy
        extractors.add(new AnyYMDPatternExtractor(pD(), pp("/"), pM(), pp("/"), pY4()));
        // Slavic: d.M.yyyy
        extractors.add(new AnyYMDPatternExtractor(pD(), pp("."), pM(), pp("."), pY4()));
        // "Cold Europe": yyyy-MM-dd (same pattern as ISO 8601 above)
        extractors.add(new AnyYMDPatternExtractor(pY4(), pp("-"), pM(), pp("-"), pD()));
        // Espana: yyyy/MM/dd (separator now matches the documented format;
        // same pattern as the Chinese form above)
        extractors.add(new AnyYMDPatternExtractor(pY4(), pp("/"), pM(), pp("/"), pD()));

        // ---- two-digit year ----
        // American: MM/dd/yy (previously built with pY4(), which made it a
        // duplicate of MM/dd/yyyy and left MM/dd/yy unrecognised)
        extractors.add(new AnyYMDPatternExtractor(pM(), pp("/"), pD(), pp("/"), pY2()));
        // Indian: d-M-yy
        extractors.add(new AnyYMDPatternExtractor(pD(), pp("-"), pM(), pp("-"), pY2()));
        // Chinese: yy/M/d
        extractors.add(new AnyYMDPatternExtractor(pY2(), pp("/"), pM(), pp("/"), pD()));
        // English: d/M/yy
        extractors.add(new AnyYMDPatternExtractor(pD(), pp("/"), pM(), pp("/"), pY2()));
        // Slavic: d.M.yy
        extractors.add(new AnyYMDPatternExtractor(pD(), pp("."), pM(), pp("."), pY2()));
        // "Cold Europe": yy-MM-dd
        extractors.add(new AnyYMDPatternExtractor(pY2(), pp("-"), pM(), pp("-"), pD()));
        // Espana: yy/MM/dd (separator now matches the documented format;
        // same pattern as the Chinese two-digit form above)
        extractors.add(new AnyYMDPatternExtractor(pY2(), pp("/"), pM(), pp("/"), pD()));
    }

    /**
     * Runs the registered extractors in order against {@code source}.
     *
     * @param source text to search for a numeric date
     * @return the result of the first extractor whose {@code found} flag is set;
     *         otherwise a freshly constructed {@code PartExtractorResult}
     *         (presumably meaning "not found" - confirm against that class)
     */
    @Override
    public PartExtractorResult extract(String source) {
        for (AnyYMDPatternExtractor extractor : extractors) {
            PartExtractorResult result = extractor.extract(source);
            if (result.found) {
                return result;
            }
        }
        return new PartExtractorResult();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@
import java.util.Optional;
import java.util.regex.Matcher;

abstract class AnyYMDPatternExtractor extends RegexPartExtractor {
class AnyYMDPatternExtractor extends RegexPartExtractor {

public enum RuleType {
Y, M, D, PUNCTUATION;
}
public static class RulePart {
private final RuleType type;
private final Integer[] length;
private final Integer length;
private final String content;
private RulePart(RuleType type, String content) {
this.type = type;
this.content = content;
this.length = new Integer[0];
this.length = null;
}

private RulePart(RuleType type, Integer[] length) {
private RulePart(RuleType type, Integer length) {
this.type = type;
this.length = length;
this.content = null;
Expand All @@ -32,17 +32,17 @@ private RulePart(RuleType type, Integer[] length) {
static RulePart pp(String content) {
return new RulePart(RuleType.PUNCTUATION, content);
}
static RulePart pY(Integer length) {
return new RulePart(RuleType.Y, new Integer[]{length});
static RulePart pY4() {
return new RulePart(RuleType.Y, 4);
}
static RulePart pY(Integer length1, Integer length2) {
return new RulePart(RuleType.Y, new Integer[]{length1, length2});
static RulePart pY2() {
return new RulePart(RuleType.Y, 2);
}
static RulePart pM() {
return new RulePart(RuleType.M, (Integer[])null);
return new RulePart(RuleType.M, (Integer)null);
}
static RulePart pD() {
return new RulePart(RuleType.D, (Integer[])null);
return new RulePart(RuleType.D, (Integer)null);
}
private final RulePart[] partsInOrder;
protected AnyYMDPatternExtractor(RulePart... partsInOrder) {
Expand All @@ -54,18 +54,17 @@ private static String reconstructPattern(RulePart[] partsInOrder) {
StringBuilder sb = new StringBuilder("(?:^|.*?\\D)");
for (RulePart part : partsInOrder) {
if (part.type == RuleType.PUNCTUATION) {
sb.append(part.content);
if (".".equals(part.content)) {
sb.append("\\.");
} else {
sb.append(part.content);
}
} else {
if (part.type == RuleType.Y) {
if (part.length == null || part.length.length == 0) {
if (part.length == null) {
throw new IllegalArgumentException("Year part must have length");
}
if (part.length.length == 1) {
sb.append("(?<year>\\d{").append(part.length[0]).append("})");
} else {
sb.append("(?<year>\\d{").append(part.length[0]).append("}|\\d{")
.append(part.length[1]).append("})");
}
sb.append("(?<year>\\d{").append(part.length).append("})");
} else if (part.type == RuleType.M) {
sb.append("(?<month>0?[1-9]|1[0-2])");
} else if (part.type == RuleType.D) {
Expand Down

This file was deleted.

11 changes: 2 additions & 9 deletions src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,21 +82,14 @@ public PartExtractor get(Locale locale) {
* BASIC_ISO_DATE = 20111203 (yyyyMMdd)
*
*/
ISO8601YMDPatternExtractor {
private final PartExtractor partExtractor = new ISO8601YMDPatternExtractor();
allYMDPatternExtractor {
private final PartExtractor partExtractor = new AllYMDPatternExtractor();
@Override
public PartExtractor get(Locale locale) {
return partExtractor;
}
},

englishDateExtractor {
private final PartExtractor partExtractor = new EnglishDMYPatternExtractor();
@Override
public PartExtractor get(Locale locale) {
return partExtractor;
}
},
;

public abstract PartExtractor get(Locale locale);
Expand Down

This file was deleted.

This file was deleted.

0 comments on commit 7e66c66

Please sign in to comment.