-
Notifications
You must be signed in to change notification settings - Fork 520
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
make date-check more lightweight #1394
Changes from 8 commits
b570c88
5b5ab38
5524470
d946154
9eef34f
c9fcae3
f4803a7
e82c245
df6b805
3faf82c
74b9b45
ea2f1c2
e697cb6
dcfb973
ade2924
2a5b586
483a0a5
456008c
c60a888
86ecc94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,11 +3,12 @@ use std::{ | |
convert::TryInto as _, | ||
env, fmt, fs, | ||
path::{Path, PathBuf}, | ||
str::FromStr, | ||
}; | ||
|
||
use chrono::{Datelike as _, TimeZone as _, Utc}; | ||
use chrono::{Datelike as _, Month, TimeZone as _, Utc}; | ||
use glob::glob; | ||
use regex::Regex; | ||
use regex::{Regex, RegexSet}; | ||
|
||
#[derive(Debug, Copy, Clone, PartialEq, Eq)] | ||
struct Date { | ||
|
@@ -35,50 +36,56 @@ impl fmt::Display for Date { | |
} | ||
} | ||
|
||
fn make_date_regex() -> Regex { | ||
Regex::new( | ||
r"(?x) # insignificant whitespace mode | ||
<!--\s* | ||
[dD]ate:\s* | ||
(?P<y>\d{4}) # year | ||
- | ||
(?P<m>\d{2}) # month | ||
\s*-->", | ||
) | ||
.unwrap() | ||
fn make_date_regex() -> Vec<Regex> { | ||
let patterns = [ | ||
r"<!--\s+date-check:\s+(\w+)\s+(\d+{4})\s+-->", | ||
r"<!--\s+date-check\s+-->\s+(\w+)\s+(\d+{4})", | ||
]; | ||
let set = RegexSet::new(&patterns).unwrap(); | ||
set.patterns() | ||
.iter() | ||
.map(|pattern| Regex::new(pattern).unwrap()) | ||
.collect() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does this usage of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a confused mess is what it is |
||
} | ||
|
||
fn collect_dates_from_file(date_regex: &Regex, text: &str) -> Vec<(usize, Date)> { | ||
let mut line = 1; | ||
let mut end_of_last_cap = 0; | ||
date_regex | ||
.captures_iter(&text) | ||
.map(|cap| { | ||
( | ||
cap.get(0).unwrap().range(), | ||
Date { | ||
year: cap["y"].parse().unwrap(), | ||
month: cap["m"].parse().unwrap(), | ||
}, | ||
) | ||
}) | ||
.map(|(byte_range, date)| { | ||
line += text[end_of_last_cap..byte_range.end] | ||
.chars() | ||
.filter(|c| *c == '\n') | ||
.count(); | ||
end_of_last_cap = byte_range.end; | ||
(line, date) | ||
}) | ||
.collect() | ||
fn collect_dates_from_file(date_regexes: &[Regex], text: &str) -> Vec<(usize, Date)> { | ||
let mut output = Vec::new(); | ||
for date_regex in date_regexes { | ||
let mut line = 1; | ||
let mut end_of_last_cap = 0; | ||
let results: Vec<_> = date_regex | ||
.captures_iter(text) | ||
.filter_map(|cap| { | ||
if let (Some(year), Some(month)) = (cap.get(2), cap.get(1)) { | ||
let year = year.as_str().parse().expect("year"); | ||
let month = Month::from_str(month.as_str()) | ||
.expect("month") | ||
.number_from_month(); | ||
Some((cap.get(0).expect("all").range(), Date { year, month })) | ||
} else { | ||
None | ||
} | ||
}) | ||
.map(|(byte_range, date)| { | ||
line += text[end_of_last_cap..byte_range.end] | ||
.chars() | ||
.filter(|c| *c == '\n') | ||
.count(); | ||
end_of_last_cap = byte_range.end; | ||
(line, date) | ||
}) | ||
.collect(); | ||
output.extend(results); | ||
} | ||
output | ||
camelid marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
fn collect_dates(paths: impl Iterator<Item = PathBuf>) -> BTreeMap<PathBuf, Vec<(usize, Date)>> { | ||
let date_regex = make_date_regex(); | ||
let date_regexes = make_date_regex(); | ||
let mut data = BTreeMap::new(); | ||
for path in paths { | ||
let text = fs::read_to_string(&path).unwrap(); | ||
let dates = collect_dates_from_file(&date_regex, &text); | ||
let dates = collect_dates_from_file(&date_regexes, &text); | ||
if !dates.is_empty() { | ||
data.insert(path, dates); | ||
} | ||
|
@@ -182,61 +189,129 @@ mod tests { | |
|
||
#[test] | ||
fn test_date_regex() { | ||
let regex = make_date_regex(); | ||
assert!(regex.is_match("foo <!-- date: 2021-01 --> bar")); | ||
} | ||
|
||
#[test] | ||
fn test_date_regex_capitalized() { | ||
let regex = make_date_regex(); | ||
assert!(regex.is_match("foo <!-- Date: 2021-08 --> bar")); | ||
let regexes = &make_date_regex(); | ||
assert!(regexes[0].is_match("<!-- date-check: jan 2021 -->")); | ||
assert!(regexes[0].is_match("<!-- date-check: january 2021 -->")); | ||
assert!(regexes[0].is_match("<!-- date-check: Jan 2021 -->")); | ||
assert!(regexes[0].is_match("<!-- date-check: January 2021 -->")); | ||
assert!(regexes[1].is_match("<!-- date-check --> jan 2021")); | ||
assert!(regexes[1].is_match("<!-- date-check --> january 2021")); | ||
assert!(regexes[1].is_match("<!-- date-check --> Jan 2021")); | ||
assert!(regexes[1].is_match("<!-- date-check --> January 2021")); | ||
} | ||
|
||
#[test] | ||
fn test_collect_dates_from_file() { | ||
let text = "Test1\n<!-- date: 2021-01 -->\nTest2\nFoo<!-- date: 2021-02 \ | ||
-->\nTest3\nTest4\nFoo<!-- date: 2021-03 -->Bar\n<!-- date: 2021-04 \ | ||
-->\nTest5\nTest6\nTest7\n<!-- date: \n\n2021-05 -->\nTest8 | ||
let text = r" | ||
Test1 | ||
<!-- date-check: jan 2021 --> | ||
Test2 | ||
Foo<!-- date-check: february 2021 | ||
--> | ||
Test3 | ||
Test4 | ||
Foo<!-- date-check: Mar 2021 -->Bar | ||
<!-- date-check: April 2021 | ||
--> | ||
Test5 | ||
Test6 | ||
Test7 | ||
<!-- date-check: | ||
|
||
may 2021 --> | ||
Test8 | ||
Test1 | ||
<!-- date-check --> jan 2021 | ||
Test2 | ||
Foo<!-- date-check | ||
--> february 2021 | ||
Test3 | ||
Test4 | ||
Foo<!-- date-check --> mar 2021 Bar | ||
<!-- date-check | ||
--> apr 2021 | ||
Test5 | ||
Test6 | ||
Test7 | ||
<!-- date-check | ||
|
||
--> may 2021 | ||
Test8 \ | ||
"; | ||
assert_eq!( | ||
collect_dates_from_file(&make_date_regex(), text), | ||
vec![ | ||
( | ||
2, | ||
3, | ||
Date { | ||
year: 2021, | ||
month: 1, | ||
} | ||
), | ||
( | ||
6, | ||
Date { | ||
year: 2021, | ||
month: 2, | ||
} | ||
), | ||
( | ||
9, | ||
Date { | ||
year: 2021, | ||
month: 3, | ||
} | ||
), | ||
( | ||
11, | ||
Date { | ||
year: 2021, | ||
month: 4, | ||
} | ||
), | ||
( | ||
17, | ||
Date { | ||
year: 2021, | ||
month: 5, | ||
} | ||
), | ||
( | ||
20, | ||
Date { | ||
year: 2021, | ||
month: 1, | ||
} | ||
), | ||
( | ||
4, | ||
23, | ||
Date { | ||
year: 2021, | ||
month: 2, | ||
} | ||
), | ||
( | ||
7, | ||
26, | ||
Date { | ||
year: 2021, | ||
month: 3, | ||
} | ||
), | ||
( | ||
8, | ||
28, | ||
Date { | ||
year: 2021, | ||
month: 4, | ||
} | ||
), | ||
( | ||
14, | ||
34, | ||
Date { | ||
year: 2021, | ||
month: 5, | ||
} | ||
), | ||
] | ||
], | ||
); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -437,17 +437,28 @@ Just a few things to keep in mind: | |
the project. | ||
|
||
- The date the comment was added, e.g. instead of writing _"Currently, ..."_ | ||
or _"As of now, ..."_, consider writing | ||
_"As of January 2021, ..."_. | ||
Try to format the date as `<MONTH> <YEAR>` to ease search. | ||
tshepang marked this conversation as resolved.
Show resolved
Hide resolved
|
||
or _"As of now, ..."_, | ||
consider adding the date, in one of the following formats: | ||
- Jan 2021 | ||
- January 2021 | ||
- jan 2021 | ||
- january 2021 | ||
|
||
- Additionally, include a machine-readable comment of the form `<!-- date: | ||
2022-04 -->` (if the current month is April 2022). We have an automated | ||
tool that uses these (in `ci/date-check`). | ||
There is a CI action (in "~/.github/workflows/date-check.yml") | ||
tshepang marked this conversation as resolved.
Show resolved
Hide resolved
|
||
that generates a monthly issue with any of these that are over 6 months old. | ||
|
||
So, for the month of April 2022, the comment would look like: `As of <!-- | ||
date: 2022-04 --> April 2022`. Make sure to put the comment *between* `as of` | ||
and `April 2022`; see [PR #1066][rdg#1066] for the rationale. | ||
For the action to pick the date, add this annotation: | ||
|
||
<!-- date-check --> | ||
|
||
Example: | ||
|
||
As of <!-- date-check --> Jul 2022, the foo did the bar. | ||
|
||
For cases where the date should not be part of the visible rendered output, | ||
use the following instead: | ||
|
||
<!-- date-check: Jul 2022 --> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: could you use fenced code blocks and mark it as markdown? |
||
|
||
- A link to a relevant WG, tracking issue, `rustc` rustdoc page, or similar, that may provide | ||
further explanation for the change process or a way to verify that the information is not | ||
|
@@ -459,7 +470,6 @@ Just a few things to keep in mind: | |
|
||
[rdg]: https://rustc-dev-guide.rust-lang.org/ | ||
[rdgrepo]: https://github.com/rust-lang/rustc-dev-guide | ||
[rdg#1066]: https://github.com/rust-lang/rustc-dev-guide/pull/1066 | ||
|
||
## Issue Triage | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think the
+
after\d
is correct (it wasn't there before either):There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, could you combine these regexes into one to avoid all this complexity with making multiple passes? It might be a little more permissive wrt accepted input but I think it'd be better overall.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did this with an earlier iteration, but it moved complexity elsewhere, where I had 4 regex groups (instead of just 2). Is that preferable, or is there a better alternative?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, it's not so bad 2a5b586