Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use watchfiles as a file watching backend #3151

Merged
merged 7 commits into from
Oct 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 8 additions & 17 deletions pelican/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from pelican.readers import Readers
from pelican.server import ComplexHTTPRequestHandler, RootedHTTPServer
from pelican.settings import read_settings
from pelican.utils import (FileSystemWatcher, clean_output_dir, maybe_pluralize)
from pelican.utils import clean_output_dir, maybe_pluralize, wait_for_changes
from pelican.writers import Writer

try:
Expand Down Expand Up @@ -452,26 +452,19 @@ def autoreload(args, excqueue=None):
console.print(' --- AutoReload Mode: Monitoring `content`, `theme` and'
' `settings` for changes. ---')
pelican, settings = get_instance(args)
watcher = FileSystemWatcher(args.settings, Readers, settings)
sleep = False
settings_file = os.path.abspath(args.settings)
while True:
try:
# Don't sleep first time, but sleep afterwards to reduce cpu load
if sleep:
time.sleep(0.5)
else:
sleep = True
pelican.run()

modified = watcher.check()
changed_files = wait_for_changes(args.settings, Readers, settings)
changed_files = {c[1] for c in changed_files}

if modified['settings']:
if settings_file in changed_files:
pelican, settings = get_instance(args)
watcher.update_watchers(settings)

if any(modified.values()):
console.print('\n-> Modified: {}. re-generating...'.format(
', '.join(k for k, v in modified.items() if v)))
pelican.run()
console.print('\n-> Modified: {}. re-generating...'.format(
', '.join(changed_files)))

except KeyboardInterrupt:
if excqueue is not None:
Expand Down Expand Up @@ -558,8 +551,6 @@ def main(argv=None):
listen(settings.get('BIND'), settings.get('PORT'),
settings.get("OUTPUT_PATH"))
else:
watcher = FileSystemWatcher(args.settings, Readers, settings)
watcher.check()
with console.status("Generating..."):
pelican.run()
except KeyboardInterrupt:
Expand Down
88 changes: 0 additions & 88 deletions pelican/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
import os
import shutil
import time
from datetime import timezone
from sys import platform
from tempfile import mkdtemp
Expand All @@ -14,7 +13,6 @@

from pelican import utils
from pelican.generators import TemplatePagesGenerator
from pelican.readers import Readers
from pelican.settings import read_settings
from pelican.tests.support import (LoggedTestCase, get_article,
locale_available, unittest)
Expand Down Expand Up @@ -412,92 +410,6 @@ def test_process_translations(self):
self.assertNotIn(a_arts[4], b_arts[5].translations)
self.assertNotIn(a_arts[5], b_arts[4].translations)

def test_filesystemwatcher(self):
def create_file(name, content):
with open(name, 'w') as f:
f.write(content)

# disable logger filter
from pelican.utils import logger
logger.disable_filter()

# create a temp "project" dir
root = mkdtemp()
content_path = os.path.join(root, 'content')
static_path = os.path.join(root, 'content', 'static')
config_file = os.path.join(root, 'config.py')
theme_path = os.path.join(root, 'mytheme')

# populate
os.mkdir(content_path)
os.mkdir(theme_path)
create_file(config_file,
'PATH = "content"\n'
'THEME = "mytheme"\n'
'STATIC_PATHS = ["static"]')

t = time.time() - 1000 # make sure it's in the "past"
os.utime(config_file, (t, t))
settings = read_settings(config_file)

watcher = utils.FileSystemWatcher(config_file, Readers, settings)
# should get a warning for static not existing
self.assertLogCountEqual(1, 'Watched path does not exist: .*static')

# create it and update config
os.mkdir(static_path)
watcher.update_watchers(settings)
# no new warning
self.assertLogCountEqual(1, 'Watched path does not exist: .*static')

# get modified values
modified = watcher.check()
# empty theme and content should raise warnings
self.assertLogCountEqual(1, 'No valid files found in content')
self.assertLogCountEqual(1, 'Empty theme folder. Using `basic` theme')

self.assertIsNone(modified['content']) # empty
self.assertIsNone(modified['theme']) # empty
self.assertIsNone(modified['[static]static']) # empty
self.assertTrue(modified['settings']) # modified, first time

# add a content, add file to theme and check again
create_file(os.path.join(content_path, 'article.md'),
'Title: test\n'
'Date: 01-01-2020')

create_file(os.path.join(theme_path, 'dummy'),
'test')

modified = watcher.check()
# no new warning
self.assertLogCountEqual(1, 'No valid files found in content')
self.assertLogCountEqual(1, 'Empty theme folder. Using `basic` theme')

self.assertIsNone(modified['[static]static']) # empty
self.assertFalse(modified['settings']) # not modified
self.assertTrue(modified['theme']) # modified
self.assertTrue(modified['content']) # modified

# change config, remove static path
create_file(config_file,
'PATH = "content"\n'
'THEME = "mytheme"\n'
'STATIC_PATHS = []')

settings = read_settings(config_file)
watcher.update_watchers(settings)

modified = watcher.check()
self.assertNotIn('[static]static', modified) # should be gone
self.assertTrue(modified['settings']) # modified
self.assertFalse(modified['content']) # not modified
self.assertFalse(modified['theme']) # not modified

# cleanup
logger.enable_filter()
shutil.rmtree(root)

def test_clean_output_dir(self):
retention = ()
test_directory = os.path.join(self.temp_output,
Expand Down
189 changes: 31 additions & 158 deletions pelican/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from backports.zoneinfo import ZoneInfo
from markupsafe import Markup

import watchfiles


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -755,167 +757,38 @@ def order_content(content_list, order_by='slug'):
return content_list


class FileSystemWatcher:
def __init__(self, settings_file, reader_class, settings=None):
self.watchers = {
'settings': FileSystemWatcher.file_watcher(settings_file)
}

self.settings = None
self.reader_class = reader_class
self._extensions = None
self._content_path = None
self._theme_path = None
self._ignore_files = None

if settings is not None:
self.update_watchers(settings)

def update_watchers(self, settings):
new_extensions = set(self.reader_class(settings).extensions)
new_content_path = settings.get('PATH', '')
new_theme_path = settings.get('THEME', '')
new_ignore_files = set(settings.get('IGNORE_FILES', []))

extensions_changed = new_extensions != self._extensions
content_changed = new_content_path != self._content_path
theme_changed = new_theme_path != self._theme_path
ignore_changed = new_ignore_files != self._ignore_files

# Refresh content watcher if related settings changed
if extensions_changed or content_changed or ignore_changed:
self.add_watcher('content',
new_content_path,
new_extensions,
new_ignore_files)

# Refresh theme watcher if related settings changed
if theme_changed or ignore_changed:
self.add_watcher('theme',
new_theme_path,
[''],
new_ignore_files)

# Watch STATIC_PATHS
old_static_watchers = set(key
for key in self.watchers
if key.startswith('[static]'))

for path in settings.get('STATIC_PATHS', []):
key = '[static]{}'.format(path)
if ignore_changed or (key not in self.watchers):
self.add_watcher(
key,
os.path.join(new_content_path, path),
[''],
new_ignore_files)
if key in old_static_watchers:
old_static_watchers.remove(key)

# cleanup removed static watchers
for key in old_static_watchers:
del self.watchers[key]

# update values
self.settings = settings
self._extensions = new_extensions
self._content_path = new_content_path
self._theme_path = new_theme_path
self._ignore_files = new_ignore_files

def check(self):
'''return a key:watcher_status dict for all watchers'''
result = {key: next(watcher) for key, watcher in self.watchers.items()}

# Various warnings
if result.get('content') is None:
reader_descs = sorted(
{
' | %s (%s)' % (type(r).__name__, ', '.join(r.file_extensions))
for r in self.reader_class(self.settings).readers.values()
if r.enabled
}
)
logger.warning(
'No valid files found in content for the active readers:\n'
+ '\n'.join(reader_descs))

if result.get('theme') is None:
logger.warning('Empty theme folder. Using `basic` theme.')

return result

def add_watcher(self, key, path, extensions=[''], ignores=[]):
watcher = self.get_watcher(path, extensions, ignores)
if watcher is not None:
self.watchers[key] = watcher

def get_watcher(self, path, extensions=[''], ignores=[]):
'''return a watcher depending on path type (file or folder)'''
def wait_for_changes(settings_file, reader_class, settings):
content_path = settings.get('PATH', '')
theme_path = settings.get('THEME', '')
ignore_files = set(
fnmatch.translate(pattern) for pattern in settings.get('IGNORE_FILES', [])
)

candidate_paths = [
settings_file,
theme_path,
content_path,
]

candidate_paths.extend(
os.path.join(content_path, path) for path in settings.get('STATIC_PATHS', [])
)

watching_paths = []
for path in candidate_paths:
if not path:
continue
path = os.path.abspath(path)
if not os.path.exists(path):
logger.warning("Watched path does not exist: %s", path)
return None

if os.path.isdir(path):
return self.folder_watcher(path, extensions, ignores)
logger.warning("Unable to watch path '%s' as it does not exist.", path)
else:
return self.file_watcher(path)

@staticmethod
def folder_watcher(path, extensions, ignores=[]):
'''Generator for monitoring a folder for modifications.

Returns a boolean indicating if files are changed since last check.
Returns None if there are no matching files in the folder'''
watching_paths.append(path)

def file_times(path):
'''Return `mtime` for each file in path'''

for root, dirs, files in os.walk(path, followlinks=True):
dirs[:] = [x for x in dirs if not x.startswith(os.curdir)]

for f in files:
valid_extension = f.endswith(tuple(extensions))
file_ignored = any(
fnmatch.fnmatch(f, ignore) for ignore in ignores
)
if valid_extension and not file_ignored:
try:
yield os.stat(os.path.join(root, f)).st_mtime
except OSError as e:
logger.warning('Caught Exception: %s', e)

LAST_MTIME = 0
while True:
try:
mtime = max(file_times(path))
if mtime > LAST_MTIME:
LAST_MTIME = mtime
yield True
except ValueError:
yield None
else:
yield False

@staticmethod
def file_watcher(path):
'''Generator for monitoring a file for modifications'''
LAST_MTIME = 0
while True:
if path:
try:
mtime = os.stat(path).st_mtime
except OSError as e:
logger.warning('Caught Exception: %s', e)
continue

if mtime > LAST_MTIME:
LAST_MTIME = mtime
yield True
else:
yield False
else:
yield None
return next(watchfiles.watch(
*watching_paths,
watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files),
rust_timeout=0
))


def set_date_tzinfo(d, tz_name=None):
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ rich = ">=10.1"
unidecode = ">=1.1"
markdown = {version = ">=3.1", optional = true}
backports-zoneinfo = {version = "^0.2.1", python = "<3.9"}
watchfiles = "^0.19.0"

[tool.poetry.dev-dependencies]
BeautifulSoup4 = "^4.9"
Expand Down
15 changes: 12 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,18 @@

version = "4.8.0"

requires = ['feedgenerator >= 1.9', 'jinja2 >= 2.7', 'pygments',
'docutils>=0.15', 'blinker', 'unidecode', 'python-dateutil',
'rich', 'backports-zoneinfo[tzdata] >= 0.2; python_version<"3.9"']
requires = [
'feedgenerator >= 1.9',
'jinja2 >= 2.7',
'pygments',
'docutils>=0.15',
'blinker',
'unidecode',
'python-dateutil',
'rich',
'backports-zoneinfo[tzdata] >= 0.2; python_version<"3.9"',
'watchfiles'
]

entry_points = {
'console_scripts': [
Expand Down